mirror of
https://github.com/simon987/sist2.git
synced 2025-12-17 09:19:05 +00:00
Compare commits
63 Commits
v2.11.0
...
a96e65d039
| Author | SHA1 | Date | |
|---|---|---|---|
| a96e65d039 | |||
| 87936eecd4 | |||
|
|
d817a0e9dd | ||
| d40f5052f9 | |||
| ee9a8fa514 | |||
| 81008d8936 | |||
| 52466d5d8a | |||
| 5f73fc024b | |||
| f2fd7ccf41 | |||
| d87fee8e00 | |||
|
|
672d1344d7 | ||
| 27e32db1ed | |||
| bb91139ffb | |||
| 70cfa8c37c | |||
| 7493dedc8c | |||
| c786a31bb2 | |||
| 48d024e751 | |||
| 08b2ca9d43 | |||
| ed8b4f4fad | |||
| 66de93a8bd | |||
| e3f78fb693 | |||
| 030643cee0 | |||
| b17b9439df | |||
| 414f65346c | |||
| be8eedc9c7 | |||
| 5b62fe77f2 | |||
| 61ab68ce15 | |||
| 82ecb8bb85 | |||
| a41b5dcc1f | |||
| 06f21d5f0f | |||
| e82a388d1e | |||
| bf02e571b3 | |||
| 750a392a61 | |||
| 3d7b977a82 | |||
| cd71551a22 | |||
| 58741058cf | |||
| 0a7e59b646 | |||
| 43a566fe2f | |||
| b2631a86c8 | |||
| d0a1deca30 | |||
| b03ce90a05 | |||
| a5eacb4950 | |||
| 0887046b41 | |||
| 17fda1e540 | |||
| 34b363bfd8 | |||
| c9aa4bed72 | |||
| 7267d4bd2c | |||
| 43470e9ce6 | |||
| 0331d46fff | |||
| bbf1aca936 | |||
| 27560a82bb | |||
| f16ead1902 | |||
| e2e07e80c7 | |||
| 9499c6b189 | |||
| c5cd00b76c | |||
| ec5f07cab8 | |||
| f098f7916a | |||
| 85d67a9393 | |||
| c5ac89813f | |||
| ec5642a3df | |||
| c1de74e7eb | |||
| f31f138f2e | |||
| 6a48b219e6 |
36
.drone.yml
36
.drone.yml
@@ -10,22 +10,7 @@ steps:
|
||||
- name: build
|
||||
image: simon987/sist2-build
|
||||
commands:
|
||||
- ./ci/build.sh
|
||||
- name: docker
|
||||
image: plugins/docker
|
||||
settings:
|
||||
username:
|
||||
from_secret: DOCKER_USER
|
||||
password:
|
||||
from_secret: DOCKER_PASSWORD
|
||||
repo: simon987/sist2
|
||||
context: ./
|
||||
dockerfile: ./Dockerfile
|
||||
auto_tag: true
|
||||
auto_tag_suffix: x64-linux
|
||||
when:
|
||||
event:
|
||||
- tag
|
||||
- ./scripts/build.sh
|
||||
- name: scp files
|
||||
image: appleboy/drone-scp
|
||||
settings:
|
||||
@@ -42,6 +27,21 @@ steps:
|
||||
- ./VERSION
|
||||
- ./sist2-x64-linux
|
||||
- ./sist2-x64-linux-debug
|
||||
- name: docker
|
||||
image: plugins/docker
|
||||
settings:
|
||||
username:
|
||||
from_secret: DOCKER_USER
|
||||
password:
|
||||
from_secret: DOCKER_PASSWORD
|
||||
repo: simon987/sist2
|
||||
context: ./
|
||||
dockerfile: ./Dockerfile
|
||||
auto_tag: true
|
||||
auto_tag_suffix: x64-linux
|
||||
when:
|
||||
event:
|
||||
- tag
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
@@ -55,7 +55,7 @@ steps:
|
||||
- name: build
|
||||
image: simon987/sist2-build-arm64
|
||||
commands:
|
||||
- ./ci/build_arm64.sh
|
||||
- ./scripts/build_arm64.sh
|
||||
- name: scp files
|
||||
image: appleboy/drone-scp
|
||||
settings:
|
||||
@@ -80,7 +80,7 @@ steps:
|
||||
from_secret: DOCKER_PASSWORD
|
||||
repo: simon987/sist2
|
||||
context: ./
|
||||
dockerfile: ./Dockerfile
|
||||
dockerfile: ./Dockerfile.arm64
|
||||
auto_tag: true
|
||||
auto_tag_suffix: arm64-linux
|
||||
when:
|
||||
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -10,13 +10,13 @@ Makefile
|
||||
LOG
|
||||
sist2*
|
||||
!sist2-vue/
|
||||
index.sist2/
|
||||
*.sist2/
|
||||
bundle*.css
|
||||
bundle.js
|
||||
*.a
|
||||
vgcore.*
|
||||
build/
|
||||
third-party/
|
||||
third-party/argparse
|
||||
*.idx/
|
||||
VERSION
|
||||
git_hash.h
|
||||
|
||||
11
.gitmodules
vendored
11
.gitmodules
vendored
@@ -1,6 +1,9 @@
|
||||
[submodule "third-party/libscan"]
|
||||
path = third-party/libscan
|
||||
url = https://github.com/simon987/libscan
|
||||
[submodule "third-party/argparse"]
|
||||
path = third-party/argparse
|
||||
url = https://github.com/cofyc/argparse
|
||||
url = https://github.com/simon987/argparse
|
||||
[submodule "third-party/libscan/third-party/utf8.h"]
|
||||
path = third-party/libscan/third-party/utf8.h
|
||||
url = https://github.com/sheredom/utf8.h
|
||||
[submodule "third-party/libscan/third-party/antiword"]
|
||||
path = third-party/libscan/third-party/antiword
|
||||
url = https://github.com/simon987/antiword
|
||||
|
||||
@@ -22,9 +22,6 @@ add_subdirectory(third-party/argparse)
|
||||
|
||||
add_executable(sist2
|
||||
|
||||
# argparse
|
||||
third-party/argparse/argparse.h third-party/argparse/argparse.c
|
||||
|
||||
src/main.c
|
||||
src/sist.h
|
||||
src/io/walk.h src/io/walk.c
|
||||
@@ -41,7 +38,11 @@ add_executable(sist2
|
||||
src/log.c src/log.h
|
||||
src/cli.c src/cli.h
|
||||
src/stats.c src/stats.h src/ctx.c
|
||||
src/parsing/sidecar.c src/parsing/sidecar.h)
|
||||
src/parsing/sidecar.c src/parsing/sidecar.h
|
||||
|
||||
# argparse
|
||||
third-party/argparse/argparse.h third-party/argparse/argparse.c
|
||||
)
|
||||
|
||||
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
|
||||
@@ -86,6 +87,7 @@ if (SIST_DEBUG)
|
||||
sist2
|
||||
PRIVATE
|
||||
-fsanitize=address
|
||||
-static-libasan
|
||||
)
|
||||
set_target_properties(
|
||||
sist2
|
||||
|
||||
10
Dockerfile
10
Dockerfile
@@ -6,12 +6,10 @@ COPY . .
|
||||
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||
RUN make -j$(nproc)
|
||||
RUN strip sist2
|
||||
RUN ls -lh
|
||||
RUN ls -lh sist2-vue/dist/
|
||||
|
||||
FROM ubuntu:20.10
|
||||
FROM ubuntu:21.10
|
||||
|
||||
RUN apt update && apt install -y curl
|
||||
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN mkdir -p /usr/share/tessdata && \
|
||||
cd /usr/share/tessdata/ && \
|
||||
@@ -22,9 +20,9 @@ RUN mkdir -p /usr/share/tessdata && \
|
||||
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
||||
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
|
||||
|
||||
COPY --from=build /build/sist2 /root/sist2
|
||||
ENTRYPOINT ["/root/sist2"]
|
||||
|
||||
ENV LANG C.UTF-8
|
||||
ENV LC_ALL C.UTF-8
|
||||
|
||||
ENTRYPOINT ["/root/sist2"]
|
||||
COPY --from=build /build/sist2 /root/sist2
|
||||
|
||||
@@ -7,9 +7,9 @@ RUN cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE
|
||||
RUN make -j$(nproc)
|
||||
RUN strip sist2
|
||||
|
||||
FROM ubuntu:20.10
|
||||
FROM --platform="linux/arm64/v8" ubuntu:21.10
|
||||
|
||||
RUN apt update && apt install -y curl
|
||||
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN mkdir -p /usr/share/tessdata && \
|
||||
cd /usr/share/tessdata/ && \
|
||||
@@ -20,9 +20,9 @@ RUN mkdir -p /usr/share/tessdata && \
|
||||
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
||||
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
|
||||
|
||||
COPY --from=build /build/sist2 /root/sist2
|
||||
|
||||
ENV LANG C.UTF-8
|
||||
ENV LC_ALL C.UTF-8
|
||||
|
||||
ENTRYPOINT ["/root/sist2"]
|
||||
ENTRYPOINT ["/root/sist2"]
|
||||
|
||||
COPY --from=build /build/sist2 /root/sist2
|
||||
28
README.md
28
README.md
@@ -2,7 +2,7 @@
|
||||
[](https://www.codefactor.io/repository/github/simon987/sist2)
|
||||
[](https://files.simon987.net/.gate/sist2/simon987_sist2/)
|
||||
|
||||
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)
|
||||
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
|
||||
|
||||
# sist2
|
||||
|
||||
@@ -10,7 +10,7 @@ sist2 (Simple incremental search tool)
|
||||
|
||||
*Warning: sist2 is in early development*
|
||||
|
||||

|
||||

|
||||
|
||||
## Features
|
||||
|
||||
@@ -33,12 +33,11 @@ sist2 (Simple incremental search tool)
|
||||
|
||||
## Getting Started
|
||||
|
||||
1. Have an Elasticsearch (>= 6.X.X) instance running
|
||||
1. Have an Elasticsearch (>= 6.8.X, ideally >=7.14.0) instance running
|
||||
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
|
||||
1. *(or)* Run using docker:
|
||||
```bash
|
||||
docker run -d --name es1 --net sist2_net -p 9200:9200 \
|
||||
-e "discovery.type=single-node" elasticsearch:7.14.0
|
||||
docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.14.0
|
||||
```
|
||||
1. *(or)* Run using docker-compose:
|
||||
```yaml
|
||||
@@ -50,8 +49,9 @@ sist2 (Simple incremental search tool)
|
||||
```
|
||||
1. Download sist2 executable
|
||||
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
|
||||
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)*
|
||||
1. *(or)* `docker pull simon987/sist2:2.10.3-x64-linux`
|
||||
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
|
||||
recommended!)*
|
||||
1. *(or)* `docker pull simon987/sist2:2.11.6-x64-linux`
|
||||
|
||||
1. See [Usage guide](docs/USAGE.md)
|
||||
|
||||
@@ -70,18 +70,20 @@ See [Usage guide](docs/USAGE.md) for more details
|
||||
File type | Library | Content | Thumbnail | Metadata
|
||||
:---|:---|:---|:---|:---
|
||||
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
|
||||
cbz,cbr | *(none)* | - | yes | - |
|
||||
cbz,cbr | [libscan](https://github.com/simon987/libscan) | - | yes | - |
|
||||
`audio/*` | ffmpeg | - | yes | ID3 tags |
|
||||
`video/*` | ffmpeg | - | yes | title, comment, artist |
|
||||
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
|
||||
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
|
||||
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
||||
`text/plain` | *(none)* | yes | no | - |
|
||||
html, xml | *(none)* | yes | no | - |
|
||||
`text/plain` | [libscan](https://github.com/simon987/libscan) | yes | no | - |
|
||||
html, xml | [libscan](https://github.com/simon987/libscan) | yes | no | - |
|
||||
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
||||
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
|
||||
docx, xlsx, pptx | [libscan](https://github.com/simon987/libscan) | yes | if embedded | creator, modified_by, title |
|
||||
doc (MS Word 97-2003) | antiword | yes | yes | author, title |
|
||||
mobi, azw, azw3 | libmobi | yes | no | author, title |
|
||||
wpd (WordPerfect) | libwpd | yes | no | *planned* |
|
||||
json, jsonl, ndjson | [libscan](https://github.com/simon987/libscan) | yes | - | - |
|
||||
|
||||
\* *See [Archive files](#archive-files)*
|
||||
|
||||
@@ -134,14 +136,14 @@ docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
|
||||
```bash
|
||||
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
|
||||
```
|
||||
|
||||
|
||||
1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
|
||||
|
||||
1. Install vcpkg dependencies
|
||||
|
||||
```bash
|
||||
vcpkg install curl[core,openssl]
|
||||
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libuuid libmagic libraw jasper lcms gumbo
|
||||
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw jasper lcms gumbo
|
||||
```
|
||||
|
||||
1. Build
|
||||
|
||||
114
docs/USAGE.md
114
docs/USAGE.md
@@ -14,6 +14,7 @@
|
||||
* [examples](#web-examples)
|
||||
* [rewrite_url](#rewrite_url)
|
||||
* [link to specific indices](#link-to-specific-indices)
|
||||
* [elasticsearch](#elasticsearch)
|
||||
* [exec-script](#exec-script)
|
||||
* [tagging](#tagging)
|
||||
* [sidecar files](#sidecar-files)
|
||||
@@ -32,7 +33,7 @@ Lightning-fast file system indexer and search tool.
|
||||
|
||||
Scan options
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
|
||||
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3
|
||||
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
|
||||
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
|
||||
--incremental=<str> Reuse an existing index and only scan modified files.
|
||||
@@ -41,12 +42,15 @@ Scan options
|
||||
--name=<str> Index display name. DEFAULT: (name of the directory)
|
||||
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
|
||||
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
|
||||
--archive-passphrase=<str> Passphrase for encrypted archive files
|
||||
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
|
||||
-e, --exclude=<str> Files that match this regex will not be scanned
|
||||
--fast Only index file names & mime type
|
||||
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
|
||||
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
|
||||
--read-subtitles Read subtitles from media files
|
||||
--read-subtitles Read subtitles from media files.
|
||||
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
|
||||
--checksums Calculate file checksums when scanning.
|
||||
|
||||
Index options
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
@@ -66,13 +70,14 @@ Web options
|
||||
--bind=<str> Listen on this address. DEFAULT=localhost:4090
|
||||
--auth=<str> Basic auth in user:password format
|
||||
--tag-auth=<str> Basic auth in user:password format for tagging
|
||||
--tagline=<str> Tagline in navbar
|
||||
--dev Serve html & js files from disk (for development)
|
||||
|
||||
Exec-script options
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
--script-file=<str> Path to user script.
|
||||
--async-script Execute user script asynchronously.
|
||||
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
```
|
||||
|
||||
## Scan
|
||||
@@ -80,9 +85,9 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
### Scan options
|
||||
|
||||
* `-t, --threads`
|
||||
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-WmiObject Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
|
||||
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
|
||||
* `-q, --quality`
|
||||
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
|
||||
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best.
|
||||
* `--size`
|
||||
Thumbnail size in pixels.
|
||||
* `--content-size`
|
||||
@@ -125,6 +130,10 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
|
||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
|
||||
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
|
||||
* `--checksums` Calculate file checksums (sha1) when scanning files. This option does not cause any additional read
|
||||
operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
|
||||
files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
|
||||
|
||||
### Scan examples
|
||||
|
||||
@@ -145,15 +154,11 @@ sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
|
||||
|
||||
### Index format
|
||||
|
||||
A typical `binary` type index structure looks like this:
|
||||
A typical `ndjson` type index structure looks like this:
|
||||
```
|
||||
documents.idx/
|
||||
├── descriptor.json
|
||||
├── _index_139965416830720
|
||||
├── _index_139965425223424
|
||||
├── _index_139965433616128
|
||||
├── _index_139965442008832
|
||||
├── _index_139965442008832
|
||||
├── _index_main.ndjson.zst
|
||||
├── treemap.csv
|
||||
├── agg_mime.csv
|
||||
├── agg_date.csv
|
||||
@@ -169,9 +174,7 @@ documents.idx/
|
||||
└── lock.mdb
|
||||
```
|
||||
|
||||
The `_index_*` files contain the raw binary index data and are not meant to be
|
||||
read by other applications. The format is generally compatible across different
|
||||
sist2 versions.
|
||||
The `_index_*.ndjson.zst` files contain the document data in JSON format, in a compressed newline-delimited file.
|
||||
|
||||
The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
|
||||
database containing the thumbnails.
|
||||
@@ -181,66 +184,6 @@ following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rew
|
||||
|
||||
The `.csv` are pre-computed aggregations necessary for the stats page.
|
||||
|
||||
|
||||
*Advanced usage*
|
||||
|
||||
Instead of using the `scan` module, you can also import an index generated
|
||||
by a third party application. The 'external' index must have the following format:
|
||||
|
||||
```
|
||||
my_index/
|
||||
├── descriptor.json
|
||||
├── _index_0
|
||||
└── thumbs/
|
||||
| ├── data.mdb
|
||||
| └── lock.mdb
|
||||
└── meta/
|
||||
└── <empty>
|
||||
```
|
||||
|
||||
*descriptor.json*:
|
||||
```json
|
||||
{
|
||||
"uuid": "<valid UUID4>",
|
||||
"version": "_external_v1",
|
||||
"root": "(optional)",
|
||||
"name": "<name>",
|
||||
"rewrite_url": "(optional)",
|
||||
"type": "json",
|
||||
"timestamp": 1578971024
|
||||
}
|
||||
```
|
||||
|
||||
*_index_0*: NDJSON format (One json object per line)
|
||||
|
||||
```json
|
||||
{
|
||||
"_id": "unique uuid for the file",
|
||||
"index": "index uuid4 (same one as descriptor.json!)",
|
||||
"mime": "application/x-cbz",
|
||||
"size": 14341204,
|
||||
"mtime": 1578882996,
|
||||
"extension": "cbz",
|
||||
"name": "my_book",
|
||||
"path": "path/to/books",
|
||||
"content": "text contents of the book",
|
||||
"title": "Title of the book",
|
||||
"tag": ["genre.fiction", "author.someguy", "etc..."],
|
||||
"_keyword": [
|
||||
{"k": "ISBN", "v": "ABCD34789231"}
|
||||
],
|
||||
"_text": [
|
||||
{"k": "other", "v": "This will be indexed as text"}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
You can find the full list of supported fields [here](../src/io/serialize.c#L90)
|
||||
|
||||
The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
|
||||
The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)
|
||||
|
||||
|
||||
*thumbs/*:
|
||||
|
||||
LMDB key-value store. Keys are **binary** 16-byte md5 hash* (`_id` field)
|
||||
@@ -248,9 +191,6 @@ and values are raw image bytes.
|
||||
|
||||
*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
|
||||
|
||||
Importing an external `binary` type index is technically possible but
|
||||
it is currently unsupported and has no guaranties of back/forward compatibility.
|
||||
|
||||
|
||||
## Index
|
||||
### Index options
|
||||
@@ -276,6 +216,7 @@ it is currently unsupported and has no guaranties of back/forward compatibility.
|
||||
down the process.
|
||||
* `-f, --force-reset`
|
||||
Reset Elasticsearch mappings and settings.
|
||||
* `-t, --threads` Number of threads to use. Ideally, choose a number equal to the number of logical cores of the machine hosting Elasticsearch.
|
||||
|
||||
### Index examples
|
||||
|
||||
@@ -305,6 +246,8 @@ sist2 index --print ./my_index/ | jq | less
|
||||
* `--auth=<str>` Basic auth in user:password format
|
||||
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
|
||||
`--auth` argument, but authentication is only applied to the `/tag/` endpoint.
|
||||
* `--tagline=<str>` When specified, will replace the default tagline in the navbar.
|
||||
* `--dev` Serve html & js files from disk (for development, used to modify frontend files without having to recompile)
|
||||
|
||||
### Web examples
|
||||
|
||||
@@ -324,14 +267,19 @@ sist2 web index1 index2 index3 index4
|
||||
When the `rewrite_url` field is not empty, the web module ignores the `root`
|
||||
field and will return a HTTP redirect to `<rewrite_url><path>/<name><extension>`
|
||||
instead of serving the file from disk.
|
||||
Both the `root` and `rewrite_url` fields are safe to manually modify from the
|
||||
Both the `root` and `rewrite_url` fields are safe to manually modify from the
|
||||
`descriptor.json` file.
|
||||
|
||||
### Link to specific indices
|
||||
# Elasticsearch
|
||||
|
||||
To link to specific indices, you can add a list of comma-separated index name to
|
||||
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
|
||||
not displayed.
|
||||
Elasticsearch versions >=6.8.0, <8.0.0 are supported by sist2.
|
||||
|
||||
Using a version >=7.14.0 is recommended to enable the following features:
|
||||
|
||||
- Bug fix for large documents (See #198)
|
||||
|
||||
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
|
||||
If you don't care about the features above, you can ignore it or disable it in the configuration page.
|
||||
|
||||
## exec-script
|
||||
|
||||
@@ -367,7 +315,7 @@ See [scripting](scripting.md) documentation.
|
||||
# Sidecar files
|
||||
|
||||
When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
|
||||
original document's metadata. Sidecar metadata files will also work inside archives.
|
||||
original document's indexed metadata (does not modify the actual file). Sidecar metadata files will also work inside archives.
|
||||
Sidecar files themselves are not saved in the index.
|
||||
|
||||
This feature is useful to leverage third-party applications such as speech-to-text or
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 3.9 KiB After Width: | Height: | Size: 35 KiB |
BIN
docs/sist2.png
BIN
docs/sist2.png
Binary file not shown.
|
Before Width: | Height: | Size: 889 KiB After Width: | Height: | Size: 1011 KiB |
@@ -4,6 +4,10 @@
|
||||
"type": "keyword",
|
||||
"doc_values": true
|
||||
},
|
||||
"checksum": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"_depth": {
|
||||
"type": "integer"
|
||||
},
|
||||
@@ -74,6 +78,7 @@
|
||||
"name": {
|
||||
"analyzer": "content_analyzer",
|
||||
"type": "text",
|
||||
"fielddata": true,
|
||||
"fields": {
|
||||
"nGram": {
|
||||
"type": "text",
|
||||
|
||||
@@ -2,7 +2,8 @@
|
||||
"index": {
|
||||
"refresh_interval": "30s",
|
||||
"codec": "best_compression",
|
||||
"number_of_replicas": 0
|
||||
"number_of_replicas": 0,
|
||||
"highlight.max_analyzed_offset": 10000000
|
||||
},
|
||||
"analysis": {
|
||||
"tokenizer": {
|
||||
|
||||
58
schema/settings_legacy.json
Normal file
58
schema/settings_legacy.json
Normal file
@@ -0,0 +1,58 @@
|
||||
{
|
||||
"index": {
|
||||
"refresh_interval": "30s",
|
||||
"codec": "best_compression",
|
||||
"number_of_replicas": 0
|
||||
},
|
||||
"analysis": {
|
||||
"tokenizer": {
|
||||
"path_tokenizer": {
|
||||
"type": "path_hierarchy",
|
||||
"delimiter": "/"
|
||||
},
|
||||
"tag_tokenizer": {
|
||||
"type": "path_hierarchy",
|
||||
"delimiter": "."
|
||||
},
|
||||
"my_nGram_tokenizer": {
|
||||
"type": "nGram",
|
||||
"min_gram": 3,
|
||||
"max_gram": 3
|
||||
}
|
||||
},
|
||||
"analyzer": {
|
||||
"path_analyzer": {
|
||||
"tokenizer": "path_tokenizer",
|
||||
"filter": [
|
||||
"lowercase"
|
||||
]
|
||||
},
|
||||
"tag_analyzer": {
|
||||
"tokenizer": "tag_tokenizer",
|
||||
"filter": [
|
||||
"lowercase"
|
||||
]
|
||||
},
|
||||
"case_insensitive_kw_analyzer": {
|
||||
"tokenizer": "keyword",
|
||||
"filter": [
|
||||
"lowercase"
|
||||
]
|
||||
},
|
||||
"my_nGram": {
|
||||
"tokenizer": "my_nGram_tokenizer",
|
||||
"filter": [
|
||||
"lowercase",
|
||||
"asciifolding"
|
||||
]
|
||||
},
|
||||
"content_analyzer": {
|
||||
"tokenizer": "standard",
|
||||
"filter": [
|
||||
"lowercase",
|
||||
"asciifolding"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -14,4 +14,4 @@ rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j $(nproc)
|
||||
strip sist2
|
||||
mv sist2 sist2-arm64-linux-debug
|
||||
mv sist2_debug sist2-arm64-linux-debug
|
||||
@@ -3,6 +3,7 @@ import json
|
||||
files = [
|
||||
"schema/mappings.json",
|
||||
"schema/settings.json",
|
||||
"schema/settings_legacy.json",
|
||||
"schema/pipeline.json",
|
||||
]
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ application/java-archive, jar
|
||||
application/java, class
|
||||
application/javascript,
|
||||
application/json, json
|
||||
application/ndjson, jsonl|ndjson
|
||||
application/marc, mrc
|
||||
application/mbedlet, mbd
|
||||
application/mime, aps
|
||||
@@ -78,9 +79,7 @@ application/vocaltec-media-desc, vmd
|
||||
application/vocaltec-media-file, vmf
|
||||
application/warc, warc
|
||||
application/winhelp, hlp
|
||||
application/wordperfect6.0, w60
|
||||
application/wordperfect6.1, w61
|
||||
application/wordperfect, wp|wp5|wp6|wpd
|
||||
application/wordperfect, wp|wp5|wp6|wpd|w60|w61
|
||||
application/x-123, wk1
|
||||
application/x-7z-compressed, 7z
|
||||
application/x-aim, aim
|
||||
|
||||
|
2
sist2-vue/dist/css/chunk-vendors.css
vendored
2
sist2-vue/dist/css/chunk-vendors.css
vendored
File diff suppressed because one or more lines are too long
2
sist2-vue/dist/css/index.css
vendored
2
sist2-vue/dist/css/index.css
vendored
File diff suppressed because one or more lines are too long
6
sist2-vue/dist/js/chunk-vendors.js
vendored
6
sist2-vue/dist/js/chunk-vendors.js
vendored
File diff suppressed because one or more lines are too long
2
sist2-vue/dist/js/index.js
vendored
2
sist2-vue/dist/js/index.js
vendored
File diff suppressed because one or more lines are too long
15
sist2-vue/package-lock.json
generated
15
sist2-vue/package-lock.json
generated
@@ -23,7 +23,6 @@
|
||||
"vue-color": "^2.8.1",
|
||||
"vue-i18n": "^8.24.4",
|
||||
"vue-masonry-wall": "^0.3.2",
|
||||
"vue-multiselect": "^2.1.6",
|
||||
"vue-router": "^3.2.0",
|
||||
"vue-simple-suggest": "^1.11.1",
|
||||
"vuex": "^3.4.0"
|
||||
@@ -13604,15 +13603,6 @@
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/vue-multiselect": {
|
||||
"version": "2.1.6",
|
||||
"resolved": "https://registry.npmjs.org/vue-multiselect/-/vue-multiselect-2.1.6.tgz",
|
||||
"integrity": "sha512-s7jmZPlm9FeueJg1RwJtnE9KNPtME/7C8uRWSfp9/yEN4M8XcS/d+bddoyVwVnvFyRh9msFo0HWeW0vTL8Qv+w==",
|
||||
"engines": {
|
||||
"node": ">= 4.0.0",
|
||||
"npm": ">= 3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/vue-observe-visibility": {
|
||||
"version": "0.4.6",
|
||||
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",
|
||||
@@ -26376,11 +26366,6 @@
|
||||
"vue-observe-visibility": "^0.4.6"
|
||||
}
|
||||
},
|
||||
"vue-multiselect": {
|
||||
"version": "2.1.6",
|
||||
"resolved": "https://registry.npmjs.org/vue-multiselect/-/vue-multiselect-2.1.6.tgz",
|
||||
"integrity": "sha512-s7jmZPlm9FeueJg1RwJtnE9KNPtME/7C8uRWSfp9/yEN4M8XcS/d+bddoyVwVnvFyRh9msFo0HWeW0vTL8Qv+w=="
|
||||
},
|
||||
"vue-observe-visibility": {
|
||||
"version": "0.4.6",
|
||||
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
"vue-color": "^2.8.1",
|
||||
"vue-i18n": "^8.24.4",
|
||||
"vue-masonry-wall": "^0.3.2",
|
||||
"vue-multiselect": "^2.1.6",
|
||||
"vue-router": "^3.2.0",
|
||||
"vue-simple-suggest": "^1.11.1",
|
||||
"vuex": "^3.4.0"
|
||||
|
||||
@@ -50,6 +50,8 @@ export interface EsHit {
|
||||
height: number
|
||||
duration: number
|
||||
tag: string[]
|
||||
checksum: string
|
||||
thumbnail: string
|
||||
}
|
||||
_props: {
|
||||
isSubDocument: boolean
|
||||
@@ -60,6 +62,8 @@ export interface EsHit {
|
||||
isPlayableImage: boolean
|
||||
isAudio: boolean
|
||||
hasThumbnail: boolean
|
||||
tnW: number
|
||||
tnH: number
|
||||
}
|
||||
highlight: {
|
||||
name: string[] | undefined,
|
||||
@@ -130,6 +134,8 @@ class Sist2Api {
|
||||
|
||||
if ("thumbnail" in hit._source) {
|
||||
hit._props.hasThumbnail = true;
|
||||
hit._props.tnW = Number(hit._source.thumbnail.split(",")[0]);
|
||||
hit._props.tnH = Number(hit._source.thumbnail.split(",")[1]);
|
||||
}
|
||||
|
||||
switch (mimeCategory) {
|
||||
|
||||
@@ -43,6 +43,20 @@ const SORT_MODES = {
|
||||
{_tie: {order: "asc"}}
|
||||
],
|
||||
key: (hit: EsHit) => hit._source.size
|
||||
},
|
||||
nameAsc: {
|
||||
mode: [
|
||||
{name: {order: "asc"}},
|
||||
{_tie: {order: "asc"}}
|
||||
],
|
||||
key: (hit: EsHit) => hit._source.name
|
||||
},
|
||||
nameDesc: {
|
||||
mode: [
|
||||
{name: {order: "desc"}},
|
||||
{_tie: {order: "asc"}}
|
||||
],
|
||||
key: (hit: EsHit) => hit._source.name
|
||||
}
|
||||
} as any;
|
||||
|
||||
@@ -73,6 +87,8 @@ class Sist2Query {
|
||||
const selectedMimeTypes = getters.selectedMimeTypes;
|
||||
const selectedTags = getters.selectedTags;
|
||||
|
||||
const legacyES = store.state.sist2Info.esVersionLegacy;
|
||||
|
||||
const filters = [
|
||||
{terms: {index: selectedIndexIds}}
|
||||
] as any[];
|
||||
@@ -189,6 +205,11 @@ class Sist2Query {
|
||||
font_name: {},
|
||||
}
|
||||
};
|
||||
|
||||
if (!legacyES) {
|
||||
q.highlight.max_analyzed_offset = 9_999_999;
|
||||
}
|
||||
|
||||
if (getters.optSearchInPath) {
|
||||
q.highlight.fields["path.text"] = {};
|
||||
q.highlight.fields["path.nGram"] = {};
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
|
||||
<b-card-body>
|
||||
|
||||
<!-- TODO: ES connectivity, Link to GH page -->
|
||||
<b-table :items="tableItems" small borderless responsive="md" thead-class="hidden" class="mb-0"></b-table>
|
||||
|
||||
<hr />
|
||||
@@ -32,6 +31,9 @@ export default {
|
||||
{key: "esIndex", value: this.$store.state.sist2Info.esIndex},
|
||||
{key: "tagline", value: this.$store.state.sist2Info.tagline},
|
||||
{key: "dev", value: this.$store.state.sist2Info.dev},
|
||||
{key: "esVersion", value: this.$store.state.sist2Info.esVersion},
|
||||
{key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported},
|
||||
{key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,11 +15,15 @@
|
||||
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
|
||||
</div>
|
||||
|
||||
<div v-if="doc._props.isImage && !hover" class="card-img-overlay" :class="{'small-badge': smallBadge}">
|
||||
<div
|
||||
v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5"
|
||||
class="card-img-overlay"
|
||||
:class="{'small-badge': smallBadge}">
|
||||
<span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
|
||||
</div>
|
||||
|
||||
<div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover" class="card-img-overlay"
|
||||
<div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
|
||||
class="card-img-overlay"
|
||||
:class="{'small-badge': smallBadge}">
|
||||
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
|
||||
</div>
|
||||
@@ -39,7 +43,8 @@
|
||||
</div>
|
||||
|
||||
<!-- Audio player-->
|
||||
<audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls :type="doc._source.mime"
|
||||
<audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls
|
||||
:type="doc._source.mime"
|
||||
:src="`f/${doc._id}`"
|
||||
@play="onAudioPlay()"></audio>
|
||||
|
||||
|
||||
@@ -4,7 +4,8 @@
|
||||
<template #modal-title>
|
||||
<h5 class="modal-title" :title="doc._source.name + ext(doc)">{{ doc._source.name + ext(doc) }}</h5>
|
||||
</template>
|
||||
<img :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">
|
||||
|
||||
<img v-if="doc._props.hasThumbnail" :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">
|
||||
|
||||
<InfoTable :doc="doc"></InfoTable>
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<template>
|
||||
<b-list-group-item class="flex-column align-items-start mb-2">
|
||||
<b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}">
|
||||
|
||||
<!-- Info modal-->
|
||||
<DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal>
|
||||
@@ -40,9 +40,11 @@
|
||||
</div>
|
||||
|
||||
<div v-if="doc._source.pages || doc._source.author" class="path-row text-muted">
|
||||
<span v-if="doc._source.pages">{{ doc._source.pages }} {{ doc._source.pages > 1 ? $t("pages") : $t("page") }}</span>
|
||||
<span v-if="doc._source.pages">{{ doc._source.pages }} {{
|
||||
doc._source.pages > 1 ? $t("pages") : $t("page")
|
||||
}}</span>
|
||||
<span v-if="doc._source.author && doc._source.pages" class="mx-1">-</span>
|
||||
<span v-if="doc._source.author">{{doc._source.author}}</span>
|
||||
<span v-if="doc._source.author">{{ doc._source.author }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -89,6 +91,14 @@ export default {
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.sub-document {
|
||||
background: #AB47BC1F !important;
|
||||
}
|
||||
|
||||
.theme-black .sub-document {
|
||||
background: #37474F !important;
|
||||
}
|
||||
|
||||
.list-group {
|
||||
margin-top: 1em;
|
||||
}
|
||||
|
||||
@@ -1,93 +1,171 @@
|
||||
<template>
|
||||
<VueMultiselect
|
||||
multiple
|
||||
label="name"
|
||||
:value="selectedIndices"
|
||||
:options="indices"
|
||||
:close-on-select="indices.length <= 1"
|
||||
:placeholder="$t('indexPickerPlaceholder')"
|
||||
@select="addItem"
|
||||
@remove="removeItem">
|
||||
<div v-if="isMobile">
|
||||
<b-form-select
|
||||
:value="selectedIndicesIds"
|
||||
@change="onSelect($event)"
|
||||
:options="indices" multiple :select-size="6" text-field="name"
|
||||
value-field="id"></b-form-select>
|
||||
</div>
|
||||
<div v-else>
|
||||
|
||||
<template slot="option" slot-scope="idx">
|
||||
<b-row>
|
||||
<b-col>
|
||||
<span class="mr-1">{{ idx.option.name }}</span>
|
||||
<SmallBadge pill :text="idx.option.version"></SmallBadge>
|
||||
</b-col>
|
||||
</b-row>
|
||||
<b-row class="mt-1">
|
||||
<b-col>
|
||||
<span>{{ formatIdxDate(idx.option.timestamp) }}</span>
|
||||
</b-col>
|
||||
</b-row>
|
||||
</template>
|
||||
<div class="d-flex justify-content-between align-content-center">
|
||||
<span>
|
||||
{{ selectedIndices.length }}
|
||||
{{ selectedIndices.length === 1 ? $t("indexPicker.selectedIndex") : $t("indexPicker.selectedIndices") }}
|
||||
</span>
|
||||
|
||||
</VueMultiselect>
|
||||
<div>
|
||||
<b-button variant="link" @click="selectAll()"> {{ $t("indexPicker.selectAll") }}</b-button>
|
||||
<b-button variant="link" @click="selectNone()"> {{ $t("indexPicker.selectNone") }}</b-button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<b-list-group id="index-picker-desktop" class="unselectable">
|
||||
<b-list-group-item
|
||||
v-for="idx in indices"
|
||||
@click="toggleIndex(idx, $event)"
|
||||
@click.shift="shiftClick(idx, $event)"
|
||||
class="d-flex justify-content-between align-items-center list-group-item-action pointer"
|
||||
:class="{active: lastClickIndex === idx}"
|
||||
>
|
||||
<div class="d-flex">
|
||||
<b-checkbox @change="toggleIndex(idx)" :checked="isSelected(idx)"></b-checkbox>
|
||||
{{ idx.name }}
|
||||
<span class="text-muted timestamp-text ml-2">{{ formatIdxDate(idx.timestamp) }}</span>
|
||||
</div>
|
||||
<b-badge class="version-badge">v{{ idx.version }}</b-badge>
|
||||
</b-list-group-item>
|
||||
</b-list-group>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script lang="ts">
|
||||
import VueMultiselect from "vue-multiselect"
|
||||
import SmallBadge from "./SmallBadge.vue"
|
||||
import {mapActions, mapGetters} from "vuex";
|
||||
import {Index} from "@/Sist2Api";
|
||||
import Vue from "vue";
|
||||
import {format} from "date-fns";
|
||||
|
||||
export default Vue.extend({
|
||||
components: {
|
||||
VueMultiselect,
|
||||
SmallBadge
|
||||
},
|
||||
data() {
|
||||
return {
|
||||
loading: true
|
||||
loading: true,
|
||||
lastClickIndex: null
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
...mapGetters([
|
||||
"indices", "selectedIndices"
|
||||
]),
|
||||
selectedIndicesIds() {
|
||||
return this.selectedIndices.map(idx => idx.id)
|
||||
},
|
||||
isMobile() {
|
||||
return window.innerWidth <= 650;
|
||||
}
|
||||
},
|
||||
methods: {
|
||||
...mapActions({
|
||||
setSelectedIndices: "setSelectedIndices"
|
||||
}),
|
||||
removeItem(val: Index): void {
|
||||
this.setSelectedIndices(this.selectedIndices.filter((item: Index) => item !== val))
|
||||
/**
 * Shift-click handler: applies the selection state of the last plainly
 * clicked index to every index between that one and `index` (inclusive).
 *
 * - If the anchor (lastClickIndex) is currently selected, every index in the
 *   range that is not yet selected is added to the front of the selection.
 * - Otherwise every index in the range is removed from the selection.
 *
 * @param index the index entry that was shift-clicked
 * @param e     the click event (unused, kept for the template binding)
 */
shiftClick(index, e) {
    if (this.lastClickIndex === null) {
        return;
    }

    const anchorPos = this.indices.indexOf(this.lastClickIndex);
    const targetPos = this.indices.indexOf(index);

    // Either entry may no longer be part of `indices`; indexing with -1
    // would hand `undefined` to isSelected() and throw.
    if (anchorPos === -1 || targetPos === -1) {
        return;
    }

    const select = this.isSelected(this.lastClickIndex);
    const range = this.indices.slice(
        Math.min(anchorPos, targetPos),
        Math.max(anchorPos, targetPos) + 1
    );

    // Build the final selection locally and dispatch once, instead of one
    // store update per index in the range.
    let selection;
    if (select) {
        selection = [...this.selectedIndices];
        for (const candidate of range) {
            // Compare by id, consistent with isSelected()/toggleIndex().
            if (!selection.some(sel => sel.id == candidate.id)) {
                selection.unshift(candidate);
            }
        }
    } else {
        selection = this.selectedIndices.filter(
            sel => !range.some(candidate => candidate.id == sel.id)
        );
    }

    if (selection.length !== this.selectedIndices.length) {
        this.setSelectedIndices(selection);
    }
},
|
||||
addItem(val: Index): void {
|
||||
this.setSelectedIndices([...this.selectedIndices, val])
|
||||
selectAll() {
|
||||
this.setSelectedIndices(this.indices);
|
||||
},
|
||||
selectNone() {
|
||||
this.setSelectedIndices([]);
|
||||
},
|
||||
onSelect(value) {
|
||||
this.setSelectedIndices(this.indices.filter(idx => value.includes(idx.id)));
|
||||
},
|
||||
formatIdxDate(timestamp: number): string {
|
||||
return format(new Date(timestamp * 1000), "yyyy-MM-dd");
|
||||
},
|
||||
/**
 * Toggles the selection state of `index` and remembers it as the anchor for
 * a later shift-click range selection.
 *
 * @param index the index entry to toggle
 * @param e     the originating click event; may be absent
 */
toggleIndex(index, e) {
    // The checkbox's @change binding calls this method without an event.
    // Dereferencing `e.shiftKey` then threw a TypeError before any state was
    // touched; returning early keeps that "no state change" outcome without
    // the exception.
    // NOTE(review): the enclosing list item's @click handler presumably
    // performs the toggle for checkbox clicks - confirm.
    // Shift-clicks are handled exclusively by shiftClick().
    if (!e || e.shiftKey) {
        return;
    }

    this.lastClickIndex = index;

    if (this.isSelected(index)) {
        this.setSelectedIndices(this.selectedIndices.filter(idx => idx.id != index.id));
    } else {
        this.setSelectedIndices([index, ...this.selectedIndices]);
    }
},
|
||||
/**
 * Tells whether `index` is part of the current selection.
 * Entries are matched on their `id` field (loose equality).
 */
isSelected(index) {
    const wantedId = index.id;
    return this.selectedIndices.some(candidate => candidate.id == wantedId);
}
|
||||
},
|
||||
})
|
||||
</script>
|
||||
|
||||
<style src="vue-multiselect/dist/vue-multiselect.min.css"></style>
|
||||
|
||||
<style>
|
||||
.multiselect__option {
|
||||
padding: 5px 10px;
|
||||
<style scoped>
|
||||
.timestamp-text {
|
||||
line-height: 24px;
|
||||
font-size: 80%;
|
||||
}
|
||||
|
||||
.multiselect__content-wrapper {
|
||||
overflow: hidden;
|
||||
.theme-black .version-badge {
|
||||
color: #eee !important;
|
||||
background: none;
|
||||
}
|
||||
|
||||
.theme-black .multiselect__tags {
|
||||
background: #37474F;
|
||||
border: 1px solid #616161 !important
|
||||
.version-badge {
|
||||
color: #222 !important;
|
||||
background: none;
|
||||
}
|
||||
|
||||
.theme-black .multiselect__input {
|
||||
color: #dbdbdb;
|
||||
background: #37474F;
|
||||
.list-group-item {
|
||||
padding: 0.2em 0.4em;
|
||||
}
|
||||
|
||||
.theme-black .multiselect__content-wrapper {
|
||||
border: none
|
||||
#index-picker-desktop {
|
||||
overflow-y: auto;
|
||||
max-height: 132px;
|
||||
}
|
||||
|
||||
.btn-link:focus {
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
.unselectable {
|
||||
user-select: none;
|
||||
-ms-user-select: none;
|
||||
-moz-user-select: none;
|
||||
-webkit-user-select: none;
|
||||
}
|
||||
|
||||
.list-group-item.active {
|
||||
z-index: 2;
|
||||
background-color: inherit;
|
||||
color: inherit;
|
||||
}
|
||||
</style>
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
<template #cell(value)="data">
|
||||
<span v-if="'html' in data.item" v-html="data.item.html"></span>
|
||||
<span v-else>{{data.value}}</span>
|
||||
<span v-else>{{ data.value }}</span>
|
||||
</template>
|
||||
</b-table>
|
||||
</template>
|
||||
@@ -57,7 +57,8 @@ export default {
|
||||
"bitrate", "artist", "album", "album_artist", "genre", "font_name", "author",
|
||||
"modified_by", "pages", "tag",
|
||||
"exif_make", "exif_software", "exif_exposure_time", "exif_fnumber", "exif_focal_length",
|
||||
"exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime",
|
||||
"exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime",
|
||||
"checksum"
|
||||
];
|
||||
|
||||
fields.forEach(field => {
|
||||
@@ -76,9 +77,9 @@ export default {
|
||||
items.push({
|
||||
key: "Exif GPS",
|
||||
html: makeGpsLink(
|
||||
dmsToDecimal(src["exif_gps_latitude_dms"], src["exif_gps_latitude_ref"]),
|
||||
dmsToDecimal(src["exif_gps_longitude_dms"], src["exif_gps_longitude_ref"]),
|
||||
),
|
||||
dmsToDecimal(src["exif_gps_latitude_dms"], src["exif_gps_latitude_ref"]),
|
||||
dmsToDecimal(src["exif_gps_longitude_dms"], src["exif_gps_longitude_ref"]),
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,9 @@ export default {
|
||||
if (mutation.type === "setUiMimeMap") {
|
||||
const mimeMap = mutation.payload.slice();
|
||||
|
||||
const elem = document.getElementById("mimeTree");
|
||||
console.log(elem);
|
||||
|
||||
this.mimeTree = new InspireTree({
|
||||
selection: {
|
||||
mode: 'checkbox'
|
||||
@@ -43,7 +46,7 @@ export default {
|
||||
},
|
||||
methods: {
|
||||
handleTreeClick(node, e) {
|
||||
if (e === "indeterminate" || e === "collapsed") {
|
||||
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused") {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,8 @@
|
||||
</b-navbar-brand>
|
||||
|
||||
<span class="badge badge-pill version" v-if="$store && $store.state.sist2Info">
|
||||
{{ sist2Version() }}<span v-if="isDebug()">-dbg</span>
|
||||
v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a
|
||||
href="https://github.com/simon987/sist2/blob/master/docs/USAGE.md#elasticsearch" target="_blank">legacyES</a></span>
|
||||
</span>
|
||||
|
||||
<span v-if="$store && $store.state.sist2Info" class="tagline" v-html="tagline()"></span>
|
||||
@@ -20,6 +21,7 @@
|
||||
|
||||
<script>
|
||||
import Sist2Icon from "@/components/Sist2Icon";
|
||||
|
||||
export default {
|
||||
name: "NavBar",
|
||||
components: {Sist2Icon},
|
||||
@@ -32,6 +34,12 @@ export default {
|
||||
},
|
||||
isDebug() {
|
||||
return this.$store.state.sist2Info.debug;
|
||||
},
|
||||
isLegacy() {
|
||||
return this.$store.state.sist2Info.esVersionLegacy;
|
||||
},
|
||||
hideLegacy() {
|
||||
return this.$store.state.optHideLegacy;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -95,7 +103,7 @@ export default {
|
||||
}
|
||||
}
|
||||
|
||||
.theme-light .btn-link{
|
||||
.theme-light .btn-link {
|
||||
color: #222;
|
||||
}
|
||||
</style>
|
||||
@@ -5,9 +5,11 @@
|
||||
<div style="float: right">
|
||||
<b-button v-b-toggle.collapse-1 variant="primary" class="not-mobile">{{ $t("details") }}</b-button>
|
||||
|
||||
<SortSelect class="ml-2"></SortSelect>
|
||||
<template v-if="hitCount !== 0">
|
||||
<SortSelect class="ml-2"></SortSelect>
|
||||
|
||||
<DisplayModeToggle class="ml-2"></DisplayModeToggle>
|
||||
<DisplayModeToggle class="ml-2"></DisplayModeToggle>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<b-collapse id="collapse-1" class="pt-2" style="clear:both;">
|
||||
@@ -21,7 +23,7 @@
|
||||
<script lang="ts">
|
||||
import {EsResult} from "@/Sist2Api";
|
||||
import Vue from "vue";
|
||||
import {humanFileSize, humanTime} from "@/util";
|
||||
import {humanFileSize} from "@/util";
|
||||
import DisplayModeToggle from "@/components/DisplayModeToggle.vue";
|
||||
import SortSelect from "@/components/SortSelect.vue";
|
||||
|
||||
|
||||
@@ -19,6 +19,14 @@
|
||||
{{ $t("sort.sizeDesc") }}
|
||||
</b-dropdown-item>
|
||||
|
||||
<b-dropdown-item :class="{'dropdown-active': sort === 'nameDesc'}" @click="onSelect('nameDesc')">
|
||||
{{ $t("sort.nameDesc") }}
|
||||
</b-dropdown-item>
|
||||
|
||||
<b-dropdown-item :class="{'dropdown-active': sort === 'nameAsc'}" @click="onSelect('nameAsc')">
|
||||
{{ $t("sort.nameAsc") }}
|
||||
</b-dropdown-item>
|
||||
|
||||
<b-dropdown-item :class="{'dropdown-active': sort === 'random'}" @click="onSelect('random')">
|
||||
{{ $t("sort.random") }}
|
||||
</b-dropdown-item>
|
||||
|
||||
@@ -162,7 +162,7 @@ export default {
|
||||
});
|
||||
},
|
||||
handleTreeClick(node, e) {
|
||||
if (e === "indeterminate" || e === "collapsed" || e === 'rendered') {
|
||||
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused") {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -62,7 +62,9 @@ export default {
|
||||
lightboxLoadOnlyCurrent: "Do not preload full-size images for adjacent slides in image viewer.",
|
||||
slideDuration: "Slide duration",
|
||||
resultSize: "Number of results per page",
|
||||
tagOrOperator: "Use OR operator when specifying multiple tags."
|
||||
tagOrOperator: "Use OR operator when specifying multiple tags.",
|
||||
hideDuplicates: "Hide duplicate results based on checksum",
|
||||
hideLegacy: "Hide the 'legacyES' Elasticsearch notice"
|
||||
},
|
||||
queryMode: {
|
||||
simple: "Simple",
|
||||
@@ -70,7 +72,8 @@ export default {
|
||||
},
|
||||
lang: {
|
||||
en: "English",
|
||||
fr: "Français"
|
||||
fr: "Français",
|
||||
"zh-CN": "简体中文",
|
||||
},
|
||||
displayMode: {
|
||||
grid: "Grid",
|
||||
@@ -129,13 +132,14 @@ export default {
|
||||
saveTagModalTitle: "Add tag",
|
||||
saveTagPlaceholder: "Tag name",
|
||||
confirm: "Confirm",
|
||||
indexPickerPlaceholder: "Select indices",
|
||||
sort: {
|
||||
relevance: "Relevance",
|
||||
dateAsc: "Date (Older first)",
|
||||
dateDesc: "Date (Newer first)",
|
||||
sizeAsc: "Size (Smaller first)",
|
||||
sizeDesc: "Size (Larger first)",
|
||||
nameAsc: "Name (A-z)",
|
||||
nameDesc: "Name (Z-a)",
|
||||
random: "Random",
|
||||
},
|
||||
d3: {
|
||||
@@ -143,7 +147,13 @@ export default {
|
||||
mimeSize: "Size distribution by media type",
|
||||
dateHistogram: "File modification time distribution",
|
||||
sizeHistogram: "File size distribution",
|
||||
}
|
||||
},
|
||||
indexPicker: {
|
||||
selectNone: "Select None",
|
||||
selectAll: "Select All",
|
||||
selectedIndex: "selected index",
|
||||
selectedIndices: "selected indices",
|
||||
},
|
||||
},
|
||||
fr: {
|
||||
searchBar: {
|
||||
@@ -209,7 +219,9 @@ export default {
|
||||
lightboxLoadOnlyCurrent: "Désactiver le chargement des diapositives adjacentes pour le visualiseur d'images",
|
||||
slideDuration: "Durée des diapositives",
|
||||
resultSize: "Nombre de résultats par page",
|
||||
tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags"
|
||||
tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags",
|
||||
hideDuplicates: "Masquer les résultats en double",
|
||||
hideLegacy: "Masquer la notice 'legacyES' Elasticsearch"
|
||||
},
|
||||
queryMode: {
|
||||
simple: "Simple",
|
||||
@@ -217,7 +229,8 @@ export default {
|
||||
},
|
||||
lang: {
|
||||
en: "English",
|
||||
fr: "Français"
|
||||
fr: "Français",
|
||||
"zh-CN": "简体中文",
|
||||
},
|
||||
displayMode: {
|
||||
grid: "Grille",
|
||||
@@ -284,6 +297,8 @@ export default {
|
||||
dateDesc: "Date (Plus récent)",
|
||||
sizeAsc: "Taille (Plus petit)",
|
||||
sizeDesc: "Taille (Plus grand)",
|
||||
nameAsc: "Nom (A-z)",
|
||||
nameDesc: "Nom (Z-a)",
|
||||
random: "Aléatoire",
|
||||
},
|
||||
d3: {
|
||||
@@ -291,6 +306,168 @@ export default {
|
||||
mimeSize: "Distribution des tailles de fichiers par type de média",
|
||||
dateHistogram: "Distribution des dates de modification",
|
||||
sizeHistogram: "Distribution des tailles de fichier",
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
indexPicker: {
|
||||
selectNone: "Sélectionner aucun",
|
||||
selectAll: "Sélectionner tout",
|
||||
selectedIndex: "indice sélectionné",
|
||||
selectedIndices: "indices sélectionnés",
|
||||
},
|
||||
},
|
||||
"zh-CN": {
|
||||
searchBar: {
|
||||
simple: "搜索",
|
||||
advanced: "高级搜索",
|
||||
fuzzy: "模糊搜索"
|
||||
},
|
||||
download: "下载",
|
||||
and: "与",
|
||||
page: "页",
|
||||
pages: "页",
|
||||
mimeTypes: "文件类型",
|
||||
tags: "标签",
|
||||
help: {
|
||||
simpleSearch: "简易搜索",
|
||||
advancedSearch: "高级搜索",
|
||||
help: "帮助",
|
||||
term: "<关键词>",
|
||||
and: "与操作",
|
||||
or: "或操作",
|
||||
not: "反选单个关键词",
|
||||
quotes: "括起来的部分视为一个关键词,保序",
|
||||
prefix: "在词尾使用时,匹配前缀",
|
||||
parens: "表达式编组",
|
||||
tildeTerm: "匹配编辑距离以内的关键词",
|
||||
tildePhrase: "匹配短语,容忍一些非匹配词",
|
||||
example1:
|
||||
"例如: <code>\"番茄\" +(炒蛋 | 牛腩) -饭</code> 将匹配" +
|
||||
"短语 <i>番茄炒蛋</i>、<i>炒蛋</i> 或者 <i>牛腩</i>,而忽略任何带有" +
|
||||
"<i>饭</i>的关键词.",
|
||||
defaultOperator:
|
||||
"表达式中无<code>+</code>或者<code>|</code>时,默认使用" +
|
||||
"<code>+</code>(与操作)。",
|
||||
fuzzy:
|
||||
"选中<b>模糊搜索</b>选项时,返回部分匹配的结果(3-grams)。",
|
||||
moreInfoSimple: "详细信息:<a target=\"_blank\" " +
|
||||
"rel=\"noreferrer\" href=\"//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html\">Elasticsearch文档</a>",
|
||||
moreInfoAdvanced: "高级搜索模式文档:<a target=\"_blank\" rel=\"noreferrer\" href=\"//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax\">Elasticsearch文档</a>"
|
||||
},
|
||||
config: "配置",
|
||||
configDescription: "配置在此浏览器中实时保存。",
|
||||
configReset: "重置所有设置",
|
||||
searchOptions: "搜索选项",
|
||||
treemapOptions: "树状图选项",
|
||||
displayOptions: "显示选项",
|
||||
opt: {
|
||||
lang: "语言",
|
||||
highlight: "启用高亮",
|
||||
fuzzy: "默认使用模糊搜索",
|
||||
searchInPath: "匹配文档路径",
|
||||
suggestPath: "搜索框启用自动补全",
|
||||
fragmentSize: "高亮上下文大小",
|
||||
queryMode: "搜索模式",
|
||||
displayMode: "显示",
|
||||
columns: "列数",
|
||||
treemapType: "树状图类属性",
|
||||
treemapTiling: "树状图平铺",
|
||||
treemapColorGroupingDepth: "树状图颜色编组深度(展开)",
|
||||
treemapColor: "树状图颜色(折叠)",
|
||||
treemapSize: "树状图大小",
|
||||
theme: "主题",
|
||||
lightboxLoadOnlyCurrent: "在图片查看器中,不要预读相邻的全图",
|
||||
slideDuration: "幻灯片时长",
|
||||
resultSize: "每页结果数",
|
||||
tagOrOperator: "使用或操作(OR)匹配多个标签。",
|
||||
hideDuplicates: "使用校验码隐藏重复结果",
|
||||
hideLegacy: "隐藏'legacyES' Elasticsearch 通知"
|
||||
},
|
||||
queryMode: {
|
||||
simple: "简单",
|
||||
advanced: "高级",
|
||||
},
|
||||
lang: {
|
||||
en: "English",
|
||||
fr: "Français",
|
||||
"zh-CN": "简体中文",
|
||||
},
|
||||
displayMode: {
|
||||
grid: "网格",
|
||||
list: "列表",
|
||||
},
|
||||
columns: {
|
||||
auto: "自动"
|
||||
},
|
||||
treemapType: {
|
||||
cascaded: "折叠",
|
||||
flat: "平铺(紧凑)"
|
||||
},
|
||||
treemapSize: {
|
||||
small: "小",
|
||||
medium: "中",
|
||||
large: "大",
|
||||
xLarge: "加大",
|
||||
xxLarge: "加加大",
|
||||
custom: "自订",
|
||||
},
|
||||
treemapTiling: {
|
||||
binary: "Binary",
|
||||
squarify: "Squarify",
|
||||
slice: "Slice",
|
||||
dice: "Dice",
|
||||
sliceDice: "Slice & Dice",
|
||||
},
|
||||
theme: {
|
||||
light: "亮",
|
||||
black: "暗"
|
||||
},
|
||||
hit: "命中",
|
||||
hits: "命中",
|
||||
details: "详细信息",
|
||||
stats: "统计信息",
|
||||
queryTime: "查询时间",
|
||||
totalSize: "总大小",
|
||||
pathBar: {
|
||||
placeholder: "过滤路径",
|
||||
modalTitle: "选择路径"
|
||||
},
|
||||
debug: "调试信息",
|
||||
debugDescription: "对调试除错有用的信息。 若您遇到bug或者想建议新功能,请提交新Issue到" +
|
||||
"<a href='https://github.com/simon987/sist2/issues/new/choose'>这里</a>.",
|
||||
tagline: "标签栏",
|
||||
toast: {
|
||||
esConnErrTitle: "Elasticsearch连接错误",
|
||||
esConnErr: "sist2 web 模块连接Elasticsearch出错。" +
|
||||
"查看服务日志以获取更多信息。",
|
||||
esQueryErrTitle: "查询错误",
|
||||
esQueryErr: "无法识别或执行查询,请查阅高级搜索文档。" +
|
||||
"查看服务日志以获取更多信息。",
|
||||
dupeTagTitle: "重复标签",
|
||||
dupeTag: "该标签已存在于此文档。"
|
||||
},
|
||||
saveTagModalTitle: "增加标签",
|
||||
saveTagPlaceholder: "标签名",
|
||||
confirm: "确认",
|
||||
sort: {
|
||||
relevance: "相关度",
|
||||
dateAsc: "日期(由旧到新)",
|
||||
dateDesc: "日期(由新到旧)",
|
||||
sizeAsc: "大小(从小到大)",
|
||||
sizeDesc: "大小(从大到小)",
|
||||
nameAsc: "名字(A-z)",
|
||||
nameDesc: "名字 (Z-a)",
|
||||
random: "随机",
|
||||
},
|
||||
d3: {
|
||||
mimeCount: "各类文件数量分布",
|
||||
mimeSize: "各类文件大小分布",
|
||||
dateHistogram: "文件修改时间分布",
|
||||
sizeHistogram: "文件大小分布",
|
||||
},
|
||||
indexPicker: {
|
||||
selectNone: "清空",
|
||||
selectAll: "全选",
|
||||
selectedIndex: "选中索引",
|
||||
selectedIndices: "选中索引",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ export default new Vuex.Store({
|
||||
size: 60,
|
||||
|
||||
optLang: "en",
|
||||
optHideDuplicates: true,
|
||||
optTheme: "light",
|
||||
optDisplay: "grid",
|
||||
|
||||
@@ -45,6 +46,7 @@ export default new Vuex.Store({
|
||||
optTreemapColor: "PuBuGn",
|
||||
optLightboxLoadOnlyCurrent: false,
|
||||
optLightboxSlideDuration: 15,
|
||||
optHideLegacy: false,
|
||||
|
||||
_onLoadSelectedIndices: [] as string[],
|
||||
_onLoadSelectedMimeTypes: [] as string[],
|
||||
@@ -79,6 +81,7 @@ export default new Vuex.Store({
|
||||
setSizeMax: (state, val) => state.sizeMax = val,
|
||||
setSist2Info: (state, val) => state.sist2Info = val,
|
||||
setSeed: (state, val) => state.seed = val,
|
||||
setOptHideDuplicates: (state, val) => state.optHideDuplicates = val,
|
||||
setOptLang: (state, val) => state.optLang = val,
|
||||
setSortMode: (state, val) => state.sortMode = val,
|
||||
setIndices: (state, val) => {
|
||||
@@ -142,6 +145,7 @@ export default new Vuex.Store({
|
||||
setOptTreemapColorGroupingDepth: (state, val) => state.optTreemapColorGroupingDepth = val,
|
||||
setOptTreemapSize: (state, val) => state.optTreemapSize = val,
|
||||
setOptTreemapColor: (state, val) => state.optTreemapColor = val,
|
||||
setOptHideLegacy: (state, val) => state.optHideLegacy = val,
|
||||
|
||||
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
|
||||
|
||||
@@ -317,6 +321,7 @@ export default new Vuex.Store({
|
||||
uiLightboxKey: state => state.uiLightboxKey,
|
||||
uiLightboxSlide: state => state.uiLightboxSlide,
|
||||
|
||||
optHideDuplicates: state => state.optHideDuplicates,
|
||||
optLang: state => state.optLang,
|
||||
optTheme: state => state.optTheme,
|
||||
optDisplay: state => state.optDisplay,
|
||||
@@ -336,5 +341,6 @@ export default new Vuex.Store({
|
||||
optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
|
||||
optLightboxSlideDuration: state => state.optLightboxSlideDuration,
|
||||
optResultSize: state => state.size,
|
||||
optHideLegacy: state => state.optHideLegacy,
|
||||
}
|
||||
})
|
||||
@@ -19,6 +19,10 @@
|
||||
{{ $t("opt.lightboxLoadOnlyCurrent") }}
|
||||
</b-form-checkbox>
|
||||
|
||||
<b-form-checkbox :checked="optHideLegacy" @input="setOptHideLegacy">
|
||||
{{ $t("opt.hideLegacy") }}
|
||||
</b-form-checkbox>
|
||||
|
||||
<label>{{ $t("opt.lang") }}</label>
|
||||
<b-form-select :options="langOptions" :value="optLang" @input="setOptLang"></b-form-select>
|
||||
|
||||
@@ -35,6 +39,11 @@
|
||||
<br/>
|
||||
<h4>{{ $t("searchOptions") }}</h4>
|
||||
<b-card>
|
||||
<b-form-checkbox :checked="optHideDuplicates" @input="setOptHideDuplicates">{{
|
||||
$t("opt.hideDuplicates")
|
||||
}}
|
||||
</b-form-checkbox>
|
||||
|
||||
<b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{ $t("opt.highlight") }}</b-form-checkbox>
|
||||
<b-form-checkbox :checked="optTagOrOperator" @input="setOptTagOrOperator">{{
|
||||
$t("opt.tagOrOperator")
|
||||
@@ -124,6 +133,7 @@ export default {
|
||||
langOptions: [
|
||||
{value: "en", text: this.$t("lang.en")},
|
||||
{value: "fr", text: this.$t("lang.fr")},
|
||||
{value: "zh-CN", text: this.$t("lang.zh-CN")},
|
||||
],
|
||||
queryModeOptions: [
|
||||
{value: "simple", text: this.$t("queryMode.simple")},
|
||||
@@ -206,10 +216,11 @@ export default {
|
||||
"optTreemapSize",
|
||||
"optLightboxLoadOnlyCurrent",
|
||||
"optLightboxSlideDuration",
|
||||
"optContainerWidth",
|
||||
"optResultSize",
|
||||
"optTagOrOperator",
|
||||
"optLang"
|
||||
"optLang",
|
||||
"optHideDuplicates",
|
||||
"optHideLegacy",
|
||||
]),
|
||||
clientWidth() {
|
||||
return window.innerWidth;
|
||||
@@ -248,7 +259,9 @@ export default {
|
||||
"setOptContainerWidth",
|
||||
"setOptResultSize",
|
||||
"setOptTagOrOperator",
|
||||
"setOptLang"
|
||||
"setOptLang",
|
||||
"setOptHideDuplicates",
|
||||
"setOptHideLegacy"
|
||||
]),
|
||||
onResetClick() {
|
||||
localStorage.removeItem("sist2_configuration");
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
</b-row>
|
||||
</b-col>
|
||||
<b-col>
|
||||
<b-tabs>
|
||||
<b-tabs justified>
|
||||
<b-tab :title="$t('mimeTypes')">
|
||||
<MimePicker></MimePicker>
|
||||
</b-tab>
|
||||
@@ -43,9 +43,13 @@
|
||||
</b-row>
|
||||
</b-card>
|
||||
|
||||
<Preloader v-if="searchBusy && docs.length === 0" class="mt-3"></Preloader>
|
||||
<div v-show="docs.length === 0 && !uiLoading">
|
||||
<Preloader v-if="searchBusy" class="mt-3"></Preloader>
|
||||
|
||||
<div v-else-if="docs.length > 0">
|
||||
<ResultsCard></ResultsCard>
|
||||
</div>
|
||||
|
||||
<div v-if="docs.length > 0">
|
||||
<ResultsCard></ResultsCard>
|
||||
|
||||
<DocCardWall v-if="optDisplay==='grid'" :docs="docs" :append="appendFunc"></DocCardWall>
|
||||
@@ -91,6 +95,7 @@ export default Vue.extend({
|
||||
search: undefined as any,
|
||||
docs: [] as EsHit[],
|
||||
docIds: new Set(),
|
||||
docChecksums: new Set(),
|
||||
searchBusy: false,
|
||||
Sist2Query: Sist2Query,
|
||||
showHelp: false
|
||||
@@ -108,10 +113,6 @@ export default Vue.extend({
|
||||
|
||||
}, 350, {leading: false});
|
||||
|
||||
Sist2Api.getMimeTypes().then(mimeMap => {
|
||||
this.$store.commit("setUiMimeMap", mimeMap);
|
||||
});
|
||||
|
||||
this.$store.dispatch("loadFromArgs", this.$route).then(() => {
|
||||
this.$store.subscribe(() => this.$store.dispatch("updateArgs", this.$router));
|
||||
this.$store.subscribe((mutation) => {
|
||||
@@ -137,9 +138,13 @@ export default Vue.extend({
|
||||
sist2.getSist2Info().then(data => {
|
||||
this.setSist2Info(data);
|
||||
this.setIndices(data.indices);
|
||||
this.uiLoading = false;
|
||||
|
||||
this.search(true);
|
||||
Sist2Api.getMimeTypes().then(mimeMap => {
|
||||
this.$store.commit("setUiMimeMap", mimeMap);
|
||||
this.uiLoading = false;
|
||||
this.search(true);
|
||||
});
|
||||
|
||||
}).catch(() => {
|
||||
this.showErrorToast();
|
||||
});
|
||||
@@ -193,6 +198,7 @@ export default Vue.extend({
|
||||
async clearResults() {
|
||||
this.docs = [];
|
||||
this.docIds.clear();
|
||||
this.docChecksums.clear();
|
||||
await this.$store.dispatch("clearResults");
|
||||
this.$store.commit("setUiReachedScrollEnd", false);
|
||||
},
|
||||
@@ -202,7 +208,19 @@ export default Vue.extend({
|
||||
}
|
||||
|
||||
resp.hits.hits = resp.hits.hits.filter(hit => !this.docIds.has(hit._id));
|
||||
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
|
||||
|
||||
if (this.$store.state.optHideDuplicates) {
|
||||
resp.hits.hits = resp.hits.hits.filter(hit => {
|
||||
|
||||
if (!("checksum" in hit._source)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const isDupe = !this.docChecksums.has(hit._source.checksum);
|
||||
this.docChecksums.add(hit._source.checksum);
|
||||
return isDupe;
|
||||
});
|
||||
}
|
||||
|
||||
for (const hit of resp.hits.hits) {
|
||||
if (hit._props.isPlayableImage || hit._props.isPlayableVideo) {
|
||||
|
||||
19
src/cli.c
19
src/cli.c
@@ -22,6 +22,7 @@
|
||||
const char *TESS_DATAPATHS[] = {
|
||||
"/usr/share/tessdata/",
|
||||
"/usr/share/tesseract-ocr/tessdata/",
|
||||
"/usr/share/tesseract-ocr/4.00/tessdata/",
|
||||
"./",
|
||||
NULL
|
||||
};
|
||||
@@ -218,6 +219,19 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
|
||||
}
|
||||
|
||||
// Resolve the optional list file: "-" means read the list from stdin,
// anything else is opened as a regular file for reading.
if (args->list_path != NULL) {
    if (strcmp(args->list_path, "-") == 0) {
        args->list_file = stdin;
        LOG_DEBUG("cli.c", "Using stdin as list file")
    } else {
        args->list_file = fopen(args->list_path, "r");

        if (args->list_file == NULL) {
            // errno is an int: it must be converted with strerror() before
            // being passed to a %s conversion.
            LOG_FATALF("cli.c", "List file could not be opened: %s (%s)", args->list_path, strerror(errno));
        }
    }
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
|
||||
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
|
||||
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
|
||||
@@ -237,6 +251,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub)
|
||||
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
|
||||
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
|
||||
LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path)
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -366,6 +381,10 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
args->lang = DEFAULT_LANG;
|
||||
}
|
||||
|
||||
if (args->tagline == NULL) {
|
||||
args->tagline = DEFAULT_TAGLINE;
|
||||
}
|
||||
|
||||
if (strlen(args->lang) != 2) {
|
||||
fprintf(stderr, "Invalid --lang value, see usage\n");
|
||||
return 1;
|
||||
|
||||
@@ -28,6 +28,9 @@ typedef struct scan_args {
|
||||
int max_memory_buffer;
|
||||
int read_subtitles;
|
||||
int fast_epub;
|
||||
int calculate_checksums;
|
||||
char *list_path;
|
||||
FILE *list_file;
|
||||
} scan_args_t;
|
||||
|
||||
scan_args_t *scan_args_create();
|
||||
|
||||
@@ -2,6 +2,9 @@
|
||||
|
||||
ScanCtx_t ScanCtx = {
|
||||
.stat_index_size = 0,
|
||||
.stat_tn_size = 0,
|
||||
.dbg_current_files = NULL,
|
||||
.pool = NULL
|
||||
};
|
||||
WebCtx_t WebCtx;
|
||||
IndexCtx_t IndexCtx;
|
||||
|
||||
@@ -14,7 +14,10 @@
|
||||
#include "libscan/mobi/scan_mobi.h"
|
||||
#include "libscan/raw/raw.h"
|
||||
#include "libscan/msdoc/msdoc.h"
|
||||
#include "libscan/wpd/wpd.h"
|
||||
#include "libscan/json/json.h"
|
||||
#include "src/io/store.h"
|
||||
#include "src/index/elastic.h"
|
||||
|
||||
#include <glib.h>
|
||||
#include <pcre.h>
|
||||
@@ -31,12 +34,14 @@ typedef struct {
|
||||
|
||||
int threads;
|
||||
int depth;
|
||||
int calculate_checksums;
|
||||
|
||||
size_t stat_tn_size;
|
||||
size_t stat_index_size;
|
||||
|
||||
GHashTable *original_table;
|
||||
GHashTable *copy_table;
|
||||
pthread_mutex_t copy_table_mu;
|
||||
|
||||
pcre *exclude;
|
||||
pcre_extra *exclude_extra;
|
||||
@@ -60,6 +65,8 @@ typedef struct {
|
||||
scan_mobi_ctx_t mobi_ctx;
|
||||
scan_raw_ctx_t raw_ctx;
|
||||
scan_msdoc_ctx_t msdoc_ctx;
|
||||
scan_wpd_ctx_t wpd_ctx;
|
||||
scan_json_ctx_t json_ctx;
|
||||
} ScanCtx_t;
|
||||
|
||||
typedef struct {
|
||||
@@ -70,6 +77,7 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
es_version_t *es_version;
|
||||
char *es_index;
|
||||
int batch_size;
|
||||
tpool_t *pool;
|
||||
@@ -81,6 +89,7 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
es_version_t *es_version;
|
||||
char *es_index;
|
||||
int index_count;
|
||||
char *auth_user;
|
||||
|
||||
@@ -253,7 +253,7 @@ void _elastic_flush(int max) {
|
||||
} else {
|
||||
|
||||
print_errors(r);
|
||||
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
|
||||
LOG_DEBUGF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
|
||||
delete_queue(max);
|
||||
|
||||
if (Indexer->queued != 0) {
|
||||
@@ -356,7 +356,65 @@ void finish_indexer(char *script, int async_script, char *index_id) {
|
||||
free_response(r);
|
||||
}
|
||||
|
||||
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings) {
|
||||
es_version_t *elastic_get_version(const char *es_url) {
|
||||
response_t *r = web_get(es_url, 30);
|
||||
|
||||
char *tmp = malloc(r->size + 1);
|
||||
memcpy(tmp, r->body, r->size);
|
||||
*(tmp + r->size) = '\0';
|
||||
cJSON *response = cJSON_Parse(tmp);
|
||||
free(tmp);
|
||||
free_response(r);
|
||||
|
||||
if (response == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (cJSON_GetObjectItem(response, "version") == NULL ||
|
||||
cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number") == NULL) {
|
||||
cJSON_Delete(response);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char *version_str = cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number")->valuestring;
|
||||
|
||||
es_version_t *version = malloc(sizeof(es_version_t));
|
||||
|
||||
const char *tok = strtok(version_str, ".");
|
||||
version->major = atoi(tok);
|
||||
tok = strtok(NULL, ".");
|
||||
version->minor = atoi(tok);
|
||||
tok = strtok(NULL, ".");
|
||||
version->patch = atoi(tok);
|
||||
|
||||
cJSON_Delete(response);
|
||||
|
||||
return version;
|
||||
}
|
||||
|
||||
void elastic_init(int force_reset, const char *user_mappings, const char *user_settings) {
|
||||
|
||||
es_version_t *es_version = elastic_get_version(IndexCtx.es_url);
|
||||
IndexCtx.es_version = es_version;
|
||||
|
||||
if (es_version == NULL) {
|
||||
LOG_FATAL("elastic.c", "Could not get ES version")
|
||||
}
|
||||
|
||||
LOG_INFOF("elastic.c",
|
||||
"Elasticsearch version is %s (supported=%d, legacy=%d)",
|
||||
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), USE_LEGACY_ES_SETTINGS(es_version));
|
||||
|
||||
if (!IS_SUPPORTED_ES_VERSION(es_version)) {
|
||||
LOG_FATAL("elastic.c", "sist2 only supports Elasticsearch v6.8 or newer")
|
||||
}
|
||||
|
||||
char *settings = NULL;
|
||||
if (USE_LEGACY_ES_SETTINGS(es_version)) {
|
||||
settings = settings_json;
|
||||
} else {
|
||||
settings = settings_legacy_json;
|
||||
}
|
||||
|
||||
// Check if index exists
|
||||
char url[4096];
|
||||
@@ -392,7 +450,7 @@ void elastic_init(int force_reset, const char* user_mappings, const char* user_s
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, user_settings ? user_settings : settings_json);
|
||||
r = web_put(url, user_settings ? user_settings : settings);
|
||||
LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
|
||||
if (r->status_code != 200) {
|
||||
print_error(r);
|
||||
|
||||
@@ -9,6 +9,26 @@ typedef struct es_bulk_line {
|
||||
char line[0];
|
||||
} es_bulk_line_t;
|
||||
|
||||
/**
 * Elasticsearch release number split into its three numeric components.
 */
typedef struct {
    int major;
    int minor;
    int patch;
} es_version_t;

/* Non-zero when *version is at least maj.min; the patch level is ignored. */
#define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min)))
/* Non-zero for Elasticsearch 6.8 and newer. */
#define IS_SUPPORTED_ES_VERSION(es_version) VERSION_GE((es_version), 6, 8)
/* Non-zero for every release older than 7.14. */
#define USE_LEGACY_ES_SETTINGS(es_version) (!VERSION_GE((es_version), 7, 14))

/**
 * Render a version as "major.minor.patch".
 *
 * The text lives in a function-local static buffer: it is overwritten by the
 * next call and must not be freed, so the function is not reentrant.
 *
 * @param version Version to format; must not be NULL.
 * @return Pointer to the static, NUL-terminated buffer.
 */
static inline const char *format_es_version(const es_version_t *version) {
    static char buf[64];

    snprintf(buf, sizeof(buf), "%d.%d.%d", version->major, version->minor, version->patch);

    return buf;
}
|
||||
|
||||
|
||||
/**
|
||||
* Note: indexer is *not* thread safe
|
||||
*/
|
||||
@@ -31,6 +51,8 @@ cJSON *elastic_get_document(const char *id_str);
|
||||
|
||||
char *elastic_get_status();
|
||||
|
||||
es_version_t *elastic_get_version(const char *es_url);
|
||||
|
||||
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
|
||||
|
||||
#endif
|
||||
|
||||
5
src/index/static_generated.c
vendored
5
src/index/static_generated.c
vendored
File diff suppressed because one or more lines are too long
@@ -74,6 +74,8 @@ char *get_meta_key_text(enum metakey meta_key) {
|
||||
return "exif_gps_latitude_dms";
|
||||
case MetaExifGpsLatitudeDec:
|
||||
return "exif_gps_latitude_dec";
|
||||
case MetaChecksum:
|
||||
return "checksum";
|
||||
default:
|
||||
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
|
||||
}
|
||||
@@ -165,6 +167,7 @@ char *build_json_string(document_t *doc) {
|
||||
case MetaExifGpsLatitudeDMS:
|
||||
case MetaExifGpsLatitudeDec:
|
||||
case MetaExifGpsLatitudeRef:
|
||||
case MetaChecksum:
|
||||
case MetaTitle: {
|
||||
cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val);
|
||||
buffer_size_guess += (int) strlen(meta->str_val);
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
store_t *store_create(const char *path, size_t chunk_size) {
|
||||
store_t *store = malloc(sizeof(struct store_t));
|
||||
mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
strcpy(store->path, path);
|
||||
|
||||
#if (SIST_FAKE_STORE != 1)
|
||||
store->chunk_size = chunk_size;
|
||||
@@ -22,7 +23,6 @@ store_t *store_create(const char *path, size_t chunk_size) {
|
||||
}
|
||||
|
||||
store->size = (size_t) store->chunk_size;
|
||||
ScanCtx.stat_tn_size = 0;
|
||||
mdb_env_set_mapsize(store->env, store->size);
|
||||
|
||||
// Open dbi
|
||||
@@ -78,27 +78,57 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
||||
int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
||||
ScanCtx.stat_tn_size += buf_len;
|
||||
|
||||
int db_full = FALSE;
|
||||
int should_abort_transaction = FALSE;
|
||||
|
||||
if (put_ret == MDB_MAP_FULL) {
|
||||
mdb_txn_abort(txn);
|
||||
db_full = TRUE;
|
||||
should_abort_transaction = TRUE;
|
||||
} else {
|
||||
int commit_ret = mdb_txn_commit(txn);
|
||||
|
||||
if (commit_ret == MDB_MAP_FULL) {
|
||||
db_full = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (db_full) {
|
||||
LOG_INFOF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
|
||||
|
||||
if (should_abort_transaction) {
|
||||
mdb_txn_abort(txn);
|
||||
}
|
||||
|
||||
pthread_rwlock_unlock(&store->lock);
|
||||
|
||||
// Cannot resize when there is a opened transaction.
|
||||
// Resize take effect on the next commit.
|
||||
pthread_rwlock_wrlock(&store->lock);
|
||||
store->size += store->chunk_size;
|
||||
mdb_env_set_mapsize(store->env, store->size);
|
||||
int resize_ret = mdb_env_set_mapsize(store->env, store->size);
|
||||
if (resize_ret != 0) {
|
||||
LOG_ERROR("store.c", mdb_strerror(put_ret))
|
||||
}
|
||||
mdb_txn_begin(store->env, NULL, 0, &txn);
|
||||
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
||||
int put_ret_retry = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
||||
|
||||
if (put_ret_retry != 0) {
|
||||
LOG_ERROR("store.c", mdb_strerror(put_ret))
|
||||
}
|
||||
|
||||
int ret = mdb_txn_commit(txn);
|
||||
if (ret != 0) {
|
||||
LOG_FATALF("store.c", "FIXME: Could not commit to store %s: %s (%d), %d, %d %d",
|
||||
store->path, mdb_strerror(ret), ret,
|
||||
put_ret, put_ret_retry);
|
||||
}
|
||||
LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
|
||||
}
|
||||
|
||||
mdb_txn_commit(txn);
|
||||
pthread_rwlock_unlock(&store->lock);
|
||||
|
||||
if (put_ret != 0) {
|
||||
} else if (put_ret != 0) {
|
||||
LOG_ERROR("store.c", mdb_strerror(put_ret))
|
||||
}
|
||||
|
||||
pthread_rwlock_unlock(&store->lock);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -6,12 +6,12 @@
|
||||
|
||||
#include <glib.h>
|
||||
|
||||
#define STORE_SIZE_TN 1024 * 1024 * 5
|
||||
#define STORE_SIZE_TAG 1024 * 16
|
||||
#define STORE_SIZE_TN (1024 * 1024 * 5)
|
||||
#define STORE_SIZE_TAG (1024 * 1024)
|
||||
#define STORE_SIZE_META STORE_SIZE_TAG
|
||||
|
||||
typedef struct store_t {
|
||||
char *path;
|
||||
char path[PATH_MAX];
|
||||
char *tmp_path;
|
||||
MDB_dbi dbi;
|
||||
MDB_env *env;
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#include <ftw.h>
|
||||
|
||||
// Non-zero when x begins with y, ignoring the final character of y.
// NOTE(review): the "- 1" looks meant to tolerate a trailing '/' on y; an empty y would wrap the length to SIZE_MAX — confirm callers never pass one.
#define STR_STARTS_WITH(x, y) (strncmp((y), (x), strlen(y) - 1) == 0)
|
||||
|
||||
__always_inline
|
||||
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
|
||||
int len = (int) strlen(filepath);
|
||||
@@ -24,39 +26,110 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
|
||||
|
||||
job->vfile.filepath = job->filepath;
|
||||
job->vfile.read = fs_read;
|
||||
// Filesystem reads are always rewindable
|
||||
job->vfile.read_rewindable = fs_read;
|
||||
job->vfile.reset = fs_reset;
|
||||
job->vfile.close = fs_close;
|
||||
job->vfile.fd = -1;
|
||||
job->vfile.is_fs_file = TRUE;
|
||||
job->vfile.has_checksum = FALSE;
|
||||
job->vfile.rewind_buffer_size = 0;
|
||||
job->vfile.rewind_buffer = NULL;
|
||||
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
|
||||
|
||||
return job;
|
||||
}
|
||||
|
||||
int sub_strings[30];
|
||||
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, filepath, strlen(filepath), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
|
||||
// Non-zero when str matches the exclude pattern. pcre_exec() takes the output
// vector size as an element count, not a byte count, so it is derived from the array length.
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, (str), (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
|
||||
|
||||
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
|
||||
if (ftw->level > ScanCtx.depth) {
|
||||
if (typeflag == FTW_D) {
|
||||
return FTW_SKIP_SUBTREE;
|
||||
}
|
||||
return FTW_CONTINUE;
|
||||
}
|
||||
|
||||
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
|
||||
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_excluded_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
return 0;
|
||||
} else if (typeflag == FTW_D) {
|
||||
return FTW_SKIP_SUBTREE;
|
||||
}
|
||||
|
||||
return FTW_CONTINUE;
|
||||
}
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
|
||||
tpool_add_work(ScanCtx.pool, parse, job);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return FTW_CONTINUE;
|
||||
}
|
||||
|
||||
#define MAX_FILE_DESCRIPTORS 64
|
||||
|
||||
int walk_directory_tree(const char *dirpath) {
|
||||
return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_DEPTH);
|
||||
return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_ACTIONRETVAL);
|
||||
}
|
||||
|
||||
int iterate_file_list(void *input_file) {
|
||||
|
||||
char buf[PATH_MAX];
|
||||
struct stat info;
|
||||
|
||||
while (fgets(buf, sizeof(buf), input_file) != NULL) {
|
||||
|
||||
// Remove trailing newline
|
||||
*(buf + strlen(buf) - 1) = '\0';
|
||||
|
||||
int stat_ret = stat(buf, &info);
|
||||
|
||||
if (stat_ret != 0) {
|
||||
LOG_ERRORF("walk.c", "Could not stat file %s (%s)", buf, strerror(errno));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!S_ISREG(info.st_mode)) {
|
||||
LOG_ERRORF("walk.c", "Is not a regular file: %s", buf);
|
||||
continue;
|
||||
}
|
||||
|
||||
char *absolute_path = canonicalize_file_name(buf);
|
||||
|
||||
if (absolute_path == NULL) {
|
||||
LOG_FATALF("walk.c", "FIXME: Could not get absolute path of %s", buf);
|
||||
}
|
||||
|
||||
if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) {
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path)
|
||||
|
||||
if (S_ISREG(info.st_mode)) {
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_excluded_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!STR_STARTS_WITH(absolute_path, ScanCtx.index.desc.root)) {
|
||||
LOG_FATALF("walk.c", "File is not a children of root folder (%s): %s", ScanCtx.index.desc.root, buf);
|
||||
}
|
||||
|
||||
int base = (int) (strrchr(buf, '/') - buf) + 1;
|
||||
|
||||
parse_job_t *job = create_fs_parse_job(absolute_path, &info, base);
|
||||
free(absolute_path);
|
||||
tpool_add_work(ScanCtx.pool, parse, job);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -5,4 +5,6 @@
|
||||
|
||||
int walk_directory_tree(const char *);
|
||||
|
||||
int iterate_file_list(void* input_file);
|
||||
|
||||
#endif
|
||||
|
||||
20
src/log.c
20
src/log.c
@@ -55,10 +55,14 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
|
||||
log_len += 1;
|
||||
}
|
||||
|
||||
int ret = write(STDERR_FILENO, log_str, log_len);
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno))
|
||||
if (PrintingProgressBar) {
|
||||
PrintingProgressBar = FALSE;
|
||||
memmove(log_str + 1, log_str, log_len);
|
||||
log_str[0] = '\n';
|
||||
log_len += 1;
|
||||
}
|
||||
|
||||
write(STDERR_FILENO, log_str, log_len);
|
||||
}
|
||||
|
||||
void sist_logf(const char *filepath, int level, char *format, ...) {
|
||||
@@ -104,8 +108,12 @@ void sist_log(const char *filepath, int level, char *str) {
|
||||
);
|
||||
}
|
||||
|
||||
int ret = write(STDERR_FILENO, log_str, log_len);
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
|
||||
if (PrintingProgressBar) {
|
||||
PrintingProgressBar = FALSE;
|
||||
memmove(log_str + 1, log_str, log_len);
|
||||
log_str[0] = '\n';
|
||||
log_len += 1;
|
||||
}
|
||||
|
||||
write(STDERR_FILENO, log_str, log_len);
|
||||
}
|
||||
|
||||
89
src/main.c
89
src/main.c
@@ -14,6 +14,9 @@
|
||||
#include "parsing/mime.h"
|
||||
#include "parsing/parse.h"
|
||||
|
||||
#include <signal.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "stats.h"
|
||||
|
||||
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
|
||||
@@ -29,8 +32,6 @@ static const char *const usage[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
#include<signal.h>
|
||||
#include<unistd.h>
|
||||
|
||||
static __sighandler_t sigsegv_handler = NULL;
|
||||
static __sighandler_t sigabrt_handler = NULL;
|
||||
@@ -43,30 +44,38 @@ void sig_handler(int signum) {
|
||||
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
|
||||
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
|
||||
|
||||
GHashTableIter iter;
|
||||
g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
|
||||
if (ScanCtx.dbg_current_files != NULL) {
|
||||
GHashTableIter iter;
|
||||
g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
|
||||
|
||||
void *key;
|
||||
void *value;
|
||||
while (g_hash_table_iter_next(&iter, &key, &value)) {
|
||||
parse_job_t *job = value;
|
||||
void *key;
|
||||
void *value;
|
||||
while (g_hash_table_iter_next(&iter, &key, &value)) {
|
||||
parse_job_t *job = value;
|
||||
|
||||
if (isatty(STDERR_FILENO)) {
|
||||
LOG_DEBUGF(
|
||||
"*SIGNAL HANDLER*",
|
||||
"Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
|
||||
31 + ((unsigned int) key) % 7, key, job->filepath
|
||||
);
|
||||
} else {
|
||||
LOG_DEBUGF(
|
||||
"*SIGNAL HANDLER*",
|
||||
"THREAD [%04llX] was working on job %s",
|
||||
key, job->filepath
|
||||
);
|
||||
if (isatty(STDERR_FILENO)) {
|
||||
LOG_DEBUGF(
|
||||
"*SIGNAL HANDLER*",
|
||||
"Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
|
||||
31 + ((unsigned int) key) % 7, key, job->filepath
|
||||
);
|
||||
} else {
|
||||
LOG_DEBUGF(
|
||||
"*SIGNAL HANDLER*",
|
||||
"THREAD [%04llX] was working on job %s",
|
||||
key, job->filepath
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tpool_dump_debug_info(ScanCtx.pool);
|
||||
if (ScanCtx.pool != NULL) {
|
||||
tpool_dump_debug_info(ScanCtx.pool);
|
||||
}
|
||||
|
||||
if (IndexCtx.pool != NULL) {
|
||||
tpool_dump_debug_info(IndexCtx.pool);
|
||||
}
|
||||
|
||||
LOG_INFO(
|
||||
"*SIGNAL HANDLER*",
|
||||
@@ -161,6 +170,9 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
|
||||
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
|
||||
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
|
||||
pthread_mutex_init(&ScanCtx.copy_table_mu, NULL);
|
||||
|
||||
ScanCtx.calculate_checksums = args->calculate_checksums;
|
||||
|
||||
// Archive
|
||||
ScanCtx.arc_ctx.mode = args->archive_mode;
|
||||
@@ -250,6 +262,19 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.raw_ctx.log = _log;
|
||||
ScanCtx.raw_ctx.logf = _logf;
|
||||
ScanCtx.raw_ctx.store = _store;
|
||||
|
||||
// Wpd
|
||||
ScanCtx.wpd_ctx.content_size = args->content_size;
|
||||
ScanCtx.wpd_ctx.log = _log;
|
||||
ScanCtx.wpd_ctx.logf = _logf;
|
||||
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect");
|
||||
|
||||
// Json
|
||||
ScanCtx.json_ctx.content_size = args->content_size;
|
||||
ScanCtx.json_ctx.log = _log;
|
||||
ScanCtx.json_ctx.logf = _logf;
|
||||
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/json");
|
||||
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
|
||||
}
|
||||
|
||||
|
||||
@@ -311,10 +336,20 @@ void sist2_scan(scan_args_t *args) {
|
||||
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE);
|
||||
tpool_start(ScanCtx.writer_pool);
|
||||
|
||||
int walk_ret = walk_directory_tree(ScanCtx.index.desc.root);
|
||||
if (walk_ret == -1) {
|
||||
LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno)
|
||||
if (args->list_path) {
|
||||
// Scan using file list
|
||||
int list_ret = iterate_file_list(args->list_file);
|
||||
if (list_ret != 0) {
|
||||
LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret)
|
||||
}
|
||||
} else {
|
||||
// Scan directory recursively
|
||||
int walk_ret = walk_directory_tree(ScanCtx.index.desc.root);
|
||||
if (walk_ret == -1) {
|
||||
LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno)
|
||||
}
|
||||
}
|
||||
|
||||
tpool_wait(ScanCtx.pool);
|
||||
tpool_destroy(ScanCtx.pool);
|
||||
|
||||
@@ -410,7 +445,7 @@ void sist2_index(index_args_t *args) {
|
||||
cleanup = elastic_cleanup;
|
||||
}
|
||||
|
||||
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE, FALSE);
|
||||
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE, args->print == 0);
|
||||
tpool_start(IndexCtx.pool);
|
||||
|
||||
struct dirent *de;
|
||||
@@ -553,6 +588,10 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
|
||||
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
|
||||
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
|
||||
OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
|
||||
OPT_STRING(0, "list-file", &scan_args->list_path, "Specify a list of newline-delimited paths to be scanned"
|
||||
" instead of normal directory traversal. Use '-' to read"
|
||||
" from stdin."),
|
||||
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
|
||||
849
src/parsing/mime_generated.c
vendored
849
src/parsing/mime_generated.c
vendored
@@ -35,427 +35,426 @@ enum mime {
|
||||
application_mime=655387,
|
||||
application_mspowerpoint=655388,
|
||||
application_msword=655389,
|
||||
application_netmc=655390,
|
||||
application_octet_stream=655391,
|
||||
application_oda=655392,
|
||||
application_ogg=655393,
|
||||
application_pdf=655394 | 0x40000000,
|
||||
application_pgp_keys=655395,
|
||||
application_pgp_signature=655396,
|
||||
application_pkcs7_signature=655397,
|
||||
application_pkix_cert=655398,
|
||||
application_postscript=655399,
|
||||
application_pro_eng=655400,
|
||||
application_ringing_tones=655401,
|
||||
application_smil=655402,
|
||||
application_solids=655403,
|
||||
application_sounder=655404,
|
||||
application_step=655405,
|
||||
application_streamingmedia=655406,
|
||||
application_vda=655407,
|
||||
application_vnd_amazon_mobi8_ebook=655408 | 0x02000000,
|
||||
application_vnd_coffeescript=655409,
|
||||
application_vnd_fdf=655410,
|
||||
application_vnd_font_fontforge_sfd=655411,
|
||||
application_vnd_hp_hpgl=655412,
|
||||
application_vnd_iccprofile=655413,
|
||||
application_vnd_lotus_1_2_3=655414,
|
||||
application_vnd_ms_cab_compressed=655415,
|
||||
application_vnd_ms_excel=655416,
|
||||
application_vnd_ms_fontobject=655417,
|
||||
application_vnd_ms_opentype=655418 | 0x20000000,
|
||||
application_vnd_ms_outlook=655419,
|
||||
application_vnd_ms_pki_certstore=655420,
|
||||
application_vnd_ms_pki_pko=655421,
|
||||
application_vnd_ms_pki_seccat=655422,
|
||||
application_vnd_ms_powerpoint=655423,
|
||||
application_vnd_ms_project=655424,
|
||||
application_vnd_oasis_opendocument_base=655425,
|
||||
application_vnd_oasis_opendocument_formula=655426,
|
||||
application_vnd_oasis_opendocument_graphics=655427,
|
||||
application_vnd_oasis_opendocument_presentation=655428,
|
||||
application_vnd_oasis_opendocument_spreadsheet=655429,
|
||||
application_vnd_oasis_opendocument_text=655430,
|
||||
application_vnd_openxmlformats_officedocument_presentationml_presentation=655431 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655432 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655433 | 0x04000000,
|
||||
application_vnd_symbian_install=655434,
|
||||
application_vnd_tcpdump_pcap=655435,
|
||||
application_vnd_wap_wmlc=655436,
|
||||
application_vnd_wap_wmlscriptc=655437,
|
||||
application_vnd_xara=655438,
|
||||
application_vocaltec_media_desc=655439,
|
||||
application_vocaltec_media_file=655440,
|
||||
application_warc=655441,
|
||||
application_winhelp=655442,
|
||||
application_wordperfect=655443,
|
||||
application_wordperfect6_0=655444,
|
||||
application_wordperfect6_1=655445,
|
||||
application_x_123=655446,
|
||||
application_x_7z_compressed=655447 | 0x10000000,
|
||||
application_x_aim=655448,
|
||||
application_x_apple_diskimage=655449,
|
||||
application_x_arc=655450 | 0x10000000,
|
||||
application_x_archive=655451,
|
||||
application_x_atari_7800_rom=655452,
|
||||
application_x_authorware_bin=655453,
|
||||
application_x_authorware_map=655454,
|
||||
application_x_authorware_seg=655455,
|
||||
application_x_avira_qua=655456,
|
||||
application_x_bcpio=655457,
|
||||
application_x_bittorrent=655458,
|
||||
application_x_bsh=655459,
|
||||
application_x_bytecode_python=655460,
|
||||
application_x_bzip=655461,
|
||||
application_x_bzip2=655462 | 0x08000000,
|
||||
application_x_cbr=655463,
|
||||
application_x_cbz=655464,
|
||||
application_x_cdlink=655465,
|
||||
application_x_chat=655466,
|
||||
application_x_chrome_extension=655467,
|
||||
application_x_cocoa=655468,
|
||||
application_x_conference=655469,
|
||||
application_x_coredump=655470,
|
||||
application_x_cpio=655471,
|
||||
application_x_dbf=655472,
|
||||
application_x_dbt=655473,
|
||||
application_x_debian_package=655474,
|
||||
application_x_deepv=655475,
|
||||
application_x_director=655476,
|
||||
application_x_dmp=655477,
|
||||
application_x_dosdriver=655478,
|
||||
application_x_dosexec=655479,
|
||||
application_x_dvi=655480,
|
||||
application_x_elc=655481,
|
||||
application_ndjson=655390,
|
||||
application_netmc=655391,
|
||||
application_octet_stream=655392,
|
||||
application_oda=655393,
|
||||
application_ogg=655394,
|
||||
application_pdf=655395 | 0x40000000,
|
||||
application_pgp_keys=655396,
|
||||
application_pgp_signature=655397,
|
||||
application_pkcs7_signature=655398,
|
||||
application_pkix_cert=655399,
|
||||
application_postscript=655400,
|
||||
application_pro_eng=655401,
|
||||
application_ringing_tones=655402,
|
||||
application_smil=655403,
|
||||
application_solids=655404,
|
||||
application_sounder=655405,
|
||||
application_step=655406,
|
||||
application_streamingmedia=655407,
|
||||
application_vda=655408,
|
||||
application_vnd_amazon_mobi8_ebook=655409 | 0x02000000,
|
||||
application_vnd_coffeescript=655410,
|
||||
application_vnd_fdf=655411,
|
||||
application_vnd_font_fontforge_sfd=655412,
|
||||
application_vnd_hp_hpgl=655413,
|
||||
application_vnd_iccprofile=655414,
|
||||
application_vnd_lotus_1_2_3=655415,
|
||||
application_vnd_ms_cab_compressed=655416,
|
||||
application_vnd_ms_excel=655417,
|
||||
application_vnd_ms_fontobject=655418,
|
||||
application_vnd_ms_opentype=655419 | 0x20000000,
|
||||
application_vnd_ms_outlook=655420,
|
||||
application_vnd_ms_pki_certstore=655421,
|
||||
application_vnd_ms_pki_pko=655422,
|
||||
application_vnd_ms_pki_seccat=655423,
|
||||
application_vnd_ms_powerpoint=655424,
|
||||
application_vnd_ms_project=655425,
|
||||
application_vnd_oasis_opendocument_base=655426,
|
||||
application_vnd_oasis_opendocument_formula=655427,
|
||||
application_vnd_oasis_opendocument_graphics=655428,
|
||||
application_vnd_oasis_opendocument_presentation=655429,
|
||||
application_vnd_oasis_opendocument_spreadsheet=655430,
|
||||
application_vnd_oasis_opendocument_text=655431,
|
||||
application_vnd_openxmlformats_officedocument_presentationml_presentation=655432 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655433 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655434 | 0x04000000,
|
||||
application_vnd_symbian_install=655435,
|
||||
application_vnd_tcpdump_pcap=655436,
|
||||
application_vnd_wap_wmlc=655437,
|
||||
application_vnd_wap_wmlscriptc=655438,
|
||||
application_vnd_xara=655439,
|
||||
application_vocaltec_media_desc=655440,
|
||||
application_vocaltec_media_file=655441,
|
||||
application_warc=655442,
|
||||
application_winhelp=655443,
|
||||
application_wordperfect=655444,
|
||||
application_x_123=655445,
|
||||
application_x_7z_compressed=655446 | 0x10000000,
|
||||
application_x_aim=655447,
|
||||
application_x_apple_diskimage=655448,
|
||||
application_x_arc=655449 | 0x10000000,
|
||||
application_x_archive=655450,
|
||||
application_x_atari_7800_rom=655451,
|
||||
application_x_authorware_bin=655452,
|
||||
application_x_authorware_map=655453,
|
||||
application_x_authorware_seg=655454,
|
||||
application_x_avira_qua=655455,
|
||||
application_x_bcpio=655456,
|
||||
application_x_bittorrent=655457,
|
||||
application_x_bsh=655458,
|
||||
application_x_bytecode_python=655459,
|
||||
application_x_bzip=655460,
|
||||
application_x_bzip2=655461 | 0x08000000,
|
||||
application_x_cbr=655462,
|
||||
application_x_cbz=655463,
|
||||
application_x_cdlink=655464,
|
||||
application_x_chat=655465,
|
||||
application_x_chrome_extension=655466,
|
||||
application_x_cocoa=655467,
|
||||
application_x_conference=655468,
|
||||
application_x_coredump=655469,
|
||||
application_x_cpio=655470,
|
||||
application_x_dbf=655471,
|
||||
application_x_dbt=655472,
|
||||
application_x_debian_package=655473,
|
||||
application_x_deepv=655474,
|
||||
application_x_director=655475,
|
||||
application_x_dmp=655476,
|
||||
application_x_dosdriver=655477,
|
||||
application_x_dosexec=655478,
|
||||
application_x_dvi=655479,
|
||||
application_x_elc=655480,
|
||||
application_x_empty=1,
|
||||
application_x_envoy=655482,
|
||||
application_x_esrehber=655483,
|
||||
application_x_excel=655484,
|
||||
application_x_executable=655485,
|
||||
application_x_font_gdos=655486,
|
||||
application_x_font_pf2=655487,
|
||||
application_x_font_pfm=655488,
|
||||
application_x_font_sfn=655489,
|
||||
application_x_font_ttf=655490 | 0x20000000,
|
||||
application_x_fptapplication_x_dbt=655491,
|
||||
application_x_freelance=655492,
|
||||
application_x_gamecube_rom=655493,
|
||||
application_x_gdbm=655494,
|
||||
application_x_gettext_translation=655495,
|
||||
application_x_git=655496,
|
||||
application_x_gsp=655497,
|
||||
application_x_gss=655498,
|
||||
application_x_gtar=655499,
|
||||
application_x_gzip=655500,
|
||||
application_x_hdf=655501,
|
||||
application_x_helpfile=655502,
|
||||
application_x_httpd_imap=655503,
|
||||
application_x_ima=655504,
|
||||
application_x_innosetup=655505,
|
||||
application_x_internett_signup=655506,
|
||||
application_x_inventor=655507,
|
||||
application_x_ip2=655508,
|
||||
application_x_java_applet=655509,
|
||||
application_x_java_commerce=655510,
|
||||
application_x_java_image=655511,
|
||||
application_x_java_jmod=655512,
|
||||
application_x_java_keystore=655513,
|
||||
application_x_kdelnk=655514,
|
||||
application_x_koan=655515,
|
||||
application_x_latex=655516,
|
||||
application_x_livescreen=655517,
|
||||
application_x_lotus=655518,
|
||||
application_x_lz4=655519 | 0x08000000,
|
||||
application_x_lz4_json=655520,
|
||||
application_x_lzh=655521,
|
||||
application_x_lzh_compressed=655522,
|
||||
application_x_lzip=655523 | 0x08000000,
|
||||
application_x_lzma=655524 | 0x08000000,
|
||||
application_x_lzop=655525 | 0x08000000,
|
||||
application_x_lzx=655526,
|
||||
application_x_mach_binary=655527,
|
||||
application_x_mach_executable=655528,
|
||||
application_x_magic_cap_package_1_0=655529,
|
||||
application_x_mathcad=655530,
|
||||
application_x_maxis_dbpf=655531,
|
||||
application_x_meme=655532,
|
||||
application_x_midi=655533,
|
||||
application_x_mif=655534,
|
||||
application_x_mix_transfer=655535,
|
||||
application_x_mobipocket_ebook=655536 | 0x02000000,
|
||||
application_x_ms_compress_szdd=655537,
|
||||
application_x_ms_pdb=655538,
|
||||
application_x_ms_reader=655539,
|
||||
application_x_msaccess=655540,
|
||||
application_x_n64_rom=655541,
|
||||
application_x_navi_animation=655542,
|
||||
application_x_navidoc=655543,
|
||||
application_x_navimap=655544,
|
||||
application_x_navistyle=655545,
|
||||
application_x_nes_rom=655546,
|
||||
application_x_netcdf=655547,
|
||||
application_x_newton_compatible_pkg=655548,
|
||||
application_x_nintendo_ds_rom=655549,
|
||||
application_x_object=655550,
|
||||
application_x_omc=655551,
|
||||
application_x_omcdatamaker=655552,
|
||||
application_x_omcregerator=655553,
|
||||
application_x_pagemaker=655554,
|
||||
application_x_pcl=655555,
|
||||
application_x_pgp_keyring=655556,
|
||||
application_x_pixclscript=655557,
|
||||
application_x_pkcs7_certreqresp=655558,
|
||||
application_x_pkcs7_signature=655559,
|
||||
application_x_project=655560,
|
||||
application_x_qpro=655561,
|
||||
application_x_rar=655562 | 0x10000000,
|
||||
application_x_rpm=655563,
|
||||
application_x_sdp=655564,
|
||||
application_x_sea=655565,
|
||||
application_x_seelogo=655566,
|
||||
application_x_setupscript=655567,
|
||||
application_x_shar=655568,
|
||||
application_x_sharedlib=655569,
|
||||
application_x_shockwave_flash=655570,
|
||||
application_x_snappy_framed=655571,
|
||||
application_x_sprite=655572,
|
||||
application_x_sqlite3=655573,
|
||||
application_x_stargallery_thm=655574,
|
||||
application_x_stuffit=655575,
|
||||
application_x_sv4cpio=655576,
|
||||
application_x_sv4crc=655577,
|
||||
application_x_tar=655578 | 0x10000000,
|
||||
application_x_tbook=655579,
|
||||
application_x_terminfo=655580,
|
||||
application_x_terminfo2=655581,
|
||||
application_x_tex_tfm=655582,
|
||||
application_x_texinfo=655583,
|
||||
application_x_ustar=655584,
|
||||
application_x_visio=655585,
|
||||
application_x_vnd_audioexplosion_mzz=655586,
|
||||
application_x_vnd_ls_xpix=655587,
|
||||
application_x_vrml=655588,
|
||||
application_x_wais_source=655589,
|
||||
application_x_wine_extension_ini=655590,
|
||||
application_x_wintalk=655591,
|
||||
application_x_world=655592,
|
||||
application_x_wri=655593,
|
||||
application_x_x509_ca_cert=655594,
|
||||
application_x_xz=655595 | 0x08000000,
|
||||
application_x_zip=655596,
|
||||
application_x_zstd=655597 | 0x08000000,
|
||||
application_x_zstd_dictionary=655598,
|
||||
application_xml=655599,
|
||||
application_zip=655600 | 0x10000000,
|
||||
application_zlib=655601,
|
||||
audio_basic=458994 | 0x80000000,
|
||||
audio_it=458995,
|
||||
audio_make=458996,
|
||||
audio_mid=458997,
|
||||
audio_midi=458998,
|
||||
audio_mp4=458999,
|
||||
audio_mpeg=459000,
|
||||
audio_ogg=459001,
|
||||
audio_s3m=459002,
|
||||
audio_tsp_audio=459003,
|
||||
audio_tsplayer=459004,
|
||||
audio_vnd_qcelp=459005,
|
||||
audio_voxware=459006,
|
||||
audio_x_aiff=459007,
|
||||
audio_x_flac=459008,
|
||||
audio_x_gsm=459009,
|
||||
audio_x_hx_aac_adts=459010,
|
||||
audio_x_jam=459011,
|
||||
audio_x_liveaudio=459012,
|
||||
audio_x_m4a=459013,
|
||||
audio_x_midi=459014,
|
||||
audio_x_mod=459015,
|
||||
audio_x_mp4a_latm=459016,
|
||||
audio_x_mpeg_3=459017,
|
||||
audio_x_mpequrl=459018,
|
||||
audio_x_nspaudio=459019,
|
||||
audio_x_pn_realaudio=459020,
|
||||
audio_x_psid=459021,
|
||||
audio_x_realaudio=459022,
|
||||
audio_x_s3m=459023,
|
||||
audio_x_twinvq=459024,
|
||||
audio_x_twinvq_plugin=459025,
|
||||
audio_x_voc=459026,
|
||||
audio_x_wav=459027,
|
||||
audio_x_xbox_executable=459028 | 0x80000000,
|
||||
audio_x_xbox360_executable=459029 | 0x80000000,
|
||||
audio_xm=459030,
|
||||
font_otf=327959 | 0x20000000,
|
||||
font_sfnt=327960 | 0x20000000,
|
||||
font_woff=327961 | 0x20000000,
|
||||
font_woff2=327962 | 0x20000000,
|
||||
image_bmp=524571,
|
||||
image_cmu_raster=524572,
|
||||
image_fif=524573,
|
||||
image_florian=524574,
|
||||
image_g3fax=524575,
|
||||
image_gif=524576,
|
||||
image_heic=524577,
|
||||
image_ief=524578,
|
||||
image_jpeg=524579,
|
||||
image_jutvision=524580,
|
||||
image_naplps=524581,
|
||||
image_pict=524582,
|
||||
image_png=524583,
|
||||
image_svg=524584 | 0x80000000,
|
||||
image_svg_xml=524585 | 0x80000000,
|
||||
image_tiff=524586,
|
||||
image_vnd_adobe_photoshop=524587 | 0x80000000,
|
||||
image_vnd_djvu=524588 | 0x80000000,
|
||||
image_vnd_fpx=524589,
|
||||
image_vnd_microsoft_icon=524590,
|
||||
image_vnd_rn_realflash=524591,
|
||||
image_vnd_rn_realpix=524592,
|
||||
image_vnd_wap_wbmp=524593,
|
||||
image_vnd_xiff=524594,
|
||||
image_webp=524595,
|
||||
image_wmf=524596,
|
||||
image_x_3ds=524597,
|
||||
image_x_adobe_dng=524598 | 0x00800000,
|
||||
image_x_award_bioslogo=524599,
|
||||
image_x_canon_cr2=524600 | 0x00800000,
|
||||
image_x_canon_crw=524601 | 0x00800000,
|
||||
image_x_cmu_raster=524602,
|
||||
image_x_cur=524603,
|
||||
image_x_dcraw=524604 | 0x00800000,
|
||||
image_x_dwg=524605,
|
||||
image_x_eps=524606,
|
||||
image_x_epson_erf=524607 | 0x00800000,
|
||||
image_x_exr=524608,
|
||||
image_x_fuji_raf=524609 | 0x00800000,
|
||||
image_x_gem=524610,
|
||||
image_x_icns=524611,
|
||||
image_x_icon=524612 | 0x80000000,
|
||||
image_x_jg=524613,
|
||||
image_x_jps=524614,
|
||||
image_x_kodak_dcr=524615 | 0x00800000,
|
||||
image_x_kodak_k25=524616 | 0x00800000,
|
||||
image_x_kodak_kdc=524617 | 0x00800000,
|
||||
image_x_minolta_mrw=524618 | 0x00800000,
|
||||
image_x_ms_bmp=524619,
|
||||
image_x_niff=524620,
|
||||
image_x_nikon_nef=524621 | 0x00800000,
|
||||
image_x_olympus_orf=524622 | 0x00800000,
|
||||
image_x_panasonic_raw=524623 | 0x00800000,
|
||||
image_x_pcx=524624,
|
||||
image_x_pentax_pef=524625 | 0x00800000,
|
||||
image_x_pict=524626,
|
||||
image_x_portable_bitmap=524627,
|
||||
image_x_portable_graymap=524628,
|
||||
image_x_portable_pixmap=524629,
|
||||
image_x_quicktime=524630,
|
||||
image_x_rgb=524631,
|
||||
image_x_sigma_x3f=524632 | 0x00800000,
|
||||
image_x_sony_arw=524633 | 0x00800000,
|
||||
image_x_sony_sr2=524634 | 0x00800000,
|
||||
image_x_sony_srf=524635 | 0x00800000,
|
||||
image_x_tga=524636,
|
||||
image_x_tiff=524637,
|
||||
image_x_win_bitmap=524638,
|
||||
image_x_xcf=524639 | 0x80000000,
|
||||
image_x_xpixmap=524640 | 0x80000000,
|
||||
image_x_xwindowdump=524641,
|
||||
message_news=196962,
|
||||
message_rfc822=196963,
|
||||
model_vnd_dwf=65892,
|
||||
model_vnd_gdl=65893,
|
||||
model_vnd_gs_gdl=65894,
|
||||
model_vrml=65895,
|
||||
model_x_pov=65896,
|
||||
application_x_envoy=655481,
|
||||
application_x_esrehber=655482,
|
||||
application_x_excel=655483,
|
||||
application_x_executable=655484,
|
||||
application_x_font_gdos=655485,
|
||||
application_x_font_pf2=655486,
|
||||
application_x_font_pfm=655487,
|
||||
application_x_font_sfn=655488,
|
||||
application_x_font_ttf=655489 | 0x20000000,
|
||||
application_x_fptapplication_x_dbt=655490,
|
||||
application_x_freelance=655491,
|
||||
application_x_gamecube_rom=655492,
|
||||
application_x_gdbm=655493,
|
||||
application_x_gettext_translation=655494,
|
||||
application_x_git=655495,
|
||||
application_x_gsp=655496,
|
||||
application_x_gss=655497,
|
||||
application_x_gtar=655498,
|
||||
application_x_gzip=655499,
|
||||
application_x_hdf=655500,
|
||||
application_x_helpfile=655501,
|
||||
application_x_httpd_imap=655502,
|
||||
application_x_ima=655503,
|
||||
application_x_innosetup=655504,
|
||||
application_x_internett_signup=655505,
|
||||
application_x_inventor=655506,
|
||||
application_x_ip2=655507,
|
||||
application_x_java_applet=655508,
|
||||
application_x_java_commerce=655509,
|
||||
application_x_java_image=655510,
|
||||
application_x_java_jmod=655511,
|
||||
application_x_java_keystore=655512,
|
||||
application_x_kdelnk=655513,
|
||||
application_x_koan=655514,
|
||||
application_x_latex=655515,
|
||||
application_x_livescreen=655516,
|
||||
application_x_lotus=655517,
|
||||
application_x_lz4=655518 | 0x08000000,
|
||||
application_x_lz4_json=655519,
|
||||
application_x_lzh=655520,
|
||||
application_x_lzh_compressed=655521,
|
||||
application_x_lzip=655522 | 0x08000000,
|
||||
application_x_lzma=655523 | 0x08000000,
|
||||
application_x_lzop=655524 | 0x08000000,
|
||||
application_x_lzx=655525,
|
||||
application_x_mach_binary=655526,
|
||||
application_x_mach_executable=655527,
|
||||
application_x_magic_cap_package_1_0=655528,
|
||||
application_x_mathcad=655529,
|
||||
application_x_maxis_dbpf=655530,
|
||||
application_x_meme=655531,
|
||||
application_x_midi=655532,
|
||||
application_x_mif=655533,
|
||||
application_x_mix_transfer=655534,
|
||||
application_x_mobipocket_ebook=655535 | 0x02000000,
|
||||
application_x_ms_compress_szdd=655536,
|
||||
application_x_ms_pdb=655537,
|
||||
application_x_ms_reader=655538,
|
||||
application_x_msaccess=655539,
|
||||
application_x_n64_rom=655540,
|
||||
application_x_navi_animation=655541,
|
||||
application_x_navidoc=655542,
|
||||
application_x_navimap=655543,
|
||||
application_x_navistyle=655544,
|
||||
application_x_nes_rom=655545,
|
||||
application_x_netcdf=655546,
|
||||
application_x_newton_compatible_pkg=655547,
|
||||
application_x_nintendo_ds_rom=655548,
|
||||
application_x_object=655549,
|
||||
application_x_omc=655550,
|
||||
application_x_omcdatamaker=655551,
|
||||
application_x_omcregerator=655552,
|
||||
application_x_pagemaker=655553,
|
||||
application_x_pcl=655554,
|
||||
application_x_pgp_keyring=655555,
|
||||
application_x_pixclscript=655556,
|
||||
application_x_pkcs7_certreqresp=655557,
|
||||
application_x_pkcs7_signature=655558,
|
||||
application_x_project=655559,
|
||||
application_x_qpro=655560,
|
||||
application_x_rar=655561 | 0x10000000,
|
||||
application_x_rpm=655562,
|
||||
application_x_sdp=655563,
|
||||
application_x_sea=655564,
|
||||
application_x_seelogo=655565,
|
||||
application_x_setupscript=655566,
|
||||
application_x_shar=655567,
|
||||
application_x_sharedlib=655568,
|
||||
application_x_shockwave_flash=655569,
|
||||
application_x_snappy_framed=655570,
|
||||
application_x_sprite=655571,
|
||||
application_x_sqlite3=655572,
|
||||
application_x_stargallery_thm=655573,
|
||||
application_x_stuffit=655574,
|
||||
application_x_sv4cpio=655575,
|
||||
application_x_sv4crc=655576,
|
||||
application_x_tar=655577 | 0x10000000,
|
||||
application_x_tbook=655578,
|
||||
application_x_terminfo=655579,
|
||||
application_x_terminfo2=655580,
|
||||
application_x_tex_tfm=655581,
|
||||
application_x_texinfo=655582,
|
||||
application_x_ustar=655583,
|
||||
application_x_visio=655584,
|
||||
application_x_vnd_audioexplosion_mzz=655585,
|
||||
application_x_vnd_ls_xpix=655586,
|
||||
application_x_vrml=655587,
|
||||
application_x_wais_source=655588,
|
||||
application_x_wine_extension_ini=655589,
|
||||
application_x_wintalk=655590,
|
||||
application_x_world=655591,
|
||||
application_x_wri=655592,
|
||||
application_x_x509_ca_cert=655593,
|
||||
application_x_xz=655594 | 0x08000000,
|
||||
application_x_zip=655595,
|
||||
application_x_zstd=655596 | 0x08000000,
|
||||
application_x_zstd_dictionary=655597,
|
||||
application_xml=655598,
|
||||
application_zip=655599 | 0x10000000,
|
||||
application_zlib=655600,
|
||||
audio_basic=458993 | 0x80000000,
|
||||
audio_it=458994,
|
||||
audio_make=458995,
|
||||
audio_mid=458996,
|
||||
audio_midi=458997,
|
||||
audio_mp4=458998,
|
||||
audio_mpeg=458999,
|
||||
audio_ogg=459000,
|
||||
audio_s3m=459001,
|
||||
audio_tsp_audio=459002,
|
||||
audio_tsplayer=459003,
|
||||
audio_vnd_qcelp=459004,
|
||||
audio_voxware=459005,
|
||||
audio_x_aiff=459006,
|
||||
audio_x_flac=459007,
|
||||
audio_x_gsm=459008,
|
||||
audio_x_hx_aac_adts=459009,
|
||||
audio_x_jam=459010,
|
||||
audio_x_liveaudio=459011,
|
||||
audio_x_m4a=459012,
|
||||
audio_x_midi=459013,
|
||||
audio_x_mod=459014,
|
||||
audio_x_mp4a_latm=459015,
|
||||
audio_x_mpeg_3=459016,
|
||||
audio_x_mpequrl=459017,
|
||||
audio_x_nspaudio=459018,
|
||||
audio_x_pn_realaudio=459019,
|
||||
audio_x_psid=459020,
|
||||
audio_x_realaudio=459021,
|
||||
audio_x_s3m=459022,
|
||||
audio_x_twinvq=459023,
|
||||
audio_x_twinvq_plugin=459024,
|
||||
audio_x_voc=459025,
|
||||
audio_x_wav=459026,
|
||||
audio_x_xbox_executable=459027 | 0x80000000,
|
||||
audio_x_xbox360_executable=459028 | 0x80000000,
|
||||
audio_xm=459029,
|
||||
font_otf=327958 | 0x20000000,
|
||||
font_sfnt=327959 | 0x20000000,
|
||||
font_woff=327960 | 0x20000000,
|
||||
font_woff2=327961 | 0x20000000,
|
||||
image_bmp=524570,
|
||||
image_cmu_raster=524571,
|
||||
image_fif=524572,
|
||||
image_florian=524573,
|
||||
image_g3fax=524574,
|
||||
image_gif=524575,
|
||||
image_heic=524576,
|
||||
image_ief=524577,
|
||||
image_jpeg=524578,
|
||||
image_jutvision=524579,
|
||||
image_naplps=524580,
|
||||
image_pict=524581,
|
||||
image_png=524582,
|
||||
image_svg=524583 | 0x80000000,
|
||||
image_svg_xml=524584 | 0x80000000,
|
||||
image_tiff=524585,
|
||||
image_vnd_adobe_photoshop=524586 | 0x80000000,
|
||||
image_vnd_djvu=524587 | 0x80000000,
|
||||
image_vnd_fpx=524588,
|
||||
image_vnd_microsoft_icon=524589,
|
||||
image_vnd_rn_realflash=524590,
|
||||
image_vnd_rn_realpix=524591,
|
||||
image_vnd_wap_wbmp=524592,
|
||||
image_vnd_xiff=524593,
|
||||
image_webp=524594,
|
||||
image_wmf=524595,
|
||||
image_x_3ds=524596,
|
||||
image_x_adobe_dng=524597 | 0x00800000,
|
||||
image_x_award_bioslogo=524598,
|
||||
image_x_canon_cr2=524599 | 0x00800000,
|
||||
image_x_canon_crw=524600 | 0x00800000,
|
||||
image_x_cmu_raster=524601,
|
||||
image_x_cur=524602,
|
||||
image_x_dcraw=524603 | 0x00800000,
|
||||
image_x_dwg=524604,
|
||||
image_x_eps=524605,
|
||||
image_x_epson_erf=524606 | 0x00800000,
|
||||
image_x_exr=524607,
|
||||
image_x_fuji_raf=524608 | 0x00800000,
|
||||
image_x_gem=524609,
|
||||
image_x_icns=524610,
|
||||
image_x_icon=524611 | 0x80000000,
|
||||
image_x_jg=524612,
|
||||
image_x_jps=524613,
|
||||
image_x_kodak_dcr=524614 | 0x00800000,
|
||||
image_x_kodak_k25=524615 | 0x00800000,
|
||||
image_x_kodak_kdc=524616 | 0x00800000,
|
||||
image_x_minolta_mrw=524617 | 0x00800000,
|
||||
image_x_ms_bmp=524618,
|
||||
image_x_niff=524619,
|
||||
image_x_nikon_nef=524620 | 0x00800000,
|
||||
image_x_olympus_orf=524621 | 0x00800000,
|
||||
image_x_panasonic_raw=524622 | 0x00800000,
|
||||
image_x_pcx=524623,
|
||||
image_x_pentax_pef=524624 | 0x00800000,
|
||||
image_x_pict=524625,
|
||||
image_x_portable_bitmap=524626,
|
||||
image_x_portable_graymap=524627,
|
||||
image_x_portable_pixmap=524628,
|
||||
image_x_quicktime=524629,
|
||||
image_x_rgb=524630,
|
||||
image_x_sigma_x3f=524631 | 0x00800000,
|
||||
image_x_sony_arw=524632 | 0x00800000,
|
||||
image_x_sony_sr2=524633 | 0x00800000,
|
||||
image_x_sony_srf=524634 | 0x00800000,
|
||||
image_x_tga=524635,
|
||||
image_x_tiff=524636,
|
||||
image_x_win_bitmap=524637,
|
||||
image_x_xcf=524638 | 0x80000000,
|
||||
image_x_xpixmap=524639 | 0x80000000,
|
||||
image_x_xwindowdump=524640,
|
||||
message_news=196961,
|
||||
message_rfc822=196962,
|
||||
model_vnd_dwf=65891,
|
||||
model_vnd_gdl=65892,
|
||||
model_vnd_gs_gdl=65893,
|
||||
model_vrml=65894,
|
||||
model_x_pov=65895,
|
||||
sist2_sidecar=2,
|
||||
text_PGP=590185,
|
||||
text_asp=590186,
|
||||
text_css=590187,
|
||||
text_html=590188 | 0x01000000,
|
||||
text_javascript=590189,
|
||||
text_mcf=590190,
|
||||
text_pascal=590191,
|
||||
text_plain=590192,
|
||||
text_richtext=590193,
|
||||
text_rtf=590194,
|
||||
text_scriplet=590195,
|
||||
text_tab_separated_values=590196,
|
||||
text_troff=590197,
|
||||
text_uri_list=590198,
|
||||
text_vnd_abc=590199,
|
||||
text_vnd_fmi_flexstor=590200,
|
||||
text_vnd_wap_wml=590201,
|
||||
text_vnd_wap_wmlscript=590202,
|
||||
text_webviewhtml=590203,
|
||||
text_x_Algol68=590204,
|
||||
text_x_asm=590205,
|
||||
text_x_audiosoft_intra=590206,
|
||||
text_x_awk=590207,
|
||||
text_x_bcpl=590208,
|
||||
text_x_c=590209,
|
||||
text_x_c__=590210,
|
||||
text_x_component=590211,
|
||||
text_x_diff=590212,
|
||||
text_x_fortran=590213,
|
||||
text_x_java=590214,
|
||||
text_x_la_asf=590215,
|
||||
text_x_lisp=590216,
|
||||
text_x_m=590217,
|
||||
text_x_m4=590218,
|
||||
text_x_makefile=590219,
|
||||
text_x_ms_regedit=590220,
|
||||
text_x_msdos_batch=590221,
|
||||
text_x_objective_c=590222,
|
||||
text_x_pascal=590223,
|
||||
text_x_perl=590224,
|
||||
text_x_php=590225,
|
||||
text_x_po=590226,
|
||||
text_x_python=590227,
|
||||
text_x_ruby=590228,
|
||||
text_x_sass=590229,
|
||||
text_x_scss=590230,
|
||||
text_x_server_parsed_html=590231,
|
||||
text_x_setext=590232,
|
||||
text_x_sgml=590233 | 0x01000000,
|
||||
text_x_shellscript=590234,
|
||||
text_x_speech=590235,
|
||||
text_x_tcl=590236,
|
||||
text_x_tex=590237,
|
||||
text_x_uil=590238,
|
||||
text_x_uuencode=590239,
|
||||
text_x_vcalendar=590240,
|
||||
text_x_vcard=590241,
|
||||
text_xml=590242 | 0x01000000,
|
||||
video_MP2T=393635,
|
||||
video_animaflex=393636,
|
||||
video_avi=393637,
|
||||
video_avs_video=393638,
|
||||
video_mp4=393639,
|
||||
video_mpeg=393640,
|
||||
video_quicktime=393641,
|
||||
video_vdo=393642,
|
||||
video_vivo=393643,
|
||||
video_vnd_rn_realvideo=393644,
|
||||
video_vosaic=393645,
|
||||
video_webm=393646,
|
||||
video_x_amt_demorun=393647,
|
||||
video_x_amt_showrun=393648,
|
||||
video_x_atomic3d_feature=393649,
|
||||
video_x_dl=393650,
|
||||
video_x_dv=393651,
|
||||
video_x_fli=393652,
|
||||
video_x_flv=393653,
|
||||
video_x_isvideo=393654,
|
||||
video_x_jng=393655 | 0x80000000,
|
||||
video_x_m4v=393656,
|
||||
video_x_matroska=393657,
|
||||
video_x_mng=393658,
|
||||
video_x_motion_jpeg=393659,
|
||||
video_x_ms_asf=393660,
|
||||
video_x_msvideo=393661,
|
||||
video_x_qtc=393662,
|
||||
video_x_sgi_movie=393663,
|
||||
x_epoc_x_sisx_app=721344,
|
||||
text_PGP=590184,
|
||||
text_asp=590185,
|
||||
text_css=590186,
|
||||
text_html=590187 | 0x01000000,
|
||||
text_javascript=590188,
|
||||
text_mcf=590189,
|
||||
text_pascal=590190,
|
||||
text_plain=590191,
|
||||
text_richtext=590192,
|
||||
text_rtf=590193,
|
||||
text_scriplet=590194,
|
||||
text_tab_separated_values=590195,
|
||||
text_troff=590196,
|
||||
text_uri_list=590197,
|
||||
text_vnd_abc=590198,
|
||||
text_vnd_fmi_flexstor=590199,
|
||||
text_vnd_wap_wml=590200,
|
||||
text_vnd_wap_wmlscript=590201,
|
||||
text_webviewhtml=590202,
|
||||
text_x_Algol68=590203,
|
||||
text_x_asm=590204,
|
||||
text_x_audiosoft_intra=590205,
|
||||
text_x_awk=590206,
|
||||
text_x_bcpl=590207,
|
||||
text_x_c=590208,
|
||||
text_x_c__=590209,
|
||||
text_x_component=590210,
|
||||
text_x_diff=590211,
|
||||
text_x_fortran=590212,
|
||||
text_x_java=590213,
|
||||
text_x_la_asf=590214,
|
||||
text_x_lisp=590215,
|
||||
text_x_m=590216,
|
||||
text_x_m4=590217,
|
||||
text_x_makefile=590218,
|
||||
text_x_ms_regedit=590219,
|
||||
text_x_msdos_batch=590220,
|
||||
text_x_objective_c=590221,
|
||||
text_x_pascal=590222,
|
||||
text_x_perl=590223,
|
||||
text_x_php=590224,
|
||||
text_x_po=590225,
|
||||
text_x_python=590226,
|
||||
text_x_ruby=590227,
|
||||
text_x_sass=590228,
|
||||
text_x_scss=590229,
|
||||
text_x_server_parsed_html=590230,
|
||||
text_x_setext=590231,
|
||||
text_x_sgml=590232 | 0x01000000,
|
||||
text_x_shellscript=590233,
|
||||
text_x_speech=590234,
|
||||
text_x_tcl=590235,
|
||||
text_x_tex=590236,
|
||||
text_x_uil=590237,
|
||||
text_x_uuencode=590238,
|
||||
text_x_vcalendar=590239,
|
||||
text_x_vcard=590240,
|
||||
text_xml=590241 | 0x01000000,
|
||||
video_MP2T=393634,
|
||||
video_animaflex=393635,
|
||||
video_avi=393636,
|
||||
video_avs_video=393637,
|
||||
video_mp4=393638,
|
||||
video_mpeg=393639,
|
||||
video_quicktime=393640,
|
||||
video_vdo=393641,
|
||||
video_vivo=393642,
|
||||
video_vnd_rn_realvideo=393643,
|
||||
video_vosaic=393644,
|
||||
video_webm=393645,
|
||||
video_x_amt_demorun=393646,
|
||||
video_x_amt_showrun=393647,
|
||||
video_x_atomic3d_feature=393648,
|
||||
video_x_dl=393649,
|
||||
video_x_dv=393650,
|
||||
video_x_fli=393651,
|
||||
video_x_flv=393652,
|
||||
video_x_isvideo=393653,
|
||||
video_x_jng=393654 | 0x80000000,
|
||||
video_x_m4v=393655,
|
||||
video_x_matroska=393656,
|
||||
video_x_mng=393657,
|
||||
video_x_motion_jpeg=393658,
|
||||
video_x_ms_asf=393659,
|
||||
video_x_msvideo=393660,
|
||||
video_x_qtc=393661,
|
||||
video_x_sgi_movie=393662,
|
||||
x_epoc_x_sisx_app=721343,
|
||||
};
|
||||
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
|
||||
case application_arj: return "application/arj";
|
||||
@@ -482,6 +481,7 @@ case application_java_archive: return "application/java-archive";
|
||||
case application_java: return "application/java";
|
||||
case application_javascript: return "application/javascript";
|
||||
case application_json: return "application/json";
|
||||
case application_ndjson: return "application/ndjson";
|
||||
case application_marc: return "application/marc";
|
||||
case application_mbedlet: return "application/mbedlet";
|
||||
case application_mime: return "application/mime";
|
||||
@@ -537,8 +537,6 @@ case application_vocaltec_media_desc: return "application/vocaltec-media-desc";
|
||||
case application_vocaltec_media_file: return "application/vocaltec-media-file";
|
||||
case application_warc: return "application/warc";
|
||||
case application_winhelp: return "application/winhelp";
|
||||
case application_wordperfect6_0: return "application/wordperfect6.0";
|
||||
case application_wordperfect6_1: return "application/wordperfect6.1";
|
||||
case application_wordperfect: return "application/wordperfect";
|
||||
case application_x_123: return "application/x-123";
|
||||
case application_x_7z_compressed: return "application/x-7z-compressed";
|
||||
@@ -934,6 +932,8 @@ g_hash_table_insert(ext_table, "inf", (gpointer)application_inf);
|
||||
g_hash_table_insert(ext_table, "jar", (gpointer)application_java_archive);
|
||||
g_hash_table_insert(ext_table, "class", (gpointer)application_java);
|
||||
g_hash_table_insert(ext_table, "json", (gpointer)application_json);
|
||||
g_hash_table_insert(ext_table, "jsonl", (gpointer)application_ndjson);
|
||||
g_hash_table_insert(ext_table, "ndjson", (gpointer)application_ndjson);
|
||||
g_hash_table_insert(ext_table, "mrc", (gpointer)application_marc);
|
||||
g_hash_table_insert(ext_table, "mbd", (gpointer)application_mbedlet);
|
||||
g_hash_table_insert(ext_table, "aps", (gpointer)application_mime);
|
||||
@@ -1008,12 +1008,12 @@ g_hash_table_insert(ext_table, "vmd", (gpointer)application_vocaltec_media_desc)
|
||||
g_hash_table_insert(ext_table, "vmf", (gpointer)application_vocaltec_media_file);
|
||||
g_hash_table_insert(ext_table, "warc", (gpointer)application_warc);
|
||||
g_hash_table_insert(ext_table, "hlp", (gpointer)application_winhelp);
|
||||
g_hash_table_insert(ext_table, "w60", (gpointer)application_wordperfect6_0);
|
||||
g_hash_table_insert(ext_table, "w61", (gpointer)application_wordperfect6_1);
|
||||
g_hash_table_insert(ext_table, "wp", (gpointer)application_wordperfect);
|
||||
g_hash_table_insert(ext_table, "wp5", (gpointer)application_wordperfect);
|
||||
g_hash_table_insert(ext_table, "wp6", (gpointer)application_wordperfect);
|
||||
g_hash_table_insert(ext_table, "wpd", (gpointer)application_wordperfect);
|
||||
g_hash_table_insert(ext_table, "w60", (gpointer)application_wordperfect);
|
||||
g_hash_table_insert(ext_table, "w61", (gpointer)application_wordperfect);
|
||||
g_hash_table_insert(ext_table, "wk1", (gpointer)application_x_123);
|
||||
g_hash_table_insert(ext_table, "7z", (gpointer)application_x_7z_compressed);
|
||||
g_hash_table_insert(ext_table, "aim", (gpointer)application_x_aim);
|
||||
@@ -1478,6 +1478,7 @@ g_hash_table_insert(mime_table, "application/java-archive", (gpointer)applicatio
|
||||
g_hash_table_insert(mime_table, "application/java", (gpointer)application_java);
|
||||
g_hash_table_insert(mime_table, "application/javascript", (gpointer)application_javascript);
|
||||
g_hash_table_insert(mime_table, "application/json", (gpointer)application_json);
|
||||
g_hash_table_insert(mime_table, "application/ndjson", (gpointer)application_ndjson);
|
||||
g_hash_table_insert(mime_table, "application/marc", (gpointer)application_marc);
|
||||
g_hash_table_insert(mime_table, "application/mbedlet", (gpointer)application_mbedlet);
|
||||
g_hash_table_insert(mime_table, "application/mime", (gpointer)application_mime);
|
||||
@@ -1533,8 +1534,6 @@ g_hash_table_insert(mime_table, "application/vocaltec-media-desc", (gpointer)app
|
||||
g_hash_table_insert(mime_table, "application/vocaltec-media-file", (gpointer)application_vocaltec_media_file);
|
||||
g_hash_table_insert(mime_table, "application/warc", (gpointer)application_warc);
|
||||
g_hash_table_insert(mime_table, "application/winhelp", (gpointer)application_winhelp);
|
||||
g_hash_table_insert(mime_table, "application/wordperfect6.0", (gpointer)application_wordperfect6_0);
|
||||
g_hash_table_insert(mime_table, "application/wordperfect6.1", (gpointer)application_wordperfect6_1);
|
||||
g_hash_table_insert(mime_table, "application/wordperfect", (gpointer)application_wordperfect);
|
||||
g_hash_table_insert(mime_table, "application/x-123", (gpointer)application_x_123);
|
||||
g_hash_table_insert(mime_table, "application/x-7z-compressed", (gpointer)application_x_7z_compressed);
|
||||
|
||||
@@ -9,26 +9,35 @@
|
||||
#include <magic.h>
|
||||
|
||||
|
||||
#define MIN_VIDEO_SIZE 1024 * 64
|
||||
#define MIN_IMAGE_SIZE 1024 * 2
|
||||
#define MIN_VIDEO_SIZE (1024 * 64)
|
||||
#define MIN_IMAGE_SIZE (512)
|
||||
|
||||
int fs_read(struct vfile *f, void *buf, size_t size) {
|
||||
|
||||
if (f->fd == -1) {
|
||||
SHA1_Init(&f->sha1_ctx);
|
||||
|
||||
f->fd = open(f->filepath, O_RDONLY);
|
||||
if (f->fd == -1) {
|
||||
LOG_ERRORF(f->filepath, "open(): [%d] %s", errno, strerror(errno))
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return read(f->fd, buf, size);
|
||||
int ret = (int) read(f->fd, buf, size);
|
||||
|
||||
if (ret != 0 && f->calculate_checksum) {
|
||||
f->has_checksum = TRUE;
|
||||
safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Call the close callback of vfile `f` if one is set; do nothing otherwise.
 *
 * `f` must be the struct itself (not a pointer): it is accessed with `.` and
 * its address is passed to the callback. The argument is expanded three
 * times, so it should have no side effects.
 *
 * The expansion is a bare `if` statement that already ends with its own `;`,
 * so it can be written without a trailing semicolon at the call site.
 */
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
|
||||
|
||||
void fs_close(struct vfile *f) {
|
||||
if (f->fd != -1) {
|
||||
SHA1_Final(f->sha1_digest, &f->sha1_ctx);
|
||||
close(f->fd);
|
||||
}
|
||||
}
|
||||
@@ -66,11 +75,13 @@ void parse(void *arg) {
|
||||
doc->meta_tail = NULL;
|
||||
doc->mime = 0;
|
||||
doc->size = job->vfile.info.st_size;
|
||||
doc->mtime = job->vfile.info.st_mtim.tv_sec;
|
||||
doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
|
||||
|
||||
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
|
||||
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
||||
pthread_mutex_lock(&ScanCtx.copy_table_mu);
|
||||
incremental_mark_file_for_copy(ScanCtx.copy_table, doc->path_md5);
|
||||
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
|
||||
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_skipped_files_count += 1;
|
||||
@@ -93,18 +104,17 @@ void parse(void *arg) {
|
||||
doc->mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
|
||||
}
|
||||
|
||||
int bytes_read = 0;
|
||||
|
||||
if (doc->mime == 0 && !ScanCtx.fast) {
|
||||
|
||||
// Get mime type with libmagic
|
||||
if (!job->vfile.is_fs_file) {
|
||||
if (job->vfile.read_rewindable == NULL) {
|
||||
LOG_WARNING(job->filepath,
|
||||
"Guessing mime type with libmagic inside archive files is not currently supported");
|
||||
"File does not support rewindable reads, cannot guess Media type");
|
||||
goto abort;
|
||||
}
|
||||
|
||||
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
|
||||
int bytes_read = job->vfile.read_rewindable(&job->vfile, buf, MAGIC_BUF_SIZE);
|
||||
if (bytes_read < 0) {
|
||||
|
||||
if (job->vfile.is_fs_file) {
|
||||
@@ -135,7 +145,9 @@ void parse(void *arg) {
|
||||
}
|
||||
}
|
||||
|
||||
job->vfile.reset(&job->vfile);
|
||||
if (job->vfile.reset != NULL) {
|
||||
job->vfile.reset(&job->vfile);
|
||||
}
|
||||
|
||||
magic_close(magic);
|
||||
}
|
||||
@@ -149,7 +161,7 @@ void parse(void *arg) {
|
||||
} else if ((mmime == MimeVideo && doc->size >= MIN_VIDEO_SIZE) ||
|
||||
(mmime == MimeImage && doc->size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||
|
||||
parse_media(&ScanCtx.media_ctx, &job->vfile, doc);
|
||||
parse_media(&ScanCtx.media_ctx, &job->vfile, doc, mime_get_mime_text(doc->mime));
|
||||
|
||||
} else if (IS_PDF(doc->mime)) {
|
||||
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc);
|
||||
@@ -169,7 +181,7 @@ void parse(void *arg) {
|
||||
IS_ARC(doc->mime) ||
|
||||
(IS_ARC_FILTER(doc->mime) && should_parse_filtered_file(doc->filepath, doc->ext))
|
||||
)) {
|
||||
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc);
|
||||
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra);
|
||||
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc->mime)) {
|
||||
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc);
|
||||
} else if (is_cbr(&ScanCtx.comic_ctx, doc->mime) || is_cbz(&ScanCtx.comic_ctx, doc->mime)) {
|
||||
@@ -179,9 +191,15 @@ void parse(void *arg) {
|
||||
} else if (doc->mime == MIME_SIST2_SIDECAR) {
|
||||
parse_sidecar(&job->vfile, doc);
|
||||
CLOSE_FILE(job->vfile)
|
||||
free(doc->filepath);
|
||||
free(doc);
|
||||
return;
|
||||
} else if (is_msdoc(&ScanCtx.msdoc_ctx, doc->mime)) {
|
||||
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);
|
||||
} else if (is_json(&ScanCtx.json_ctx, doc->mime)) {
|
||||
parse_json(&ScanCtx.json_ctx, &job->vfile, doc);
|
||||
} else if (is_ndjson(&ScanCtx.json_ctx, doc->mime)) {
|
||||
parse_ndjson(&ScanCtx.json_ctx, &job->vfile, doc);
|
||||
}
|
||||
|
||||
abort:
|
||||
@@ -198,9 +216,15 @@ void parse(void *arg) {
|
||||
doc->has_parent = FALSE;
|
||||
}
|
||||
|
||||
write_document(doc);
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
|
||||
if (job->vfile.has_checksum) {
|
||||
char sha1_digest_str[SHA1_STR_LENGTH];
|
||||
buf2hex((unsigned char *) job->vfile.sha1_digest, SHA1_DIGEST_LENGTH, (char *) sha1_digest_str);
|
||||
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str);
|
||||
}
|
||||
|
||||
write_document(doc);
|
||||
}
|
||||
|
||||
void cleanup_parse() {
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include "../sist.h"
|
||||
|
||||
#define MAGIC_BUF_SIZE 4096 * 6
|
||||
#define MAGIC_BUF_SIZE (4096 * 6)
|
||||
|
||||
int fs_read(struct vfile *f, void *buf, size_t size);
|
||||
void fs_close(struct vfile *f);
|
||||
|
||||
@@ -27,7 +27,10 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
|
||||
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
|
||||
path_md5);
|
||||
|
||||
store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1);
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
|
||||
|
||||
store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
|
||||
|
||||
cJSON_Delete(json);
|
||||
free(json_str);
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#ifndef SIST_H
|
||||
#define SIST_H
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#ifndef FALSE
|
||||
#define FALSE (0)
|
||||
#define BOOL int
|
||||
@@ -26,6 +28,8 @@
|
||||
#define UNUSED(x) __attribute__((__unused__)) x
|
||||
|
||||
#define MD5_STR_LENGTH 33
|
||||
#define SHA1_STR_LENGTH 41
|
||||
#define SHA1_DIGEST_LENGTH 20
|
||||
|
||||
#include "util.h"
|
||||
#include "log.h"
|
||||
@@ -49,7 +53,7 @@
|
||||
#include <ctype.h>
|
||||
#include "git_hash.h"
|
||||
|
||||
#define VERSION "2.11.0"
|
||||
#define VERSION "2.11.6"
|
||||
static const char *const Version = VERSION;
|
||||
|
||||
#ifndef SIST_PLATFORM
|
||||
|
||||
@@ -177,7 +177,7 @@ static void *tpool_worker(void *arg) {
|
||||
}
|
||||
|
||||
void tpool_wait(tpool_t *pool) {
|
||||
LOG_INFO("tpool.c", "Waiting for worker threads to finish")
|
||||
LOG_DEBUG("tpool.c", "Waiting for worker threads to finish")
|
||||
pthread_mutex_lock(&(pool->work_mutex));
|
||||
while (TRUE) {
|
||||
if (pool->done_cnt < pool->work_cnt) {
|
||||
@@ -191,7 +191,9 @@ void tpool_wait(tpool_t *pool) {
|
||||
}
|
||||
}
|
||||
}
|
||||
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
|
||||
if (pool->print_progress) {
|
||||
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
|
||||
}
|
||||
pthread_mutex_unlock(&(pool->work_mutex));
|
||||
|
||||
LOG_INFO("tpool.c", "Worker threads finished")
|
||||
|
||||
26
src/util.c
26
src/util.c
@@ -84,11 +84,13 @@ char *expandpath(const char *path) {
|
||||
return expanded;
|
||||
}
|
||||
|
||||
int PrintingProgressBar = 0;
|
||||
|
||||
void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
|
||||
|
||||
static int last_val = -1;
|
||||
int val = (int) (percentage * 100);
|
||||
if (last_val == val || val > 100 || index_size < 1024) {
|
||||
if (last_val == val || val > 100) {
|
||||
return;
|
||||
}
|
||||
last_val = val;
|
||||
@@ -114,13 +116,21 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
|
||||
index_unit = 'M';
|
||||
}
|
||||
|
||||
printf(
|
||||
"\r%3d%%[%.*s>%*s] TN:%3d%c IDX:%3d%c",
|
||||
val, lpad, PBSTR, rpad, "",
|
||||
(int) tn_size, tn_unit,
|
||||
(int) index_size, index_unit
|
||||
);
|
||||
fflush(stdout);
|
||||
if (tn_size == 0 && index_size == 0) {
|
||||
fprintf(stderr,
|
||||
"\r%3d%%[%.*s>%*s]",
|
||||
val, lpad, PBSTR, rpad, ""
|
||||
);
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"\r%3d%%[%.*s>%*s] TN:%3d%c IDX:%3d%c",
|
||||
val, lpad, PBSTR, rpad, "",
|
||||
(int) tn_size, tn_unit,
|
||||
(int) index_size, index_unit
|
||||
);
|
||||
}
|
||||
|
||||
PrintingProgressBar = TRUE;
|
||||
}
|
||||
|
||||
GHashTable *incremental_get_table() {
|
||||
|
||||
@@ -19,6 +19,8 @@ char *expandpath(const char *path);
|
||||
|
||||
dyn_buffer_t url_escape(char *str);
|
||||
|
||||
extern int PrintingProgressBar;
|
||||
|
||||
void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
|
||||
|
||||
GHashTable *incremental_get_table();
|
||||
@@ -131,6 +133,9 @@ static int incremental_get_str(GHashTable *table, const char *path_md5) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Not thread safe!
|
||||
*/
|
||||
__always_inline
|
||||
static int incremental_mark_file_for_copy(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
|
||||
char *ptr = malloc(MD5_STR_LENGTH);
|
||||
|
||||
@@ -252,12 +252,32 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
|
||||
mg_http_serve_file(nc, hm, full_path, mime, disposition);
|
||||
}
|
||||
|
||||
void cache_es_version() {
|
||||
static int is_cached = FALSE;
|
||||
|
||||
if (is_cached == TRUE) {
|
||||
return;
|
||||
}
|
||||
|
||||
es_version_t *es_version = elastic_get_version(WebCtx.es_url);
|
||||
if (es_version != NULL) {
|
||||
WebCtx.es_version = es_version;
|
||||
is_cached = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
void index_info(struct mg_connection *nc) {
|
||||
|
||||
cache_es_version();
|
||||
|
||||
cJSON *json = cJSON_CreateObject();
|
||||
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
|
||||
|
||||
cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
|
||||
cJSON_AddStringToObject(json, "version", Version);
|
||||
cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version));
|
||||
cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
|
||||
cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version));
|
||||
cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
|
||||
cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
|
||||
cJSON_AddStringToObject(json, "libscanHash", LibScanCommitHash);
|
||||
|
||||
8
src/web/static_generated.c
vendored
8
src/web/static_generated.c
vendored
File diff suppressed because one or more lines are too long
2
third-party/argparse
vendored
2
third-party/argparse
vendored
Submodule third-party/argparse updated: ffd9c23427...225141eb3d
1
third-party/libscan
vendored
1
third-party/libscan
vendored
Submodule third-party/libscan deleted from 22522d7d4a
12
third-party/libscan/.gitignore
vendored
Normal file
12
third-party/libscan/.gitignore
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
.idea/
|
||||
cmake_install.cmake
|
||||
Makefile
|
||||
libscan.a
|
||||
libscan.so
|
||||
*.cbp
|
||||
CMakeFiles
|
||||
CMakeCache.txt
|
||||
scan_test
|
||||
third-party/ext_*
|
||||
libscan-test-files
|
||||
scan_*_test
|
||||
233
third-party/libscan/CMakeLists.txt
vendored
Normal file
233
third-party/libscan/CMakeLists.txt
vendored
Normal file
@@ -0,0 +1,233 @@
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
project(scan)
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
|
||||
option(BUILD_TESTS "Build tests" on)
|
||||
|
||||
add_subdirectory(third-party/antiword)
|
||||
add_compile_definitions(
|
||||
antiword
|
||||
NDEBUG
|
||||
)
|
||||
|
||||
add_library(
|
||||
scan
|
||||
libscan/util.c libscan/util.h
|
||||
libscan/scan.h
|
||||
libscan/macros.h
|
||||
|
||||
libscan/text/text.c libscan/text/text.h
|
||||
libscan/arc/arc.c libscan/arc/arc.h
|
||||
libscan/ebook/ebook.c libscan/ebook/ebook.h
|
||||
libscan/comic/comic.c libscan/comic/comic.h
|
||||
libscan/ooxml/ooxml.c libscan/ooxml/ooxml.h
|
||||
libscan/media/media.c libscan/media/media.h
|
||||
libscan/font/font.c libscan/font/font.h
|
||||
libscan/msdoc/msdoc.c libscan/msdoc/msdoc.h
|
||||
libscan/json/json.c libscan/json/json.h
|
||||
libscan/wpd/wpd.c libscan/wpd/wpd.h libscan/wpd/libwpd_c_api.h libscan/wpd/libwpd_c_api.cpp
|
||||
|
||||
third-party/utf8.h
|
||||
libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h libscan/raw/raw.c libscan/raw/raw.h)
|
||||
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib .so)
|
||||
|
||||
find_package(cJSON CONFIG REQUIRED)
|
||||
find_package(LibArchive REQUIRED)
|
||||
find_package(BZip2 REQUIRED)
|
||||
find_package(lz4 REQUIRED)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
find_package(Tesseract CONFIG REQUIRED)
|
||||
find_package(OpenJPEG CONFIG REQUIRED)
|
||||
find_package(JPEG REQUIRED)
|
||||
find_package(LibXml2 REQUIRED)
|
||||
find_package(LibLZMA REQUIRED)
|
||||
find_package(ZLIB REQUIRED)
|
||||
find_package(unofficial-pcre CONFIG REQUIRED)
|
||||
|
||||
|
||||
find_library(JBIG2DEC_LIB NAMES jbig2decd jbig2dec)
|
||||
find_library(HARFBUZZ_LIB NAMES harfbuzz harfbuzzd)
|
||||
find_library(FREETYPE_LIB NAMES freetype freetyped)
|
||||
find_package(unofficial-brotli CONFIG REQUIRED)
|
||||
find_library(LZO2_LIB NAMES lzo2)
|
||||
|
||||
find_library(RAW_LIB NAMES libraw.a)
|
||||
find_library(MUPDF_LIB NAMES liblibmupdf.a)
|
||||
find_library(CMS_LIB NAMES lcms2)
|
||||
find_library(JAS_LIB NAMES jasper)
|
||||
find_library(GUMBO_LIB NAMES gumbo)
|
||||
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/)
|
||||
|
||||
|
||||
target_compile_options(
|
||||
scan
|
||||
PRIVATE
|
||||
-g
|
||||
)
|
||||
|
||||
include(ExternalProject)
|
||||
find_program(MAKE_EXE NAMES gmake nmake make)
|
||||
ExternalProject_Add(
|
||||
libmobi
|
||||
GIT_REPOSITORY https://github.com/simon987/libmobi.git
|
||||
GIT_TAG "public"
|
||||
|
||||
UPDATE_COMMAND ""
|
||||
PATCH_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
CONFIGURE_COMMAND ./autogen.sh && ./configure
|
||||
INSTALL_COMMAND ""
|
||||
|
||||
PREFIX "third-party/ext_libmobi"
|
||||
SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
|
||||
BINARY_DIR "third-party/ext_libmobi/src/libmobi"
|
||||
|
||||
BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
|
||||
)
|
||||
|
||||
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
|
||||
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
|
||||
|
||||
if (SIST_DEBUG)
|
||||
SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
|
||||
else()
|
||||
SET(FFMPEG_DEBUG "")
|
||||
endif()
|
||||
|
||||
ExternalProject_Add(
|
||||
ffmpeg
|
||||
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
|
||||
GIT_TAG "n4.4"
|
||||
|
||||
UPDATE_COMMAND ""
|
||||
PATCH_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
CONFIGURE_COMMAND ./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay
|
||||
--disable-ffprobe --disable-doc --disable-manpages --disable-postproc --disable-avfilter --disable-alsa
|
||||
--disable-lzma --disable-xlib --disable-vdpau --disable-vaapi --disable-sdl2
|
||||
--disable-network ${FFMPEG_DEBUG}
|
||||
INSTALL_COMMAND ""
|
||||
|
||||
PREFIX "third-party/ext_ffmpeg"
|
||||
SOURCE_DIR "third-party/ext_ffmpeg/src/ffmpeg"
|
||||
BINARY_DIR "third-party/ext_ffmpeg/src/ffmpeg"
|
||||
|
||||
BUILD_COMMAND ${MAKE_EXE} -j33 --silent
|
||||
)
|
||||
|
||||
SET(FFMPEG_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg)
|
||||
SET(FFMPEG_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg)
|
||||
|
||||
ExternalProject_Add(
|
||||
libwpd
|
||||
URL http://prdownloads.sourceforge.net/libwpd/libwpd-0.9.9.tar.gz
|
||||
|
||||
UPDATE_COMMAND ""
|
||||
PATCH_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
CONFIGURE_COMMAND ./configure --without-docs --enable-static --disable-shared
|
||||
INSTALL_COMMAND ""
|
||||
|
||||
PREFIX "third-party/ext_libwpd"
|
||||
SOURCE_DIR "third-party/ext_libwpd/src/libwpd"
|
||||
BINARY_DIR "third-party/ext_libwpd/src/libwpd"
|
||||
|
||||
BUILD_COMMAND ${MAKE_EXE} -j33
|
||||
)
|
||||
SET(WPD_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/src/lib/.libs/)
|
||||
SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/inc/)
|
||||
|
||||
add_dependencies(
|
||||
scan
|
||||
libmobi
|
||||
ffmpeg
|
||||
antiword
|
||||
libwpd
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
scan
|
||||
PUBLIC
|
||||
|
||||
cjson
|
||||
${LibArchive_LIBRARIES}
|
||||
ZLIB::ZLIB
|
||||
BZip2::BZip2
|
||||
lz4::lz4
|
||||
${LZO2_LIB}
|
||||
LibLZMA::LibLZMA
|
||||
|
||||
${MUPDF_LIB}
|
||||
openjp2
|
||||
|
||||
${MOBI_LIB_DIR}/libmobi.a
|
||||
|
||||
${WPD_LIB_DIR}/libwpd-0.9.a
|
||||
${WPD_LIB_DIR}/libwpd-stream-0.9.a
|
||||
|
||||
${FREETYPE_LIB}
|
||||
${HARFBUZZ_LIB}
|
||||
${JBIG2DEC_LIB}
|
||||
|
||||
stdc++
|
||||
|
||||
-Wl,--whole-archive
|
||||
m
|
||||
-Wl,--no-whole-archive
|
||||
|
||||
${JPEG_LIBRARIES}
|
||||
${Tesseract_LIBRARIES}
|
||||
${LIBXML2_LIBRARIES}
|
||||
${FREETYPE_LIB}
|
||||
unofficial::brotli::brotlidec-static
|
||||
|
||||
${FFMPEG_LIB_DIR}/libavformat/libavformat.a
|
||||
${FFMPEG_LIB_DIR}/libavcodec/libavcodec.a
|
||||
${FFMPEG_LIB_DIR}/libavutil/libavutil.a
|
||||
${FFMPEG_LIB_DIR}/libswresample/libswresample.a
|
||||
${FFMPEG_LIB_DIR}/libswscale/libswscale.a
|
||||
|
||||
z
|
||||
|
||||
${CMAKE_THREAD_LIBS_INIT}
|
||||
|
||||
${RAW_LIB}
|
||||
${GOMP_LIB}
|
||||
${CMS_LIB}
|
||||
${JAS_LIB}
|
||||
${GUMBO_LIB}
|
||||
dl
|
||||
antiword
|
||||
unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
|
||||
)
|
||||
|
||||
target_include_directories(
|
||||
scan
|
||||
PUBLIC
|
||||
${MUPDF_INC_DIR}
|
||||
${JPEG_INCLUDE_DIR}
|
||||
${LIBXML2_INCLUDE_DIR}
|
||||
${FFMPEG_INCLUDE_DIR}
|
||||
${MOBI_INCLUDE_DIR}
|
||||
${WPD_INCLUDE_DIR}
|
||||
)
|
||||
|
||||
if (BUILD_TESTS)
|
||||
find_package(GTest CONFIG REQUIRED)
|
||||
|
||||
add_executable(scan_ub_test test/main.cpp test/test_util.cpp test/test_util.h)
|
||||
target_compile_options(scan_ub_test PRIVATE -g -fsanitize=undefined -fno-omit-frame-pointer)
|
||||
target_link_libraries(scan_ub_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=undefined scan)
|
||||
|
||||
add_executable(scan_a_test test/main.cpp test/test_util.cpp test/test_util.h)
|
||||
target_compile_options(scan_a_test PRIVATE -g -fsanitize=address -fno-omit-frame-pointer)
|
||||
target_link_libraries(scan_a_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=address scan)
|
||||
|
||||
add_executable(scan_test test/main.cpp test/test_util.cpp test/test_util.h)
|
||||
target_compile_options(scan_test PRIVATE -g -fno-omit-frame-pointer)
|
||||
target_link_libraries(scan_test PRIVATE GTest::gtest GTest::gtest_main scan)
|
||||
endif()
|
||||
4
third-party/libscan/README.md
vendored
Normal file
4
third-party/libscan/README.md
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
### Run fuzz tests:
|
||||
```bash
|
||||
./scan_a_test --gtest_filter=*Fuzz* --gtest_repeat=100
|
||||
```
|
||||
244
third-party/libscan/libscan/arc/arc.c
vendored
Normal file
244
third-party/libscan/libscan/arc/arc.c
vendored
Normal file
@@ -0,0 +1,244 @@
|
||||
#include "arc.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <openssl/evp.h>
|
||||
#include <pcre.h>
|
||||
|
||||
|
||||
/**
 * Decide whether a file should be handed to the archive parser based on its name.
 *
 * @param filepath Path of the file.
 * @param ext      Offset in `filepath` of the first character of the
 *                 extension (the character after the last '.'); 0 means
 *                 "no extension".
 * @return 1 (TRUE) when the extension starts with "tgz" or when the part of
 *         the name in front of the extension ends in ".tar" (e.g.
 *         "a.tar.gz"), 0 (FALSE) otherwise.
 *
 * The comparison is done in place: the path is never copied, so arbitrarily
 * long paths are safe.
 */
int should_parse_filtered_file(const char *filepath, int ext) {
    static const char tar_suffix[] = ".tar";
    const int tar_suffix_len = (int) sizeof(tar_suffix) - 1;

    if (ext <= 0) {
        return 0;
    }

    if (strncmp(filepath + ext, "tgz", 3) == 0) {
        return 1;
    }

    /* The stem is filepath[0 .. ext - 2]: everything before the '.' that introduces the extension. */
    const int stem_len = ext - 1;
    if (stem_len < tar_suffix_len) {
        return 0;
    }

    return memcmp(filepath + stem_len - tar_suffix_len, tar_suffix, (size_t) tar_suffix_len) == 0;
}
|
||||
|
||||
void arc_close(struct vfile *f) {
|
||||
SHA1_Final(f->sha1_digest, &f->sha1_ctx);
|
||||
|
||||
if (f->rewind_buffer != NULL) {
|
||||
free(f->rewind_buffer);
|
||||
f->rewind_buffer = NULL;
|
||||
f->rewind_buffer_size = 0;
|
||||
f->rewind_buffer_cursor = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int arc_read(struct vfile *f, void *buf, size_t size) {
|
||||
|
||||
int bytes_copied = 0;
|
||||
|
||||
if (f->rewind_buffer_size != 0) {
|
||||
if (size > f->rewind_buffer_size) {
|
||||
memcpy(buf, f->rewind_buffer + f->rewind_buffer_cursor, f->rewind_buffer_size);
|
||||
|
||||
bytes_copied = f->rewind_buffer_size;
|
||||
size -= f->rewind_buffer_size;
|
||||
buf += f->rewind_buffer_size;
|
||||
f->rewind_buffer_size = 0;
|
||||
} else {
|
||||
memcpy(buf, f->rewind_buffer + f->rewind_buffer_cursor, size);
|
||||
f->rewind_buffer_size -= (int) size;
|
||||
f->rewind_buffer_cursor += (int) size;
|
||||
|
||||
return (int) size;
|
||||
}
|
||||
}
|
||||
|
||||
size_t bytes_read = archive_read_data(f->arc, buf, size);
|
||||
|
||||
if (bytes_read != 0 && bytes_read <= size && f->calculate_checksum) {
|
||||
f->has_checksum = TRUE;
|
||||
|
||||
safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, bytes_read);
|
||||
}
|
||||
|
||||
if (bytes_read != size && archive_errno(f->arc) != 0) {
|
||||
const char *error_str = archive_error_string(f->arc);
|
||||
if (error_str != NULL) {
|
||||
f->logf(f->filepath, LEVEL_ERROR, "Error reading archive file: %s", error_str);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
return (int) bytes_read + bytes_copied;
|
||||
}
|
||||
|
||||
int arc_read_rewindable(struct vfile *f, void *buf, size_t size) {
|
||||
|
||||
if (f->rewind_buffer != NULL) {
|
||||
fprintf(stderr, "Allocated rewind buffer more than once for %s", f->filepath);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
size_t bytes_read = archive_read_data(f->arc, buf, size);
|
||||
|
||||
if (bytes_read != size && archive_errno(f->arc) != 0) {
|
||||
const char *error_str = archive_error_string(f->arc);
|
||||
if (error_str != NULL) {
|
||||
f->logf(f->filepath, LEVEL_ERROR, "Error reading archive file: %s", error_str);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
f->rewind_buffer = malloc(size);
|
||||
f->rewind_buffer_size = (int) size;
|
||||
f->rewind_buffer_cursor = 0;
|
||||
memcpy(f->rewind_buffer, buf, size);
|
||||
|
||||
return (int) bytes_read;
|
||||
}
|
||||
|
||||
/**
 * Create a libarchive reader for `f` and open it.
 *
 * Files on the filesystem are opened by name. Any other vfile is streamed
 * through the vfile_*_callback functions, and only when `allow_recurse` is
 * set.
 *
 * @param ctx           Archive context; a non-empty ctx->passphrase is registered with the reader.
 * @param f             File to open.
 * @param a             Receives the new reader. Left untouched when ARC_SKIPPED is returned.
 * @param arc_data      Callback state; arc_data->f is always set to `f`.
 * @param allow_recurse Non-zero to allow opening a file that is not on the filesystem.
 * @return ARC_SKIPPED, or the result of the libarchive open call.
 */
int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse) {
    arc_data->f = f;

    if (!f->is_fs_file && !allow_recurse) {
        return ARC_SKIPPED;
    }

    struct archive *reader = archive_read_new();
    *a = reader;

    archive_read_support_filter_all(reader);
    archive_read_support_format_all(reader);
    if (ctx->passphrase[0] != 0) {
        archive_read_add_passphrase(reader, ctx->passphrase);
    }

    if (f->is_fs_file) {
        return archive_read_open_filename(reader, f->filepath, ARC_BUF_SIZE);
    }

    return archive_read_open(
            reader, arc_data,
            vfile_open_callback,
            vfile_read_callback,
            vfile_close_callback
    );
}
|
||||
|
||||
/* Output vector handed to pcre_exec(); one instance per thread. */
static __thread int sub_strings[30];
/*
 * True when `str` matches the exclude pattern. Expects `exclude` and
 * `exclude_extra` to be in scope at the point of use.
 * pcre_exec() takes the ovector length as a number of int elements, not as a
 * size in bytes.
 */
#define EXCLUDED(str) (pcre_exec(exclude, exclude_extra, str, (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
|
||||
|
||||
/**
 * Process an archive file.
 *
 * - ARC_MODE_LIST: the names of all regular-file entries are joined with
 *   spaces and attached to `doc` as a MetaContent line.
 * - any other mode: every regular-file entry is wrapped in a vfile and passed
 *   to ctx->parse() as a sub-job named "<archive path>#/<entry path>", unless
 *   that name matches the `exclude` pattern.
 *
 * @param ctx           Archive context (mode, callbacks, passphrase).
 * @param f             The archive itself.
 * @param doc           Document of the archive; its path_md5 becomes the parent of sub-jobs.
 * @param exclude       Compiled exclude pattern, or NULL for none.
 * @param exclude_extra Study data for `exclude`.
 * @return SCAN_OK (also when arc_open() skipped the file), SCAN_ERR_READ if the archive could not be opened.
 */
scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra) {

    struct archive *a = NULL;
    struct archive_entry *entry = NULL;

    arc_data_t arc_data;
    arc_data.f = f;

    int ret = arc_open(ctx, f, &a, &arc_data, ctx->mode == ARC_MODE_RECURSE);
    if (ret == ARC_SKIPPED) {
        return SCAN_OK;
    }

    if (ret != ARCHIVE_OK) {
        CTX_LOG_ERRORF(f->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
        archive_read_free(a);
        return SCAN_ERR_READ;
    }

    if (ctx->mode == ARC_MODE_LIST) {
        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
                const char *utf8_name = archive_entry_pathname_utf8(entry);
                const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;

                dyn_buffer_append_string(&buf, file_path);
                dyn_buffer_write_char(&buf, ' ');
            }
        }
        dyn_buffer_write_char(&buf, '\0');

        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        meta_list->key = MetaContent;
        strcpy(meta_list->str_val, buf.buf);
        APPEND_META(doc, meta_list)
        dyn_buffer_destroy(&buf);

    } else {

        /* Room reserved behind the job struct for the entry path. */
        const size_t filepath_cap = PATH_MAX * 2;
        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + filepath_cap);

        sub_job->vfile.close = arc_close;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.read_rewindable = arc_read_rewindable;
        sub_job->vfile.reset = NULL;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        sub_job->vfile.rewind_buffer_size = 0;
        sub_job->vfile.rewind_buffer = NULL;
        sub_job->vfile.log = ctx->log;
        sub_job->vfile.logf = ctx->logf;
        sub_job->vfile.has_checksum = FALSE;
        sub_job->vfile.calculate_checksum = f->calculate_checksum;
        memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH);

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->vfile.info = *archive_entry_stat(entry);
            if (!S_ISREG(sub_job->vfile.info.st_mode)) {
                continue;
            }

            const char *utf8_name = archive_entry_pathname_utf8(entry);
            const char *entry_name = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;
            if (entry_name == NULL) {
                CTX_LOG_ERRORF(f->filepath, "(arc.c) Skipping entry without a readable name in %s", f->filepath)
                continue;
            }

            /* Entry names come from the archive and can be arbitrarily long: never write past the job buffer. */
            int written = snprintf(sub_job->filepath, filepath_cap, "%s#/%s", f->filepath, entry_name);
            if (written < 0 || (size_t) written >= filepath_cap) {
                CTX_LOG_ERRORF(f->filepath, "(arc.c) Entry path is too long, skipping: %s", entry_name)
                continue;
            }
            sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

            // Handle excludes
            if (exclude != NULL && EXCLUDED(sub_job->filepath)) {
                CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath)
                continue;
            }

            /* A '.' only counts as an extension separator when it belongs to the entry name, not to the archive path. */
            char *p = strrchr(sub_job->filepath, '.');
            if (p != NULL && (size_t) (p - sub_job->filepath) > strlen(f->filepath)) {
                sub_job->ext = (int) (p - sub_job->filepath + 1);
            } else {
                sub_job->ext = (int) strlen(sub_job->filepath);
            }

            SHA1_Init(&sub_job->vfile.sha1_ctx);

            ctx->parse(sub_job);
        }

        free(sub_job);
    }

    archive_read_free(a);
    return SCAN_OK;
}
|
||||
80
third-party/libscan/libscan/arc/arc.h
vendored
Normal file
80
third-party/libscan/libscan/arc/arc.h
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
#ifndef SCAN_ARC_H
|
||||
#define SCAN_ARC_H
|
||||
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
#include <fcntl.h>
|
||||
#include <pcre.h>
|
||||
#include "../scan.h"
|
||||
|
||||
# define ARC_SKIPPED (-1)
|
||||
#define ARC_MODE_SKIP 0
|
||||
#define ARC_MODE_LIST 1
|
||||
#define ARC_MODE_SHALLOW 2
|
||||
#define ARC_MODE_RECURSE 3
|
||||
typedef int archive_mode_t;
|
||||
|
||||
/*
 * Settings and callbacks used by the archive parser.
 *
 * mode       - one of the ARC_MODE_* values; parse_archive() lists entry
 *              names for ARC_MODE_LIST and only opens archives nested inside
 *              other archives for ARC_MODE_RECURSE.
 * parse      - called by parse_archive() with a sub-job for each regular-file entry.
 * log, logf  - logging callbacks; copied into the vfile of every sub-job.
 * store      - storage callback (not used by the archive code visible here).
 * passphrase - registered with libarchive by arc_open() unless its first byte is 0.
 */
typedef struct {
|
||||
archive_mode_t mode;
|
||||
|
||||
parse_callback_t parse;
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
store_callback_t store;
|
||||
char passphrase[4096];
|
||||
} scan_arc_ctx_t;
|
||||
|
||||
#define ARC_BUF_SIZE 8192
|
||||
|
||||
/*
 * State handed to the libarchive read callbacks when an archive is streamed
 * from a vfile.
 *
 * f   - the vfile the archive bytes are read from.
 * buf - staging buffer: vfile_read_callback() fills it through f->read() and
 *       hands its address back to libarchive.
 */
typedef struct {
|
||||
vfile_t *f;
|
||||
char buf[ARC_BUF_SIZE];
|
||||
} arc_data_t;
|
||||
|
||||
static int vfile_open_callback(struct archive *a, void *user_data) {
|
||||
arc_data_t *data = (arc_data_t *) user_data;
|
||||
|
||||
if (!data->f->is_fs_file) {
|
||||
SHA1_Init(&data->f->sha1_ctx);
|
||||
}
|
||||
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
static long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
|
||||
arc_data_t *data = (arc_data_t *) user_data;
|
||||
|
||||
*buf = data->buf;
|
||||
long ret = data->f->read(data->f, data->buf, sizeof(data->buf));
|
||||
|
||||
if (!data->f->is_fs_file && ret > 0) {
|
||||
data->f->has_checksum = TRUE;
|
||||
safe_sha1_update(&data->f->sha1_ctx, (unsigned char*)data->buf, ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vfile_close_callback(struct archive *a, void *user_data) {
|
||||
arc_data_t *data = (arc_data_t *) user_data;
|
||||
|
||||
if (!data->f->is_fs_file) {
|
||||
SHA1_Final((unsigned char *) data->f->sha1_digest, &data->f->sha1_ctx);
|
||||
}
|
||||
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse);
|
||||
|
||||
int should_parse_filtered_file(const char *filepath, int ext);
|
||||
|
||||
scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra);
|
||||
|
||||
int arc_read(struct vfile *f, void *buf, size_t size);
|
||||
|
||||
int arc_read_rewindable(struct vfile *f, void *buf, size_t size);
|
||||
|
||||
void arc_close(struct vfile *f);
|
||||
|
||||
#endif
|
||||
58
third-party/libscan/libscan/comic/comic.c
vendored
Normal file
58
third-party/libscan/libscan/comic/comic.c
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
#include "comic.h"
|
||||
#include "../media/media.h"
|
||||
#include "../arc/arc.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <archive.h>
|
||||
|
||||
static scan_arc_ctx_t arc_ctx = (scan_arc_ctx_t) {.passphrase = {0,}};
|
||||
|
||||
/**
 * Generate the thumbnail of a comic book archive.
 *
 * Walks the archive entries and passes the first .png/.jpg/.jpeg file that
 * store_image_thumbnail() accepts; the walk stops at the first success or at
 * the first entry that cannot be read. Does nothing when ctx->tn_size <= 0.
 *
 * @param ctx Comic context (thumbnail settings and callbacks).
 * @param f   The comic archive.
 * @param doc Document the thumbnail is stored for.
 */
void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
    struct archive *a = NULL;
    struct archive_entry *entry = NULL;
    arc_data_t arc_data;

    if (ctx->tn_size <= 0) {
        return;
    }

    int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE);
    if (ret != ARCHIVE_OK) {
        CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a))
        archive_read_free(a);
        return;
    }

    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
        struct stat info = *archive_entry_stat(entry);
        if (!S_ISREG(info.st_mode)) {
            continue;
        }

        const char *utf8_name = archive_entry_pathname_utf8(entry);
        const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;
        if (file_path == NULL) {
            continue;
        }

        const char *p = strrchr(file_path, '.');
        if (p == NULL || (strcmp(p, ".png") != 0 && strcmp(p, ".jpg") != 0 && strcmp(p, ".jpeg") != 0)) {
            continue;
        }

        size_t entry_size = archive_entry_size(entry);
        if (entry_size == 0) {
            /* An empty image cannot yield a thumbnail; try the next entry. */
            continue;
        }

        /* The size comes from the archive header, so the allocation can fail on hostile or corrupt input. */
        void *buf = malloc(entry_size);
        if (buf == NULL) {
            CTX_LOG_ERRORF("comic.c", "Could not allocate buffer for entry: %s", file_path)
            break;
        }

        size_t read = archive_read_data(a, buf, entry_size);

        if (read != entry_size) {
            const char *err_str = archive_error_string(a);
            if (err_str) {
                CTX_LOG_ERRORF("comic.c", "Error while reading entry: %s", err_str)
            }
            free(buf);
            break;
        }

        ret = store_image_thumbnail((scan_media_ctx_t *) ctx, buf, entry_size, doc, file_path);
        free(buf);

        if (ret == TRUE) {
            break;
        }
    }

    archive_read_free(a);
}
|
||||
31
third-party/libscan/libscan/comic/comic.h
vendored
Normal file
31
third-party/libscan/libscan/comic/comic.h
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
#ifndef SCAN_CBR_H
|
||||
#define SCAN_CBR_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "../ebook/ebook.h"
|
||||
|
||||
/*
 * Settings and callbacks for the comic book (CBR/CBZ) parser.
 *
 * log, logf, store - logging and storage callbacks.
 * tn_size          - thumbnail size; parse_comic() does nothing when it is <= 0.
 * tn_qscale        - thumbnail quality setting (not read by the comic code visible here).
 * cbr_mime         - MIME id compared by is_cbr().
 * cbz_mime         - MIME id compared by is_cbz().
 *
 * NOTE(review): parse_comic() casts a pointer to this struct to
 * scan_media_ctx_t *, so the leading fields presumably have to mirror that
 * struct's layout - confirm before reordering.
 */
typedef struct {
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
store_callback_t store;
|
||||
|
||||
int tn_size;
|
||||
float tn_qscale;
|
||||
|
||||
unsigned int cbr_mime;
|
||||
unsigned int cbz_mime;
|
||||
} scan_comic_ctx_t;
|
||||
|
||||
__always_inline
|
||||
static int is_cbr(scan_comic_ctx_t *ctx, unsigned int mime) {
|
||||
return mime == ctx->cbr_mime;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int is_cbz(scan_comic_ctx_t *ctx, unsigned int mime) {
|
||||
return mime == ctx->cbz_mime;
|
||||
}
|
||||
|
||||
void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
||||
#endif
|
||||
495
third-party/libscan/libscan/ebook/ebook.c
vendored
Normal file
495
third-party/libscan/libscan/ebook/ebook.c
vendored
Normal file
@@ -0,0 +1,495 @@
|
||||
#include "ebook.h"
|
||||
#include <mupdf/fitz.h>
|
||||
#include <pthread.h>
|
||||
#include <tesseract/capi.h>
|
||||
|
||||
#include "../media/media.h"
|
||||
#include "../arc/arc.h"
|
||||
|
||||
#define MIN_OCR_SIZE 350
|
||||
#define MIN_OCR_LEN 10
|
||||
|
||||
/* fill_image callback doesn't let us pass opaque pointers unless I create my own device */
|
||||
__thread text_buffer_t thread_buffer;
|
||||
__thread scan_ebook_ctx_t thread_ctx;
|
||||
|
||||
pthread_mutex_t Mutex;
|
||||
|
||||
/**
 * MuPDF lock callback. Only FZ_LOCK_FREETYPE is backed by a real mutex;
 * requests for any other lock are ignored.
 */
static void my_fz_lock(UNUSED(void *user), int lock) {
    if (lock != FZ_LOCK_FREETYPE) {
        return;
    }

    pthread_mutex_lock(&Mutex);
}
|
||||
|
||||
/**
 * MuPDF unlock callback, counterpart of my_fz_lock(): releases the mutex for
 * FZ_LOCK_FREETYPE and ignores every other lock.
 */
static void my_fz_unlock(UNUSED(void *user), int lock) {
    if (lock != FZ_LOCK_FREETYPE) {
        return;
    }

    pthread_mutex_unlock(&Mutex);
}
|
||||
|
||||
|
||||
int pixmap_is_blank(const fz_pixmap *pixmap) {
|
||||
int pixmap_size = pixmap->n * pixmap->w * pixmap->h;
|
||||
const int pixel0 = pixmap->samples[0];
|
||||
for (int i = 0; i < pixmap_size; i++) {
|
||||
if (pixmap->samples[i] != pixel0) {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
fz_pixmap *
|
||||
load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzdoc, document_t *doc, fz_page **cover) {
|
||||
|
||||
int err = 0;
|
||||
|
||||
fz_var(cover);
|
||||
fz_var(err);
|
||||
fz_try(fzctx)*cover = fz_load_page(fzctx, fzdoc, page);
|
||||
fz_catch(fzctx)err = 1;
|
||||
|
||||
if (err != 0) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fz_rect bounds = fz_bound_page(fzctx, *cover);
|
||||
|
||||
float scale;
|
||||
float w = bounds.x1 - bounds.x0;
|
||||
float h = bounds.y1 - bounds.y0;
|
||||
if (w > h) {
|
||||
scale = (float) ctx->tn_size / w;
|
||||
} else {
|
||||
scale = (float) ctx->tn_size / h;
|
||||
}
|
||||
fz_matrix m = fz_scale(scale, scale);
|
||||
|
||||
bounds = fz_transform_rect(bounds, m);
|
||||
fz_irect bbox = fz_round_rect(bounds);
|
||||
fz_pixmap *pixmap = fz_new_pixmap_with_bbox(fzctx, fz_device_rgb(fzctx), bbox, NULL, 0);
|
||||
|
||||
fz_clear_pixmap_with_value(fzctx, pixmap, 0xFF);
|
||||
fz_device *dev = fz_new_draw_device(fzctx, m, pixmap);
|
||||
|
||||
fz_var(err);
|
||||
fz_try(fzctx) {
|
||||
fz_run_page(fzctx, *cover, dev, fz_identity, NULL);
|
||||
} fz_always(fzctx) {
|
||||
fz_close_device(fzctx, dev);
|
||||
fz_drop_device(fzctx, dev);
|
||||
} fz_catch(fzctx)err = fzctx->error.errcode;
|
||||
|
||||
if (err != 0) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||
fz_drop_page(fzctx, *cover);
|
||||
fz_drop_pixmap(fzctx, pixmap);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pixmap->n != 3) {
|
||||
CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n)
|
||||
fz_drop_page(fzctx, *cover);
|
||||
fz_drop_pixmap(fzctx, pixmap);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return pixmap;
|
||||
}
|
||||
|
||||
/**
 * Render the cover of a document, encode it as JPEG and hand it to
 * ctx->store() under the document's path_md5.
 *
 * Page 0 is used unless it renders as a uniform colour, in which case page 1
 * is tried instead.
 *
 * @param ctx   Ebook context (thumbnail size/quality, store callback).
 * @param fzctx MuPDF context.
 * @param doc   Document the thumbnail belongs to.
 * @param fzdoc Open MuPDF document.
 * @return TRUE when a thumbnail was stored, FALSE when no page could be
 *         rendered or the conversion buffer could not be allocated.
 */
int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_document *fzdoc) {

    fz_page *cover = NULL;
    fz_pixmap *pixmap = load_pixmap(ctx, 0, fzctx, fzdoc, doc, &cover);
    if (pixmap == NULL) {
        return FALSE;
    }

    if (pixmap_is_blank(pixmap)) {
        fz_drop_page(fzctx, cover);
        fz_drop_pixmap(fzctx, pixmap);
        CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead")
        pixmap = load_pixmap(ctx, 1, fzctx, fzdoc, doc, &cover);
        if (pixmap == NULL) {
            return FALSE;
        }
    }

    /*
     * The samples are copied into a private buffer before scaling.
     * NOTE(review): the fixed 1 GiB size is far larger than a thumbnail
     * needs; presumably it only provides slack behind the pixel data for
     * sws_scale() - confirm how much padding is required and shrink it.
     */
    unsigned char *samples = calloc(1, 1024 * 1024 * 1024);
    if (samples == NULL) {
        CTX_LOG_ERRORF(doc->filepath, "Could not allocate conversion buffer for cover (%d x %d)", pixmap->w, pixmap->h)
        fz_drop_pixmap(fzctx, pixmap);
        fz_drop_page(fzctx, cover);
        return FALSE;
    }
    memcpy(samples, pixmap->samples, pixmap->stride * pixmap->h);

    // RGB24 -> YUV420p
    AVFrame *scaled_frame = av_frame_alloc();

    struct SwsContext *sws_ctx = sws_getContext(
            pixmap->w, pixmap->h, AV_PIX_FMT_RGB24,
            pixmap->w, pixmap->h, AV_PIX_FMT_YUV420P,
            SIST_SWS_ALGO, 0, 0, 0
    );

    int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, pixmap->w, pixmap->h, 1);
    uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);

    av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, pixmap->w, pixmap->h,
                         1);

    const uint8_t *in_data[1] = {samples,};
    int in_line_size[1] = {(int) pixmap->stride};

    sws_scale(sws_ctx,
              in_data, in_line_size,
              0, pixmap->h,
              scaled_frame->data, scaled_frame->linesize
    );

    scaled_frame->width = pixmap->w;
    scaled_frame->height = pixmap->h;
    scaled_frame->format = AV_PIX_FMT_YUV420P;

    sws_freeContext(sws_ctx);

    // YUV420p -> JPEG
    AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(pixmap->w, pixmap->h, ctx->tn_qscale);
    avcodec_send_frame(jpeg_encoder, scaled_frame);

    AVPacket jpeg_packet;
    av_init_packet(&jpeg_packet);
    avcodec_receive_packet(jpeg_encoder, &jpeg_packet);

    APPEND_TN_META(doc, pixmap->w, pixmap->h)
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);

    free(samples);
    av_packet_unref(&jpeg_packet);
    av_free(*scaled_frame->data);
    av_frame_free(&scaled_frame);
    avcodec_free_context(&jpeg_encoder);

    fz_drop_pixmap(fzctx, pixmap);
    fz_drop_page(fzctx, cover);

    return TRUE;
}
|
||||
|
||||
void fz_err_callback(void *user, const char *message) {
|
||||
document_t *doc = (document_t *) user;
|
||||
|
||||
const scan_ebook_ctx_t *ctx = &thread_ctx;
|
||||
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message)
|
||||
}
|
||||
|
||||
void fz_warn_callback(void *user, const char *message) {
|
||||
document_t *doc = (document_t *) user;
|
||||
|
||||
const scan_ebook_ctx_t *ctx = &thread_ctx;
|
||||
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message)
|
||||
}
|
||||
|
||||
static void init_fzctx(fz_context *fzctx, document_t *doc) {
|
||||
fz_register_document_handlers(fzctx);
|
||||
|
||||
static int mu_is_initialized = FALSE;
|
||||
if (!mu_is_initialized) {
|
||||
pthread_mutex_init(&Mutex, NULL);
|
||||
mu_is_initialized = TRUE;
|
||||
}
|
||||
|
||||
fzctx->warn.print_user = doc;
|
||||
fzctx->warn.print = fz_warn_callback;
|
||||
fzctx->error.print_user = doc;
|
||||
fzctx->error.print = fz_err_callback;
|
||||
|
||||
fzctx->locks.lock = my_fz_lock;
|
||||
fzctx->locks.unlock = my_fz_unlock;
|
||||
}
|
||||
|
||||
/**
 * Append the characters of a structured-text block to `tex`.
 *
 * Blocks that are not text blocks are ignored. A space is written before
 * each line and once more after the last line.
 *
 * @return TEXT_BUF_FULL as soon as appending a character reports a full
 *         buffer, 0 otherwise.
 */
static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
    if (block->type != FZ_STEXT_BLOCK_TEXT) {
        return 0;
    }

    for (fz_stext_line *line = block->u.t.first_line; line != NULL; line = line->next) {
        text_buffer_append_char(tex, ' ');

        for (fz_stext_char *c = line->first_char; c != NULL; c = c->next) {
            if (text_buffer_append_char(tex, c->c) == TEXT_BUF_FULL) {
                return TEXT_BUF_FULL;
            }
        }
    }

    text_buffer_append_char(tex, ' ');
    return 0;
}
|
||||
|
||||
#define IS_VALID_BPP(d) ((d)==1 || (d)==2 || (d)==4 || (d)==8 || (d)==16 || (d)==24 || (d)==32)
|
||||
|
||||
void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
|
||||
fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
|
||||
UNUSED(fz_color_params color_params)) {
|
||||
|
||||
int l2factor = 0;
|
||||
|
||||
if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) {
|
||||
|
||||
fz_pixmap *pix = img->get_pixmap(fzctx, img, NULL, img->w, img->h, &l2factor);
|
||||
|
||||
if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
|
||||
TessBaseAPI *api = TessBaseAPICreate();
|
||||
TessBaseAPIInit3(api, thread_ctx.tesseract_path, thread_ctx.tesseract_lang);
|
||||
|
||||
TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
|
||||
TessBaseAPISetSourceResolution(api, pix->xres);
|
||||
|
||||
char *text = TessBaseAPIGetUTF8Text(api);
|
||||
size_t len = strlen(text);
|
||||
if (len >= MIN_OCR_LEN) {
|
||||
text_buffer_append_string(&thread_buffer, text, len - 1);
|
||||
}
|
||||
|
||||
TessBaseAPIEnd(api);
|
||||
TessBaseAPIDelete(api);
|
||||
}
|
||||
fz_drop_pixmap(fzctx, pix);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc, int tn_only) {
|
||||
|
||||
fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_DEFAULT);
|
||||
thread_ctx = *ctx;
|
||||
|
||||
init_fzctx(fzctx, doc);
|
||||
|
||||
int err = 0;
|
||||
|
||||
fz_document *fzdoc = NULL;
|
||||
fz_stream *stream = NULL;
|
||||
fz_var(fzdoc);
|
||||
fz_var(stream);
|
||||
fz_var(err);
|
||||
|
||||
fz_try(fzctx) {
|
||||
stream = fz_open_memory(fzctx, buf, buf_len);
|
||||
fzdoc = fz_open_document_with_stream(fzctx, mime_str, stream);
|
||||
} fz_catch(fzctx)err = fzctx->error.errcode;
|
||||
|
||||
if (err != 0) {
|
||||
fz_drop_stream(fzctx, stream);
|
||||
fz_drop_document(fzctx, fzdoc);
|
||||
fz_drop_context(fzctx);
|
||||
return;
|
||||
}
|
||||
|
||||
int page_count = -1;
|
||||
fz_var(err);
|
||||
fz_try(fzctx)page_count = fz_count_pages(fzctx, fzdoc);
|
||||
fz_catch(fzctx)err = fzctx->error.errcode;
|
||||
|
||||
if (err) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message)
|
||||
fz_drop_stream(fzctx, stream);
|
||||
fz_drop_document(fzctx, fzdoc);
|
||||
fz_drop_context(fzctx);
|
||||
return;
|
||||
}
|
||||
|
||||
APPEND_LONG_META(doc, MetaPages, page_count)
|
||||
|
||||
if (ctx->tn_size > 0) {
|
||||
if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) {
|
||||
fz_drop_stream(fzctx, stream);
|
||||
fz_drop_document(fzctx, fzdoc);
|
||||
fz_drop_context(fzctx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (tn_only) {
|
||||
fz_drop_stream(fzctx, stream);
|
||||
fz_drop_document(fzctx, fzdoc);
|
||||
fz_drop_context(fzctx);
|
||||
return;
|
||||
}
|
||||
|
||||
char title[8192] = {'\0',};
|
||||
fz_try(fzctx)fz_lookup_metadata(fzctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
|
||||
fz_catch(fzctx);
|
||||
|
||||
if (strlen(title) > 0) {
|
||||
APPEND_UTF8_META(doc, MetaTitle, title)
|
||||
}
|
||||
|
||||
char author[4096] = {'\0',};
|
||||
fz_try(fzctx)fz_lookup_metadata(fzctx, fzdoc, FZ_META_INFO_AUTHOR, author, sizeof(author));
|
||||
fz_catch(fzctx);
|
||||
|
||||
if (strlen(author) > 0) {
|
||||
APPEND_UTF8_META(doc, MetaAuthor, author)
|
||||
}
|
||||
|
||||
|
||||
if (ctx->content_size > 0) {
|
||||
fz_stext_options opts = {0};
|
||||
thread_buffer = text_buffer_create(ctx->content_size);
|
||||
|
||||
for (int current_page = 0; current_page < page_count; current_page++) {
|
||||
fz_page *page = NULL;
|
||||
fz_var(err);
|
||||
fz_try(fzctx)page = fz_load_page(fzctx, fzdoc, current_page);
|
||||
fz_catch(fzctx)err = fzctx->error.errcode;
|
||||
if (err != 0) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||
text_buffer_destroy(&thread_buffer);
|
||||
fz_drop_page(fzctx, page);
|
||||
fz_drop_stream(fzctx, stream);
|
||||
fz_drop_document(fzctx, fzdoc);
|
||||
fz_drop_context(fzctx);
|
||||
return;
|
||||
}
|
||||
|
||||
fz_stext_page *stext = fz_new_stext_page(fzctx, fz_bound_page(fzctx, page));
|
||||
fz_device *dev = fz_new_stext_device(fzctx, stext, &opts);
|
||||
dev->stroke_path = NULL;
|
||||
dev->stroke_text = NULL;
|
||||
dev->clip_text = NULL;
|
||||
dev->clip_stroke_path = NULL;
|
||||
dev->clip_stroke_text = NULL;
|
||||
|
||||
if (ctx->tesseract_lang != NULL) {
|
||||
dev->fill_image = fill_image;
|
||||
}
|
||||
|
||||
fz_var(err);
|
||||
fz_try(fzctx)fz_run_page(fzctx, page, dev, fz_identity, NULL);
|
||||
fz_always(fzctx) {
|
||||
fz_close_device(fzctx, dev);
|
||||
fz_drop_device(fzctx, dev);
|
||||
} fz_catch(fzctx)err = fzctx->error.errcode;
|
||||
|
||||
if (err != 0) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||
text_buffer_destroy(&thread_buffer);
|
||||
fz_drop_page(fzctx, page);
|
||||
fz_drop_stext_page(fzctx, stext);
|
||||
fz_drop_stream(fzctx, stream);
|
||||
fz_drop_document(fzctx, fzdoc);
|
||||
fz_drop_context(fzctx);
|
||||
return;
|
||||
}
|
||||
|
||||
fz_stext_block *block = stext->first_block;
|
||||
while (block != NULL) {
|
||||
int ret = read_stext_block(block, &thread_buffer);
|
||||
if (ret == TEXT_BUF_FULL) {
|
||||
break;
|
||||
}
|
||||
block = block->next;
|
||||
}
|
||||
fz_drop_stext_page(fzctx, stext);
|
||||
fz_drop_page(fzctx, page);
|
||||
|
||||
if (thread_buffer.dyn_buffer.cur >= ctx->content_size) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
text_buffer_terminate_string(&thread_buffer);
|
||||
|
||||
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
|
||||
meta_content->key = MetaContent;
|
||||
memcpy(meta_content->str_val, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
|
||||
APPEND_META(doc, meta_content)
|
||||
|
||||
text_buffer_destroy(&thread_buffer);
|
||||
}
|
||||
|
||||
fz_drop_stream(fzctx, stream);
|
||||
fz_drop_document(fzctx, fzdoc);
|
||||
fz_drop_context(fzctx);
|
||||
}
|
||||
|
||||
static scan_arc_ctx_t arc_ctx = (scan_arc_ctx_t) {.passphrase = {0,}};
|
||||
|
||||
void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
struct archive *a = NULL;
|
||||
struct archive_entry *entry = NULL;
|
||||
arc_data_t arc_data;
|
||||
|
||||
text_buffer_t content_buffer = text_buffer_create(ctx->content_size);
|
||||
|
||||
if (ctx->tn_size <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE);
|
||||
if (ret != ARCHIVE_OK) {
|
||||
CTX_LOG_ERRORF(f->filepath, "(ebook.c) [%d] %s", ret, archive_error_string(a))
|
||||
archive_read_free(a);
|
||||
return;
|
||||
}
|
||||
|
||||
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
|
||||
struct stat info = *archive_entry_stat(entry);
|
||||
if (S_ISREG(info.st_mode)) {
|
||||
const char *utf8_name = archive_entry_pathname_utf8(entry);
|
||||
const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;
|
||||
|
||||
char *p = strrchr(file_path, '.');
|
||||
if (p != NULL && (strcmp(p, ".html") == 0 || (strcmp(p, ".xhtml") == 0))) {
|
||||
size_t entry_size = archive_entry_size(entry);
|
||||
void *buf = malloc(entry_size + 1);
|
||||
size_t read = archive_read_data(a, buf, entry_size);
|
||||
*(char *) (buf + entry_size) = '\0';
|
||||
|
||||
if (read != entry_size) {
|
||||
const char *err_str = archive_error_string(a);
|
||||
if (err_str) {
|
||||
CTX_LOG_ERRORF("ebook.c", "Error while reading entry: %s", err_str)
|
||||
}
|
||||
free(buf);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = text_buffer_append_markup(&content_buffer, buf);
|
||||
free(buf);
|
||||
|
||||
if (ret == TEXT_BUF_FULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
text_buffer_terminate_string(&content_buffer);
|
||||
|
||||
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + content_buffer.dyn_buffer.cur);
|
||||
meta_content->key = MetaContent;
|
||||
memcpy(meta_content->str_val, content_buffer.dyn_buffer.buf, content_buffer.dyn_buffer.cur);
|
||||
APPEND_META(doc, meta_content)
|
||||
|
||||
text_buffer_destroy(&content_buffer);
|
||||
|
||||
archive_read_free(a);
|
||||
}
|
||||
|
||||
/**
 * Entry point for ebook files.
 *
 * EPUBs go through parse_epub_fast() when ctx->fast_epub_parse is set;
 * everything else is read fully into memory and handed to parse_ebook_mem().
 */
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc) {

    if (ctx->fast_epub_parse && is_epub(mime_str)) {
        parse_epub_fast(ctx, f, doc);
        return;
    }

    size_t buf_len;
    void *contents = read_all(f, &buf_len);

    if (contents != NULL) {
        parse_ebook_mem(ctx, contents, buf_len, mime_str, doc, FALSE);
        free(contents);
    } else {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
    }
}
|
||||
30
third-party/libscan/libscan/ebook/ebook.h
vendored
Normal file
30
third-party/libscan/libscan/ebook/ebook.h
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef SCAN_EBOOK_H
|
||||
#define SCAN_EBOOK_H
|
||||
|
||||
#include "../scan.h"
|
||||
|
||||
typedef struct {
|
||||
long content_size;
|
||||
int tn_size;
|
||||
const char *tesseract_lang;
|
||||
const char *tesseract_path;
|
||||
pthread_mutex_t mupdf_mutex;
|
||||
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
store_callback_t store;
|
||||
int fast_epub_parse;
|
||||
float tn_qscale;
|
||||
} scan_ebook_ctx_t;
|
||||
|
||||
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc);
|
||||
|
||||
void
|
||||
parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc, int tn_only);
|
||||
|
||||
/* Returns 1 when `mime_string` is exactly "application/epub+zip", 0 otherwise. */
__always_inline
static int is_epub(const char *mime_string) {
    static const char epub_mime[] = "application/epub+zip";

    return !strcmp(mime_string, epub_mime);
}
|
||||
|
||||
#endif
|
||||
246
third-party/libscan/libscan/font/font.c
vendored
Normal file
246
third-party/libscan/libscan/font/font.c
vendored
Normal file
@@ -0,0 +1,246 @@
|
||||
#include "font.h"
|
||||
|
||||
#include <ft2build.h>
|
||||
#include <freetype/freetype.h>
|
||||
#include "../util.h"
|
||||
|
||||
|
||||
__thread FT_Library ft_lib = NULL;
|
||||
|
||||
|
||||
/* Pixel extent of a rendered string, as computed by text_dimension(). */
typedef struct text_dimensions {
    unsigned int width;    /* sum of per-glyph widths/advances plus kerning */
    unsigned int height;   /* max ascent + max descent */
    unsigned int baseline; /* max descent */
} text_dimensions_t;
|
||||
|
||||
/* Metrics and bitmap of one rendered glyph, filled by ft_glyph_to_glyph(). */
typedef struct glyph {
    int top;               /* slot->bitmap_top */
    int height;            /* bitmap rows */
    int width;             /* bitmap width */
    int descent;           /* max(0, height - top) */
    int ascent;            /* max(0, max(top, height) - descent) */
    int advance_width;     /* slot->advance.x / 64 */
    unsigned char *pixmap; /* points into the FreeType slot's bitmap buffer (not owned) */
} glyph_t;
|
||||
|
||||
|
||||
__always_inline
|
||||
int kerning_offset(char c, char pc, FT_Face face) {
|
||||
FT_Vector kerning;
|
||||
FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning);
|
||||
|
||||
return (int) (kerning.x / 64);
|
||||
}
|
||||
|
||||
__always_inline
|
||||
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
|
||||
glyph_t glyph;
|
||||
|
||||
glyph.pixmap = slot->bitmap.buffer;
|
||||
|
||||
glyph.width = (int) slot->bitmap.width;
|
||||
glyph.height = (int) slot->bitmap.rows;
|
||||
glyph.top = slot->bitmap_top;
|
||||
glyph.advance_width = (int) slot->advance.x / 64;
|
||||
|
||||
glyph.descent = MAX(0, glyph.height - glyph.top);
|
||||
glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
|
||||
|
||||
return glyph;
|
||||
}
|
||||
|
||||
/**
 * Measures the bitmap needed to draw `text` with `face` at its current size.
 *
 * Width accumulates, per character, the larger of advance and bitmap width
 * plus the kerning against the previous character; height is the largest
 * ascent plus the largest descent; baseline is the largest descent.
 */
text_dimensions_t text_dimension(char *text, FT_Face face) {
    text_dimensions_t dimensions;
    dimensions.width = 0;

    unsigned int max_ascent = 0;
    int max_descent = 0;

    const int num_chars = (int) strlen(text);
    char pc = 0;

    int i = 0;
    while (i < num_chars) {
        const char c = text[i];

        FT_Load_Char(face, c, 0);
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        max_descent = MAX(max_descent, glyph.descent);
        max_ascent = MAX(max_ascent, MAX(glyph.height, glyph.ascent));

        int kerning_x = kerning_offset(c, pc, face);
        dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x;

        pc = c;
        i++;
    }

    dimensions.baseline = max_descent;
    dimensions.height = max_ascent + max_descent;

    return dimensions;
}
|
||||
|
||||
/**
 * ORs a glyph's pixels into `bitmap` with its top-left corner at (x, y).
 *
 * `bitmap` is treated as text_info.width * text_info.height bytes, row-major;
 * pixels whose destination index falls outside that range are dropped.
 */
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
    const unsigned int buf_len = text_info.width * text_info.height;
    const unsigned int origin = y * text_info.width + x;

    for (unsigned int sy = 0; sy < glyph->height; sy++) {
        /* Unsigned arithmetic on purpose: indices wrap exactly like a running counter would. */
        const unsigned int src_row = sy * glyph->width;
        const unsigned int dst_row = origin + sy * text_info.width;

        for (unsigned int sx = 0; sx < glyph->width; sx++) {
            const unsigned int dst = dst_row + sx;
            if (dst < buf_len) {
                bitmap[dst] |= glyph->pixmap[src_row + sx];
            }
        }
    }
}
|
||||
|
||||
/**
 * Serialises an 8-bit bitmap as a BMP file into `buf` (expected to be empty:
 * the total size is patched in at byte offset 2).
 *
 * Layout: 14-byte file header, 40-byte DIB header, 256-entry colour table
 * (inverted grayscale: index 0 is white), then the pixel rows bottom-up.
 * Each row is padded to a multiple of four bytes based on the row length
 * itself, since the pixel data starts at offset 1078, which is not 4-aligned.
 */
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {

    dyn_buffer_write_short(buf, 0x4D42); // Magic
    dyn_buffer_write_int(buf, 0); // Size placeholder
    dyn_buffer_write_int(buf, 0x5157); //Reserved
    dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset

    dyn_buffer_write_int(buf, 40); // DIB size
    dyn_buffer_write_int(buf, (int) dimensions.width);
    dyn_buffer_write_int(buf, (int) dimensions.height);
    dyn_buffer_write_short(buf, 1); // Color planes
    dyn_buffer_write_short(buf, 8); // bits per pixel
    dyn_buffer_write_int(buf, 0); // compression
    dyn_buffer_write_int(buf, 0); // Ignored
    dyn_buffer_write_int(buf, 3800); // hres
    dyn_buffer_write_int(buf, 3800); // vres
    dyn_buffer_write_int(buf, 256); // Color count
    dyn_buffer_write_int(buf, 0); // Ignored

    // RGBA32 Color table (Grayscale)
    for (int i = 255; i >= 0; i--) {
        dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
    }

    // Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
    const unsigned int row_padding = (4 - dimensions.width % 4) % 4;
    for (int y = (int) dimensions.height - 1; y >= 0; y--) {
        for (unsigned int x = 0; x < dimensions.width; x++) {
            dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
        }
        for (unsigned int pad = 0; pad < row_padding; pad++) {
            dyn_buffer_write_char(buf, 0);
        }
    }

    // Size: offset 2 is not int-aligned, so copy the bytes instead of storing through an int pointer
    int total_size = (int) buf->cur;
    memcpy((char *) buf->buf + 2, &total_size, sizeof(total_size));
}
|
||||
|
||||
/**
 * Reads a font file, appends its display name ("family style") to `doc` as
 * MetaFontName and renders that name with the font itself into a BMP
 * thumbnail handed to ctx->store().
 *
 * The FreeType library handle is created lazily per thread on first use.
 */
void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
    if (ft_lib == NULL) {
        FT_Init_FreeType(&ft_lib);
    }

    size_t buf_len = 0;
    void *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        return;
    }

    FT_Face face;
    FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, (int) buf_len, 0, &face);
    if (err != 0) {
        CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err,
                       FT_Error_String(err))
        free(buf);
        return;
    }

    char font_name[4096];

    /* snprintf() always terminates the buffer and never receives a NULL string. */
    const char *family = face->family_name == NULL ? "(null)" : face->family_name;
    if (face->style_name == NULL || (strcmp(face->style_name, "?") == 0)) {
        snprintf(font_name, sizeof(font_name), "%s", family);
    } else {
        snprintf(font_name, sizeof(font_name), "%s %s", family, face->style_name);
    }

    meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name) + 1);
    if (meta_name == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(font.c) malloc() failed")
        FT_Done_Face(face);
        free(buf);
        return;
    }
    meta_name->key = MetaFontName;
    strcpy(meta_name->str_val, font_name);
    APPEND_META(doc, meta_name)

    /* NOTE(review): returns before rendering when enable_tn is TRUE; the test looks inverted -- confirm how callers set enable_tn. */
    if (ctx->enable_tn == TRUE) {
        FT_Done_Face(face);
        free(buf);
        return;
    }

    int pixel = 64;
    int num_chars = (int) strlen(font_name);

    err = FT_Set_Pixel_Sizes(face, 0, pixel);
    if (err != 0) {
        CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err,
                         FT_Error_String(err))
        FT_Done_Face(face);
        free(buf);
        return;
    }

    text_dimensions_t dimensions = text_dimension(font_name, face);
    unsigned char *bitmap = calloc(dimensions.width * dimensions.height, 1);
    /* calloc(0, 1) may legitimately return NULL; only a failed non-empty allocation is an error. */
    if (bitmap == NULL && dimensions.width * dimensions.height != 0) {
        CTX_LOG_ERROR(doc->filepath, "(font.c) calloc() failed")
        FT_Done_Face(face);
        free(buf);
        return;
    }

    FT_Vector pen;
    pen.x = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
        char c = font_name[i];

        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
        if (err != 0) {
            /* Retry with the opposite letter case before giving up on this character. */
            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
            err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
            if (err != 0) {
                CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err,
                                 FT_Error_String(err))
                continue;
            }
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        pen.x += kerning_offset(c, pc, face);
        if (pen.x <= 0) {
            pen.x = ABS(glyph.advance_width - glyph.width);
        }
        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;

        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

        pen.x += glyph.advance_width;
        pc = c;
    }

    dyn_buffer_t bmp_data = dyn_buffer_create();
    bmp_format(&bmp_data, dimensions, bitmap);

    APPEND_TN_META(doc, dimensions.width, dimensions.height)
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);
    free(bitmap);

    FT_Done_Face(face);
    free(buf);
}
|
||||
|
||||
void cleanup_font() {
|
||||
FT_Done_FreeType(ft_lib);
|
||||
}
|
||||
17
third-party/libscan/libscan/font/font.h
vendored
Normal file
17
third-party/libscan/libscan/font/font.h
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
#ifndef SCAN_FONT_H
|
||||
#define SCAN_FONT_H
|
||||
|
||||
#include "../scan.h"
|
||||
|
||||
|
||||
typedef struct {
|
||||
int enable_tn;
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
store_callback_t store;
|
||||
} scan_font_ctx_t;
|
||||
|
||||
void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
void cleanup_font();
|
||||
|
||||
#endif
|
||||
119
third-party/libscan/libscan/json/json.c
vendored
Normal file
119
third-party/libscan/libscan/json/json.c
vendored
Normal file
@@ -0,0 +1,119 @@
|
||||
#include "json.h"
|
||||
#include "cjson/cJSON.h"
|
||||
|
||||
|
||||
#define JSON_MAX_FILE_SIZE (1024 * 1024 * 50)
|
||||
|
||||
/**
 * Recursively appends every string value found under `json` to `tex`, each
 * followed by a space. Object keys, numbers, booleans and nulls are ignored.
 *
 * @return TRUE as soon as the buffer reports TEXT_BUF_FULL, FALSE otherwise.
 */
int json_extract_text(cJSON *json, text_buffer_t *tex) {
    if (cJSON_IsString(json)) {
        return text_buffer_append_string0(tex, json->valuestring) == TEXT_BUF_FULL
               || text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL;
    }

    /* Objects and arrays both keep their members in the `child` linked list. */
    if (cJSON_IsObject(json) || cJSON_IsArray(json)) {
        cJSON *member = json->child;
        while (member != NULL) {
            if (json_extract_text(member, tex)) {
                return TRUE;
            }
            member = member->next;
        }
    }

    return FALSE;
}
|
||||
|
||||
/**
 * Parses a whole JSON file and appends all of its string values to `doc`
 * as MetaContent.
 *
 * @return SCAN_ERR_SKIP when the file exceeds JSON_MAX_FILE_SIZE,
 *         SCAN_ERR_READ when the file cannot be read or buffered,
 *         SCAN_OK otherwise (including when the JSON itself does not parse,
 *         in which case the content is empty).
 */
scan_code_t parse_json(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc) {

    if (f->info.st_size > JSON_MAX_FILE_SIZE) {
        CTX_LOG_WARNINGF("json.c", "File larger than maximum allowed [%s]", f->filepath)
        return SCAN_ERR_SKIP;
    }

    size_t buf_len;
    char *buf = read_all(f, &buf_len);

    if (buf == NULL) {
        return SCAN_ERR_READ;
    }

    /* Grow by one byte for the terminator; keep the old pointer until realloc succeeds. */
    buf_len += 1;
    char *terminated = realloc(buf, buf_len);
    if (terminated == NULL) {
        free(buf);
        return SCAN_ERR_READ;
    }
    buf = terminated;
    *(buf + buf_len - 1) = '\0';

    cJSON *json = cJSON_ParseWithOpts(buf, NULL, TRUE);
    text_buffer_t tex = text_buffer_create(ctx->content_size);

    json_extract_text(json, &tex);
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    cJSON_Delete(json);
    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
|
||||
|
||||
#define JSON_BUF_SIZE (1024 * 1024 * 5)
|
||||
|
||||
scan_code_t parse_ndjson(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
char *buf = calloc(JSON_BUF_SIZE + 1, sizeof(char));
|
||||
*(buf + JSON_BUF_SIZE) = '\0';
|
||||
|
||||
text_buffer_t tex = text_buffer_create(ctx->content_size);
|
||||
|
||||
size_t ret;
|
||||
int eof = FALSE;
|
||||
const char *parse_end = buf;
|
||||
size_t to_read;
|
||||
char *ptr = buf;
|
||||
|
||||
while (TRUE) {
|
||||
cJSON *json;
|
||||
|
||||
if (!eof) {
|
||||
to_read = parse_end == buf ? JSON_BUF_SIZE : parse_end - buf;
|
||||
ret = f->read(f, ptr, to_read);
|
||||
if (ret != to_read) {
|
||||
eof = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
json = cJSON_ParseWithOpts(buf, &parse_end, FALSE);
|
||||
|
||||
if (parse_end == buf + JSON_BUF_SIZE) {
|
||||
CTX_LOG_ERRORF("json.c", "Line too large for buffer [%s]", doc->filepath);
|
||||
cJSON_Delete(json);
|
||||
break;
|
||||
}
|
||||
|
||||
if (parse_end == buf) {
|
||||
cJSON_Delete(json);
|
||||
break;
|
||||
}
|
||||
|
||||
json_extract_text(json, &tex);
|
||||
|
||||
cJSON_Delete(json);
|
||||
|
||||
memmove(buf, parse_end, (buf + JSON_BUF_SIZE - parse_end));
|
||||
ptr = buf + JSON_BUF_SIZE - parse_end + buf;
|
||||
}
|
||||
|
||||
text_buffer_terminate_string(&tex);
|
||||
|
||||
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);
|
||||
|
||||
free(buf);
|
||||
text_buffer_destroy(&tex);
|
||||
}
|
||||
30
third-party/libscan/libscan/json/json.h
vendored
Normal file
30
third-party/libscan/libscan/json/json.h
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef SCAN_JSON_H
|
||||
#define SCAN_JSON_H
|
||||
|
||||
#include "../scan.h"
|
||||
|
||||
|
||||
typedef struct {
|
||||
long content_size;
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
store_callback_t store;
|
||||
unsigned int json_mime;
|
||||
unsigned int ndjson_mime;
|
||||
} scan_json_ctx_t;
|
||||
|
||||
scan_code_t parse_json(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
||||
scan_code_t parse_ndjson(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
||||
__always_inline
|
||||
static int is_json(scan_json_ctx_t *ctx, unsigned int mime) {
|
||||
return mime == ctx->json_mime;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int is_ndjson(scan_json_ctx_t *ctx, unsigned int mime) {
|
||||
return mime == ctx->ndjson_mime;
|
||||
}
|
||||
|
||||
#endif
|
||||
62
third-party/libscan/libscan/macros.h
vendored
Normal file
62
third-party/libscan/libscan/macros.h
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
#ifndef FALSE
|
||||
#define FALSE (0)
|
||||
#define BOOL int
|
||||
#endif
|
||||
|
||||
#ifndef TRUE
|
||||
#define TRUE (!FALSE)
|
||||
#endif
|
||||
|
||||
#undef MAX
|
||||
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
|
||||
|
||||
#undef MIN
|
||||
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
#ifndef PATH_MAX
|
||||
#define PATH_MAX 4096
|
||||
#endif
|
||||
|
||||
#undef ABS
|
||||
#define ABS(a) (((a) < 0) ? -(a) : (a))
|
||||
|
||||
#define SHA1_STR_LENGTH 41
|
||||
#define SHA1_DIGEST_LENGTH 20
|
||||
|
||||
/*
 * Appends a copy of the NUL-terminated string `value` to `doc` under `keyname`.
 * One extra byte is allocated so the terminator never depends on slack in meta_line_t.
 */
#define APPEND_STR_META(doc, keyname, value) \
    {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value) + 1); \
    meta_str->key = keyname; \
    strcpy(meta_str->str_val, value); \
    APPEND_META(doc, meta_str)}

/* Appends the integer `value` to `doc` under `keyname`. */
#define APPEND_LONG_META(doc, keyname, value) \
    {meta_line_t *meta_long = malloc(sizeof(meta_line_t)); \
    meta_long->key = keyname; \
    meta_long->long_val = value; \
    APPEND_META(doc, meta_long)}

/*
 * Appends the thumbnail size as "WWWW,HHHH" (zero-padded to at least four digits).
 * 24 bytes hold two full-width ints, the comma and the terminator; snprintf()
 * bounds the write in any case.
 */
#define APPEND_TN_META(doc, width, height) \
    {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + 24); \
    meta_str->key = MetaThumbnail; \
    snprintf(meta_str->str_val, 24, "%04d,%04d", width, height); \
    APPEND_META(doc, meta_str)}

/*
 * Links `meta` at the tail of doc's metadata list.
 * Wrapped in braces so the expansion is a single statement.
 */
#define APPEND_META(doc, meta) \
    {(meta)->next = NULL;\
    if ((doc)->meta_head == NULL) {\
        (doc)->meta_head = (meta);\
        (doc)->meta_tail = (doc)->meta_head;\
    } else {\
        (doc)->meta_tail->next = (meta);\
        (doc)->meta_tail = (meta);\
    }}

/*
 * Appends `str` to `doc` under `keyname` after passing it through a text buffer.
 * The temporaries live in their own block, so the macro can be used more than
 * once in the same scope.
 */
#define APPEND_UTF8_META(doc, keyname, str) \
    {text_buffer_t tex = text_buffer_create(-1); \
    text_buffer_append_string0(&tex, str); \
    text_buffer_terminate_string(&tex); \
    meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
    meta_tag->key = keyname; \
    strcpy(meta_tag->str_val, tex.dyn_buffer.buf); \
    APPEND_META(doc, meta_tag) \
    text_buffer_destroy(&tex);}
|
||||
749
third-party/libscan/libscan/media/media.c
vendored
Normal file
749
third-party/libscan/libscan/media/media.c
vendored
Normal file
@@ -0,0 +1,749 @@
|
||||
#include "media.h"
|
||||
#include <ctype.h>
|
||||
|
||||
#define MIN_SIZE 32
|
||||
#define AVIO_BUF_SIZE 8192
|
||||
#define IS_VIDEO(fmt) (fmt->iformat->name && strcmp(fmt->iformat->name, "image2") != 0)
|
||||
|
||||
#define STORE_AS_IS ((void*)-1)
|
||||
|
||||
/**
 * Returns a path that carries an extension.
 *
 * When the document already has one (doc->ext > doc->base) or the mime type
 * is neither PNG nor JPEG, `filepath` is returned unchanged; otherwise a
 * placeholder name with the matching extension is returned.
 */
const char *get_filepath_with_ext(document_t *doc, const char *filepath, const char *mime_str) {

    if (doc->ext > doc->base) {
        return filepath;
    }

    if (strcmp(mime_str, "image/png") == 0) {
        return "file.png";
    }
    if (strcmp(mime_str, "image/jpeg") == 0) {
        return "file.jpg";
    }

    return filepath;
}
|
||||
|
||||
|
||||
__always_inline
|
||||
void *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
|
||||
|
||||
if (frame->pict_type == AV_PICTURE_TYPE_NONE) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int dstW;
|
||||
int dstH;
|
||||
if (frame->width <= size && frame->height <= size) {
|
||||
if (decoder->codec_id == AV_CODEC_ID_MJPEG || decoder->codec_id == AV_CODEC_ID_PNG) {
|
||||
return STORE_AS_IS;
|
||||
}
|
||||
|
||||
dstW = frame->width;
|
||||
dstH = frame->height;
|
||||
} else {
|
||||
double ratio = (double) frame->width / frame->height;
|
||||
if (frame->width > frame->height) {
|
||||
dstW = size;
|
||||
dstH = (int) (size / ratio);
|
||||
} else {
|
||||
dstW = (int) (size * ratio);
|
||||
dstH = size;
|
||||
}
|
||||
}
|
||||
|
||||
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
AVFrame *scaled_frame = av_frame_alloc();
|
||||
|
||||
struct SwsContext *sws_ctx = sws_getContext(
|
||||
decoder->width, decoder->height, decoder->pix_fmt,
|
||||
dstW, dstH, AV_PIX_FMT_YUVJ420P,
|
||||
SIST_SWS_ALGO, 0, 0, 0
|
||||
);
|
||||
|
||||
int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
||||
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len * 2);
|
||||
|
||||
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
||||
|
||||
sws_scale(sws_ctx,
|
||||
(const uint8_t *const *) frame->data, frame->linesize,
|
||||
0, decoder->height,
|
||||
scaled_frame->data, scaled_frame->linesize
|
||||
);
|
||||
|
||||
scaled_frame->width = dstW;
|
||||
scaled_frame->height = dstH;
|
||||
scaled_frame->format = AV_PIX_FMT_YUV420P;
|
||||
|
||||
sws_freeContext(sws_ctx);
|
||||
|
||||
return scaled_frame;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
AVPacket *packet;
|
||||
AVFrame *frame;
|
||||
} frame_and_packet_t;
|
||||
|
||||
/**
 * Releases a frame_and_packet_t and everything it holds. NULL is accepted.
 *
 * av_packet_free()/av_frame_free() release their object and reset the
 * pointer, so no further free() of the members is needed.
 */
static void frame_and_packet_free(frame_and_packet_t *frame_and_packet) {
    if (frame_and_packet == NULL) {
        return;
    }

    if (frame_and_packet->packet != NULL) {
        av_packet_free(&frame_and_packet->packet);
    }

    if (frame_and_packet->frame != NULL) {
        av_frame_free(&frame_and_packet->frame);
    }

    free(frame_and_packet);
}
|
||||
|
||||
__always_inline
|
||||
static void read_subtitles(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, int stream_idx, document_t *doc) {
|
||||
|
||||
text_buffer_t tex = text_buffer_create(-1);
|
||||
|
||||
AVPacket packet;
|
||||
AVSubtitle subtitle;
|
||||
|
||||
AVCodec *subtitle_codec = avcodec_find_decoder(pFormatCtx->streams[stream_idx]->codecpar->codec_id);
|
||||
AVCodecContext *decoder = avcodec_alloc_context3(subtitle_codec);
|
||||
avcodec_parameters_to_context(decoder, pFormatCtx->streams[stream_idx]->codecpar);
|
||||
avcodec_open2(decoder, subtitle_codec, NULL);
|
||||
|
||||
decoder->sub_text_format = FF_SUB_TEXT_FMT_ASS;
|
||||
|
||||
int got_sub;
|
||||
|
||||
while (1) {
|
||||
int read_frame_ret = av_read_frame(pFormatCtx, &packet);
|
||||
|
||||
if (read_frame_ret != 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (packet.stream_index != stream_idx) {
|
||||
av_packet_unref(&packet);
|
||||
continue;
|
||||
}
|
||||
|
||||
avcodec_decode_subtitle2(decoder, &subtitle, &got_sub, &packet);
|
||||
|
||||
if (got_sub) {
|
||||
for (int i = 0; i < subtitle.num_rects; i++) {
|
||||
const char *text = subtitle.rects[i]->ass;
|
||||
|
||||
if (text == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
char *idx = strstr(text, "\\N");
|
||||
if (idx != NULL && strlen(idx + 2) > 1) {
|
||||
text_buffer_append_string0(&tex, idx + 2);
|
||||
text_buffer_append_char(&tex, ' ');
|
||||
}
|
||||
}
|
||||
avsubtitle_free(&subtitle);
|
||||
}
|
||||
|
||||
av_packet_unref(&packet);
|
||||
}
|
||||
|
||||
text_buffer_terminate_string(&tex);
|
||||
|
||||
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
|
||||
text_buffer_destroy(&tex);
|
||||
avcodec_free_context(&decoder);
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static frame_and_packet_t *
|
||||
read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx,
|
||||
document_t *doc) {
|
||||
|
||||
frame_and_packet_t *result = calloc(1, sizeof(frame_and_packet_t));
|
||||
result->packet = av_packet_alloc();
|
||||
result->frame = av_frame_alloc();
|
||||
|
||||
av_init_packet(result->packet);
|
||||
|
||||
int receive_ret = -EAGAIN;
|
||||
while (receive_ret == -EAGAIN) {
|
||||
// Get video frame
|
||||
while (1) {
|
||||
int read_frame_ret = av_read_frame(pFormatCtx, result->packet);
|
||||
|
||||
if (read_frame_ret != 0) {
|
||||
if (read_frame_ret != AVERROR_EOF) {
|
||||
CTX_LOG_WARNINGF(doc->filepath,
|
||||
"(media.c) avcodec_read_frame() returned error code [%d] %s",
|
||||
read_frame_ret, av_err2str(read_frame_ret)
|
||||
)
|
||||
}
|
||||
frame_and_packet_free(result);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//Ignore audio/other frames
|
||||
if (result->packet->stream_index != stream_idx) {
|
||||
av_packet_unref(result->packet);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Feed it to decoder
|
||||
int decode_ret = avcodec_send_packet(decoder, result->packet);
|
||||
if (decode_ret != 0) {
|
||||
CTX_LOG_ERRORF(doc->filepath,
|
||||
"(media.c) avcodec_send_packet() returned error code [%d] %s",
|
||||
decode_ret, av_err2str(decode_ret)
|
||||
)
|
||||
frame_and_packet_free(result);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
receive_ret = avcodec_receive_frame(decoder, result->frame);
|
||||
if (receive_ret == -EAGAIN && result->packet != NULL) {
|
||||
av_packet_unref(result->packet);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Appends tag->value to `doc` under `key` unless the document already holds
 * an entry with that key, in which case the duplicate is logged at DEBUG
 * level and ignored.
 */
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {

    for (meta_line_t *existing = doc->meta_head; existing != NULL; existing = existing->next) {
        if (existing->key != key) {
            continue;
        }
        CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
                       key, existing->str_val, key, tag->value)
        return;
    }

    /* Pass the value through a text buffer before storing it. */
    text_buffer_t value_buf = text_buffer_create(-1);
    text_buffer_append_string0(&value_buf, tag->value);
    text_buffer_terminate_string(&value_buf);

    meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + value_buf.dyn_buffer.cur);
    meta_tag->key = key;
    strcpy(meta_tag->str_val, value_buf.dyn_buffer.buf);

    APPEND_META(doc, meta_tag)
    text_buffer_destroy(&value_buf);
}
|
||||
|
||||
// Shorthand for appending the current tag's value under `keyname`.
// Expects `doc` and `tag` to be in scope at the expansion site.
#define APPEND_TAG_META(keyname) APPEND_UTF8_META(doc, keyname, tag->value)
|
||||
|
||||
// Copy `str` into the char ARRAY `dst` (sizeof(dst) must be its capacity),
// always NUL-terminating, then lower-case it in place.
// The expansion is a brace block so that it needs no trailing semicolon and
// does not leak the `ptr` cursor into the caller's scope.
#define STRCPY_TOLOWER(dst, str) \
    { \
        strncpy((dst), (str), sizeof(dst) - 1); \
        (dst)[sizeof(dst) - 1] = '\0'; \
        for (char *ptr = (dst); *ptr; ++ptr) *ptr = (char) tolower((unsigned char) *ptr); \
    }
|
||||
|
||||
__always_inline
|
||||
static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
|
||||
|
||||
AVDictionaryEntry *tag = NULL;
|
||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
char key[256];
|
||||
STRCPY_TOLOWER(key, tag->key)
|
||||
|
||||
if (strcmp(key, "artist") == 0) {
|
||||
APPEND_TAG_META(MetaArtist)
|
||||
} else if (strcmp(key, "genre") == 0) {
|
||||
APPEND_TAG_META(MetaGenre)
|
||||
} else if (strcmp(key, "title") == 0) {
|
||||
APPEND_TAG_META(MetaTitle)
|
||||
} else if (strcmp(key, "album_artist") == 0) {
|
||||
APPEND_TAG_META(MetaAlbumArtist)
|
||||
} else if (strcmp(key, "album") == 0) {
|
||||
APPEND_TAG_META(MetaAlbum)
|
||||
} else if (strcmp(key, "comment") == 0) {
|
||||
APPEND_TAG_META(MetaContent)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static void
|
||||
append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) {
|
||||
|
||||
if (is_video) {
|
||||
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
|
||||
meta_duration->key = MetaMediaDuration;
|
||||
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
|
||||
if (meta_duration->long_val > INT32_MAX) {
|
||||
meta_duration->long_val = 0;
|
||||
}
|
||||
APPEND_META(doc, meta_duration)
|
||||
|
||||
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
|
||||
meta_bitrate->key = MetaMediaBitrate;
|
||||
meta_bitrate->long_val = pFormatCtx->bit_rate;
|
||||
APPEND_META(doc, meta_bitrate)
|
||||
}
|
||||
|
||||
AVDictionaryEntry *tag = NULL;
|
||||
if (is_video) {
|
||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
char key[256];
|
||||
STRCPY_TOLOWER(key, tag->key)
|
||||
|
||||
if (strcmp(key, "title") == 0) {
|
||||
append_tag_meta_if_not_exists(ctx, doc, tag, MetaTitle);
|
||||
} else if (strcmp(key, "comment") == 0) {
|
||||
append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
|
||||
} else if (strcmp(key, "artist") == 0) {
|
||||
append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// EXIF metadata
|
||||
while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
char key[256];
|
||||
STRCPY_TOLOWER(key, tag->key)
|
||||
|
||||
if (strcmp(key, "artist") == 0) {
|
||||
append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist);
|
||||
} else if (strcmp(key, "imagedescription") == 0) {
|
||||
APPEND_TAG_META(MetaContent)
|
||||
} else if (strcmp(key, "make") == 0) {
|
||||
APPEND_TAG_META(MetaExifMake)
|
||||
} else if (strcmp(key, "model") == 0) {
|
||||
APPEND_TAG_META(MetaExifModel)
|
||||
} else if (strcmp(key, "software") == 0) {
|
||||
APPEND_TAG_META(MetaExifSoftware)
|
||||
} else if (strcmp(key, "fnumber") == 0) {
|
||||
APPEND_TAG_META(MetaExifFNumber)
|
||||
} else if (strcmp(key, "focallength") == 0) {
|
||||
APPEND_TAG_META(MetaExifFocalLength)
|
||||
} else if (strcmp(key, "usercomment") == 0) {
|
||||
APPEND_TAG_META(MetaExifUserComment)
|
||||
} else if (strcmp(key, "isospeedratings") == 0) {
|
||||
APPEND_TAG_META(MetaExifIsoSpeedRatings)
|
||||
} else if (strcmp(key, "exposuretime") == 0) {
|
||||
APPEND_TAG_META(MetaExifExposureTime)
|
||||
} else if (strcmp(key, "datetime") == 0) {
|
||||
APPEND_TAG_META(MetaExifDateTime)
|
||||
} else if (strcmp(key, "gpslatitude") == 0) {
|
||||
APPEND_TAG_META(MetaExifGpsLatitudeDMS)
|
||||
} else if (strcmp(key, "gpslatituderef") == 0) {
|
||||
APPEND_TAG_META(MetaExifGpsLatitudeRef)
|
||||
} else if (strcmp(key, "gpslongitude") == 0) {
|
||||
APPEND_TAG_META(MetaExifGpsLongitudeDMS)
|
||||
} else if (strcmp(key, "gpslongituderef") == 0) {
|
||||
APPEND_TAG_META(MetaExifGpsLongitudeRef)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Extract metadata and, when `ctx->tn_size > 0`, a thumbnail from an opened
 * format context.
 *
 * Steps: probe the streams; record the audio/video codec names and the video
 * dimensions; optionally read subtitles; append audio tags; then decode one
 * video frame, append video/EXIF metadata, scale the frame and store it
 * (as-is or re-encoded as JPEG) through `ctx->store`.
 *
 * This function takes ownership of `pFormatCtx`: it is always closed before
 * returning. The caller remains responsible for any custom AVIOContext.
 *
 * @param ctx        scan context (options, logging and store callbacks)
 * @param pFormatCtx format context already opened with avformat_open_input()
 * @param doc        document receiving metadata; its path_md5 keys the thumbnail
 */
void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {

    int video_stream = -1;
    int audio_stream = -1;
    int subtitle_stream = -1;

    AVCodecContext *decoder = NULL;
    frame_and_packet_t *frame_and_packet = NULL;

    avformat_find_stream_info(pFormatCtx, NULL);

    // Walk the streams from last to first; the first audio/video stream
    // encountered in that order is the one that gets described.
    for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
        AVStream *stream = pFormatCtx->streams[i];

        if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (audio_stream == -1) {
                const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id);

                if (desc != NULL) {
                    APPEND_STR_META(doc, MetaMediaAudioCodec, desc->name)
                }

                audio_stream = i;
            }
        } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {

            if (video_stream == -1) {
                const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id);

                if (desc != NULL) {
                    APPEND_STR_META(doc, MetaMediaVideoCodec, desc->name)
                }

                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
                meta_w->key = MetaWidth;
                meta_w->long_val = stream->codecpar->width;
                APPEND_META(doc, meta_w)

                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
                meta_h->key = MetaHeight;
                meta_h->long_val = stream->codecpar->height;
                APPEND_META(doc, meta_h)

                video_stream = i;
            }
        } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) {
            subtitle_stream = i;
        }
    }

    if (subtitle_stream != -1 && ctx->read_subtitles) {
        read_subtitles(ctx, pFormatCtx, subtitle_stream, doc);

        // Reset stream
        if (video_stream != -1) {
            av_seek_frame(pFormatCtx, video_stream, 0, 0);
        }
    }

    if (audio_stream != -1) {
        append_audio_meta(pFormatCtx, doc);
    }

    if (video_stream != -1 && ctx->tn_size > 0) {
        AVStream *stream = pFormatCtx->streams[video_stream];

        if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
            goto cleanup;
        }

        // Decoder
        const AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
        decoder = avcodec_alloc_context3(video_codec);
        if (decoder == NULL) {
            CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate decoder with avcodec_alloc_context3()")
            goto cleanup;
        }
        avcodec_parameters_to_context(decoder, stream->codecpar);

        int open_ret = avcodec_open2(decoder, video_codec, NULL);
        if (open_ret < 0) {
            // Covers a missing decoder (video_codec == NULL) as well.
            CTX_LOG_ERRORF(doc->filepath,
                           "(media.c) avcodec_open2() returned error code [%d] %s",
                           open_ret, av_err2str(open_ret))
            goto cleanup;
        }

        //Seek
        if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
            int seek_ret;
            for (int i = 20; i >= 0; i--) {
                seek_ret = av_seek_frame(pFormatCtx, video_stream,
                                         stream->duration * 0.10, 0);
                if (seek_ret == 0) {
                    break;
                }
            }
        }

        frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
        if (frame_and_packet == NULL) {
            goto cleanup;
        }

        append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));

        // Scale frame
        AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);

        if (scaled_frame == NULL) {
            goto cleanup;
        }

        if (scaled_frame == STORE_AS_IS) {
            APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
            ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
                       frame_and_packet->packet->size);
        } else {
            // Encode frame to jpeg
            AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
                                                              ctx->tn_qscale);

            AVPacket jpeg_packet;
            av_init_packet(&jpeg_packet);
            // av_init_packet() leaves data/size untouched; make them well-defined
            // so a failed encode can never hand garbage to the store callback.
            jpeg_packet.data = NULL;
            jpeg_packet.size = 0;

            if (jpeg_encoder != NULL
                && avcodec_send_frame(jpeg_encoder, scaled_frame) == 0
                && avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
                // Save thumbnail
                APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
                ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data,
                           jpeg_packet.size);
            } else {
                CTX_LOG_ERROR(doc->filepath, "(media.c) Could not encode thumbnail to JPEG")
            }

            if (jpeg_encoder != NULL) {
                avcodec_free_context(&jpeg_encoder);
            }
            av_packet_unref(&jpeg_packet);
            av_free(*scaled_frame->data);
            av_frame_free(&scaled_frame);
        }
    }

cleanup:
    if (frame_and_packet != NULL) {
        frame_and_packet_free(frame_and_packet);
    }
    if (decoder != NULL) {
        avcodec_free_context(&decoder);
    }
    // avformat_close_input() frees the context and resets the pointer to NULL.
    avformat_close_input(&pFormatCtx);
}
|
||||
|
||||
/**
 * Open the media file at `filepath` directly from the file system and hand
 * the resulting format context to parse_media_format_ctx().
 * Logs an error and returns if the context cannot be allocated or opened.
 */
void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_t *doc) {

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (!pFormatCtx) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    int open_ret = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
    if (open_ret >= 0) {
        parse_media_format_ctx(ctx, pFormatCtx, doc);
        return;
    }

    CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", open_ret, av_err2str(open_ret))
    avformat_close_input(&pFormatCtx);
    avformat_free_context(pFormatCtx);
}
|
||||
|
||||
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
|
||||
struct vfile *f = ptr;
|
||||
|
||||
int ret = f->read(f, buf, buf_size);
|
||||
|
||||
if (ret == 0) {
|
||||
return AVERROR_EOF;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// In-memory file: a buffer exposed as a FILE stream.
// Field order matters: instances are initialised positionally ({0, 0, 0}).
typedef struct {
    size_t size; // number of bytes in buf
    FILE *file;  // stream opened over buf
    void *buf;   // backing storage
} memfile_t;
|
||||
|
||||
int memfile_read(void *ptr, uint8_t *buf, int buf_size) {
|
||||
memfile_t *mem = ptr;
|
||||
|
||||
size_t ret = fread(buf, 1, buf_size, mem->file);
|
||||
|
||||
if (ret == 0 && feof(mem->file)) {
|
||||
return AVERROR_EOF;
|
||||
}
|
||||
|
||||
return (int) ret;
|
||||
}
|
||||
|
||||
long memfile_seek(void *ptr, long offset, int whence) {
|
||||
memfile_t *mem = ptr;
|
||||
|
||||
if (whence == 0x10000) {
|
||||
return mem->size;
|
||||
}
|
||||
|
||||
int ret = fseek(mem->file, offset, whence);
|
||||
if (ret != 0) {
|
||||
return AVERROR_EOF;
|
||||
}
|
||||
|
||||
return ftell(mem->file);
|
||||
}
|
||||
|
||||
/**
 * Load the whole vfile `f` into memory and open a read-only stream over it.
 *
 * When `f->calculate_checksum` is set and the file was read completely, the
 * SHA1 digest of the content is stored in `f` and `f->has_checksum` is set.
 *
 * On failure nothing is left allocated: `mem->buf` and `mem->file` are NULL,
 * so a later memfile_close() is harmless.
 *
 * @return 0 on success, -1 if allocation, the read or fmemopen() failed
 */
int memfile_open(vfile_t *f, memfile_t *mem) {
    mem->size = f->info.st_size;
    mem->file = NULL;

    mem->buf = malloc(mem->size);
    if (mem->buf == NULL) {
        return -1;
    }

    int ret = f->read(f, mem->buf, mem->size);
    int read_ok = ret >= 0 && (size_t) ret == mem->size;

    // Only hash a buffer that was filled completely; after a short read the
    // tail is uninitialised and the digest would be meaningless.
    if (read_ok && f->calculate_checksum) {
        SHA1_Init(&f->sha1_ctx);
        safe_sha1_update(&f->sha1_ctx, mem->buf, mem->size);
        SHA1_Final(f->sha1_digest, &f->sha1_ctx);
        f->has_checksum = TRUE;
    }

    if (read_ok) {
        mem->file = fmemopen(mem->buf, mem->size, "rb");
    }

    if (mem->file == NULL) {
        free(mem->buf);
        mem->buf = NULL;
        return -1;
    }

    return 0;
}
|
||||
|
||||
/**
 * Open a read-only stream over a caller-owned buffer.
 * The buffer is borrowed, not copied: `mem->buf` points at `buf`.
 *
 * @return 0 on success, -1 if fmemopen() failed (`mem->file` is then NULL)
 */
int memfile_open_buf(void *buf, size_t buf_len, memfile_t *mem) {
    // Keep the full size_t length; an int cast would corrupt sizes above INT_MAX.
    mem->size = buf_len;

    mem->buf = buf;
    mem->file = fmemopen(mem->buf, mem->size, "rb");

    return mem->file != NULL ? 0 : -1;
}
|
||||
|
||||
/**
 * Release a memfile: close its stream (if any) and free its buffer.
 * Safe to call on a zero-initialised or partially opened memfile, and
 * safe to call twice, because both members are reset to NULL.
 */
void memfile_close(memfile_t *mem) {
    if (mem->file != NULL) {
        // Guarded separately from buf: fclose(NULL) is undefined behaviour.
        fclose(mem->file);
        mem->file = NULL;
    }

    free(mem->buf);
    mem->buf = NULL;
}
|
||||
|
||||
/**
 * Parse a media file that is only reachable through a vfile (not a plain
 * file-system path), using a custom AVIOContext.
 *
 * Files no larger than `ctx->max_media_buffer` are loaded fully into memory,
 * which gives FFmpeg seek support; otherwise (or if loading fails) the vfile
 * is streamed without seeking.
 *
 * @param ctx      scan context
 * @param f        virtual file to read from
 * @param doc      document receiving metadata
 * @param mime_str mime type, used to build the name passed to avformat_open_input()
 */
void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc, const char *mime_str) {

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O buffer with av_malloc()")
        avformat_free_context(pFormatCtx);
        return;
    }

    AVIOContext *io_ctx = NULL;
    memfile_t memfile = {0, 0, 0};

    const char *filepath = get_filepath_with_ext(doc, f->filepath, mime_str);

    if (f->info.st_size <= ctx->max_media_buffer) {
        int ret = memfile_open(f, &memfile);
        if (ret == 0) {
            CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->info.st_size)
            io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek);
        }
    }

    if (io_ctx == NULL) {
        CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->info.st_size)
        io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
    }

    if (io_ctx == NULL) {
        // No I/O context at all: release everything acquired so far.
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O context with avio_alloc_context()")
        av_free(buffer);
        memfile_close(&memfile);
        avformat_free_context(pFormatCtx);
        return;
    }

    pFormatCtx->pb = io_ctx;

    int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
    if (res < 0) {
        // On failure avformat_open_input() has already freed the format context.
        // I/O errors (-5) are not reported.
        if (res != AVERROR(EIO)) {
            CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
        }
    } else {
        // Takes ownership of pFormatCtx and closes it.
        parse_media_format_ctx(ctx, pFormatCtx, doc);
    }

    // The buffer may have been replaced internally, so free the one the
    // context currently holds rather than the original pointer.
    av_free(io_ctx->buffer);
    avio_context_free(&io_ctx);
    memfile_close(&memfile);
}
|
||||
|
||||
/**
 * Entry point for media parsing: files that live on the file system are
 * opened by path, everything else goes through the vfile I/O layer.
 */
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc, const char *mime_str) {

    if (!f->is_fs_file) {
        parse_media_vfile(ctx, f, doc, mime_str);
        return;
    }

    parse_media_filename(ctx, f->filepath, doc);
}
|
||||
|
||||
void init_media() {
|
||||
av_log_set_level(AV_LOG_QUIET);
|
||||
}
|
||||
|
||||
/**
 * Decode the image held in `buf`, scale it to `ctx->tn_size` and store it as
 * the thumbnail of `doc` through `ctx->store`.
 *
 * The buffer is borrowed: it is read through an in-memory stream and never
 * freed here.
 *
 * @param ctx     scan context (options, logging and store callbacks)
 * @param buf     encoded image data
 * @param buf_len length of `buf` in bytes
 * @param doc     document the thumbnail belongs to
 * @param url     name passed to avformat_open_input()
 * @return TRUE if a thumbnail was stored, FALSE otherwise
 */
int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, document_t *doc, const char *url) {
    int stored = FALSE;
    memfile_t memfile = {0, 0, 0};
    unsigned char *buffer = NULL;
    AVIOContext *io_ctx = NULL;
    AVCodecContext *decoder = NULL;
    AVStream *stream = NULL;
    AVFrame *scaled_frame = NULL;
    frame_and_packet_t *frame_and_packet = NULL;

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return FALSE;
    }

    if (memfile_open_buf(buf, buf_len, &memfile) != 0) {
        goto cleanup;
    }
    CTX_LOG_DEBUGF(doc->filepath, "Loading media file in memory (%zuB)", buf_len)

    buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        goto cleanup;
    }

    io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek);
    if (io_ctx == NULL) {
        // Not yet owned by an I/O context, so it has to be freed by hand.
        av_free(buffer);
        goto cleanup;
    }

    pFormatCtx->pb = io_ctx;

    // On failure avformat_open_input() frees the context and NULLs the pointer.
    if (avformat_open_input(&pFormatCtx, url, NULL, NULL) != 0) {
        goto cleanup;
    }

    if (pFormatCtx->nb_streams == 0) {
        goto cleanup;
    }
    stream = pFormatCtx->streams[0];

    // Decoder
    const AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
    decoder = avcodec_alloc_context3(video_codec);
    if (decoder == NULL) {
        goto cleanup;
    }
    avcodec_parameters_to_context(decoder, stream->codecpar);
    avcodec_open2(decoder, video_codec, NULL);

    frame_and_packet = read_frame(ctx, pFormatCtx, decoder, 0, doc);
    if (frame_and_packet == NULL) {
        goto cleanup;
    }

    // Scale frame
    scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
    if (scaled_frame == NULL) {
        goto cleanup;
    }

    if (scaled_frame == STORE_AS_IS) {
        APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
        ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
                   frame_and_packet->packet->size);
        stored = TRUE;
    } else {
        // Encode frame to jpeg
        AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
                                                          ctx->tn_qscale);

        AVPacket jpeg_packet;
        av_init_packet(&jpeg_packet);
        // av_init_packet() leaves data/size untouched; make them well-defined.
        jpeg_packet.data = NULL;
        jpeg_packet.size = 0;

        if (jpeg_encoder != NULL
            && avcodec_send_frame(jpeg_encoder, scaled_frame) == 0
            && avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
            // Save thumbnail
            APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
            ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data,
                       jpeg_packet.size);
            stored = TRUE;
        } else {
            CTX_LOG_ERROR(doc->filepath, "(media.c) Could not encode thumbnail to JPEG")
        }

        av_packet_unref(&jpeg_packet);
        if (jpeg_encoder != NULL) {
            avcodec_free_context(&jpeg_encoder);
        }
        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
    }

cleanup:
    if (frame_and_packet != NULL) {
        frame_and_packet_free(frame_and_packet);
    }
    if (decoder != NULL) {
        avcodec_free_context(&decoder);
    }
    avformat_close_input(&pFormatCtx);
    if (io_ctx != NULL) {
        av_free(io_ctx->buffer);
        avio_context_free(&io_ctx);
    }
    if (memfile.file != NULL) {
        // Only the stream is ours; the underlying buffer belongs to the caller.
        fclose(memfile.file);
    }

    return stored;
}
|
||||
52
third-party/libscan/libscan/media/media.h
vendored
Normal file
52
third-party/libscan/libscan/media/media.h
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
#ifndef SIST2_MEDIA_H
|
||||
#define SIST2_MEDIA_H
|
||||
|
||||
|
||||
#include "../scan.h"
|
||||
|
||||
#include "libavformat/avformat.h"
|
||||
#include "libswscale/swscale.h"
|
||||
#include "libswresample/swresample.h"
|
||||
#include "libavcodec/avcodec.h"
|
||||
#include "libavutil/imgutils.h"
|
||||
|
||||
typedef struct {
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
store_callback_t store;
|
||||
|
||||
int tn_size;
|
||||
float tn_qscale;
|
||||
long max_media_buffer;
|
||||
int read_subtitles;
|
||||
} scan_media_ctx_t;
|
||||
|
||||
__always_inline
|
||||
static AVCodecContext *alloc_jpeg_encoder(int w, int h, float qscale) {
|
||||
|
||||
const AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
|
||||
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
|
||||
jpeg->width = w;
|
||||
jpeg->height = h;
|
||||
jpeg->time_base.den = 1000000;
|
||||
jpeg->time_base.num = 1;
|
||||
jpeg->i_quant_factor = qscale;
|
||||
|
||||
jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
|
||||
int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
|
||||
|
||||
if (ret != 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return jpeg;
|
||||
}
|
||||
|
||||
|
||||
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc, const char*mime_str);
|
||||
|
||||
void init_media();
|
||||
|
||||
int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, document_t *doc, const char *url);
|
||||
|
||||
#endif
|
||||
79
third-party/libscan/libscan/mobi/scan_mobi.c
vendored
Normal file
79
third-party/libscan/libscan/mobi/scan_mobi.c
vendored
Normal file
@@ -0,0 +1,79 @@
|
||||
#include "scan_mobi.h"
|
||||
|
||||
#include <mobi.h>
|
||||
#include <errno.h>
|
||||
#include "stdlib.h"
|
||||
|
||||
/**
 * Parse a MOBI e-book: append its author, title and text content to `doc`.
 *
 * The file is read fully into memory and handed to libmobi through an
 * in-memory stream. Every exit path releases the MOBI handle and all buffers.
 *
 * @param ctx scan context (content size limit, logging)
 * @param f   file to read
 * @param doc document receiving the metadata
 */
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {

    MOBIData *m = mobi_init();
    if (m == NULL) {
        CTX_LOG_ERROR(f->filepath, "mobi_init() failed")
        return;
    }

    char *content_str = NULL;
    size_t buf_len;
    char *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        goto cleanup;
    }

    FILE *file = fmemopen(buf, buf_len, "rb");
    if (file == NULL) {
        // Log before any cleanup so errno still belongs to fmemopen().
        CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno)
        goto cleanup;
    }

    MOBI_RET mobi_ret = mobi_load_file(m, file);
    fclose(file);
    if (mobi_ret != MOBI_SUCCESS) {
        CTX_LOG_ERRORF(f->filepath, "mobi_load_file() returned error code [%d]", mobi_ret)
        goto cleanup;
    }

    char *author = mobi_meta_get_author(m);
    if (author != NULL) {
        APPEND_STR_META(doc, MetaAuthor, author)
        free(author);
    }
    char *title = mobi_meta_get_title(m);
    if (title != NULL) {
        APPEND_STR_META(doc, MetaTitle, title)
        free(title);
    }

    const size_t maxlen = mobi_get_text_maxsize(m);
    if (maxlen == MOBI_NOTSET) {
        CTX_LOG_DEBUGF(f->filepath, "Invalid text maxsize: %zu", maxlen)
        goto cleanup;
    }

    content_str = malloc(maxlen + 1);
    if (content_str == NULL) {
        CTX_LOG_ERROR(f->filepath, "malloc() failed")
        goto cleanup;
    }

    size_t length = maxlen;
    mobi_ret = mobi_get_rawml(m, content_str, &length);
    if (mobi_ret != MOBI_SUCCESS) {
        CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret)
        goto cleanup;
    }

    {
        // Own scope: the meta macro may declare locals.
        text_buffer_t tex = text_buffer_create(ctx->content_size);
        text_buffer_append_markup(&tex, content_str);
        text_buffer_terminate_string(&tex);

        APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)

        text_buffer_destroy(&tex);
    }

cleanup:
    free(content_str);
    free(buf);
    mobi_free(m);
}
|
||||
14
third-party/libscan/libscan/mobi/scan_mobi.h
vendored
Normal file
14
third-party/libscan/libscan/mobi/scan_mobi.h
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
#ifndef SCAN_SCAN_MOBI_H
|
||||
#define SCAN_SCAN_MOBI_H
|
||||
|
||||
#include "../scan.h"
|
||||
|
||||
typedef struct {
|
||||
long content_size;
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
} scan_mobi_ctx_t;
|
||||
|
||||
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
||||
#endif
|
||||
147
third-party/libscan/libscan/msdoc/msdoc.c
vendored
Normal file
147
third-party/libscan/libscan/msdoc/msdoc.c
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
#include "msdoc.h"
|
||||
#include <errno.h>
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include "../../third-party/antiword/src/antiword.h"
|
||||
|
||||
#include "../ebook/ebook.h"
|
||||
|
||||
/**
 * Convert a Word document to plain text with antiword and append its author,
 * title and text content to `doc`.
 *
 * Ownership: `buf` is always freed by this function, on every path.
 * `file_in` is only read from; closing it is left to the caller.
 *
 * @param ctx     scan context (content size limit, logging)
 * @param doc     document receiving the metadata
 * @param file_in stream positioned over the Word document
 * @param buf     buffer backing the document; freed here
 * @param buf_len size of the document in bytes
 */
void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, void *buf, size_t buf_len) {

    // Open word doc
    options_type *opts = direct_vGetOptions();
    opts->iParagraphBreak = 74;
    opts->eConversionType = conversion_text;
    opts->bHideHiddenText = 1;
    opts->bRemoveRemovedText = 1;
    opts->bUseLandscape = 0;
    opts->eEncoding = encoding_utf_8;
    opts->iPageHeight = 842; // A4
    opts->iPageWidth = 595;
    opts->eImageLevel = level_ps_3;

    int doc_word_version = iGuessVersionNumber(file_in, (int) buf_len);
    if (doc_word_version < 0 || doc_word_version == 3) {
        free(buf);
        return;
    }
    rewind(file_in);

    size_t out_len = 0;
    char *out_buf = NULL;

    FILE *file_out = open_memstream(&out_buf, &out_len);
    if (file_out == NULL) {
        CTX_LOG_ERROR(doc->filepath, "open_memstream() failed")
        free(buf);
        return;
    }

    diagram_type *diag = pCreateDiagram("antiword", NULL, file_out);
    if (diag == NULL) {
        // file_in stays open: the caller closes it. Release only what we own.
        fclose(file_out);
        free(out_buf);
        free(buf);
        return;
    }

    iInitDocument(file_in, (int) buf_len);
    const char *author = szGetAuthor();
    if (author != NULL) {
        APPEND_UTF8_META(doc, MetaAuthor, author)
    }

    const char *title = szGetTitle();
    if (title != NULL) {
        APPEND_UTF8_META(doc, MetaTitle, title)
    }
    vFreeDocument();

    bWordDecryptor(file_in, (int) buf_len, diag);
    vDestroyDiagram(diag);
    // Closing the memstream finalises out_buf / out_len.
    fclose(file_out);

    if (buf_len > 0) {
        text_buffer_t tex = text_buffer_create(ctx->content_size);
        text_buffer_append_string(&tex, out_buf, out_len);
        text_buffer_terminate_string(&tex);

        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
        meta_content->key = MetaContent;
        memcpy(meta_content->str_val, tex.dyn_buffer.buf, tex.dyn_buffer.cur);
        APPEND_META(doc, meta_content)

        text_buffer_destroy(&tex);
    }

    free(buf);
    free(out_buf);
}
|
||||
|
||||
/**
 * Convert a Word document to PDF with antiword and pass the result to the
 * e-book parser (parse_ebook_mem).
 *
 * Ownership: `buf` is always freed by this function, on every path.
 * `file` is only read from; closing it is left to the caller.
 *
 * @param ctx     scan context; its settings are forwarded to the e-book parser
 * @param doc     document receiving the metadata
 * @param file    stream positioned over the Word document
 * @param buf     buffer handed over by the caller; freed here
 * @param buf_len size of the document in bytes
 */
void parse_msdoc_pdf(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file, void *buf, size_t buf_len) {

    scan_ebook_ctx_t ebook_ctx = {
            .content_size = ctx->content_size,
            .tn_size = ctx->tn_size,
            .log = ctx->log,
            .logf = ctx->logf,
            .store = ctx->store,
    };

    // Open word doc
    options_type *opts = direct_vGetOptions();
    opts->iParagraphBreak = 74;
    opts->eConversionType = conversion_pdf;
    opts->bHideHiddenText = 1;
    opts->bRemoveRemovedText = 1;
    opts->bUseLandscape = 0;
    opts->eEncoding = encoding_latin_1;
    opts->iPageHeight = 842; // A4
    opts->iPageWidth = 595;
    opts->eImageLevel = level_ps_3;

    int doc_word_version = iGuessVersionNumber(file, (int) buf_len);
    if (doc_word_version < 0 || doc_word_version == 3) {
        free(buf);
        return;
    }
    rewind(file);

    size_t out_len = 0;
    char *out_buf = NULL;

    FILE *file_out = open_memstream(&out_buf, &out_len);
    if (file_out == NULL) {
        CTX_LOG_ERROR(doc->filepath, "open_memstream() failed")
        free(buf);
        return;
    }

    diagram_type *diag = pCreateDiagram("antiword", NULL, file_out);
    if (diag == NULL) {
        // Release everything this function owns before bailing out.
        fclose(file_out);
        free(out_buf);
        free(buf);
        return;
    }

    bWordDecryptor(file, (int) buf_len, diag);
    vDestroyDiagram(diag);

    // Closing the memstream finalises out_buf / out_len.
    fclose(file_out);

    parse_ebook_mem(&ebook_ctx, out_buf, out_len, "application/pdf", doc, TRUE);

    free(buf);
    free(out_buf);
}
|
||||
|
||||
/**
 * Parse a Word document: when thumbnails are enabled (`ctx->tn_size > 0`) run
 * the PDF conversion pass, then always run the plain-text pass.
 *
 * Each pass frees the buffer it is given, so the PDF pass receives its own
 * copy of the file content while the text pass consumes the original.
 *
 * @param ctx scan context
 * @param f   file to read
 * @param doc document receiving the metadata
 */
void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) {

    size_t buf_len;
    char *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        return;
    }

    FILE *file = fmemopen(buf, buf_len, "rb");
    if (file == NULL) {
        // Log before free() so errno still belongs to fmemopen().
        CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno)
        free(buf);
        return;
    }

    if (ctx->tn_size > 0) {
        char *buf_pdf = malloc(buf_len);
        if (buf_pdf != NULL) {
            memcpy(buf_pdf, buf, buf_len);
            parse_msdoc_pdf(ctx, doc, file, buf_pdf, buf_len);
        } else {
            // Without a copy the PDF pass is skipped; the text pass still runs.
            CTX_LOG_ERROR(f->filepath, "malloc() failed")
        }
    }
    parse_msdoc_text(ctx, doc, file, buf, buf_len);
    fclose(file);
}
|
||||
24
third-party/libscan/libscan/msdoc/msdoc.h
vendored
Normal file
24
third-party/libscan/libscan/msdoc/msdoc.h
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef SCAN_SCAN_MSDOC_H
|
||||
#define SCAN_SCAN_MSDOC_H
|
||||
|
||||
#include "../scan.h"
|
||||
|
||||
typedef struct {
|
||||
long content_size;
|
||||
int tn_size;
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
store_callback_t store;
|
||||
unsigned int msdoc_mime;
|
||||
} scan_msdoc_ctx_t;
|
||||
|
||||
__always_inline
|
||||
static int is_msdoc(scan_msdoc_ctx_t *ctx, unsigned int mime) {
|
||||
return mime == ctx->msdoc_mime;
|
||||
}
|
||||
|
||||
void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
||||
void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, void* buf, size_t buf_len);
|
||||
|
||||
#endif
|
||||
260
third-party/libscan/libscan/ooxml/ooxml.c
vendored
Normal file
260
third-party/libscan/libscan/ooxml/ooxml.c
vendored
Normal file
@@ -0,0 +1,260 @@
|
||||
#include "ooxml.h"
|
||||
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
#include <libxml/xmlstring.h>
|
||||
#include <libxml/parser.h>
|
||||
|
||||
// Cast a C string to libxml2's string type. The argument is parenthesised so
// that the cast applies to the whole expression passed in.
#define _X(str) ((const xmlChar *) (str))
|
||||
|
||||
__always_inline
|
||||
static int should_read_part(const char *part) {
|
||||
|
||||
if (part == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if ( // Word
|
||||
STR_STARTS_WITH_CONSTANT(part, "word/document.xml")
|
||||
|| STR_STARTS_WITH_CONSTANT(part, "word/footnotes.xml")
|
||||
|| STR_STARTS_WITH_CONSTANT(part, "word/endnotes.xml")
|
||||
|| STR_STARTS_WITH_CONSTANT(part, "word/footer")
|
||||
|| STR_STARTS_WITH_CONSTANT(part, "word/header")
|
||||
// PowerPoint
|
||||
|| STR_STARTS_WITH_CONSTANT(part, "ppt/slides/slide")
|
||||
|| STR_STARTS_WITH_CONSTANT(part, "ppt/notesSlides/slide")
|
||||
// Excel
|
||||
|| STR_STARTS_WITH_CONSTANT(part, "xl/worksheets/sheet")
|
||||
|| STR_STARTS_WITH_CONSTANT(part, "xl/sharedStrings.xml")
|
||||
|| STR_STARTS_WITH_CONSTANT(part, "xl/workbook.xml")
|
||||
) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/**
 * Recursively collect the text of every element named "t" from `node`, its
 * following siblings and all their descendants, appending each piece to
 * `buf` followed by a space.
 *
 * @return -1 if libxml2's last error is fatal, TEXT_BUF_FULL as soon as the
 *         buffer reports it is full, 0 otherwise
 */
int extract_text(scan_ooxml_ctx_t *ctx, xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
    //TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't
    xmlErrorPtr err = xmlGetLastError();
    if (err != NULL && err->level == XML_ERR_FATAL) {
        CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message)
        return -1;
    }

    xmlNode *child = node;
    while (child != NULL) {
        int is_text_node = child->name != NULL && child->name[0] == 't' && child->name[1] == '\0';

        if (is_text_node) {
            xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1);

            if (text != NULL) {
                int ret = text_buffer_append_string0(buf, (char *) text);
                text_buffer_append_char(buf, ' ');
                xmlFree(text);

                if (ret == TEXT_BUF_FULL) {
                    return ret;
                }
            }
        }

        // Descend into the children; only "buffer full" stops the walk.
        if (extract_text(ctx, xml, child->children, buf) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }

        child = child->next;
    }
    return 0;
}
|
||||
|
||||
int xml_io_read(void *context, char *buffer, int len) {
|
||||
struct archive *a = context;
|
||||
return (int) archive_read_data(a, buffer, len);
|
||||
}
|
||||
|
||||
// libxml2 close callback: nothing to release here, always reports success.
int xml_io_close(UNUSED(void *context)) {
    return 0; // noop
}
|
||||
|
||||
#define READ_PART_ERR (-2)
|
||||
|
||||
__always_inline
|
||||
static int read_part(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *buf, document_t *doc) {
|
||||
|
||||
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
|
||||
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
|
||||
|
||||
if (xml == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
|
||||
return READ_PART_ERR;
|
||||
}
|
||||
|
||||
xmlNode *root = xmlDocGetRootElement(xml);
|
||||
if (root == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Empty document")
|
||||
xmlFreeDoc(xml);
|
||||
return READ_PART_ERR;
|
||||
}
|
||||
|
||||
int ret = extract_text(ctx, xml, root, buf);
|
||||
xmlFreeDoc(xml);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int read_doc_props_app(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *doc) {
|
||||
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
|
||||
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
|
||||
|
||||
if (xml == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
|
||||
return -1;
|
||||
}
|
||||
|
||||
xmlNode *root = xmlDocGetRootElement(xml);
|
||||
if (root == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Empty document")
|
||||
xmlFreeDoc(xml);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (xmlStrEqual(root->name, _X("Properties"))) {
|
||||
for (xmlNode *child = root->children; child; child = child->next) {
|
||||
xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1);
|
||||
if (text == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (xmlStrEqual(child->name, _X("Pages"))) {
|
||||
APPEND_LONG_META(doc, MetaPages, strtol((char *) text, NULL, 10))
|
||||
}
|
||||
|
||||
xmlFree(text);
|
||||
}
|
||||
}
|
||||
xmlFreeDoc(xml);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *doc) {
|
||||
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
|
||||
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
|
||||
|
||||
if (xml == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
|
||||
return -1;
|
||||
}
|
||||
|
||||
xmlNode *root = xmlDocGetRootElement(xml);
|
||||
if (root == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Empty document")
|
||||
xmlFreeDoc(xml);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (xmlStrEqual(root->name, _X("coreProperties"))) {
|
||||
for (xmlNode *child = root->children; child; child = child->next) {
|
||||
xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1);
|
||||
if (text == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (xmlStrEqual(child->name, _X("title"))) {
|
||||
APPEND_STR_META(doc, MetaTitle, (char *) text)
|
||||
} else if (xmlStrEqual(child->name, _X("creator"))) {
|
||||
APPEND_STR_META(doc, MetaAuthor, (char *) text)
|
||||
} else if (xmlStrEqual(child->name, _X("lastModifiedBy"))) {
|
||||
APPEND_STR_META(doc, MetaModifiedBy, (char *) text)
|
||||
}
|
||||
|
||||
xmlFree(text);
|
||||
}
|
||||
}
|
||||
xmlFreeDoc(xml);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define MAX_TN_SIZE (1024 * 1024 * 15)
|
||||
|
||||
/**
 * Store the embedded thumbnail held by the archive entry `entry`.
 *
 * Entries that are empty, larger than MAX_TN_SIZE, or that cannot be read
 * completely are skipped; nothing is stored and no thumbnail metadata is
 * added in those cases.
 *
 * @param ctx   scan context; provides the log and store callbacks
 * @param doc   document the thumbnail belongs to (keyed by doc->path_md5)
 * @param a     archive positioned on `entry`
 * @param entry header of the thumbnail entry
 */
void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, struct archive_entry *entry) {

    // Keep the size signed so a negative value is rejected here instead of
    // wrapping around to a huge unsigned number.
    long long entry_size = archive_entry_size(entry);

    if (entry_size <= 0 || entry_size > MAX_TN_SIZE) {
        return;
    }

    size_t buf_len = (size_t) entry_size;
    char *buf = malloc(buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate thumbnail buffer")
        return;
    }

    // A negative result is an error and a short result would leave
    // uninitialised bytes in buf; either way the thumbnail is unusable.
    long long read_len = archive_read_data(a, buf, buf_len);
    if (read_len != entry_size) {
        CTX_LOG_ERROR(doc->filepath, "Could not read thumbnail from archive")
        free(buf);
        return;
    }

    APPEND_TN_META(doc, 1, 1) // Size unknown
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, buf_len);
    free(buf);
}
|
||||
|
||||
/**
 * Scan an OOXML document (a zip container).
 *
 * The whole file is read into memory and opened with libarchive. Every
 * regular entry is then dispatched by path:
 *   - parts accepted by should_read_part() feed the text buffer, until it is
 *     full or content indexing is disabled (ctx->content_size <= 0);
 *   - docProps/app.xml and docProps/core.xml provide metadata;
 *   - docProps/thumbnail.jpeg is stored as the document thumbnail.
 * A parse error in a text part or a docProps file stops the iteration, but
 * the text collected so far is still attached to `doc` as MetaContent.
 */
void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {

    size_t buf_len;
    void *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        return;
    }

    struct archive *a = archive_read_new();
    if (a == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate archive handle")
        free(buf);
        return;
    }
    archive_read_support_format_zip(a);

    int ret = archive_read_open_memory(a, buf, buf_len);
    if (ret != ARCHIVE_OK) {
        CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a))
        archive_read_free(a);
        free(buf);
        return;
    }

    text_buffer_t tex = text_buffer_create(ctx->content_size);

    struct archive_entry *entry;
    int buffer_full = FALSE;

    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
        if (!S_ISREG(archive_entry_stat(entry)->st_mode)) {
            continue;
        }

        const char *path = archive_entry_pathname(entry);

        if (!buffer_full && should_read_part(path) && ctx->content_size > 0) {
            ret = read_part(ctx, a, &tex, doc);
            if (ret == READ_PART_ERR) {
                break;
            }
            if (ret == TEXT_BUF_FULL) {
                // Keep iterating: metadata and thumbnail entries may follow.
                buffer_full = TRUE;
            }
        } else if (strcmp(path, "docProps/app.xml") == 0) {
            if (read_doc_props_app(ctx, a, doc) != 0) {
                break;
            }
        } else if (strcmp(path, "docProps/core.xml") == 0) {
            if (read_doc_props(ctx, a, doc) != 0) {
                break;
            }
        } else if (strcmp(path, "docProps/thumbnail.jpeg") == 0) {
            read_thumbnail(ctx, doc, a, entry);
        }
    }

    if (tex.dyn_buffer.cur > 0) {
        text_buffer_terminate_string(&tex);

        // After termination the first `cur` bytes of the buffer contain the
        // NUL terminator, so `cur` extra bytes are enough for the copy.
        meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
        if (meta == NULL) {
            CTX_LOG_ERROR(doc->filepath, "Could not allocate content metadata")
        } else {
            meta->key = MetaContent;
            strcpy(meta->str_val, tex.dyn_buffer.buf);
            APPEND_META(doc, meta)
        }
    }

    archive_read_close(a);
    archive_read_free(a);
    text_buffer_destroy(&tex);
    free(buf);
}
|
||||
16
third-party/libscan/libscan/ooxml/ooxml.h
vendored
Normal file
16
third-party/libscan/libscan/ooxml/ooxml.h
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef SCAN_OOXML_H
|
||||
#define SCAN_OOXML_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "../scan.h"
|
||||
|
||||
typedef struct {
|
||||
long content_size;
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
store_callback_t store;
|
||||
} scan_ooxml_ctx_t;
|
||||
|
||||
void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
||||
#endif
|
||||
224
third-party/libscan/libscan/raw/raw.c
vendored
Normal file
224
third-party/libscan/libscan/raw/raw.c
vendored
Normal file
@@ -0,0 +1,224 @@
|
||||
#include "raw.h"
|
||||
#include <libraw/libraw.h>
|
||||
|
||||
#include "../media/media.h"
|
||||
#include <unistd.h>
|
||||
|
||||
|
||||
#define MIN_SIZE 32
|
||||
|
||||
/**
 * Hand an already JPEG-encoded libraw image to store_image_thumbnail().
 *
 * NOTE(review): the raw context is reinterpreted as a scan_media_ctx_t, so
 * this presumably relies on both structs sharing a leading layout - confirm
 * against media.h before reordering fields in either one.
 *
 * @return the result of store_image_thumbnail().
 */
int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
    scan_media_ctx_t *media_ctx = (scan_media_ctx_t *) ctx;

    return store_image_thumbnail(media_ctx, img->data, img->data_size, doc, "x.jpeg");
}
|
||||
|
||||
/**
 * Scale an RGB24 libraw image down to at most ctx->tn_size on its longest
 * side, encode it as JPEG and pass it to ctx->store.
 *
 * @return TRUE when a thumbnail was stored. FALSE when the image has a zero
 *         dimension, when the scaled size would be MIN_SIZE or smaller in
 *         either dimension, or when allocation, scaling setup or encoding
 *         fails (nothing is stored in those cases).
 */
int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {

    // A zero height would divide by zero in the aspect ratio below.
    if (img->width == 0 || img->height == 0) {
        return FALSE;
    }

    int dstW;
    int dstH;

    if (img->width <= ctx->tn_size && img->height <= ctx->tn_size) {
        dstW = img->width;
        dstH = img->height;
    } else {
        double ratio = (double) img->width / img->height;
        if (img->width > img->height) {
            dstW = ctx->tn_size;
            dstH = (int) (ctx->tn_size / ratio);
        } else {
            dstW = (int) (ctx->tn_size * ratio);
            dstH = ctx->tn_size;
        }
    }

    if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
        return FALSE;
    }

    // Everything released at `cleanup` starts out NULL so that the label can
    // be reached from any point; each release function below accepts NULL.
    int stored = FALSE;
    int dst_buf_len = 0;
    struct SwsContext *sws_ctx = NULL;
    uint8_t *dst_buf = NULL;
    AVCodecContext *jpeg_encoder = NULL;
    AVFrame *scaled_frame = NULL;
    AVPacket jpeg_packet;
    const uint8_t *in_data[1] = {img->data};
    int in_line_size[1] = {3 * img->width};

    scaled_frame = av_frame_alloc();
    if (scaled_frame == NULL) {
        goto cleanup;
    }

    sws_ctx = sws_getContext(
            img->width, img->height, AV_PIX_FMT_RGB24,
            dstW, dstH, AV_PIX_FMT_YUVJ420P,
            SIST_SWS_ALGO, 0, 0, 0
    );
    if (sws_ctx == NULL) {
        goto cleanup;
    }

    dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
    if (dst_buf_len < 0) {
        goto cleanup;
    }

    dst_buf = (uint8_t *) av_malloc(dst_buf_len);
    if (dst_buf == NULL) {
        goto cleanup;
    }

    av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);

    sws_scale(sws_ctx,
              in_data, in_line_size,
              0, img->height,
              scaled_frame->data, scaled_frame->linesize
    );

    scaled_frame->width = dstW;
    scaled_frame->height = dstH;
    scaled_frame->format = AV_PIX_FMT_YUV420P;

    jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, 1.0f);
    if (jpeg_encoder == NULL) {
        goto cleanup;
    }

    if (avcodec_send_frame(jpeg_encoder, scaled_frame) < 0) {
        goto cleanup;
    }

    // av_init_packet() leaves data and size untouched, so set them here.
    av_init_packet(&jpeg_packet);
    jpeg_packet.data = NULL;
    jpeg_packet.size = 0;

    if (avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
        APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
        ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
        stored = TRUE;
    }

    av_packet_unref(&jpeg_packet);

cleanup:
    sws_freeContext(sws_ctx);
    // The frame only borrows dst_buf (no AVBufferRef), so free it explicitly.
    av_free(dst_buf);
    av_frame_free(&scaled_frame);
    avcodec_free_context(&jpeg_encoder);

    return stored;
}
|
||||
|
||||
#define DMS_REF(ref) (((ref) == 'S' || (ref) == 'W') ? -1 : 1)
|
||||
|
||||
void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
libraw_data_t *libraw_lib = libraw_init(0);
|
||||
|
||||
if (!libraw_lib) {
|
||||
CTX_LOG_ERROR("raw.c", "Cannot create libraw handle")
|
||||
return;
|
||||
}
|
||||
|
||||
size_t buf_len = 0;
|
||||
void *buf = read_all(f, &buf_len);
|
||||
if (buf == NULL) {
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed")
|
||||
return;
|
||||
}
|
||||
|
||||
int ret = libraw_open_buffer(libraw_lib, buf, buf_len);
|
||||
if (ret != 0) {
|
||||
CTX_LOG_ERROR(f->filepath, "Could not open raw file")
|
||||
free(buf);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
}
|
||||
|
||||
if (*libraw_lib->idata.model != '\0') {
|
||||
APPEND_STR_META(doc, MetaExifModel, libraw_lib->idata.model)
|
||||
}
|
||||
if (*libraw_lib->idata.make != '\0') {
|
||||
APPEND_STR_META(doc, MetaExifMake, libraw_lib->idata.make)
|
||||
}
|
||||
if (*libraw_lib->idata.software != '\0') {
|
||||
APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software)
|
||||
}
|
||||
APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width)
|
||||
APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height)
|
||||
char tmp[1024];
|
||||
snprintf(tmp, sizeof(tmp), "%g", libraw_lib->other.iso_speed);
|
||||
APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp)
|
||||
|
||||
if (*libraw_lib->other.desc != '\0') {
|
||||
APPEND_STR_META(doc, MetaContent, libraw_lib->other.desc)
|
||||
}
|
||||
if (*libraw_lib->other.artist != '\0') {
|
||||
APPEND_STR_META(doc, MetaArtist, libraw_lib->other.artist)
|
||||
}
|
||||
|
||||
struct tm *time = localtime(&libraw_lib->other.timestamp);
|
||||
strftime(tmp, sizeof(tmp), "%Y:%m:%d %H:%M:%S", time);
|
||||
APPEND_STR_META(doc, MetaExifDateTime, tmp)
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.focal_len);
|
||||
APPEND_STR_META(doc, MetaExifFocalLength, tmp)
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.aperture);
|
||||
APPEND_STR_META(doc, MetaExifFNumber, tmp)
|
||||
|
||||
int denominator = (int) roundf(1 / libraw_lib->other.shutter);
|
||||
snprintf(tmp, sizeof(tmp), "1/%d", denominator);
|
||||
APPEND_STR_META(doc, MetaExifExposureTime, tmp)
|
||||
|
||||
libraw_gps_info_t gps = libraw_lib->other.parsed_gps;
|
||||
double gps_longitude_dec =
|
||||
(gps.longitude[0] + gps.longitude[1] / 60 + gps.longitude[2] / 3600) * DMS_REF(gps.longref);
|
||||
snprintf(tmp, sizeof(tmp), "%.15f", gps_longitude_dec);
|
||||
if (gps_longitude_dec != 0.0) {
|
||||
APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp)
|
||||
}
|
||||
|
||||
double gps_latitude_dec = (gps.latitude[0] + gps.latitude[1] / 60 + gps.latitude[2] / 3600) * DMS_REF(gps.latref);
|
||||
snprintf(tmp, sizeof(tmp), "%.15f", gps_latitude_dec);
|
||||
if (gps_latitude_dec != 0.0) {
|
||||
APPEND_STR_META(doc, MetaExifGpsLatitudeDec, tmp)
|
||||
}
|
||||
|
||||
APPEND_STR_META(doc, MetaMediaVideoCodec, "raw")
|
||||
|
||||
if (ctx->tn_size <= 0) {
|
||||
free(buf);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
}
|
||||
|
||||
int unpack_ret = libraw_unpack_thumb(libraw_lib);
|
||||
if (unpack_ret != 0) {
|
||||
CTX_LOG_ERRORF(f->filepath, "libraw_unpack_thumb returned error code %d", unpack_ret)
|
||||
free(buf);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
}
|
||||
|
||||
int errc = 0;
|
||||
libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
|
||||
if (errc != 0) {
|
||||
free(buf);
|
||||
libraw_dcraw_clear_mem(thumb);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
}
|
||||
|
||||
int tn_ok = 0;
|
||||
if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_JPEG) {
|
||||
tn_ok = store_thumbnail_jpeg(ctx, thumb, doc);
|
||||
} else if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_BITMAP) {
|
||||
// TODO: technically this should work but is currently untested
|
||||
tn_ok = store_thumbnail_rgb24(ctx, thumb, doc);
|
||||
}
|
||||
|
||||
libraw_dcraw_clear_mem(thumb);
|
||||
|
||||
if (tn_ok == TRUE) {
|
||||
free(buf);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = libraw_unpack(libraw_lib);
|
||||
if (ret != 0) {
|
||||
CTX_LOG_ERROR(f->filepath, "Could not unpack raw file")
|
||||
free(buf);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
}
|
||||
|
||||
libraw_dcraw_process(libraw_lib);
|
||||
|
||||
errc = 0;
|
||||
libraw_processed_image_t *img = libraw_dcraw_make_mem_image(libraw_lib, &errc);
|
||||
if (errc != 0) {
|
||||
free(buf);
|
||||
libraw_dcraw_clear_mem(img);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
}
|
||||
|
||||
store_thumbnail_rgb24(ctx, img, doc);
|
||||
|
||||
libraw_dcraw_clear_mem(img);
|
||||
libraw_close(libraw_lib);
|
||||
|
||||
free(buf);
|
||||
}
|
||||
17
third-party/libscan/libscan/raw/raw.h
vendored
Normal file
17
third-party/libscan/libscan/raw/raw.h
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
#ifndef SIST2_RAW_H
|
||||
#define SIST2_RAW_H
|
||||
|
||||
#include "../scan.h"
|
||||
|
||||
typedef struct {
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
store_callback_t store;
|
||||
|
||||
int tn_size;
|
||||
float tn_qscale;
|
||||
} scan_raw_ctx_t;
|
||||
|
||||
void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
||||
#endif //SIST2_RAW_H
|
||||
170
third-party/libscan/libscan/scan.h
vendored
Normal file
170
third-party/libscan/libscan/scan.h
vendored
Normal file
@@ -0,0 +1,170 @@
|
||||
#ifndef SCAN_SCAN_H
|
||||
#define SCAN_SCAN_H
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
#include <openssl/md5.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#include "macros.h"
|
||||
|
||||
#define SIST_SWS_ALGO SWS_LANCZOS
|
||||
|
||||
#define UNUSED(x) __attribute__((__unused__)) x
|
||||
|
||||
typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);
|
||||
|
||||
typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...);
|
||||
|
||||
typedef void (*log_callback_t)(const char *filepath, int level, char *str);
|
||||
|
||||
typedef int scan_code_t;
|
||||
#define SCAN_OK (scan_code_t) 0
|
||||
#define SCAN_ERR_READ (scan_code_t) (-1)
|
||||
#define SCAN_ERR_SKIP (scan_code_t) (-2)
|
||||
|
||||
#define LEVEL_DEBUG 0
|
||||
#define LEVEL_INFO 1
|
||||
#define LEVEL_WARNING 2
|
||||
#define LEVEL_ERROR 3
|
||||
#define LEVEL_FATAL 4
|
||||
|
||||
#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__);
|
||||
#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str);
|
||||
|
||||
#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__);
|
||||
#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str);
|
||||
|
||||
#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__);
|
||||
#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str);
|
||||
|
||||
#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__);
|
||||
#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str);
|
||||
|
||||
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
|
||||
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);
|
||||
|
||||
// Keys identifying the metadata entries (meta_line_t) attached to a document.
// The order defines the numeric values, so entries must not be reordered.
enum metakey {
    // String
    MetaContent = 1,
    MetaMediaAudioCodec,
    MetaMediaVideoCodec,
    MetaArtist,
    MetaAlbum,
    MetaAlbumArtist,
    MetaGenre,
    MetaTitle,
    MetaFontName,
    MetaParent,
    MetaExifMake,
    MetaExifSoftware,
    MetaExifExposureTime,
    MetaExifFNumber,
    MetaExifFocalLength,
    MetaExifUserComment,
    MetaExifModel,
    MetaExifIsoSpeedRatings,
    MetaExifDateTime,
    MetaAuthor,
    MetaModifiedBy,
    MetaThumbnail,
    MetaChecksum,

    // Number
    MetaWidth,
    MetaHeight,
    MetaMediaDuration,
    MetaMediaBitrate,
    MetaPages,

    // ?? (value type not stated here; the raw parser appends the two *Dec
    // keys as strings - TODO confirm for the others)
    MetaExifGpsLongitudeDMS,
    MetaExifGpsLongitudeRef,
    MetaExifGpsLatitudeDMS,
    MetaExifGpsLatitudeRef,
    MetaExifGpsLatitudeDec,
    MetaExifGpsLongitudeDec,
};
|
||||
|
||||
typedef struct meta_line {
|
||||
struct meta_line *next;
|
||||
enum metakey key;
|
||||
union {
|
||||
char str_val[0];
|
||||
unsigned long long_val;
|
||||
double double_val;
|
||||
};
|
||||
} meta_line_t;
|
||||
|
||||
|
||||
typedef struct document {
|
||||
unsigned char path_md5[MD5_DIGEST_LENGTH];
|
||||
unsigned long size;
|
||||
unsigned int mime;
|
||||
int mtime;
|
||||
short base;
|
||||
short ext;
|
||||
char has_parent;
|
||||
meta_line_t *meta_head;
|
||||
meta_line_t *meta_tail;
|
||||
char *filepath;
|
||||
} document_t;
|
||||
|
||||
typedef struct vfile vfile_t;
|
||||
|
||||
__attribute__((warn_unused_result))
|
||||
typedef int (*read_func_t)(struct vfile *, void *buf, size_t size);
|
||||
|
||||
__attribute__((warn_unused_result))
|
||||
typedef long (*seek_func_t)(struct vfile *, long offset, int whence);
|
||||
|
||||
typedef void (*close_func_t)(struct vfile *);
|
||||
|
||||
typedef void (*reset_func_t)(struct vfile *);
|
||||
|
||||
typedef struct vfile {
|
||||
union {
|
||||
int fd;
|
||||
struct archive *arc;
|
||||
const void *_test_data;
|
||||
};
|
||||
|
||||
int is_fs_file;
|
||||
int has_checksum;
|
||||
int calculate_checksum;
|
||||
const char *filepath;
|
||||
struct stat info;
|
||||
|
||||
SHA_CTX sha1_ctx;
|
||||
unsigned char sha1_digest[SHA1_DIGEST_LENGTH];
|
||||
|
||||
void *rewind_buffer;
|
||||
int rewind_buffer_size;
|
||||
int rewind_buffer_cursor;
|
||||
|
||||
read_func_t read;
|
||||
read_func_t read_rewindable;
|
||||
close_func_t close;
|
||||
reset_func_t reset;
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
} vfile_t;
|
||||
|
||||
typedef struct parse_job_t {
|
||||
int base;
|
||||
int ext;
|
||||
struct vfile vfile;
|
||||
unsigned char parent[MD5_DIGEST_LENGTH];
|
||||
char filepath[1];
|
||||
} parse_job_t;
|
||||
|
||||
|
||||
#include "util.h"
|
||||
|
||||
typedef void (*parse_callback_t)(parse_job_t *job);
|
||||
|
||||
#endif
|
||||
64
third-party/libscan/libscan/text/text.c
vendored
Normal file
64
third-party/libscan/libscan/text/text.c
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
#include "text.h"
|
||||
|
||||
/**
 * Index the beginning of a plain-text file as MetaContent.
 *
 * At most ctx->content_size bytes are read. Files that yield two bytes or
 * fewer are ignored. A leading UTF-16 byte order mark selects the naive
 * 16-bit conversion helpers; anything else is appended as UTF-8.
 *
 * @return SCAN_OK on success or when there is nothing to index,
 *         SCAN_ERR_READ when the buffer cannot be allocated or the read fails.
 */
scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {

    int to_read = MIN(ctx->content_size, f->info.st_size);

    if (to_read <= 2) {
        return SCAN_OK;
    }

    char *buf = malloc(to_read);
    if (buf == NULL) {
        CTX_LOG_ERROR(doc->filepath, "malloc() failed")
        return SCAN_ERR_READ;
    }

    int ret = f->read(f, buf, to_read);
    if (ret < 0) {
        CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
        free(buf);
        return SCAN_ERR_READ;
    }

    // Only the bytes actually returned by read() are initialised.
    if (ret < to_read) {
        to_read = ret;
    }
    if (to_read <= 2) {
        free(buf);
        return SCAN_OK;
    }

    text_buffer_t tex = text_buffer_create(ctx->content_size);

    // Compare the BOM byte by byte so the result does not depend on host
    // endianness or pointer aliasing. The helper names look swapped, but each
    // helper keeps the low-order byte of the matching encoding:
    //   FE FF (big-endian BOM)    -> *_le keeps the bytes at odd offsets
    //   FF FE (little-endian BOM) -> *_be keeps the bytes at even offsets
    const unsigned char bom0 = (unsigned char) buf[0];
    const unsigned char bom1 = (unsigned char) buf[1];

    if (bom0 == 0xFE && bom1 == 0xFF) {
        text_buffer_append_string16_le(&tex, buf + 2, to_read - 2);
    } else if (bom0 == 0xFF && bom1 == 0xFE) {
        text_buffer_append_string16_be(&tex, buf + 2, to_read - 2);
    } else {
        text_buffer_append_string(&tex, buf, to_read);
    }
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
|
||||
|
||||
#define MAX_MARKUP_SIZE (1024 * 1024)
|
||||
|
||||
/**
 * Index a markup file: read at most MAX_MARKUP_SIZE bytes, drop everything
 * between '<' and '>' and store the remaining text as MetaContent.
 *
 * @return SCAN_OK on success, SCAN_ERR_READ when the buffer cannot be
 *         allocated or the read fails.
 */
scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {

    int to_read = MIN(MAX_MARKUP_SIZE, f->info.st_size);

    // One extra byte for the terminator added below.
    char *buf = malloc(to_read + 1);
    if (buf == NULL) {
        CTX_LOG_ERROR(doc->filepath, "malloc() failed")
        return SCAN_ERR_READ;
    }

    int ret = f->read(f, buf, to_read);
    if (ret < 0) {
        CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
        free(buf);
        return SCAN_ERR_READ;
    }

    // Terminate after the bytes actually read so that a short read never
    // exposes uninitialised memory to the markup scanner.
    int valid_len = MIN(ret, to_read);
    *(buf + valid_len) = '\0';

    text_buffer_t tex = text_buffer_create(ctx->content_size);
    text_buffer_append_markup(&tex, buf);
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
|
||||
18
third-party/libscan/libscan/text/text.h
vendored
Normal file
18
third-party/libscan/libscan/text/text.h
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
#ifndef SCAN_TEXT_H
|
||||
#define SCAN_TEXT_H
|
||||
|
||||
#include "../scan.h"
|
||||
#include "../util.h"
|
||||
|
||||
typedef struct {
|
||||
long content_size;
|
||||
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
} scan_text_ctx_t;
|
||||
|
||||
scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
||||
scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
||||
#endif
|
||||
0
third-party/libscan/libscan/util.c
vendored
Normal file
0
third-party/libscan/libscan/util.c
vendored
Normal file
361
third-party/libscan/libscan/util.h
vendored
Normal file
361
third-party/libscan/libscan/util.h
vendored
Normal file
@@ -0,0 +1,361 @@
|
||||
#ifndef SCAN_UTIL_H
|
||||
#define SCAN_UTIL_H
|
||||
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include "../third-party/utf8.h/utf8.h"
|
||||
#include "macros.h"
|
||||
|
||||
#define STR_STARTS_WITH_CONSTANT(x, y) (strncmp(y, x, sizeof(y) - 1) == 0)
|
||||
|
||||
#define TEXT_BUF_FULL (-1)
|
||||
#define INITIAL_BUF_SIZE (1024 * 16)
|
||||
|
||||
#define SHOULD_IGNORE_CHAR(c) !(SHOULD_KEEP_CHAR(c))
|
||||
#define SHOULD_KEEP_CHAR(c) (\
|
||||
((c) >= '\'' && (c) <= ';') || \
|
||||
((c) >= 'A' && (c) <= 'z') || \
|
||||
((c) > 127 && (c) != 0x00A0 && (c) && (c) != 0xFFFD))
|
||||
|
||||
|
||||
// Growable byte buffer.
typedef struct dyn_buffer {
    // Heap storage, owned by the buffer (released by dyn_buffer_destroy()).
    char *buf;
    // Number of bytes written so far; also the offset of the next write.
    size_t cur;
    // Allocated capacity of buf, in bytes.
    size_t size;
} dyn_buffer_t;
|
||||
|
||||
typedef struct text_buffer {
|
||||
long max_size;
|
||||
int last_char_was_whitespace;
|
||||
dyn_buffer_t dyn_buffer;
|
||||
} text_buffer_t;
|
||||
|
||||
static int utf8_validchr2(const char *s) {
|
||||
if (0x00 == (0x80 & *s)) {
|
||||
return TRUE;
|
||||
} else if (0xf0 == (0xf8 & *s)) {
|
||||
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) ||
|
||||
(0x80 != (0xc0 & s[3]))) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (0x80 == (0xc0 & s[4])) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) {
|
||||
return FALSE;
|
||||
}
|
||||
} else if (0xe0 == (0xf0 & *s)) {
|
||||
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (0x80 == (0xc0 & s[3])) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) {
|
||||
return FALSE;
|
||||
}
|
||||
} else if (0xc0 == (0xe0 & *s)) {
|
||||
if (0x80 != (0xc0 & s[1])) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (0x80 == (0xc0 & s[2])) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (0 == (0x1e & s[0])) {
|
||||
return FALSE;
|
||||
}
|
||||
} else {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
static dyn_buffer_t dyn_buffer_create() {
|
||||
dyn_buffer_t buf;
|
||||
|
||||
buf.size = INITIAL_BUF_SIZE;
|
||||
buf.cur = 0;
|
||||
buf.buf = (char *) malloc(INITIAL_BUF_SIZE);
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
/**
 * Make sure `size` more bytes fit after the write cursor, doubling the
 * capacity as often as needed.
 *
 * This function has no way to report failure and every caller writes to the
 * buffer right away, so running out of memory aborts the process with a
 * message instead of leaving a NULL buffer behind.
 */
static void grow_buffer(dyn_buffer_t *buf, size_t size) {
    if (buf->cur + size <= buf->size) {
        return;
    }

    // Doubling zero would never terminate.
    size_t new_size = buf->size != 0 ? buf->size : INITIAL_BUF_SIZE;
    while (buf->cur + size > new_size) {
        new_size *= 2;
    }

    // Keep the old pointer until realloc is known to have succeeded.
    char *grown = (char *) realloc(buf->buf, new_size);
    if (grown == NULL) {
        fprintf(stderr, "grow_buffer: out of memory (%zu bytes)\n", new_size);
        abort();
    }

    buf->buf = grown;
    buf->size = new_size;
}
|
||||
|
||||
/**
 * Make sure at least sizeof(long) bytes fit after the write cursor, doubling
 * the capacity once when they do not.
 *
 * This function has no way to report failure and every caller writes to the
 * buffer right away, so running out of memory aborts the process with a
 * message instead of leaving a NULL buffer behind.
 */
static void grow_buffer_small(dyn_buffer_t *buf) {
    if (buf->cur + sizeof(long) <= buf->size) {
        return;
    }

    size_t new_size = buf->size * 2;

    // Keep the old pointer until realloc is known to have succeeded.
    char *grown = (char *) realloc(buf->buf, new_size);
    if (grown == NULL) {
        fprintf(stderr, "grow_buffer_small: out of memory (%zu bytes)\n", new_size);
        abort();
    }

    buf->buf = grown;
    buf->size = new_size;
}
|
||||
|
||||
// Append `size` raw bytes from `data`, growing the buffer first if needed.
static void dyn_buffer_write(dyn_buffer_t *buf, const void *data, size_t size) {
    grow_buffer(buf, size);

    // Compute the destination only after growing: the storage may have moved.
    char *dst = buf->buf + buf->cur;
    memcpy(dst, data, size);
    buf->cur += size;
}
|
||||
|
||||
// Append a single byte, growing the buffer first if needed.
static void dyn_buffer_write_char(dyn_buffer_t *buf, char c) {
    grow_buffer_small(buf);

    buf->buf[buf->cur] = c;
    buf->cur += sizeof(c);
}
|
||||
|
||||
// Append a C string including its NUL terminator.
static void dyn_buffer_write_str(dyn_buffer_t *buf, const char *str) {
    const size_t str_len = strlen(str);

    dyn_buffer_write(buf, str, str_len);
    dyn_buffer_write_char(buf, '\0');
}
|
||||
|
||||
// Append the characters of a C string, without its NUL terminator.
static void dyn_buffer_append_string(dyn_buffer_t *buf, const char *str) {
    const size_t str_len = strlen(str);

    dyn_buffer_write(buf, str, str_len);
}
|
||||
|
||||
// Append the native byte representation of an int.
static void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
    grow_buffer_small(buf);

    // The cursor can sit at any byte offset, so copy the bytes instead of
    // storing through a possibly misaligned int pointer.
    memcpy(buf->buf + buf->cur, &d, sizeof(d));
    buf->cur += sizeof(d);
}
|
||||
|
||||
// Append the native byte representation of a 16-bit unsigned value.
static void dyn_buffer_write_short(dyn_buffer_t *buf, uint16_t s) {
    grow_buffer_small(buf);

    // The cursor can sit at any byte offset, so copy the bytes instead of
    // storing through a possibly misaligned uint16_t pointer.
    memcpy(buf->buf + buf->cur, &s, sizeof(s));
    buf->cur += sizeof(s);
}
|
||||
|
||||
// Append the native byte representation of an unsigned long.
static void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) {
    grow_buffer_small(buf);

    // The cursor can sit at any byte offset, so copy the bytes instead of
    // storing through a possibly misaligned unsigned long pointer.
    memcpy(buf->buf + buf->cur, &l, sizeof(l));
    buf->cur += sizeof(l);
}
|
||||
|
||||
// Release the storage owned by the buffer. The struct itself is not freed
// and its fields are left untouched, so it must not be used afterwards.
static void dyn_buffer_destroy(dyn_buffer_t *buf) {
    free(buf->buf);
}
|
||||
|
||||
// Release the storage owned by a text buffer (its underlying dyn_buffer).
static void text_buffer_destroy(text_buffer_t *buf) {
    dyn_buffer_t *storage = &buf->dyn_buffer;

    dyn_buffer_destroy(storage);
}
|
||||
|
||||
static text_buffer_t text_buffer_create(long max_size) {
|
||||
text_buffer_t text_buf;
|
||||
|
||||
text_buf.dyn_buffer = dyn_buffer_create();
|
||||
text_buf.max_size = max_size;
|
||||
text_buf.last_char_was_whitespace = FALSE;
|
||||
|
||||
return text_buf;
|
||||
}
|
||||
|
||||
/**
 * Append one code point to the text buffer, encoded as UTF-8.
 *
 * Spaces and code points rejected by SHOULD_KEEP_CHAR are written as a
 * single ' ', and only when the buffer is non-empty and the previous
 * character was not already such a separator.
 *
 * @return TEXT_BUF_FULL when something was written and the buffer now holds
 *         more than max_size bytes (only if max_size > 0), otherwise 0.
 */
static int text_buffer_append_char(text_buffer_t *buf, int c) {
    dyn_buffer_t *out = &buf->dyn_buffer;

    if (SHOULD_IGNORE_CHAR(c) || c == ' ') {
        // Nothing to write: leading separator or a repeated one.
        if (buf->last_char_was_whitespace || out->cur == 0) {
            return 0;
        }

        dyn_buffer_write_char(out, ' ');
        buf->last_char_was_whitespace = TRUE;
    } else {
        buf->last_char_was_whitespace = FALSE;

        // Guarantees sizeof(long) free bytes, enough for the longest
        // (four byte) encoding written below.
        grow_buffer_small(out);

        if (((utf8_int32_t) 0xffffff80 & c) == 0) {
            // One byte.
            out->buf[out->cur++] = (char) c;
        } else if (((utf8_int32_t) 0xfffff800 & c) == 0) {
            // Two bytes.
            out->buf[out->cur++] = 0xc0 | (char) (c >> 6);
            out->buf[out->cur++] = 0x80 | (char) (c & 0x3f);
        } else if (((utf8_int32_t) 0xffff0000 & c) == 0) {
            // Three bytes.
            out->buf[out->cur++] = 0xe0 | (char) (c >> 12);
            out->buf[out->cur++] = 0x80 | (char) ((c >> 6) & 0x3f);
            out->buf[out->cur++] = 0x80 | (char) (c & 0x3f);
        } else {
            // Four bytes.
            out->buf[out->cur++] = 0xf0 | (char) (c >> 18);
            out->buf[out->cur++] = 0x80 | (char) ((c >> 12) & 0x3f);
            out->buf[out->cur++] = 0x80 | (char) ((c >> 6) & 0x3f);
            out->buf[out->cur++] = 0x80 | (char) (c & 0x3f);
        }
    }

    if (buf->max_size > 0 && out->cur > buf->max_size) {
        return TEXT_BUF_FULL;
    }

    return 0;
}
|
||||
|
||||
|
||||
// NUL-terminate the accumulated text. A trailing separator space is
// overwritten by the terminator; otherwise the terminator is appended.
static void text_buffer_terminate_string(text_buffer_t *buf) {
    dyn_buffer_t *out = &buf->dyn_buffer;
    const int ends_with_space = out->cur > 0 && out->buf[out->cur - 1] == ' ';

    if (ends_with_space) {
        out->buf[out->cur - 1] = '\0';
        return;
    }

    dyn_buffer_write_char(out, '\0');
}
|
||||
|
||||
// Naive UTF16 -> ascii conversion
|
||||
static int text_buffer_append_string16_le(text_buffer_t *buf, const char *str, size_t len) {
|
||||
int ret = 0;
|
||||
for (int i = 1; i < len; i += 2) {
|
||||
ret = text_buffer_append_char(buf, str[i]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Naive 16-bit -> ascii conversion: appends the byte at every even offset
// (0, 2, 4, ...) of `str` and ignores the bytes at odd offsets. Stops as
// soon as the buffer reports TEXT_BUF_FULL.
//
// Returns TEXT_BUF_FULL when the buffer filled up, otherwise the result of
// the last append (0 when nothing was appended).
static int text_buffer_append_string16_be(text_buffer_t *buf, const char *str, size_t len) {
    int ret = 0;

    // size_t index: `len` is unsigned and may exceed INT_MAX.
    for (size_t i = 0; i < len; i += 2) {
        ret = text_buffer_append_char(buf, str[i]);
        if (ret == TEXT_BUF_FULL) {
            break;
        }
    }

    return ret;
}
|
||||
|
||||
#define UTF8_END_OF_STRING \
|
||||
(ptr - str >= len || *ptr == 0 || \
|
||||
(0xc0 == (0xe0 & *ptr) && ptr - str > len - 2) || \
|
||||
(0xe0 == (0xf0 & *ptr) && ptr - str > len - 3) || \
|
||||
(0xf0 == (0xf8 & *ptr) && ptr - str > len - 4))
|
||||
|
||||
/**
 * Append up to `len` bytes of UTF-8 text from `str`.
 *
 * Inputs of four bytes or fewer take a shortcut: only ASCII characters
 * accepted by SHOULD_KEEP_CHAR are copied, without separator handling.
 * Longer inputs are decoded one code point at a time; invalid sequences are
 * skipped and decoding stops at a NUL or when the remaining bytes cannot
 * hold the next sequence (see UTF8_END_OF_STRING).
 *
 * @return 0, or the first non-zero result of text_buffer_append_char()
 *         (TEXT_BUF_FULL).
 */
static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t len) {

    // UTF8_END_OF_STRING refers to `ptr`, `str` and `len` by name.
    const char *ptr = str;
    const char *oldPtr = ptr;

    if (str == NULL || UTF8_END_OF_STRING) {
        return 0;
    }

    if (len <= 4) {
        for (size_t i = 0; i < len; i++) {
            const char ch = str[i];
            const int is_ascii = ((utf8_int32_t) 0xffffff80 & ch) == 0;

            if (is_ascii && SHOULD_KEEP_CHAR(ch)) {
                dyn_buffer_write_char(&buf->dyn_buffer, ch);
            }
        }
        return 0;
    }

    utf8_int32_t c;
    char tmp[16] = {0};

    do {
        ptr = (char *) utf8codepoint(ptr, &c);

        // Copy the raw bytes of the sequence just decoded into a zero-padded
        // scratch buffer so that the validator can safely look ahead.
        memset(tmp, 0, sizeof(int));
        memcpy(tmp, oldPtr, ptr - oldPtr);
        oldPtr = ptr;

        if (utf8_validchr2(tmp)) {
            int ret = text_buffer_append_char(buf, c);
            if (ret != 0) {
                return ret;
            }
        }
    } while (!UTF8_END_OF_STRING);

    return 0;
}
|
||||
|
||||
// Append a NUL-terminated UTF-8 string; see text_buffer_append_string().
static int text_buffer_append_string0(text_buffer_t *buf, const char *str) {
    const size_t str_len = strlen(str);

    return text_buffer_append_string(buf, str, str_len);
}
|
||||
|
||||
/**
 * Append the text found between tags of a NUL-terminated markup string.
 *
 * Scanning starts in the "inside a tag" state, so nothing is appended before
 * the first '>'. Each non-empty run between a '>' and the next '<' is
 * appended, followed by a separator. After the loop, everything from the
 * start of the last run to the end of the input is appended as well,
 * whatever the tag state is at that point.
 *
 * @return TEXT_BUF_FULL as soon as the buffer reports it, otherwise 0.
 */
static int text_buffer_append_markup(text_buffer_t *buf, const char *markup) {

    int inside_tag = TRUE;
    const char *segment = markup;
    const char *cursor;

    for (cursor = markup; *cursor != '\0'; cursor++) {
        const char ch = *cursor;

        if (inside_tag) {
            if (ch == '>') {
                inside_tag = FALSE;
                segment = cursor + 1;
            }
            continue;
        }

        if (ch != '<') {
            continue;
        }

        // A new tag starts here: flush the text collected since the last '>'.
        inside_tag = TRUE;
        if (cursor == segment) {
            continue;
        }
        if (text_buffer_append_string(buf, segment, (cursor - segment)) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
        if (text_buffer_append_char(buf, ' ') == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
    }

    if (cursor != segment) {
        if (text_buffer_append_string(buf, segment, (cursor - segment)) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
        if (text_buffer_append_char(buf, ' ') == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
    }

    return 0;
}
|
||||
|
||||
/**
 * Read the whole file (f->info.st_size bytes) into a newly allocated buffer.
 *
 * @param f    file to read
 * @param size receives the number of bytes read, or 0 when the allocation or
 *             the read itself fails
 * @return the buffer, to be released with free() by the caller, or NULL when
 *         the allocation fails, the read fails, or fewer bytes than
 *         st_size were returned.
 */
static void *read_all(vfile_t *f, size_t *size) {
    void *buf = malloc(f->info.st_size);
    if (buf == NULL) {
        *size = 0;
        return NULL;
    }

    // Keep the result signed so a negative error code is recognised as such
    // instead of being converted to a huge size.
    int read_len = f->read(f, buf, f->info.st_size);
    *size = read_len < 0 ? 0 : (size_t) read_len;

    if (read_len < 0 || read_len != f->info.st_size) {
        free(buf);
        return NULL;
    }

    return buf;
}
|
||||
|
||||
#define STACK_BUFFER_SIZE (size_t)(4096 * 8)
|
||||
|
||||
__always_inline
|
||||
static void safe_sha1_update(SHA_CTX *ctx, void *buf, size_t size) {
|
||||
unsigned char stack_buf[STACK_BUFFER_SIZE];
|
||||
|
||||
void *sha1_buf;
|
||||
if (size <= STACK_BUFFER_SIZE) {
|
||||
sha1_buf = stack_buf;
|
||||
} else {
|
||||
void *heap_sha1_buf = malloc(size);
|
||||
sha1_buf = heap_sha1_buf;
|
||||
}
|
||||
|
||||
memcpy(sha1_buf, buf, size);
|
||||
SHA1_Update(ctx, (const void *) sha1_buf, size);
|
||||
|
||||
if (sha1_buf != stack_buf) {
|
||||
free(sha1_buf);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
200
third-party/libscan/libscan/wpd/libwpd_c_api.cpp
vendored
Normal file
200
third-party/libscan/libscan/wpd/libwpd_c_api.cpp
vendored
Normal file
@@ -0,0 +1,200 @@
|
||||
#include "libwpd_c_api.h"
|
||||
#include "libwpd/libwpd.h"
|
||||
#include "libwpd/WPXProperty.h"
|
||||
#include "libwpd-stream/libwpd-stream.h"
|
||||
|
||||
class StringDocument : public WPXDocumentInterface {
|
||||
|
||||
private:
|
||||
text_buffer_t *tex;
|
||||
document_t *doc;
|
||||
bool is_full;
|
||||
public:
|
||||
|
||||
StringDocument(text_buffer_t *tex, document_t *doc) {
|
||||
this->tex = tex;
|
||||
this->doc = doc;
|
||||
this->is_full = false;
|
||||
}
|
||||
|
||||
void setDocumentMetaData(const WPXPropertyList &propList) override {
|
||||
|
||||
WPXPropertyList::Iter propIter(propList);
|
||||
for (propIter.rewind(); propIter.next();) {
|
||||
// TODO: Read metadata here ?!
|
||||
}
|
||||
}
|
||||
|
||||
void endDocument() override {
|
||||
text_buffer_terminate_string(this->tex);
|
||||
}
|
||||
|
||||
void closeParagraph() override {
|
||||
if (!this->is_full) {
|
||||
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||
this->is_full = true;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void closeSpan() override {
|
||||
if (!this->is_full) {
|
||||
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||
this->is_full = true;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void closeSection() override {
|
||||
if (!this->is_full) {
|
||||
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||
this->is_full = true;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void insertTab() override {
|
||||
if (!this->is_full) {
|
||||
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||
this->is_full = true;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void insertSpace() override {
|
||||
if (!this->is_full) {
|
||||
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||
this->is_full = true;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void insertText(const WPXString &text) override {
|
||||
if (!this->is_full) {
|
||||
if (text_buffer_append_string0(tex, text.cstr()) == TEXT_BUF_FULL) {
|
||||
this->is_full = true;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void insertLineBreak() override {
|
||||
if (!this->is_full) {
|
||||
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||
this->is_full = true;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void definePageStyle(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closePageSpan() override { /* noop */ }
|
||||
|
||||
void openHeader(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closeHeader() override { /* noop */ }
|
||||
|
||||
void openFooter(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closeFooter() override { /* noop */ }
|
||||
|
||||
void
|
||||
defineParagraphStyle(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ }
|
||||
|
||||
void openParagraph(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ }
|
||||
|
||||
void defineCharacterStyle(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void openSpan(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void
|
||||
defineSectionStyle(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ }
|
||||
|
||||
void openSection(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ }
|
||||
|
||||
void insertField(const WPXString &type, const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void defineOrderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void defineUnorderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void openOrderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void openUnorderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closeOrderedListLevel() override { /* noop */ }
|
||||
|
||||
void closeUnorderedListLevel() override { /* noop */ }
|
||||
|
||||
void openListElement(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ }
|
||||
|
||||
void closeListElement() override { /* noop */ }
|
||||
|
||||
void openFootnote(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closeFootnote() override { /* noop */ }
|
||||
|
||||
void openEndnote(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closeEndnote() override { /* noop */ }
|
||||
|
||||
void openComment(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closeComment() override { /* noop */ }
|
||||
|
||||
void openTextBox(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closeTextBox() override { /* noop */ }
|
||||
|
||||
void openTable(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ }
|
||||
|
||||
void openTableRow(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closeTableRow() override { /* noop */ }
|
||||
|
||||
void openTableCell(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closeTableCell() override { /* noop */ }
|
||||
|
||||
void insertCoveredTableCell(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closeTable() override { /* noop */ }
|
||||
|
||||
void openFrame(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void closeFrame() override { /* noop */ }
|
||||
|
||||
void insertBinaryObject(const WPXPropertyList &propList, const WPXBinaryData &data) override { /* noop */ }
|
||||
|
||||
void insertEquation(const WPXPropertyList &propList, const WPXString &data) override { /* noop */ }
|
||||
|
||||
void openPageSpan(const WPXPropertyList &propList) override { /* noop */ }
|
||||
|
||||
void startDocument() override { /* noop */ };
|
||||
};
|
||||
|
||||
|
||||
// Wrap an in-memory byte range in a heap-allocated WPXStringStream and hand it
// back as an opaque handle; release it with wpd_memory_stream_destroy().
wpd_stream_t wpd_memory_stream_create(const unsigned char *buf, size_t buf_len) {
    return new WPXStringStream(buf, buf_len);
}
|
||||
|
||||
// Ask libwpd how confident it is that the stream holds a supported document
// and translate the answer to the C enum (same numeric value).
wpd_confidence_t wpd_is_file_format_supported(wpd_stream_t ptr) {
    WPXStringStream *input = static_cast<WPXStringStream *>(ptr);
    const WPDConfidence verdict = WPDocument::isFileFormatSupported(input);
    return static_cast<wpd_confidence_t>(verdict);
}
|
||||
|
||||
// Run the libwpd parser over the stream (no password), collecting the
// extracted text into `tex` through a StringDocument sink.
wpd_result_t wpd_parse(wpd_stream_t ptr, text_buffer_t *tex, document_t *doc) {
    WPXStringStream *input = static_cast<WPXStringStream *>(ptr);

    StringDocument sink(tex, doc);
    const WPDResult status = WPDocument::parse(input, &sink, nullptr);

    return static_cast<wpd_result_t>(status);
}
|
||||
|
||||
// Destroy a stream previously returned by wpd_memory_stream_create().
void wpd_memory_stream_destroy(wpd_stream_t ptr) {
    delete static_cast<WPXStringStream *>(ptr);
}
|
||||
50
third-party/libscan/libscan/wpd/libwpd_c_api.h
vendored
Normal file
50
third-party/libscan/libscan/wpd/libwpd_c_api.h
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
#ifndef SIST2_LIBWPD_C_API_H
|
||||
#define SIST2_LIBWPD_C_API_H
|
||||
|
||||
#include "stdlib.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define EXTERNC extern "C"
|
||||
#else
|
||||
#define EXTERNC
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include "../scan.h"
|
||||
#include "../util.h"
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
/* Opaque stream handle for C callers; libwpd_c_api.cpp casts it to WPXStringStream *. */
typedef void *wpd_stream_t;
|
||||
|
||||
/*
 * Result of wpd_is_file_format_supported().
 * The values are produced by casting libwpd's WPDConfidence, so the numeric
 * values must stay in step with that enum.
 */
typedef enum {
    C_WPD_CONFIDENCE_NONE = 0,
    C_WPD_CONFIDENCE_UNSUPPORTED_ENCRYPTION = 1,
    C_WPD_CONFIDENCE_SUPPORTED_ENCRYPTION = 2,
    C_WPD_CONFIDENCE_EXCELLENT = 3
} wpd_confidence_t;
|
||||
|
||||
/*
 * Result of wpd_parse().
 * The values are produced by casting libwpd's WPDResult, so the numeric values
 * must stay in step with that enum.
 */
typedef enum {
    C_WPD_OK = 0,
    C_WPD_FILE_ACCESS_ERROR = 1,
    C_WPD_PARSE_ERROR = 2,
    C_WPD_UNSUPPORTED_ENCRYPTION_ERROR = 3,
    C_WPD_PASSWORD_MISSMATCH_ERROR = 4,
    C_WPD_OLE_ERROR = 5,
    C_WPD_UNKNOWN_ERROR = 6
} wpd_result_t;
|
||||
|
||||
|
||||
/**
 * Ask libwpd whether the content of `stream` is a document it can parse.
 *
 * @param stream Handle obtained from wpd_memory_stream_create().
 * @return libwpd's confidence level, converted to wpd_confidence_t.
 */
EXTERNC wpd_confidence_t wpd_is_file_format_supported(wpd_stream_t stream);
|
||||
|
||||
/**
 * Create a libwpd input stream over an in-memory buffer.
 *
 * @param buf     Start of the document bytes.
 * @param buf_len Number of bytes at `buf`.
 * @return New stream handle; release it with wpd_memory_stream_destroy().
 *
 * NOTE(review): whether the stream copies `buf` is not visible from this API;
 * keep `buf` alive until the stream is destroyed (parse_wpd does so).
 */
EXTERNC wpd_stream_t wpd_memory_stream_create(const unsigned char *buf, size_t buf_len);
|
||||
|
||||
/**
 * Destroy a stream created by wpd_memory_stream_create().
 *
 * @param stream Handle to release; must not be used afterwards.
 */
EXTERNC void wpd_memory_stream_destroy(wpd_stream_t stream);
|
||||
|
||||
/**
 * Parse the document in `ptr` with libwpd and append its text to `tex`.
 *
 * @param ptr Handle obtained from wpd_memory_stream_create().
 * @param tex Text buffer receiving the extracted text; it is terminated when
 *            libwpd signals the end of the document.
 * @param doc Document being scanned; passed to the parser sink.
 * @return libwpd's parse result, converted to wpd_result_t.
 */
EXTERNC wpd_result_t wpd_parse(wpd_stream_t ptr, text_buffer_t *tex, document_t *doc);
|
||||
|
||||
#endif
|
||||
41
third-party/libscan/libscan/wpd/wpd.c
vendored
Normal file
41
third-party/libscan/libscan/wpd/wpd.c
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
#include "wpd.h"
|
||||
#include "libwpd_c_api.h"
|
||||
|
||||
/**
 * Extract the text of a WordPerfect document and attach it to `doc` as
 * MetaContent.
 *
 * @param ctx Scan context (used by the CTX_LOG_* macros).
 * @param f   File to read; its whole content is loaded in memory.
 * @param doc Document receiving the extracted text.
 * @return SCAN_OK when the file was parsed, SCAN_ERR_READ when it could not be
 *         read, is encrypted, is not recognised by libwpd, or failed to parse.
 */
scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc) {

    size_t buf_len;
    void *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        // read_all() returns NULL on a failed/short read; buf_len is not meaningful then.
        CTX_LOG_ERRORF("wpd.c", "Could not read file [%s]", doc->filepath)
        return SCAN_ERR_READ;
    }

    void *stream = wpd_memory_stream_create(buf, buf_len);
    wpd_confidence_t conf = wpd_is_file_format_supported(stream);

    if (conf == C_WPD_CONFIDENCE_SUPPORTED_ENCRYPTION || conf == C_WPD_CONFIDENCE_UNSUPPORTED_ENCRYPTION) {
        CTX_LOG_DEBUGF("wpd.c", "File is encrypted! Password-protected WPD files are not supported yet (conf=%d)", conf)
        wpd_memory_stream_destroy(stream);
        free(buf);
        return SCAN_ERR_READ;
    }

    if (conf != C_WPD_CONFIDENCE_EXCELLENT) {
        CTX_LOG_ERRORF("wpd.c", "Unsupported file format! [%s] (conf=%d)", doc->filepath, conf)
        wpd_memory_stream_destroy(stream);
        free(buf);
        return SCAN_ERR_READ;
    }

    text_buffer_t tex = text_buffer_create(-1);
    wpd_result_t res = wpd_parse(stream, &tex, doc);

    if (res != C_WPD_OK) {
        CTX_LOG_ERRORF("wpd.c", "Error while parsing WPD file [%s] (%d)",
                       doc->filepath, res)
        // The text buffer is only terminated from the parser's end-of-document
        // callback, which is not guaranteed to have run on failure, so its
        // content must not be used as a C string here.
        text_buffer_destroy(&tex);
        wpd_memory_stream_destroy(stream);
        free(buf);
        return SCAN_ERR_READ;
    }

    if (tex.dyn_buffer.cur != 0) {
        APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
    }

    text_buffer_destroy(&tex);
    wpd_memory_stream_destroy(stream);
    free(buf);

    return SCAN_OK;
}
|
||||
23
third-party/libscan/libscan/wpd/wpd.h
vendored
Normal file
23
third-party/libscan/libscan/wpd/wpd.h
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
#ifndef SIST2_WPD_H
|
||||
#define SIST2_WPD_H
|
||||
|
||||
#include "../scan.h"
|
||||
#include "../util.h"
|
||||
|
||||
/* Per-scan configuration and callbacks for the WordPerfect (WPD) parser. */
typedef struct {
    // NOTE(review): presumably a limit on extracted content size; it is not
    // read by parse_wpd() in wpd.c — confirm intended use.
    long content_size;

    // Logging callbacks; presumably consumed by the CTX_LOG_* macros — confirm.
    log_callback_t log;
    logf_callback_t logf;

    // MIME id that is_wpd() compares against.
    unsigned int wpd_mime;
} scan_wpd_ctx_t;
|
||||
|
||||
/**
 * Extract the text of a WordPerfect document and attach it to `doc`.
 *
 * @param ctx Scan context.
 * @param f   File to read.
 * @param doc Document receiving the extracted text as MetaContent.
 * @return SCAN_ERR_READ when the file is encrypted or is not recognised as a
 *         supported WPD document.
 */
scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
||||
__always_inline
|
||||
static int is_wpd(scan_wpd_ctx_t *ctx, unsigned int mime) {
|
||||
return mime == ctx->wpd_mime;
|
||||
}
|
||||
|
||||
#endif
|
||||
1182
third-party/libscan/test/main.cpp
vendored
Normal file
1182
third-party/libscan/test/main.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user