mirror of
https://github.com/simon987/sist2.git
synced 2025-12-12 23:18:51 +00:00
Compare commits
33 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c786a31bb2 | |||
| 48d024e751 | |||
| 08b2ca9d43 | |||
| ed8b4f4fad | |||
| 66de93a8bd | |||
| e3f78fb693 | |||
| 030643cee0 | |||
| b17b9439df | |||
| 414f65346c | |||
| be8eedc9c7 | |||
| 5b62fe77f2 | |||
| 61ab68ce15 | |||
| 82ecb8bb85 | |||
| a41b5dcc1f | |||
| 06f21d5f0f | |||
| e82a388d1e | |||
| bf02e571b3 | |||
| 750a392a61 | |||
| 3d7b977a82 | |||
| cd71551a22 | |||
| 58741058cf | |||
| 0a7e59b646 | |||
| 43a566fe2f | |||
| b2631a86c8 | |||
| d0a1deca30 | |||
| b03ce90a05 | |||
| a5eacb4950 | |||
| 0887046b41 | |||
| 17fda1e540 | |||
| 34b363bfd8 | |||
| c9aa4bed72 | |||
| 7267d4bd2c | |||
| 43470e9ce6 |
@@ -10,7 +10,7 @@ steps:
|
|||||||
- name: build
|
- name: build
|
||||||
image: simon987/sist2-build
|
image: simon987/sist2-build
|
||||||
commands:
|
commands:
|
||||||
- ./ci/build.sh
|
- ./scripts/build.sh
|
||||||
- name: docker
|
- name: docker
|
||||||
image: plugins/docker
|
image: plugins/docker
|
||||||
settings:
|
settings:
|
||||||
@@ -55,7 +55,7 @@ steps:
|
|||||||
- name: build
|
- name: build
|
||||||
image: simon987/sist2-build-arm64
|
image: simon987/sist2-build-arm64
|
||||||
commands:
|
commands:
|
||||||
- ./ci/build_arm64.sh
|
- ./scripts/build_arm64.sh
|
||||||
- name: scp files
|
- name: scp files
|
||||||
image: appleboy/drone-scp
|
image: appleboy/drone-scp
|
||||||
settings:
|
settings:
|
||||||
|
|||||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -10,13 +10,13 @@ Makefile
|
|||||||
LOG
|
LOG
|
||||||
sist2*
|
sist2*
|
||||||
!sist2-vue/
|
!sist2-vue/
|
||||||
index.sist2/
|
*.sist2/
|
||||||
bundle*.css
|
bundle*.css
|
||||||
bundle.js
|
bundle.js
|
||||||
*.a
|
*.a
|
||||||
vgcore.*
|
vgcore.*
|
||||||
build/
|
build/
|
||||||
third-party/
|
third-party/argparse
|
||||||
*.idx/
|
*.idx/
|
||||||
VERSION
|
VERSION
|
||||||
git_hash.h
|
git_hash.h
|
||||||
|
|||||||
8
.gitmodules
vendored
8
.gitmodules
vendored
@@ -3,4 +3,10 @@
|
|||||||
url = https://github.com/simon987/libscan
|
url = https://github.com/simon987/libscan
|
||||||
[submodule "third-party/argparse"]
|
[submodule "third-party/argparse"]
|
||||||
path = third-party/argparse
|
path = third-party/argparse
|
||||||
url = https://github.com/cofyc/argparse
|
url = https://github.com/simon987/argparse
|
||||||
|
[submodule "third-party/libscan/third-party/utf8.h"]
|
||||||
|
path = third-party/libscan/third-party/utf8.h
|
||||||
|
url = https://github.com/sheredom/utf8.h
|
||||||
|
[submodule "third-party/libscan/third-party/antiword"]
|
||||||
|
path = third-party/libscan/third-party/antiword
|
||||||
|
url = https://github.com/simon987/antiword
|
||||||
|
|||||||
10
Dockerfile
10
Dockerfile
@@ -6,12 +6,10 @@ COPY . .
|
|||||||
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||||
RUN make -j$(nproc)
|
RUN make -j$(nproc)
|
||||||
RUN strip sist2
|
RUN strip sist2
|
||||||
RUN ls -lh
|
|
||||||
RUN ls -lh sist2-vue/dist/
|
|
||||||
|
|
||||||
FROM ubuntu:20.10
|
FROM ubuntu:21.10
|
||||||
|
|
||||||
RUN apt update && apt install -y curl libasan5
|
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN mkdir -p /usr/share/tessdata && \
|
RUN mkdir -p /usr/share/tessdata && \
|
||||||
cd /usr/share/tessdata/ && \
|
cd /usr/share/tessdata/ && \
|
||||||
@@ -22,9 +20,9 @@ RUN mkdir -p /usr/share/tessdata && \
|
|||||||
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
||||||
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
|
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
|
||||||
|
|
||||||
COPY --from=build /build/sist2 /root/sist2
|
ENTRYPOINT ["/root/sist2"]
|
||||||
|
|
||||||
ENV LANG C.UTF-8
|
ENV LANG C.UTF-8
|
||||||
ENV LC_ALL C.UTF-8
|
ENV LC_ALL C.UTF-8
|
||||||
|
|
||||||
ENTRYPOINT ["/root/sist2"]
|
COPY --from=build /build/sist2 /root/sist2
|
||||||
|
|||||||
@@ -7,9 +7,9 @@ RUN cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE
|
|||||||
RUN make -j$(nproc)
|
RUN make -j$(nproc)
|
||||||
RUN strip sist2
|
RUN strip sist2
|
||||||
|
|
||||||
FROM ubuntu:20.10
|
FROM --platform linux/arm64/v8 ubuntu:21.10
|
||||||
|
|
||||||
RUN apt update && apt install -y curl libasan5
|
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN mkdir -p /usr/share/tessdata && \
|
RUN mkdir -p /usr/share/tessdata && \
|
||||||
cd /usr/share/tessdata/ && \
|
cd /usr/share/tessdata/ && \
|
||||||
@@ -20,9 +20,9 @@ RUN mkdir -p /usr/share/tessdata && \
|
|||||||
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
||||||
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
|
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
|
||||||
|
|
||||||
COPY --from=build /build/sist2 /root/sist2
|
|
||||||
|
|
||||||
ENV LANG C.UTF-8
|
ENV LANG C.UTF-8
|
||||||
ENV LC_ALL C.UTF-8
|
ENV LC_ALL C.UTF-8
|
||||||
|
|
||||||
ENTRYPOINT ["/root/sist2"]
|
ENTRYPOINT ["/root/sist2"]
|
||||||
|
|
||||||
|
COPY --from=build /build/sist2 /root/sist2
|
||||||
25
README.md
25
README.md
@@ -2,7 +2,7 @@
|
|||||||
[](https://www.codefactor.io/repository/github/simon987/sist2)
|
[](https://www.codefactor.io/repository/github/simon987/sist2)
|
||||||
[](https://files.simon987.net/.gate/sist2/simon987_sist2/)
|
[](https://files.simon987.net/.gate/sist2/simon987_sist2/)
|
||||||
|
|
||||||
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)
|
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
|
||||||
|
|
||||||
# sist2
|
# sist2
|
||||||
|
|
||||||
@@ -10,7 +10,7 @@ sist2 (Simple incremental search tool)
|
|||||||
|
|
||||||
*Warning: sist2 is in early development*
|
*Warning: sist2 is in early development*
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
@@ -33,12 +33,11 @@ sist2 (Simple incremental search tool)
|
|||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
|
|
||||||
1. Have an Elasticsearch (>= 6.X.X) instance running
|
1. Have an Elasticsearch (>= 6.8.X, ideally >=7.14.0) instance running
|
||||||
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
|
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
|
||||||
1. *(or)* Run using docker:
|
1. *(or)* Run using docker:
|
||||||
```bash
|
```bash
|
||||||
docker run -d --name es1 --net sist2_net -p 9200:9200 \
|
docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.14.0
|
||||||
-e "discovery.type=single-node" elasticsearch:7.14.0
|
|
||||||
```
|
```
|
||||||
1. *(or)* Run using docker-compose:
|
1. *(or)* Run using docker-compose:
|
||||||
```yaml
|
```yaml
|
||||||
@@ -50,8 +49,9 @@ sist2 (Simple incremental search tool)
|
|||||||
```
|
```
|
||||||
1. Download sist2 executable
|
1. Download sist2 executable
|
||||||
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
|
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
|
||||||
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)*
|
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
|
||||||
1. *(or)* `docker pull simon987/sist2:2.11.2-x64-linux`
|
recommended!)*
|
||||||
|
1. *(or)* `docker pull simon987/sist2:2.11.4-x64-linux`
|
||||||
|
|
||||||
1. See [Usage guide](docs/USAGE.md)
|
1. See [Usage guide](docs/USAGE.md)
|
||||||
|
|
||||||
@@ -70,19 +70,20 @@ See [Usage guide](docs/USAGE.md) for more details
|
|||||||
File type | Library | Content | Thumbnail | Metadata
|
File type | Library | Content | Thumbnail | Metadata
|
||||||
:---|:---|:---|:---|:---
|
:---|:---|:---|:---|:---
|
||||||
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
|
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
|
||||||
cbz,cbr | *(none)* | - | yes | - |
|
cbz,cbr | [libscan](https://github.com/simon987/libscan) | - | yes | - |
|
||||||
`audio/*` | ffmpeg | - | yes | ID3 tags |
|
`audio/*` | ffmpeg | - | yes | ID3 tags |
|
||||||
`video/*` | ffmpeg | - | yes | title, comment, artist |
|
`video/*` | ffmpeg | - | yes | title, comment, artist |
|
||||||
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
|
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
|
||||||
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
|
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
|
||||||
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
||||||
`text/plain` | *(none)* | yes | no | - |
|
`text/plain` | [libscan](https://github.com/simon987/libscan) | yes | no | - |
|
||||||
html, xml | *(none)* | yes | no | - |
|
html, xml | [libscan](https://github.com/simon987/libscan) | yes | no | - |
|
||||||
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
||||||
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
|
docx, xlsx, pptx | [libscan](https://github.com/simon987/libscan) | yes | if embedded | creator, modified_by, title |
|
||||||
doc (MS Word 97-2003) | antiword | yes | yes | author, title |
|
doc (MS Word 97-2003) | antiword | yes | yes | author, title |
|
||||||
mobi, azw, azw3 | libmobi | yes | no | author, title |
|
mobi, azw, azw3 | libmobi | yes | no | author, title |
|
||||||
wpd (WordPerfect) | libwpd | yes | no | *planned* |
|
wpd (WordPerfect) | libwpd | yes | no | *planned* |
|
||||||
|
json, jsonl, ndjson | [libscan](https://github.com/simon987/libscan) | yes | - | - |
|
||||||
|
|
||||||
\* *See [Archive files](#archive-files)*
|
\* *See [Archive files](#archive-files)*
|
||||||
|
|
||||||
@@ -135,7 +136,7 @@ docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
|
|||||||
```bash
|
```bash
|
||||||
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
|
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
|
||||||
```
|
```
|
||||||
|
|
||||||
1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
|
1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
|
||||||
|
|
||||||
1. Install vcpkg dependencies
|
1. Install vcpkg dependencies
|
||||||
|
|||||||
@@ -14,6 +14,7 @@
|
|||||||
* [examples](#web-examples)
|
* [examples](#web-examples)
|
||||||
* [rewrite_url](#rewrite_url)
|
* [rewrite_url](#rewrite_url)
|
||||||
* [link to specific indices](#link-to-specific-indices)
|
* [link to specific indices](#link-to-specific-indices)
|
||||||
|
* [elasticsearch](#elasticsearch)
|
||||||
* [exec-script](#exec-script)
|
* [exec-script](#exec-script)
|
||||||
* [tagging](#tagging)
|
* [tagging](#tagging)
|
||||||
* [sidecar files](#sidecar-files)
|
* [sidecar files](#sidecar-files)
|
||||||
@@ -49,6 +50,7 @@ Scan options
|
|||||||
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
|
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
|
||||||
--read-subtitles Read subtitles from media files.
|
--read-subtitles Read subtitles from media files.
|
||||||
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
|
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
|
||||||
|
--checksums Calculate file checksums when scanning.
|
||||||
|
|
||||||
Index options
|
Index options
|
||||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||||
@@ -129,6 +131,9 @@ Exec-script options
|
|||||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||||
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
|
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
|
||||||
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
|
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
|
||||||
|
* `--checksums` Calculate file checksums (sha1) when scanning files. This option does not cause any additional read
|
||||||
|
operations. Checksums are not calculated for every file type; the exception is files inside an archive, which always get one. When enabled, duplicate
|
||||||
|
files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
|
||||||
|
|
||||||
### Scan examples
|
### Scan examples
|
||||||
|
|
||||||
@@ -262,9 +267,20 @@ sist2 web index1 index2 index3 index4
|
|||||||
When the `rewrite_url` field is not empty, the web module ignores the `root`
|
When the `rewrite_url` field is not empty, the web module ignores the `root`
|
||||||
field and will return an HTTP redirect to `<rewrite_url><path>/<name><extension>`
|
field and will return an HTTP redirect to `<rewrite_url><path>/<name><extension>`
|
||||||
instead of serving the file from disk.
|
instead of serving the file from disk.
|
||||||
Both the `root` and `rewrite_url` fields are safe to manually modify from the
|
Both the `root` and `rewrite_url` fields are safe to manually modify from the
|
||||||
`descriptor.json` file.
|
`descriptor.json` file.
|
||||||
|
|
||||||
|
# Elasticsearch
|
||||||
|
|
||||||
|
Elasticsearch versions >=6.8.0, <8.0.0 are supported by sist2.
|
||||||
|
|
||||||
|
Using a version >=7.14.0 is recommended to enable the following features:
|
||||||
|
|
||||||
|
- Bug fix for large documents (See #198)
|
||||||
|
|
||||||
|
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
|
||||||
|
If you don't care about the features above, you can ignore it or disable it in the configuration page.
|
||||||
|
|
||||||
## exec-script
|
## exec-script
|
||||||
|
|
||||||
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
|
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
|
||||||
@@ -299,7 +315,7 @@ See [scripting](scripting.md) documentation.
|
|||||||
# Sidecar files
|
# Sidecar files
|
||||||
|
|
||||||
When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
|
When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
|
||||||
original document's metadata. Sidecar metadata files will also work inside archives.
|
original document's indexed metadata (does not modify the actual file). Sidecar metadata files will also work inside archives.
|
||||||
Sidecar files themselves are not saved in the index.
|
Sidecar files themselves are not saved in the index.
|
||||||
|
|
||||||
This feature is useful to leverage third-party applications such as speech-to-text or
|
This feature is useful to leverage third-party applications such as speech-to-text or
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 3.9 KiB After Width: | Height: | Size: 35 KiB |
BIN
docs/sist2.png
BIN
docs/sist2.png
Binary file not shown.
|
Before Width: | Height: | Size: 889 KiB After Width: | Height: | Size: 1011 KiB |
@@ -4,6 +4,10 @@
|
|||||||
"type": "keyword",
|
"type": "keyword",
|
||||||
"doc_values": true
|
"doc_values": true
|
||||||
},
|
},
|
||||||
|
"checksum": {
|
||||||
|
"type": "keyword",
|
||||||
|
"index": false
|
||||||
|
},
|
||||||
"_depth": {
|
"_depth": {
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
@@ -74,6 +78,7 @@
|
|||||||
"name": {
|
"name": {
|
||||||
"analyzer": "content_analyzer",
|
"analyzer": "content_analyzer",
|
||||||
"type": "text",
|
"type": "text",
|
||||||
|
"fielddata": true,
|
||||||
"fields": {
|
"fields": {
|
||||||
"nGram": {
|
"nGram": {
|
||||||
"type": "text",
|
"type": "text",
|
||||||
|
|||||||
@@ -2,7 +2,8 @@
|
|||||||
"index": {
|
"index": {
|
||||||
"refresh_interval": "30s",
|
"refresh_interval": "30s",
|
||||||
"codec": "best_compression",
|
"codec": "best_compression",
|
||||||
"number_of_replicas": 0
|
"number_of_replicas": 0,
|
||||||
|
"highlight.max_analyzed_offset": 10000000
|
||||||
},
|
},
|
||||||
"analysis": {
|
"analysis": {
|
||||||
"tokenizer": {
|
"tokenizer": {
|
||||||
|
|||||||
58
schema/settings_legacy.json
Normal file
58
schema/settings_legacy.json
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
{
|
||||||
|
"index": {
|
||||||
|
"refresh_interval": "30s",
|
||||||
|
"codec": "best_compression",
|
||||||
|
"number_of_replicas": 0
|
||||||
|
},
|
||||||
|
"analysis": {
|
||||||
|
"tokenizer": {
|
||||||
|
"path_tokenizer": {
|
||||||
|
"type": "path_hierarchy",
|
||||||
|
"delimiter": "/"
|
||||||
|
},
|
||||||
|
"tag_tokenizer": {
|
||||||
|
"type": "path_hierarchy",
|
||||||
|
"delimiter": "."
|
||||||
|
},
|
||||||
|
"my_nGram_tokenizer": {
|
||||||
|
"type": "nGram",
|
||||||
|
"min_gram": 3,
|
||||||
|
"max_gram": 3
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"analyzer": {
|
||||||
|
"path_analyzer": {
|
||||||
|
"tokenizer": "path_tokenizer",
|
||||||
|
"filter": [
|
||||||
|
"lowercase"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"tag_analyzer": {
|
||||||
|
"tokenizer": "tag_tokenizer",
|
||||||
|
"filter": [
|
||||||
|
"lowercase"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"case_insensitive_kw_analyzer": {
|
||||||
|
"tokenizer": "keyword",
|
||||||
|
"filter": [
|
||||||
|
"lowercase"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"my_nGram": {
|
||||||
|
"tokenizer": "my_nGram_tokenizer",
|
||||||
|
"filter": [
|
||||||
|
"lowercase",
|
||||||
|
"asciifolding"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"content_analyzer": {
|
||||||
|
"tokenizer": "standard",
|
||||||
|
"filter": [
|
||||||
|
"lowercase",
|
||||||
|
"asciifolding"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -3,6 +3,7 @@ import json
|
|||||||
files = [
|
files = [
|
||||||
"schema/mappings.json",
|
"schema/mappings.json",
|
||||||
"schema/settings.json",
|
"schema/settings.json",
|
||||||
|
"schema/settings_legacy.json",
|
||||||
"schema/pipeline.json",
|
"schema/pipeline.json",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ application/java-archive, jar
|
|||||||
application/java, class
|
application/java, class
|
||||||
application/javascript,
|
application/javascript,
|
||||||
application/json, json
|
application/json, json
|
||||||
|
application/ndjson, jsonl|ndjson
|
||||||
application/marc, mrc
|
application/marc, mrc
|
||||||
application/mbedlet, mbd
|
application/mbedlet, mbd
|
||||||
application/mime, aps
|
application/mime, aps
|
||||||
|
|||||||
|
2
sist2-vue/dist/css/chunk-vendors.css
vendored
2
sist2-vue/dist/css/chunk-vendors.css
vendored
File diff suppressed because one or more lines are too long
2
sist2-vue/dist/css/index.css
vendored
2
sist2-vue/dist/css/index.css
vendored
File diff suppressed because one or more lines are too long
6
sist2-vue/dist/js/chunk-vendors.js
vendored
6
sist2-vue/dist/js/chunk-vendors.js
vendored
File diff suppressed because one or more lines are too long
2
sist2-vue/dist/js/index.js
vendored
2
sist2-vue/dist/js/index.js
vendored
File diff suppressed because one or more lines are too long
15
sist2-vue/package-lock.json
generated
15
sist2-vue/package-lock.json
generated
@@ -23,7 +23,6 @@
|
|||||||
"vue-color": "^2.8.1",
|
"vue-color": "^2.8.1",
|
||||||
"vue-i18n": "^8.24.4",
|
"vue-i18n": "^8.24.4",
|
||||||
"vue-masonry-wall": "^0.3.2",
|
"vue-masonry-wall": "^0.3.2",
|
||||||
"vue-multiselect": "^2.1.6",
|
|
||||||
"vue-router": "^3.2.0",
|
"vue-router": "^3.2.0",
|
||||||
"vue-simple-suggest": "^1.11.1",
|
"vue-simple-suggest": "^1.11.1",
|
||||||
"vuex": "^3.4.0"
|
"vuex": "^3.4.0"
|
||||||
@@ -13604,15 +13603,6 @@
|
|||||||
"node": ">=10"
|
"node": ">=10"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/vue-multiselect": {
|
|
||||||
"version": "2.1.6",
|
|
||||||
"resolved": "https://registry.npmjs.org/vue-multiselect/-/vue-multiselect-2.1.6.tgz",
|
|
||||||
"integrity": "sha512-s7jmZPlm9FeueJg1RwJtnE9KNPtME/7C8uRWSfp9/yEN4M8XcS/d+bddoyVwVnvFyRh9msFo0HWeW0vTL8Qv+w==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 4.0.0",
|
|
||||||
"npm": ">= 3.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/vue-observe-visibility": {
|
"node_modules/vue-observe-visibility": {
|
||||||
"version": "0.4.6",
|
"version": "0.4.6",
|
||||||
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",
|
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",
|
||||||
@@ -26376,11 +26366,6 @@
|
|||||||
"vue-observe-visibility": "^0.4.6"
|
"vue-observe-visibility": "^0.4.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"vue-multiselect": {
|
|
||||||
"version": "2.1.6",
|
|
||||||
"resolved": "https://registry.npmjs.org/vue-multiselect/-/vue-multiselect-2.1.6.tgz",
|
|
||||||
"integrity": "sha512-s7jmZPlm9FeueJg1RwJtnE9KNPtME/7C8uRWSfp9/yEN4M8XcS/d+bddoyVwVnvFyRh9msFo0HWeW0vTL8Qv+w=="
|
|
||||||
},
|
|
||||||
"vue-observe-visibility": {
|
"vue-observe-visibility": {
|
||||||
"version": "0.4.6",
|
"version": "0.4.6",
|
||||||
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",
|
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",
|
||||||
|
|||||||
@@ -22,7 +22,6 @@
|
|||||||
"vue-color": "^2.8.1",
|
"vue-color": "^2.8.1",
|
||||||
"vue-i18n": "^8.24.4",
|
"vue-i18n": "^8.24.4",
|
||||||
"vue-masonry-wall": "^0.3.2",
|
"vue-masonry-wall": "^0.3.2",
|
||||||
"vue-multiselect": "^2.1.6",
|
|
||||||
"vue-router": "^3.2.0",
|
"vue-router": "^3.2.0",
|
||||||
"vue-simple-suggest": "^1.11.1",
|
"vue-simple-suggest": "^1.11.1",
|
||||||
"vuex": "^3.4.0"
|
"vuex": "^3.4.0"
|
||||||
|
|||||||
@@ -50,6 +50,8 @@ export interface EsHit {
|
|||||||
height: number
|
height: number
|
||||||
duration: number
|
duration: number
|
||||||
tag: string[]
|
tag: string[]
|
||||||
|
checksum: string
|
||||||
|
thumbnail: string
|
||||||
}
|
}
|
||||||
_props: {
|
_props: {
|
||||||
isSubDocument: boolean
|
isSubDocument: boolean
|
||||||
@@ -60,6 +62,8 @@ export interface EsHit {
|
|||||||
isPlayableImage: boolean
|
isPlayableImage: boolean
|
||||||
isAudio: boolean
|
isAudio: boolean
|
||||||
hasThumbnail: boolean
|
hasThumbnail: boolean
|
||||||
|
tnW: number
|
||||||
|
tnH: number
|
||||||
}
|
}
|
||||||
highlight: {
|
highlight: {
|
||||||
name: string[] | undefined,
|
name: string[] | undefined,
|
||||||
@@ -130,6 +134,8 @@ class Sist2Api {
|
|||||||
|
|
||||||
if ("thumbnail" in hit._source) {
|
if ("thumbnail" in hit._source) {
|
||||||
hit._props.hasThumbnail = true;
|
hit._props.hasThumbnail = true;
|
||||||
|
hit._props.tnW = Number(hit._source.thumbnail.split(",")[0]);
|
||||||
|
hit._props.tnH = Number(hit._source.thumbnail.split(",")[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (mimeCategory) {
|
switch (mimeCategory) {
|
||||||
|
|||||||
@@ -43,6 +43,20 @@ const SORT_MODES = {
|
|||||||
{_tie: {order: "asc"}}
|
{_tie: {order: "asc"}}
|
||||||
],
|
],
|
||||||
key: (hit: EsHit) => hit._source.size
|
key: (hit: EsHit) => hit._source.size
|
||||||
|
},
|
||||||
|
nameAsc: {
|
||||||
|
mode: [
|
||||||
|
{name: {order: "asc"}},
|
||||||
|
{_tie: {order: "asc"}}
|
||||||
|
],
|
||||||
|
key: (hit: EsHit) => hit._source.name
|
||||||
|
},
|
||||||
|
nameDesc: {
|
||||||
|
mode: [
|
||||||
|
{name: {order: "desc"}},
|
||||||
|
{_tie: {order: "asc"}}
|
||||||
|
],
|
||||||
|
key: (hit: EsHit) => hit._source.name
|
||||||
}
|
}
|
||||||
} as any;
|
} as any;
|
||||||
|
|
||||||
@@ -73,6 +87,8 @@ class Sist2Query {
|
|||||||
const selectedMimeTypes = getters.selectedMimeTypes;
|
const selectedMimeTypes = getters.selectedMimeTypes;
|
||||||
const selectedTags = getters.selectedTags;
|
const selectedTags = getters.selectedTags;
|
||||||
|
|
||||||
|
const legacyES = store.state.sist2Info.esVersionLegacy;
|
||||||
|
|
||||||
const filters = [
|
const filters = [
|
||||||
{terms: {index: selectedIndexIds}}
|
{terms: {index: selectedIndexIds}}
|
||||||
] as any[];
|
] as any[];
|
||||||
@@ -189,6 +205,11 @@ class Sist2Query {
|
|||||||
font_name: {},
|
font_name: {},
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (!legacyES) {
|
||||||
|
q.highlight.max_analyzed_offset = 9_999_999;
|
||||||
|
}
|
||||||
|
|
||||||
if (getters.optSearchInPath) {
|
if (getters.optSearchInPath) {
|
||||||
q.highlight.fields["path.text"] = {};
|
q.highlight.fields["path.text"] = {};
|
||||||
q.highlight.fields["path.nGram"] = {};
|
q.highlight.fields["path.nGram"] = {};
|
||||||
|
|||||||
@@ -5,7 +5,6 @@
|
|||||||
|
|
||||||
<b-card-body>
|
<b-card-body>
|
||||||
|
|
||||||
<!-- TODO: ES connectivity, Link to GH page -->
|
|
||||||
<b-table :items="tableItems" small borderless responsive="md" thead-class="hidden" class="mb-0"></b-table>
|
<b-table :items="tableItems" small borderless responsive="md" thead-class="hidden" class="mb-0"></b-table>
|
||||||
|
|
||||||
<hr />
|
<hr />
|
||||||
@@ -32,6 +31,9 @@ export default {
|
|||||||
{key: "esIndex", value: this.$store.state.sist2Info.esIndex},
|
{key: "esIndex", value: this.$store.state.sist2Info.esIndex},
|
||||||
{key: "tagline", value: this.$store.state.sist2Info.tagline},
|
{key: "tagline", value: this.$store.state.sist2Info.tagline},
|
||||||
{key: "dev", value: this.$store.state.sist2Info.dev},
|
{key: "dev", value: this.$store.state.sist2Info.dev},
|
||||||
|
{key: "esVersion", value: this.$store.state.sist2Info.esVersion},
|
||||||
|
{key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported},
|
||||||
|
{key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,11 +15,15 @@
|
|||||||
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
|
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div v-if="doc._props.isImage && !hover" class="card-img-overlay" :class="{'small-badge': smallBadge}">
|
<div
|
||||||
|
v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5"
|
||||||
|
class="card-img-overlay"
|
||||||
|
:class="{'small-badge': smallBadge}">
|
||||||
<span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
|
<span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover" class="card-img-overlay"
|
<div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
|
||||||
|
class="card-img-overlay"
|
||||||
:class="{'small-badge': smallBadge}">
|
:class="{'small-badge': smallBadge}">
|
||||||
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
|
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
|
||||||
</div>
|
</div>
|
||||||
@@ -39,7 +43,8 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Audio player-->
|
<!-- Audio player-->
|
||||||
<audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls :type="doc._source.mime"
|
<audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls
|
||||||
|
:type="doc._source.mime"
|
||||||
:src="`f/${doc._id}`"
|
:src="`f/${doc._id}`"
|
||||||
@play="onAudioPlay()"></audio>
|
@play="onAudioPlay()"></audio>
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,8 @@
|
|||||||
<template #modal-title>
|
<template #modal-title>
|
||||||
<h5 class="modal-title" :title="doc._source.name + ext(doc)">{{ doc._source.name + ext(doc) }}</h5>
|
<h5 class="modal-title" :title="doc._source.name + ext(doc)">{{ doc._source.name + ext(doc) }}</h5>
|
||||||
</template>
|
</template>
|
||||||
<img :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">
|
|
||||||
|
<img v-if="doc._props.hasThumbnail" :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">
|
||||||
|
|
||||||
<InfoTable :doc="doc"></InfoTable>
|
<InfoTable :doc="doc"></InfoTable>
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
<template>
|
<template>
|
||||||
<b-list-group-item class="flex-column align-items-start mb-2">
|
<b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}">
|
||||||
|
|
||||||
<!-- Info modal-->
|
<!-- Info modal-->
|
||||||
<DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal>
|
<DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal>
|
||||||
@@ -40,9 +40,11 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div v-if="doc._source.pages || doc._source.author" class="path-row text-muted">
|
<div v-if="doc._source.pages || doc._source.author" class="path-row text-muted">
|
||||||
<span v-if="doc._source.pages">{{ doc._source.pages }} {{ doc._source.pages > 1 ? $t("pages") : $t("page") }}</span>
|
<span v-if="doc._source.pages">{{ doc._source.pages }} {{
|
||||||
|
doc._source.pages > 1 ? $t("pages") : $t("page")
|
||||||
|
}}</span>
|
||||||
<span v-if="doc._source.author && doc._source.pages" class="mx-1">-</span>
|
<span v-if="doc._source.author && doc._source.pages" class="mx-1">-</span>
|
||||||
<span v-if="doc._source.author">{{doc._source.author}}</span>
|
<span v-if="doc._source.author">{{ doc._source.author }}</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -89,6 +91,14 @@ export default {
|
|||||||
</script>
|
</script>
|
||||||
|
|
||||||
<style scoped>
|
<style scoped>
|
||||||
|
.sub-document {
|
||||||
|
background: #AB47BC1F !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.theme-black .sub-document {
|
||||||
|
background: #37474F !important;
|
||||||
|
}
|
||||||
|
|
||||||
.list-group {
|
.list-group {
|
||||||
margin-top: 1em;
|
margin-top: 1em;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,93 +1,166 @@
|
|||||||
<template>
|
<template>
|
||||||
<VueMultiselect
|
<div v-if="isMobile">
|
||||||
multiple
|
<b-form-select
|
||||||
label="name"
|
:value="selectedIndicesIds"
|
||||||
:value="selectedIndices"
|
@change="onSelect($event)"
|
||||||
:options="indices"
|
:options="indices" multiple :select-size="6" text-field="name"
|
||||||
:close-on-select="indices.length <= 1"
|
value-field="id"></b-form-select>
|
||||||
:placeholder="$t('indexPickerPlaceholder')"
|
</div>
|
||||||
@select="addItem"
|
<div v-else>
|
||||||
@remove="removeItem">
|
|
||||||
|
|
||||||
<template slot="option" slot-scope="idx">
|
<div class="d-flex justify-content-between align-content-center">
|
||||||
<b-row>
|
<span>
|
||||||
<b-col>
|
{{ selectedIndices.length }}
|
||||||
<span class="mr-1">{{ idx.option.name }}</span>
|
{{ selectedIndices.length === 1 ? $t("indexPicker.selectedIndex") : $t("indexPicker.selectedIndices") }}
|
||||||
<SmallBadge pill :text="idx.option.version"></SmallBadge>
|
</span>
|
||||||
</b-col>
|
|
||||||
</b-row>
|
|
||||||
<b-row class="mt-1">
|
|
||||||
<b-col>
|
|
||||||
<span>{{ formatIdxDate(idx.option.timestamp) }}</span>
|
|
||||||
</b-col>
|
|
||||||
</b-row>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
</VueMultiselect>
|
<div>
|
||||||
|
<b-button variant="link" @click="selectAll()"> {{ $t("indexPicker.selectAll") }}</b-button>
|
||||||
|
<b-button variant="link" @click="selectNone()"> {{ $t("indexPicker.selectNone") }}</b-button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<b-list-group id="index-picker-desktop" class="unselectable">
|
||||||
|
<b-list-group-item
|
||||||
|
v-for="idx in indices"
|
||||||
|
@click="toggleIndex(idx, $event)"
|
||||||
|
@click.shift="shiftClick(idx, $event)"
|
||||||
|
class="d-flex justify-content-between align-items-center list-group-item-action pointer"
|
||||||
|
:class="{active: lastClickIndex === idx}"
|
||||||
|
>
|
||||||
|
<div class="d-flex">
|
||||||
|
<b-checkbox @change="toggleIndex(idx)" :checked="isSelected(idx)"></b-checkbox>
|
||||||
|
{{ idx.name }}
|
||||||
|
<span class="text-muted timestamp-text ml-2">{{ formatIdxDate(idx.timestamp) }}</span>
|
||||||
|
</div>
|
||||||
|
<b-badge class="version-badge">v{{ idx.version }}</b-badge>
|
||||||
|
</b-list-group-item>
|
||||||
|
</b-list-group>
|
||||||
|
</div>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import VueMultiselect from "vue-multiselect"
|
|
||||||
import SmallBadge from "./SmallBadge.vue"
|
import SmallBadge from "./SmallBadge.vue"
|
||||||
import {mapActions, mapGetters} from "vuex";
|
import {mapActions, mapGetters} from "vuex";
|
||||||
import {Index} from "@/Sist2Api";
|
|
||||||
import Vue from "vue";
|
import Vue from "vue";
|
||||||
import {format} from "date-fns";
|
import {format} from "date-fns";
|
||||||
|
|
||||||
export default Vue.extend({
|
export default Vue.extend({
|
||||||
components: {
|
components: {
|
||||||
VueMultiselect,
|
|
||||||
SmallBadge
|
SmallBadge
|
||||||
},
|
},
|
||||||
data() {
|
data() {
|
||||||
return {
|
return {
|
||||||
loading: true
|
loading: true,
|
||||||
|
lastClickIndex: null
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
computed: {
|
computed: {
|
||||||
...mapGetters([
|
...mapGetters([
|
||||||
"indices", "selectedIndices"
|
"indices", "selectedIndices"
|
||||||
]),
|
]),
|
||||||
|
selectedIndicesIds() {
|
||||||
|
return this.selectedIndices.map(idx => idx.id)
|
||||||
|
},
|
||||||
|
isMobile() {
|
||||||
|
return window.innerWidth <= 650;
|
||||||
|
}
|
||||||
},
|
},
|
||||||
methods: {
|
methods: {
|
||||||
...mapActions({
|
...mapActions({
|
||||||
setSelectedIndices: "setSelectedIndices"
|
setSelectedIndices: "setSelectedIndices"
|
||||||
}),
|
}),
|
||||||
removeItem(val: Index): void {
|
shiftClick(index, e) {
|
||||||
this.setSelectedIndices(this.selectedIndices.filter((item: Index) => item !== val))
|
if (this.lastClickIndex === null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const select = this.isSelected(this.lastClickIndex);
|
||||||
|
|
||||||
|
let leftBoundary = this.indices.indexOf(this.lastClickIndex);
|
||||||
|
let rightBoundary = this.indices.indexOf(index);
|
||||||
|
|
||||||
|
if (rightBoundary < leftBoundary) {
|
||||||
|
let tmp = leftBoundary;
|
||||||
|
leftBoundary = rightBoundary;
|
||||||
|
rightBoundary = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (let i = leftBoundary; i <= rightBoundary; i++) {
|
||||||
|
if (select) {
|
||||||
|
if (!this.isSelected(this.indices[i])) {
|
||||||
|
this.setSelectedIndices([this.indices[i], ...this.selectedIndices]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
this.setSelectedIndices(this.selectedIndices.filter(idx => idx !== this.indices[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
addItem(val: Index): void {
|
selectAll() {
|
||||||
this.setSelectedIndices([...this.selectedIndices, val])
|
this.setSelectedIndices(this.indices);
|
||||||
|
},
|
||||||
|
selectNone() {
|
||||||
|
this.setSelectedIndices([]);
|
||||||
|
},
|
||||||
|
onSelect(value) {
|
||||||
|
this.setSelectedIndices(this.indices.filter(idx => value.includes(idx.id)));
|
||||||
},
|
},
|
||||||
formatIdxDate(timestamp: number): string {
|
formatIdxDate(timestamp: number): string {
|
||||||
return format(new Date(timestamp * 1000), "yyyy-MM-dd");
|
return format(new Date(timestamp * 1000), "yyyy-MM-dd");
|
||||||
|
},
|
||||||
|
/**
 * Toggle a single index in the selection and remember it as the anchor
 * (`lastClickIndex`) for a later shift-click range selection.
 *
 * Shift-clicks are ignored here; the list item also binds `@click.shift`
 * to `shiftClick`, which handles range selection.
 *
 * NOTE(review): the template's checkbox handler calls `toggleIndex(idx)`
 * without an event, so `e` is undefined on that path and `e.shiftKey`
 * throws. The click on the surrounding list item presumably still performs
 * the toggle -- confirm before making this guard null-safe, since doing so
 * could make a checkbox click toggle twice.
 *
 * @param index index entry that was clicked
 * @param e     originating mouse event (read for `shiftKey`)
 */
toggleIndex(index, e) {
    if (e.shiftKey) {
        return;
    }

    this.lastClickIndex = index;

    // Already selected -> drop it (matched by id); otherwise prepend it.
    const nextSelection = this.isSelected(index)
        ? this.selectedIndices.filter(idx => idx.id != index.id)
        : [index, ...this.selectedIndices];

    this.setSelectedIndices(nextSelection);
},
|
||||||
|
isSelected(index) {
|
||||||
|
return this.selectedIndices.find(idx => idx.id == index.id) != null;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<style src="vue-multiselect/dist/vue-multiselect.min.css"></style>
|
<style scoped>
|
||||||
|
.timestamp-text {
|
||||||
<style>
|
line-height: 24px;
|
||||||
.multiselect__option {
|
font-size: 80%;
|
||||||
padding: 5px 10px;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.multiselect__content-wrapper {
|
.version-badge {
|
||||||
overflow: hidden;
|
color: #222 !important;
|
||||||
|
background: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
.theme-black .multiselect__tags {
|
.list-group-item {
|
||||||
background: #37474F;
|
padding: 0.2em 0.4em;
|
||||||
border: 1px solid #616161 !important
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.theme-black .multiselect__input {
|
#index-picker-desktop {
|
||||||
color: #dbdbdb;
|
overflow-y: auto;
|
||||||
background: #37474F;
|
max-height: 132px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.theme-black .multiselect__content-wrapper {
|
.btn-link:focus {
|
||||||
border: none
|
box-shadow: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.unselectable {
|
||||||
|
user-select: none;
|
||||||
|
-ms-user-select: none;
|
||||||
|
-moz-user-select: none;
|
||||||
|
-webkit-user-select: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.list-group-item.active {
|
||||||
|
z-index: 2;
|
||||||
|
background-color: inherit;
|
||||||
|
color: inherit;
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
<template #cell(value)="data">
|
<template #cell(value)="data">
|
||||||
<span v-if="'html' in data.item" v-html="data.item.html"></span>
|
<span v-if="'html' in data.item" v-html="data.item.html"></span>
|
||||||
<span v-else>{{data.value}}</span>
|
<span v-else>{{ data.value }}</span>
|
||||||
</template>
|
</template>
|
||||||
</b-table>
|
</b-table>
|
||||||
</template>
|
</template>
|
||||||
@@ -57,7 +57,8 @@ export default {
|
|||||||
"bitrate", "artist", "album", "album_artist", "genre", "font_name", "author",
|
"bitrate", "artist", "album", "album_artist", "genre", "font_name", "author",
|
||||||
"modified_by", "pages", "tag",
|
"modified_by", "pages", "tag",
|
||||||
"exif_make", "exif_software", "exif_exposure_time", "exif_fnumber", "exif_focal_length",
|
"exif_make", "exif_software", "exif_exposure_time", "exif_fnumber", "exif_focal_length",
|
||||||
"exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime",
|
"exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime",
|
||||||
|
"checksum"
|
||||||
];
|
];
|
||||||
|
|
||||||
fields.forEach(field => {
|
fields.forEach(field => {
|
||||||
@@ -76,9 +77,9 @@ export default {
|
|||||||
items.push({
|
items.push({
|
||||||
key: "Exif GPS",
|
key: "Exif GPS",
|
||||||
html: makeGpsLink(
|
html: makeGpsLink(
|
||||||
dmsToDecimal(src["exif_gps_latitude_dms"], src["exif_gps_latitude_ref"]),
|
dmsToDecimal(src["exif_gps_latitude_dms"], src["exif_gps_latitude_ref"]),
|
||||||
dmsToDecimal(src["exif_gps_longitude_dms"], src["exif_gps_longitude_ref"]),
|
dmsToDecimal(src["exif_gps_longitude_dms"], src["exif_gps_longitude_ref"]),
|
||||||
),
|
),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -21,6 +21,9 @@ export default {
|
|||||||
if (mutation.type === "setUiMimeMap") {
|
if (mutation.type === "setUiMimeMap") {
|
||||||
const mimeMap = mutation.payload.slice();
|
const mimeMap = mutation.payload.slice();
|
||||||
|
|
||||||
|
const elem = document.getElementById("mimeTree");
|
||||||
|
console.log(elem);
|
||||||
|
|
||||||
this.mimeTree = new InspireTree({
|
this.mimeTree = new InspireTree({
|
||||||
selection: {
|
selection: {
|
||||||
mode: 'checkbox'
|
mode: 'checkbox'
|
||||||
|
|||||||
@@ -8,7 +8,8 @@
|
|||||||
</b-navbar-brand>
|
</b-navbar-brand>
|
||||||
|
|
||||||
<span class="badge badge-pill version" v-if="$store && $store.state.sist2Info">
|
<span class="badge badge-pill version" v-if="$store && $store.state.sist2Info">
|
||||||
v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span>
|
v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a
|
||||||
|
href="https://github.com/simon987/sist2/blob/master/docs/USAGE.md#elasticsearch" target="_blank">legacyES</a></span>
|
||||||
</span>
|
</span>
|
||||||
|
|
||||||
<span v-if="$store && $store.state.sist2Info" class="tagline" v-html="tagline()"></span>
|
<span v-if="$store && $store.state.sist2Info" class="tagline" v-html="tagline()"></span>
|
||||||
@@ -20,6 +21,7 @@
|
|||||||
|
|
||||||
<script>
|
<script>
|
||||||
import Sist2Icon from "@/components/Sist2Icon";
|
import Sist2Icon from "@/components/Sist2Icon";
|
||||||
|
|
||||||
export default {
|
export default {
|
||||||
name: "NavBar",
|
name: "NavBar",
|
||||||
components: {Sist2Icon},
|
components: {Sist2Icon},
|
||||||
@@ -32,6 +34,12 @@ export default {
|
|||||||
},
|
},
|
||||||
isDebug() {
|
isDebug() {
|
||||||
return this.$store.state.sist2Info.debug;
|
return this.$store.state.sist2Info.debug;
|
||||||
|
},
|
||||||
|
isLegacy() {
|
||||||
|
return this.$store.state.sist2Info.esVersionLegacy;
|
||||||
|
},
|
||||||
|
hideLegacy() {
|
||||||
|
return this.$store.state.optHideLegacy;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -95,7 +103,7 @@ export default {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
.theme-light .btn-link{
|
.theme-light .btn-link {
|
||||||
color: #222;
|
color: #222;
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
@@ -5,9 +5,11 @@
|
|||||||
<div style="float: right">
|
<div style="float: right">
|
||||||
<b-button v-b-toggle.collapse-1 variant="primary" class="not-mobile">{{ $t("details") }}</b-button>
|
<b-button v-b-toggle.collapse-1 variant="primary" class="not-mobile">{{ $t("details") }}</b-button>
|
||||||
|
|
||||||
<SortSelect class="ml-2"></SortSelect>
|
<template v-if="hitCount !== 0">
|
||||||
|
<SortSelect class="ml-2"></SortSelect>
|
||||||
|
|
||||||
<DisplayModeToggle class="ml-2"></DisplayModeToggle>
|
<DisplayModeToggle class="ml-2"></DisplayModeToggle>
|
||||||
|
</template>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<b-collapse id="collapse-1" class="pt-2" style="clear:both;">
|
<b-collapse id="collapse-1" class="pt-2" style="clear:both;">
|
||||||
@@ -21,7 +23,7 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import {EsResult} from "@/Sist2Api";
|
import {EsResult} from "@/Sist2Api";
|
||||||
import Vue from "vue";
|
import Vue from "vue";
|
||||||
import {humanFileSize, humanTime} from "@/util";
|
import {humanFileSize} from "@/util";
|
||||||
import DisplayModeToggle from "@/components/DisplayModeToggle.vue";
|
import DisplayModeToggle from "@/components/DisplayModeToggle.vue";
|
||||||
import SortSelect from "@/components/SortSelect.vue";
|
import SortSelect from "@/components/SortSelect.vue";
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,14 @@
|
|||||||
{{ $t("sort.sizeDesc") }}
|
{{ $t("sort.sizeDesc") }}
|
||||||
</b-dropdown-item>
|
</b-dropdown-item>
|
||||||
|
|
||||||
|
<b-dropdown-item :class="{'dropdown-active': sort === 'nameDesc'}" @click="onSelect('nameDesc')">
|
||||||
|
{{ $t("sort.nameDesc") }}
|
||||||
|
</b-dropdown-item>
|
||||||
|
|
||||||
|
<b-dropdown-item :class="{'dropdown-active': sort === 'nameAsc'}" @click="onSelect('nameAsc')">
|
||||||
|
{{ $t("sort.nameAsc") }}
|
||||||
|
</b-dropdown-item>
|
||||||
|
|
||||||
<b-dropdown-item :class="{'dropdown-active': sort === 'random'}" @click="onSelect('random')">
|
<b-dropdown-item :class="{'dropdown-active': sort === 'random'}" @click="onSelect('random')">
|
||||||
{{ $t("sort.random") }}
|
{{ $t("sort.random") }}
|
||||||
</b-dropdown-item>
|
</b-dropdown-item>
|
||||||
|
|||||||
@@ -62,7 +62,9 @@ export default {
|
|||||||
lightboxLoadOnlyCurrent: "Do not preload full-size images for adjacent slides in image viewer.",
|
lightboxLoadOnlyCurrent: "Do not preload full-size images for adjacent slides in image viewer.",
|
||||||
slideDuration: "Slide duration",
|
slideDuration: "Slide duration",
|
||||||
resultSize: "Number of results per page",
|
resultSize: "Number of results per page",
|
||||||
tagOrOperator: "Use OR operator when specifying multiple tags."
|
tagOrOperator: "Use OR operator when specifying multiple tags.",
|
||||||
|
hideDuplicates: "Hide duplicate results based on checksum",
|
||||||
|
hideLegacy: "Hide the 'legacyES' Elasticsearch notice"
|
||||||
},
|
},
|
||||||
queryMode: {
|
queryMode: {
|
||||||
simple: "Simple",
|
simple: "Simple",
|
||||||
@@ -129,13 +131,14 @@ export default {
|
|||||||
saveTagModalTitle: "Add tag",
|
saveTagModalTitle: "Add tag",
|
||||||
saveTagPlaceholder: "Tag name",
|
saveTagPlaceholder: "Tag name",
|
||||||
confirm: "Confirm",
|
confirm: "Confirm",
|
||||||
indexPickerPlaceholder: "Select indices",
|
|
||||||
sort: {
|
sort: {
|
||||||
relevance: "Relevance",
|
relevance: "Relevance",
|
||||||
dateAsc: "Date (Older first)",
|
dateAsc: "Date (Older first)",
|
||||||
dateDesc: "Date (Newer first)",
|
dateDesc: "Date (Newer first)",
|
||||||
sizeAsc: "Size (Smaller first)",
|
sizeAsc: "Size (Smaller first)",
|
||||||
sizeDesc: "Size (Larger first)",
|
sizeDesc: "Size (Larger first)",
|
||||||
|
nameAsc: "Name (A-z)",
|
||||||
|
nameDesc: "Name (Z-a)",
|
||||||
random: "Random",
|
random: "Random",
|
||||||
},
|
},
|
||||||
d3: {
|
d3: {
|
||||||
@@ -143,7 +146,13 @@ export default {
|
|||||||
mimeSize: "Size distribution by media type",
|
mimeSize: "Size distribution by media type",
|
||||||
dateHistogram: "File modification time distribution",
|
dateHistogram: "File modification time distribution",
|
||||||
sizeHistogram: "File size distribution",
|
sizeHistogram: "File size distribution",
|
||||||
}
|
},
|
||||||
|
indexPicker: {
|
||||||
|
selectNone: "Select None",
|
||||||
|
selectAll: "Select All",
|
||||||
|
selectedIndex: "selected index",
|
||||||
|
selectedIndices: "selected indices",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
fr: {
|
fr: {
|
||||||
searchBar: {
|
searchBar: {
|
||||||
@@ -209,7 +218,9 @@ export default {
|
|||||||
lightboxLoadOnlyCurrent: "Désactiver le chargement des diapositives adjacentes pour le visualiseur d'images",
|
lightboxLoadOnlyCurrent: "Désactiver le chargement des diapositives adjacentes pour le visualiseur d'images",
|
||||||
slideDuration: "Durée des diapositives",
|
slideDuration: "Durée des diapositives",
|
||||||
resultSize: "Nombre de résultats par page",
|
resultSize: "Nombre de résultats par page",
|
||||||
tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags"
|
tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags",
|
||||||
|
hideDuplicates: "Masquer les résultats en double",
|
||||||
|
hideLegacy: "Masquer la notice 'legacyES' Elasticsearch"
|
||||||
},
|
},
|
||||||
queryMode: {
|
queryMode: {
|
||||||
simple: "Simple",
|
simple: "Simple",
|
||||||
@@ -284,6 +295,8 @@ export default {
|
|||||||
dateDesc: "Date (Plus récent)",
|
dateDesc: "Date (Plus récent)",
|
||||||
sizeAsc: "Taille (Plus petit)",
|
sizeAsc: "Taille (Plus petit)",
|
||||||
sizeDesc: "Taille (Plus grand)",
|
sizeDesc: "Taille (Plus grand)",
|
||||||
|
nameAsc: "Nom (A-z)",
|
||||||
|
nameDesc: "Nom (Z-a)",
|
||||||
random: "Aléatoire",
|
random: "Aléatoire",
|
||||||
},
|
},
|
||||||
d3: {
|
d3: {
|
||||||
@@ -291,6 +304,12 @@ export default {
|
|||||||
mimeSize: "Distribution des tailles de fichiers par type de média",
|
mimeSize: "Distribution des tailles de fichiers par type de média",
|
||||||
dateHistogram: "Distribution des dates de modification",
|
dateHistogram: "Distribution des dates de modification",
|
||||||
sizeHistogram: "Distribution des tailles de fichier",
|
sizeHistogram: "Distribution des tailles de fichier",
|
||||||
}
|
},
|
||||||
|
indexPicker: {
|
||||||
|
selectNone: "Sélectionner aucun",
|
||||||
|
selectAll: "Sélectionner tout",
|
||||||
|
selectedIndex: "indice sélectionné",
|
||||||
|
selectedIndices: "indices sélectionnés",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -27,6 +27,7 @@ export default new Vuex.Store({
|
|||||||
size: 60,
|
size: 60,
|
||||||
|
|
||||||
optLang: "en",
|
optLang: "en",
|
||||||
|
optHideDuplicates: true,
|
||||||
optTheme: "light",
|
optTheme: "light",
|
||||||
optDisplay: "grid",
|
optDisplay: "grid",
|
||||||
|
|
||||||
@@ -45,6 +46,7 @@ export default new Vuex.Store({
|
|||||||
optTreemapColor: "PuBuGn",
|
optTreemapColor: "PuBuGn",
|
||||||
optLightboxLoadOnlyCurrent: false,
|
optLightboxLoadOnlyCurrent: false,
|
||||||
optLightboxSlideDuration: 15,
|
optLightboxSlideDuration: 15,
|
||||||
|
optHideLegacy: false,
|
||||||
|
|
||||||
_onLoadSelectedIndices: [] as string[],
|
_onLoadSelectedIndices: [] as string[],
|
||||||
_onLoadSelectedMimeTypes: [] as string[],
|
_onLoadSelectedMimeTypes: [] as string[],
|
||||||
@@ -79,6 +81,7 @@ export default new Vuex.Store({
|
|||||||
setSizeMax: (state, val) => state.sizeMax = val,
|
setSizeMax: (state, val) => state.sizeMax = val,
|
||||||
setSist2Info: (state, val) => state.sist2Info = val,
|
setSist2Info: (state, val) => state.sist2Info = val,
|
||||||
setSeed: (state, val) => state.seed = val,
|
setSeed: (state, val) => state.seed = val,
|
||||||
|
setOptHideDuplicates: (state, val) => state.optHideDuplicates = val,
|
||||||
setOptLang: (state, val) => state.optLang = val,
|
setOptLang: (state, val) => state.optLang = val,
|
||||||
setSortMode: (state, val) => state.sortMode = val,
|
setSortMode: (state, val) => state.sortMode = val,
|
||||||
setIndices: (state, val) => {
|
setIndices: (state, val) => {
|
||||||
@@ -142,6 +145,7 @@ export default new Vuex.Store({
|
|||||||
setOptTreemapColorGroupingDepth: (state, val) => state.optTreemapColorGroupingDepth = val,
|
setOptTreemapColorGroupingDepth: (state, val) => state.optTreemapColorGroupingDepth = val,
|
||||||
setOptTreemapSize: (state, val) => state.optTreemapSize = val,
|
setOptTreemapSize: (state, val) => state.optTreemapSize = val,
|
||||||
setOptTreemapColor: (state, val) => state.optTreemapColor = val,
|
setOptTreemapColor: (state, val) => state.optTreemapColor = val,
|
||||||
|
setOptHideLegacy: (state, val) => state.optHideLegacy = val,
|
||||||
|
|
||||||
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
|
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
|
||||||
|
|
||||||
@@ -317,6 +321,7 @@ export default new Vuex.Store({
|
|||||||
uiLightboxKey: state => state.uiLightboxKey,
|
uiLightboxKey: state => state.uiLightboxKey,
|
||||||
uiLightboxSlide: state => state.uiLightboxSlide,
|
uiLightboxSlide: state => state.uiLightboxSlide,
|
||||||
|
|
||||||
|
optHideDuplicates: state => state.optHideDuplicates,
|
||||||
optLang: state => state.optLang,
|
optLang: state => state.optLang,
|
||||||
optTheme: state => state.optTheme,
|
optTheme: state => state.optTheme,
|
||||||
optDisplay: state => state.optDisplay,
|
optDisplay: state => state.optDisplay,
|
||||||
@@ -336,5 +341,6 @@ export default new Vuex.Store({
|
|||||||
optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
|
optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
|
||||||
optLightboxSlideDuration: state => state.optLightboxSlideDuration,
|
optLightboxSlideDuration: state => state.optLightboxSlideDuration,
|
||||||
optResultSize: state => state.size,
|
optResultSize: state => state.size,
|
||||||
|
optHideLegacy: state => state.optHideLegacy,
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -19,6 +19,10 @@
|
|||||||
{{ $t("opt.lightboxLoadOnlyCurrent") }}
|
{{ $t("opt.lightboxLoadOnlyCurrent") }}
|
||||||
</b-form-checkbox>
|
</b-form-checkbox>
|
||||||
|
|
||||||
|
<b-form-checkbox :checked="optHideLegacy" @input="setOptHideLegacy">
|
||||||
|
{{ $t("opt.hideLegacy") }}
|
||||||
|
</b-form-checkbox>
|
||||||
|
|
||||||
<label>{{ $t("opt.lang") }}</label>
|
<label>{{ $t("opt.lang") }}</label>
|
||||||
<b-form-select :options="langOptions" :value="optLang" @input="setOptLang"></b-form-select>
|
<b-form-select :options="langOptions" :value="optLang" @input="setOptLang"></b-form-select>
|
||||||
|
|
||||||
@@ -35,6 +39,11 @@
|
|||||||
<br/>
|
<br/>
|
||||||
<h4>{{ $t("searchOptions") }}</h4>
|
<h4>{{ $t("searchOptions") }}</h4>
|
||||||
<b-card>
|
<b-card>
|
||||||
|
<b-form-checkbox :checked="optHideDuplicates" @input="setOptHideDuplicates">{{
|
||||||
|
$t("opt.hideDuplicates")
|
||||||
|
}}
|
||||||
|
</b-form-checkbox>
|
||||||
|
|
||||||
<b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{ $t("opt.highlight") }}</b-form-checkbox>
|
<b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{ $t("opt.highlight") }}</b-form-checkbox>
|
||||||
<b-form-checkbox :checked="optTagOrOperator" @input="setOptTagOrOperator">{{
|
<b-form-checkbox :checked="optTagOrOperator" @input="setOptTagOrOperator">{{
|
||||||
$t("opt.tagOrOperator")
|
$t("opt.tagOrOperator")
|
||||||
@@ -206,10 +215,11 @@ export default {
|
|||||||
"optTreemapSize",
|
"optTreemapSize",
|
||||||
"optLightboxLoadOnlyCurrent",
|
"optLightboxLoadOnlyCurrent",
|
||||||
"optLightboxSlideDuration",
|
"optLightboxSlideDuration",
|
||||||
"optContainerWidth",
|
|
||||||
"optResultSize",
|
"optResultSize",
|
||||||
"optTagOrOperator",
|
"optTagOrOperator",
|
||||||
"optLang"
|
"optLang",
|
||||||
|
"optHideDuplicates",
|
||||||
|
"optHideLegacy",
|
||||||
]),
|
]),
|
||||||
clientWidth() {
|
clientWidth() {
|
||||||
return window.innerWidth;
|
return window.innerWidth;
|
||||||
@@ -248,7 +258,9 @@ export default {
|
|||||||
"setOptContainerWidth",
|
"setOptContainerWidth",
|
||||||
"setOptResultSize",
|
"setOptResultSize",
|
||||||
"setOptTagOrOperator",
|
"setOptTagOrOperator",
|
||||||
"setOptLang"
|
"setOptLang",
|
||||||
|
"setOptHideDuplicates",
|
||||||
|
"setOptHideLegacy"
|
||||||
]),
|
]),
|
||||||
onResetClick() {
|
onResetClick() {
|
||||||
localStorage.removeItem("sist2_configuration");
|
localStorage.removeItem("sist2_configuration");
|
||||||
|
|||||||
@@ -31,7 +31,7 @@
|
|||||||
</b-row>
|
</b-row>
|
||||||
</b-col>
|
</b-col>
|
||||||
<b-col>
|
<b-col>
|
||||||
<b-tabs>
|
<b-tabs justified>
|
||||||
<b-tab :title="$t('mimeTypes')">
|
<b-tab :title="$t('mimeTypes')">
|
||||||
<MimePicker></MimePicker>
|
<MimePicker></MimePicker>
|
||||||
</b-tab>
|
</b-tab>
|
||||||
@@ -43,9 +43,13 @@
|
|||||||
</b-row>
|
</b-row>
|
||||||
</b-card>
|
</b-card>
|
||||||
|
|
||||||
<Preloader v-if="searchBusy && docs.length === 0" class="mt-3"></Preloader>
|
<div v-show="docs.length === 0 && !uiLoading">
|
||||||
|
<Preloader v-if="searchBusy" class="mt-3"></Preloader>
|
||||||
|
|
||||||
<div v-else-if="docs.length > 0">
|
<ResultsCard></ResultsCard>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div v-if="docs.length > 0">
|
||||||
<ResultsCard></ResultsCard>
|
<ResultsCard></ResultsCard>
|
||||||
|
|
||||||
<DocCardWall v-if="optDisplay==='grid'" :docs="docs" :append="appendFunc"></DocCardWall>
|
<DocCardWall v-if="optDisplay==='grid'" :docs="docs" :append="appendFunc"></DocCardWall>
|
||||||
@@ -91,6 +95,7 @@ export default Vue.extend({
|
|||||||
search: undefined as any,
|
search: undefined as any,
|
||||||
docs: [] as EsHit[],
|
docs: [] as EsHit[],
|
||||||
docIds: new Set(),
|
docIds: new Set(),
|
||||||
|
docChecksums: new Set(),
|
||||||
searchBusy: false,
|
searchBusy: false,
|
||||||
Sist2Query: Sist2Query,
|
Sist2Query: Sist2Query,
|
||||||
showHelp: false
|
showHelp: false
|
||||||
@@ -108,10 +113,6 @@ export default Vue.extend({
|
|||||||
|
|
||||||
}, 350, {leading: false});
|
}, 350, {leading: false});
|
||||||
|
|
||||||
Sist2Api.getMimeTypes().then(mimeMap => {
|
|
||||||
this.$store.commit("setUiMimeMap", mimeMap);
|
|
||||||
});
|
|
||||||
|
|
||||||
this.$store.dispatch("loadFromArgs", this.$route).then(() => {
|
this.$store.dispatch("loadFromArgs", this.$route).then(() => {
|
||||||
this.$store.subscribe(() => this.$store.dispatch("updateArgs", this.$router));
|
this.$store.subscribe(() => this.$store.dispatch("updateArgs", this.$router));
|
||||||
this.$store.subscribe((mutation) => {
|
this.$store.subscribe((mutation) => {
|
||||||
@@ -137,9 +138,13 @@ export default Vue.extend({
|
|||||||
sist2.getSist2Info().then(data => {
|
sist2.getSist2Info().then(data => {
|
||||||
this.setSist2Info(data);
|
this.setSist2Info(data);
|
||||||
this.setIndices(data.indices);
|
this.setIndices(data.indices);
|
||||||
this.uiLoading = false;
|
|
||||||
|
|
||||||
this.search(true);
|
Sist2Api.getMimeTypes().then(mimeMap => {
|
||||||
|
this.$store.commit("setUiMimeMap", mimeMap);
|
||||||
|
this.uiLoading = false;
|
||||||
|
this.search(true);
|
||||||
|
});
|
||||||
|
|
||||||
}).catch(() => {
|
}).catch(() => {
|
||||||
this.showErrorToast();
|
this.showErrorToast();
|
||||||
});
|
});
|
||||||
@@ -193,6 +198,7 @@ export default Vue.extend({
|
|||||||
async clearResults() {
|
async clearResults() {
|
||||||
this.docs = [];
|
this.docs = [];
|
||||||
this.docIds.clear();
|
this.docIds.clear();
|
||||||
|
this.docChecksums.clear();
|
||||||
await this.$store.dispatch("clearResults");
|
await this.$store.dispatch("clearResults");
|
||||||
this.$store.commit("setUiReachedScrollEnd", false);
|
this.$store.commit("setUiReachedScrollEnd", false);
|
||||||
},
|
},
|
||||||
@@ -202,7 +208,19 @@ export default Vue.extend({
|
|||||||
}
|
}
|
||||||
|
|
||||||
resp.hits.hits = resp.hits.hits.filter(hit => !this.docIds.has(hit._id));
|
resp.hits.hits = resp.hits.hits.filter(hit => !this.docIds.has(hit._id));
|
||||||
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
|
|
||||||
|
if (this.$store.state.optHideDuplicates) {
|
||||||
|
resp.hits.hits = resp.hits.hits.filter(hit => {
|
||||||
|
|
||||||
|
if (!("checksum" in hit._source)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const isDupe = !this.docChecksums.has(hit._source.checksum);
|
||||||
|
this.docChecksums.add(hit._source.checksum);
|
||||||
|
return isDupe;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
for (const hit of resp.hits.hits) {
|
for (const hit of resp.hits.hits) {
|
||||||
if (hit._props.isPlayableImage || hit._props.isPlayableVideo) {
|
if (hit._props.isPlayableImage || hit._props.isPlayableVideo) {
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ typedef struct scan_args {
|
|||||||
int max_memory_buffer;
|
int max_memory_buffer;
|
||||||
int read_subtitles;
|
int read_subtitles;
|
||||||
int fast_epub;
|
int fast_epub;
|
||||||
|
int calculate_checksums;
|
||||||
} scan_args_t;
|
} scan_args_t;
|
||||||
|
|
||||||
scan_args_t *scan_args_create();
|
scan_args_t *scan_args_create();
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
ScanCtx_t ScanCtx = {
|
ScanCtx_t ScanCtx = {
|
||||||
.stat_index_size = 0,
|
.stat_index_size = 0,
|
||||||
|
.stat_tn_size = 0,
|
||||||
.dbg_current_files = NULL,
|
.dbg_current_files = NULL,
|
||||||
.pool = NULL
|
.pool = NULL
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -15,7 +15,9 @@
|
|||||||
#include "libscan/raw/raw.h"
|
#include "libscan/raw/raw.h"
|
||||||
#include "libscan/msdoc/msdoc.h"
|
#include "libscan/msdoc/msdoc.h"
|
||||||
#include "libscan/wpd/wpd.h"
|
#include "libscan/wpd/wpd.h"
|
||||||
|
#include "libscan/json/json.h"
|
||||||
#include "src/io/store.h"
|
#include "src/io/store.h"
|
||||||
|
#include "src/index/elastic.h"
|
||||||
|
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
#include <pcre.h>
|
#include <pcre.h>
|
||||||
@@ -32,6 +34,7 @@ typedef struct {
|
|||||||
|
|
||||||
int threads;
|
int threads;
|
||||||
int depth;
|
int depth;
|
||||||
|
int calculate_checksums;
|
||||||
|
|
||||||
size_t stat_tn_size;
|
size_t stat_tn_size;
|
||||||
size_t stat_index_size;
|
size_t stat_index_size;
|
||||||
@@ -62,6 +65,7 @@ typedef struct {
|
|||||||
scan_raw_ctx_t raw_ctx;
|
scan_raw_ctx_t raw_ctx;
|
||||||
scan_msdoc_ctx_t msdoc_ctx;
|
scan_msdoc_ctx_t msdoc_ctx;
|
||||||
scan_wpd_ctx_t wpd_ctx;
|
scan_wpd_ctx_t wpd_ctx;
|
||||||
|
scan_json_ctx_t json_ctx;
|
||||||
} ScanCtx_t;
|
} ScanCtx_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -72,6 +76,7 @@ typedef struct {
|
|||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char *es_url;
|
char *es_url;
|
||||||
|
es_version_t *es_version;
|
||||||
char *es_index;
|
char *es_index;
|
||||||
int batch_size;
|
int batch_size;
|
||||||
tpool_t *pool;
|
tpool_t *pool;
|
||||||
@@ -83,6 +88,7 @@ typedef struct {
|
|||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char *es_url;
|
char *es_url;
|
||||||
|
es_version_t *es_version;
|
||||||
char *es_index;
|
char *es_index;
|
||||||
int index_count;
|
int index_count;
|
||||||
char *auth_user;
|
char *auth_user;
|
||||||
|
|||||||
@@ -253,7 +253,7 @@ void _elastic_flush(int max) {
|
|||||||
} else {
|
} else {
|
||||||
|
|
||||||
print_errors(r);
|
print_errors(r);
|
||||||
LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
|
LOG_DEBUGF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
|
||||||
delete_queue(max);
|
delete_queue(max);
|
||||||
|
|
||||||
if (Indexer->queued != 0) {
|
if (Indexer->queued != 0) {
|
||||||
@@ -356,7 +356,65 @@ void finish_indexer(char *script, int async_script, char *index_id) {
|
|||||||
free_response(r);
|
free_response(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings) {
|
es_version_t *elastic_get_version(const char *es_url) {
|
||||||
|
response_t *r = web_get(es_url, 30);
|
||||||
|
|
||||||
|
char *tmp = malloc(r->size + 1);
|
||||||
|
memcpy(tmp, r->body, r->size);
|
||||||
|
*(tmp + r->size) = '\0';
|
||||||
|
cJSON *response = cJSON_Parse(tmp);
|
||||||
|
free(tmp);
|
||||||
|
free_response(r);
|
||||||
|
|
||||||
|
if (response == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cJSON_GetObjectItem(response, "version") == NULL ||
|
||||||
|
cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number") == NULL) {
|
||||||
|
cJSON_Delete(response);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *version_str = cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number")->valuestring;
|
||||||
|
|
||||||
|
es_version_t *version = malloc(sizeof(es_version_t));
|
||||||
|
|
||||||
|
const char *tok = strtok(version_str, ".");
|
||||||
|
version->major = atoi(tok);
|
||||||
|
tok = strtok(NULL, ".");
|
||||||
|
version->minor = atoi(tok);
|
||||||
|
tok = strtok(NULL, ".");
|
||||||
|
version->patch = atoi(tok);
|
||||||
|
|
||||||
|
cJSON_Delete(response);
|
||||||
|
|
||||||
|
return version;
|
||||||
|
}
|
||||||
|
|
||||||
|
void elastic_init(int force_reset, const char *user_mappings, const char *user_settings) {
|
||||||
|
|
||||||
|
es_version_t *es_version = elastic_get_version(IndexCtx.es_url);
|
||||||
|
IndexCtx.es_version = es_version;
|
||||||
|
|
||||||
|
if (es_version == NULL) {
|
||||||
|
LOG_FATAL("elastic.c", "Could not get ES version")
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_INFOF("elastic.c",
|
||||||
|
"Elasticsearch version is %s (supported=%d, legacy=%d)",
|
||||||
|
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), USE_LEGACY_ES_SETTINGS(es_version));
|
||||||
|
|
||||||
|
if (!IS_SUPPORTED_ES_VERSION(es_version)) {
|
||||||
|
LOG_FATAL("elastic.c", "sist2 only supports Elasticsearch v6.8 or newer")
|
||||||
|
}
|
||||||
|
|
||||||
|
char *settings = NULL;
|
||||||
|
if (USE_LEGACY_ES_SETTINGS(es_version)) {
|
||||||
|
settings = settings_json;
|
||||||
|
} else {
|
||||||
|
settings = settings_legacy_json;
|
||||||
|
}
|
||||||
|
|
||||||
// Check if index exists
|
// Check if index exists
|
||||||
char url[4096];
|
char url[4096];
|
||||||
@@ -392,7 +450,7 @@ void elastic_init(int force_reset, const char* user_mappings, const char* user_s
|
|||||||
free_response(r);
|
free_response(r);
|
||||||
|
|
||||||
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
r = web_put(url, user_settings ? user_settings : settings_json);
|
r = web_put(url, user_settings ? user_settings : settings);
|
||||||
LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
|
LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
|
||||||
if (r->status_code != 200) {
|
if (r->status_code != 200) {
|
||||||
print_error(r);
|
print_error(r);
|
||||||
|
|||||||
@@ -9,6 +9,26 @@ typedef struct es_bulk_line {
|
|||||||
char line[0];
|
char line[0];
|
||||||
} es_bulk_line_t;
|
} es_bulk_line_t;
|
||||||
|
|
||||||
|
/** Elasticsearch server version split into its numeric components. */
typedef struct {
    int major, minor, patch;
} es_version_t;

/* True when *version is at least maj.min; the patch level is not compared. */
#define VERSION_GE(version, maj, min) \
    ((version)->major > (maj) || \
     ((version)->major == (maj) && (version)->minor >= (min)))

/* Versions 6.8 and newer are accepted. */
#define IS_SUPPORTED_ES_VERSION(es_version) VERSION_GE((es_version), 6, 8)

/* Versions older than 7.14 are flagged for the legacy settings. */
#define USE_LEGACY_ES_SETTINGS(es_version) (!VERSION_GE((es_version), 7, 14))
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static const char *format_es_version(es_version_t *version) {
|
||||||
|
static char buf[64];
|
||||||
|
|
||||||
|
snprintf(buf, sizeof(buf), "%d.%d.%d", version->major, version->minor, version->patch);
|
||||||
|
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Note: indexer is *not* thread safe
|
* Note: indexer is *not* thread safe
|
||||||
*/
|
*/
|
||||||
@@ -31,6 +51,8 @@ cJSON *elastic_get_document(const char *id_str);
|
|||||||
|
|
||||||
char *elastic_get_status();
|
char *elastic_get_status();
|
||||||
|
|
||||||
|
es_version_t *elastic_get_version(const char *es_url);
|
||||||
|
|
||||||
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
|
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
5
src/index/static_generated.c
vendored
5
src/index/static_generated.c
vendored
File diff suppressed because one or more lines are too long
@@ -74,6 +74,8 @@ char *get_meta_key_text(enum metakey meta_key) {
|
|||||||
return "exif_gps_latitude_dms";
|
return "exif_gps_latitude_dms";
|
||||||
case MetaExifGpsLatitudeDec:
|
case MetaExifGpsLatitudeDec:
|
||||||
return "exif_gps_latitude_dec";
|
return "exif_gps_latitude_dec";
|
||||||
|
case MetaChecksum:
|
||||||
|
return "checksum";
|
||||||
default:
|
default:
|
||||||
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
|
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
|
||||||
}
|
}
|
||||||
@@ -165,6 +167,7 @@ char *build_json_string(document_t *doc) {
|
|||||||
case MetaExifGpsLatitudeDMS:
|
case MetaExifGpsLatitudeDMS:
|
||||||
case MetaExifGpsLatitudeDec:
|
case MetaExifGpsLatitudeDec:
|
||||||
case MetaExifGpsLatitudeRef:
|
case MetaExifGpsLatitudeRef:
|
||||||
|
case MetaChecksum:
|
||||||
case MetaTitle: {
|
case MetaTitle: {
|
||||||
cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val);
|
cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val);
|
||||||
buffer_size_guess += (int) strlen(meta->str_val);
|
buffer_size_guess += (int) strlen(meta->str_val);
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
store_t *store_create(const char *path, size_t chunk_size) {
|
store_t *store_create(const char *path, size_t chunk_size) {
|
||||||
store_t *store = malloc(sizeof(struct store_t));
|
store_t *store = malloc(sizeof(struct store_t));
|
||||||
mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
|
mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||||
|
strcpy(store->path, path);
|
||||||
|
|
||||||
#if (SIST_FAKE_STORE != 1)
|
#if (SIST_FAKE_STORE != 1)
|
||||||
store->chunk_size = chunk_size;
|
store->chunk_size = chunk_size;
|
||||||
@@ -22,7 +23,6 @@ store_t *store_create(const char *path, size_t chunk_size) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
store->size = (size_t) store->chunk_size;
|
store->size = (size_t) store->chunk_size;
|
||||||
ScanCtx.stat_tn_size = 0;
|
|
||||||
mdb_env_set_mapsize(store->env, store->size);
|
mdb_env_set_mapsize(store->env, store->size);
|
||||||
|
|
||||||
// Open dbi
|
// Open dbi
|
||||||
@@ -78,27 +78,57 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
|||||||
int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
||||||
ScanCtx.stat_tn_size += buf_len;
|
ScanCtx.stat_tn_size += buf_len;
|
||||||
|
|
||||||
|
int db_full = FALSE;
|
||||||
|
int should_abort_transaction = FALSE;
|
||||||
|
|
||||||
if (put_ret == MDB_MAP_FULL) {
|
if (put_ret == MDB_MAP_FULL) {
|
||||||
mdb_txn_abort(txn);
|
db_full = TRUE;
|
||||||
|
should_abort_transaction = TRUE;
|
||||||
|
} else {
|
||||||
|
int commit_ret = mdb_txn_commit(txn);
|
||||||
|
|
||||||
|
if (commit_ret == MDB_MAP_FULL) {
|
||||||
|
db_full = TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (db_full) {
|
||||||
|
LOG_INFOF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
|
||||||
|
|
||||||
|
if (should_abort_transaction) {
|
||||||
|
mdb_txn_abort(txn);
|
||||||
|
}
|
||||||
|
|
||||||
pthread_rwlock_unlock(&store->lock);
|
pthread_rwlock_unlock(&store->lock);
|
||||||
|
|
||||||
// Cannot resize when there is a opened transaction.
|
// Cannot resize when there is a opened transaction.
|
||||||
// Resize take effect on the next commit.
|
// Resize take effect on the next commit.
|
||||||
pthread_rwlock_wrlock(&store->lock);
|
pthread_rwlock_wrlock(&store->lock);
|
||||||
store->size += store->chunk_size;
|
store->size += store->chunk_size;
|
||||||
mdb_env_set_mapsize(store->env, store->size);
|
int resize_ret = mdb_env_set_mapsize(store->env, store->size);
|
||||||
|
if (resize_ret != 0) {
    // Report the failure of mdb_env_set_mapsize() itself, not the earlier put.
    LOG_ERROR("store.c", mdb_strerror(resize_ret))
}
|
||||||
mdb_txn_begin(store->env, NULL, 0, &txn);
|
mdb_txn_begin(store->env, NULL, 0, &txn);
|
||||||
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
int put_ret_retry = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
||||||
|
|
||||||
|
if (put_ret_retry != 0) {
    // Report the retried mdb_put()'s own error code, not the first attempt's.
    LOG_ERROR("store.c", mdb_strerror(put_ret_retry))
}
|
||||||
|
|
||||||
|
int ret = mdb_txn_commit(txn);
|
||||||
|
if (ret != 0) {
|
||||||
|
LOG_FATALF("store.c", "FIXME: Could not commit to store %s: %s (%d), %d, %d %d",
|
||||||
|
store->path, mdb_strerror(ret), ret,
|
||||||
|
put_ret, put_ret_retry);
|
||||||
|
}
|
||||||
LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
|
LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
|
||||||
}
|
} else if (put_ret != 0) {
|
||||||
|
|
||||||
mdb_txn_commit(txn);
|
|
||||||
pthread_rwlock_unlock(&store->lock);
|
|
||||||
|
|
||||||
if (put_ret != 0) {
|
|
||||||
LOG_ERROR("store.c", mdb_strerror(put_ret))
|
LOG_ERROR("store.c", mdb_strerror(put_ret))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pthread_rwlock_unlock(&store->lock);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,12 +6,12 @@
|
|||||||
|
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
|
|
||||||
#define STORE_SIZE_TN 1024 * 1024 * 5
|
#define STORE_SIZE_TN (1024 * 1024 * 5)
|
||||||
#define STORE_SIZE_TAG 1024 * 16
|
#define STORE_SIZE_TAG (1024 * 1024)
|
||||||
#define STORE_SIZE_META STORE_SIZE_TAG
|
#define STORE_SIZE_META STORE_SIZE_TAG
|
||||||
|
|
||||||
typedef struct store_t {
|
typedef struct store_t {
|
||||||
char *path;
|
char path[PATH_MAX];
|
||||||
char *tmp_path;
|
char *tmp_path;
|
||||||
MDB_dbi dbi;
|
MDB_dbi dbi;
|
||||||
MDB_env *env;
|
MDB_env *env;
|
||||||
|
|||||||
@@ -24,39 +24,55 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
|
|||||||
|
|
||||||
job->vfile.filepath = job->filepath;
|
job->vfile.filepath = job->filepath;
|
||||||
job->vfile.read = fs_read;
|
job->vfile.read = fs_read;
|
||||||
|
// Filesystem reads are always rewindable
|
||||||
|
job->vfile.read_rewindable = fs_read;
|
||||||
job->vfile.reset = fs_reset;
|
job->vfile.reset = fs_reset;
|
||||||
job->vfile.close = fs_close;
|
job->vfile.close = fs_close;
|
||||||
job->vfile.fd = -1;
|
job->vfile.fd = -1;
|
||||||
job->vfile.is_fs_file = TRUE;
|
job->vfile.is_fs_file = TRUE;
|
||||||
|
job->vfile.has_checksum = FALSE;
|
||||||
|
job->vfile.rewind_buffer_size = 0;
|
||||||
|
job->vfile.rewind_buffer = NULL;
|
||||||
|
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
|
||||||
|
|
||||||
return job;
|
return job;
|
||||||
}
|
}
|
||||||
|
|
||||||
int sub_strings[30];
|
int sub_strings[30];
|
||||||
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, filepath, strlen(filepath), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
|
/* True when str matches the exclude pattern. pcre_exec() takes the output vector size as a number of ints, not bytes. */
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, (str), (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
|
||||||
|
|
||||||
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
|
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
|
||||||
|
|
||||||
if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
|
if (ftw->level > ScanCtx.depth) {
|
||||||
|
if (typeflag == FTW_D) {
|
||||||
|
return FTW_SKIP_SUBTREE;
|
||||||
|
}
|
||||||
|
return FTW_CONTINUE;
|
||||||
|
}
|
||||||
|
|
||||||
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
|
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
|
||||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
|
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
|
||||||
|
|
||||||
|
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||||
ScanCtx.dbg_excluded_files_count += 1;
|
ScanCtx.dbg_excluded_files_count += 1;
|
||||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||||
return 0;
|
return 0;
|
||||||
|
} else if (typeflag == FTW_D) {
|
||||||
|
return FTW_SKIP_SUBTREE;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||||
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
|
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
|
||||||
tpool_add_work(ScanCtx.pool, parse, job);
|
tpool_add_work(ScanCtx.pool, parse, job);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return FTW_CONTINUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MAX_FILE_DESCRIPTORS 64
|
#define MAX_FILE_DESCRIPTORS 64
|
||||||
|
|
||||||
int walk_directory_tree(const char *dirpath) {
|
int walk_directory_tree(const char *dirpath) {
|
||||||
return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_DEPTH);
|
return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_ACTIONRETVAL);
|
||||||
}
|
}
|
||||||
|
|||||||
17
src/main.c
17
src/main.c
@@ -170,6 +170,8 @@ void initialize_scan_context(scan_args_t *args) {
|
|||||||
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
|
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
|
||||||
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
|
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
|
||||||
|
|
||||||
|
ScanCtx.calculate_checksums = args->calculate_checksums;
|
||||||
|
|
||||||
// Archive
|
// Archive
|
||||||
ScanCtx.arc_ctx.mode = args->archive_mode;
|
ScanCtx.arc_ctx.mode = args->archive_mode;
|
||||||
ScanCtx.arc_ctx.log = _log;
|
ScanCtx.arc_ctx.log = _log;
|
||||||
@@ -259,10 +261,18 @@ void initialize_scan_context(scan_args_t *args) {
|
|||||||
ScanCtx.raw_ctx.logf = _logf;
|
ScanCtx.raw_ctx.logf = _logf;
|
||||||
ScanCtx.raw_ctx.store = _store;
|
ScanCtx.raw_ctx.store = _store;
|
||||||
|
|
||||||
|
// Wpd
|
||||||
ScanCtx.wpd_ctx.content_size = args->content_size;
|
ScanCtx.wpd_ctx.content_size = args->content_size;
|
||||||
ScanCtx.wpd_ctx.log = _log;
|
ScanCtx.wpd_ctx.log = _log;
|
||||||
ScanCtx.wpd_ctx.logf = _logf;
|
ScanCtx.wpd_ctx.logf = _logf;
|
||||||
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect");
|
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect");
|
||||||
|
|
||||||
|
// Json
|
||||||
|
ScanCtx.json_ctx.content_size = args->content_size;
|
||||||
|
ScanCtx.json_ctx.log = _log;
|
||||||
|
ScanCtx.json_ctx.logf = _logf;
|
||||||
|
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/json");
|
||||||
|
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -423,7 +433,7 @@ void sist2_index(index_args_t *args) {
|
|||||||
cleanup = elastic_cleanup;
|
cleanup = elastic_cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE, FALSE);
|
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE, TRUE);
|
||||||
tpool_start(IndexCtx.pool);
|
tpool_start(IndexCtx.pool);
|
||||||
|
|
||||||
struct dirent *de;
|
struct dirent *de;
|
||||||
@@ -508,8 +518,8 @@ void sist2_web(web_args_t *args) {
|
|||||||
|
|
||||||
|
|
||||||
int main(int argc, const char *argv[]) {
|
int main(int argc, const char *argv[]) {
|
||||||
sigsegv_handler = signal(SIGSEGV, sig_handler);
|
// sigsegv_handler = signal(SIGSEGV, sig_handler);
|
||||||
sigabrt_handler = signal(SIGABRT, sig_handler);
|
// sigabrt_handler = signal(SIGABRT, sig_handler);
|
||||||
|
|
||||||
setlocale(LC_ALL, "");
|
setlocale(LC_ALL, "");
|
||||||
|
|
||||||
@@ -566,6 +576,7 @@ int main(int argc, const char *argv[]) {
|
|||||||
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
|
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
|
||||||
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
|
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
|
||||||
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
|
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
|
||||||
|
OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
|
||||||
|
|
||||||
OPT_GROUP("Index options"),
|
OPT_GROUP("Index options"),
|
||||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||||
|
|||||||
839
src/parsing/mime_generated.c
vendored
839
src/parsing/mime_generated.c
vendored
@@ -35,425 +35,426 @@ enum mime {
|
|||||||
application_mime=655387,
|
application_mime=655387,
|
||||||
application_mspowerpoint=655388,
|
application_mspowerpoint=655388,
|
||||||
application_msword=655389,
|
application_msword=655389,
|
||||||
application_netmc=655390,
|
application_ndjson=655390,
|
||||||
application_octet_stream=655391,
|
application_netmc=655391,
|
||||||
application_oda=655392,
|
application_octet_stream=655392,
|
||||||
application_ogg=655393,
|
application_oda=655393,
|
||||||
application_pdf=655394 | 0x40000000,
|
application_ogg=655394,
|
||||||
application_pgp_keys=655395,
|
application_pdf=655395 | 0x40000000,
|
||||||
application_pgp_signature=655396,
|
application_pgp_keys=655396,
|
||||||
application_pkcs7_signature=655397,
|
application_pgp_signature=655397,
|
||||||
application_pkix_cert=655398,
|
application_pkcs7_signature=655398,
|
||||||
application_postscript=655399,
|
application_pkix_cert=655399,
|
||||||
application_pro_eng=655400,
|
application_postscript=655400,
|
||||||
application_ringing_tones=655401,
|
application_pro_eng=655401,
|
||||||
application_smil=655402,
|
application_ringing_tones=655402,
|
||||||
application_solids=655403,
|
application_smil=655403,
|
||||||
application_sounder=655404,
|
application_solids=655404,
|
||||||
application_step=655405,
|
application_sounder=655405,
|
||||||
application_streamingmedia=655406,
|
application_step=655406,
|
||||||
application_vda=655407,
|
application_streamingmedia=655407,
|
||||||
application_vnd_amazon_mobi8_ebook=655408 | 0x02000000,
|
application_vda=655408,
|
||||||
application_vnd_coffeescript=655409,
|
application_vnd_amazon_mobi8_ebook=655409 | 0x02000000,
|
||||||
application_vnd_fdf=655410,
|
application_vnd_coffeescript=655410,
|
||||||
application_vnd_font_fontforge_sfd=655411,
|
application_vnd_fdf=655411,
|
||||||
application_vnd_hp_hpgl=655412,
|
application_vnd_font_fontforge_sfd=655412,
|
||||||
application_vnd_iccprofile=655413,
|
application_vnd_hp_hpgl=655413,
|
||||||
application_vnd_lotus_1_2_3=655414,
|
application_vnd_iccprofile=655414,
|
||||||
application_vnd_ms_cab_compressed=655415,
|
application_vnd_lotus_1_2_3=655415,
|
||||||
application_vnd_ms_excel=655416,
|
application_vnd_ms_cab_compressed=655416,
|
||||||
application_vnd_ms_fontobject=655417,
|
application_vnd_ms_excel=655417,
|
||||||
application_vnd_ms_opentype=655418 | 0x20000000,
|
application_vnd_ms_fontobject=655418,
|
||||||
application_vnd_ms_outlook=655419,
|
application_vnd_ms_opentype=655419 | 0x20000000,
|
||||||
application_vnd_ms_pki_certstore=655420,
|
application_vnd_ms_outlook=655420,
|
||||||
application_vnd_ms_pki_pko=655421,
|
application_vnd_ms_pki_certstore=655421,
|
||||||
application_vnd_ms_pki_seccat=655422,
|
application_vnd_ms_pki_pko=655422,
|
||||||
application_vnd_ms_powerpoint=655423,
|
application_vnd_ms_pki_seccat=655423,
|
||||||
application_vnd_ms_project=655424,
|
application_vnd_ms_powerpoint=655424,
|
||||||
application_vnd_oasis_opendocument_base=655425,
|
application_vnd_ms_project=655425,
|
||||||
application_vnd_oasis_opendocument_formula=655426,
|
application_vnd_oasis_opendocument_base=655426,
|
||||||
application_vnd_oasis_opendocument_graphics=655427,
|
application_vnd_oasis_opendocument_formula=655427,
|
||||||
application_vnd_oasis_opendocument_presentation=655428,
|
application_vnd_oasis_opendocument_graphics=655428,
|
||||||
application_vnd_oasis_opendocument_spreadsheet=655429,
|
application_vnd_oasis_opendocument_presentation=655429,
|
||||||
application_vnd_oasis_opendocument_text=655430,
|
application_vnd_oasis_opendocument_spreadsheet=655430,
|
||||||
application_vnd_openxmlformats_officedocument_presentationml_presentation=655431 | 0x04000000,
|
application_vnd_oasis_opendocument_text=655431,
|
||||||
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655432 | 0x04000000,
|
application_vnd_openxmlformats_officedocument_presentationml_presentation=655432 | 0x04000000,
|
||||||
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655433 | 0x04000000,
|
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655433 | 0x04000000,
|
||||||
application_vnd_symbian_install=655434,
|
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655434 | 0x04000000,
|
||||||
application_vnd_tcpdump_pcap=655435,
|
application_vnd_symbian_install=655435,
|
||||||
application_vnd_wap_wmlc=655436,
|
application_vnd_tcpdump_pcap=655436,
|
||||||
application_vnd_wap_wmlscriptc=655437,
|
application_vnd_wap_wmlc=655437,
|
||||||
application_vnd_xara=655438,
|
application_vnd_wap_wmlscriptc=655438,
|
||||||
application_vocaltec_media_desc=655439,
|
application_vnd_xara=655439,
|
||||||
application_vocaltec_media_file=655440,
|
application_vocaltec_media_desc=655440,
|
||||||
application_warc=655441,
|
application_vocaltec_media_file=655441,
|
||||||
application_winhelp=655442,
|
application_warc=655442,
|
||||||
application_wordperfect=655443,
|
application_winhelp=655443,
|
||||||
application_x_123=655444,
|
application_wordperfect=655444,
|
||||||
application_x_7z_compressed=655445 | 0x10000000,
|
application_x_123=655445,
|
||||||
application_x_aim=655446,
|
application_x_7z_compressed=655446 | 0x10000000,
|
||||||
application_x_apple_diskimage=655447,
|
application_x_aim=655447,
|
||||||
application_x_arc=655448 | 0x10000000,
|
application_x_apple_diskimage=655448,
|
||||||
application_x_archive=655449,
|
application_x_arc=655449 | 0x10000000,
|
||||||
application_x_atari_7800_rom=655450,
|
application_x_archive=655450,
|
||||||
application_x_authorware_bin=655451,
|
application_x_atari_7800_rom=655451,
|
||||||
application_x_authorware_map=655452,
|
application_x_authorware_bin=655452,
|
||||||
application_x_authorware_seg=655453,
|
application_x_authorware_map=655453,
|
||||||
application_x_avira_qua=655454,
|
application_x_authorware_seg=655454,
|
||||||
application_x_bcpio=655455,
|
application_x_avira_qua=655455,
|
||||||
application_x_bittorrent=655456,
|
application_x_bcpio=655456,
|
||||||
application_x_bsh=655457,
|
application_x_bittorrent=655457,
|
||||||
application_x_bytecode_python=655458,
|
application_x_bsh=655458,
|
||||||
application_x_bzip=655459,
|
application_x_bytecode_python=655459,
|
||||||
application_x_bzip2=655460 | 0x08000000,
|
application_x_bzip=655460,
|
||||||
application_x_cbr=655461,
|
application_x_bzip2=655461 | 0x08000000,
|
||||||
application_x_cbz=655462,
|
application_x_cbr=655462,
|
||||||
application_x_cdlink=655463,
|
application_x_cbz=655463,
|
||||||
application_x_chat=655464,
|
application_x_cdlink=655464,
|
||||||
application_x_chrome_extension=655465,
|
application_x_chat=655465,
|
||||||
application_x_cocoa=655466,
|
application_x_chrome_extension=655466,
|
||||||
application_x_conference=655467,
|
application_x_cocoa=655467,
|
||||||
application_x_coredump=655468,
|
application_x_conference=655468,
|
||||||
application_x_cpio=655469,
|
application_x_coredump=655469,
|
||||||
application_x_dbf=655470,
|
application_x_cpio=655470,
|
||||||
application_x_dbt=655471,
|
application_x_dbf=655471,
|
||||||
application_x_debian_package=655472,
|
application_x_dbt=655472,
|
||||||
application_x_deepv=655473,
|
application_x_debian_package=655473,
|
||||||
application_x_director=655474,
|
application_x_deepv=655474,
|
||||||
application_x_dmp=655475,
|
application_x_director=655475,
|
||||||
application_x_dosdriver=655476,
|
application_x_dmp=655476,
|
||||||
application_x_dosexec=655477,
|
application_x_dosdriver=655477,
|
||||||
application_x_dvi=655478,
|
application_x_dosexec=655478,
|
||||||
application_x_elc=655479,
|
application_x_dvi=655479,
|
||||||
|
application_x_elc=655480,
|
||||||
application_x_empty=1,
|
application_x_empty=1,
|
||||||
application_x_envoy=655480,
|
application_x_envoy=655481,
|
||||||
application_x_esrehber=655481,
|
application_x_esrehber=655482,
|
||||||
application_x_excel=655482,
|
application_x_excel=655483,
|
||||||
application_x_executable=655483,
|
application_x_executable=655484,
|
||||||
application_x_font_gdos=655484,
|
application_x_font_gdos=655485,
|
||||||
application_x_font_pf2=655485,
|
application_x_font_pf2=655486,
|
||||||
application_x_font_pfm=655486,
|
application_x_font_pfm=655487,
|
||||||
application_x_font_sfn=655487,
|
application_x_font_sfn=655488,
|
||||||
application_x_font_ttf=655488 | 0x20000000,
|
application_x_font_ttf=655489 | 0x20000000,
|
||||||
application_x_fptapplication_x_dbt=655489,
|
application_x_fptapplication_x_dbt=655490,
|
||||||
application_x_freelance=655490,
|
application_x_freelance=655491,
|
||||||
application_x_gamecube_rom=655491,
|
application_x_gamecube_rom=655492,
|
||||||
application_x_gdbm=655492,
|
application_x_gdbm=655493,
|
||||||
application_x_gettext_translation=655493,
|
application_x_gettext_translation=655494,
|
||||||
application_x_git=655494,
|
application_x_git=655495,
|
||||||
application_x_gsp=655495,
|
application_x_gsp=655496,
|
||||||
application_x_gss=655496,
|
application_x_gss=655497,
|
||||||
application_x_gtar=655497,
|
application_x_gtar=655498,
|
||||||
application_x_gzip=655498,
|
application_x_gzip=655499,
|
||||||
application_x_hdf=655499,
|
application_x_hdf=655500,
|
||||||
application_x_helpfile=655500,
|
application_x_helpfile=655501,
|
||||||
application_x_httpd_imap=655501,
|
application_x_httpd_imap=655502,
|
||||||
application_x_ima=655502,
|
application_x_ima=655503,
|
||||||
application_x_innosetup=655503,
|
application_x_innosetup=655504,
|
||||||
application_x_internett_signup=655504,
|
application_x_internett_signup=655505,
|
||||||
application_x_inventor=655505,
|
application_x_inventor=655506,
|
||||||
application_x_ip2=655506,
|
application_x_ip2=655507,
|
||||||
application_x_java_applet=655507,
|
application_x_java_applet=655508,
|
||||||
application_x_java_commerce=655508,
|
application_x_java_commerce=655509,
|
||||||
application_x_java_image=655509,
|
application_x_java_image=655510,
|
||||||
application_x_java_jmod=655510,
|
application_x_java_jmod=655511,
|
||||||
application_x_java_keystore=655511,
|
application_x_java_keystore=655512,
|
||||||
application_x_kdelnk=655512,
|
application_x_kdelnk=655513,
|
||||||
application_x_koan=655513,
|
application_x_koan=655514,
|
||||||
application_x_latex=655514,
|
application_x_latex=655515,
|
||||||
application_x_livescreen=655515,
|
application_x_livescreen=655516,
|
||||||
application_x_lotus=655516,
|
application_x_lotus=655517,
|
||||||
application_x_lz4=655517 | 0x08000000,
|
application_x_lz4=655518 | 0x08000000,
|
||||||
application_x_lz4_json=655518,
|
application_x_lz4_json=655519,
|
||||||
application_x_lzh=655519,
|
application_x_lzh=655520,
|
||||||
application_x_lzh_compressed=655520,
|
application_x_lzh_compressed=655521,
|
||||||
application_x_lzip=655521 | 0x08000000,
|
application_x_lzip=655522 | 0x08000000,
|
||||||
application_x_lzma=655522 | 0x08000000,
|
application_x_lzma=655523 | 0x08000000,
|
||||||
application_x_lzop=655523 | 0x08000000,
|
application_x_lzop=655524 | 0x08000000,
|
||||||
application_x_lzx=655524,
|
application_x_lzx=655525,
|
||||||
application_x_mach_binary=655525,
|
application_x_mach_binary=655526,
|
||||||
application_x_mach_executable=655526,
|
application_x_mach_executable=655527,
|
||||||
application_x_magic_cap_package_1_0=655527,
|
application_x_magic_cap_package_1_0=655528,
|
||||||
application_x_mathcad=655528,
|
application_x_mathcad=655529,
|
||||||
application_x_maxis_dbpf=655529,
|
application_x_maxis_dbpf=655530,
|
||||||
application_x_meme=655530,
|
application_x_meme=655531,
|
||||||
application_x_midi=655531,
|
application_x_midi=655532,
|
||||||
application_x_mif=655532,
|
application_x_mif=655533,
|
||||||
application_x_mix_transfer=655533,
|
application_x_mix_transfer=655534,
|
||||||
application_x_mobipocket_ebook=655534 | 0x02000000,
|
application_x_mobipocket_ebook=655535 | 0x02000000,
|
||||||
application_x_ms_compress_szdd=655535,
|
application_x_ms_compress_szdd=655536,
|
||||||
application_x_ms_pdb=655536,
|
application_x_ms_pdb=655537,
|
||||||
application_x_ms_reader=655537,
|
application_x_ms_reader=655538,
|
||||||
application_x_msaccess=655538,
|
application_x_msaccess=655539,
|
||||||
application_x_n64_rom=655539,
|
application_x_n64_rom=655540,
|
||||||
application_x_navi_animation=655540,
|
application_x_navi_animation=655541,
|
||||||
application_x_navidoc=655541,
|
application_x_navidoc=655542,
|
||||||
application_x_navimap=655542,
|
application_x_navimap=655543,
|
||||||
application_x_navistyle=655543,
|
application_x_navistyle=655544,
|
||||||
application_x_nes_rom=655544,
|
application_x_nes_rom=655545,
|
||||||
application_x_netcdf=655545,
|
application_x_netcdf=655546,
|
||||||
application_x_newton_compatible_pkg=655546,
|
application_x_newton_compatible_pkg=655547,
|
||||||
application_x_nintendo_ds_rom=655547,
|
application_x_nintendo_ds_rom=655548,
|
||||||
application_x_object=655548,
|
application_x_object=655549,
|
||||||
application_x_omc=655549,
|
application_x_omc=655550,
|
||||||
application_x_omcdatamaker=655550,
|
application_x_omcdatamaker=655551,
|
||||||
application_x_omcregerator=655551,
|
application_x_omcregerator=655552,
|
||||||
application_x_pagemaker=655552,
|
application_x_pagemaker=655553,
|
||||||
application_x_pcl=655553,
|
application_x_pcl=655554,
|
||||||
application_x_pgp_keyring=655554,
|
application_x_pgp_keyring=655555,
|
||||||
application_x_pixclscript=655555,
|
application_x_pixclscript=655556,
|
||||||
application_x_pkcs7_certreqresp=655556,
|
application_x_pkcs7_certreqresp=655557,
|
||||||
application_x_pkcs7_signature=655557,
|
application_x_pkcs7_signature=655558,
|
||||||
application_x_project=655558,
|
application_x_project=655559,
|
||||||
application_x_qpro=655559,
|
application_x_qpro=655560,
|
||||||
application_x_rar=655560 | 0x10000000,
|
application_x_rar=655561 | 0x10000000,
|
||||||
application_x_rpm=655561,
|
application_x_rpm=655562,
|
||||||
application_x_sdp=655562,
|
application_x_sdp=655563,
|
||||||
application_x_sea=655563,
|
application_x_sea=655564,
|
||||||
application_x_seelogo=655564,
|
application_x_seelogo=655565,
|
||||||
application_x_setupscript=655565,
|
application_x_setupscript=655566,
|
||||||
application_x_shar=655566,
|
application_x_shar=655567,
|
||||||
application_x_sharedlib=655567,
|
application_x_sharedlib=655568,
|
||||||
application_x_shockwave_flash=655568,
|
application_x_shockwave_flash=655569,
|
||||||
application_x_snappy_framed=655569,
|
application_x_snappy_framed=655570,
|
||||||
application_x_sprite=655570,
|
application_x_sprite=655571,
|
||||||
application_x_sqlite3=655571,
|
application_x_sqlite3=655572,
|
||||||
application_x_stargallery_thm=655572,
|
application_x_stargallery_thm=655573,
|
||||||
application_x_stuffit=655573,
|
application_x_stuffit=655574,
|
||||||
application_x_sv4cpio=655574,
|
application_x_sv4cpio=655575,
|
||||||
application_x_sv4crc=655575,
|
application_x_sv4crc=655576,
|
||||||
application_x_tar=655576 | 0x10000000,
|
application_x_tar=655577 | 0x10000000,
|
||||||
application_x_tbook=655577,
|
application_x_tbook=655578,
|
||||||
application_x_terminfo=655578,
|
application_x_terminfo=655579,
|
||||||
application_x_terminfo2=655579,
|
application_x_terminfo2=655580,
|
||||||
application_x_tex_tfm=655580,
|
application_x_tex_tfm=655581,
|
||||||
application_x_texinfo=655581,
|
application_x_texinfo=655582,
|
||||||
application_x_ustar=655582,
|
application_x_ustar=655583,
|
||||||
application_x_visio=655583,
|
application_x_visio=655584,
|
||||||
application_x_vnd_audioexplosion_mzz=655584,
|
application_x_vnd_audioexplosion_mzz=655585,
|
||||||
application_x_vnd_ls_xpix=655585,
|
application_x_vnd_ls_xpix=655586,
|
||||||
application_x_vrml=655586,
|
application_x_vrml=655587,
|
||||||
application_x_wais_source=655587,
|
application_x_wais_source=655588,
|
||||||
application_x_wine_extension_ini=655588,
|
application_x_wine_extension_ini=655589,
|
||||||
application_x_wintalk=655589,
|
application_x_wintalk=655590,
|
||||||
application_x_world=655590,
|
application_x_world=655591,
|
||||||
application_x_wri=655591,
|
application_x_wri=655592,
|
||||||
application_x_x509_ca_cert=655592,
|
application_x_x509_ca_cert=655593,
|
||||||
application_x_xz=655593 | 0x08000000,
|
application_x_xz=655594 | 0x08000000,
|
||||||
application_x_zip=655594,
|
application_x_zip=655595,
|
||||||
application_x_zstd=655595 | 0x08000000,
|
application_x_zstd=655596 | 0x08000000,
|
||||||
application_x_zstd_dictionary=655596,
|
application_x_zstd_dictionary=655597,
|
||||||
application_xml=655597,
|
application_xml=655598,
|
||||||
application_zip=655598 | 0x10000000,
|
application_zip=655599 | 0x10000000,
|
||||||
application_zlib=655599,
|
application_zlib=655600,
|
||||||
audio_basic=458992 | 0x80000000,
|
audio_basic=458993 | 0x80000000,
|
||||||
audio_it=458993,
|
audio_it=458994,
|
||||||
audio_make=458994,
|
audio_make=458995,
|
||||||
audio_mid=458995,
|
audio_mid=458996,
|
||||||
audio_midi=458996,
|
audio_midi=458997,
|
||||||
audio_mp4=458997,
|
audio_mp4=458998,
|
||||||
audio_mpeg=458998,
|
audio_mpeg=458999,
|
||||||
audio_ogg=458999,
|
audio_ogg=459000,
|
||||||
audio_s3m=459000,
|
audio_s3m=459001,
|
||||||
audio_tsp_audio=459001,
|
audio_tsp_audio=459002,
|
||||||
audio_tsplayer=459002,
|
audio_tsplayer=459003,
|
||||||
audio_vnd_qcelp=459003,
|
audio_vnd_qcelp=459004,
|
||||||
audio_voxware=459004,
|
audio_voxware=459005,
|
||||||
audio_x_aiff=459005,
|
audio_x_aiff=459006,
|
||||||
audio_x_flac=459006,
|
audio_x_flac=459007,
|
||||||
audio_x_gsm=459007,
|
audio_x_gsm=459008,
|
||||||
audio_x_hx_aac_adts=459008,
|
audio_x_hx_aac_adts=459009,
|
||||||
audio_x_jam=459009,
|
audio_x_jam=459010,
|
||||||
audio_x_liveaudio=459010,
|
audio_x_liveaudio=459011,
|
||||||
audio_x_m4a=459011,
|
audio_x_m4a=459012,
|
||||||
audio_x_midi=459012,
|
audio_x_midi=459013,
|
||||||
audio_x_mod=459013,
|
audio_x_mod=459014,
|
||||||
audio_x_mp4a_latm=459014,
|
audio_x_mp4a_latm=459015,
|
||||||
audio_x_mpeg_3=459015,
|
audio_x_mpeg_3=459016,
|
||||||
audio_x_mpequrl=459016,
|
audio_x_mpequrl=459017,
|
||||||
audio_x_nspaudio=459017,
|
audio_x_nspaudio=459018,
|
||||||
audio_x_pn_realaudio=459018,
|
audio_x_pn_realaudio=459019,
|
||||||
audio_x_psid=459019,
|
audio_x_psid=459020,
|
||||||
audio_x_realaudio=459020,
|
audio_x_realaudio=459021,
|
||||||
audio_x_s3m=459021,
|
audio_x_s3m=459022,
|
||||||
audio_x_twinvq=459022,
|
audio_x_twinvq=459023,
|
||||||
audio_x_twinvq_plugin=459023,
|
audio_x_twinvq_plugin=459024,
|
||||||
audio_x_voc=459024,
|
audio_x_voc=459025,
|
||||||
audio_x_wav=459025,
|
audio_x_wav=459026,
|
||||||
audio_x_xbox_executable=459026 | 0x80000000,
|
audio_x_xbox_executable=459027 | 0x80000000,
|
||||||
audio_x_xbox360_executable=459027 | 0x80000000,
|
audio_x_xbox360_executable=459028 | 0x80000000,
|
||||||
audio_xm=459028,
|
audio_xm=459029,
|
||||||
font_otf=327957 | 0x20000000,
|
font_otf=327958 | 0x20000000,
|
||||||
font_sfnt=327958 | 0x20000000,
|
font_sfnt=327959 | 0x20000000,
|
||||||
font_woff=327959 | 0x20000000,
|
font_woff=327960 | 0x20000000,
|
||||||
font_woff2=327960 | 0x20000000,
|
font_woff2=327961 | 0x20000000,
|
||||||
image_bmp=524569,
|
image_bmp=524570,
|
||||||
image_cmu_raster=524570,
|
image_cmu_raster=524571,
|
||||||
image_fif=524571,
|
image_fif=524572,
|
||||||
image_florian=524572,
|
image_florian=524573,
|
||||||
image_g3fax=524573,
|
image_g3fax=524574,
|
||||||
image_gif=524574,
|
image_gif=524575,
|
||||||
image_heic=524575,
|
image_heic=524576,
|
||||||
image_ief=524576,
|
image_ief=524577,
|
||||||
image_jpeg=524577,
|
image_jpeg=524578,
|
||||||
image_jutvision=524578,
|
image_jutvision=524579,
|
||||||
image_naplps=524579,
|
image_naplps=524580,
|
||||||
image_pict=524580,
|
image_pict=524581,
|
||||||
image_png=524581,
|
image_png=524582,
|
||||||
image_svg=524582 | 0x80000000,
|
image_svg=524583 | 0x80000000,
|
||||||
image_svg_xml=524583 | 0x80000000,
|
image_svg_xml=524584 | 0x80000000,
|
||||||
image_tiff=524584,
|
image_tiff=524585,
|
||||||
image_vnd_adobe_photoshop=524585 | 0x80000000,
|
image_vnd_adobe_photoshop=524586 | 0x80000000,
|
||||||
image_vnd_djvu=524586 | 0x80000000,
|
image_vnd_djvu=524587 | 0x80000000,
|
||||||
image_vnd_fpx=524587,
|
image_vnd_fpx=524588,
|
||||||
image_vnd_microsoft_icon=524588,
|
image_vnd_microsoft_icon=524589,
|
||||||
image_vnd_rn_realflash=524589,
|
image_vnd_rn_realflash=524590,
|
||||||
image_vnd_rn_realpix=524590,
|
image_vnd_rn_realpix=524591,
|
||||||
image_vnd_wap_wbmp=524591,
|
image_vnd_wap_wbmp=524592,
|
||||||
image_vnd_xiff=524592,
|
image_vnd_xiff=524593,
|
||||||
image_webp=524593,
|
image_webp=524594,
|
||||||
image_wmf=524594,
|
image_wmf=524595,
|
||||||
image_x_3ds=524595,
|
image_x_3ds=524596,
|
||||||
image_x_adobe_dng=524596 | 0x00800000,
|
image_x_adobe_dng=524597 | 0x00800000,
|
||||||
image_x_award_bioslogo=524597,
|
image_x_award_bioslogo=524598,
|
||||||
image_x_canon_cr2=524598 | 0x00800000,
|
image_x_canon_cr2=524599 | 0x00800000,
|
||||||
image_x_canon_crw=524599 | 0x00800000,
|
image_x_canon_crw=524600 | 0x00800000,
|
||||||
image_x_cmu_raster=524600,
|
image_x_cmu_raster=524601,
|
||||||
image_x_cur=524601,
|
image_x_cur=524602,
|
||||||
image_x_dcraw=524602 | 0x00800000,
|
image_x_dcraw=524603 | 0x00800000,
|
||||||
image_x_dwg=524603,
|
image_x_dwg=524604,
|
||||||
image_x_eps=524604,
|
image_x_eps=524605,
|
||||||
image_x_epson_erf=524605 | 0x00800000,
|
image_x_epson_erf=524606 | 0x00800000,
|
||||||
image_x_exr=524606,
|
image_x_exr=524607,
|
||||||
image_x_fuji_raf=524607 | 0x00800000,
|
image_x_fuji_raf=524608 | 0x00800000,
|
||||||
image_x_gem=524608,
|
image_x_gem=524609,
|
||||||
image_x_icns=524609,
|
image_x_icns=524610,
|
||||||
image_x_icon=524610 | 0x80000000,
|
image_x_icon=524611 | 0x80000000,
|
||||||
image_x_jg=524611,
|
image_x_jg=524612,
|
||||||
image_x_jps=524612,
|
image_x_jps=524613,
|
||||||
image_x_kodak_dcr=524613 | 0x00800000,
|
image_x_kodak_dcr=524614 | 0x00800000,
|
||||||
image_x_kodak_k25=524614 | 0x00800000,
|
image_x_kodak_k25=524615 | 0x00800000,
|
||||||
image_x_kodak_kdc=524615 | 0x00800000,
|
image_x_kodak_kdc=524616 | 0x00800000,
|
||||||
image_x_minolta_mrw=524616 | 0x00800000,
|
image_x_minolta_mrw=524617 | 0x00800000,
|
||||||
image_x_ms_bmp=524617,
|
image_x_ms_bmp=524618,
|
||||||
image_x_niff=524618,
|
image_x_niff=524619,
|
||||||
image_x_nikon_nef=524619 | 0x00800000,
|
image_x_nikon_nef=524620 | 0x00800000,
|
||||||
image_x_olympus_orf=524620 | 0x00800000,
|
image_x_olympus_orf=524621 | 0x00800000,
|
||||||
image_x_panasonic_raw=524621 | 0x00800000,
|
image_x_panasonic_raw=524622 | 0x00800000,
|
||||||
image_x_pcx=524622,
|
image_x_pcx=524623,
|
||||||
image_x_pentax_pef=524623 | 0x00800000,
|
image_x_pentax_pef=524624 | 0x00800000,
|
||||||
image_x_pict=524624,
|
image_x_pict=524625,
|
||||||
image_x_portable_bitmap=524625,
|
image_x_portable_bitmap=524626,
|
||||||
image_x_portable_graymap=524626,
|
image_x_portable_graymap=524627,
|
||||||
image_x_portable_pixmap=524627,
|
image_x_portable_pixmap=524628,
|
||||||
image_x_quicktime=524628,
|
image_x_quicktime=524629,
|
||||||
image_x_rgb=524629,
|
image_x_rgb=524630,
|
||||||
image_x_sigma_x3f=524630 | 0x00800000,
|
image_x_sigma_x3f=524631 | 0x00800000,
|
||||||
image_x_sony_arw=524631 | 0x00800000,
|
image_x_sony_arw=524632 | 0x00800000,
|
||||||
image_x_sony_sr2=524632 | 0x00800000,
|
image_x_sony_sr2=524633 | 0x00800000,
|
||||||
image_x_sony_srf=524633 | 0x00800000,
|
image_x_sony_srf=524634 | 0x00800000,
|
||||||
image_x_tga=524634,
|
image_x_tga=524635,
|
||||||
image_x_tiff=524635,
|
image_x_tiff=524636,
|
||||||
image_x_win_bitmap=524636,
|
image_x_win_bitmap=524637,
|
||||||
image_x_xcf=524637 | 0x80000000,
|
image_x_xcf=524638 | 0x80000000,
|
||||||
image_x_xpixmap=524638 | 0x80000000,
|
image_x_xpixmap=524639 | 0x80000000,
|
||||||
image_x_xwindowdump=524639,
|
image_x_xwindowdump=524640,
|
||||||
message_news=196960,
|
message_news=196961,
|
||||||
message_rfc822=196961,
|
message_rfc822=196962,
|
||||||
model_vnd_dwf=65890,
|
model_vnd_dwf=65891,
|
||||||
model_vnd_gdl=65891,
|
model_vnd_gdl=65892,
|
||||||
model_vnd_gs_gdl=65892,
|
model_vnd_gs_gdl=65893,
|
||||||
model_vrml=65893,
|
model_vrml=65894,
|
||||||
model_x_pov=65894,
|
model_x_pov=65895,
|
||||||
sist2_sidecar=2,
|
sist2_sidecar=2,
|
||||||
text_PGP=590183,
|
text_PGP=590184,
|
||||||
text_asp=590184,
|
text_asp=590185,
|
||||||
text_css=590185,
|
text_css=590186,
|
||||||
text_html=590186 | 0x01000000,
|
text_html=590187 | 0x01000000,
|
||||||
text_javascript=590187,
|
text_javascript=590188,
|
||||||
text_mcf=590188,
|
text_mcf=590189,
|
||||||
text_pascal=590189,
|
text_pascal=590190,
|
||||||
text_plain=590190,
|
text_plain=590191,
|
||||||
text_richtext=590191,
|
text_richtext=590192,
|
||||||
text_rtf=590192,
|
text_rtf=590193,
|
||||||
text_scriplet=590193,
|
text_scriplet=590194,
|
||||||
text_tab_separated_values=590194,
|
text_tab_separated_values=590195,
|
||||||
text_troff=590195,
|
text_troff=590196,
|
||||||
text_uri_list=590196,
|
text_uri_list=590197,
|
||||||
text_vnd_abc=590197,
|
text_vnd_abc=590198,
|
||||||
text_vnd_fmi_flexstor=590198,
|
text_vnd_fmi_flexstor=590199,
|
||||||
text_vnd_wap_wml=590199,
|
text_vnd_wap_wml=590200,
|
||||||
text_vnd_wap_wmlscript=590200,
|
text_vnd_wap_wmlscript=590201,
|
||||||
text_webviewhtml=590201,
|
text_webviewhtml=590202,
|
||||||
text_x_Algol68=590202,
|
text_x_Algol68=590203,
|
||||||
text_x_asm=590203,
|
text_x_asm=590204,
|
||||||
text_x_audiosoft_intra=590204,
|
text_x_audiosoft_intra=590205,
|
||||||
text_x_awk=590205,
|
text_x_awk=590206,
|
||||||
text_x_bcpl=590206,
|
text_x_bcpl=590207,
|
||||||
text_x_c=590207,
|
text_x_c=590208,
|
||||||
text_x_c__=590208,
|
text_x_c__=590209,
|
||||||
text_x_component=590209,
|
text_x_component=590210,
|
||||||
text_x_diff=590210,
|
text_x_diff=590211,
|
||||||
text_x_fortran=590211,
|
text_x_fortran=590212,
|
||||||
text_x_java=590212,
|
text_x_java=590213,
|
||||||
text_x_la_asf=590213,
|
text_x_la_asf=590214,
|
||||||
text_x_lisp=590214,
|
text_x_lisp=590215,
|
||||||
text_x_m=590215,
|
text_x_m=590216,
|
||||||
text_x_m4=590216,
|
text_x_m4=590217,
|
||||||
text_x_makefile=590217,
|
text_x_makefile=590218,
|
||||||
text_x_ms_regedit=590218,
|
text_x_ms_regedit=590219,
|
||||||
text_x_msdos_batch=590219,
|
text_x_msdos_batch=590220,
|
||||||
text_x_objective_c=590220,
|
text_x_objective_c=590221,
|
||||||
text_x_pascal=590221,
|
text_x_pascal=590222,
|
||||||
text_x_perl=590222,
|
text_x_perl=590223,
|
||||||
text_x_php=590223,
|
text_x_php=590224,
|
||||||
text_x_po=590224,
|
text_x_po=590225,
|
||||||
text_x_python=590225,
|
text_x_python=590226,
|
||||||
text_x_ruby=590226,
|
text_x_ruby=590227,
|
||||||
text_x_sass=590227,
|
text_x_sass=590228,
|
||||||
text_x_scss=590228,
|
text_x_scss=590229,
|
||||||
text_x_server_parsed_html=590229,
|
text_x_server_parsed_html=590230,
|
||||||
text_x_setext=590230,
|
text_x_setext=590231,
|
||||||
text_x_sgml=590231 | 0x01000000,
|
text_x_sgml=590232 | 0x01000000,
|
||||||
text_x_shellscript=590232,
|
text_x_shellscript=590233,
|
||||||
text_x_speech=590233,
|
text_x_speech=590234,
|
||||||
text_x_tcl=590234,
|
text_x_tcl=590235,
|
||||||
text_x_tex=590235,
|
text_x_tex=590236,
|
||||||
text_x_uil=590236,
|
text_x_uil=590237,
|
||||||
text_x_uuencode=590237,
|
text_x_uuencode=590238,
|
||||||
text_x_vcalendar=590238,
|
text_x_vcalendar=590239,
|
||||||
text_x_vcard=590239,
|
text_x_vcard=590240,
|
||||||
text_xml=590240 | 0x01000000,
|
text_xml=590241 | 0x01000000,
|
||||||
video_MP2T=393633,
|
video_MP2T=393634,
|
||||||
video_animaflex=393634,
|
video_animaflex=393635,
|
||||||
video_avi=393635,
|
video_avi=393636,
|
||||||
video_avs_video=393636,
|
video_avs_video=393637,
|
||||||
video_mp4=393637,
|
video_mp4=393638,
|
||||||
video_mpeg=393638,
|
video_mpeg=393639,
|
||||||
video_quicktime=393639,
|
video_quicktime=393640,
|
||||||
video_vdo=393640,
|
video_vdo=393641,
|
||||||
video_vivo=393641,
|
video_vivo=393642,
|
||||||
video_vnd_rn_realvideo=393642,
|
video_vnd_rn_realvideo=393643,
|
||||||
video_vosaic=393643,
|
video_vosaic=393644,
|
||||||
video_webm=393644,
|
video_webm=393645,
|
||||||
video_x_amt_demorun=393645,
|
video_x_amt_demorun=393646,
|
||||||
video_x_amt_showrun=393646,
|
video_x_amt_showrun=393647,
|
||||||
video_x_atomic3d_feature=393647,
|
video_x_atomic3d_feature=393648,
|
||||||
video_x_dl=393648,
|
video_x_dl=393649,
|
||||||
video_x_dv=393649,
|
video_x_dv=393650,
|
||||||
video_x_fli=393650,
|
video_x_fli=393651,
|
||||||
video_x_flv=393651,
|
video_x_flv=393652,
|
||||||
video_x_isvideo=393652,
|
video_x_isvideo=393653,
|
||||||
video_x_jng=393653 | 0x80000000,
|
video_x_jng=393654 | 0x80000000,
|
||||||
video_x_m4v=393654,
|
video_x_m4v=393655,
|
||||||
video_x_matroska=393655,
|
video_x_matroska=393656,
|
||||||
video_x_mng=393656,
|
video_x_mng=393657,
|
||||||
video_x_motion_jpeg=393657,
|
video_x_motion_jpeg=393658,
|
||||||
video_x_ms_asf=393658,
|
video_x_ms_asf=393659,
|
||||||
video_x_msvideo=393659,
|
video_x_msvideo=393660,
|
||||||
video_x_qtc=393660,
|
video_x_qtc=393661,
|
||||||
video_x_sgi_movie=393661,
|
video_x_sgi_movie=393662,
|
||||||
x_epoc_x_sisx_app=721342,
|
x_epoc_x_sisx_app=721343,
|
||||||
};
|
};
|
||||||
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
|
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
|
||||||
case application_arj: return "application/arj";
|
case application_arj: return "application/arj";
|
||||||
@@ -480,6 +481,7 @@ case application_java_archive: return "application/java-archive";
|
|||||||
case application_java: return "application/java";
|
case application_java: return "application/java";
|
||||||
case application_javascript: return "application/javascript";
|
case application_javascript: return "application/javascript";
|
||||||
case application_json: return "application/json";
|
case application_json: return "application/json";
|
||||||
|
case application_ndjson: return "application/ndjson";
|
||||||
case application_marc: return "application/marc";
|
case application_marc: return "application/marc";
|
||||||
case application_mbedlet: return "application/mbedlet";
|
case application_mbedlet: return "application/mbedlet";
|
||||||
case application_mime: return "application/mime";
|
case application_mime: return "application/mime";
|
||||||
@@ -930,6 +932,8 @@ g_hash_table_insert(ext_table, "inf", (gpointer)application_inf);
|
|||||||
g_hash_table_insert(ext_table, "jar", (gpointer)application_java_archive);
|
g_hash_table_insert(ext_table, "jar", (gpointer)application_java_archive);
|
||||||
g_hash_table_insert(ext_table, "class", (gpointer)application_java);
|
g_hash_table_insert(ext_table, "class", (gpointer)application_java);
|
||||||
g_hash_table_insert(ext_table, "json", (gpointer)application_json);
|
g_hash_table_insert(ext_table, "json", (gpointer)application_json);
|
||||||
|
g_hash_table_insert(ext_table, "jsonl", (gpointer)application_ndjson);
|
||||||
|
g_hash_table_insert(ext_table, "ndjson", (gpointer)application_ndjson);
|
||||||
g_hash_table_insert(ext_table, "mrc", (gpointer)application_marc);
|
g_hash_table_insert(ext_table, "mrc", (gpointer)application_marc);
|
||||||
g_hash_table_insert(ext_table, "mbd", (gpointer)application_mbedlet);
|
g_hash_table_insert(ext_table, "mbd", (gpointer)application_mbedlet);
|
||||||
g_hash_table_insert(ext_table, "aps", (gpointer)application_mime);
|
g_hash_table_insert(ext_table, "aps", (gpointer)application_mime);
|
||||||
@@ -1474,6 +1478,7 @@ g_hash_table_insert(mime_table, "application/java-archive", (gpointer)applicatio
|
|||||||
g_hash_table_insert(mime_table, "application/java", (gpointer)application_java);
|
g_hash_table_insert(mime_table, "application/java", (gpointer)application_java);
|
||||||
g_hash_table_insert(mime_table, "application/javascript", (gpointer)application_javascript);
|
g_hash_table_insert(mime_table, "application/javascript", (gpointer)application_javascript);
|
||||||
g_hash_table_insert(mime_table, "application/json", (gpointer)application_json);
|
g_hash_table_insert(mime_table, "application/json", (gpointer)application_json);
|
||||||
|
g_hash_table_insert(mime_table, "application/ndjson", (gpointer)application_ndjson);
|
||||||
g_hash_table_insert(mime_table, "application/marc", (gpointer)application_marc);
|
g_hash_table_insert(mime_table, "application/marc", (gpointer)application_marc);
|
||||||
g_hash_table_insert(mime_table, "application/mbedlet", (gpointer)application_mbedlet);
|
g_hash_table_insert(mime_table, "application/mbedlet", (gpointer)application_mbedlet);
|
||||||
g_hash_table_insert(mime_table, "application/mime", (gpointer)application_mime);
|
g_hash_table_insert(mime_table, "application/mime", (gpointer)application_mime);
|
||||||
|
|||||||
@@ -10,25 +10,34 @@
|
|||||||
|
|
||||||
|
|
||||||
#define MIN_VIDEO_SIZE (1024 * 64)
|
#define MIN_VIDEO_SIZE (1024 * 64)
|
||||||
#define MIN_IMAGE_SIZE (1024 * 2)
|
#define MIN_IMAGE_SIZE (512)
|
||||||
|
|
||||||
int fs_read(struct vfile *f, void *buf, size_t size) {
|
int fs_read(struct vfile *f, void *buf, size_t size) {
|
||||||
|
|
||||||
if (f->fd == -1) {
|
if (f->fd == -1) {
|
||||||
|
SHA1_Init(&f->sha1_ctx);
|
||||||
|
|
||||||
f->fd = open(f->filepath, O_RDONLY);
|
f->fd = open(f->filepath, O_RDONLY);
|
||||||
if (f->fd == -1) {
|
if (f->fd == -1) {
|
||||||
LOG_ERRORF(f->filepath, "open(): [%d] %s", errno, strerror(errno))
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return read(f->fd, buf, size);
|
int ret = (int) read(f->fd, buf, size);
|
||||||
|
|
||||||
|
if (ret != 0 && f->calculate_checksum) {
|
||||||
|
f->has_checksum = TRUE;
|
||||||
|
safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
|
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
|
||||||
|
|
||||||
void fs_close(struct vfile *f) {
|
void fs_close(struct vfile *f) {
|
||||||
if (f->fd != -1) {
|
if (f->fd != -1) {
|
||||||
|
SHA1_Final(f->sha1_digest, &f->sha1_ctx);
|
||||||
close(f->fd);
|
close(f->fd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -66,7 +75,7 @@ void parse(void *arg) {
|
|||||||
doc->meta_tail = NULL;
|
doc->meta_tail = NULL;
|
||||||
doc->mime = 0;
|
doc->mime = 0;
|
||||||
doc->size = job->vfile.info.st_size;
|
doc->size = job->vfile.info.st_size;
|
||||||
doc->mtime = job->vfile.info.st_mtim.tv_sec;
|
doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
|
||||||
|
|
||||||
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
|
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
|
||||||
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
||||||
@@ -93,18 +102,17 @@ void parse(void *arg) {
|
|||||||
doc->mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
|
doc->mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
|
||||||
}
|
}
|
||||||
|
|
||||||
int bytes_read = 0;
|
|
||||||
|
|
||||||
if (doc->mime == 0 && !ScanCtx.fast) {
|
if (doc->mime == 0 && !ScanCtx.fast) {
|
||||||
|
|
||||||
// Get mime type with libmagic
|
// Get mime type with libmagic
|
||||||
if (!job->vfile.is_fs_file) {
|
if (job->vfile.read_rewindable == NULL) {
|
||||||
LOG_WARNING(job->filepath,
|
LOG_WARNING(job->filepath,
|
||||||
"Guessing mime type with libmagic inside archive files is not currently supported");
|
"File does not support rewindable reads, cannot guess Media type");
|
||||||
goto abort;
|
goto abort;
|
||||||
}
|
}
|
||||||
|
|
||||||
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
|
int bytes_read = job->vfile.read_rewindable(&job->vfile, buf, MAGIC_BUF_SIZE);
|
||||||
if (bytes_read < 0) {
|
if (bytes_read < 0) {
|
||||||
|
|
||||||
if (job->vfile.is_fs_file) {
|
if (job->vfile.is_fs_file) {
|
||||||
@@ -135,7 +143,9 @@ void parse(void *arg) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
job->vfile.reset(&job->vfile);
|
if (job->vfile.reset != NULL) {
|
||||||
|
job->vfile.reset(&job->vfile);
|
||||||
|
}
|
||||||
|
|
||||||
magic_close(magic);
|
magic_close(magic);
|
||||||
}
|
}
|
||||||
@@ -149,7 +159,7 @@ void parse(void *arg) {
|
|||||||
} else if ((mmime == MimeVideo && doc->size >= MIN_VIDEO_SIZE) ||
|
} else if ((mmime == MimeVideo && doc->size >= MIN_VIDEO_SIZE) ||
|
||||||
(mmime == MimeImage && doc->size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
(mmime == MimeImage && doc->size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||||
|
|
||||||
parse_media(&ScanCtx.media_ctx, &job->vfile, doc);
|
parse_media(&ScanCtx.media_ctx, &job->vfile, doc, mime_get_mime_text(doc->mime));
|
||||||
|
|
||||||
} else if (IS_PDF(doc->mime)) {
|
} else if (IS_PDF(doc->mime)) {
|
||||||
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc);
|
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc);
|
||||||
@@ -169,7 +179,7 @@ void parse(void *arg) {
|
|||||||
IS_ARC(doc->mime) ||
|
IS_ARC(doc->mime) ||
|
||||||
(IS_ARC_FILTER(doc->mime) && should_parse_filtered_file(doc->filepath, doc->ext))
|
(IS_ARC_FILTER(doc->mime) && should_parse_filtered_file(doc->filepath, doc->ext))
|
||||||
)) {
|
)) {
|
||||||
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc);
|
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra);
|
||||||
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc->mime)) {
|
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc->mime)) {
|
||||||
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc);
|
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc);
|
||||||
} else if (is_cbr(&ScanCtx.comic_ctx, doc->mime) || is_cbz(&ScanCtx.comic_ctx, doc->mime)) {
|
} else if (is_cbr(&ScanCtx.comic_ctx, doc->mime) || is_cbz(&ScanCtx.comic_ctx, doc->mime)) {
|
||||||
@@ -179,11 +189,15 @@ void parse(void *arg) {
|
|||||||
} else if (doc->mime == MIME_SIST2_SIDECAR) {
|
} else if (doc->mime == MIME_SIST2_SIDECAR) {
|
||||||
parse_sidecar(&job->vfile, doc);
|
parse_sidecar(&job->vfile, doc);
|
||||||
CLOSE_FILE(job->vfile)
|
CLOSE_FILE(job->vfile)
|
||||||
|
free(doc->filepath);
|
||||||
|
free(doc);
|
||||||
return;
|
return;
|
||||||
} else if (is_msdoc(&ScanCtx.msdoc_ctx, doc->mime)) {
|
} else if (is_msdoc(&ScanCtx.msdoc_ctx, doc->mime)) {
|
||||||
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);
|
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);
|
||||||
} else if (is_wpd(&ScanCtx.wpd_ctx, doc->mime)) {
|
} else if (is_json(&ScanCtx.json_ctx, doc->mime)) {
|
||||||
parse_wpd(&ScanCtx.wpd_ctx, &job->vfile, doc);
|
parse_json(&ScanCtx.json_ctx, &job->vfile, doc);
|
||||||
|
} else if (is_ndjson(&ScanCtx.json_ctx, doc->mime)) {
|
||||||
|
parse_ndjson(&ScanCtx.json_ctx, &job->vfile, doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
abort:
|
abort:
|
||||||
@@ -200,9 +214,15 @@ void parse(void *arg) {
|
|||||||
doc->has_parent = FALSE;
|
doc->has_parent = FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
write_document(doc);
|
|
||||||
|
|
||||||
CLOSE_FILE(job->vfile)
|
CLOSE_FILE(job->vfile)
|
||||||
|
|
||||||
|
if (job->vfile.has_checksum) {
|
||||||
|
char sha1_digest_str[SHA1_STR_LENGTH];
|
||||||
|
buf2hex((unsigned char *) job->vfile.sha1_digest, SHA1_DIGEST_LENGTH, (char *) sha1_digest_str);
|
||||||
|
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
write_document(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cleanup_parse() {
|
void cleanup_parse() {
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
#include "../sist.h"
|
#include "../sist.h"
|
||||||
|
|
||||||
#define MAGIC_BUF_SIZE 4096 * 6
|
#define MAGIC_BUF_SIZE (4096 * 6)
|
||||||
|
|
||||||
int fs_read(struct vfile *f, void *buf, size_t size);
|
int fs_read(struct vfile *f, void *buf, size_t size);
|
||||||
void fs_close(struct vfile *f);
|
void fs_close(struct vfile *f);
|
||||||
|
|||||||
@@ -27,7 +27,10 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
|
|||||||
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
|
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
|
||||||
path_md5);
|
path_md5);
|
||||||
|
|
||||||
store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1);
|
char path_md5_str[MD5_STR_LENGTH];
|
||||||
|
buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
|
||||||
|
|
||||||
|
store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
|
||||||
|
|
||||||
cJSON_Delete(json);
|
cJSON_Delete(json);
|
||||||
free(json_str);
|
free(json_str);
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
#ifndef SIST_H
|
#ifndef SIST_H
|
||||||
#define SIST_H
|
#define SIST_H
|
||||||
|
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
|
||||||
#ifndef FALSE
|
#ifndef FALSE
|
||||||
#define FALSE (0)
|
#define FALSE (0)
|
||||||
#define BOOL int
|
#define BOOL int
|
||||||
@@ -26,6 +28,8 @@
|
|||||||
#define UNUSED(x) __attribute__((__unused__)) x
|
#define UNUSED(x) __attribute__((__unused__)) x
|
||||||
|
|
||||||
#define MD5_STR_LENGTH 33
|
#define MD5_STR_LENGTH 33
|
||||||
|
#define SHA1_STR_LENGTH 41
|
||||||
|
#define SHA1_DIGEST_LENGTH 20
|
||||||
|
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
@@ -49,7 +53,7 @@
|
|||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include "git_hash.h"
|
#include "git_hash.h"
|
||||||
|
|
||||||
#define VERSION "2.11.2"
|
#define VERSION "2.11.4"
|
||||||
static const char *const Version = VERSION;
|
static const char *const Version = VERSION;
|
||||||
|
|
||||||
#ifndef SIST_PLATFORM
|
#ifndef SIST_PLATFORM
|
||||||
|
|||||||
@@ -177,7 +177,7 @@ static void *tpool_worker(void *arg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void tpool_wait(tpool_t *pool) {
|
void tpool_wait(tpool_t *pool) {
|
||||||
LOG_INFO("tpool.c", "Waiting for worker threads to finish")
|
LOG_DEBUG("tpool.c", "Waiting for worker threads to finish")
|
||||||
pthread_mutex_lock(&(pool->work_mutex));
|
pthread_mutex_lock(&(pool->work_mutex));
|
||||||
while (TRUE) {
|
while (TRUE) {
|
||||||
if (pool->done_cnt < pool->work_cnt) {
|
if (pool->done_cnt < pool->work_cnt) {
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
|
|||||||
|
|
||||||
static int last_val = -1;
|
static int last_val = -1;
|
||||||
int val = (int) (percentage * 100);
|
int val = (int) (percentage * 100);
|
||||||
if (last_val == val || val > 100 || index_size < 1024) {
|
if (last_val == val || val > 100) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
last_val = val;
|
last_val = val;
|
||||||
|
|||||||
@@ -252,12 +252,32 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
|
|||||||
mg_http_serve_file(nc, hm, full_path, mime, disposition);
|
mg_http_serve_file(nc, hm, full_path, mime, disposition);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void cache_es_version() {
|
||||||
|
static int is_cached = FALSE;
|
||||||
|
|
||||||
|
if (is_cached == TRUE) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
es_version_t *es_version = elastic_get_version(WebCtx.es_url);
|
||||||
|
if (es_version != NULL) {
|
||||||
|
WebCtx.es_version = es_version;
|
||||||
|
is_cached = TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void index_info(struct mg_connection *nc) {
|
void index_info(struct mg_connection *nc) {
|
||||||
|
|
||||||
|
cache_es_version();
|
||||||
|
|
||||||
cJSON *json = cJSON_CreateObject();
|
cJSON *json = cJSON_CreateObject();
|
||||||
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
|
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
|
||||||
|
|
||||||
cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
|
cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
|
||||||
cJSON_AddStringToObject(json, "version", Version);
|
cJSON_AddStringToObject(json, "version", Version);
|
||||||
|
cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version));
|
||||||
|
cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
|
||||||
|
cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version));
|
||||||
cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
|
cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
|
||||||
cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
|
cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
|
||||||
cJSON_AddStringToObject(json, "libscanHash", LibScanCommitHash);
|
cJSON_AddStringToObject(json, "libscanHash", LibScanCommitHash);
|
||||||
|
|||||||
10
src/web/static_generated.c
vendored
10
src/web/static_generated.c
vendored
File diff suppressed because one or more lines are too long
2
third-party/argparse
vendored
2
third-party/argparse
vendored
Submodule third-party/argparse updated: ffd9c23427...225141eb3d
1
third-party/libscan
vendored
1
third-party/libscan
vendored
Submodule third-party/libscan deleted from fe53e1a219
12
third-party/libscan/.gitignore
vendored
Normal file
12
third-party/libscan/.gitignore
vendored
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
.idea/
|
||||||
|
cmake_install.cmake
|
||||||
|
Makefile
|
||||||
|
libscan.a
|
||||||
|
libscan.so
|
||||||
|
*.cbp
|
||||||
|
CMakeFiles
|
||||||
|
CMakeCache.txt
|
||||||
|
scan_test
|
||||||
|
third-party/ext_*
|
||||||
|
libscan-test-files
|
||||||
|
scan_*_test
|
||||||
233
third-party/libscan/CMakeLists.txt
vendored
Normal file
233
third-party/libscan/CMakeLists.txt
vendored
Normal file
@@ -0,0 +1,233 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.15)
|
||||||
|
|
||||||
|
project(scan)
|
||||||
|
set(CMAKE_C_STANDARD 11)
|
||||||
|
|
||||||
|
option(BUILD_TESTS "Build tests" on)
|
||||||
|
|
||||||
|
add_subdirectory(third-party/antiword)
|
||||||
|
add_compile_definitions(
|
||||||
|
antiword
|
||||||
|
NDEBUG
|
||||||
|
)
|
||||||
|
|
||||||
|
add_library(
|
||||||
|
scan
|
||||||
|
libscan/util.c libscan/util.h
|
||||||
|
libscan/scan.h
|
||||||
|
libscan/macros.h
|
||||||
|
|
||||||
|
libscan/text/text.c libscan/text/text.h
|
||||||
|
libscan/arc/arc.c libscan/arc/arc.h
|
||||||
|
libscan/ebook/ebook.c libscan/ebook/ebook.h
|
||||||
|
libscan/comic/comic.c libscan/comic/comic.h
|
||||||
|
libscan/ooxml/ooxml.c libscan/ooxml/ooxml.h
|
||||||
|
libscan/media/media.c libscan/media/media.h
|
||||||
|
libscan/font/font.c libscan/font/font.h
|
||||||
|
libscan/msdoc/msdoc.c libscan/msdoc/msdoc.h
|
||||||
|
libscan/json/json.c libscan/json/json.h
|
||||||
|
libscan/wpd/wpd.c libscan/wpd/wpd.h libscan/wpd/libwpd_c_api.h libscan/wpd/libwpd_c_api.cpp
|
||||||
|
|
||||||
|
third-party/utf8.h
|
||||||
|
libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h libscan/raw/raw.c libscan/raw/raw.h)
|
||||||
|
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
|
||||||
|
|
||||||
|
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib .so)
|
||||||
|
|
||||||
|
find_package(cJSON CONFIG REQUIRED)
|
||||||
|
find_package(LibArchive REQUIRED)
|
||||||
|
find_package(BZip2 REQUIRED)
|
||||||
|
find_package(lz4 REQUIRED)
|
||||||
|
|
||||||
|
find_package(Threads REQUIRED)
|
||||||
|
find_package(Tesseract CONFIG REQUIRED)
|
||||||
|
find_package(OpenJPEG CONFIG REQUIRED)
|
||||||
|
find_package(JPEG REQUIRED)
|
||||||
|
find_package(LibXml2 REQUIRED)
|
||||||
|
find_package(LibLZMA REQUIRED)
|
||||||
|
find_package(ZLIB REQUIRED)
|
||||||
|
find_package(unofficial-pcre CONFIG REQUIRED)
|
||||||
|
|
||||||
|
|
||||||
|
find_library(JBIG2DEC_LIB NAMES jbig2decd jbig2dec)
|
||||||
|
find_library(HARFBUZZ_LIB NAMES harfbuzz harfbuzzd)
|
||||||
|
find_library(FREETYPE_LIB NAMES freetype freetyped)
|
||||||
|
find_package(unofficial-brotli CONFIG REQUIRED)
|
||||||
|
find_library(LZO2_LIB NAMES lzo2)
|
||||||
|
|
||||||
|
find_library(RAW_LIB NAMES libraw.a)
|
||||||
|
find_library(MUPDF_LIB NAMES liblibmupdf.a)
|
||||||
|
find_library(CMS_LIB NAMES lcms2)
|
||||||
|
find_library(JAS_LIB NAMES jasper)
|
||||||
|
find_library(GUMBO_LIB NAMES gumbo)
|
||||||
|
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/)
|
||||||
|
|
||||||
|
|
||||||
|
target_compile_options(
|
||||||
|
scan
|
||||||
|
PRIVATE
|
||||||
|
-g
|
||||||
|
)
|
||||||
|
|
||||||
|
include(ExternalProject)
|
||||||
|
find_program(MAKE_EXE NAMES gmake nmake make)
|
||||||
|
ExternalProject_Add(
|
||||||
|
libmobi
|
||||||
|
GIT_REPOSITORY https://github.com/simon987/libmobi.git
|
||||||
|
GIT_TAG "public"
|
||||||
|
|
||||||
|
UPDATE_COMMAND ""
|
||||||
|
PATCH_COMMAND ""
|
||||||
|
TEST_COMMAND ""
|
||||||
|
CONFIGURE_COMMAND ./autogen.sh && ./configure
|
||||||
|
INSTALL_COMMAND ""
|
||||||
|
|
||||||
|
PREFIX "third-party/ext_libmobi"
|
||||||
|
SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
|
||||||
|
BINARY_DIR "third-party/ext_libmobi/src/libmobi"
|
||||||
|
|
||||||
|
BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
|
||||||
|
)
|
||||||
|
|
||||||
|
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
|
||||||
|
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
|
||||||
|
|
||||||
|
if (SIST_DEBUG)
|
||||||
|
SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
|
||||||
|
else()
|
||||||
|
SET(FFMPEG_DEBUG "")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
ExternalProject_Add(
|
||||||
|
ffmpeg
|
||||||
|
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
|
||||||
|
GIT_TAG "n4.4"
|
||||||
|
|
||||||
|
UPDATE_COMMAND ""
|
||||||
|
PATCH_COMMAND ""
|
||||||
|
TEST_COMMAND ""
|
||||||
|
CONFIGURE_COMMAND ./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay
|
||||||
|
--disable-ffprobe --disable-doc --disable-manpages --disable-postproc --disable-avfilter --disable-alsa
|
||||||
|
--disable-lzma --disable-xlib --disable-vdpau --disable-vaapi --disable-sdl2
|
||||||
|
--disable-network ${FFMPEG_DEBUG}
|
||||||
|
INSTALL_COMMAND ""
|
||||||
|
|
||||||
|
PREFIX "third-party/ext_ffmpeg"
|
||||||
|
SOURCE_DIR "third-party/ext_ffmpeg/src/ffmpeg"
|
||||||
|
BINARY_DIR "third-party/ext_ffmpeg/src/ffmpeg"
|
||||||
|
|
||||||
|
BUILD_COMMAND ${MAKE_EXE} -j33 --silent
|
||||||
|
)
|
||||||
|
|
||||||
|
SET(FFMPEG_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg)
|
||||||
|
SET(FFMPEG_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg)
|
||||||
|
|
||||||
|
ExternalProject_Add(
|
||||||
|
libwpd
|
||||||
|
URL http://prdownloads.sourceforge.net/libwpd/libwpd-0.9.9.tar.gz
|
||||||
|
|
||||||
|
UPDATE_COMMAND ""
|
||||||
|
PATCH_COMMAND ""
|
||||||
|
TEST_COMMAND ""
|
||||||
|
CONFIGURE_COMMAND ./configure --without-docs --enable-static --disable-shared
|
||||||
|
INSTALL_COMMAND ""
|
||||||
|
|
||||||
|
PREFIX "third-party/ext_libwpd"
|
||||||
|
SOURCE_DIR "third-party/ext_libwpd/src/libwpd"
|
||||||
|
BINARY_DIR "third-party/ext_libwpd/src/libwpd"
|
||||||
|
|
||||||
|
BUILD_COMMAND ${MAKE_EXE} -j33
|
||||||
|
)
|
||||||
|
SET(WPD_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/src/lib/.libs/)
|
||||||
|
SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/inc/)
|
||||||
|
|
||||||
|
add_dependencies(
|
||||||
|
scan
|
||||||
|
libmobi
|
||||||
|
ffmpeg
|
||||||
|
antiword
|
||||||
|
libwpd
|
||||||
|
)
|
||||||
|
|
||||||
|
target_link_libraries(
|
||||||
|
scan
|
||||||
|
PUBLIC
|
||||||
|
|
||||||
|
cjson
|
||||||
|
${LibArchive_LIBRARIES}
|
||||||
|
ZLIB::ZLIB
|
||||||
|
BZip2::BZip2
|
||||||
|
lz4::lz4
|
||||||
|
${LZO2_LIB}
|
||||||
|
LibLZMA::LibLZMA
|
||||||
|
|
||||||
|
${MUPDF_LIB}
|
||||||
|
openjp2
|
||||||
|
|
||||||
|
${MOBI_LIB_DIR}/libmobi.a
|
||||||
|
|
||||||
|
${WPD_LIB_DIR}/libwpd-0.9.a
|
||||||
|
${WPD_LIB_DIR}/libwpd-stream-0.9.a
|
||||||
|
|
||||||
|
${FREETYPE_LIB}
|
||||||
|
${HARFBUZZ_LIB}
|
||||||
|
${JBIG2DEC_LIB}
|
||||||
|
|
||||||
|
stdc++
|
||||||
|
|
||||||
|
-Wl,--whole-archive
|
||||||
|
m
|
||||||
|
-Wl,--no-whole-archive
|
||||||
|
|
||||||
|
${JPEG_LIBRARIES}
|
||||||
|
${Tesseract_LIBRARIES}
|
||||||
|
${LIBXML2_LIBRARIES}
|
||||||
|
${FREETYPE_LIB}
|
||||||
|
unofficial::brotli::brotlidec-static
|
||||||
|
|
||||||
|
${FFMPEG_LIB_DIR}/libavformat/libavformat.a
|
||||||
|
${FFMPEG_LIB_DIR}/libavcodec/libavcodec.a
|
||||||
|
${FFMPEG_LIB_DIR}/libavutil/libavutil.a
|
||||||
|
${FFMPEG_LIB_DIR}/libswresample/libswresample.a
|
||||||
|
${FFMPEG_LIB_DIR}/libswscale/libswscale.a
|
||||||
|
|
||||||
|
z
|
||||||
|
|
||||||
|
${CMAKE_THREAD_LIBS_INIT}
|
||||||
|
|
||||||
|
${RAW_LIB}
|
||||||
|
${GOMP_LIB}
|
||||||
|
${CMS_LIB}
|
||||||
|
${JAS_LIB}
|
||||||
|
${GUMBO_LIB}
|
||||||
|
dl
|
||||||
|
antiword
|
||||||
|
unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
|
||||||
|
)
|
||||||
|
|
||||||
|
target_include_directories(
|
||||||
|
scan
|
||||||
|
PUBLIC
|
||||||
|
${MUPDF_INC_DIR}
|
||||||
|
${JPEG_INCLUDE_DIR}
|
||||||
|
${LIBXML2_INCLUDE_DIR}
|
||||||
|
${FFMPEG_INCLUDE_DIR}
|
||||||
|
${MOBI_INCLUDE_DIR}
|
||||||
|
${WPD_INCLUDE_DIR}
|
||||||
|
)
|
||||||
|
|
||||||
|
if (BUILD_TESTS)
|
||||||
|
find_package(GTest CONFIG REQUIRED)
|
||||||
|
|
||||||
|
add_executable(scan_ub_test test/main.cpp test/test_util.cpp test/test_util.h)
|
||||||
|
target_compile_options(scan_ub_test PRIVATE -g -fsanitize=undefined -fno-omit-frame-pointer)
|
||||||
|
target_link_libraries(scan_ub_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=undefined scan)
|
||||||
|
|
||||||
|
add_executable(scan_a_test test/main.cpp test/test_util.cpp test/test_util.h)
|
||||||
|
target_compile_options(scan_a_test PRIVATE -g -fsanitize=address -fno-omit-frame-pointer)
|
||||||
|
target_link_libraries(scan_a_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=address scan)
|
||||||
|
|
||||||
|
add_executable(scan_test test/main.cpp test/test_util.cpp test/test_util.h)
|
||||||
|
target_compile_options(scan_test PRIVATE -g -fno-omit-frame-pointer)
|
||||||
|
target_link_libraries(scan_test PRIVATE GTest::gtest GTest::gtest_main scan)
|
||||||
|
endif()
|
||||||
4
third-party/libscan/README.md
vendored
Normal file
4
third-party/libscan/README.md
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
### Run fuzz tests:
|
||||||
|
```bash
|
||||||
|
./scan_a_test --gtest_filter=*Fuzz* --gtest_repeat=100
|
||||||
|
```
|
||||||
244
third-party/libscan/libscan/arc/arc.c
vendored
Normal file
244
third-party/libscan/libscan/arc/arc.c
vendored
Normal file
@@ -0,0 +1,244 @@
|
|||||||
|
#include "arc.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <openssl/evp.h>
|
||||||
|
#include <pcre.h>
|
||||||
|
|
||||||
|
|
||||||
|
int should_parse_filtered_file(const char *filepath, int ext) {
|
||||||
|
char tmp[PATH_MAX * 2];
|
||||||
|
|
||||||
|
if (ext == 0) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strncmp(filepath + ext, "tgz", 3) == 0) {
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(tmp, filepath, ext - 1);
|
||||||
|
*(tmp + ext - 1) = '\0';
|
||||||
|
|
||||||
|
char *idx = strrchr(tmp, '.');
|
||||||
|
|
||||||
|
if (idx == NULL) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strcmp(idx, ".tar") == 0) {
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
void arc_close(struct vfile *f) {
|
||||||
|
SHA1_Final(f->sha1_digest, &f->sha1_ctx);
|
||||||
|
|
||||||
|
if (f->rewind_buffer != NULL) {
|
||||||
|
free(f->rewind_buffer);
|
||||||
|
f->rewind_buffer = NULL;
|
||||||
|
f->rewind_buffer_size = 0;
|
||||||
|
f->rewind_buffer_cursor = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int arc_read(struct vfile *f, void *buf, size_t size) {
|
||||||
|
|
||||||
|
int bytes_copied = 0;
|
||||||
|
|
||||||
|
if (f->rewind_buffer_size != 0) {
|
||||||
|
if (size > f->rewind_buffer_size) {
|
||||||
|
memcpy(buf, f->rewind_buffer + f->rewind_buffer_cursor, f->rewind_buffer_size);
|
||||||
|
|
||||||
|
bytes_copied = f->rewind_buffer_size;
|
||||||
|
size -= f->rewind_buffer_size;
|
||||||
|
buf += f->rewind_buffer_size;
|
||||||
|
f->rewind_buffer_size = 0;
|
||||||
|
} else {
|
||||||
|
memcpy(buf, f->rewind_buffer + f->rewind_buffer_cursor, size);
|
||||||
|
f->rewind_buffer_size -= (int) size;
|
||||||
|
f->rewind_buffer_cursor += (int) size;
|
||||||
|
|
||||||
|
return (int) size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t bytes_read = archive_read_data(f->arc, buf, size);
|
||||||
|
|
||||||
|
if (bytes_read != 0 && bytes_read <= size && f->calculate_checksum) {
|
||||||
|
f->has_checksum = TRUE;
|
||||||
|
|
||||||
|
safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, bytes_read);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bytes_read != size && archive_errno(f->arc) != 0) {
|
||||||
|
const char *error_str = archive_error_string(f->arc);
|
||||||
|
if (error_str != NULL) {
|
||||||
|
f->logf(f->filepath, LEVEL_ERROR, "Error reading archive file: %s", error_str);
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (int) bytes_read + bytes_copied;
|
||||||
|
}
|
||||||
|
|
||||||
|
int arc_read_rewindable(struct vfile *f, void *buf, size_t size) {
|
||||||
|
|
||||||
|
if (f->rewind_buffer != NULL) {
|
||||||
|
fprintf(stderr, "Allocated rewind buffer more than once for %s", f->filepath);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t bytes_read = archive_read_data(f->arc, buf, size);
|
||||||
|
|
||||||
|
if (bytes_read != size && archive_errno(f->arc) != 0) {
|
||||||
|
const char *error_str = archive_error_string(f->arc);
|
||||||
|
if (error_str != NULL) {
|
||||||
|
f->logf(f->filepath, LEVEL_ERROR, "Error reading archive file: %s", error_str);
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
f->rewind_buffer = malloc(size);
|
||||||
|
f->rewind_buffer_size = (int) size;
|
||||||
|
f->rewind_buffer_cursor = 0;
|
||||||
|
memcpy(f->rewind_buffer, buf, size);
|
||||||
|
|
||||||
|
return (int) bytes_read;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Create a libarchive reader for f and open it.
 *
 * Files that live on the file system are opened by name. Other vfiles
 * (e.g. an archive nested in another archive) are read through the vfile_*
 * callbacks, and only when allow_recurse is set.
 *
 * @param ctx           archive scan context (provides the optional passphrase)
 * @param f             file to open
 * @param a             receives the reader; left untouched when ARC_SKIPPED is returned
 * @param arc_data      callback state; its f member is always set to f
 * @param allow_recurse whether non file-system vfiles may be opened
 * @return ARC_SKIPPED, or the result of the libarchive open call
 */
int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse) {
    arc_data->f = f;

    if (!f->is_fs_file && !allow_recurse) {
        return ARC_SKIPPED;
    }

    // Reader setup is the same for both kinds of source.
    *a = archive_read_new();
    archive_read_support_filter_all(*a);
    archive_read_support_format_all(*a);
    if (ctx->passphrase[0] != 0) {
        archive_read_add_passphrase(*a, ctx->passphrase);
    }

    if (f->is_fs_file) {
        return archive_read_open_filename(*a, f->filepath, ARC_BUF_SIZE);
    }

    return archive_read_open(
            *a, arc_data,
            vfile_open_callback,
            vfile_read_callback,
            vfile_close_callback
    );
}
|
||||||
|
|
||||||
|
// Per-thread output vector handed to pcre_exec() by EXCLUDED().
static __thread int sub_strings[30];
// Expects `exclude` and `exclude_extra` to be in scope at the point of use.
// The last argument of pcre_exec() is the NUMBER OF ELEMENTS in the output
// vector, not its size in bytes.
#define EXCLUDED(str) (pcre_exec(exclude, exclude_extra, (str), strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
|
||||||
|
|
||||||
|
/**
 * Scan the archive f.
 *
 * In ARC_MODE_LIST the names of all regular-file entries are joined with
 * spaces and attached to doc as a MetaContent line. In every other mode each
 * regular-file entry is exposed as a virtual file named
 * "<archive path>#/<entry path>" and passed to ctx->parse(), unless the name
 * matches the exclude pattern.
 *
 * @param ctx           archive scan context
 * @param f             the archive
 * @param doc           document describing the archive; receives list-mode
 *                      metadata and provides the parent id of sub-jobs
 * @param exclude       compiled exclude pattern, or NULL for none
 * @param exclude_extra pcre study data for exclude
 * @return SCAN_OK (also when the archive was skipped), SCAN_ERR_READ when the
 *         archive could not be opened or working memory could not be allocated
 */
scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra) {

    struct archive *a = NULL;
    struct archive_entry *entry = NULL;

    arc_data_t arc_data;
    arc_data.f = f;

    int ret = arc_open(ctx, f, &a, &arc_data, ctx->mode == ARC_MODE_RECURSE);
    if (ret == ARC_SKIPPED) {
        return SCAN_OK;
    }

    if (ret != ARCHIVE_OK) {
        CTX_LOG_ERRORF(f->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
        archive_read_free(a);
        return SCAN_ERR_READ;
    }

    if (ctx->mode == ARC_MODE_LIST) {
        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
                const char *utf8_name = archive_entry_pathname_utf8(entry);
                const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;

                // Entries for which libarchive cannot produce a name are skipped.
                if (file_path == NULL) {
                    continue;
                }

                dyn_buffer_append_string(&buf, file_path);
                dyn_buffer_write_char(&buf, ' ');
            }
        }
        dyn_buffer_write_char(&buf, '\0');

        // buf.cur counts the terminating NUL written just above.
        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        if (meta_list != NULL) {
            meta_list->key = MetaContent;
            strcpy(meta_list->str_val, buf.buf);
            APPEND_META(doc, meta_list)
        } else {
            CTX_LOG_ERRORF(f->filepath, "(arc.c) Could not allocate %d bytes for the entry list", (int) buf.cur)
        }
        dyn_buffer_destroy(&buf);

    } else {

        // Room reserved for sub_job->filepath by the allocation below.
        const size_t filepath_cap = PATH_MAX * 2;

        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + filepath_cap);
        if (sub_job == NULL) {
            CTX_LOG_ERRORF(f->filepath, "(arc.c) Could not allocate %d bytes for a parse job", (int) filepath_cap)
            archive_read_free(a);
            return SCAN_ERR_READ;
        }

        sub_job->vfile.close = arc_close;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.read_rewindable = arc_read_rewindable;
        sub_job->vfile.reset = NULL;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        sub_job->vfile.rewind_buffer_size = 0;
        sub_job->vfile.rewind_buffer = NULL;
        sub_job->vfile.log = ctx->log;
        sub_job->vfile.logf = ctx->logf;
        sub_job->vfile.has_checksum = FALSE;
        sub_job->vfile.calculate_checksum = f->calculate_checksum;
        memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH);

        const size_t archive_path_len = strlen(f->filepath);

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->vfile.info = *archive_entry_stat(entry);
            if (S_ISREG(sub_job->vfile.info.st_mode)) {

                const char *utf8_name = archive_entry_pathname_utf8(entry);
                const char *entry_name = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;
                if (entry_name == NULL) {
                    continue;
                }

                // Bounded formatting: entry names come from the archive and
                // may be arbitrarily long.
                int written = snprintf(sub_job->filepath, filepath_cap, "%s#/%s", f->filepath, entry_name);
                if (written < 0 || (size_t) written >= filepath_cap) {
                    CTX_LOG_ERRORF(f->filepath, "(arc.c) Skipping entry, path too long (%d bytes)", written)
                    continue;
                }

                sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

                // Handle excludes
                if (exclude != NULL && EXCLUDED(sub_job->filepath)) {
                    CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath)
                    continue;
                }

                // A dot only counts as an extension separator when it lies
                // after the archive's own path.
                char *p = strrchr(sub_job->filepath, '.');
                if (p != NULL && (size_t) (p - sub_job->filepath) > archive_path_len) {
                    sub_job->ext = (int) (p - sub_job->filepath + 1);
                } else {
                    sub_job->ext = (int) strlen(sub_job->filepath);
                }

                SHA1_Init(&sub_job->vfile.sha1_ctx);

                ctx->parse(sub_job);
            }
        }

        free(sub_job);
    }

    archive_read_free(a);
    return SCAN_OK;
}
|
||||||
80
third-party/libscan/libscan/arc/arc.h
vendored
Normal file
80
third-party/libscan/libscan/arc/arc.h
vendored
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
#ifndef SCAN_ARC_H
|
||||||
|
#define SCAN_ARC_H
|
||||||
|
|
||||||
|
#include <archive.h>
|
||||||
|
#include <archive_entry.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <pcre.h>
|
||||||
|
#include "../scan.h"
|
||||||
|
|
||||||
|
# define ARC_SKIPPED (-1)
|
||||||
|
#define ARC_MODE_SKIP 0
|
||||||
|
#define ARC_MODE_LIST 1
|
||||||
|
#define ARC_MODE_SHALLOW 2
|
||||||
|
#define ARC_MODE_RECURSE 3
|
||||||
|
typedef int archive_mode_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
archive_mode_t mode;
|
||||||
|
|
||||||
|
parse_callback_t parse;
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
store_callback_t store;
|
||||||
|
char passphrase[4096];
|
||||||
|
} scan_arc_ctx_t;
|
||||||
|
|
||||||
|
#define ARC_BUF_SIZE 8192
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
vfile_t *f;
|
||||||
|
char buf[ARC_BUF_SIZE];
|
||||||
|
} arc_data_t;
|
||||||
|
|
||||||
|
static int vfile_open_callback(struct archive *a, void *user_data) {
|
||||||
|
arc_data_t *data = (arc_data_t *) user_data;
|
||||||
|
|
||||||
|
if (!data->f->is_fs_file) {
|
||||||
|
SHA1_Init(&data->f->sha1_ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ARCHIVE_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
static long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
|
||||||
|
arc_data_t *data = (arc_data_t *) user_data;
|
||||||
|
|
||||||
|
*buf = data->buf;
|
||||||
|
long ret = data->f->read(data->f, data->buf, sizeof(data->buf));
|
||||||
|
|
||||||
|
if (!data->f->is_fs_file && ret > 0) {
|
||||||
|
data->f->has_checksum = TRUE;
|
||||||
|
safe_sha1_update(&data->f->sha1_ctx, (unsigned char*)data->buf, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int vfile_close_callback(struct archive *a, void *user_data) {
|
||||||
|
arc_data_t *data = (arc_data_t *) user_data;
|
||||||
|
|
||||||
|
if (!data->f->is_fs_file) {
|
||||||
|
SHA1_Final((unsigned char *) data->f->sha1_digest, &data->f->sha1_ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ARCHIVE_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse);
|
||||||
|
|
||||||
|
int should_parse_filtered_file(const char *filepath, int ext);
|
||||||
|
|
||||||
|
scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra);
|
||||||
|
|
||||||
|
int arc_read(struct vfile *f, void *buf, size_t size);
|
||||||
|
|
||||||
|
int arc_read_rewindable(struct vfile *f, void *buf, size_t size);
|
||||||
|
|
||||||
|
void arc_close(struct vfile *f);
|
||||||
|
|
||||||
|
#endif
|
||||||
58
third-party/libscan/libscan/comic/comic.c
vendored
Normal file
58
third-party/libscan/libscan/comic/comic.c
vendored
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
#include "comic.h"
|
||||||
|
#include "../media/media.h"
|
||||||
|
#include "../arc/arc.h"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <archive.h>
|
||||||
|
|
||||||
|
static scan_arc_ctx_t arc_ctx = (scan_arc_ctx_t) {.passphrase = {0,}};
|
||||||
|
|
||||||
|
/**
 * Create the thumbnail of a comic book archive from the first .png/.jpg/.jpeg
 * entry that store_image_thumbnail() accepts.
 *
 * Does nothing when thumbnails are disabled (ctx->tn_size <= 0). Scanning
 * stops at the first entry that cannot be read or buffered.
 *
 * @param ctx comic scan context
 * @param f   the archive file
 * @param doc document the thumbnail belongs to
 */
void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
    struct archive *a = NULL;
    struct archive_entry *entry = NULL;
    arc_data_t arc_data;

    if (ctx->tn_size <= 0) {
        return;
    }

    int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE);
    if (ret != ARCHIVE_OK) {
        CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a))
        archive_read_free(a);
        return;
    }

    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
        struct stat info = *archive_entry_stat(entry);
        if (!S_ISREG(info.st_mode)) {
            continue;
        }

        const char *utf8_name = archive_entry_pathname_utf8(entry);
        const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;
        if (file_path == NULL) {
            continue;
        }

        const char *p = strrchr(file_path, '.');
        if (p == NULL || (strcmp(p, ".png") != 0 && strcmp(p, ".jpg") != 0 && strcmp(p, ".jpeg") != 0)) {
            continue;
        }

        // The entry size is signed; an empty entry cannot hold an image.
        const long long declared_size = archive_entry_size(entry);
        if (declared_size <= 0) {
            continue;
        }
        const size_t entry_size = (size_t) declared_size;

        void *buf = malloc(entry_size);
        if (buf == NULL) {
            CTX_LOG_ERRORF("comic.c", "Error while reading entry: %s", "out of memory")
            break;
        }

        // Signed on purpose: libarchive reports errors as negative values.
        const long long read = archive_read_data(a, buf, entry_size);
        if (read < 0 || (size_t) read != entry_size) {
            const char *err_str = archive_error_string(a);
            if (err_str) {
                CTX_LOG_ERRORF("comic.c", "Error while reading entry: %s", err_str)
            }
            free(buf);
            break;
        }

        // NOTE(review): ctx is passed as a scan_media_ctx_t; this presumably
        // relies on both context structs sharing their leading fields.
        ret = store_image_thumbnail((scan_media_ctx_t *) ctx, buf, entry_size, doc, file_path);
        free(buf);

        if (ret == TRUE) {
            break;
        }
    }

    archive_read_free(a);
}
|
||||||
31
third-party/libscan/libscan/comic/comic.h
vendored
Normal file
31
third-party/libscan/libscan/comic/comic.h
vendored
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
#ifndef SCAN_CBR_H
|
||||||
|
#define SCAN_CBR_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "../ebook/ebook.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
store_callback_t store;
|
||||||
|
|
||||||
|
int tn_size;
|
||||||
|
float tn_qscale;
|
||||||
|
|
||||||
|
unsigned int cbr_mime;
|
||||||
|
unsigned int cbz_mime;
|
||||||
|
} scan_comic_ctx_t;
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static int is_cbr(scan_comic_ctx_t *ctx, unsigned int mime) {
|
||||||
|
return mime == ctx->cbr_mime;
|
||||||
|
}
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static int is_cbz(scan_comic_ctx_t *ctx, unsigned int mime) {
|
||||||
|
return mime == ctx->cbz_mime;
|
||||||
|
}
|
||||||
|
|
||||||
|
void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
|
||||||
|
#endif
|
||||||
495
third-party/libscan/libscan/ebook/ebook.c
vendored
Normal file
495
third-party/libscan/libscan/ebook/ebook.c
vendored
Normal file
@@ -0,0 +1,495 @@
|
|||||||
|
#include "ebook.h"
|
||||||
|
#include <mupdf/fitz.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <tesseract/capi.h>
|
||||||
|
|
||||||
|
#include "../media/media.h"
|
||||||
|
#include "../arc/arc.h"
|
||||||
|
|
||||||
|
#define MIN_OCR_SIZE 350
|
||||||
|
#define MIN_OCR_LEN 10
|
||||||
|
|
||||||
|
/* fill_image callback doesn't let us pass opaque pointers unless I create my own device */
|
||||||
|
__thread text_buffer_t thread_buffer;
|
||||||
|
__thread scan_ebook_ctx_t thread_ctx;
|
||||||
|
|
||||||
|
pthread_mutex_t Mutex;
|
||||||
|
|
||||||
|
/* MuPDF lock hook. Only FZ_LOCK_FREETYPE is backed by a mutex; requests for
 * any other lock index are ignored. */
static void my_fz_lock(UNUSED(void *user), int lock) {
    if (lock != FZ_LOCK_FREETYPE) {
        return;
    }
    pthread_mutex_lock(&Mutex);
}
|
||||||
|
|
||||||
|
/* MuPDF unlock hook, counterpart of my_fz_lock(): releases the mutex for
 * FZ_LOCK_FREETYPE and ignores every other lock index. */
static void my_fz_unlock(UNUSED(void *user), int lock) {
    if (lock != FZ_LOCK_FREETYPE) {
        return;
    }
    pthread_mutex_unlock(&Mutex);
}
|
||||||
|
|
||||||
|
|
||||||
|
int pixmap_is_blank(const fz_pixmap *pixmap) {
|
||||||
|
int pixmap_size = pixmap->n * pixmap->w * pixmap->h;
|
||||||
|
const int pixel0 = pixmap->samples[0];
|
||||||
|
for (int i = 0; i < pixmap_size; i++) {
|
||||||
|
if (pixmap->samples[i] != pixel0) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
fz_pixmap *
|
||||||
|
load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzdoc, document_t *doc, fz_page **cover) {
|
||||||
|
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
fz_var(cover);
|
||||||
|
fz_var(err);
|
||||||
|
fz_try(fzctx)*cover = fz_load_page(fzctx, fzdoc, page);
|
||||||
|
fz_catch(fzctx)err = 1;
|
||||||
|
|
||||||
|
if (err != 0) {
|
||||||
|
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
fz_rect bounds = fz_bound_page(fzctx, *cover);
|
||||||
|
|
||||||
|
float scale;
|
||||||
|
float w = bounds.x1 - bounds.x0;
|
||||||
|
float h = bounds.y1 - bounds.y0;
|
||||||
|
if (w > h) {
|
||||||
|
scale = (float) ctx->tn_size / w;
|
||||||
|
} else {
|
||||||
|
scale = (float) ctx->tn_size / h;
|
||||||
|
}
|
||||||
|
fz_matrix m = fz_scale(scale, scale);
|
||||||
|
|
||||||
|
bounds = fz_transform_rect(bounds, m);
|
||||||
|
fz_irect bbox = fz_round_rect(bounds);
|
||||||
|
fz_pixmap *pixmap = fz_new_pixmap_with_bbox(fzctx, fz_device_rgb(fzctx), bbox, NULL, 0);
|
||||||
|
|
||||||
|
fz_clear_pixmap_with_value(fzctx, pixmap, 0xFF);
|
||||||
|
fz_device *dev = fz_new_draw_device(fzctx, m, pixmap);
|
||||||
|
|
||||||
|
fz_var(err);
|
||||||
|
fz_try(fzctx) {
|
||||||
|
fz_run_page(fzctx, *cover, dev, fz_identity, NULL);
|
||||||
|
} fz_always(fzctx) {
|
||||||
|
fz_close_device(fzctx, dev);
|
||||||
|
fz_drop_device(fzctx, dev);
|
||||||
|
} fz_catch(fzctx)err = fzctx->error.errcode;
|
||||||
|
|
||||||
|
if (err != 0) {
|
||||||
|
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||||
|
fz_drop_page(fzctx, *cover);
|
||||||
|
fz_drop_pixmap(fzctx, pixmap);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pixmap->n != 3) {
|
||||||
|
CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n)
|
||||||
|
fz_drop_page(fzctx, *cover);
|
||||||
|
fz_drop_pixmap(fzctx, pixmap);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return pixmap;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Render the cover of a document to a JPEG thumbnail and store it under the
 * document's path_md5.
 *
 * Page 0 is used unless it renders to a uniform image, in which case page 1
 * is used instead. The page is rendered to RGB24, converted to YUV420P with
 * swscale and encoded with the encoder returned by alloc_jpeg_encoder().
 *
 * @param ctx   ebook scan context (thumbnail settings, store and log callbacks)
 * @param fzctx MuPDF context
 * @param doc   document being scanned; receives the thumbnail metadata
 * @param fzdoc opened MuPDF document
 * @return TRUE when a thumbnail was stored, FALSE when the page could not be
 *         rendered, a buffer could not be allocated or encoding failed
 */
int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_document *fzdoc) {

    // Slack appended to the sample copy given to sws_scale().
    // NOTE(review): this replaces a fixed 1 GiB scratch allocation, which
    // presumably existed to tolerate reads past the last row - confirm that
    // one page of slack is enough.
    enum { SWS_READ_PAD = 4096 };

    fz_page *cover = NULL;
    fz_pixmap *pixmap = load_pixmap(ctx, 0, fzctx, fzdoc, doc, &cover);
    if (pixmap == NULL) {
        return FALSE;
    }

    if (pixmap_is_blank(pixmap)) {
        fz_drop_page(fzctx, cover);
        fz_drop_pixmap(fzctx, pixmap);
        CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead")
        pixmap = load_pixmap(ctx, 1, fzctx, fzdoc, doc, &cover);
        if (pixmap == NULL) {
            return FALSE;
        }
    }

    int stored = FALSE;
    int enc_ret = 0;
    int dst_buf_len = 0;
    size_t sample_bytes = 0;
    AVFrame *scaled_frame = NULL;
    struct SwsContext *sws_ctx = NULL;
    uint8_t *dst_buf = NULL;
    unsigned char *samples = NULL;
    AVCodecContext *jpeg_encoder = NULL;

    // av_init_packet() leaves data and size untouched, so set them here to
    // make the cleanup path safe even if no packet is ever received.
    AVPacket jpeg_packet;
    av_init_packet(&jpeg_packet);
    jpeg_packet.data = NULL;
    jpeg_packet.size = 0;

    // RGB24 -> YUV420p
    scaled_frame = av_frame_alloc();

    sws_ctx = sws_getContext(
            pixmap->w, pixmap->h, AV_PIX_FMT_RGB24,
            pixmap->w, pixmap->h, AV_PIX_FMT_YUV420P,
            SIST_SWS_ALGO, 0, 0, 0
    );

    dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, pixmap->w, pixmap->h, 1);
    if (dst_buf_len > 0) {
        dst_buf = (uint8_t *) av_malloc(dst_buf_len);
    }

    sample_bytes = (size_t) pixmap->stride * (size_t) pixmap->h;
    samples = calloc(1, sample_bytes + SWS_READ_PAD);

    if (scaled_frame == NULL || sws_ctx == NULL || dst_buf == NULL || samples == NULL) {
        CTX_LOG_ERRORF(doc->filepath, "Could not allocate buffers for a %dx%d cover", pixmap->w, pixmap->h)
        goto cleanup;
    }

    av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, pixmap->w, pixmap->h,
                         1);

    memcpy(samples, pixmap->samples, sample_bytes);

    const uint8_t *in_data[1] = {samples,};
    int in_line_size[1] = {(int) pixmap->stride};

    sws_scale(sws_ctx,
              in_data, in_line_size,
              0, pixmap->h,
              scaled_frame->data, scaled_frame->linesize
    );

    scaled_frame->width = pixmap->w;
    scaled_frame->height = pixmap->h;
    scaled_frame->format = AV_PIX_FMT_YUV420P;

    // YUV420p -> JPEG
    jpeg_encoder = alloc_jpeg_encoder(pixmap->w, pixmap->h, ctx->tn_qscale);
    if (jpeg_encoder == NULL) {
        CTX_LOG_ERRORF(doc->filepath, "Could not create a JPEG encoder for a %dx%d cover", pixmap->w, pixmap->h)
        goto cleanup;
    }

    enc_ret = avcodec_send_frame(jpeg_encoder, scaled_frame);
    if (enc_ret >= 0) {
        enc_ret = avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
    }
    if (enc_ret < 0) {
        CTX_LOG_ERRORF(doc->filepath, "Could not encode cover thumbnail [%d]", enc_ret)
        goto cleanup;
    }

    APPEND_TN_META(doc, pixmap->w, pixmap->h)
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
    stored = TRUE;

cleanup:
    // Every call below accepts NULL / an empty packet.
    free(samples);
    av_packet_unref(&jpeg_packet);
    av_free(dst_buf);
    av_frame_free(&scaled_frame);
    avcodec_free_context(&jpeg_encoder);
    sws_freeContext(sws_ctx);

    fz_drop_pixmap(fzctx, pixmap);
    fz_drop_page(fzctx, cover);

    return stored;
}
|
||||||
|
|
||||||
|
void fz_err_callback(void *user, const char *message) {
|
||||||
|
document_t *doc = (document_t *) user;
|
||||||
|
|
||||||
|
const scan_ebook_ctx_t *ctx = &thread_ctx;
|
||||||
|
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message)
|
||||||
|
}
|
||||||
|
|
||||||
|
void fz_warn_callback(void *user, const char *message) {
|
||||||
|
document_t *doc = (document_t *) user;
|
||||||
|
|
||||||
|
const scan_ebook_ctx_t *ctx = &thread_ctx;
|
||||||
|
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message)
|
||||||
|
}
|
||||||
|
|
||||||
|
static void init_fzctx(fz_context *fzctx, document_t *doc) {
|
||||||
|
fz_register_document_handlers(fzctx);
|
||||||
|
|
||||||
|
static int mu_is_initialized = FALSE;
|
||||||
|
if (!mu_is_initialized) {
|
||||||
|
pthread_mutex_init(&Mutex, NULL);
|
||||||
|
mu_is_initialized = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
fzctx->warn.print_user = doc;
|
||||||
|
fzctx->warn.print = fz_warn_callback;
|
||||||
|
fzctx->error.print_user = doc;
|
||||||
|
fzctx->error.print = fz_err_callback;
|
||||||
|
|
||||||
|
fzctx->locks.lock = my_fz_lock;
|
||||||
|
fzctx->locks.unlock = my_fz_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Copies the characters of one structured-text block into `tex`.
 * A space is emitted before every line and once more after the last line.
 * Blocks that are not text blocks are ignored.
 *
 * @return TEXT_BUF_FULL as soon as a character append reports a full buffer,
 *         0 otherwise
 */
static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
    if (block->type == FZ_STEXT_BLOCK_TEXT) {
        for (fz_stext_line *ln = block->u.t.first_line; ln != NULL; ln = ln->next) {
            text_buffer_append_char(tex, ' ');

            for (fz_stext_char *ch = ln->first_char; ch != NULL; ch = ch->next) {
                if (text_buffer_append_char(tex, ch->c) == TEXT_BUF_FULL) {
                    return TEXT_BUF_FULL;
                }
            }
        }

        text_buffer_append_char(tex, ' ');
    }

    return 0;
}
|
||||||
|
|
||||||
|
#define IS_VALID_BPP(d) ((d)==1 || (d)==2 || (d)==4 || (d)==8 || (d)==16 || (d)==24 || (d)==32)
|
||||||
|
|
||||||
|
void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
|
||||||
|
fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
|
||||||
|
UNUSED(fz_color_params color_params)) {
|
||||||
|
|
||||||
|
int l2factor = 0;
|
||||||
|
|
||||||
|
if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) {
|
||||||
|
|
||||||
|
fz_pixmap *pix = img->get_pixmap(fzctx, img, NULL, img->w, img->h, &l2factor);
|
||||||
|
|
||||||
|
if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
|
||||||
|
TessBaseAPI *api = TessBaseAPICreate();
|
||||||
|
TessBaseAPIInit3(api, thread_ctx.tesseract_path, thread_ctx.tesseract_lang);
|
||||||
|
|
||||||
|
TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
|
||||||
|
TessBaseAPISetSourceResolution(api, pix->xres);
|
||||||
|
|
||||||
|
char *text = TessBaseAPIGetUTF8Text(api);
|
||||||
|
size_t len = strlen(text);
|
||||||
|
if (len >= MIN_OCR_LEN) {
|
||||||
|
text_buffer_append_string(&thread_buffer, text, len - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
TessBaseAPIEnd(api);
|
||||||
|
TessBaseAPIDelete(api);
|
||||||
|
}
|
||||||
|
fz_drop_pixmap(fzctx, pix);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc, int tn_only) {
|
||||||
|
|
||||||
|
fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_DEFAULT);
|
||||||
|
thread_ctx = *ctx;
|
||||||
|
|
||||||
|
init_fzctx(fzctx, doc);
|
||||||
|
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
fz_document *fzdoc = NULL;
|
||||||
|
fz_stream *stream = NULL;
|
||||||
|
fz_var(fzdoc);
|
||||||
|
fz_var(stream);
|
||||||
|
fz_var(err);
|
||||||
|
|
||||||
|
fz_try(fzctx) {
|
||||||
|
stream = fz_open_memory(fzctx, buf, buf_len);
|
||||||
|
fzdoc = fz_open_document_with_stream(fzctx, mime_str, stream);
|
||||||
|
} fz_catch(fzctx)err = fzctx->error.errcode;
|
||||||
|
|
||||||
|
if (err != 0) {
|
||||||
|
fz_drop_stream(fzctx, stream);
|
||||||
|
fz_drop_document(fzctx, fzdoc);
|
||||||
|
fz_drop_context(fzctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int page_count = -1;
|
||||||
|
fz_var(err);
|
||||||
|
fz_try(fzctx)page_count = fz_count_pages(fzctx, fzdoc);
|
||||||
|
fz_catch(fzctx)err = fzctx->error.errcode;
|
||||||
|
|
||||||
|
if (err) {
|
||||||
|
CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message)
|
||||||
|
fz_drop_stream(fzctx, stream);
|
||||||
|
fz_drop_document(fzctx, fzdoc);
|
||||||
|
fz_drop_context(fzctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
APPEND_LONG_META(doc, MetaPages, page_count)
|
||||||
|
|
||||||
|
if (ctx->tn_size > 0) {
|
||||||
|
if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) {
|
||||||
|
fz_drop_stream(fzctx, stream);
|
||||||
|
fz_drop_document(fzctx, fzdoc);
|
||||||
|
fz_drop_context(fzctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tn_only) {
|
||||||
|
fz_drop_stream(fzctx, stream);
|
||||||
|
fz_drop_document(fzctx, fzdoc);
|
||||||
|
fz_drop_context(fzctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
char title[8192] = {'\0',};
|
||||||
|
fz_try(fzctx)fz_lookup_metadata(fzctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
|
||||||
|
fz_catch(fzctx);
|
||||||
|
|
||||||
|
if (strlen(title) > 0) {
|
||||||
|
APPEND_UTF8_META(doc, MetaTitle, title)
|
||||||
|
}
|
||||||
|
|
||||||
|
char author[4096] = {'\0',};
|
||||||
|
fz_try(fzctx)fz_lookup_metadata(fzctx, fzdoc, FZ_META_INFO_AUTHOR, author, sizeof(author));
|
||||||
|
fz_catch(fzctx);
|
||||||
|
|
||||||
|
if (strlen(author) > 0) {
|
||||||
|
APPEND_UTF8_META(doc, MetaAuthor, author)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (ctx->content_size > 0) {
|
||||||
|
fz_stext_options opts = {0};
|
||||||
|
thread_buffer = text_buffer_create(ctx->content_size);
|
||||||
|
|
||||||
|
for (int current_page = 0; current_page < page_count; current_page++) {
|
||||||
|
fz_page *page = NULL;
|
||||||
|
fz_var(err);
|
||||||
|
fz_try(fzctx)page = fz_load_page(fzctx, fzdoc, current_page);
|
||||||
|
fz_catch(fzctx)err = fzctx->error.errcode;
|
||||||
|
if (err != 0) {
|
||||||
|
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||||
|
text_buffer_destroy(&thread_buffer);
|
||||||
|
fz_drop_page(fzctx, page);
|
||||||
|
fz_drop_stream(fzctx, stream);
|
||||||
|
fz_drop_document(fzctx, fzdoc);
|
||||||
|
fz_drop_context(fzctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
fz_stext_page *stext = fz_new_stext_page(fzctx, fz_bound_page(fzctx, page));
|
||||||
|
fz_device *dev = fz_new_stext_device(fzctx, stext, &opts);
|
||||||
|
dev->stroke_path = NULL;
|
||||||
|
dev->stroke_text = NULL;
|
||||||
|
dev->clip_text = NULL;
|
||||||
|
dev->clip_stroke_path = NULL;
|
||||||
|
dev->clip_stroke_text = NULL;
|
||||||
|
|
||||||
|
if (ctx->tesseract_lang != NULL) {
|
||||||
|
dev->fill_image = fill_image;
|
||||||
|
}
|
||||||
|
|
||||||
|
fz_var(err);
|
||||||
|
fz_try(fzctx)fz_run_page(fzctx, page, dev, fz_identity, NULL);
|
||||||
|
fz_always(fzctx) {
|
||||||
|
fz_close_device(fzctx, dev);
|
||||||
|
fz_drop_device(fzctx, dev);
|
||||||
|
} fz_catch(fzctx)err = fzctx->error.errcode;
|
||||||
|
|
||||||
|
if (err != 0) {
|
||||||
|
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||||
|
text_buffer_destroy(&thread_buffer);
|
||||||
|
fz_drop_page(fzctx, page);
|
||||||
|
fz_drop_stext_page(fzctx, stext);
|
||||||
|
fz_drop_stream(fzctx, stream);
|
||||||
|
fz_drop_document(fzctx, fzdoc);
|
||||||
|
fz_drop_context(fzctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
fz_stext_block *block = stext->first_block;
|
||||||
|
while (block != NULL) {
|
||||||
|
int ret = read_stext_block(block, &thread_buffer);
|
||||||
|
if (ret == TEXT_BUF_FULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
block = block->next;
|
||||||
|
}
|
||||||
|
fz_drop_stext_page(fzctx, stext);
|
||||||
|
fz_drop_page(fzctx, page);
|
||||||
|
|
||||||
|
if (thread_buffer.dyn_buffer.cur >= ctx->content_size) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
text_buffer_terminate_string(&thread_buffer);
|
||||||
|
|
||||||
|
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
|
||||||
|
meta_content->key = MetaContent;
|
||||||
|
memcpy(meta_content->str_val, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
|
||||||
|
APPEND_META(doc, meta_content)
|
||||||
|
|
||||||
|
text_buffer_destroy(&thread_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
fz_drop_stream(fzctx, stream);
|
||||||
|
fz_drop_document(fzctx, fzdoc);
|
||||||
|
fz_drop_context(fzctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
static scan_arc_ctx_t arc_ctx = (scan_arc_ctx_t) {.passphrase = {0,}};
|
||||||
|
|
||||||
|
/**
 * Fast EPUB path: opens the file as an archive and feeds every .html/.xhtml
 * entry through text_buffer_append_markup(), then stores the result as the
 * document's MetaContent entry. MuPDF is not used.
 *
 * @param ctx scanner settings and callbacks
 * @param f   file to read the archive from
 * @param doc receives the MetaContent entry
 */
void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
    struct archive *a = NULL;
    struct archive_entry *entry = NULL;
    arc_data_t arc_data;

    // NOTE(review): gating text extraction on tn_size looks odd; kept as-is,
    // confirm the intent.
    if (ctx->tn_size <= 0) {
        return;
    }

    int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE);
    if (ret != ARCHIVE_OK) {
        CTX_LOG_ERRORF(f->filepath, "(ebook.c) [%d] %s", ret, archive_error_string(a))
        archive_read_free(a);
        return;
    }

    // Created only after the early returns above so it cannot leak.
    text_buffer_t content_buffer = text_buffer_create(ctx->content_size);

    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
        struct stat info = *archive_entry_stat(entry);
        if (!S_ISREG(info.st_mode)) {
            continue;
        }

        const char *utf8_name = archive_entry_pathname_utf8(entry);
        const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;
        if (file_path == NULL) {
            continue;
        }

        const char *p = strrchr(file_path, '.');
        if (p == NULL || (strcmp(p, ".html") != 0 && strcmp(p, ".xhtml") != 0)) {
            continue;
        }

        size_t entry_size = archive_entry_size(entry);
        char *buf = malloc(entry_size + 1);
        if (buf == NULL) {
            CTX_LOG_ERROR("ebook.c", "malloc() failed")
            break;
        }

        size_t read = archive_read_data(a, buf, entry_size);
        buf[entry_size] = '\0';

        if (read != entry_size) {
            const char *err_str = archive_error_string(a);
            if (err_str) {
                CTX_LOG_ERRORF("ebook.c", "Error while reading entry: %s", err_str)
            }
            free(buf);
            break;
        }

        ret = text_buffer_append_markup(&content_buffer, buf);
        free(buf);

        if (ret == TEXT_BUF_FULL) {
            break;
        }
    }

    text_buffer_terminate_string(&content_buffer);

    meta_line_t *meta_content = malloc(sizeof(meta_line_t) + content_buffer.dyn_buffer.cur);
    meta_content->key = MetaContent;
    memcpy(meta_content->str_val, content_buffer.dyn_buffer.buf, content_buffer.dyn_buffer.cur);
    APPEND_META(doc, meta_content)

    text_buffer_destroy(&content_buffer);

    archive_read_free(a);
}
|
||||||
|
|
||||||
|
/**
 * Entry point for ebook files. EPUBs go through parse_epub_fast() when
 * fast_epub_parse is set; everything else is read fully into memory and
 * handed to parse_ebook_mem().
 */
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc) {

    if (!ctx->fast_epub_parse || !is_epub(mime_str)) {
        size_t data_len;
        void *data = read_all(f, &data_len);

        if (data != NULL) {
            parse_ebook_mem(ctx, data, data_len, mime_str, doc, FALSE);
            free(data);
        } else {
            CTX_LOG_ERROR(f->filepath, "read_all() failed")
        }
        return;
    }

    parse_epub_fast(ctx, f, doc);
}
|
||||||
30
third-party/libscan/libscan/ebook/ebook.h
vendored
Normal file
30
third-party/libscan/libscan/ebook/ebook.h
vendored
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
#ifndef SCAN_EBOOK_H
|
||||||
|
#define SCAN_EBOOK_H
|
||||||
|
|
||||||
|
#include "../scan.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
long content_size;
|
||||||
|
int tn_size;
|
||||||
|
const char *tesseract_lang;
|
||||||
|
const char *tesseract_path;
|
||||||
|
pthread_mutex_t mupdf_mutex;
|
||||||
|
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
store_callback_t store;
|
||||||
|
int fast_epub_parse;
|
||||||
|
float tn_qscale;
|
||||||
|
} scan_ebook_ctx_t;
|
||||||
|
|
||||||
|
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc);
|
||||||
|
|
||||||
|
void
|
||||||
|
parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc, int tn_only);
|
||||||
|
|
||||||
|
// Returns 1 when mime_string is exactly "application/epub+zip", 0 otherwise.
__always_inline
static int is_epub(const char *mime_string) {
    return !strcmp(mime_string, "application/epub+zip");
}
|
||||||
|
|
||||||
|
#endif
|
||||||
246
third-party/libscan/libscan/font/font.c
vendored
Normal file
246
third-party/libscan/libscan/font/font.c
vendored
Normal file
@@ -0,0 +1,246 @@
|
|||||||
|
#include "font.h"
|
||||||
|
|
||||||
|
#include <ft2build.h>
|
||||||
|
#include <freetype/freetype.h>
|
||||||
|
#include "../util.h"
|
||||||
|
|
||||||
|
|
||||||
|
__thread FT_Library ft_lib = NULL;
|
||||||
|
|
||||||
|
|
||||||
|
// Size of the bitmap needed to render a string, as computed by text_dimension().
typedef struct text_dimensions {
    unsigned int width;    // sum of the per-character widths plus kerning
    unsigned int height;   // largest ascent plus largest descent
    unsigned int baseline; // largest descent among the glyphs
} text_dimensions_t;
|
||||||
|
|
||||||
|
// Metrics and pixel data of one glyph, filled in by ft_glyph_to_glyph().
typedef struct glyph {
    int top;               // bitmap_top of the FreeType glyph slot
    int height;            // bitmap rows
    int width;             // bitmap width
    int descent;           // max(0, height - top)
    int ascent;            // max(0, max(top, height) - descent)
    int advance_width;     // slot advance.x divided by 64
    unsigned char *pixmap; // points at the slot's bitmap buffer (not a copy)
} glyph_t;
|
||||||
|
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
int kerning_offset(char c, char pc, FT_Face face) {
|
||||||
|
FT_Vector kerning;
|
||||||
|
FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning);
|
||||||
|
|
||||||
|
return (int) (kerning.x / 64);
|
||||||
|
}
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
|
||||||
|
glyph_t glyph;
|
||||||
|
|
||||||
|
glyph.pixmap = slot->bitmap.buffer;
|
||||||
|
|
||||||
|
glyph.width = (int) slot->bitmap.width;
|
||||||
|
glyph.height = (int) slot->bitmap.rows;
|
||||||
|
glyph.top = slot->bitmap_top;
|
||||||
|
glyph.advance_width = (int) slot->advance.x / 64;
|
||||||
|
|
||||||
|
glyph.descent = MAX(0, glyph.height - glyph.top);
|
||||||
|
glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
|
||||||
|
|
||||||
|
return glyph;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Measures the bitmap needed to render `text` with `face`.
 *
 * Each character is loaded with default flags; the width accumulates the
 * larger of the glyph's advance and bitmap width, plus kerning against the
 * previous character.
 */
text_dimensions_t text_dimension(char *text, FT_Face face) {
    const int count = (int) strlen(text);

    unsigned int total_width = 0;
    unsigned int tallest = 0;
    int deepest = 0;
    char previous = 0;

    int idx = 0;
    while (idx < count) {
        const char current = text[idx];

        FT_Load_Char(face, current, 0);
        glyph_t g = ft_glyph_to_glyph(face->glyph);

        if (g.descent > deepest) {
            deepest = g.descent;
        }
        tallest = MAX(tallest, MAX(g.height, g.ascent));

        int kern = kerning_offset(current, previous, face);
        total_width += MAX(g.advance_width, g.width) + kern;

        previous = current;
        idx++;
    }

    text_dimensions_t result;
    result.width = total_width;
    result.height = tallest + deepest;
    result.baseline = deepest;
    return result;
}
|
||||||
|
|
||||||
|
/**
 * ORs a glyph's pixels into `bitmap` with its top-left corner at (x, y).
 * `bitmap` is treated as text_info.width * text_info.height bytes, row-major;
 * pixels whose destination index is at or past the end are skipped.
 */
void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
    const unsigned int limit = text_info.width * text_info.height;
    // Distance from the end of one glyph row to the start of the next.
    const unsigned int skip = text_info.width - glyph->width;

    unsigned int out = y * text_info.width + x;
    unsigned int in = 0;

    for (unsigned int row = 0; row < glyph->height; row++, out += skip) {
        for (unsigned int col = 0; col < glyph->width; col++, in++, out++) {
            if (out < limit) {
                bitmap[out] |= glyph->pixmap[in];
            }
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Serialises an 8-bit grayscale bitmap as a BMP file into `buf`.
 *
 * @param buf        destination buffer (assumed empty on entry — the size field
 *                   is patched at byte offset 2)
 * @param dimensions width and height of `bitmap`
 * @param bitmap     width * height bytes, row-major, top row first
 */
void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {

    dyn_buffer_write_short(buf, 0x4D42); // Magic
    dyn_buffer_write_int(buf, 0); // Size placeholder
    dyn_buffer_write_int(buf, 0x5157); //Reserved
    dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset

    dyn_buffer_write_int(buf, 40); // DIB size
    dyn_buffer_write_int(buf, (int) dimensions.width);
    dyn_buffer_write_int(buf, (int) dimensions.height);
    dyn_buffer_write_short(buf, 1); // Color planes
    dyn_buffer_write_short(buf, 8); // bits per pixel
    dyn_buffer_write_int(buf, 0); // compression
    dyn_buffer_write_int(buf, 0); // Ignored
    dyn_buffer_write_int(buf, 3800); // hres
    dyn_buffer_write_int(buf, 3800); // vres
    dyn_buffer_write_int(buf, 256); // Color count
    dyn_buffer_write_int(buf, 0); // Ignored

    // RGBA32 Color table (Grayscale), index 0 = white ... 255 = black
    for (int i = 255; i >= 0; i--) {
        dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
    }

    // Each row must be a multiple of 4 bytes long. The padding is derived from
    // the row length, not from the absolute buffer offset: the pixel array
    // starts at byte 1078, which is not 4-byte aligned.
    const unsigned int padding = (4 - dimensions.width % 4) % 4;

    // Pixel array: write from bottom to top
    for (int y = (int) dimensions.height - 1; y >= 0; y--) {
        for (unsigned int x = 0; x < dimensions.width; x++) {
            dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
        }
        for (unsigned int p = 0; p < padding; p++) {
            dyn_buffer_write_char(buf, 0);
        }
    }

    // Size: patched in with memcpy because offset 2 is not int-aligned
    int file_size = (int) buf->cur;
    memcpy((char *) buf->buf + 2, &file_size, sizeof(file_size));
}
|
||||||
|
|
||||||
|
/**
 * Reads a font file, records its name as MetaFontName and, when thumbnails
 * are enabled, renders the name with the font itself into a BMP thumbnail
 * that is passed to ctx->store().
 *
 * @param ctx font scanner settings and callbacks
 * @param f   file to read
 * @param doc receives the metadata entries
 */
void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
    if (ft_lib == NULL) {
        if (FT_Init_FreeType(&ft_lib) != 0) {
            ft_lib = NULL;
            CTX_LOG_ERROR(f->filepath, "FT_Init_FreeType() failed")
            return;
        }
    }

    size_t buf_len = 0;
    void *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        return;
    }

    FT_Face face;
    FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, (int) buf_len, 0, &face);
    if (err != 0) {
        CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err,
                       FT_Error_String(err))
        free(buf);
        return;
    }

    char font_name[4096];

    // snprintf always terminates the string; a missing family name is spelled
    // out instead of handing NULL to "%s".
    const char *family = face->family_name != NULL ? face->family_name : "(null)";
    if (face->style_name == NULL || (strcmp(face->style_name, "?") == 0)) {
        snprintf(font_name, sizeof(font_name), "%s", family);
    } else {
        snprintf(font_name, sizeof(font_name), "%s %s", family, face->style_name);
    }

    APPEND_STR_META(doc, MetaFontName, font_name)

    // Nothing more to do when thumbnails are disabled.
    if (!ctx->enable_tn) {
        FT_Done_Face(face);
        free(buf);
        return;
    }

    int pixel = 64;
    int num_chars = (int) strlen(font_name);

    err = FT_Set_Pixel_Sizes(face, 0, pixel);
    if (err != 0) {
        CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err,
                         FT_Error_String(err))
        FT_Done_Face(face);
        free(buf);
        return;
    }

    text_dimensions_t dimensions = text_dimension(font_name, face);
    unsigned char *bitmap = calloc(dimensions.width * dimensions.height, 1);
    if (bitmap == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(font.c) calloc() failed")
        FT_Done_Face(face);
        free(buf);
        return;
    }

    FT_Vector pen;
    pen.x = 0;
    pen.y = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
        char c = font_name[i];

        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
        if (err != 0) {
            // Retry with the other letter case before giving up on this character.
            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
            err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
            if (err != 0) {
                CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err,
                                 FT_Error_String(err))
                continue;
            }
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        pen.x += kerning_offset(c, pc, face);
        if (pen.x <= 0) {
            pen.x = ABS(glyph.advance_width - glyph.width);
        }
        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;

        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

        pen.x += glyph.advance_width;
        pc = c;
    }

    dyn_buffer_t bmp_data = dyn_buffer_create();
    bmp_format(&bmp_data, dimensions, bitmap);

    APPEND_TN_META(doc, dimensions.width, dimensions.height)
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);
    free(bitmap);

    FT_Done_Face(face);
    free(buf);
}
|
||||||
|
|
||||||
|
void cleanup_font() {
|
||||||
|
FT_Done_FreeType(ft_lib);
|
||||||
|
}
|
||||||
17
third-party/libscan/libscan/font/font.h
vendored
Normal file
17
third-party/libscan/libscan/font/font.h
vendored
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
#ifndef SCAN_FONT_H
|
||||||
|
#define SCAN_FONT_H
|
||||||
|
|
||||||
|
#include "../scan.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int enable_tn;
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
store_callback_t store;
|
||||||
|
} scan_font_ctx_t;
|
||||||
|
|
||||||
|
void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
void cleanup_font();
|
||||||
|
|
||||||
|
#endif
|
||||||
119
third-party/libscan/libscan/json/json.c
vendored
Normal file
119
third-party/libscan/libscan/json/json.c
vendored
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
#include "json.h"
|
||||||
|
#include "cjson/cJSON.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define JSON_MAX_FILE_SIZE (1024 * 1024 * 50)
|
||||||
|
|
||||||
|
/**
 * Recursively appends every string value found in `json` to `tex`, each
 * followed by a single space. Objects and arrays are walked child by child;
 * other value types are ignored.
 *
 * @return TRUE as soon as an append reports TEXT_BUF_FULL, FALSE otherwise
 */
int json_extract_text(cJSON *json, text_buffer_t *tex) {
    if (cJSON_IsObject(json) || cJSON_IsArray(json)) {
        cJSON *item;
        cJSON_ArrayForEach(item, json) {
            if (json_extract_text(item, tex)) {
                return TRUE;
            }
        }
        return FALSE;
    }

    if (!cJSON_IsString(json)) {
        return FALSE;
    }

    if (text_buffer_append_string0(tex, json->valuestring) == TEXT_BUF_FULL) {
        return TRUE;
    }

    return text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL ? TRUE : FALSE;
}
|
||||||
|
|
||||||
|
/**
 * Parses a whole JSON file and stores all of its string values as the
 * document's MetaContent entry.
 *
 * @return SCAN_ERR_SKIP when the file exceeds JSON_MAX_FILE_SIZE,
 *         SCAN_ERR_READ when it cannot be read or terminated,
 *         SCAN_OK otherwise (an unparsable file yields empty content)
 */
scan_code_t parse_json(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc) {

    if (f->info.st_size > JSON_MAX_FILE_SIZE) {
        CTX_LOG_WARNINGF("json.c", "File larger than maximum allowed [%s]", f->filepath)
        return SCAN_ERR_SKIP;
    }

    size_t buf_len;
    char *buf = read_all(f, &buf_len);

    if (buf == NULL) {
        return SCAN_ERR_READ;
    }

    // Grow by one byte for the terminator. The original pointer is kept until
    // realloc succeeds so it can still be freed on failure.
    char *terminated = realloc(buf, buf_len + 1);
    if (terminated == NULL) {
        free(buf);
        return SCAN_ERR_READ;
    }
    buf = terminated;
    buf[buf_len] = '\0';

    cJSON *json = cJSON_ParseWithOpts(buf, NULL, TRUE);
    text_buffer_t tex = text_buffer_create(ctx->content_size);

    json_extract_text(json, &tex);
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    cJSON_Delete(json);
    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
|
||||||
|
|
||||||
|
#define JSON_BUF_SIZE (1024 * 1024 * 5)
|
||||||
|
|
||||||
|
/**
 * Parses a newline-delimited JSON file through a fixed JSON_BUF_SIZE window:
 * one value is parsed from the front of the buffer, the consumed bytes are
 * shifted out, and the freed tail is refilled from the file. All string
 * values are stored as the document's MetaContent entry.
 *
 * @return SCAN_ERR_READ when the window cannot be allocated, SCAN_OK otherwise
 */
scan_code_t parse_ndjson(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc) {

    // calloc zero-fills, so the extra byte already terminates the window.
    char *buf = calloc(JSON_BUF_SIZE + 1, sizeof(char));
    if (buf == NULL) {
        return SCAN_ERR_READ;
    }

    text_buffer_t tex = text_buffer_create(ctx->content_size);

    size_t ret;
    int eof = FALSE;
    const char *parse_end = buf;
    size_t to_read;
    char *ptr = buf;

    while (TRUE) {
        cJSON *json;

        if (!eof) {
            // First pass fills the whole window; later passes refill only the
            // bytes consumed by the previous value.
            to_read = parse_end == buf ? JSON_BUF_SIZE : parse_end - buf;
            ret = f->read(f, ptr, to_read);
            if (ret != to_read) {
                eof = TRUE;
            }
        }

        json = cJSON_ParseWithOpts(buf, &parse_end, FALSE);

        if (parse_end == buf + JSON_BUF_SIZE) {
            CTX_LOG_ERRORF("json.c", "Line too large for buffer [%s]", doc->filepath);
            cJSON_Delete(json);
            break;
        }

        if (parse_end == buf) {
            cJSON_Delete(json);
            break;
        }

        int full = json_extract_text(json, &tex);

        cJSON_Delete(json);

        // Nothing more can be stored once the text buffer is full.
        if (full) {
            break;
        }

        size_t consumed = parse_end - buf;
        size_t remaining = JSON_BUF_SIZE - consumed;

        memmove(buf, parse_end, remaining);
        ptr = buf + remaining;
        // Clear the vacated tail so stale bytes are not parsed again when the
        // next read is short or skipped at end of file.
        memset(ptr, 0, consumed);
    }

    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
|
||||||
30
third-party/libscan/libscan/json/json.h
vendored
Normal file
30
third-party/libscan/libscan/json/json.h
vendored
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
#ifndef SCAN_JSON_H
|
||||||
|
#define SCAN_JSON_H
|
||||||
|
|
||||||
|
#include "../scan.h"
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
long content_size;
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
store_callback_t store;
|
||||||
|
unsigned int json_mime;
|
||||||
|
unsigned int ndjson_mime;
|
||||||
|
} scan_json_ctx_t;
|
||||||
|
|
||||||
|
scan_code_t parse_json(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
|
||||||
|
scan_code_t parse_ndjson(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static int is_json(scan_json_ctx_t *ctx, unsigned int mime) {
|
||||||
|
return mime == ctx->json_mime;
|
||||||
|
}
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static int is_ndjson(scan_json_ctx_t *ctx, unsigned int mime) {
|
||||||
|
return mime == ctx->ndjson_mime;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
62
third-party/libscan/libscan/macros.h
vendored
Normal file
62
third-party/libscan/libscan/macros.h
vendored
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
#ifndef FALSE
|
||||||
|
#define FALSE (0)
|
||||||
|
#define BOOL int
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef TRUE
|
||||||
|
#define TRUE (!FALSE)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#undef MAX
|
||||||
|
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
|
||||||
|
|
||||||
|
#undef MIN
|
||||||
|
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
||||||
|
|
||||||
|
#ifndef PATH_MAX
|
||||||
|
#define PATH_MAX 4096
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#undef ABS
|
||||||
|
#define ABS(a) (((a) < 0) ? -(a) : (a))
|
||||||
|
|
||||||
|
#define SHA1_STR_LENGTH 41
|
||||||
|
#define SHA1_DIGEST_LENGTH 20
|
||||||
|
|
||||||
|
/*
 * The APPEND_*_META helpers below expand to a braced block (not do/while)
 * because call sites invoke them without a trailing semicolon.
 */

/* Appends a copy of the NUL-terminated string `value` under `keyname`.
 * `value` is evaluated twice. */
#define APPEND_STR_META(doc, keyname, value) \
    {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \
    meta_str->key = keyname; \
    strcpy(meta_str->str_val, value); \
    APPEND_META(doc, meta_str)}

/* Appends the integer `value` under `keyname`. */
#define APPEND_LONG_META(doc, keyname, value) \
    {meta_line_t *meta_long = malloc(sizeof(meta_line_t)); \
    meta_long->key = keyname; \
    meta_long->long_val = value; \
    APPEND_META(doc, meta_long)}

/* Room for two full-width ints, the comma and the terminator. */
#define TN_META_STR_LEN 24

/* Appends the thumbnail size as "WWWW,HHHH" under MetaThumbnail.
 * The write is bounded so dimensions above 9999 cannot overrun the entry. */
#define APPEND_TN_META(doc, width, height) \
    {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + TN_META_STR_LEN); \
    meta_str->key = MetaThumbnail; \
    snprintf(meta_str->str_val, TN_META_STR_LEN, "%04d,%04d", (int) (width), (int) (height)); \
    APPEND_META(doc, meta_str)}

/* Links `meta` at the tail of the document's metadata list. */
#define APPEND_META(doc, meta) \
    meta->next = NULL;\
    if (doc->meta_head == NULL) {\
        doc->meta_head = meta;\
        doc->meta_tail = doc->meta_head;\
    } else {\
        doc->meta_tail->next = meta;\
        doc->meta_tail = meta;\
    }
|
||||||
|
|
||||||
|
/* Passes `str` through a text buffer and appends the resulting string under
 * `keyname`. Wrapped in a block so the temporaries `tex` and `meta_tag` stay
 * out of the caller's scope and the macro can be used twice in one scope. */
#define APPEND_UTF8_META(doc, keyname, str) \
    {text_buffer_t tex = text_buffer_create(-1); \
    text_buffer_append_string0(&tex, str); \
    text_buffer_terminate_string(&tex); \
    meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
    meta_tag->key = keyname; \
    strcpy(meta_tag->str_val, tex.dyn_buffer.buf); \
    APPEND_META(doc, meta_tag) \
    text_buffer_destroy(&tex);}
|
||||||
749
third-party/libscan/libscan/media/media.c
vendored
Normal file
749
third-party/libscan/libscan/media/media.c
vendored
Normal file
@@ -0,0 +1,749 @@
|
|||||||
|
#include "media.h"
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
|
#define MIN_SIZE 32
|
||||||
|
#define AVIO_BUF_SIZE 8192
|
||||||
|
#define IS_VIDEO(fmt) (fmt->iformat->name && strcmp(fmt->iformat->name, "image2") != 0)
|
||||||
|
|
||||||
|
#define STORE_AS_IS ((void*)-1)
|
||||||
|
|
||||||
|
/**
 * Returns a path that carries a file extension.
 *
 * When doc->ext is not greater than doc->base (treated here as "no
 * extension") and the mime type is PNG or JPEG, a placeholder name with the
 * matching extension is returned; otherwise `filepath` is returned unchanged.
 */
const char *get_filepath_with_ext(document_t *doc, const char *filepath, const char *mime_str) {

    if (doc->ext > doc->base) {
        return filepath;
    }

    if (strcmp(mime_str, "image/png") == 0) {
        return "file.png";
    }

    if (strcmp(mime_str, "image/jpeg") == 0) {
        return "file.jpg";
    }

    return filepath;
}
|
||||||
|
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
void *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
|
||||||
|
|
||||||
|
if (frame->pict_type == AV_PICTURE_TYPE_NONE) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int dstW;
|
||||||
|
int dstH;
|
||||||
|
if (frame->width <= size && frame->height <= size) {
|
||||||
|
if (decoder->codec_id == AV_CODEC_ID_MJPEG || decoder->codec_id == AV_CODEC_ID_PNG) {
|
||||||
|
return STORE_AS_IS;
|
||||||
|
}
|
||||||
|
|
||||||
|
dstW = frame->width;
|
||||||
|
dstH = frame->height;
|
||||||
|
} else {
|
||||||
|
double ratio = (double) frame->width / frame->height;
|
||||||
|
if (frame->width > frame->height) {
|
||||||
|
dstW = size;
|
||||||
|
dstH = (int) (size / ratio);
|
||||||
|
} else {
|
||||||
|
dstW = (int) (size * ratio);
|
||||||
|
dstH = size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
AVFrame *scaled_frame = av_frame_alloc();
|
||||||
|
|
||||||
|
struct SwsContext *sws_ctx = sws_getContext(
|
||||||
|
decoder->width, decoder->height, decoder->pix_fmt,
|
||||||
|
dstW, dstH, AV_PIX_FMT_YUVJ420P,
|
||||||
|
SIST_SWS_ALGO, 0, 0, 0
|
||||||
|
);
|
||||||
|
|
||||||
|
int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
||||||
|
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len * 2);
|
||||||
|
|
||||||
|
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
|
||||||
|
|
||||||
|
sws_scale(sws_ctx,
|
||||||
|
(const uint8_t *const *) frame->data, frame->linesize,
|
||||||
|
0, decoder->height,
|
||||||
|
scaled_frame->data, scaled_frame->linesize
|
||||||
|
);
|
||||||
|
|
||||||
|
scaled_frame->width = dstW;
|
||||||
|
scaled_frame->height = dstH;
|
||||||
|
scaled_frame->format = AV_PIX_FMT_YUV420P;
|
||||||
|
|
||||||
|
sws_freeContext(sws_ctx);
|
||||||
|
|
||||||
|
return scaled_frame;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
AVPacket *packet;
|
||||||
|
AVFrame *frame;
|
||||||
|
} frame_and_packet_t;
|
||||||
|
|
||||||
|
/**
 * Release a frame_and_packet_t and the AVPacket/AVFrame it owns.
 *
 * @param frame_and_packet structure to release; NULL is accepted and ignored.
 */
static void frame_and_packet_free(frame_and_packet_t *frame_and_packet) {
    if (frame_and_packet == NULL) {
        return;
    }

    // Both calls tolerate a NULL member and reset the member to NULL, so the
    // packet must not be handed to libc free() afterwards.
    av_packet_free(&frame_and_packet->packet);
    av_frame_free(&frame_and_packet->frame);

    free(frame_and_packet);
}
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static void read_subtitles(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, int stream_idx, document_t *doc) {
|
||||||
|
|
||||||
|
text_buffer_t tex = text_buffer_create(-1);
|
||||||
|
|
||||||
|
AVPacket packet;
|
||||||
|
AVSubtitle subtitle;
|
||||||
|
|
||||||
|
AVCodec *subtitle_codec = avcodec_find_decoder(pFormatCtx->streams[stream_idx]->codecpar->codec_id);
|
||||||
|
AVCodecContext *decoder = avcodec_alloc_context3(subtitle_codec);
|
||||||
|
avcodec_parameters_to_context(decoder, pFormatCtx->streams[stream_idx]->codecpar);
|
||||||
|
avcodec_open2(decoder, subtitle_codec, NULL);
|
||||||
|
|
||||||
|
decoder->sub_text_format = FF_SUB_TEXT_FMT_ASS;
|
||||||
|
|
||||||
|
int got_sub;
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
int read_frame_ret = av_read_frame(pFormatCtx, &packet);
|
||||||
|
|
||||||
|
if (read_frame_ret != 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (packet.stream_index != stream_idx) {
|
||||||
|
av_packet_unref(&packet);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
avcodec_decode_subtitle2(decoder, &subtitle, &got_sub, &packet);
|
||||||
|
|
||||||
|
if (got_sub) {
|
||||||
|
for (int i = 0; i < subtitle.num_rects; i++) {
|
||||||
|
const char *text = subtitle.rects[i]->ass;
|
||||||
|
|
||||||
|
if (text == NULL) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *idx = strstr(text, "\\N");
|
||||||
|
if (idx != NULL && strlen(idx + 2) > 1) {
|
||||||
|
text_buffer_append_string0(&tex, idx + 2);
|
||||||
|
text_buffer_append_char(&tex, ' ');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
avsubtitle_free(&subtitle);
|
||||||
|
}
|
||||||
|
|
||||||
|
av_packet_unref(&packet);
|
||||||
|
}
|
||||||
|
|
||||||
|
text_buffer_terminate_string(&tex);
|
||||||
|
|
||||||
|
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
|
||||||
|
text_buffer_destroy(&tex);
|
||||||
|
avcodec_free_context(&decoder);
|
||||||
|
}
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static frame_and_packet_t *
|
||||||
|
read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx,
|
||||||
|
document_t *doc) {
|
||||||
|
|
||||||
|
frame_and_packet_t *result = calloc(1, sizeof(frame_and_packet_t));
|
||||||
|
result->packet = av_packet_alloc();
|
||||||
|
result->frame = av_frame_alloc();
|
||||||
|
|
||||||
|
av_init_packet(result->packet);
|
||||||
|
|
||||||
|
int receive_ret = -EAGAIN;
|
||||||
|
while (receive_ret == -EAGAIN) {
|
||||||
|
// Get video frame
|
||||||
|
while (1) {
|
||||||
|
int read_frame_ret = av_read_frame(pFormatCtx, result->packet);
|
||||||
|
|
||||||
|
if (read_frame_ret != 0) {
|
||||||
|
if (read_frame_ret != AVERROR_EOF) {
|
||||||
|
CTX_LOG_WARNINGF(doc->filepath,
|
||||||
|
"(media.c) avcodec_read_frame() returned error code [%d] %s",
|
||||||
|
read_frame_ret, av_err2str(read_frame_ret)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
frame_and_packet_free(result);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Ignore audio/other frames
|
||||||
|
if (result->packet->stream_index != stream_idx) {
|
||||||
|
av_packet_unref(result->packet);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Feed it to decoder
|
||||||
|
int decode_ret = avcodec_send_packet(decoder, result->packet);
|
||||||
|
if (decode_ret != 0) {
|
||||||
|
CTX_LOG_ERRORF(doc->filepath,
|
||||||
|
"(media.c) avcodec_send_packet() returned error code [%d] %s",
|
||||||
|
decode_ret, av_err2str(decode_ret)
|
||||||
|
)
|
||||||
|
frame_and_packet_free(result);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
receive_ret = avcodec_receive_frame(decoder, result->frame);
|
||||||
|
if (receive_ret == -EAGAIN && result->packet != NULL) {
|
||||||
|
av_packet_unref(result->packet);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Append the value of `tag` to `doc` under `key`, unless the document
 * already has a metadata line with that key (a debug message is logged and
 * the document is left untouched in that case).
 */
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {

    // Look for an existing line with the same key
    for (meta_line_t *cur_meta = doc->meta_head; cur_meta != NULL; cur_meta = cur_meta->next) {
        if (cur_meta->key != key) {
            continue;
        }
        CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
                       key, cur_meta->str_val, key, tag->value)
        return;
    }

    // Run the tag value through a text buffer before copying it out
    text_buffer_t value_buf = text_buffer_create(-1);
    text_buffer_append_string0(&value_buf, tag->value);
    text_buffer_terminate_string(&value_buf);

    meta_line_t *tag_meta = malloc(sizeof(meta_line_t) + value_buf.dyn_buffer.cur);
    tag_meta->key = key;
    strcpy(tag_meta->str_val, value_buf.dyn_buffer.buf);

    APPEND_META(doc, tag_meta)

    text_buffer_destroy(&value_buf);
}
|
||||||
|
|
||||||
|
/* Append tag->value to doc under `keyname`; expects `doc` and `tag` in scope. */
#define APPEND_TAG_META(keyname) \
    APPEND_UTF8_META(doc, keyname, tag->value)

/*
 * Copy `str` into the char ARRAY `dst` (sizeof(dst) must be its capacity)
 * and lowercase it in place. The copy is truncated to fit and is always
 * NUL-terminated. Declares a local `ptr`, so it can be used only once per
 * scope.
 */
#define STRCPY_TOLOWER(dst, str) \
    strncpy(dst, str, sizeof(dst) - 1); \
    dst[sizeof(dst) - 1] = '\0'; \
    char *ptr = dst; \
    for (; *ptr; ++ptr) *ptr = (char) tolower((unsigned char) *ptr);
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
|
||||||
|
|
||||||
|
AVDictionaryEntry *tag = NULL;
|
||||||
|
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||||
|
char key[256];
|
||||||
|
STRCPY_TOLOWER(key, tag->key)
|
||||||
|
|
||||||
|
if (strcmp(key, "artist") == 0) {
|
||||||
|
APPEND_TAG_META(MetaArtist)
|
||||||
|
} else if (strcmp(key, "genre") == 0) {
|
||||||
|
APPEND_TAG_META(MetaGenre)
|
||||||
|
} else if (strcmp(key, "title") == 0) {
|
||||||
|
APPEND_TAG_META(MetaTitle)
|
||||||
|
} else if (strcmp(key, "album_artist") == 0) {
|
||||||
|
APPEND_TAG_META(MetaAlbumArtist)
|
||||||
|
} else if (strcmp(key, "album") == 0) {
|
||||||
|
APPEND_TAG_META(MetaAlbum)
|
||||||
|
} else if (strcmp(key, "comment") == 0) {
|
||||||
|
APPEND_TAG_META(MetaContent)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static void
|
||||||
|
append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) {
|
||||||
|
|
||||||
|
if (is_video) {
|
||||||
|
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
|
||||||
|
meta_duration->key = MetaMediaDuration;
|
||||||
|
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
|
||||||
|
if (meta_duration->long_val > INT32_MAX) {
|
||||||
|
meta_duration->long_val = 0;
|
||||||
|
}
|
||||||
|
APPEND_META(doc, meta_duration)
|
||||||
|
|
||||||
|
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
|
||||||
|
meta_bitrate->key = MetaMediaBitrate;
|
||||||
|
meta_bitrate->long_val = pFormatCtx->bit_rate;
|
||||||
|
APPEND_META(doc, meta_bitrate)
|
||||||
|
}
|
||||||
|
|
||||||
|
AVDictionaryEntry *tag = NULL;
|
||||||
|
if (is_video) {
|
||||||
|
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||||
|
char key[256];
|
||||||
|
STRCPY_TOLOWER(key, tag->key)
|
||||||
|
|
||||||
|
if (strcmp(key, "title") == 0) {
|
||||||
|
append_tag_meta_if_not_exists(ctx, doc, tag, MetaTitle);
|
||||||
|
} else if (strcmp(key, "comment") == 0) {
|
||||||
|
append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
|
||||||
|
} else if (strcmp(key, "artist") == 0) {
|
||||||
|
append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// EXIF metadata
|
||||||
|
while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||||
|
char key[256];
|
||||||
|
STRCPY_TOLOWER(key, tag->key)
|
||||||
|
|
||||||
|
if (strcmp(key, "artist") == 0) {
|
||||||
|
append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist);
|
||||||
|
} else if (strcmp(key, "imagedescription") == 0) {
|
||||||
|
APPEND_TAG_META(MetaContent)
|
||||||
|
} else if (strcmp(key, "make") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifMake)
|
||||||
|
} else if (strcmp(key, "model") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifModel)
|
||||||
|
} else if (strcmp(key, "software") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifSoftware)
|
||||||
|
} else if (strcmp(key, "fnumber") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifFNumber)
|
||||||
|
} else if (strcmp(key, "focallength") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifFocalLength)
|
||||||
|
} else if (strcmp(key, "usercomment") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifUserComment)
|
||||||
|
} else if (strcmp(key, "isospeedratings") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifIsoSpeedRatings)
|
||||||
|
} else if (strcmp(key, "exposuretime") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifExposureTime)
|
||||||
|
} else if (strcmp(key, "datetime") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifDateTime)
|
||||||
|
} else if (strcmp(key, "gpslatitude") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifGpsLatitudeDMS)
|
||||||
|
} else if (strcmp(key, "gpslatituderef") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifGpsLatitudeRef)
|
||||||
|
} else if (strcmp(key, "gpslongitude") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifGpsLongitudeDMS)
|
||||||
|
} else if (strcmp(key, "gpslongituderef") == 0) {
|
||||||
|
APPEND_TAG_META(MetaExifGpsLongitudeRef)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Extract metadata, optional subtitles and a thumbnail from an opened
 * AVFormatContext.
 *
 * Takes ownership of `pFormatCtx`: it is closed and freed on every path, so
 * the caller must not use it afterwards.
 *
 * The thumbnail is only produced when a video stream exists and
 * ctx->tn_size > 0. Nothing is stored when the frame cannot be decoded,
 * scaled or encoded.
 */
void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {

    int video_stream = -1;
    int audio_stream = -1;
    int subtitle_stream = -1;

    AVCodecContext *decoder = NULL;
    frame_and_packet_t *frame_and_packet = NULL;

    avformat_find_stream_info(pFormatCtx, NULL);

    // Streams are visited from last to first: the audio/video stream with
    // the highest index is kept, while the subtitle index is overwritten on
    // every match and ends up being the lowest one.
    for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
        AVStream *stream = pFormatCtx->streams[i];

        if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (audio_stream == -1) {
                const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id);

                if (desc != NULL) {
                    APPEND_STR_META(doc, MetaMediaAudioCodec, desc->name)
                }

                audio_stream = i;
            }
        } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {

            if (video_stream == -1) {
                const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id);

                if (desc != NULL) {
                    APPEND_STR_META(doc, MetaMediaVideoCodec, desc->name)
                }

                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
                meta_w->key = MetaWidth;
                meta_w->long_val = stream->codecpar->width;
                APPEND_META(doc, meta_w)

                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
                meta_h->key = MetaHeight;
                meta_h->long_val = stream->codecpar->height;
                APPEND_META(doc, meta_h)

                video_stream = i;
            }
        } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) {
            subtitle_stream = i;
        }
    }

    if (subtitle_stream != -1 && ctx->read_subtitles) {
        read_subtitles(ctx, pFormatCtx, subtitle_stream, doc);

        // Reset stream: read_subtitles() consumed the whole container
        if (video_stream != -1) {
            av_seek_frame(pFormatCtx, video_stream, 0, 0);
        }
    }

    if (audio_stream != -1) {
        append_audio_meta(pFormatCtx, doc);
    }

    if (video_stream != -1 && ctx->tn_size > 0) {
        AVStream *stream = pFormatCtx->streams[video_stream];

        if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
            goto cleanup;
        }

        // Decoder
        const AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
        decoder = avcodec_alloc_context3(video_codec);
        if (decoder == NULL) {
            goto cleanup;
        }
        avcodec_parameters_to_context(decoder, stream->codecpar);
        // A failed open surfaces later as an avcodec_send_packet() error
        // logged by read_frame().
        avcodec_open2(decoder, video_codec, NULL);

        //Seek to 10% of the stream so the thumbnail is not the first frame
        if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
            // NOTE(review): every attempt uses the same target, so the retry
            // loop only helps if av_seek_frame() can fail transiently.
            for (int i = 20; i >= 0; i--) {
                int seek_ret = av_seek_frame(pFormatCtx, video_stream,
                                             (int64_t) (stream->duration * 0.10), 0);
                if (seek_ret == 0) {
                    break;
                }
            }
        }

        frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
        if (frame_and_packet == NULL) {
            goto cleanup;
        }

        append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));

        // Scale frame
        AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
        if (scaled_frame == NULL) {
            goto cleanup;
        }

        if (scaled_frame == STORE_AS_IS) {
            // The source packet is stored unmodified as the thumbnail
            APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
            ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
                       frame_and_packet->packet->size);
        } else {
            // Encode frame to jpeg
            AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
                                                              ctx->tn_qscale);
            if (jpeg_encoder != NULL) {
                AVPacket jpeg_packet;
                av_init_packet(&jpeg_packet);
                // av_init_packet() leaves data/size untouched
                jpeg_packet.data = NULL;
                jpeg_packet.size = 0;

                if (avcodec_send_frame(jpeg_encoder, scaled_frame) == 0 &&
                    avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
                    // Save thumbnail
                    APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
                    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data,
                               jpeg_packet.size);
                }

                av_packet_unref(&jpeg_packet);
                avcodec_free_context(&jpeg_encoder);
            }

            // The pixel buffer and the frame are released separately
            av_free(*scaled_frame->data);
            av_frame_free(&scaled_frame);
        }
    }

cleanup:
    if (frame_and_packet != NULL) {
        frame_and_packet_free(frame_and_packet);
    }
    avcodec_free_context(&decoder);
    // Frees the context and sets the local pointer to NULL
    avformat_close_input(&pFormatCtx);
}
|
||||||
|
|
||||||
|
/**
 * Open `filepath` with libavformat and hand the resulting context to
 * parse_media_format_ctx(). Failures are logged against doc->filepath.
 */
void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_t *doc) {

    AVFormatContext *format_ctx = avformat_alloc_context();
    if (!format_ctx) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    const int open_ret = avformat_open_input(&format_ctx, filepath, NULL, NULL);
    if (open_ret >= 0) {
        parse_media_format_ctx(ctx, format_ctx, doc);
        return;
    }

    CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", open_ret, av_err2str(open_ret))
    avformat_close_input(&format_ctx);
    avformat_free_context(format_ctx);
}
|
||||||
|
|
||||||
|
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
|
||||||
|
struct vfile *f = ptr;
|
||||||
|
|
||||||
|
int ret = f->read(f, buf, buf_size);
|
||||||
|
|
||||||
|
if (ret == 0) {
|
||||||
|
return AVERROR_EOF;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** An in-memory file: a byte buffer exposed as a FILE stream via fmemopen(). */
typedef struct {
    size_t size; /* length of buf in bytes */
    FILE *file;  /* stream opened over buf */
    void *buf;   /* backing bytes */
} memfile_t;
|
||||||
|
|
||||||
|
int memfile_read(void *ptr, uint8_t *buf, int buf_size) {
|
||||||
|
memfile_t *mem = ptr;
|
||||||
|
|
||||||
|
size_t ret = fread(buf, 1, buf_size, mem->file);
|
||||||
|
|
||||||
|
if (ret == 0 && feof(mem->file)) {
|
||||||
|
return AVERROR_EOF;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (int) ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
long memfile_seek(void *ptr, long offset, int whence) {
|
||||||
|
memfile_t *mem = ptr;
|
||||||
|
|
||||||
|
if (whence == 0x10000) {
|
||||||
|
return mem->size;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ret = fseek(mem->file, offset, whence);
|
||||||
|
if (ret != 0) {
|
||||||
|
return AVERROR_EOF;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ftell(mem->file);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Read the vfile `f` (f->info.st_size bytes) into a freshly allocated
 * buffer and open a stream over it. When f->calculate_checksum is set, the
 * SHA1 of the buffer is stored in `f`.
 *
 * @return 0 on success; -1 when allocation fails, the read is short, or
 *         fmemopen() fails. Except for the allocation failure, mem->buf
 *         stays allocated on error.
 */
int memfile_open(vfile_t *f, memfile_t *mem) {
    mem->size = f->info.st_size;

    mem->buf = malloc(mem->size);
    if (!mem->buf) {
        return -1;
    }

    int n_read = f->read(f, mem->buf, mem->size);
    mem->file = fmemopen(mem->buf, mem->size, "rb");

    if (f->calculate_checksum) {
        SHA1_Init(&f->sha1_ctx);
        safe_sha1_update(&f->sha1_ctx, mem->buf, mem->size);
        SHA1_Final(f->sha1_digest, &f->sha1_ctx);
        f->has_checksum = TRUE;
    }

    if (n_read != mem->size) {
        return -1;
    }
    if (mem->file == NULL) {
        return -1;
    }
    return 0;
}
|
||||||
|
|
||||||
|
/**
 * Wrap a caller-owned buffer in a memfile_t. The buffer is not copied.
 *
 * @return 0 on success, -1 when fmemopen() fails (mem->file is NULL then).
 */
int memfile_open_buf(void *buf, size_t buf_len, memfile_t *mem) {
    // Keep the full size_t: an int cast would corrupt lengths above INT_MAX
    mem->size = buf_len;

    mem->buf = buf;
    mem->file = fmemopen(mem->buf, mem->size, "rb");

    return mem->file != NULL ? 0 : -1;
}
|
||||||
|
|
||||||
|
/**
 * Release a memfile_t filled by memfile_open(): closes the stream and frees
 * the buffer. Safe to call on a zero-initialised memfile_t and on one whose
 * stream could not be opened.
 */
void memfile_close(memfile_t *mem) {
    if (mem->buf == NULL) {
        return;
    }

    // The stream is NULL when fmemopen() failed; fclose(NULL) is undefined
    if (mem->file != NULL) {
        fclose(mem->file);
        mem->file = NULL;
    }

    free(mem->buf);
    mem->buf = NULL;
}
|
||||||
|
|
||||||
|
/**
 * Parse a media file that is only reachable through a vfile.
 *
 * Files no larger than ctx->max_media_buffer are loaded fully into memory
 * so that libavformat can seek. Otherwise, or when that load fails, the
 * file is streamed through vfile_read() without seek support.
 */
void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc, const char *mime_str) {

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O buffer with av_malloc()")
        avformat_free_context(pFormatCtx);
        return;
    }

    AVIOContext *io_ctx = NULL;
    memfile_t memfile = {0, 0, 0};

    const char *filepath = get_filepath_with_ext(doc, f->filepath, mime_str);

    if (f->info.st_size <= ctx->max_media_buffer) {
        int ret = memfile_open(f, &memfile);
        if (ret == 0) {
            CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->info.st_size)
            io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek);
        }
    }

    if (io_ctx == NULL) {
        CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->info.st_size)
        io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
    }

    if (io_ctx == NULL) {
        // Without a custom I/O context libavformat would try to open
        // `filepath` itself, and the cleanup below would dereference NULL.
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O context with avio_alloc_context()")
        av_free(buffer);
        memfile_close(&memfile);
        avformat_free_context(pFormatCtx);
        return;
    }

    pFormatCtx->pb = io_ctx;

    int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
    if (res < 0) {
        // NOTE(review): -5 looks like AVERROR(EIO) on Linux - confirm why
        // that error is deliberately not logged.
        if (res != -5) {
            CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
        }
        // io_ctx->buffer is freed rather than `buffer`, as before
        av_free(io_ctx->buffer);
        memfile_close(&memfile);
        avio_context_free(&io_ctx);
        avformat_close_input(&pFormatCtx);
        avformat_free_context(pFormatCtx);
        return;
    }

    // parse_media_format_ctx() closes and frees pFormatCtx
    parse_media_format_ctx(ctx, pFormatCtx, doc);

    av_free(io_ctx->buffer);
    avio_context_free(&io_ctx);
    memfile_close(&memfile);
}
|
||||||
|
|
||||||
|
/**
 * Entry point of the media scanner: files that exist on the filesystem are
 * opened by path, everything else is read through the vfile callbacks.
 */
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc, const char *mime_str) {

    if (!f->is_fs_file) {
        parse_media_vfile(ctx, f, doc, mime_str);
        return;
    }

    parse_media_filename(ctx, f->filepath, doc);
}
|
||||||
|
|
||||||
|
void init_media() {
|
||||||
|
av_log_set_level(AV_LOG_QUIET);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Decode the image held in `buf`, scale it to ctx->tn_size and store it as
 * the thumbnail of `doc`.
 *
 * @param buf     caller-owned image bytes (never freed here)
 * @param buf_len length of `buf` in bytes
 * @param url     name handed to avformat_open_input()
 * @return TRUE when a thumbnail was stored, FALSE otherwise
 */
int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, document_t *doc, const char *url) {
    int success = FALSE;
    memfile_t memfile = {0, 0, 0};
    AVIOContext *io_ctx = NULL;
    AVCodecContext *decoder = NULL;
    frame_and_packet_t *frame_and_packet = NULL;

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
        return FALSE;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        avformat_free_context(pFormatCtx);
        return FALSE;
    }

    if (memfile_open_buf(buf, buf_len, &memfile) != 0) {
        // fmemopen() failed: there is no stream to close, but the I/O
        // buffer still has to be released.
        av_free(buffer);
        avformat_free_context(pFormatCtx);
        return FALSE;
    }

    CTX_LOG_DEBUGF(doc->filepath, "Loading media file in memory (%zuB)", buf_len)
    io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek);
    if (io_ctx == NULL) {
        av_free(buffer);
        fclose(memfile.file);
        avformat_free_context(pFormatCtx);
        return FALSE;
    }

    pFormatCtx->pb = io_ctx;

    int res = avformat_open_input(&pFormatCtx, url, NULL, NULL);
    if (res != 0 || pFormatCtx->nb_streams == 0) {
        goto cleanup;
    }

    AVStream *stream = pFormatCtx->streams[0];

    // Decoder
    const AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
    decoder = avcodec_alloc_context3(video_codec);
    if (decoder == NULL) {
        goto cleanup;
    }
    avcodec_parameters_to_context(decoder, stream->codecpar);
    avcodec_open2(decoder, video_codec, NULL);

    frame_and_packet = read_frame(ctx, pFormatCtx, decoder, 0, doc);
    if (frame_and_packet == NULL) {
        goto cleanup;
    }

    // Scale frame
    AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
    if (scaled_frame == NULL) {
        goto cleanup;
    }

    if (scaled_frame == STORE_AS_IS) {
        // The source packet is stored unmodified as the thumbnail
        APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
        ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
                   frame_and_packet->packet->size);
        success = TRUE;
    } else {
        // Encode frame to jpeg
        AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
                                                          ctx->tn_qscale);
        if (jpeg_encoder != NULL) {
            AVPacket jpeg_packet;
            av_init_packet(&jpeg_packet);
            // av_init_packet() leaves data/size untouched
            jpeg_packet.data = NULL;
            jpeg_packet.size = 0;

            if (avcodec_send_frame(jpeg_encoder, scaled_frame) == 0 &&
                avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
                // Save thumbnail
                APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
                ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data,
                           jpeg_packet.size);
                success = TRUE;
            }

            av_packet_unref(&jpeg_packet);
            avcodec_free_context(&jpeg_encoder);
        }

        // The pixel buffer and the frame are released separately
        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
    }

cleanup:
    if (frame_and_packet != NULL) {
        frame_and_packet_free(frame_and_packet);
    }
    avcodec_free_context(&decoder);

    // No-op when avformat_open_input() already freed the context
    avformat_close_input(&pFormatCtx);

    av_free(io_ctx->buffer);
    avio_context_free(&io_ctx);
    fclose(memfile.file);

    return success;
}
|
||||||
52
third-party/libscan/libscan/media/media.h
vendored
Normal file
52
third-party/libscan/libscan/media/media.h
vendored
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
#ifndef SIST2_MEDIA_H
|
||||||
|
#define SIST2_MEDIA_H
|
||||||
|
|
||||||
|
|
||||||
|
#include "../scan.h"
|
||||||
|
|
||||||
|
#include "libavformat/avformat.h"
|
||||||
|
#include "libswscale/swscale.h"
|
||||||
|
#include "libswresample/swresample.h"
|
||||||
|
#include "libavcodec/avcodec.h"
|
||||||
|
#include "libavutil/imgutils.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
store_callback_t store;
|
||||||
|
|
||||||
|
int tn_size;
|
||||||
|
float tn_qscale;
|
||||||
|
long max_media_buffer;
|
||||||
|
int read_subtitles;
|
||||||
|
} scan_media_ctx_t;
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static AVCodecContext *alloc_jpeg_encoder(int w, int h, float qscale) {
|
||||||
|
|
||||||
|
const AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
|
||||||
|
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
|
||||||
|
jpeg->width = w;
|
||||||
|
jpeg->height = h;
|
||||||
|
jpeg->time_base.den = 1000000;
|
||||||
|
jpeg->time_base.num = 1;
|
||||||
|
jpeg->i_quant_factor = qscale;
|
||||||
|
|
||||||
|
jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
|
||||||
|
int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
|
||||||
|
|
||||||
|
if (ret != 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return jpeg;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Scan a media file: filesystem files are opened by path, others via the vfile callbacks. */
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc, const char *mime_str);

/** Set libav's log level to AV_LOG_QUIET. */
void init_media();

/** Create and store a thumbnail from an in-memory image; returns TRUE or FALSE. */
int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, document_t *doc, const char *url);
|
||||||
|
|
||||||
|
#endif
|
||||||
79
third-party/libscan/libscan/mobi/scan_mobi.c
vendored
Normal file
79
third-party/libscan/libscan/mobi/scan_mobi.c
vendored
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
#include "scan_mobi.h"
|
||||||
|
|
||||||
|
#include <mobi.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include "stdlib.h"
|
||||||
|
|
||||||
|
/**
 * Extract author, title and text content from a MOBI e-book and append
 * them to `doc`.
 *
 * The whole file is read into memory. Every exit path releases the
 * MOBIData handle and the file buffer.
 */
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {

    MOBIData *m = mobi_init();
    if (m == NULL) {
        CTX_LOG_ERROR(f->filepath, "mobi_init() failed")
        return;
    }

    size_t buf_len;
    char *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        mobi_free(m);
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        return;
    }

    FILE *file = fmemopen(buf, buf_len, "rb");
    if (file == NULL) {
        // Capture errno before the cleanup calls can overwrite it
        int fmemopen_errno = errno;
        mobi_free(m);
        free(buf);
        CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", fmemopen_errno)
        return;
    }

    MOBI_RET mobi_ret = mobi_load_file(m, file);
    fclose(file);
    if (mobi_ret != MOBI_SUCCESS) {
        mobi_free(m);
        free(buf);
        CTX_LOG_ERRORF(f->filepath, "mobi_load_file() returned error code [%d]", mobi_ret)
        return;
    }

    char *author = mobi_meta_get_author(m);
    if (author != NULL) {
        APPEND_STR_META(doc, MetaAuthor, author)
        free(author);
    }
    char *title = mobi_meta_get_title(m);
    if (title != NULL) {
        APPEND_STR_META(doc, MetaTitle, title)
        free(title);
    }

    const size_t maxlen = mobi_get_text_maxsize(m);
    if (maxlen == MOBI_NOTSET) {
        mobi_free(m);
        free(buf);
        CTX_LOG_DEBUGF(f->filepath, "Invalid text maxsize: %zu", maxlen)
        return;
    }

    char *content_str = malloc(maxlen + 1);
    if (content_str == NULL) {
        mobi_free(m);
        free(buf);
        CTX_LOG_ERROR(f->filepath, "malloc() failed")
        return;
    }

    size_t length = maxlen;
    mobi_ret = mobi_get_rawml(m, content_str, &length);
    if (mobi_ret != MOBI_SUCCESS) {
        mobi_free(m);
        free(content_str);
        free(buf);
        CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret)
        return;
    }

    // The buffer has room for maxlen + 1 bytes; terminate explicitly rather
    // than relying on the library to do it.
    if (length > maxlen) {
        length = maxlen;
    }
    content_str[length] = '\0';

    text_buffer_t tex = text_buffer_create(ctx->content_size);
    text_buffer_append_markup(&tex, content_str);
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)

    free(content_str);
    free(buf);
    text_buffer_destroy(&tex);
    mobi_free(m);
}
|
||||||
14
third-party/libscan/libscan/mobi/scan_mobi.h
vendored
Normal file
14
third-party/libscan/libscan/mobi/scan_mobi.h
vendored
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
#ifndef SCAN_SCAN_MOBI_H
|
||||||
|
#define SCAN_SCAN_MOBI_H
|
||||||
|
|
||||||
|
#include "../scan.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
long content_size;
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
} scan_mobi_ctx_t;
|
||||||
|
|
||||||
|
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
|
||||||
|
#endif
|
||||||
147
third-party/libscan/libscan/msdoc/msdoc.c
vendored
Normal file
147
third-party/libscan/libscan/msdoc/msdoc.c
vendored
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
#include "msdoc.h"
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include "../../third-party/antiword/src/antiword.h"
|
||||||
|
|
||||||
|
#include "../ebook/ebook.h"
|
||||||
|
|
||||||
|
/**
 * Convert a Word document to UTF-8 text with antiword and append its
 * author, title and content to `doc`.
 *
 * @param file_in stream positioned over the document bytes
 * @param buf     document bytes; freed by this function on every path
 * @param buf_len length of `buf` in bytes
 *
 * NOTE(review): `file_in` is closed only on the pCreateDiagram() failure
 * path, as in the original code - confirm against the caller who owns it.
 */
void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, void *buf, size_t buf_len) {

    // Open word doc
    options_type *opts = direct_vGetOptions();
    opts->iParagraphBreak = 74;
    opts->eConversionType = conversion_text;
    opts->bHideHiddenText = 1;
    opts->bRemoveRemovedText = 1;
    opts->bUseLandscape = 0;
    opts->eEncoding = encoding_utf_8;
    opts->iPageHeight = 842; // A4
    opts->iPageWidth = 595;
    opts->eImageLevel = level_ps_3;

    int doc_word_version = iGuessVersionNumber(file_in, (int) buf_len);
    if (doc_word_version < 0 || doc_word_version == 3) {
        free(buf);
        return;
    }
    rewind(file_in);

    size_t out_len = 0;
    char *out_buf = NULL;

    FILE *file_out = open_memstream(&out_buf, &out_len);
    if (file_out == NULL) {
        free(buf);
        return;
    }

    diagram_type *diag = pCreateDiagram("antiword", NULL, file_out);
    if (diag == NULL) {
        fclose(file_in);
        // Release the output stream and both buffers as well: nothing
        // else can free them once we return.
        fclose(file_out);
        free(out_buf);
        free(buf);
        return;
    }

    iInitDocument(file_in, (int) buf_len);
    const char *author = szGetAuthor();
    if (author != NULL) {
        APPEND_UTF8_META(doc, MetaAuthor, author)
    }

    const char *title = szGetTitle();
    if (title != NULL) {
        APPEND_UTF8_META(doc, MetaTitle, title)
    }
    vFreeDocument();

    bWordDecryptor(file_in, (int) buf_len, diag);
    vDestroyDiagram(diag);
    // Closing the memstream finalises out_buf / out_len
    fclose(file_out);

    if (buf_len > 0) {
        text_buffer_t tex = text_buffer_create(ctx->content_size);
        text_buffer_append_string(&tex, out_buf, out_len);
        text_buffer_terminate_string(&tex);

        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
        meta_content->key = MetaContent;
        memcpy(meta_content->str_val, tex.dyn_buffer.buf, tex.dyn_buffer.cur);
        APPEND_META(doc, meta_content)

        text_buffer_destroy(&tex);
    }

    free(buf);
    free(out_buf);
}
|
||||||
|
|
||||||
|
/**
 * Converts a Word document to PDF with antiword and passes the in-memory PDF
 * to parse_ebook_mem().
 *
 * @param ctx     scan context; its sizes and callbacks are copied into the ebook context
 * @param doc     document passed on to parse_ebook_mem()
 * @param file    stream over the document bytes; owned by the caller and never closed here
 * @param buf     heap buffer handed over by the caller; freed on every path
 * @param buf_len size of the document in bytes
 */
void parse_msdoc_pdf(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file, void *buf, size_t buf_len) {

    scan_ebook_ctx_t ebook_ctx = {
            .content_size = ctx->content_size,
            .tn_size = ctx->tn_size,
            .log = ctx->log,
            .logf = ctx->logf,
            .store = ctx->store,
    };

    // Open word doc
    options_type *opts = direct_vGetOptions();
    opts->iParagraphBreak = 74;
    opts->eConversionType = conversion_pdf;
    opts->bHideHiddenText = 1;
    opts->bRemoveRemovedText = 1;
    opts->bUseLandscape = 0;
    opts->eEncoding = encoding_latin_1;
    opts->iPageHeight = 842; // A4
    opts->iPageWidth = 595;
    opts->eImageLevel = level_ps_3;

    int doc_word_version = iGuessVersionNumber(file, (int) buf_len);
    if (doc_word_version < 0 || doc_word_version == 3) {
        free(buf);
        return;
    }
    rewind(file);

    size_t out_len = 0;
    char *out_buf = NULL;

    FILE *file_out = open_memstream(&out_buf, &out_len);
    if (file_out == NULL) {
        CTX_LOG_ERRORF(doc->filepath, "open_memstream() failed (%d)", errno)
        free(buf);
        return;
    }

    diagram_type *diag = pCreateDiagram("antiword", NULL, file_out);
    if (diag == NULL) {
        // Release everything this function owns before bailing out
        fclose(file_out);
        free(out_buf);
        free(buf);
        return;
    }

    bWordDecryptor(file, (int) buf_len, diag);
    vDestroyDiagram(diag);

    // Closing the memstream finalizes out_buf / out_len
    fclose(file_out);

    parse_ebook_mem(&ebook_ctx, out_buf, out_len, "application/pdf", doc, TRUE);

    free(buf);
    free(out_buf);
}
|
||||||
|
|
||||||
|
/**
 * Entry point of the MS Word parser: reads the whole file into memory, wraps
 * it in a FILE stream, optionally runs the PDF pass (when ctx->tn_size > 0)
 * and then the text pass.
 *
 * @param ctx scan context
 * @param f   virtual file to read
 * @param doc document that receives the extracted metadata
 */
void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) {

    size_t buf_len;
    char *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        return;
    }

    FILE *file = fmemopen(buf, buf_len, "rb");
    if (file == NULL) {
        free(buf);
        CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno)
        return;
    }

    if (ctx->tn_size > 0) {
        // parse_msdoc_pdf() frees the buffer it is given, so it gets its own copy
        char *buf_pdf = malloc(buf_len);
        if (buf_pdf != NULL) {
            memcpy(buf_pdf, buf, buf_len);
            parse_msdoc_pdf(ctx, doc, file, buf_pdf, buf_len);
        } else {
            CTX_LOG_ERROR(f->filepath, "Could not allocate buffer for PDF conversion")
        }
    }

    // parse_msdoc_text() takes ownership of buf and frees it
    parse_msdoc_text(ctx, doc, file, buf, buf_len);
    fclose(file);
}
|
||||||
24
third-party/libscan/libscan/msdoc/msdoc.h
vendored
Normal file
24
third-party/libscan/libscan/msdoc/msdoc.h
vendored
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
#ifndef SCAN_SCAN_MSDOC_H
|
||||||
|
#define SCAN_SCAN_MSDOC_H
|
||||||
|
|
||||||
|
#include "../scan.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
long content_size;
|
||||||
|
int tn_size;
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
store_callback_t store;
|
||||||
|
unsigned int msdoc_mime;
|
||||||
|
} scan_msdoc_ctx_t;
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static int is_msdoc(scan_msdoc_ctx_t *ctx, unsigned int mime) {
|
||||||
|
return mime == ctx->msdoc_mime;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parses the Word document behind `f` and appends the extracted metadata to `doc`. */
void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
|
||||||
|
/* Text-only pass over a Word document available as the stream `file_in` (`buf_len` bytes).
 * NOTE(review): the implementation frees `buf`, so the caller must hand over a heap buffer. */
void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, void* buf, size_t buf_len);
|
||||||
|
|
||||||
|
#endif
|
||||||
260
third-party/libscan/libscan/ooxml/ooxml.c
vendored
Normal file
260
third-party/libscan/libscan/ooxml/ooxml.c
vendored
Normal file
@@ -0,0 +1,260 @@
|
|||||||
|
#include "ooxml.h"
|
||||||
|
|
||||||
|
#include <archive.h>
|
||||||
|
#include <archive_entry.h>
|
||||||
|
#include <libxml/xmlstring.h>
|
||||||
|
#include <libxml/parser.h>
|
||||||
|
|
||||||
|
#define _X(str) ((const xmlChar*)str)
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static int should_read_part(const char *part) {
|
||||||
|
|
||||||
|
if (part == NULL) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( // Word
|
||||||
|
STR_STARTS_WITH(part, "word/document.xml")
|
||||||
|
|| STR_STARTS_WITH(part, "word/footnotes.xml")
|
||||||
|
|| STR_STARTS_WITH(part, "word/endnotes.xml")
|
||||||
|
|| STR_STARTS_WITH(part, "word/footer")
|
||||||
|
|| STR_STARTS_WITH(part, "word/header")
|
||||||
|
// PowerPoint
|
||||||
|
|| STR_STARTS_WITH(part, "ppt/slides/slide")
|
||||||
|
|| STR_STARTS_WITH(part, "ppt/notesSlides/slide")
|
||||||
|
// Excel
|
||||||
|
|| STR_STARTS_WITH(part, "xl/worksheets/sheet")
|
||||||
|
|| STR_STARTS_WITH(part, "xl/sharedStrings.xml")
|
||||||
|
|| STR_STARTS_WITH(part, "xl/workbook.xml")
|
||||||
|
) {
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Walks `node`, its following siblings and all their descendants, and appends
 * the text of every element named "t" to `buf`, followed by a space.
 *
 * @return 0 when the walk completes, -1 when libxml2 reports a fatal error,
 *         TEXT_BUF_FULL as soon as an append reports a full buffer
 */
int extract_text(scan_ooxml_ctx_t *ctx, xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
    //TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't
    xmlErrorPtr last_error = xmlGetLastError();
    if (last_error != NULL && last_error->level == XML_ERR_FATAL) {
        CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", last_error->message)
        return -1;
    }

    xmlNode *cur = node;
    while (cur != NULL) {
        const xmlChar *name = cur->name;

        if (name != NULL && name[0] == 't' && name[1] == '\0') {
            xmlChar *text = xmlNodeListGetString(xml, cur->xmlChildrenNode, 1);

            if (text != NULL) {
                int append_ret = text_buffer_append_string0(buf, (char *) text);
                text_buffer_append_char(buf, ' ');
                xmlFree(text);

                if (append_ret == TEXT_BUF_FULL) {
                    return append_ret;
                }
            }
        }

        // Depth-first: descend before moving on to the next sibling
        if (extract_text(ctx, xml, cur->children, buf) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }

        cur = cur->next;
    }

    return 0;
}
|
||||||
|
|
||||||
|
/**
 * Read callback for xmlReadIO(): pulls up to `len` bytes of the current
 * archive entry into `buffer`.
 *
 * @param context the `struct archive *` being read
 * @return the result of archive_read_data(), narrowed to int
 */
int xml_io_read(void *context, char *buffer, int len) {
    return (int) archive_read_data((struct archive *) context, buffer, len);
}
|
||||||
|
|
||||||
|
/**
 * Close callback for xmlReadIO(). Deliberately does nothing: the archive
 * handle is released by parse_ooxml().
 *
 * @return always 0
 */
int xml_io_close(UNUSED(void *context)) {
    return 0; // no-op
}
|
||||||
|
|
||||||
|
/* Returned by read_part() when the XML could not be parsed or has no root element */
#define READ_PART_ERR (-2)
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static int read_part(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *buf, document_t *doc) {
|
||||||
|
|
||||||
|
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
|
||||||
|
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
|
||||||
|
|
||||||
|
if (xml == NULL) {
|
||||||
|
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
|
||||||
|
return READ_PART_ERR;
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlNode *root = xmlDocGetRootElement(xml);
|
||||||
|
if (root == NULL) {
|
||||||
|
CTX_LOG_ERROR(doc->filepath, "Empty document")
|
||||||
|
xmlFreeDoc(xml);
|
||||||
|
return READ_PART_ERR;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ret = extract_text(ctx, xml, root, buf);
|
||||||
|
xmlFreeDoc(xml);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static int read_doc_props_app(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *doc) {
|
||||||
|
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
|
||||||
|
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
|
||||||
|
|
||||||
|
if (xml == NULL) {
|
||||||
|
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlNode *root = xmlDocGetRootElement(xml);
|
||||||
|
if (root == NULL) {
|
||||||
|
CTX_LOG_ERROR(doc->filepath, "Empty document")
|
||||||
|
xmlFreeDoc(xml);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (xmlStrEqual(root->name, _X("Properties"))) {
|
||||||
|
for (xmlNode *child = root->children; child; child = child->next) {
|
||||||
|
xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1);
|
||||||
|
if (text == NULL) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (xmlStrEqual(child->name, _X("Pages"))) {
|
||||||
|
APPEND_LONG_META(doc, MetaPages, strtol((char *) text, NULL, 10))
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlFree(text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
xmlFreeDoc(xml);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *doc) {
|
||||||
|
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
|
||||||
|
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
|
||||||
|
|
||||||
|
if (xml == NULL) {
|
||||||
|
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlNode *root = xmlDocGetRootElement(xml);
|
||||||
|
if (root == NULL) {
|
||||||
|
CTX_LOG_ERROR(doc->filepath, "Empty document")
|
||||||
|
xmlFreeDoc(xml);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (xmlStrEqual(root->name, _X("coreProperties"))) {
|
||||||
|
for (xmlNode *child = root->children; child; child = child->next) {
|
||||||
|
xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1);
|
||||||
|
if (text == NULL) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (xmlStrEqual(child->name, _X("title"))) {
|
||||||
|
APPEND_STR_META(doc, MetaTitle, (char *) text)
|
||||||
|
} else if (xmlStrEqual(child->name, _X("creator"))) {
|
||||||
|
APPEND_STR_META(doc, MetaAuthor, (char *) text)
|
||||||
|
} else if (xmlStrEqual(child->name, _X("lastModifiedBy"))) {
|
||||||
|
APPEND_STR_META(doc, MetaModifiedBy, (char *) text)
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlFree(text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
xmlFreeDoc(xml);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Embedded thumbnails larger than this many bytes (15 MiB) are skipped by read_thumbnail() */
#define MAX_TN_SIZE (1024 * 1024 * 15)
|
||||||
|
|
||||||
|
/**
 * Stores the embedded thumbnail found in the current archive entry, keyed by
 * the document's path MD5. Entries that are empty, larger than MAX_TN_SIZE or
 * unreadable are skipped.
 *
 * @param ctx   scan context providing the store and log callbacks
 * @param doc   document the thumbnail belongs to
 * @param a     archive positioned on the thumbnail entry
 * @param entry header of that entry
 */
void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, struct archive_entry *entry) {
    // Kept signed so that a negative size is rejected explicitly
    long long entry_size = archive_entry_size(entry);

    if (entry_size <= 0 || entry_size > MAX_TN_SIZE) {
        return;
    }

    char *buf = malloc((size_t) entry_size);
    if (buf == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate thumbnail buffer")
        return;
    }

    long long read_len = archive_read_data(a, buf, (size_t) entry_size);
    if (read_len <= 0) {
        // Nothing usable was read: do not store an uninitialized buffer
        CTX_LOG_ERRORF(doc->filepath, "Could not read thumbnail: %s", archive_error_string(a))
        free(buf);
        return;
    }

    APPEND_TN_META(doc, 1, 1) // Size unknown
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, (size_t) read_len);
    free(buf);
}
|
||||||
|
|
||||||
|
/**
 * Entry point of the OOXML (docx/pptx/xlsx) parser. Reads the whole file,
 * opens it as a zip archive and, for each regular entry:
 *  - appends the text of the known content parts to the MetaContent buffer,
 *  - reads docProps/app.xml and docProps/core.xml for metadata,
 *  - stores docProps/thumbnail.jpeg as the thumbnail.
 *
 * @param ctx scan context
 * @param f   virtual file to read
 * @param doc document that receives the extracted metadata
 */
void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {

    size_t buf_len;
    void *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        return;
    }

    struct archive *a = archive_read_new();
    if (a == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate archive handle")
        free(buf);
        return;
    }
    archive_read_support_format_zip(a);

    int ret = archive_read_open_memory(a, buf, buf_len);
    if (ret != ARCHIVE_OK) {
        CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a))
        archive_read_free(a);
        free(buf);
        return;
    }

    text_buffer_t tex = text_buffer_create(ctx->content_size);

    struct archive_entry *entry;
    int buffer_full = FALSE;
    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
        if (!S_ISREG(archive_entry_stat(entry)->st_mode)) {
            continue;
        }

        const char *path = archive_entry_pathname(entry);
        if (path == NULL) {
            // No usable name: strcmp() below must not see a NULL pointer
            continue;
        }

        if (!buffer_full && should_read_part(path) && ctx->content_size > 0) {
            ret = read_part(ctx, a, &tex, doc);
            if (ret == READ_PART_ERR) {
                break;
            } else if (ret == TEXT_BUF_FULL) {
                buffer_full = TRUE;
            }
        } else if (strcmp(path, "docProps/app.xml") == 0) {
            if (read_doc_props_app(ctx, a, doc) != 0) {
                break;
            }
        } else if (strcmp(path, "docProps/core.xml") == 0) {
            if (read_doc_props(ctx, a, doc) != 0) {
                break;
            }
        } else if (strcmp(path, "docProps/thumbnail.jpeg") == 0) {
            read_thumbnail(ctx, doc, a, entry);
        }
    }

    if (tex.dyn_buffer.cur > 0) {
        text_buffer_terminate_string(&tex);

        meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
        if (meta != NULL) {
            meta->key = MetaContent;
            strcpy(meta->str_val, tex.dyn_buffer.buf);
            APPEND_META(doc, meta)
        } else {
            CTX_LOG_ERROR(doc->filepath, "Could not allocate content meta")
        }
    }

    archive_read_close(a);
    archive_read_free(a);
    text_buffer_destroy(&tex);
    free(buf);
}
|
||||||
16
third-party/libscan/libscan/ooxml/ooxml.h
vendored
Normal file
16
third-party/libscan/libscan/ooxml/ooxml.h
vendored
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#ifndef SCAN_OOXML_H
|
||||||
|
#define SCAN_OOXML_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "../scan.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
long content_size;
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
store_callback_t store;
|
||||||
|
} scan_ooxml_ctx_t;
|
||||||
|
|
||||||
|
/* Parses the OOXML package behind `f` and appends the extracted text, properties and thumbnail to `doc`. */
void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
|
||||||
|
#endif
|
||||||
218
third-party/libscan/libscan/raw/raw.c
vendored
Normal file
218
third-party/libscan/libscan/raw/raw.c
vendored
Normal file
@@ -0,0 +1,218 @@
|
|||||||
|
#include "raw.h"
|
||||||
|
#include <libraw/libraw.h>
|
||||||
|
|
||||||
|
#include "../media/media.h"
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* store_thumbnail_rgb24() refuses to produce thumbnails whose width or height is <= MIN_SIZE pixels */
#define MIN_SIZE 32
|
||||||
|
|
||||||
|
/**
 * Stores an already JPEG-encoded libraw thumbnail through the media module.
 *
 * NOTE(review): `ctx` is reinterpreted as a scan_media_ctx_t; this presumably
 * relies on both structs sharing the same leading layout -- confirm.
 *
 * @return the result of store_image_thumbnail()
 */
int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
    scan_media_ctx_t *media_ctx = (scan_media_ctx_t *) ctx;

    return store_image_thumbnail(media_ctx, img->data, img->data_size, doc, "x.jpeg");
}
|
||||||
|
|
||||||
|
/**
 * Scales an RGB24 libraw image down to at most ctx->tn_size pixels on its
 * longest side, encodes it as JPEG and stores it keyed by the document's
 * path MD5.
 *
 * @return TRUE when a thumbnail was stored; FALSE when the target size is
 *         <= MIN_SIZE in either dimension or when scaling/encoding failed
 */
int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {

    int dstW;
    int dstH;

    if (img->width <= ctx->tn_size && img->height <= ctx->tn_size) {
        dstW = img->width;
        dstH = img->height;
    } else {
        double ratio = (double) img->width / img->height;
        if (img->width > img->height) {
            dstW = ctx->tn_size;
            dstH = (int) (ctx->tn_size / ratio);
        } else {
            dstW = (int) (ctx->tn_size * ratio);
            dstH = ctx->tn_size;
        }
    }

    if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
        return FALSE;
    }

    int stored = FALSE;
    int dst_buf_len = 0;
    uint8_t *dst_buf = NULL;
    struct SwsContext *sws_ctx = NULL;
    AVCodecContext *jpeg_encoder = NULL;

    AVFrame *scaled_frame = av_frame_alloc();
    if (scaled_frame == NULL) {
        goto cleanup;
    }

    // NOTE(review): the scaler targets YUVJ420P while the buffer and frame are
    // declared as YUV420P; kept as in the original -- confirm this is intended.
    sws_ctx = sws_getContext(
            img->width, img->height, AV_PIX_FMT_RGB24,
            dstW, dstH, AV_PIX_FMT_YUVJ420P,
            SIST_SWS_ALGO, 0, 0, 0
    );
    if (sws_ctx == NULL) {
        goto cleanup;
    }

    dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
    if (dst_buf_len < 0) {
        goto cleanup;
    }

    dst_buf = (uint8_t *) av_malloc(dst_buf_len);
    if (dst_buf == NULL) {
        goto cleanup;
    }

    av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);

    const uint8_t *in_data[1] = {img->data};
    int in_line_size[1] = {3 * img->width};

    sws_scale(sws_ctx,
              in_data, in_line_size,
              0, img->height,
              scaled_frame->data, scaled_frame->linesize
    );

    scaled_frame->width = dstW;
    scaled_frame->height = dstH;
    scaled_frame->format = AV_PIX_FMT_YUV420P;

    jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, 1.0f);
    if (jpeg_encoder == NULL || avcodec_send_frame(jpeg_encoder, scaled_frame) < 0) {
        goto cleanup;
    }

    AVPacket jpeg_packet;
    av_init_packet(&jpeg_packet);
    // av_init_packet() leaves data/size untouched
    jpeg_packet.data = NULL;
    jpeg_packet.size = 0;

    // Only advertise and store a thumbnail when the encoder produced one
    if (avcodec_receive_packet(jpeg_encoder, &jpeg_packet) == 0) {
        APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
        ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
        stored = TRUE;
    }
    av_packet_unref(&jpeg_packet);

cleanup:
    if (!stored) {
        CTX_LOG_ERROR(doc->filepath, "Could not generate thumbnail from RGB24 image")
    }

    // All of these accept NULL / pointers to NULL
    sws_freeContext(sws_ctx);
    av_free(dst_buf);
    av_frame_free(&scaled_frame);
    avcodec_free_context(&jpeg_encoder);

    return stored;
}
|
||||||
|
|
||||||
|
/* Sign of a GPS coordinate for its reference letter: -1 for 'S' or 'W', 1 for anything else */
#define DMS_REF(ref) (((ref) == 'S' || (ref) == 'W') ? -1 : 1)
|
||||||
|
|
||||||
|
/**
 * Entry point of the RAW image parser. Reads the file into memory, opens it
 * with libraw, appends the EXIF-like metadata to `doc` and, when
 * ctx->tn_size > 0, stores a thumbnail: the embedded one when it can be used,
 * otherwise one rendered from the decoded image.
 *
 * @param ctx scan context
 * @param f   virtual file to read
 * @param doc document that receives the metadata
 */
void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
    void *buf = NULL;

    libraw_data_t *libraw_lib = libraw_init(0);
    if (!libraw_lib) {
        CTX_LOG_ERROR("raw.c", "Cannot create libraw handle")
        return;
    }

    size_t buf_len = 0;
    buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        // The libraw handle must still be released
        goto cleanup;
    }

    int ret = libraw_open_buffer(libraw_lib, buf, buf_len);
    if (ret != 0) {
        CTX_LOG_ERROR(f->filepath, "Could not open raw file")
        goto cleanup;
    }

    if (*libraw_lib->idata.model != '\0') {
        APPEND_STR_META(doc, MetaExifModel, libraw_lib->idata.model)
    }
    if (*libraw_lib->idata.make != '\0') {
        APPEND_STR_META(doc, MetaExifMake, libraw_lib->idata.make)
    }
    if (*libraw_lib->idata.software != '\0') {
        APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software)
    }
    APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width)
    APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height)

    char tmp[1024];
    snprintf(tmp, sizeof(tmp), "%g", libraw_lib->other.iso_speed);
    APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp)

    if (*libraw_lib->other.desc != '\0') {
        APPEND_STR_META(doc, MetaContent, libraw_lib->other.desc)
    }
    if (*libraw_lib->other.artist != '\0') {
        APPEND_STR_META(doc, MetaArtist, libraw_lib->other.artist)
    }

    // localtime_r() is reentrant and its result is checked before use
    struct tm time_buf;
    if (localtime_r(&libraw_lib->other.timestamp, &time_buf) != NULL) {
        strftime(tmp, sizeof(tmp), "%Y:%m:%d %H:%M:%S", &time_buf);
        APPEND_STR_META(doc, MetaExifDateTime, tmp)
    }

    snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.focal_len);
    APPEND_STR_META(doc, MetaExifFocalLength, tmp)

    snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.aperture);
    APPEND_STR_META(doc, MetaExifFNumber, tmp)

    // A missing (zero) shutter value would divide by zero and convert inf to int
    if (libraw_lib->other.shutter > 0) {
        int denominator = (int) roundf(1 / libraw_lib->other.shutter);
        snprintf(tmp, sizeof(tmp), "1/%d", denominator);
        APPEND_STR_META(doc, MetaExifExposureTime, tmp)
    }

    libraw_gps_info_t gps = libraw_lib->other.parsed_gps;
    double gps_longitude_dec =
            (gps.longtitude[0] + gps.longtitude[1] / 60 + gps.longtitude[2] / 3600) * DMS_REF(gps.longref);
    snprintf(tmp, sizeof(tmp), "%.15f", gps_longitude_dec);
    if (gps_longitude_dec != 0.0) {
        APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp)
    }

    double gps_latitude_dec = (gps.latitude[0] + gps.latitude[1] / 60 + gps.latitude[2] / 3600) * DMS_REF(gps.latref);
    snprintf(tmp, sizeof(tmp), "%.15f", gps_latitude_dec);
    if (gps_latitude_dec != 0.0) {
        APPEND_STR_META(doc, MetaExifGpsLatitudeDec, tmp)
    }

    APPEND_STR_META(doc, MetaMediaVideoCodec, "raw")

    if (ctx->tn_size <= 0) {
        goto cleanup;
    }

    // First choice: the thumbnail embedded in the raw file
    libraw_unpack_thumb(libraw_lib);

    int errc = 0;
    libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
    if (errc != 0 || thumb == NULL) {
        libraw_dcraw_clear_mem(thumb);
        goto cleanup;
    }

    int tn_ok = 0;
    if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_JPEG) {
        tn_ok = store_thumbnail_jpeg(ctx, thumb, doc);
    } else if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_BITMAP) {
        // TODO: technically this should work but is currently untested
        tn_ok = store_thumbnail_rgb24(ctx, thumb, doc);
    }

    libraw_dcraw_clear_mem(thumb);

    if (tn_ok == TRUE) {
        goto cleanup;
    }

    // Fallback: decode the full image and scale it down
    ret = libraw_unpack(libraw_lib);
    if (ret != 0) {
        CTX_LOG_ERROR(f->filepath, "Could not unpack raw file")
        goto cleanup;
    }

    libraw_dcraw_process(libraw_lib);

    errc = 0;
    libraw_processed_image_t *img = libraw_dcraw_make_mem_image(libraw_lib, &errc);
    if (errc != 0 || img == NULL) {
        libraw_dcraw_clear_mem(img);
        goto cleanup;
    }

    store_thumbnail_rgb24(ctx, img, doc);
    libraw_dcraw_clear_mem(img);

cleanup:
    libraw_close(libraw_lib);
    free(buf);
}
|
||||||
17
third-party/libscan/libscan/raw/raw.h
vendored
Normal file
17
third-party/libscan/libscan/raw/raw.h
vendored
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
#ifndef SIST2_RAW_H
|
||||||
|
#define SIST2_RAW_H
|
||||||
|
|
||||||
|
#include "../scan.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
store_callback_t store;
|
||||||
|
|
||||||
|
int tn_size;
|
||||||
|
float tn_qscale;
|
||||||
|
} scan_raw_ctx_t;
|
||||||
|
|
||||||
|
/* Parses the RAW image behind `f`: appends its metadata to `doc` and stores a thumbnail when ctx->tn_size > 0. */
void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
|
||||||
|
#endif //SIST2_RAW_H
|
||||||
170
third-party/libscan/libscan/scan.h
vendored
Normal file
170
third-party/libscan/libscan/scan.h
vendored
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
#ifndef SCAN_SCAN_H
|
||||||
|
#define SCAN_SCAN_H
|
||||||
|
|
||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <openssl/md5.h>
|
||||||
|
#include <openssl/sha.h>
|
||||||
|
|
||||||
|
#include "macros.h"
|
||||||
|
|
||||||
|
#define SIST_SWS_ALGO SWS_LANCZOS
|
||||||
|
|
||||||
|
#define UNUSED(x) __attribute__((__unused__)) x
|
||||||
|
|
||||||
|
/* Stores `buf_len` bytes from `buf` under a binary key of `key_len` bytes. */
typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);

/* Logs a printf-style message for `filepath` at one of the LEVEL_* severities. */
typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...);

/* Logs a ready-made string for `filepath` at one of the LEVEL_* severities. */
typedef void (*log_callback_t)(const char *filepath, int level, char *str);
|
||||||
|
|
||||||
|
/* Result code returned by the parse_* functions that report a status. */
typedef int scan_code_t;

/* Each expansion is fully parenthesized so it behaves as a single operand
 * (e.g. `sizeof SCAN_OK` or `-SCAN_ERR_READ`). */
#define SCAN_OK ((scan_code_t) 0)
#define SCAN_ERR_READ ((scan_code_t) (-1))
#define SCAN_ERR_SKIP ((scan_code_t) (-2))
|
||||||
|
|
||||||
|
/* Severity levels passed as the `level` argument of the log callbacks. */
#define LEVEL_DEBUG 0
#define LEVEL_INFO 1
#define LEVEL_WARNING 2
#define LEVEL_ERROR 3
#define LEVEL_FATAL 4

/*
 * Logging helpers. They call through a variable named `ctx` that must be in
 * scope at the call site and expose `log` / `logf` members.
 * Every expansion already ends with ';', so call sites may omit it.
 */
#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__);
#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str);

#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__);
#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str);

#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__);
#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str);

#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__);
#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str);

/*
 * The fatal variants log and then terminate the process. The two statements
 * are wrapped in do { } while (0) so that both stay under an unbraced `if`;
 * previously exit() would run unconditionally in that case.
 */
#define CTX_LOG_FATALF(filepath, fmt, ...) do { ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1); } while (0);
#define CTX_LOG_FATAL(filepath, str) do { ctx->log(filepath, LEVEL_FATAL, str); exit(-1); } while (0);
|
||||||
|
|
||||||
|
/*
 * Keys identifying the kind of value carried by a meta_line_t.
 * The numeric values follow from the declaration order (starting at 1), so
 * new keys must not be inserted in the middle of a group without checking
 * every consumer of these numbers.
 */
enum metakey {
    /* --- String values --- */
    MetaContent = 1,
    MetaMediaAudioCodec,
    MetaMediaVideoCodec,
    MetaArtist,
    MetaAlbum,
    MetaAlbumArtist,
    MetaGenre,
    MetaTitle,
    MetaFontName,
    MetaParent,
    MetaExifMake,
    MetaExifSoftware,
    MetaExifExposureTime,
    MetaExifFNumber,
    MetaExifFocalLength,
    MetaExifUserComment,
    MetaExifModel,
    MetaExifIsoSpeedRatings,
    MetaExifDateTime,
    MetaAuthor,
    MetaModifiedBy,
    MetaThumbnail,
    MetaChecksum,

    /* --- Numeric values --- */
    MetaWidth,
    MetaHeight,
    MetaMediaDuration,
    MetaMediaBitrate,
    MetaPages,

    /* --- ?? (type left unspecified by the original author) --- */
    MetaExifGpsLongitudeDMS,
    MetaExifGpsLongitudeRef,
    MetaExifGpsLatitudeDMS,
    MetaExifGpsLatitudeRef,
    MetaExifGpsLatitudeDec,
    MetaExifGpsLongitudeDec,
};
|
||||||
|
|
||||||
|
typedef struct meta_line {
|
||||||
|
struct meta_line *next;
|
||||||
|
enum metakey key;
|
||||||
|
union {
|
||||||
|
char str_val[0];
|
||||||
|
unsigned long long_val;
|
||||||
|
double double_val;
|
||||||
|
};
|
||||||
|
} meta_line_t;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct document {
|
||||||
|
unsigned char path_md5[MD5_DIGEST_LENGTH];
|
||||||
|
unsigned long size;
|
||||||
|
unsigned int mime;
|
||||||
|
int mtime;
|
||||||
|
short base;
|
||||||
|
short ext;
|
||||||
|
char has_parent;
|
||||||
|
meta_line_t *meta_head;
|
||||||
|
meta_line_t *meta_tail;
|
||||||
|
char *filepath;
|
||||||
|
} document_t;
|
||||||
|
|
||||||
|
typedef struct vfile vfile_t;
|
||||||
|
|
||||||
|
__attribute__((warn_unused_result))
|
||||||
|
typedef int (*read_func_t)(struct vfile *, void *buf, size_t size);
|
||||||
|
|
||||||
|
__attribute__((warn_unused_result))
|
||||||
|
typedef long (*seek_func_t)(struct vfile *, long offset, int whence);
|
||||||
|
|
||||||
|
typedef void (*close_func_t)(struct vfile *);
|
||||||
|
|
||||||
|
typedef void (*reset_func_t)(struct vfile *);
|
||||||
|
|
||||||
|
typedef struct vfile {
|
||||||
|
union {
|
||||||
|
int fd;
|
||||||
|
struct archive *arc;
|
||||||
|
const void *_test_data;
|
||||||
|
};
|
||||||
|
|
||||||
|
int is_fs_file;
|
||||||
|
int has_checksum;
|
||||||
|
int calculate_checksum;
|
||||||
|
const char *filepath;
|
||||||
|
struct stat info;
|
||||||
|
|
||||||
|
SHA_CTX sha1_ctx;
|
||||||
|
unsigned char sha1_digest[SHA1_DIGEST_LENGTH];
|
||||||
|
|
||||||
|
void *rewind_buffer;
|
||||||
|
int rewind_buffer_size;
|
||||||
|
int rewind_buffer_cursor;
|
||||||
|
|
||||||
|
read_func_t read;
|
||||||
|
read_func_t read_rewindable;
|
||||||
|
close_func_t close;
|
||||||
|
reset_func_t reset;
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
} vfile_t;
|
||||||
|
|
||||||
|
typedef struct parse_job_t {
|
||||||
|
int base;
|
||||||
|
int ext;
|
||||||
|
struct vfile vfile;
|
||||||
|
unsigned char parent[MD5_DIGEST_LENGTH];
|
||||||
|
char filepath[1];
|
||||||
|
} parse_job_t;
|
||||||
|
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
|
/* Callback invoked with a parse job to process. */
typedef void (*parse_callback_t)(parse_job_t *job);
|
||||||
|
|
||||||
|
#endif
|
||||||
64
third-party/libscan/libscan/text/text.c
vendored
Normal file
64
third-party/libscan/libscan/text/text.c
vendored
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
#include "text.h"
|
||||||
|
|
||||||
|
/**
 * Reads up to ctx->content_size bytes of a plain-text file and attaches them
 * to `doc` as MetaContent. A leading 16-bit byte-order mark selects one of
 * the 16-bit append routines; otherwise the bytes are appended as-is.
 *
 * @return SCAN_OK on success or when there are at most 2 bytes to read,
 *         SCAN_ERR_READ when the buffer cannot be allocated or the read fails
 */
scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {

    int to_read = MIN(ctx->content_size, f->info.st_size);

    if (to_read <= 2) {
        return SCAN_OK;
    }

    char *buf = malloc(to_read);
    if (buf == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate read buffer")
        return SCAN_ERR_READ;
    }

    int ret = f->read(f, buf, to_read);
    if (ret < 0) {
        CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
        free(buf);
        return SCAN_ERR_READ;
    }
    // NOTE(review): the code below assumes all `to_read` bytes were read;
    // confirm that f->read() cannot return a short count.

    text_buffer_t tex = text_buffer_create(ctx->content_size);

    // Copy the first two bytes out instead of dereferencing a cast pointer
    int16_t bom;
    memcpy(&bom, buf, sizeof(bom));

    if (bom == (int16_t) 0xFFFE) {
        text_buffer_append_string16_le(&tex, buf + 2, to_read - 2);
    } else if (bom == (int16_t) 0xFEFF) {
        text_buffer_append_string16_be(&tex, buf + 2, to_read - 2);
    } else {
        text_buffer_append_string(&tex, buf, to_read);
    }
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
|
||||||
|
|
||||||
|
/* parse_markup() reads at most this many bytes (1 MiB) of a markup file */
#define MAX_MARKUP_SIZE (1024 * 1024)
|
||||||
|
|
||||||
|
/**
 * Reads up to MAX_MARKUP_SIZE bytes of a markup file, runs them through
 * text_buffer_append_markup() and attaches the result to `doc` as MetaContent.
 *
 * @return SCAN_OK on success,
 *         SCAN_ERR_READ when the buffer cannot be allocated or the read fails
 */
scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {

    int to_read = MIN(MAX_MARKUP_SIZE, f->info.st_size);

    // One extra byte for the terminator written below
    char *buf = malloc(to_read + 1);
    if (buf == NULL) {
        CTX_LOG_ERROR(doc->filepath, "Could not allocate read buffer")
        return SCAN_ERR_READ;
    }

    int ret = f->read(f, buf, to_read);
    if (ret < 0) {
        CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
        free(buf);
        return SCAN_ERR_READ;
    }
    // NOTE(review): the terminator is placed at `to_read`, not at the number
    // of bytes actually read; confirm that f->read() cannot return a short count.

    *(buf + to_read) = '\0';

    text_buffer_t tex = text_buffer_create(ctx->content_size);
    text_buffer_append_markup(&tex, buf);
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
|
||||||
18
third-party/libscan/libscan/text/text.h
vendored
Normal file
18
third-party/libscan/libscan/text/text.h
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
#ifndef SCAN_TEXT_H
|
||||||
|
#define SCAN_TEXT_H
|
||||||
|
|
||||||
|
#include "../scan.h"
|
||||||
|
#include "../util.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
long content_size;
|
||||||
|
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
} scan_text_ctx_t;
|
||||||
|
|
||||||
|
/* Attaches the (possibly truncated) content of a plain-text file to `doc`; returns SCAN_OK or SCAN_ERR_READ. */
scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
|
||||||
|
/* Attaches the text extracted from a markup file to `doc`; returns SCAN_OK or SCAN_ERR_READ. */
scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
|
||||||
|
#endif
|
||||||
0
third-party/libscan/libscan/util.c
vendored
Normal file
0
third-party/libscan/libscan/util.c
vendored
Normal file
361
third-party/libscan/libscan/util.h
vendored
Normal file
361
third-party/libscan/libscan/util.h
vendored
Normal file
@@ -0,0 +1,361 @@
|
|||||||
|
#ifndef SCAN_UTIL_H
|
||||||
|
#define SCAN_UTIL_H
|
||||||
|
|
||||||
|
#include "stdio.h"
|
||||||
|
#include "stdlib.h"
|
||||||
|
#include "string.h"
|
||||||
|
#include "../third-party/utf8.h/utf8.h"
|
||||||
|
#include "macros.h"
|
||||||
|
|
||||||
|
#define STR_STARTS_WITH(x, y) (strncmp(y, x, sizeof(y) - 1) == 0)
|
||||||
|
|
||||||
|
#define TEXT_BUF_FULL (-1)
|
||||||
|
#define INITIAL_BUF_SIZE (1024 * 16)
|
||||||
|
|
||||||
|
#define SHOULD_IGNORE_CHAR(c) !(SHOULD_KEEP_CHAR(c))
|
||||||
|
#define SHOULD_KEEP_CHAR(c) (\
|
||||||
|
((c) >= '\'' && (c) <= ';') || \
|
||||||
|
((c) >= 'A' && (c) <= 'z') || \
|
||||||
|
((c) > 127 && (c) != 0x00A0 && (c) && (c) != 0xFFFD))
|
||||||
|
|
||||||
|
|
||||||
|
/* Growable byte buffer; see dyn_buffer_create() / grow_buffer(). */
typedef struct dyn_buffer {
    char *buf;    /* heap storage, released by dyn_buffer_destroy() */
    size_t cur;   /* offset of the next byte to be written */
    size_t size;  /* current capacity of buf, in bytes */
} dyn_buffer_t;
|
||||||
|
|
||||||
|
typedef struct text_buffer {
|
||||||
|
long max_size;
|
||||||
|
int last_char_was_whitespace;
|
||||||
|
dyn_buffer_t dyn_buffer;
|
||||||
|
} text_buffer_t;
|
||||||
|
|
||||||
|
static int utf8_validchr2(const char *s) {
|
||||||
|
if (0x00 == (0x80 & *s)) {
|
||||||
|
return TRUE;
|
||||||
|
} else if (0xf0 == (0xf8 & *s)) {
|
||||||
|
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) ||
|
||||||
|
(0x80 != (0xc0 & s[3]))) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0x80 == (0xc0 & s[4])) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
} else if (0xe0 == (0xf0 & *s)) {
|
||||||
|
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0x80 == (0xc0 & s[3])) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
} else if (0xc0 == (0xe0 & *s)) {
|
||||||
|
if (0x80 != (0xc0 & s[1])) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0x80 == (0xc0 & s[2])) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 == (0x1e & s[0])) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static dyn_buffer_t dyn_buffer_create() {
|
||||||
|
dyn_buffer_t buf;
|
||||||
|
|
||||||
|
buf.size = INITIAL_BUF_SIZE;
|
||||||
|
buf.cur = 0;
|
||||||
|
buf.buf = (char *) malloc(INITIAL_BUF_SIZE);
|
||||||
|
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make room for `size` more bytes after buf->cur, doubling the capacity as often as needed. */
static void grow_buffer(dyn_buffer_t *buf, size_t size) {
    if (buf->cur + size <= buf->size) {
        return;
    }

    size_t capacity = buf->size;
    do {
        capacity *= 2;
    } while (buf->cur + size > capacity);

    buf->size = capacity;
    buf->buf = (char *) realloc(buf->buf, capacity);
}
|
||||||
|
|
||||||
|
/* Double the capacity once when fewer than sizeof(long) bytes remain after buf->cur. */
static void grow_buffer_small(dyn_buffer_t *buf) {
    if (buf->cur + sizeof(long) <= buf->size) {
        return;
    }

    buf->size *= 2;
    buf->buf = (char *) realloc(buf->buf, buf->size);
}
|
||||||
|
|
||||||
|
/* Append `size` raw bytes from `data`, growing the buffer first if necessary. */
static void dyn_buffer_write(dyn_buffer_t *buf, const void *data, size_t size) {
    grow_buffer(buf, size);

    char *dest = buf->buf + buf->cur;
    memcpy(dest, data, size);
    buf->cur += size;
}
|
||||||
|
|
||||||
|
/* Append a single byte. */
static void dyn_buffer_write_char(dyn_buffer_t *buf, char c) {
    grow_buffer_small(buf);

    buf->buf[buf->cur] = c;
    buf->cur += sizeof(char);
}
|
||||||
|
|
||||||
|
/* Append `str` followed by its terminating NUL byte. */
static void dyn_buffer_write_str(dyn_buffer_t *buf, const char *str) {
    const size_t length = strlen(str);

    dyn_buffer_write(buf, str, length);
    dyn_buffer_write_char(buf, '\0');
}
|
||||||
|
|
||||||
|
/* Append the characters of `str` without its terminating NUL byte. */
static void dyn_buffer_append_string(dyn_buffer_t *buf, const char *str) {
    const size_t length = strlen(str);
    dyn_buffer_write(buf, str, length);
}
|
||||||
|
|
||||||
|
/**
 * Append the native (host byte order) representation of an int.
 *
 * The write offset is a plain byte offset and need not be aligned for int,
 * so the value is copied with memcpy() instead of being stored through a
 * cast pointer.
 */
static void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
    grow_buffer_small(buf);

    memcpy(buf->buf + buf->cur, &d, sizeof(d));
    buf->cur += sizeof(d);
}
|
||||||
|
|
||||||
|
/**
 * Append the native (host byte order) representation of a uint16_t.
 *
 * Copied with memcpy() because the write offset is not guaranteed to be
 * 2-byte aligned.
 */
static void dyn_buffer_write_short(dyn_buffer_t *buf, uint16_t s) {
    grow_buffer_small(buf);

    memcpy(buf->buf + buf->cur, &s, sizeof(s));
    buf->cur += sizeof(s);
}
|
||||||
|
|
||||||
|
/**
 * Append the native (host byte order) representation of an unsigned long.
 *
 * Copied with memcpy() because the write offset is not guaranteed to be
 * suitably aligned for unsigned long.
 */
static void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) {
    grow_buffer_small(buf);

    memcpy(buf->buf + buf->cur, &l, sizeof(l));
    buf->cur += sizeof(l);
}
|
||||||
|
|
||||||
|
/* Release the heap storage owned by the buffer; the struct itself is not freed. */
static void dyn_buffer_destroy(dyn_buffer_t *buf) {
    char *storage = buf->buf;
    free(storage);
}
|
||||||
|
|
||||||
|
/* Release the storage of the embedded dyn_buffer. */
static void text_buffer_destroy(text_buffer_t *buf) {
    dyn_buffer_t *storage = &buf->dyn_buffer;
    dyn_buffer_destroy(storage);
}
|
||||||
|
|
||||||
|
static text_buffer_t text_buffer_create(long max_size) {
|
||||||
|
text_buffer_t text_buf;
|
||||||
|
|
||||||
|
text_buf.dyn_buffer = dyn_buffer_create();
|
||||||
|
text_buf.max_size = max_size;
|
||||||
|
text_buf.last_char_was_whitespace = FALSE;
|
||||||
|
|
||||||
|
return text_buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Append one code point, UTF-8 encoded, to the text buffer.
 *
 * Characters rejected by SHOULD_KEEP_CHAR, and the space character itself,
 * are written as a single separator space; consecutive separators and a
 * leading separator are dropped.
 *
 * @return 0, or TEXT_BUF_FULL once the buffer has grown past max_size
 */
static int text_buffer_append_char(text_buffer_t *buf, int c) {
    dyn_buffer_t *out = &buf->dyn_buffer;

    if (SHOULD_IGNORE_CHAR(c) || c == ' ') {
        // Nothing to do for repeated or leading separators
        if (buf->last_char_was_whitespace || out->cur == 0) {
            return 0;
        }

        dyn_buffer_write_char(out, ' ');
        buf->last_char_was_whitespace = TRUE;

        if (buf->max_size > 0 && out->cur > buf->max_size) {
            return TEXT_BUF_FULL;
        }
        return 0;
    }

    buf->last_char_was_whitespace = FALSE;
    // Leaves room for the longest (4 byte) encoding below
    grow_buffer_small(out);

    char *dst = out->buf + out->cur;
    size_t written;

    if (((utf8_int32_t) 0xffffff80 & c) == 0) {
        dst[0] = (char) c;
        written = 1;
    } else if (((utf8_int32_t) 0xfffff800 & c) == 0) {
        dst[0] = 0xc0 | (char) (c >> 6);
        dst[1] = 0x80 | (char) (c & 0x3f);
        written = 2;
    } else if (((utf8_int32_t) 0xffff0000 & c) == 0) {
        dst[0] = 0xe0 | (char) (c >> 12);
        dst[1] = 0x80 | (char) ((c >> 6) & 0x3f);
        dst[2] = 0x80 | (char) (c & 0x3f);
        written = 3;
    } else {
        dst[0] = 0xf0 | (char) (c >> 18);
        dst[1] = 0x80 | (char) ((c >> 12) & 0x3f);
        dst[2] = 0x80 | (char) ((c >> 6) & 0x3f);
        dst[3] = 0x80 | (char) (c & 0x3f);
        written = 4;
    }
    out->cur += written;

    if (buf->max_size > 0 && out->cur > buf->max_size) {
        return TEXT_BUF_FULL;
    }
    return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* NUL-terminate the buffer; a trailing separator space is overwritten instead of kept. */
static void text_buffer_terminate_string(text_buffer_t *buf) {
    dyn_buffer_t *out = &buf->dyn_buffer;

    if (out->cur == 0 || out->buf[out->cur - 1] != ' ') {
        dyn_buffer_write_char(out, '\0');
        return;
    }

    out->buf[out->cur - 1] = '\0';
}
|
||||||
|
|
||||||
|
// Naive UTF16 -> ascii conversion
|
||||||
|
/**
 * Append every second byte of `str`, starting at offset 1, as one character.
 *
 * NOTE(review): which byte of each UTF-16 unit ends up at the odd offsets
 * depends on where the caller points `str` - confirm against parse_text().
 *
 * Appending stops as soon as the buffer reports TEXT_BUF_FULL, matching
 * text_buffer_append_string(); previously the status was overwritten by later
 * iterations and the loop kept writing past the limit.
 *
 * @return 0, or TEXT_BUF_FULL if the size limit was reached
 */
static int text_buffer_append_string16_le(text_buffer_t *buf, const char *str, size_t len) {
    for (size_t i = 1; i < len; i += 2) {
        int ret = text_buffer_append_char(buf, str[i]);
        if (ret != 0) {
            return ret;
        }
    }
    return 0;
}
|
||||||
|
|
||||||
|
/**
 * Append every second byte of `str`, starting at offset 0, as one character.
 *
 * NOTE(review): which byte of each UTF-16 unit ends up at the even offsets
 * depends on where the caller points `str` - confirm against parse_text().
 *
 * Appending stops as soon as the buffer reports TEXT_BUF_FULL, matching
 * text_buffer_append_string(); previously the status was overwritten by later
 * iterations and the loop kept writing past the limit.
 *
 * @return 0, or TEXT_BUF_FULL if the size limit was reached
 */
static int text_buffer_append_string16_be(text_buffer_t *buf, const char *str, size_t len) {
    for (size_t i = 0; i < len; i += 2) {
        int ret = text_buffer_append_char(buf, str[i]);
        if (ret != 0) {
            return ret;
        }
    }
    return 0;
}
|
||||||
|
|
||||||
|
#define UTF8_END_OF_STRING \
|
||||||
|
(ptr - str >= len || *ptr == 0 || \
|
||||||
|
(0xc0 == (0xe0 & *ptr) && ptr - str > len - 2) || \
|
||||||
|
(0xe0 == (0xf0 & *ptr) && ptr - str > len - 3) || \
|
||||||
|
(0xf0 == (0xf8 & *ptr) && ptr - str > len - 4))
|
||||||
|
|
||||||
|
/**
 * Append up to `len` bytes of UTF-8 text, one code point at a time.
 *
 * Inputs of four bytes or fewer take a shortcut: only ASCII characters
 * accepted by SHOULD_KEEP_CHAR are copied, without whitespace collapsing or
 * size checks. Longer inputs are decoded with utf8codepoint(); sequences
 * rejected by utf8_validchr2() are skipped.
 *
 * @param str text to append; NULL and empty input are accepted and ignored
 * @return 0, or the non-zero status of text_buffer_append_char()
 *         (TEXT_BUF_FULL) as soon as it occurs
 */
static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t len) {

    // `ptr`, `str` and `len` are referenced by name inside UTF8_END_OF_STRING
    const char *ptr = str;
    const char *oldPtr = ptr;

    if (str == NULL || UTF8_END_OF_STRING) {
        return 0;
    }

    if (len <= 4) {
        for (size_t i = 0; i < len; i++) {
            if (((utf8_int32_t) 0xffffff80 & str[i]) == 0 && SHOULD_KEEP_CHAR(str[i])) {
                dyn_buffer_write_char(&buf->dyn_buffer, str[i]);
            }
        }
        return 0;
    }

    utf8_int32_t c;
    char tmp[16] = {0};

    do {
        ptr = (char *) utf8codepoint(ptr, &c);

        // Isolate the bytes of the sequence just decoded so it can be
        // validated on its own. memset() replaces the former store through
        // an int pointer, which type-punned the char array.
        memset(tmp, 0, sizeof(int));
        memcpy(tmp, oldPtr, ptr - oldPtr);
        oldPtr = ptr;

        if (!utf8_validchr2(tmp)) {
            continue;
        }

        int ret = text_buffer_append_char(buf, c);

        if (ret != 0) {
            return ret;
        }
    } while (!UTF8_END_OF_STRING);

    return 0;
}
|
||||||
|
|
||||||
|
/**
 * Append a NUL-terminated string.
 *
 * @param str text to append; NULL is ignored, as in text_buffer_append_string()
 * @return 0, or TEXT_BUF_FULL if the size limit was reached
 */
static int text_buffer_append_string0(text_buffer_t *buf, const char *str) {
    // strlen(NULL) is undefined, so the guard has to happen before measuring
    if (str == NULL) {
        return 0;
    }
    return text_buffer_append_string(buf, str, strlen(str));
}
|
||||||
|
|
||||||
|
/**
 * Append the text found between the tags of a NUL-terminated markup string.
 *
 * Scanning starts in the "inside a tag" state, so everything up to the first
 * '>' is skipped; when the input contains no '>' at all it is appended as a
 * whole. Each text run is followed by a separator space.
 *
 * Input that ends inside an unterminated tag (e.g. a file cut off at the read
 * limit) no longer re-appends the text run that preceded that tag together
 * with the partial tag.
 *
 * @return 0, or TEXT_BUF_FULL if the size limit was reached
 */
static int text_buffer_append_markup(text_buffer_t *buf, const char *markup) {

    int tag_open = TRUE;
    const char *ptr = markup;
    // Start of the pending text run; NULL while inside a tag opened by '<'
    const char *start = markup;

    for (; *ptr != '\0'; ptr++) {
        if (tag_open) {
            if (*ptr == '>') {
                tag_open = FALSE;
                start = ptr + 1;
            }
            continue;
        }

        if (*ptr != '<') {
            continue;
        }

        tag_open = TRUE;
        if (ptr != start) {
            if (text_buffer_append_string(buf, start, (ptr - start)) == TEXT_BUF_FULL) {
                return TEXT_BUF_FULL;
            }
            if (text_buffer_append_char(buf, ' ') == TEXT_BUF_FULL) {
                return TEXT_BUF_FULL;
            }
        }
        // The run has been flushed; nothing is pending until the tag closes
        start = NULL;
    }

    if (start != NULL && ptr != start) {
        if (text_buffer_append_string(buf, start, (ptr - start)) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
        if (text_buffer_append_char(buf, ' ') == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
    }

    return 0;
}
|
||||||
|
|
||||||
|
/**
 * Read the whole file (f->info.st_size bytes) into a freshly allocated buffer.
 *
 * @param f    file to read
 * @param size receives the value returned by f->read(), or 0 when the buffer
 *             could not be allocated
 * @return the buffer (caller frees), or NULL when allocation fails or the
 *         number of bytes read differs from f->info.st_size
 */
static void *read_all(vfile_t *f, size_t *size) {
    void *buf = malloc(f->info.st_size);
    if (buf == NULL) {
        // Do not hand a NULL destination to read()
        *size = 0;
        return NULL;
    }

    *size = f->read(f, buf, f->info.st_size);

    if (*size != f->info.st_size) {
        free(buf);
        return NULL;
    }

    return buf;
}
|
||||||
|
|
||||||
|
#define STACK_BUFFER_SIZE (size_t)(4096 * 8)
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static void safe_sha1_update(SHA_CTX *ctx, void *buf, size_t size) {
|
||||||
|
unsigned char stack_buf[STACK_BUFFER_SIZE];
|
||||||
|
|
||||||
|
void *sha1_buf;
|
||||||
|
if (size <= STACK_BUFFER_SIZE) {
|
||||||
|
sha1_buf = stack_buf;
|
||||||
|
} else {
|
||||||
|
void *heap_sha1_buf = malloc(size);
|
||||||
|
sha1_buf = heap_sha1_buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(sha1_buf, buf, size);
|
||||||
|
SHA1_Update(ctx, (const void *) sha1_buf, size);
|
||||||
|
|
||||||
|
if (sha1_buf != stack_buf) {
|
||||||
|
free(sha1_buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
200
third-party/libscan/libscan/wpd/libwpd_c_api.cpp
vendored
Normal file
200
third-party/libscan/libscan/wpd/libwpd_c_api.cpp
vendored
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
#include "libwpd_c_api.h"
|
||||||
|
#include "libwpd/libwpd.h"
|
||||||
|
#include "libwpd/WPXProperty.h"
|
||||||
|
#include "libwpd-stream/libwpd-stream.h"
|
||||||
|
|
||||||
|
class StringDocument : public WPXDocumentInterface {
|
||||||
|
|
||||||
|
private:
|
||||||
|
text_buffer_t *tex;
|
||||||
|
document_t *doc;
|
||||||
|
bool is_full;
|
||||||
|
public:
|
||||||
|
|
||||||
|
StringDocument(text_buffer_t *tex, document_t *doc) {
|
||||||
|
this->tex = tex;
|
||||||
|
this->doc = doc;
|
||||||
|
this->is_full = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setDocumentMetaData(const WPXPropertyList &propList) override {
|
||||||
|
|
||||||
|
WPXPropertyList::Iter propIter(propList);
|
||||||
|
for (propIter.rewind(); propIter.next();) {
|
||||||
|
// TODO: Read metadata here ?!
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void endDocument() override {
|
||||||
|
text_buffer_terminate_string(this->tex);
|
||||||
|
}
|
||||||
|
|
||||||
|
void closeParagraph() override {
|
||||||
|
if (!this->is_full) {
|
||||||
|
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||||
|
this->is_full = true;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void closeSpan() override {
|
||||||
|
if (!this->is_full) {
|
||||||
|
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||||
|
this->is_full = true;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void closeSection() override {
|
||||||
|
if (!this->is_full) {
|
||||||
|
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||||
|
this->is_full = true;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void insertTab() override {
|
||||||
|
if (!this->is_full) {
|
||||||
|
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||||
|
this->is_full = true;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void insertSpace() override {
|
||||||
|
if (!this->is_full) {
|
||||||
|
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||||
|
this->is_full = true;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void insertText(const WPXString &text) override {
|
||||||
|
if (!this->is_full) {
|
||||||
|
if (text_buffer_append_string0(tex, text.cstr()) == TEXT_BUF_FULL) {
|
||||||
|
this->is_full = true;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void insertLineBreak() override {
|
||||||
|
if (!this->is_full) {
|
||||||
|
if (text_buffer_append_char(tex, ' ') == TEXT_BUF_FULL) {
|
||||||
|
this->is_full = true;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definePageStyle(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closePageSpan() override { /* noop */ }
|
||||||
|
|
||||||
|
void openHeader(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeHeader() override { /* noop */ }
|
||||||
|
|
||||||
|
void openFooter(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeFooter() override { /* noop */ }
|
||||||
|
|
||||||
|
void
|
||||||
|
defineParagraphStyle(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ }
|
||||||
|
|
||||||
|
void openParagraph(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ }
|
||||||
|
|
||||||
|
void defineCharacterStyle(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void openSpan(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void
|
||||||
|
defineSectionStyle(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ }
|
||||||
|
|
||||||
|
void openSection(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ }
|
||||||
|
|
||||||
|
void insertField(const WPXString &type, const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void defineOrderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void defineUnorderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void openOrderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void openUnorderedListLevel(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeOrderedListLevel() override { /* noop */ }
|
||||||
|
|
||||||
|
void closeUnorderedListLevel() override { /* noop */ }
|
||||||
|
|
||||||
|
void openListElement(const WPXPropertyList &propList, const WPXPropertyListVector &tabStops) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeListElement() override { /* noop */ }
|
||||||
|
|
||||||
|
void openFootnote(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeFootnote() override { /* noop */ }
|
||||||
|
|
||||||
|
void openEndnote(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeEndnote() override { /* noop */ }
|
||||||
|
|
||||||
|
void openComment(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeComment() override { /* noop */ }
|
||||||
|
|
||||||
|
void openTextBox(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeTextBox() override { /* noop */ }
|
||||||
|
|
||||||
|
void openTable(const WPXPropertyList &propList, const WPXPropertyListVector &columns) override { /* noop */ }
|
||||||
|
|
||||||
|
void openTableRow(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeTableRow() override { /* noop */ }
|
||||||
|
|
||||||
|
void openTableCell(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeTableCell() override { /* noop */ }
|
||||||
|
|
||||||
|
void insertCoveredTableCell(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeTable() override { /* noop */ }
|
||||||
|
|
||||||
|
void openFrame(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void closeFrame() override { /* noop */ }
|
||||||
|
|
||||||
|
void insertBinaryObject(const WPXPropertyList &propList, const WPXBinaryData &data) override { /* noop */ }
|
||||||
|
|
||||||
|
void insertEquation(const WPXPropertyList &propList, const WPXString &data) override { /* noop */ }
|
||||||
|
|
||||||
|
void openPageSpan(const WPXPropertyList &propList) override { /* noop */ }
|
||||||
|
|
||||||
|
void startDocument() override { /* noop */ };
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Wrap a memory range in a WPXStringStream; release it with wpd_memory_stream_destroy().
wpd_stream_t wpd_memory_stream_create(const unsigned char *buf, size_t buf_len) {
    WPXStringStream *created = new WPXStringStream(buf, buf_len);
    return created;
}
|
||||||
|
|
||||||
|
// Ask libwpd how confident it is that the stream holds a supported document.
wpd_confidence_t wpd_is_file_format_supported(wpd_stream_t ptr) {
    WPXStringStream *input = (WPXStringStream *) ptr;

    // The C enum is produced by casting the libwpd value directly
    return (wpd_confidence_t) WPDocument::isFileFormatSupported(input);
}
|
||||||
|
|
||||||
|
// Parse the stream with libwpd, collecting the document text into `tex`.
wpd_result_t wpd_parse(wpd_stream_t ptr, text_buffer_t *tex, document_t *doc) {
    WPXStringStream *input = (WPXStringStream *) ptr;
    StringDocument collector(tex, doc);

    // No password is supplied (third argument)
    WPDResult outcome = WPDocument::parse(input, &collector, nullptr);

    return (wpd_result_t) outcome;
}
|
||||||
|
|
||||||
|
// Destroy a stream obtained from wpd_memory_stream_create().
void wpd_memory_stream_destroy(wpd_stream_t ptr) {
    delete (WPXStringStream *) ptr;
}
|
||||||
50
third-party/libscan/libscan/wpd/libwpd_c_api.h
vendored
Normal file
50
third-party/libscan/libscan/wpd/libwpd_c_api.h
vendored
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
#ifndef SIST2_LIBWPD_C_API_H
|
||||||
|
#define SIST2_LIBWPD_C_API_H
|
||||||
|
|
||||||
|
#include "stdlib.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
#define EXTERNC extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERNC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
#include "../scan.h"
|
||||||
|
#include "../util.h"
|
||||||
|
#ifdef __cplusplus
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
typedef void *wpd_stream_t;
|
||||||
|
|
||||||
|
/* C-side counterpart of libwpd's WPDConfidence; wpd_is_file_format_supported() casts to it directly. */
typedef enum {
    C_WPD_CONFIDENCE_NONE = 0,
    C_WPD_CONFIDENCE_UNSUPPORTED_ENCRYPTION = 1,
    C_WPD_CONFIDENCE_SUPPORTED_ENCRYPTION = 2,
    C_WPD_CONFIDENCE_EXCELLENT = 3
} wpd_confidence_t;
|
||||||
|
|
||||||
|
/* C-side counterpart of libwpd's WPDResult; wpd_parse() casts to it directly. */
typedef enum {
    C_WPD_OK = 0,
    C_WPD_FILE_ACCESS_ERROR = 1,
    C_WPD_PARSE_ERROR = 2,
    C_WPD_UNSUPPORTED_ENCRYPTION_ERROR = 3,
    C_WPD_PASSWORD_MISSMATCH_ERROR = 4,
    C_WPD_OLE_ERROR = 5,
    C_WPD_UNKNOWN_ERROR = 6
} wpd_result_t;
|
||||||
|
|
||||||
|
|
||||||
|
EXTERNC wpd_confidence_t wpd_is_file_format_supported(wpd_stream_t stream);
|
||||||
|
|
||||||
|
EXTERNC wpd_stream_t wpd_memory_stream_create(const unsigned char *buf, size_t buf_len);
|
||||||
|
|
||||||
|
EXTERNC void wpd_memory_stream_destroy(wpd_stream_t stream);
|
||||||
|
|
||||||
|
EXTERNC wpd_result_t wpd_parse(wpd_stream_t ptr, text_buffer_t *tex, document_t *doc);
|
||||||
|
|
||||||
|
#endif
|
||||||
41
third-party/libscan/libscan/wpd/wpd.c
vendored
Normal file
41
third-party/libscan/libscan/wpd/wpd.c
vendored
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
#include "wpd.h"
|
||||||
|
#include "libwpd_c_api.h"
|
||||||
|
|
||||||
|
/**
 * Extract the text of a WordPerfect document and attach it to the document
 * as MetaContent.
 *
 * @param ctx scan context (used by the CTX_LOG_* macros)
 * @param f   file to read; it is loaded completely with read_all()
 * @param doc document that receives the MetaContent entry
 * @return SCAN_ERR_READ when the file cannot be read, is encrypted or is not
 *         recognised by libwpd; SCAN_OK otherwise. A parse error is logged
 *         and whatever text was collected up to that point is still indexed.
 */
scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc) {

    size_t buf_len;
    void *buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERRORF("wpd.c", "Could not read file [%s]", doc->filepath)
        return SCAN_ERR_READ;
    }

    void *stream = wpd_memory_stream_create(buf, buf_len);
    wpd_confidence_t conf = wpd_is_file_format_supported(stream);

    if (conf == C_WPD_CONFIDENCE_SUPPORTED_ENCRYPTION || conf == C_WPD_CONFIDENCE_UNSUPPORTED_ENCRYPTION) {
        CTX_LOG_DEBUGF("wpd.c", "File is encrypted! Password-protected WPD files are not supported yet (conf=%d)", conf)
        wpd_memory_stream_destroy(stream);
        free(buf);
        return SCAN_ERR_READ;
    }

    if (conf != C_WPD_CONFIDENCE_EXCELLENT) {
        CTX_LOG_ERRORF("wpd.c", "Unsupported file format! [%s] (conf=%d)", doc->filepath, conf)
        wpd_memory_stream_destroy(stream);
        free(buf);
        return SCAN_ERR_READ;
    }

    // A non-positive max_size disables the size limit of the text buffer
    text_buffer_t tex = text_buffer_create(-1);
    wpd_result_t res = wpd_parse(stream, &tex, doc);

    if (res != C_WPD_OK) {
        CTX_LOG_ERRORF("wpd.c", "Error while parsing WPD file [%s] (%d)",
                       doc->filepath, res)

        // The text is only terminated from the endDocument() callback, which
        // presumably does not run when parsing stops early; terminate here so
        // the partial text is a valid C string before it is copied below.
        if (tex.dyn_buffer.cur != 0) {
            text_buffer_terminate_string(&tex);
        }
    }

    if (tex.dyn_buffer.cur != 0) {
        APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
    }

    text_buffer_destroy(&tex);
    wpd_memory_stream_destroy(stream);
    free(buf);

    // Previously control fell off the end of this non-void function
    return SCAN_OK;
}
|
||||||
23
third-party/libscan/libscan/wpd/wpd.h
vendored
Normal file
23
third-party/libscan/libscan/wpd/wpd.h
vendored
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#ifndef SIST2_WPD_H
|
||||||
|
#define SIST2_WPD_H
|
||||||
|
|
||||||
|
#include "../scan.h"
|
||||||
|
#include "../util.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
long content_size;
|
||||||
|
|
||||||
|
log_callback_t log;
|
||||||
|
logf_callback_t logf;
|
||||||
|
|
||||||
|
unsigned int wpd_mime;
|
||||||
|
} scan_wpd_ctx_t;
|
||||||
|
|
||||||
|
scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static int is_wpd(scan_wpd_ctx_t *ctx, unsigned int mime) {
|
||||||
|
return mime == ctx->wpd_mime;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
1169
third-party/libscan/test/main.cpp
vendored
Normal file
1169
third-party/libscan/test/main.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
114
third-party/libscan/test/test_util.cpp
vendored
Normal file
114
third-party/libscan/test/test_util.cpp
vendored
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
#include "test_util.h"
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
// Reported by load_file() when the test file cannot be opened
#define FILE_NOT_FOUND_ERR "Could not find file, did you clone the test files repo?"
|
||||||
|
|
||||||
|
|
||||||
|
int fs_read(struct vfile *f, void *buf, size_t size) {
|
||||||
|
|
||||||
|
if (f->fd == -1) {
|
||||||
|
f->fd = open(f->filepath, O_RDONLY);
|
||||||
|
if (f->fd == -1) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (int) read(f->fd, buf, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Note: No out of bounds check
|
||||||
|
// Copy `size` bytes from the in-memory test data and advance the data pointer.
// NOTE(review): always returns 0 rather than the number of bytes copied,
// unlike fs_read() - confirm that callers do not rely on the count.
int mem_read(vfile_t *f, void *buf, size_t size) {
    char *source = (char *) f->_test_data;

    memcpy(buf, source, size);
    f->_test_data = source + size;

    return 0;
}
|
||||||
|
|
||||||
|
// Close the descriptor if the file was ever opened.
void fs_close(vfile_t *f) {
    if (f->fd == -1) {
        return;
    }
    close(f->fd);
}
|
||||||
|
|
||||||
|
// Reset the document's metadata list and bind `f` to a file on disk.
void load_doc_file(const char *filepath, vfile_t *f, document_t *doc) {
    doc->meta_head = doc->meta_tail = nullptr;
    load_file(filepath, f);
}
|
||||||
|
|
||||||
|
// Reset the document's metadata list and bind `f` to an in-memory buffer.
void load_doc_mem(void *mem, size_t mem_len, vfile_t *f, document_t *doc) {
    doc->meta_head = doc->meta_tail = nullptr;
    load_mem(mem, mem_len, f);
}
|
||||||
|
|
||||||
|
// Free the document's metadata, then close the file if it has a close callback.
void cleanup(document_t *doc, vfile_t *f) {
    destroy_doc(doc);

    if (f->close != NULL) {
        f->close(f);
    }
}
|
||||||
|
|
||||||
|
void load_file(const char *filepath, vfile_t *f) {
|
||||||
|
stat(filepath, &f->info);
|
||||||
|
f->fd = open(filepath, O_RDONLY);
|
||||||
|
|
||||||
|
if (f->fd == -1) {
|
||||||
|
FAIL() << FILE_NOT_FOUND_ERR;
|
||||||
|
}
|
||||||
|
|
||||||
|
f->filepath = filepath;
|
||||||
|
f->read = fs_read;
|
||||||
|
f->close = fs_close;
|
||||||
|
f->is_fs_file = TRUE;
|
||||||
|
f->calculate_checksum = TRUE;
|
||||||
|
f->has_checksum = FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bind `f` to an in-memory buffer served by mem_read(); there is no close callback.
// NOTE(review): is_fs_file is set to TRUE here as well, and fd and the
// checksum flags are left untouched - confirm this is intended.
void load_mem(void *mem, size_t size, vfile_t *f) {
    f->filepath = "_mem_";

    f->_test_data = mem;
    f->info.st_size = (int) size;

    f->read = mem_read;
    f->close = nullptr;

    f->is_fs_file = TRUE;
}
|
||||||
|
|
||||||
|
// First metadata entry of the document with the given key, or nullptr.
meta_line_t *get_meta(document_t *doc, metakey key) {
    meta_line_t *first = doc->meta_head;
    return get_meta_from(first, key);
}
|
||||||
|
|
||||||
|
// Walk the list starting at `meta` and return the first entry with the given key, or nullptr.
meta_line_t *get_meta_from(meta_line_t *meta, metakey key) {
    for (meta_line_t *entry = meta; entry != nullptr; entry = entry->next) {
        if (entry->key == key) {
            return entry;
        }
    }
    return nullptr;
}
|
||||||
|
|
||||||
|
// Free every entry of the document's metadata list (the head pointer is left as is).
void destroy_doc(document_t *doc) {
    for (meta_line_t *entry = doc->meta_head; entry != nullptr;) {
        meta_line_t *following = entry->next;
        free(entry);
        entry = following;
    }
}
|
||||||
|
|
||||||
|
/**
 * Corrupt a buffer for fuzz tests.
 *
 * Performs `n` rounds. Each round picks a random offset and then either
 * truncates the logical length (with a probability of `trunc_p` percent) or
 * overwrites `width` bytes at that offset with random values.
 *
 * @param buf     buffer to modify in place
 * @param buf_len in/out logical length of the buffer; only ever reduced
 * @param width   number of bytes overwritten per round
 * @param n       number of rounds
 * @param trunc_p chance, in percent, that a round truncates instead
 */
void fuzz_buffer(char *buf, size_t *buf_len, int width, int n, int trunc_p) {
    for (int i = 0; i < n; i++) {

        // Too small to place a `width`-byte window: the modulus below would
        // be zero or wrap around, so leave the buffer alone.
        if (width >= 0 && *buf_len <= (size_t) width + 1) {
            return;
        }

        size_t offset = rand() % (*buf_len - width - 1);

        if (rand() % 100 < trunc_p) {
            // Keep at least 1000 bytes, but never report more bytes than
            // the buffer currently holds.
            size_t truncated = offset > 1000 ? offset : 1000;
            if (truncated < *buf_len) {
                *buf_len = truncated;
            }
            continue;
        }

        for (int disp = 0; disp < width; disp++) {
            buf[offset + disp] = (int8_t) rand();
        }
    }
}
|
||||||
46
third-party/libscan/test/test_util.h
vendored
Normal file
46
third-party/libscan/test/test_util.h
vendored
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
#ifndef SCAN_TEST_UTIL_H
|
||||||
|
#define SCAN_TEST_UTIL_H
|
||||||
|
|
||||||
|
#include "../libscan/scan.h"
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
void load_file(const char *filepath, vfile_t *f);
|
||||||
|
void load_mem(void *mem, size_t size, vfile_t *f);
|
||||||
|
void load_doc_mem(void *mem, size_t mem_len, vfile_t *f, document_t *doc);
|
||||||
|
void load_doc_file(const char *filepath, vfile_t *f, document_t *doc);
|
||||||
|
void cleanup(document_t *doc, vfile_t *f);
|
||||||
|
|
||||||
|
// Formatted-log callback that discards everything.
static void noop_logf(const char *filepath, int level, char *format, ...) {
    (void) filepath;
    (void) level;
    (void) format;
}
|
||||||
|
|
||||||
|
// Log callback that discards everything.
static void noop_log(const char *filepath, int level, char *str) {
    (void) filepath;
    (void) level;
    (void) str;
}
|
||||||
|
|
||||||
|
static size_t store_size = 0;
|
||||||
|
|
||||||
|
// Store callback that only adds up the size of the values it receives.
// (To inspect the stored values, write `value` to a file named after `key` here.)
static void counter_store(char* key, size_t key_len, char *value, size_t value_len) {
    store_size = store_size + value_len;
}
|
||||||
|
|
||||||
|
meta_line_t *get_meta(document_t *doc, metakey key);
|
||||||
|
|
||||||
|
meta_line_t *get_meta_from(meta_line_t *meta, metakey key);
|
||||||
|
|
||||||
|
|
||||||
|
// Invoke the close callback of a vfile_t lvalue, if it has one.
// The argument is parenthesized so that expressions such as *f expand safely.
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
|
||||||
|
|
||||||
|
void destroy_doc(document_t *doc);
|
||||||
|
|
||||||
|
void fuzz_buffer(char *buf, size_t *buf_len, int width, int n, int trunc_p);
|
||||||
|
|
||||||
|
#endif
|
||||||
1
third-party/libscan/third-party/antiword
vendored
Submodule
1
third-party/libscan/third-party/antiword
vendored
Submodule
Submodule third-party/libscan/third-party/antiword added at 62ae66db99
1
third-party/libscan/third-party/utf8.h
vendored
Submodule
1
third-party/libscan/third-party/utf8.h
vendored
Submodule
Submodule third-party/libscan/third-party/utf8.h added at 146be69f88
Reference in New Issue
Block a user