Mirror of https://github.com/simon987/sist2.git (synced 2025-12-13 15:29:04 +00:00)

Comparing commits: 3.0.4 ... process-po (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | 903feb4889 |  |
**.gitattributes** (vendored, new file, +3)

````diff
@@ -0,0 +1,3 @@
+CMakeModules/* linguist-vendored
+**/*_generated.c linguist-vendored
+**/*_generated.h linguist-vendored
````
**CMakeLists.txt**

````diff
@@ -5,7 +5,6 @@ set(CMAKE_C_STANDARD 11)
 
 option(SIST_DEBUG "Build a debug executable" on)
 option(SIST_FAST "Enable more optimisation flags" off)
-option(SIST_DEBUG_INFO "Turn on debug information in web interface" on)
 
 add_compile_definitions(
         "SIST_PLATFORM=${SIST_PLATFORM}"
@@ -15,17 +14,7 @@ if (SIST_DEBUG)
     add_compile_definitions(
             "SIST_DEBUG=${SIST_DEBUG}"
     )
-    set(VCPKG_BUILD_TYPE debug)
-else ()
-    set(VCPKG_BUILD_TYPE release)
-endif ()
-
-if (SIST_DEBUG_INFO)
-    add_compile_definitions(
-            "SIST_DEBUG_INFO=${SIST_DEBUG_INFO}"
-    )
-endif ()
+endif()
 
 add_subdirectory(third-party/libscan)
 set(ARGPARSE_SHARED off)
@@ -58,7 +47,7 @@ add_executable(sist2
 
         src/auth0/auth0_c_api.h src/auth0/auth0_c_api.cpp
 
-        src/database/database_stats.c src/database/database_schema.c)
+        src/database/database_stats.c src/database/database_stats.h src/database/database_schema.c)
 set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
 
 target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
@@ -96,7 +85,7 @@ if (SIST_DEBUG)
             -fno-omit-frame-pointer
             -fsanitize=address
             -fno-inline
             #            -O2
     )
     target_link_options(
             sist2
````
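The net effect of this CMakeLists change is that `SIST_DEBUG_INFO` disappears as a configure-time switch and the vcpkg build-type selection moves out of this file. A minimal before/after configure sketch, using the same flags and toolchain path that appear in the build scripts later in this diff:

```bash
# 3.0.4 side: web-interface debug info is a CMake option
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off \
      -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..

# process-po side: the option no longer exists and is simply omitted
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off \
      -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
```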
**Dockerfile** (10 lines changed)
````diff
@@ -19,12 +19,13 @@ COPY sist2-admin sist2-admin
 RUN cd sist2-vue/ && npm install && npm run build
 RUN cd sist2-admin/frontend/ && npm install && npm run build
 
-RUN mkdir build && cd build && cmake -DSIST_PLATFORM=x64_linux_docker -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
+RUN mkdir build && cd build && cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
 RUN cd build && make -j$(nproc)
 RUN strip build/sist2 || mv build/sist2_debug build/sist2
 
 FROM --platform="linux/amd64" ubuntu@sha256:965fbcae990b0467ed5657caceaec165018ef44a4d2d46c7cdea80a9dff0d1ea
 
+WORKDIR /root
+
 ENV LANG C.UTF-8
 ENV LC_ALL C.UTF-8
@@ -51,7 +52,6 @@ RUN mkdir -p /usr/share/tessdata && \
 COPY --from=build /build/build/sist2 /root/sist2
 
 # sist2-admin
-WORKDIR /root/sist2-admin
-COPY sist2-admin/requirements.txt /root/sist2-admin/
-RUN python3 -m pip install --no-cache -r /root/sist2-admin/requirements.txt
-COPY --from=build /build/sist2-admin/ /root/sist2-admin/
+COPY sist2-admin/requirements.txt sist2-admin/
+RUN python3 -m pip install --no-cache -r sist2-admin/requirements.txt
+COPY --from=build /build/sist2-admin/ sist2-admin/
````
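Note the `WORKDIR /root` added in the runtime stage: the shortened relative paths in the admin `COPY`/`RUN` lines on the `+` side resolve against it. For reference, a `docker run` equivalent of the compose service that the README change further down removes; image tag, ports, volumes and entrypoint are taken from that snippet:

```bash
docker run -d --restart unless-stopped \
    -v "$(pwd)/sist2-admin-data:/sist2-admin" \
    -v /:/host \
    -p 4090:4090 -p 8080:8080 \
    --workdir /root/sist2-admin \
    --entrypoint python3 \
    simon987/sist2:3.0.3 /root/sist2-admin/sist2_admin/app.py
```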
**Dockerfile.arm64**

````diff
@@ -3,7 +3,7 @@ MAINTAINER simon987 <me@simon987.net>
 
 WORKDIR /build/
 ADD . /build/
-RUN mkdir build && cd build && cmake -DSIST_PLATFORM=arm64_linux_docker -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
+RUN mkdir build && cd build && cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
 RUN cd build && make -j$(nproc)
 RUN strip build/sist2 || mv build/sist2_debug build/sist2
 
````
**README.md** (108 lines changed)
````diff
@@ -10,13 +10,13 @@ sist2 (Simple incremental search tool)
 
 *Warning: sist2 is in early development*
 
 ![sist2.png](docs/sist2.webp)
 
 ## Features
 
 * Fast, low memory usage, multi-threaded
-* Manage & schedule scan jobs with simple web interface (Docker only)
 * Mobile-friendly Web interface
+* Portable (all its features are packaged in a single executable)
 * Extracts text and metadata from common file types \*
 * Generates thumbnails \*
 * Incremental scanning
````
````diff
@@ -24,60 +24,47 @@ sist2 (Simple incremental search tool)
 * Recursive scan inside archive files \*\*
 * OCR support with tesseract \*\*\*
 * Stats page & disk utilisation visualization
-* Named-entity recognition (client-side) \*\*\*\*
 
 \* See [format support](#format-support)
 \*\* See [Archive files](#archive-files)
 \*\*\* See [OCR](#ocr)
-\*\*\*\* See [Named-Entity Recognition](#NER)
+
+![sist2.png](docs/sist2.png)
 
 ## Getting Started
 
-### Using Docker Compose *(Windows/Linux/Mac)*
-
-```yaml
-version: "3"
-
-services:
-  elasticsearch:
-    image: elasticsearch:7.17.9
-    restart: unless-stopped
-    environment:
-      - "discovery.type=single-node"
-      - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
-  sist2-admin:
-    image: simon987/sist2:3.0.3
-    restart: unless-stopped
-    volumes:
-      - ./sist2-admin-data/:/sist2-admin/
-      - /:/host
-    ports:
-      - 4090:4090 # sist2
-      - 8080:8080 # sist2-admin
-    working_dir: /root/sist2-admin/
-    entrypoint: python3 /root/sist2-admin/sist2_admin/app.py
-```
-
-Navigate to http://localhost:8080/ to configure sist2-admin.
-
-### Using the executable file *(Linux/WSL only)*
-
 1. Have an Elasticsearch (>= 6.8.X, ideally >=7.14.0) instance running
     1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
-    2. *(or)* Run using docker:
+    1. *(or)* Run using docker:
     ```bash
     docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.17.9
     ```
+    1. *(or)* Run using docker-compose:
+    ```yaml
+    elasticsearch:
+      image: docker.elastic.co/elasticsearch/elasticsearch:7.17.9
+      environment:
+        - discovery.type=single-node
+        - "ES_JAVA_OPTS=-Xms1G -Xmx2G"
+    ```
+1. Download sist2 executable
+    1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
+       Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
+    2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
+       recommended!)*
+    3. *(or)* `docker pull simon987/sist2:2.12.1-x64-linux`
 
-2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
-   Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
-3. See [usage guide](docs/USAGE.md) for command line usage.
+1. See [Usage guide](docs/USAGE.md)
 
-Example usage:
+\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
 
-1. Scan a directory: `sist2 scan ~/Documents --output ./documents.sist2`
-2. Push index to Elasticsearch: `sist2 index ./documents.sist2`
-3. Start web interface: `sist2 web ./documents.sist2`
+## Example usage
+
+See [Usage guide](docs/USAGE.md) for more details
+
+1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
+1. Push index to Elasticsearch: `sist2 index ./docs_idx`
+1. Start web interface: `sist2 web ./docs_idx`
 
 ## Format support
 
````
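Whichever way Elasticsearch is started, it listens on port 9200; a quick reachability check before running sist2 (this is the standard Elasticsearch root endpoint, nothing sist2-specific):

```bash
# block until Elasticsearch answers, then print its cluster info JSON
until curl -s http://localhost:9200 >/dev/null; do sleep 1; done
curl -s http://localhost:9200
```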
````diff
@@ -95,7 +82,7 @@ Example usage:
 | tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
 | docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
 | doc (MS Word 97-2003) | antiword | yes | no | author, title |
-| mobi, azw, azw3 | libmobi | yes | yes | author, title |
+| mobi, azw, azw3 | libmobi | yes | no | author, title |
 | wpd (WordPerfect) | libwpd | yes | no | *planned* |
 | json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
 
````
````diff
@@ -136,44 +123,20 @@ sist2 scan --ocr-images --ocr-lang eng ~/Images/Screenshots/
 sist2 scan --ocr-ebooks --ocr-images --ocr-lang eng+chi_sim ~/Chinese-Bilingual/
 ```
 
-### NER
-
-sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported repository URL to
-**Configuration** > **Machine learning options** > **Model repositories**
-to enable it.
-
-The text processing is done in your browser, no data is sent to any third-party services.
-See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
-
-#### List of available repositories:
-
-| URL | Maintainer | Purpose |
-|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
-| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
-
-<details>
-<summary>Screenshot</summary>
-
-![ner](ner.png)
-
-</details>
-
 ## Build from source
 
 You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries
 
-### Using docker
+### With docker (recommended)
 
 ```bash
 git clone --recursive https://github.com/simon987/sist2/
 cd sist2
-docker build . -t my-sist2-image
-# Copy sist2 executable from docker image
+docker build . -f ./Dockerfile -t my-sist2-image
 docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
 ```
 
-### Using a linux computer
+### On a linux computer
 
 1. Install compile-time dependencies
 
@@ -181,14 +144,15 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
    apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
    ```
 
-2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
-3. Install vcpkg dependencies
+1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
+1. Install vcpkg dependencies
 
    ```bash
   vcpkg install curl[core,openssl] sqlite3 cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample]
   ```
 
-4. Build
+1. Build
 ```bash
 git clone --recursive https://github.com/simon987/sist2/
 (cd sist2-vue; npm install; npm run build)
````
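The numbered steps stop at the frontend build; the remaining configure-and-compile step is done the same way as the build scripts further down in this diff:

```bash
mkdir build && cd build
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off \
      -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
make -j "$(nproc)"
```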
**docs/USAGE.md** (231 lines changed)
````diff
@@ -1,64 +1,78 @@
 # Usage
 
+*More examples (specifically with docker/compose) are in progress*
+
+* [scan](#scan)
+    * [options](#scan-options)
+    * [examples](#scan-examples)
+    * [index format](#index-format)
+* [index](#index)
+    * [options](#index-options)
+    * [examples](#index-examples)
+* [web](#web)
+    * [options](#web-options)
+    * [examples](#web-examples)
+    * [rewrite_url](#rewrite_url)
+* [elasticsearch](#elasticsearch)
+* [exec-script](#exec-script)
+* [tagging](#tagging)
+    * [sidecar files](#sidecar-files)
 
 ```
 Usage: sist2 scan [OPTION]... PATH
    or: sist2 index [OPTION]... INDEX
    or: sist2 web [OPTION]... INDEX...
    or: sist2 exec-script [OPTION]... INDEX
 
 Lightning-fast file system indexer and search tool.
 
   -h, --help                    show this help message and exit
-  -v, --version                 Print version and exit.
-  --verbose                     Turn on logging.
-  --very-verbose                Turn on debug messages.
-  --json-logs                   Output logs in JSON format.
+  -v, --version                 Show version and exit
+  --verbose                     Turn on logging
+  --very-verbose                Turn on debug messages
 
 Scan options
-  -t, --threads=<int>           Number of threads. DEFAULT: 1
-  -q, --thumbnail-quality=<int> Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2
-  --thumbnail-size=<int>        Thumbnail size, in pixels. DEFAULT: 552
-  --thumbnail-count=<int>       Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1
-  --content-size=<int>          Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768
-  -o, --output=<str>            Output index file path. DEFAULT: index.sist2
-  --incremental                 If the output file path exists, only scan new or modified files.
-  --optimize-index              Defragment index file after scan to reduce its file size.
+  -t, --threads=<int>           Number of threads. DEFAULT=1
+  --mem-throttle=<int>          Total memory threshold in MiB for scan throttling. DEFAULT=0
+  -q, --thumbnail-quality=<int> Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2
+  --thumbnail-size=<int>        Thumbnail size, in pixels. DEFAULT=500
+  --thumbnail-count=<int>       Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1
+  --content-size=<int>          Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768
+  --incremental=<str>           Reuse an existing index and only scan modified files.
+  -o, --output=<str>            Output directory. DEFAULT=index.sist2/
   --rewrite-url=<str>           Serve files from this url instead of from disk.
-  --name=<str>                  Index display name. DEFAULT: index
+  --name=<str>                  Index display name. DEFAULT: (name of the directory)
   --depth=<int>                 Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
-  --archive=<str>               Archive file mode (skip|list|shallow|recurse). skip: don't scan, list: only save file names as text, shallow: don't scan archives inside archives. DEFAULT: recurse
+  --archive=<str>               Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
   --archive-passphrase=<str>    Passphrase for encrypted archive files
   --ocr-lang=<str>              Tesseract language (use 'tesseract --list-langs' to see which are installed on your machine)
   --ocr-images                  Enable OCR'ing of image files.
   --ocr-ebooks                  Enable OCR'ing of ebook files.
-  -e, --exclude=<str>           Files that match this regex will not be scanned.
-  --fast                        Only index file names & mime type.
+  -e, --exclude=<str>           Files that match this regex will not be scanned
+  --fast                        Only index file names & mime type
   --treemap-threshold=<str>     Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
   --mem-buffer=<int>            Maximum memory buffer size per thread in MiB for files inside archives (see USAGE.md). DEFAULT: 2000
   --read-subtitles              Read subtitles from media files.
-  --fast-epub                   Faster but less accurate EPUB parsing (no thumbnails, metadata).
+  --fast-epub                   Faster but less accurate EPUB parsing (no thumbnails, metadata)
   --checksums                   Calculate file checksums when scanning.
   --list-file=<str>             Specify a list of newline-delimited paths to be scanned instead of normal directory traversal. Use '-' to read from stdin.
 
 Index options
-  -t, --threads=<int>           Number of threads. DEFAULT: 1
-  --es-url=<str>                Elasticsearch url with port. DEFAULT: http://localhost:9200
-  --es-insecure-ssl             Do not verify SSL connections to Elasticsearch.
-  --es-index=<str>              Elasticsearch index name. DEFAULT: sist2
-  -p, --print                   Print JSON documents to stdout instead of indexing to elasticsearch.
-  --incremental-index           Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
+  -t, --threads=<int>           Number of threads. DEFAULT=1
+  --es-url=<str>                Elasticsearch url with port. DEFAULT=http://localhost:9200
+  --es-index=<str>              Elasticsearch index name. DEFAULT=sist2
+  -p, --print                   Just print JSON documents to stdout.
+  --incremental-index           Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch.
   --script-file=<str>           Path to user script.
   --mappings-file=<str>         Path to Elasticsearch mappings.
   --settings-file=<str>         Path to Elasticsearch settings.
   --async-script                Execute user script asynchronously.
-  --batch-size=<int>            Index batch size. DEFAULT: 70
-  -f, --force-reset             Reset Elasticsearch mappings and settings.
+  --batch-size=<int>            Index batch size. DEFAULT: 100
+  -f, --force-reset             Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
 
 Web options
-  --es-url=<str>                Elasticsearch url. DEFAULT: http://localhost:9200
-  --es-insecure-ssl             Do not verify SSL connections to Elasticsearch.
-  --es-index=<str>              Elasticsearch index name. DEFAULT: sist2
-  --bind=<str>                  Listen for connections on this address. DEFAULT: localhost:4090
+  --es-url=<str>                Elasticsearch url. DEFAULT=http://localhost:9200
+  --es-index=<str>              Elasticsearch index name. DEFAULT=sist2
+  --bind=<str>                  Listen on this address. DEFAULT=localhost:4090
   --auth=<str>                  Basic auth in user:password format
   --auth0-audience=<str>        API audience/identifier
   --auth0-domain=<str>          Application domain
````
````diff
@@ -70,15 +84,77 @@ Web options
   --lang=<str>                  Default UI language. Can be changed by the user
 
 Exec-script options
-  --es-url=<str>                Elasticsearch url. DEFAULT: http://localhost:9200
-  --es-insecure-ssl             Do not verify SSL connections to Elasticsearch.
-  --es-index=<str>              Elasticsearch index name. DEFAULT: sist2
+  --es-url=<str>                Elasticsearch url. DEFAULT=http://localhost:9200
+  --es-index=<str>              Elasticsearch index name. DEFAULT=sist2
   --script-file=<str>           Path to user script.
   --async-script                Execute user script asynchronously.
 
 Made by simon987 <me@simon987.net>. Released under GPL-3.0
 ```
 
+## Scan
+
+### Scan options
+
+* `-t, --threads`
+
+  Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
+
+* `--mem-throttle`
+
+  Total memory threshold in MiB for scan throttling. Worker threads will not start a new parse job
+  until the total memory usage of sist2 is below this threshold. Set to 0 to disable. DEFAULT=0
+
+* `-q, --thumbnail-quality`
+
+  Thumbnail quality, on a scale of 2 to 31, 2 being the best. See section below for a rough estimate of thumbnail database size
+
+* `--thumbnail-size`
+
+  Thumbnail size, in pixels.
+
+* `--thumbnail-count`
+
+  Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
+  will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image
+  every ~7s). Set to 0 to completely disable thumbnails.
+
+* `--content-size`
+
+  Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
+  Repeated whitespace and special characters do not count toward this limit.
+  Set to 0 to completely disable content parsing.
+
+* `--incremental`
+
+  Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
+  will be copied to the new index and will not be parsed again.
+
+* `-o, --output` Output directory.
+* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
+* `--name` Set the `name` option for the web module
+* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
+* `--archive` Archive file mode.
+    * skip: Don't parse
+    * list: Only get file names as text
+    * shallow: Don't parse archives inside archives.
+    * recurse: Scan archives recursively (default)
+* `--ocr-lang`, `--ocr-ebooks`, `--ocr-images` See [OCR](../README.md#OCR)
+* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
+  part of the full absolute path.
+
+  Examples:
+    * `-e ".*\.ttf"`: Ignore ttf files
+    * `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
+    * `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
+    * `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directories
+    * `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
+      `/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
+* `--fast` Only index file names and mime type
+* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
+  will not be considered for the disk utilisation visualization; their size will be added to
+  the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards",
+  and so on.
+
+  In effect, smaller `treemap-threshold` values will yield a more detailed
+  (but also a more cluttered and harder to read) visualization.
+
+* `--mem-buffer` Maximum memory buffer size in MiB (per thread) for files inside archives. Media files
+  larger than this number will be read sequentially and no *seek* operations will be supported.
+
+  To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
+* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
+* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
+* `--checksums` Calculate file checksums (SHA1) when scanning files. This option does not cause any additional read
+  operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
+  files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
+
````
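The `--exclude` patterns in the hunk above are matched against the full absolute path. A rough way to preview what a pattern would skip before committing to a long scan (an approximation using grep, not sist2's own matcher):

```bash
# files the pattern ".*\.(ttf|rar)" would exclude
find ~/Documents -type f | grep -E '.*\.(ttf|rar)' | head

# files that would remain
find ~/Documents -type f | grep -vE '.*\.(ttf|rar)' | wc -l
```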
````diff
 
 #### Thumbnail database size estimation
 
 See chart below for rough estimate of thumbnail size vs. thumbnail size & quality arguments:
@@ -88,6 +164,8 @@ that is about `8000000 * 36kB = 288GB`.
 
 ![thumbnail_size](thumbnail_size.png)
 
+// TODO: add note about LMDB page size 4096
+
 ### Scan examples
 
 Simple scan
````
````diff
@@ -97,19 +175,82 @@ sist2 scan ~/Documents
 sist2 scan \
     --threads 4 --content-size 16000000 --thumbnail-quality 2 --archive shallow \
     --name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
-    ~/Documents -o ./documents.sist2
+    ~/Documents -o ./documents.idx/
 ```
 
 Incremental scan
 
-If the index file does not exist, `--incremental` has no effect.
-
-```bash
-sist scan ~/Documents -o ./documents.sist2
-sist scan ~/Documents -o ./documents.sist2 --incremental
-# or
-sist scan ~/Documents -o ./documents.sist2 --incremental
-sist scan ~/Documents -o ./documents.sist2 --incremental
 ```
+sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
+```
 
````
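In the new layout an incremental scan reads one index and writes another, rather than updating the output file in place. A sketch of a periodic re-scan under that model (directory names assumed):

```bash
# initial full scan
sist2 scan ~/Documents -o ./docs_v1.idx/

# later: copy unmodified entries from docs_v1, parse only changed files
sist2 scan --incremental ./docs_v1.idx/ -o ./docs_v2.idx/ ~/Documents
```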
````diff
+### Index format
+
+A typical `ndjson` type index structure looks like this:
+```
+documents.idx/
+├── descriptor.json
+├── _index_main.ndjson.zst
+├── treemap.csv
+├── agg_mime.csv
+├── agg_date.csv
+├── agg_size.csv
+├── thumbs/
+|   ├── data.mdb
+|   └── lock.mdb
+├── tags/
+|   ├── data.mdb
+|   └── lock.mdb
+└── meta/
+    ├── data.mdb
+    └── lock.mdb
+```
+
+The `_index_*.ndjson.zst` files contain the document data in JSON format, in a compressed newline-delimited file.
+
+The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
+database containing the thumbnails.
+
+The `descriptor.json` file contains general information about the index. The
+following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
+
+The `.csv` files are pre-computed aggregations necessary for the stats page.
+
+*thumbs/*:
+
+LMDB key-value store. Keys are **binary** 16-byte md5 hashes* (`_id` field)
+and values are raw image bytes.
+
+*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
+
````
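Per the footnote above, a thumbnail key can be derived by hand: the md5 of the file's index-root-relative path, used as 16 raw bytes. A quick sketch (the path shown is made up):

```bash
# hex digest of the LMDB key for one file; the 32 hex chars printed are the
# 16 raw bytes used as the key in thumbs/data.mdb
echo -n "photos/2019/IMG_1234.jpg" | md5sum
```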
````diff
+## Index
+
+### Index options
+
+* `--es-url`
+
+  Elasticsearch url and port. If you are using docker, make sure that both containers are on the
+  same network.
+
+* `--es-index`
+
+  Elasticsearch index name. DEFAULT=sist2
+
+* `-p, --print`
+
+  Print index in JSON format to stdout.
+
+* `--incremental-index`
+
+  Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
+  Only the new changes since the last scan will be sent.
+
+* `--script-file`
+
+  Path to user script. See [Scripting](scripting.md).
+
+* `--mappings-file`
+
+  Path to custom Elasticsearch mappings. If none is specified, [the bundled mappings](https://github.com/simon987/sist2/tree/master/schema) will be used.
+
+* `--settings-file`
+
+  Path to custom Elasticsearch settings. *(See above)*
+
+* `--async-script`
+
+  Use `wait_for_completion=false` elasticsearch option while executing user script.
+  (See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
+
+* `--batch-size=<int>`
+
+  Index batch size. Indexing is generally faster with larger batches, but payloads that
+  are too large will fail and additional overhead for retrying with smaller sizes may slow
+  down the process.
+
+* `-f, --force-reset`
+
+  Reset Elasticsearch mappings and settings.
+
+* `-t, --threads` Number of threads to use. Ideally, choose a number equal to the number of logical cores of the machine hosting Elasticsearch.
 
 ### Index examples
 
````
````diff
@@ -239,8 +380,8 @@ The sidecar file must have exactly the same file path and the `.s2meta` suffix.
 ```
 
 ```
-sist2 scan ~/Documents -o ./docs.sist2
-sist2 index ./docs.sist2
+sist2 scan ~/Documents -o ./docs.idx
+sist2 index ./docs.idx
 ```
 
 *NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,
````
**docs/ner.png** (binary, deleted; was 448 KiB)

**docs/sist2.gif** (binary, deleted; was 3.7 MiB)

**docs/sist2.png** (binary, new file; 1011 KiB)
**Build and dev scripts**

````diff
@@ -7,7 +7,7 @@ git submodule update --init --recursive
 mkdir build
 (
   cd build
-  cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
+  cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
   make -j $(nproc)
   strip sist2
   ./sist2 -v > VERSION
@@ -17,7 +17,7 @@ mv build/sist2 sist2-x64-linux
 (
   cd build
   rm -rf CMakeFiles CMakeCache.txt
-  cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
+  cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
   make -j $(nproc)
 )
 mv build/sist2_debug sist2-x64-linux-debug
@@ -7,7 +7,7 @@ git submodule update --init --recursive
 mkdir build
 (
   cd build
-  cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
+  cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
   make -j $(nproc)
   strip sist2
 )
@@ -16,7 +16,7 @@ mv build/sist2 sist2-arm64-linux
 rm -rf CMakeFiles CMakeCache.txt
 (
   cd build
-  cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
+  cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
   make -j $(nproc)
 )
 mv build/sist2_debug sist2-arm64-linux-debug
@@ -1,3 +1,3 @@
 docker run --rm -it --name "sist2-dev-es"\
   -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
-  -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.7.0
+  -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.1.2
````
**sist2-admin/frontend/package-lock.json** (generated, 12 lines changed)
````diff
@@ -10491,9 +10491,9 @@
       "integrity": "sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE="
     },
     "node_modules/webpack": {
-      "version": "5.78.0",
-      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.78.0.tgz",
-      "integrity": "sha512-gT5DP72KInmE/3azEaQrISjTvLYlSM0j1Ezhht/KLVkrqtv10JoP/RXhwmX/frrutOPuSq3o5Vq0ehR/4Vmd1g==",
+      "version": "5.75.0",
+      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.75.0.tgz",
+      "integrity": "sha512-piaIaoVJlqMsPtX/+3KTTO6jfvrSYgauFVdt8cr9LTHKmcq/AMd4mhzsiP7ZF/PGRNPGA8336jldh9l2Kt2ogQ==",
       "dev": true,
       "dependencies": {
         "@types/eslint-scope": "^3.7.3",
@@ -18719,9 +18719,9 @@
       "integrity": "sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE="
     },
     "webpack": {
-      "version": "5.78.0",
-      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.78.0.tgz",
-      "integrity": "sha512-gT5DP72KInmE/3azEaQrISjTvLYlSM0j1Ezhht/KLVkrqtv10JoP/RXhwmX/frrutOPuSq3o5Vq0ehR/4Vmd1g==",
+      "version": "5.75.0",
+      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.75.0.tgz",
+      "integrity": "sha512-piaIaoVJlqMsPtX/+3KTTO6jfvrSYgauFVdt8cr9LTHKmcq/AMd4mhzsiP7ZF/PGRNPGA8336jldh9l2Kt2ogQ==",
       "dev": true,
       "requires": {
         "@types/eslint-scope": "^3.7.3",
````
**sist2-admin/frontend/yarn.lock**

````diff
@@ -1390,14 +1390,14 @@
     thread-loader "^3.0.0"
     webpack "^5.54.0"
 
-"@vue/cli-plugin-router@^5.0.8", "@vue/cli-plugin-router@~5.0.8":
+"@vue/cli-plugin-router@~5.0.8":
   version "5.0.8"
   resolved "https://registry.npmjs.org/@vue/cli-plugin-router/-/cli-plugin-router-5.0.8.tgz"
   integrity sha512-Gmv4dsGdAsWPqVijz3Ux2OS2HkMrWi1ENj2cYL75nUeL+Xj5HEstSqdtfZ0b1q9NCce+BFB6QnHfTBXc/fCvMg==
   dependencies:
     "@vue/cli-shared-utils" "^5.0.8"
 
-"@vue/cli-plugin-vuex@^5.0.8", "@vue/cli-plugin-vuex@~5.0.8":
+"@vue/cli-plugin-vuex@~5.0.8":
   version "5.0.8"
   resolved "https://registry.npmjs.org/@vue/cli-plugin-vuex/-/cli-plugin-vuex-5.0.8.tgz"
   integrity sha512-HSYWPqrunRE5ZZs8kVwiY6oWcn95qf/OQabwLfprhdpFWAGtLStShjsGED2aDpSSeGAskQETrtR/5h7VqgIlBA==
@@ -5492,9 +5492,9 @@ webpack-virtual-modules@^0.4.2:
   integrity sha512-5tyDlKLqPfMqjT3Q9TAqf2YqjwmnUleZwzJi1A5qXnlBCdj2AtOJ6wAWdglTIDOPgOiOrXeBeFcsQ8+aGQ6QbA==
 
 webpack@^5.54.0:
-  version "5.78.0"
-  resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.78.0.tgz#836452a12416af2a7beae906b31644cb2562f9e6"
-  integrity sha512-gT5DP72KInmE/3azEaQrISjTvLYlSM0j1Ezhht/KLVkrqtv10JoP/RXhwmX/frrutOPuSq3o5Vq0ehR/4Vmd1g==
+  version "5.75.0"
+  resolved "https://registry.npmjs.org/webpack/-/webpack-5.75.0.tgz"
+  integrity sha512-piaIaoVJlqMsPtX/+3KTTO6jfvrSYgauFVdt8cr9LTHKmcq/AMd4mhzsiP7ZF/PGRNPGA8336jldh9l2Kt2ogQ==
   dependencies:
     "@types/eslint-scope" "^3.7.3"
     "@types/estree" "^0.0.51"
````
**sist2-vue/package-lock.json** (generated, 1613 lines changed): diff suppressed because it is too large.
**sist2-vue/package.json**

````diff
@@ -9,11 +9,10 @@
   "dependencies": {
     "@auth0/auth0-spa-js": "^2.0.2",
     "@egjs/vue-infinitegrid": "3.3.0",
-    "@tensorflow/tfjs": "^4.4.0",
     "axios": "^0.25.0",
     "bootstrap-vue": "^2.21.2",
     "core-js": "^3.6.5",
-    "d3": "^7.8.4",
+    "d3": "^5.6.1",
     "date-fns": "^2.21.3",
     "dom-to-image": "^2.6.0",
     "fslightbox-vue": "fslightbox-vue.tgz",
````
**App.vue**

````diff
@@ -19,7 +19,6 @@
 import NavBar from "@/components/NavBar";
 import {mapActions, mapGetters, mapMutations} from "vuex";
 import Sist2Api from "@/Sist2Api";
-import ModelsRepo from "@/ml/modelsRepo";
 import {setupAuth0} from "@/main";
 
 export default {
@@ -37,17 +36,6 @@ export default {
   mounted() {
     this.$store.dispatch("loadConfiguration").then(() => {
       this.$root.$i18n.locale = this.$store.state.optLang;
-      ModelsRepo.init(this.$store.getters.mlRepositoryList).catch(err => {
-        this.$bvToast.toast(
-          this.$t("ml.repoFetchError"),
-          {
-            title: this.$t("ml.repoFetchErrorTitle"),
-            noAutoHide: true,
-            toaster: "b-toaster-bottom-right",
-            headerClass: "toast-header-warning",
-            bodyClass: "toast-body-warning",
-          });
-      });
     });
 
     this.$store.subscribe((mutation) => {
````
**Sist2Api.ts**

````diff
@@ -361,20 +361,20 @@ class Sist2Api {
         });
     }
 
-    getTreemapStat(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/TMAP`;
+    getTreemapCsvUrl(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/1`;
     }
 
-    getMimeStat(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/MAGG`;
+    getMimeCsvUrl(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/2`;
     }
 
-    getSizeStat(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/SAGG`;
+    getSizeCsv(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/3`;
     }
 
-    getDateStat(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/DAGG`;
+    getDateCsv(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/4`;
     }
 }
 
````
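After the rename, the stats endpoints are numbered rather than mnemonic (1=treemap, 2=mime, 3=size, 4=date) and the components below consume them as CSV instead of JSON. Fetching one by hand, assuming a local `sist2 web` on the default bind address and a hypothetical index id:

```bash
# treemap CSV for index <id>; 2, 3 and 4 select the other aggregations
curl -s "http://localhost:4090/s/<id>/1" | head
```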
**AnalyzedContentSpan.vue** (deleted)

````diff
@@ -1,21 +0,0 @@
-<template>
-    <span :style="getStyle()">{{span.text}}</span>
-</template>
-
-<script>
-
-import ModelsRepo from "@/ml/modelsRepo";
-
-export default {
-    name: "AnalyzedContentSpan",
-    props: ["span", "text"],
-    methods: {
-        getStyle() {
-            return ModelsRepo.data[this.$store.getters.mlModel.name].labelStyles[this.span.label];
-        }
-    }
-}
-</script>
-
-<style scoped></style>
````
**AnalyzedContentSpanContainer.vue** (deleted)

````diff
@@ -1,75 +0,0 @@
-<template>
-    <div>
-        <b-card class="mb-2">
-            <AnalyzedContentSpan v-for="span of legend" :key="span.id" :span="span"
-                                 class="mr-2"></AnalyzedContentSpan>
-        </b-card>
-        <div class="content-div">
-            <AnalyzedContentSpan v-for="span of mergedSpans" :key="span.id" :span="span"></AnalyzedContentSpan>
-        </div>
-    </div>
-</template>
-
-<script>
-
-import AnalyzedContentSpan from "@/components/AnalyzedContentSpan.vue";
-import ModelsRepo from "@/ml/modelsRepo";
-
-export default {
-    name: "AnalyzedContentSpanContainer",
-    components: {AnalyzedContentSpan},
-    props: ["spans", "text"],
-    computed: {
-        legend() {
-            return Object.entries(ModelsRepo.data[this.$store.state.mlModel.name].legend)
-                .map(([label, name]) => ({
-                    text: name,
-                    id: label,
-                    label: label
-                }));
-        },
-        mergedSpans() {
-            const spans = this.spans;
-
-            const merged = [];
-
-            let lastLabel = null;
-            let fixSpace = false;
-            for (let i = 0; i < spans.length; i++) {
-
-                if (spans[i].label !== lastLabel) {
-                    let start = spans[i].wordIndex;
-                    const nextSpan = spans.slice(i + 1).find(s => s.label !== spans[i].label)
-                    let end = nextSpan ? nextSpan.wordIndex : undefined;
-
-                    if (end !== undefined && this.text[end - 1] === " ") {
-                        end -= 1;
-                        fixSpace = true;
-                    }
-
-                    merged.push({
-                        text: this.text.slice(start, end),
-                        label: spans[i].label,
-                        id: spans[i].wordIndex
-                    });
-
-                    if (fixSpace) {
-                        merged.push({
-                            text: " ",
-                            label: "O",
-                            id: end
-                        });
-                        fixSpace = false;
-                    }
-                    lastLabel = spans[i].label;
-                }
-            }
-
-            return merged;
-        },
-    },
-}
-</script>
-
-<style scoped></style>
````
**Stats chart components (d3)**

````diff
@@ -120,7 +120,7 @@ export default {
     update(indexId) {
         const svg = d3.select("#date-histogram");
 
-        d3.json(Sist2Api.getDateStat(indexId)).then(tabularData => {
+        d3.csv(Sist2Api.getDateCsv(indexId)).then(tabularData => {
             dateHistogram(tabularData.slice(), svg, this.$t("d3.dateHistogram"));
         });
     }
@@ -91,7 +91,7 @@ export default {
         const mimeSvgCount = d3.select("#agg-mime-count");
         const fillOpacity = this.$store.state.optTheme === "black" ? 0.9 : 0.6;
 
-        d3.json(Sist2Api.getMimeStat(indexId)).then(tabularData => {
+        d3.csv(Sist2Api.getMimeCsvUrl(indexId)).then(tabularData => {
             mimeBarCount(tabularData.slice(), mimeSvgCount, fillOpacity, this.$t("d3.mimeCount"));
         });
     }
@@ -90,7 +90,7 @@ export default {
         const mimeSvgSize = d3.select("#agg-mime-size");
         const fillOpacity = this.$store.state.optTheme === "black" ? 0.9 : 0.6;
 
-        d3.json(Sist2Api.getMimeStat(indexId)).then(tabularData => {
+        d3.csv(Sist2Api.getMimeCsvUrl(indexId)).then(tabularData => {
             mimeBarSize(tabularData.slice(), mimeSvgSize, fillOpacity, this.$t("d3.mimeSize"));
         });
     }
@@ -117,7 +117,7 @@ export default {
     update(indexId) {
         const svg = d3.select("#size-histogram");
 
-        d3.json(Sist2Api.getSizeStat(indexId)).then(tabularData => {
+        d3.csv(Sist2Api.getSizeCsv(indexId)).then(tabularData => {
            sizeHistogram(tabularData.slice(), svg, this.$t("d3.sizeHistogram"));
         });
     }
@@ -240,7 +240,7 @@ export default {
         .style("overflow", "visible")
         .style("font", "10px sans-serif");
 
-    d3.json(Sist2Api.getTreemapStat(indexId)).then(tabularData => {
+    d3.csv(Sist2Api.getTreemapCsvUrl(indexId)).then(tabularData => {
         tabularData.forEach(row => {
             row.taxonomy = row.path.split("/");
             row.size = Number(row.size);
````
**Debug info card component**

````diff
@@ -1,5 +1,5 @@
 <template>
-    <b-card v-if="$store.state.sist2Info.showDebugInfo" class="mb-4 mt-4">
+    <b-card class="mb-4 mt-4">
         <b-card-title><DebugIcon class="mr-1"></DebugIcon>{{ $t("debug") }}</b-card-title>
         <p v-html="$t('debugDescription')"></p>
 
````
**Document card component**

````diff
@@ -16,10 +16,6 @@ export default {
     props: ["doc"],
     computed: {
         featuredLineHtml() {
-            if (this.$store.getters.optFeaturedFields === undefined) {
-                return "";
-            }
-
             const scope = {doc: this.doc._source, humanDate: humanDate, humanFileSize: humanFileSize};
 
             return this.$store.getters.optFeaturedFields
````
@@ -1,36 +1,6 @@
|
|||||||
<template>
|
<template>
|
||||||
<Preloader v-if="loading"></Preloader>
|
<Preloader v-if="loading"></Preloader>
|
||||||
<div v-else-if="content">
|
<div v-else-if="content" class="content-div" v-html="content"></div>
|
||||||
<b-form inline class="my-2" v-if="ModelsRepo.getOptions().length > 0">
|
|
||||||
<b-checkbox class="ml-auto mr-2" :checked="optAutoAnalyze"
|
|
||||||
@input="setOptAutoAnalyze($event); $store.dispatch('updateConfiguration')">
|
|
||||||
{{ $t("ml.auto") }}
|
|
||||||
</b-checkbox>
|
|
||||||
<b-button :disabled="mlPredictionsLoading || mlLoading" @click="mlAnalyze" variant="primary"
|
|
||||||
>{{ $t("ml.analyzeText") }}
|
|
||||||
</b-button>
|
|
||||||
<b-select :disabled="mlPredictionsLoading || mlLoading" class="ml-2" v-model="mlModel">
|
|
||||||
<b-select-option :value="opt.value" v-for="opt of ModelsRepo.getOptions()">{{ opt.text }}
|
|
||||||
</b-select-option>
|
|
||||||
</b-select>
|
|
||||||
</b-form>
|
|
||||||
|
|
||||||
<b-progress v-if="mlLoading" variant="warning" show-progress :max="1" class="mb-3"
|
|
||||||
>
|
|
||||||
<b-progress-bar :value="modelLoadingProgress">
|
|
||||||
<strong>{{ ((modelLoadingProgress * modelSize) / (1024*1024)).toFixed(1) }}MB / {{
|
|
||||||
(modelSize / (1024 * 1024)).toFixed(1)
|
|
||||||
}}MB</strong>
|
|
||||||
</b-progress-bar>
|
|
||||||
</b-progress>
|
|
||||||
|
|
||||||
<b-progress v-if="mlPredictionsLoading" variant="primary" :value="modelPredictionProgress"
|
|
||||||
:max="content.length" class="mb-3"></b-progress>
|
|
||||||
|
|
||||||
<AnalyzedContentSpansContainer v-if="analyzedContentSpans.length > 0"
|
|
||||||
:spans="analyzedContentSpans" :text="rawContent"></AnalyzedContentSpansContainer>
|
|
||||||
<div v-else class="content-div" v-html="content"></div>
|
|
||||||
</div>
|
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
@@ -38,40 +8,22 @@ import Sist2Api from "@/Sist2Api";
|
|||||||
import Preloader from "@/components/Preloader";
|
import Preloader from "@/components/Preloader";
|
||||||
import Sist2Query from "@/Sist2Query";
|
import Sist2Query from "@/Sist2Query";
|
||||||
import store from "@/store";
|
import store from "@/store";
|
||||||
import BertNerModel from "@/ml/BertNerModel";
|
|
||||||
import AnalyzedContentSpansContainer from "@/components/AnalyzedContentSpanContainer.vue";
|
|
||||||
import ModelsRepo from "@/ml/modelsRepo";
|
|
||||||
import {mapGetters, mapMutations} from "vuex";
|
|
||||||
|
|
||||||
export default {
|
export default {
|
||||||
name: "LazyContentDiv",
|
name: "LazyContentDiv",
|
||||||
components: {AnalyzedContentSpansContainer, Preloader},
|
components: {Preloader},
|
||||||
props: ["docId"],
|
props: ["docId"],
|
||||||
data() {
|
data() {
|
||||||
return {
|
return {
|
||||||
ModelsRepo,
|
|
||||||
content: "",
|
content: "",
|
||||||
rawContent: "",
|
loading: true
|
||||||
loading: true,
|
|
||||||
modelLoadingProgress: 0,
|
|
||||||
modelPredictionProgress: 0,
|
|
||||||
mlPredictionsLoading: false,
|
|
||||||
mlLoading: false,
|
|
||||||
mlModel: null,
|
|
||||||
analyzedContentSpans: []
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
mounted() {
|
mounted() {
|
||||||
|
|
||||||
if (this.$store.getters.optMlDefaultModel) {
|
|
||||||
this.mlModel = this.$store.getters.optMlDefaultModel
|
|
||||||
} else {
|
|
||||||
this.mlModel = ModelsRepo.getDefaultModel();
|
|
||||||
}
|
|
||||||
|
|
||||||
const query = Sist2Query.searchQuery();
|
const query = Sist2Query.searchQuery();
|
||||||
|
|
||||||
if (this.$store.state.optHighlight) {
|
if (this.$store.state.optHighlight) {
|
||||||
|
|
||||||
const fields = this.$store.state.fuzzy
|
const fields = this.$store.state.fuzzy
|
||||||
? {"content.nGram": {}}
|
? {"content.nGram": {}}
|
||||||
: {content: {}};
|
: {content: {}};
|
||||||
@@ -115,28 +67,14 @@ export default {
             this.loading = false;
             if (resp.hits.hits.length === 1) {
                 this.content = this.getContent(resp.hits.hits[0]);
-            }
-
-            if (this.optAutoAnalyze) {
-                this.mlAnalyze();
+            } else {
+                console.log("FIXME: could not get content")
+                console.log(resp)
             }
         });
     },
-    computed: {
-        ...mapGetters(["optAutoAnalyze"]),
-        modelSize() {
-            const modelData = ModelsRepo.data[this.mlModel];
-            if (!modelData) {
-                return 0;
-            }
-            return modelData.size;
-        }
-    },
     methods: {
-        ...mapMutations(["setOptAutoAnalyze"]),
         getContent(doc) {
-            this.rawContent = doc._source.content;
-
             if (!doc.highlight) {
                 return doc._source.content;
             }
@@ -147,60 +85,10 @@ export default {
             if (doc.highlight.content) {
                 return doc.highlight.content[0];
             }
-        },
-        async getMlModel() {
-            if (this.$store.getters.mlModel.name !== this.mlModel) {
-                this.mlLoading = true;
-                this.modelLoadingProgress = 0;
-                const modelInfo = ModelsRepo.data[this.mlModel];
-
-                const model = new BertNerModel(
-                    modelInfo.vocabUrl,
-                    modelInfo.modelUrl,
-                    modelInfo.id2label,
-                )
-
-                await model.init(progress => this.modelLoadingProgress = progress);
-                this.$store.commit("setMlModel", {model, name: this.mlModel});
-
-                this.mlLoading = false;
-                return model
-            }
-
-            return this.$store.getters.mlModel.model;
-        },
-        async mlAnalyze() {
-            if (!this.content) {
-                return;
-            }
-
-            const modelInfo = ModelsRepo.data[this.mlModel];
-            if (modelInfo === undefined) {
-                return;
-            }
-
-            this.$store.commit("setOptMlDefaultModel", this.mlModel);
-            await this.$store.dispatch("updateConfiguration");
-
-            const model = await this.getMlModel();
-
-            this.analyzedContentSpans = [];
-
-            this.mlPredictionsLoading = true;
-
-            await model.predict(this.rawContent, results => {
-                results.forEach(result => result.label = modelInfo.humanLabels[result.label]);
-                this.analyzedContentSpans.push(...results);
-                this.modelPredictionProgress = results[results.length - 1].wordIndex;
-            });
-            this.mlPredictionsLoading = false;
         }
     }
 }
 </script>

-<style>
-.progress-bar {
-    transition: none;
-}
+<style scoped>
 </style>
@@ -49,7 +49,6 @@ export default {
         configReset: "Reset configuration",
         searchOptions: "Search options",
         treemapOptions: "Treemap options",
-        mlOptions: "Machine learning options",
         displayOptions: "Display options",
         opt: {
             lang: "Language",
@@ -79,10 +78,7 @@ export default {
             simpleLightbox: "Disable animations in image viewer",
             showTagPickerFilter: "Display the tag filter bar",
             featuredFields: "Featured fields Javascript template string. Will appear in the search results.",
-            featuredFieldsList: "Available variables",
-            autoAnalyze: "Automatically analyze text",
-            defaultModel: "Default model",
-            mlRepositories: "Model repositories (one per line)"
+            featuredFieldsList: "Available variables"
         },
         queryMode: {
             simple: "Simple",
@@ -175,12 +171,6 @@ export default {
             selectedIndex: "selected index",
             selectedIndices: "selected indices",
         },
-        ml: {
-            analyzeText: "Analyze",
-            auto: "Auto",
-            repoFetchError: "Failed to get list of models. Check browser console for more details.",
-            repoFetchErrorTitle: "Could not fetch model repositories",
-        }
     },
     de: {
         filePage: {
@@ -260,8 +250,8 @@ export default {
             vidPreviewInterval: "Videovorschau Framedauer in ms",
             simpleLightbox: "Schalte Animationen im Image-Viewer ab",
             showTagPickerFilter: "Zeige die Tag-Filter-Leiste",
-            featuredFields: "Variablen, welche zusätzlich in den Suchergebnissen angezeigt werden können.",
-            featuredFieldsList: "verfügbare Variablen"
+            featuredFields: "Ausgewählte Felder Javascript Vorlage String. Wird in den Suchergebnissen angezeigt.",
+            featuredFieldsList: "Verfügbare Variablen"
         },
         queryMode: {
             simple: "Einfach",
@@ -343,10 +333,10 @@ export default {
             random: "zufällig",
         },
         d3: {
-            mimeCount: "Anzahl nach Medientyp",
-            mimeSize: "Größen nach Medientyp",
-            dateHistogram: "Änderungszeiten",
-            sizeHistogram: "Dateigrößen",
+            mimeCount: "Anzahlverteilung nach Medientyp",
+            mimeSize: "Größenverteilung nach Medientyp",
+            dateHistogram: "Verteilung der Änderungszeiten",
+            sizeHistogram: "Verteilung der Dateigrößen",
         },
         indexPicker: {
             selectNone: "keinen auswählen",
@@ -1,77 +0,0 @@
-import BertTokenizer from "@/ml/BertTokenizer";
-import * as tf from "@tensorflow/tfjs";
-import axios from "axios";
-
-export default class BertNerModel {
-    vocabUrl;
-    modelUrl;
-
-    id2label;
-    _tokenizer;
-    _model;
-    inputSize = 128;
-
-    _previousWordId = null;
-
-    constructor(vocabUrl, modelUrl, id2label) {
-        this.vocabUrl = vocabUrl;
-        this.modelUrl = modelUrl;
-        this.id2label = id2label;
-    }
-
-    async init(onProgress) {
-        await Promise.all([this.loadTokenizer(), this.loadModel(onProgress)]);
-    }
-
-    async loadTokenizer() {
-        const vocab = (await axios.get(this.vocabUrl)).data;
-        this._tokenizer = new BertTokenizer(vocab);
-    }
-
-    async loadModel(onProgress) {
-        this._model = await tf.loadGraphModel(this.modelUrl, {onProgress});
-    }
-
-    alignLabels(labels, wordIds, words) {
-        const result = [];
-
-        for (let i = 0; i < this.inputSize; i++) {
-            const label = labels[i];
-            const wordId = wordIds[i];
-
-            if (wordId === -1) {
-                continue;
-            }
-            if (wordId === this._previousWordId) {
-                continue;
-            }
-
-            result.push({
-                word: words[wordId].text, wordIndex: words[wordId].index, label: label
-            });
-            this._previousWordId = wordId;
-        }
-
-        return result;
-    }
-
-    async predict(text, callback) {
-        this._previousWordId = null;
-        const encoded = this._tokenizer.encodeText(text, this.inputSize)
-
-        for (let chunk of encoded.inputChunks) {
-            const rawResult = tf.tidy(() => this._model.execute({
-                input_ids: tf.tensor2d(chunk.inputIds, [1, this.inputSize], "int32"),
-                token_type_ids: tf.tensor2d(chunk.segmentIds, [1, this.inputSize], "int32"),
-                attention_mask: tf.tensor2d(chunk.inputMask, [1, this.inputSize], "int32"),
-            }));
-
-            const labelIds = await tf.argMax(rawResult, -1);
-            const labelIdsArray = await labelIds.array();
-            const labels = labelIdsArray[0].map(id => this.id2label[id]);
-            rawResult.dispose()
-
-            callback(this.alignLabels(labels, chunk.wordIds, encoded.words))
-        }
-    }
-}
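The deleted class above was the entry point for in-browser NER. A minimal usage sketch of its public API; the URLs and label maps below are illustrative placeholders, not values from this repository:

    // Hypothetical model/vocab locations; in sist2 these came from a
    // ModelsRepo entry, not hard-coded URLs.
    import BertNerModel from "@/ml/BertNerModel";

    const model = new BertNerModel(
        "https://example.com/ner/vocab.json",   // assumed vocab location
        "https://example.com/ner/model.json",   // assumed TF.js graph model
        {0: "O", 1: "B-PER", 2: "I-PER"},       // example id2label map
    );

    // init() fetches the vocab and the TF.js graph model in parallel;
    // onProgress receives a 0..1 fraction (used to drive the progress bar).
    await model.init(progress => console.log(`loading: ${Math.round(progress * 100)}%`));

    // predict() encodes the text into 128-token chunks and invokes the
    // callback once per chunk with {word, wordIndex, label} spans.
    await model.predict("John Smith lives in Montreal.", spans => {
        spans.forEach(span => console.log(span.word, span.label));
    });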
@@ -1,184 +0,0 @@
-import {zip, chunk} from "underscore";
-
-const UNK_INDEX = 100;
-const CLS_INDEX = 101;
-const SEP_INDEX = 102;
-const CONTINUING_SUBWORD_PREFIX = "##";
-
-function isWhitespace(ch) {
-    return /\s/.test(ch);
-}
-
-function isInvalid(ch) {
-    return (ch.charCodeAt(0) === 0 || ch.charCodeAt(0) === 0xfffd);
-}
-
-const punctuations = '[~`!@#$%^&*(){}[];:"\'<,.>?/\\|-_+=';
-
-/** To judge whether it's a punctuation. */
-function isPunctuation(ch) {
-    return punctuations.indexOf(ch) !== -1;
-}
-
-export default class BertTokenizer {
-    vocab;
-
-    constructor(vocab) {
-        this.vocab = vocab;
-    }
-
-    tokenize(text) {
-        const charOriginalIndex = [];
-        const cleanedText = this.cleanText(text, charOriginalIndex);
-        const origTokens = cleanedText.split(' ');
-
-        let charCount = 0;
-        const tokens = origTokens.map((token) => {
-            token = token.toLowerCase();
-            const tokens = this.runSplitOnPunctuation(token, charCount, charOriginalIndex);
-            charCount += token.length + 1;
-            return tokens;
-        });
-
-        let flattenTokens = [];
-        for (let index = 0; index < tokens.length; index++) {
-            flattenTokens = flattenTokens.concat(tokens[index]);
-        }
-        return flattenTokens;
-    }
-
-    /* Performs invalid character removal and whitespace cleanup on text. */
-    cleanText(text, charOriginalIndex) {
-        text = text.replace(/\?/g, "").trim();
-
-        const stringBuilder = [];
-        let originalCharIndex = 0;
-        let newCharIndex = 0;
-
-        for (const ch of text) {
-            // Skip the characters that cannot be used.
-            if (isInvalid(ch)) {
-                originalCharIndex += ch.length;
-                continue;
-            }
-            if (isWhitespace(ch)) {
-                if (stringBuilder.length > 0 && stringBuilder[stringBuilder.length - 1] !== ' ') {
-                    stringBuilder.push(' ');
-                    charOriginalIndex[newCharIndex] = originalCharIndex;
-                    originalCharIndex += ch.length;
-                } else {
-                    originalCharIndex += ch.length;
-                    continue;
-                }
-            } else {
-                stringBuilder.push(ch);
-                charOriginalIndex[newCharIndex] = originalCharIndex;
-                originalCharIndex += ch.length;
-            }
-            newCharIndex++;
-        }
-        return stringBuilder.join('');
-    }
-
-    /* Splits punctuation on a piece of text. */
-    runSplitOnPunctuation(text, count, charOriginalIndex) {
-        const tokens = [];
-        let startNewWord = true;
-        for (const ch of text) {
-            if (isPunctuation(ch)) {
-                tokens.push({text: ch, index: charOriginalIndex[count]});
-                count += ch.length;
-                startNewWord = true;
-            } else {
-                if (startNewWord) {
-                    tokens.push({text: '', index: charOriginalIndex[count]});
-                    startNewWord = false;
-                }
-                tokens[tokens.length - 1].text += ch;
-                count += ch.length;
-            }
-        }
-        return tokens;
-    }
-
-    encode(words) {
-        let outputTokens = [];
-        const wordIds = [];
-
-        for (let i = 0; i < words.length; i++) {
-            let chars = [...words[i].text];
-
-            let isUnknown = false;
-            let start = 0;
-            let subTokens = [];
-
-            while (start < chars.length) {
-                let end = chars.length;
-                let currentSubstring = null;
-                while (start < end) {
-                    let substr = chars.slice(start, end).join('');
-
-                    if (start > 0) {
-                        substr = CONTINUING_SUBWORD_PREFIX + substr;
-                    }
-                    if (this.vocab.includes(substr)) {
-                        currentSubstring = this.vocab.indexOf(substr);
-                        break;
-                    }
-
-                    --end;
-                }
-                if (currentSubstring == null) {
-                    isUnknown = true;
-                    break;
-                }
-                subTokens.push(currentSubstring);
-                start = end;
-            }
-
-            if (isUnknown) {
-                outputTokens.push(UNK_INDEX);
-                wordIds.push(i);
-            } else {
-                subTokens.forEach(tok => {
-                    outputTokens.push(tok);
-                    wordIds.push(i)
-                });
-            }
-        }
-
-        return {tokens: outputTokens, wordIds};
-    }
-
-    encodeText(inputText, inputSize) {
-
-        const tokenized = this.tokenize(inputText);
-        const encoded = this.encode(tokenized);
-
-        const encodedTokenChunks = chunk(encoded.tokens, inputSize - 2);
-        const encodedWordIdChunks = chunk(encoded.wordIds, inputSize - 2);
-
-        const chunks = [];
-
-        zip(encodedTokenChunks, encodedWordIdChunks).forEach(([tokens, wordIds]) => {
-            const inputIds = [CLS_INDEX, ...tokens, SEP_INDEX];
-            const segmentIds = Array(inputIds.length).fill(0);
-            const inputMask = Array(inputIds.length).fill(1);
-            wordIds = [-1, ...wordIds, -1];
-
-            while (inputIds.length < inputSize) {
-                inputIds.push(0);
-                inputMask.push(0);
-                segmentIds.push(0);
-                wordIds.push(-1);
-            }
-
-            chunks.push({inputIds, inputMask, segmentIds, wordIds})
-        });
-
-        return {
-            inputChunks: chunks,
-            words: tokenized
-        };
-    }
-}
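The interesting part of the deleted tokenizer is the WordPiece loop in encode(). A worked example under a made-up two-entry vocabulary (real BERT vocabs have ~30k entries; ids 100/101/102 for [UNK]/[CLS]/[SEP] are the ones hard-coded above):

    import BertTokenizer from "@/ml/BertTokenizer";

    // Sparse array standing in for a real vocab file; the indices are the
    // token ids the model would receive.
    const vocab = [];
    vocab[1500] = "index";   // whole-word piece
    vocab[1501] = "##ing";   // continuation piece, prefixed with "##"

    const tokenizer = new BertTokenizer(vocab);

    // "indexing" is not in the vocab, so the longest-prefix loop emits
    // "index" + "##ing"; both sub-tokens map back to word id 0 so that
    // alignLabels() can later collapse them into one labelled word.
    const {tokens, wordIds} = tokenizer.encode([{text: "indexing", index: 0}]);
    console.log(tokens);   // [1500, 1501]
    console.log(wordIds);  // [0, 0]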
@@ -1,43 +0,0 @@
-import axios from "axios";
-
-class ModelsRepo {
-    _repositories;
-    data = {};
-
-    async init(repositories) {
-        this._repositories = repositories;
-
-        const data = await Promise.all(this._repositories.map(this._loadRepository));
-
-        data.forEach(models => {
-            models.forEach(model => {
-                this.data[model.name] = model;
-            })
-        });
-    }
-
-    async _loadRepository(repository) {
-        const data = (await axios.get(repository)).data;
-        data.forEach(model => {
-            model["modelUrl"] = new URL(model["modelPath"], repository).href;
-            model["vocabUrl"] = new URL(model["vocabPath"], repository).href;
-        });
-        return data;
-    }
-
-    getOptions() {
-        return Object.values(this.data).map(model => ({
-            text: `${model.name} (${Math.round(model.size / (1024*1024))}MB)`,
-            value: model.name
-        }));
-    }
-
-    getDefaultModel() {
-        if (Object.values(this.data).length === 0) {
-            return null;
-        }
-        return Object.values(this.data).find(model => model.default).name;
-    }
-}
-
-export default new ModelsRepo();
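For reference, _loadRepository() above expected each repository URL (e.g. the default sist2-ner-models repo.json) to return a JSON array of model descriptors. The field names below are the ones the deleted code actually reads; the concrete values are illustrative guesses, not the real repository contents:

    // Hypothetical repo.json payload consumed by ModelsRepo.init().
    const exampleRepoJson = [
        {
            name: "bert-ner-en",                  // key in ModelsRepo.data
            size: 110 * 1024 * 1024,              // bytes, shown by getOptions()
            default: true,                        // picked by getDefaultModel()
            modelPath: "bert-ner-en/model.json",  // resolved against the repo URL
            vocabPath: "bert-ner-en/vocab.json",  // resolved against the repo URL
            id2label: {0: "O", 1: "B-PER"},       // passed to BertNerModel
            humanLabels: {"B-PER": "Person"},     // used to prettify span labels
        },
    ];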
@@ -57,9 +57,6 @@ export default new Vuex.Store({
         optVidPreviewInterval: 700,
         optSimpleLightbox: true,
         optShowTagPickerFilter: true,
-        optMlRepositories: "https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json",
-        optAutoAnalyze: false,
-        optMlDefaultModel: null,

         _onLoadSelectedIndices: [] as string[],
         _onLoadSelectedMimeTypes: [] as string[],
@@ -89,11 +86,7 @@ export default new Vuex.Store({

         uiMimeMap: [] as any[],

-        auth0Token: null,
-        mlModel: {
-            model: null,
-            name: null
-        },
+        auth0Token: null
    },
    mutations: {
        setUiShowDetails: (state, val) => state.uiShowDetails = val,
@@ -179,9 +172,6 @@ export default new Vuex.Store({
        setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
        setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val,
        setOptShowTagPickerFilter: (state, val) => state.optShowTagPickerFilter = val,
-        setOptAutoAnalyze: (state, val) => {state.optAutoAnalyze = val},
-        setOptMlRepositories: (state, val) => {state.optMlRepositories = val},
-        setOptMlDefaultModel: (state, val) => {state.optMlDefaultModel = val},

        setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
        setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@@ -204,7 +194,6 @@ export default new Vuex.Store({
            // noop
        },
        setAuth0Token: (state, val) => state.auth0Token = val,
-        setMlModel: (state, val) => state.mlModel = val,
    },
    actions: {
        setSist2Info: (store, val) => {
@@ -361,7 +350,6 @@ export default new Vuex.Store({
    },
    modules: {},
    getters: {
-        mlModel: (state) => state.mlModel,
        seed: (state) => state.seed,
        getPathText: (state) => state.pathText,
        indices: state => state.indices,
@@ -428,12 +416,5 @@ export default new Vuex.Store({
        optSimpleLightbox: state => state.optSimpleLightbox,
        optShowTagPickerFilter: state => state.optShowTagPickerFilter,
        optFeaturedFields: state => state.optFeaturedFields,
-        optMlRepositories: state => state.optMlRepositories,
-        mlRepositoryList: state => {
-            const repos = state.optMlRepositories.split("\n")
-            return repos[0] == "" ? [] : repos;
-        },
-        optMlDefaultModel: state => state.optMlDefaultModel,
-        optAutoAnalyze: state => state.optAutoAnalyze,
    }
 })
@@ -25,8 +25,7 @@
         <b-form-select :options="themeOptions" :value="optTheme" @input="setOptTheme"></b-form-select>

         <label>{{ $t("opt.displayMode") }}</label>
-        <b-form-select :options="displayModeOptions" :value="optDisplay"
-                       @input="setOptDisplay"></b-form-select>
+        <b-form-select :options="displayModeOptions" :value="optDisplay" @input="setOptDisplay"></b-form-select>

         <label>{{ $t("opt.columns") }}</label>
         <b-form-select :options="columnsOptions" :value="optColumns" @input="setOptColumns"></b-form-select>
@@ -124,10 +123,7 @@
             }}
         </b-form-checkbox>

-        <b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{
-            $t("opt.highlight")
-        }}
-        </b-form-checkbox>
+        <b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{ $t("opt.highlight") }}</b-form-checkbox>
         <b-form-checkbox :checked="optTagOrOperator" @input="setOptTagOrOperator">{{
             $t("opt.tagOrOperator")
         }}
@@ -152,8 +148,7 @@
                      @input="setOptResultSize"></b-form-input>

        <label>{{ $t("opt.queryMode") }}</label>
-        <b-form-select :options="queryModeOptions" :value="optQueryMode"
-                       @input="setOptQueryMode"></b-form-select>
+        <b-form-select :options="queryModeOptions" :value="optQueryMode" @input="setOptQueryMode"></b-form-select>

        <label>{{ $t("opt.slideDuration") }}</label>
        <b-form-input :value="optLightboxSlideDuration" type="number" min="1"
@@ -164,17 +159,6 @@
                      @input="setOptVidPreviewInterval"></b-form-input>
        </b-card>

-        <h4 class="mt-3">{{ $t("mlOptions") }}</h4>
-        <b-card>
-            <label>{{ $t("opt.mlRepositories") }}</label>
-            <b-textarea rows="3" :value="optMlRepositories" @input="setOptMlRepositories"></b-textarea>
-            <br>
-            <b-form-checkbox :checked="optAutoAnalyze" @input="setOptAutoAnalyze">{{
-                $t("opt.autoAnalyze")
-            }}
-            </b-form-checkbox>
-        </b-card>
-
        <h4 class="mt-3">{{ $t("treemapOptions") }}</h4>
        <b-card>
            <label>{{ $t("opt.treemapType") }}</label>
@@ -327,8 +311,6 @@ export default {
            "optSimpleLightbox",
            "optShowTagPickerFilter",
            "optFeaturedFields",
-            "optMlRepositories",
-            "optAutoAnalyze",
        ]),
        clientWidth() {
            return window.innerWidth;
@@ -373,8 +355,6 @@ export default {
            "setOptSimpleLightbox",
            "setOptShowTagPickerFilter",
            "setOptFeaturedFields",
-            "setOptMlRepositories",
-            "setOptAutoAnalyze",
        ]),
        onResetClick() {
            localStorage.removeItem("sist2_configuration");
@@ -7,11 +7,7 @@
             <Preloader></Preloader>
         </b-card>

-        <b-alert v-show="!uiLoading && showEsConnectionError" show variant="danger" class="mt-2">
-            {{ $t("toast.esConnErr") }}
-        </b-alert>
-
-        <b-card v-show="!uiLoading && !showEsConnectionError" id="search-panel">
+        <b-card v-show="!uiLoading" id="search-panel">
             <SearchBar @show-help="showHelp=true"></SearchBar>
             <b-row>
                 <b-col style="height: 70px;" sm="6">
@@ -98,8 +94,7 @@ export default Vue.extend({
         docChecksums: new Set(),
         searchBusy: false,
         Sist2Query: Sist2Query,
-        showHelp: false,
-        showEsConnectionError: false
+        showHelp: false
     }),
     computed: {
         ...mapGetters(["indices", "optDisplay"]),
@@ -148,15 +143,6 @@ export default Vue.extend({
                 this.uiLoading = false;
                 this.search(true);
             });
-        }).catch(error => {
-            console.log(error);
-
-            if (error.response.status == 503 || error.response.status == 500) {
-                this.showEsConnectionError = true;
-                this.uiLoading = false;
-            } else {
-                this.showErrorToast();
-            }
         });
     },
     methods: {
@@ -267,20 +253,11 @@ export default Vue.extend({
                 },
                 size: 0
             }).then(res => {
-                const range = {
+                return {
                     min: res.aggregations.dateMin.value,
                     max: res.aggregations.dateMax.value,
                 }
-
-                if (range.min == null) {
-                    range.min = 0;
-                    range.max = 1;
-                } else if (range.min == range.max) {
-                    range.max += 1;
-                }
-
-                return range;
-            });
+            })
         },
         appendFunc() {
             if (!this.$store.state.uiReachedScrollEnd && this.search && !this.searchBusy) {
@@ -83,7 +83,6 @@ void database_open(database_t *db) {
     LOG_DEBUGF("database.c", "Opening database %s (%d)", db->filename, db->type);

     CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
-    sqlite3_busy_timeout(db->db, 1000);

     CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA cache_size = -200000;", NULL, NULL, NULL));
     CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA synchronous = OFF;", NULL, NULL, NULL));
@@ -329,18 +328,18 @@ database_iterator_t *database_create_document_iterator(database_t *db) {
            " WHEN sc.json_data IS NULL THEN"
            "  CASE"
            "   WHEN t.tag IS NULL THEN"
-           "    json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime)"
+           "    document.json_data"
            "   ELSE"
-           "    json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime, '$.tag', json_group_array(t.tag))"
+           "    json_set(document.json_data, '$.tag', json_group_array(t.tag))"
            "  END"
            " ELSE"
            "  CASE"
            "   WHEN t.tag IS NULL THEN"
-           "    json_patch(json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime), sc.json_data)"
+           "    json_patch(document.json_data, sc.json_data)"
            "   ELSE"
            // This will overwrite any tags specified in the sidecar file!
            // TODO: concatenate the two arrays?
-           "    json_set(json_patch(document.json_data, sc.json_data), '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime, '$.tag', json_group_array(t.tag))"
+           "    json_set(json_patch(document.json_data, sc.json_data), '$.tag', json_group_array(t.tag))"
            "  END"
            " END"
            " FROM document"
@@ -582,33 +581,18 @@ void database_add_work(database_t *db, job_t *job) {
            ret = sqlite3_step(db->insert_parse_job_stmt);

            if (ret == SQLITE_FULL) {
-                sqlite3_reset(db->insert_parse_job_stmt);
-                pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
                usleep(1000000);
-                pthread_mutex_lock(&db->ipc_ctx->db_mutex);
-                continue;
            } else {
                CRASH_IF_STMT_FAIL(ret);
            }

-            ret = sqlite3_reset(db->insert_parse_job_stmt);
-            if (ret == SQLITE_FULL) {
-                pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
-                usleep(100000);
-                pthread_mutex_lock(&db->ipc_ctx->db_mutex);
-            } else if (ret != SQLITE_OK) {
-                LOG_FATALF("database.c", "sqlite3_reset returned error %d", ret);
-            }
-        } while (ret != SQLITE_DONE && ret != SQLITE_OK);
+            CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->insert_parse_job_stmt));
+        } while (ret != SQLITE_DONE);
    } else if (job->type == JOB_BULK_LINE) {
        do {
            sqlite3_bind_text(db->insert_index_job_stmt, 1, job->bulk_line->doc_id, -1, SQLITE_STATIC);
            sqlite3_bind_int(db->insert_index_job_stmt, 2, job->bulk_line->type);
-            if (job->bulk_line->type != ES_BULK_LINE_DELETE) {
                sqlite3_bind_text(db->insert_index_job_stmt, 3, job->bulk_line->line, -1, SQLITE_STATIC);
-            } else {
-                sqlite3_bind_null(db->insert_index_job_stmt, 3);
-            }

            ret = sqlite3_step(db->insert_index_job_stmt);

@@ -627,8 +611,6 @@ void database_add_work(database_t *db, job_t *job) {
                pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
                usleep(100000);
                pthread_mutex_lock(&db->ipc_ctx->db_mutex);
-            } else if (ret != SQLITE_OK) {
-                LOG_FATALF("database.c", "sqlite3_reset returned error %d", ret);
            }

        } while (ret != SQLITE_DONE && ret != SQLITE_OK);
@@ -18,14 +18,6 @@ typedef enum {
     FTS_DATABASE
 } database_type_t;

-typedef enum {
-    DATABASE_STAT_INVALID,
-    DATABASE_STAT_TREEMAP,
-    DATABASE_STAT_MIME_AGG,
-    DATABASE_STAT_SIZE_AGG,
-    DATABASE_STAT_DATE_AGG,
-} database_stat_type_d;
-
 typedef enum {
     JOB_UNDEFINED,
     JOB_BULK_LINE,
@@ -112,14 +104,14 @@ database_iterator_t *database_create_document_iterator(database_t *db);
 cJSON *database_document_iter(database_iterator_t *);

 #define database_document_iter_foreach(element, iter) \
-    for (cJSON *(element) = database_document_iter(iter); (element) != NULL; (element) = database_document_iter(iter))
+    for (cJSON *element = database_document_iter(iter); element != NULL; element = database_document_iter(iter))

 database_iterator_t *database_create_delete_list_iterator(database_t *db);

 char * database_delete_list_iter(database_iterator_t *iter);

 #define database_delete_list_iter_foreach(element, iter) \
-    for (char *(element) = database_delete_list_iter(iter); (element) != NULL; (element) = database_delete_list_iter(iter))
+    for (char *element = database_delete_list_iter(iter); element != NULL; element = database_delete_list_iter(iter))


 cJSON *database_incremental_scan_begin(database_t *db);
@@ -140,16 +132,12 @@ treemap_row_t database_treemap_iter(database_iterator_t *iter);

 void database_generate_stats(database_t *db, double treemap_threshold);

-database_stat_type_d database_get_stat_type_by_mnemonic(const char *name);
-
 job_t *database_get_work(database_t *db, job_type_t job_type);

 void database_add_work(database_t *db, job_t *job);

 //void database_index(database_t *db);

-cJSON *database_get_stats(database_t *db, database_stat_type_d type);
-
 #define CRASH_IF_STMT_FAIL(x) do { \
     int return_value = x; \
     if (return_value != SQLITE_DONE && return_value != SQLITE_ROW) { \

@@ -6,7 +6,6 @@
 #define SIZE_BUCKET (long)(5 * 1000 * 1000)
 #define DATE_BUCKET (long)(2629800) // ~30 days

-
 database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold) {

     sqlite3_stmt *stmt;
@@ -158,85 +157,3 @@ void database_generate_stats(database_t *db, double treemap_threshold) {
     LOG_INFO("database.c", "Done!");
 }
-
-database_stat_type_d database_get_stat_type_by_mnemonic(const char *name) {
-    if (strcmp(name, "TMAP") == 0) {
-        return DATABASE_STAT_TREEMAP;
-    }
-    if (strcmp(name, "MAGG") == 0) {
-        return DATABASE_STAT_MIME_AGG;
-    }
-    if (strcmp(name, "SAGG") == 0) {
-        return DATABASE_STAT_SIZE_AGG;
-    }
-    if (strcmp(name, "DAGG") == 0) {
-        return DATABASE_STAT_DATE_AGG;
-    }
-
-    return DATABASE_STAT_INVALID;
-}
-
-cJSON *database_get_stats(database_t *db, database_stat_type_d type) {
-
-    sqlite3_stmt *stmt;
-
-    switch (type) {
-        case DATABASE_STAT_TREEMAP:
-            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
-                    db->db, "SELECT path,size FROM stats_treemap", -1, &stmt, NULL
-            ));
-            break;
-        case DATABASE_STAT_DATE_AGG:
-            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
-                    db->db, "SELECT bucket,count FROM stats_date_agg", -1, &stmt, NULL
-            ));
-            break;
-        case DATABASE_STAT_SIZE_AGG:
-            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
-                    db->db, "SELECT bucket,count FROM stats_size_agg", -1, &stmt, NULL
-            ));
-            break;
-        case DATABASE_STAT_MIME_AGG:
-            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
-                    db->db, "SELECT mime,size,count FROM stats_mime_agg", -1, &stmt, NULL
-            ));
-            break;
-        case DATABASE_STAT_INVALID:
-        default:
-            LOG_FATALF("database_stats.c", "Invalid stat type: %d", type);
-    }
-
-    cJSON *json = cJSON_CreateArray();
-
-    int ret;
-    do {
-        ret = sqlite3_step(stmt);
-        CRASH_IF_STMT_FAIL(ret);
-
-        if (ret == SQLITE_DONE) {
-            break;
-        }
-
-        cJSON *row = cJSON_CreateObject();
-
-        switch (type) {
-            case DATABASE_STAT_TREEMAP:
-                cJSON_AddStringToObject(row, "path", (const char *) sqlite3_column_text(stmt, 0));
-                cJSON_AddNumberToObject(row, "size", (double) sqlite3_column_int64(stmt, 1));
-                break;
-            case DATABASE_STAT_DATE_AGG:
-            case DATABASE_STAT_SIZE_AGG:
-                cJSON_AddNumberToObject(row, "bucket", (double) sqlite3_column_int64(stmt, 0));
-                cJSON_AddNumberToObject(row, "count", (double) sqlite3_column_int64(stmt, 1));
-                break;
-            case DATABASE_STAT_MIME_AGG:
-                cJSON_AddStringToObject(row, "mime", (const char *) sqlite3_column_text(stmt, 0));
-                cJSON_AddNumberToObject(row, "size", (double) sqlite3_column_int64(stmt, 1));
-                cJSON_AddNumberToObject(row, "count", (double) sqlite3_column_int64(stmt, 2));
-                break;
-        }
-
-        cJSON_AddItemToArray(json, row);
-    } while (TRUE);
-
-    return json;
-}
5  src/database/database_stats.h  Normal file
@@ -0,0 +1,5 @@
+#ifndef SIST2_DATABASE_STATS_H
+#define SIST2_DATABASE_STATS_H
+
+
+#endif //SIST2_DATABASE_STATS_H
@@ -64,16 +64,20 @@ void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
     cJSON_Delete(line);
 }

-void delete_document(const char *document_id) {
-    es_bulk_line_t bulk_line;
-
-    bulk_line.type = ES_BULK_LINE_DELETE;
-    bulk_line.next = NULL;
-    strcpy(bulk_line.doc_id, document_id);
+void index_json_func(job_t *job) {
+    elastic_index_line(job->bulk_line);
+}
+
+void delete_document(const char *document_id) {
+    es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t));
+
+    bulk_line->type = ES_BULK_LINE_DELETE;
+    bulk_line->next = NULL;
+    strcpy(bulk_line->doc_id, document_id);

     tpool_add_work(IndexCtx.pool, &(job_t) {
             .type = JOB_BULK_LINE,
-            .bulk_line = &bulk_line,
+            .bulk_line = bulk_line,
     });
 }

@@ -95,7 +99,6 @@ void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
             .type = JOB_BULK_LINE,
             .bulk_line = bulk_line,
     });
-    free(bulk_line);
 }

 void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {
@@ -91,6 +91,8 @@ char *build_json_string(document_t *doc) {
     } else {
         cJSON_AddStringToObject(json, "mime", mime_text);
     }
+    cJSON_AddNumberToObject(json, "size", (double) doc->size);
+    cJSON_AddNumberToObject(json, "mtime", doc->mtime);

     // Ignore root directory in the file path
     doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
@@ -120,6 +122,8 @@ char *build_json_string(document_t *doc) {
         cJSON_AddStringToObject(json, "path", "");
     }

+    cJSON_AddStringToObject(json, "_id", doc->doc_id);
+
     // Metadata
     meta_line_t *meta = doc->meta_head;
     while (meta != NULL) {
14  src/main.c
@@ -195,10 +195,6 @@ void initialize_scan_context(scan_args_t *args) {
     ScanCtx.mobi_ctx.content_size = args->content_size;
     ScanCtx.mobi_ctx.log = log_callback;
     ScanCtx.mobi_ctx.logf = logf_callback;
-    ScanCtx.mobi_ctx.store = write_thumbnail_callback;
-    ScanCtx.mobi_ctx.enable_tn = args->tn_count > 0;
-    ScanCtx.mobi_ctx.tn_size = args->tn_size;
-    ScanCtx.mobi_ctx.tn_qscale = args->tn_quality;

     // TEXT
     ScanCtx.text_ctx.content_size = args->content_size;
@@ -316,20 +312,17 @@ void sist2_index(index_args_t *args) {
     database_open(db);
     database_iterator_t *iterator = database_create_document_iterator(db);
     database_document_iter_foreach(json, iterator) {
-        char doc_id[SIST_DOC_ID_LEN];
-        strcpy(doc_id, cJSON_GetObjectItem(json, "_id")->valuestring);
-        cJSON_DeleteItemFromObject(json, "_id");
-
+        const char *doc_id = cJSON_GetObjectItem(json, "_id")->valuestring;
         if (args->print) {
             print_json(json, doc_id);
         } else {
             index_json(json, doc_id);
             cnt += 1;
         }
-        cJSON_Delete(json);
     }

     free(iterator);
+    database_close(db, FALSE);

     if (!args->print) {
         database_iterator_t *del_iter = database_create_delete_list_iterator(db);
@@ -337,11 +330,8 @@ void sist2_index(index_args_t *args) {
             delete_document(id);
             free(id);
         }
-        free(del_iter);
     }

-    database_close(db, FALSE);
-
     tpool_wait(IndexCtx.pool);
     tpool_destroy(IndexCtx.pool);
@@ -51,11 +51,11 @@
 #include <ctype.h>
 #include "git_hash.h"

-#define VERSION "3.0.4"
+#define VERSION "3.0.0"
 static const char *const Version = VERSION;
 static const int VersionMajor = 3;
 static const int VersionMinor = 0;
-static const int VersionPatch = 4;
+static const int VersionPatch = 0;

 #ifndef SIST_PLATFORM
 #define SIST_PLATFORM unknown
@@ -149,11 +149,6 @@ void worker_proc_cleanup(tpool_t *pool) {
     if (ProcData.index_db != NULL) {
         database_close(ProcData.index_db, FALSE);
     }

-    if (IndexCtx.needs_es_connection) {
-        elastic_cleanup();
-    }
-
     database_close(ProcData.ipc_db, FALSE);
 }

@@ -247,7 +242,6 @@ static void *tpool_worker(void *arg) {
     pthread_mutex_lock(&pool->shm->mutex);
     pthread_cond_signal(&pool->shm->done_working_cond);
     pthread_mutex_unlock(&pool->shm->mutex);
-    worker_proc_cleanup(pool);
 #endif

     return NULL;
@@ -20,40 +20,49 @@ static struct mg_http_serve_opts DefaultServeOpts = {

 void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {

-    if (hm->uri.len != SIST_INDEX_ID_LEN + 7) {
+    if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
         HTTP_REPLY_NOT_FOUND
         return;
     }

     char arg_index_id[SIST_INDEX_ID_LEN];
-    char arg_stat_type[5];

     memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
     *(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';
-    memcpy(arg_stat_type, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, 4);
-    *(arg_stat_type + sizeof(arg_stat_type) - 1) = '\0';

-    database_stat_type_d stat_type = database_get_stat_type_by_mnemonic(arg_stat_type);
-    if (stat_type == DATABASE_STAT_INVALID) {
+    index_t *index = web_get_index_by_id(arg_index_id);
+    if (index == NULL) {
         HTTP_REPLY_NOT_FOUND
         return;
     }

-    database_t *db = web_get_database(arg_index_id);
-    if (db == NULL) {
-        LOG_DEBUGF("serve.c", "Could not get database for index: %s", arg_index_id);
-        HTTP_REPLY_NOT_FOUND
+    const char *file;
+    switch (atoi(hm->uri.ptr + 3 + SIST_INDEX_ID_LEN)) {
+        case 1:
+            file = "treemap.csv";
+            break;
+        case 2:
+            file = "mime_agg.csv";
+            break;
+        case 3:
+            file = "size_agg.csv";
+            break;
+        case 4:
+            file = "date_agg.csv";
+            break;
+        default:
             return;
     }

-    cJSON *json = database_get_stats(db, stat_type);
-    char *json_str = cJSON_PrintUnformatted(json);
-
-    web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
-    mg_send(nc, json_str, strlen(json_str));
-
-    free(json_str);
-    cJSON_Delete(json);
+    char disposition[8192];
+    snprintf(disposition, sizeof(disposition),
+             "Content-Disposition: inline; filename=\"%s\"\r\nCache-Control: max-age=31536000\r\n", file);
+
+    char full_path[PATH_MAX];
+    strcpy(full_path, index->path);
+    strcat(full_path, file);
+
+    struct mg_http_serve_opts opts = {};
+    mg_http_serve_file(nc, hm, full_path, &opts);
 }

 void serve_index_html(struct mg_connection *nc, struct mg_http_message *hm) {
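A side note on the route change above, for orientation (a sketch; the "/s/" prefix and the id layout are inferred from the pointer offsets in stats_files(), and the index id below is a made-up placeholder):

    // 3.0.4 side: four-letter mnemonic (TMAP/MAGG/SAGG/DAGG), answered
    // with JSON generated on demand from the index database.
    const indexId = "f0ca51cd";  // hypothetical, length = SIST_INDEX_ID_LEN - 1
    const treemap = await (await fetch(`/s/${indexId}/TMAP`)).json();

    // process-po side: numeric selector 1..4, answered by serving a
    // pre-generated CSV file (treemap.csv, mime_agg.csv, ...) from disk.
    const treemapCsv = await (await fetch(`/s/${indexId}/1`)).text();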
@@ -277,23 +286,16 @@ void index_info(struct mg_connection *nc) {
     cJSON *json = cJSON_CreateObject();
     cJSON *arr = cJSON_AddArrayToObject(json, "indices");

+    cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
     cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
     cJSON_AddStringToObject(json, "version", Version);

-#ifdef SIST_DEBUG_INFO
-    cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
     cJSON_AddStringToObject(json, "esVersion", es_version);
-    cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
-    cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
-    cJSON_AddBoolToObject(json, "dev", WebCtx.dev);
-    cJSON_AddBoolToObject(json, "showDebugInfo", TRUE);
-#else
-    cJSON_AddBoolToObject(json, "showDebugInfo", FALSE);
-#endif

     cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
     cJSON_AddBoolToObject(json, "esVersionLegacy", IS_LEGACY_VERSION(WebCtx.es_version));
+    cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
+    cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
     cJSON_AddStringToObject(json, "lang", WebCtx.lang);
+    cJSON_AddBoolToObject(json, "dev", WebCtx.dev);

     cJSON_AddBoolToObject(json, "auth0Enabled", WebCtx.auth0_enabled);
     if (WebCtx.auth0_enabled) {
@@ -666,9 +668,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
         mg_send(nc, r->body, r->size);
     } else if (r->status_code == 0) {
         sist_log("serve.c", LOG_SIST_ERROR, "Could not connect to elasticsearch!");
-
-        mg_http_reply(nc, 503, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER,
-                      "Elasticsearch connection error, see server logs.");
     } else {
         sist_logf("serve.c", LOG_SIST_WARNING, "ElasticSearch error during query (%d)", r->status_code);
         if (r->size != 0) {
2  third-party/libscan/CMakeLists.txt  vendored
@@ -106,7 +106,7 @@ find_library(MUPDF_LIB NAMES liblibmupdf.a)
 find_library(CMS_LIB NAMES lcms2)
 find_library(JAS_LIB NAMES jasper)
 find_library(GUMBO_LIB NAMES gumbo)
-find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/8/ /usr/lib/gcc/aarch64-linux-gnu/8/)
+find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/11/)
 find_package(Leptonica CONFIG REQUIRED)
 find_package(FFMPEG REQUIRED)
 find_package(libraw CONFIG REQUIRED)
39  third-party/libscan/libscan/mobi/scan_mobi.c  vendored
@@ -1,44 +1,9 @@
 #include "scan_mobi.h"

 #include "../../third-party/libmobi/src/mobi.h"
-#include "../media/media.h"
 #include <errno.h>
 #include "stdlib.h"

-int store_cover(scan_mobi_ctx_t *ctx, document_t *doc, MOBIData *m) {
-    MOBIExthHeader *exth = mobi_get_exthrecord_by_tag(m, EXTH_COVEROFFSET);
-
-    if (exth == NULL) {
-        return FALSE;
-    }
-
-    uint32_t offset = mobi_decode_exthvalue(exth->data, exth->size);
-    size_t first_resource = mobi_get_first_resource_record(m);
-    size_t uid = first_resource + offset;
-    MOBIPdbRecord *record = mobi_get_record_by_seqnumber(m, uid);
-
-    if (record == NULL || record->size < 4) {
-        return FALSE;
-    }
-
-    scan_media_ctx_t media_ctx = {
-            .tn_count = TRUE,
-            .tn_size = ctx->tn_size,
-            .tn_qscale = ctx->tn_qscale,
-            .tesseract_lang = NULL,
-            .tesseract_path = NULL,
-            .read_subtitles = FALSE,
-            .max_media_buffer = 0,
-            .log = ctx->log,
-            .logf = ctx->logf,
-            .store = ctx->store,
-    };
-
-    store_image_thumbnail(&media_ctx, record->data, record->size, doc, "img.jpg");
-
-    return TRUE;
-}
-
 void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {

     MOBIData *m = mobi_init();
@@ -107,10 +72,6 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {

     APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

-    if (ctx->enable_tn) {
-        store_cover(ctx, doc, m);
-    }
-
     free(content_str);
     free(buf);
     text_buffer_destroy(&tex);
5  third-party/libscan/libscan/mobi/scan_mobi.h  vendored
@@ -7,11 +7,6 @@ typedef struct {
     long content_size;
     log_callback_t log;
     logf_callback_t logf;
-    store_callback_t store;
-
-    int tn_qscale;
-    int tn_size;
-    int enable_tn;
 } scan_mobi_ctx_t;

 void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc);
2  third-party/libscan/third-party/antiword  vendored
Submodule third-party/libscan/third-party/antiword updated: ddb042143e...badfdac845
2  third-party/libscan/third-party/libmobi  vendored
Submodule third-party/libscan/third-party/libmobi updated: 864e3a86f2...395dbde361