Compare commits

..

No commits in common. "d4820d2fad7a43e5a2724727f1f08e3793837e6d" and "4e1109c5286e54271f11e3ee72e00d111dba76ff" have entirely different histories.

197 changed files with 22382 additions and 46825 deletions

View File

@ -1,9 +0,0 @@
FROM simon987/sist2-build
RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash
RUN apt update -y; apt install -y nodejs && rm -rf /var/lib/apt/lists/*
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

View File

@ -1,16 +0,0 @@
{
"name": "sist2-dev",
"dockerComposeFile": [
"docker-compose.yml"
],
"service": "sist2-dev",
"customizations": {
"vscode": {
"extensions": [
"ms-vscode.cpptools-extension-pack"
]
}
},
"remoteUser": "root",
"workspaceFolder": "/app/"
}

View File

@ -1,8 +0,0 @@
version: "3"
services:
sist2-dev:
build: .
command: sleep infinity
volumes:
- ../:/app

View File

@ -8,13 +8,13 @@ Testing/
**/cmake_install.cmake **/cmake_install.cmake
**/CMakeCache.txt **/CMakeCache.txt
**/CMakeFiles/ **/CMakeFiles/
.cmake
LICENSE LICENSE
Makefile Makefile
**/*.md **/*.md
**/*.cbp **/*.cbp
VERSION VERSION
**/node_modules/ **/node_modules/
.git/
sist2-*-linux-debug sist2-*-linux-debug
sist2-*-linux sist2-*-linux
sist2_debug sist2_debug
@ -29,12 +29,3 @@ sist2
**/core **/core
*.a *.a
tmp_scan/ tmp_scan/
Dockerfile
Dockerfile.arm64
docker-compose.yml
state.db
*-journal
build/
__pycache__/
sist2-vue/dist
sist2-admin/frontend/dist

3
.gitattributes vendored Normal file
View File

@ -0,0 +1,3 @@
CMakeModules/* linguist-vendored
**/*_generated.c linguist-vendored
**/*_generated.h linguist-vendored

20
.gitignore vendored
View File

@ -10,9 +10,6 @@ Makefile
LOG LOG
sist2* sist2*
!sist2-vue/ !sist2-vue/
!sist2-admin
!sist2_admin
!sist2.py
*.sist2/ *.sist2/
bundle*.css bundle*.css
bundle.js bundle.js
@ -29,20 +26,3 @@ test_i_inc
node_modules/ node_modules/
.cmake/ .cmake/
i_inc/ i_inc/
state.db
*.pyc
!sist2-admin/frontend/dist
*.js.map
sist2-vue/dist
sist2-admin/frontend/dist
.ninja_deps
.ninja_log
build.ninja
src/web/static_generated.c
src/magic_generated.c
src/index/static_generated.c
*.sist2
*-shm
*-journal
.vscode
*.fts

3
.gitmodules vendored
View File

@ -10,6 +10,3 @@
[submodule "third-party/libscan/third-party/libmobi"] [submodule "third-party/libscan/third-party/libmobi"]
path = third-party/libscan/third-party/libmobi path = third-party/libscan/third-party/libmobi
url = https://github.com/bfabiszewski/libmobi url = https://github.com/bfabiszewski/libmobi
[submodule "third-party/libscan/libscan-test-files"]
path = third-party/libscan/libscan-test-files
url = https://github.com/simon987/libscan-test-files

View File

@ -1,11 +1,11 @@
cmake_minimum_required(VERSION 3.7) cmake_minimum_required(VERSION 3.7)
project(sist2)
set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD 11)
project(sist2 C)
option(SIST_DEBUG "Build a debug executable" on) option(SIST_DEBUG "Build a debug executable" on)
option(SIST_FAST "Enable more optimisation flags" off) option(SIST_FAST "Enable more optimisation flags" off)
option(SIST_DEBUG_INFO "Turn on debug information in web interface" on) option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
add_compile_definitions( add_compile_definitions(
"SIST_PLATFORM=${SIST_PLATFORM}" "SIST_PLATFORM=${SIST_PLATFORM}"
@ -15,67 +15,50 @@ if (SIST_DEBUG)
add_compile_definitions( add_compile_definitions(
"SIST_DEBUG=${SIST_DEBUG}" "SIST_DEBUG=${SIST_DEBUG}"
) )
set(VCPKG_BUILD_TYPE debug)
else ()
set(VCPKG_BUILD_TYPE release)
endif() endif()
if (SIST_DEBUG_INFO)
add_compile_definitions(
"SIST_DEBUG_INFO=${SIST_DEBUG_INFO}"
)
endif ()
add_subdirectory(third-party/libscan) add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off) set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse) add_subdirectory(third-party/argparse)
add_executable( add_executable(sist2
sist2
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/main.c src/main.c
src/sist.h src/sist.h
src/io/walk.h src/io/walk.c src/io/walk.h src/io/walk.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c src/parsing/parse.h src/parsing/parse.c
src/parsing/magic_util.c src/parsing/magic_util.h
src/io/serialize.h src/io/serialize.c src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/index/web.c src/index/web.h src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h src/web/serve.c src/web/serve.h
src/web/web_util.c src/web/web_util.h
src/index/elastic.c src/index/elastic.h src/index/elastic.c src/index/elastic.h
src/util.c src/util.h src/util.c src/util.h
src/ctx.c src/ctx.h src/ctx.h src/types.h
src/types.h
src/log.c src/log.h src/log.c src/log.h
src/cli.c src/cli.h src/cli.c src/cli.h
src/stats.c src/stats.h src/ctx.c
src/parsing/sidecar.c src/parsing/sidecar.h src/parsing/sidecar.c src/parsing/sidecar.h
src/database/database.c src/database/database.h
src/parsing/fs_util.h
src/auth0/auth0_c_api.h src/auth0/auth0_c_api.cpp # argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/database/database_stats.c
src/database/database_schema.c
src/database/database_fts.c
src/web/web_fts.c
) )
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(PkgConfig REQUIRED) find_package(PkgConfig REQUIRED)
pkg_search_module(GLIB REQUIRED glib-2.0)
find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED) find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED) find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED) find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED) find_library(MAGIC_LIB
find_package(unofficial-sqlite3 CONFIG REQUIRED) NAMES libmagic.so.1 magic
PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
)
target_include_directories( target_include_directories(
@ -84,6 +67,7 @@ target_include_directories(
${CMAKE_SOURCE_DIR}/third-party/utf8.h/ ${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/ ${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/ ${CMAKE_SOURCE_DIR}/
${GLIB_INCLUDE_DIRS}
) )
target_compile_options( target_compile_options(
@ -131,10 +115,8 @@ else ()
PRIVATE PRIVATE
-Ofast -Ofast
# -g
-fno-stack-protector -fno-stack-protector
-fomit-frame-pointer -fomit-frame-pointer
-w
) )
endif () endif ()
@ -148,16 +130,20 @@ target_link_libraries(
sist2 sist2
z z
lmdb
cjson
argparse argparse
${GLIB_LDFLAGS}
unofficial::mongoose::mongoose unofficial::mongoose::mongoose
CURL::libcurl CURL::libcurl
pthread pthread
c
scan scan
${MAGIC_LIB} ${MAGIC_LIB}
unofficial::sqlite3::sqlite3
) )
add_custom_target( add_custom_target(

View File

@ -1,58 +1,28 @@
FROM simon987/sist2-build as build FROM simon987/sist2-build as build
MAINTAINER simon987 <me@simon987.net> MAINTAINER simon987 <me@simon987.net>
ENV DEBIAN_FRONTEND=noninteractive
RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash
RUN apt update -y; apt install -y nodejs && rm -rf /var/lib/apt/lists/*
WORKDIR /build/ WORKDIR /build/
COPY . .
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2 || mv sist2_debug sist2
COPY scripts scripts FROM --platform="linux/amd64" ubuntu:21.10
COPY schema schema
COPY CMakeLists.txt .
COPY third-party third-party
COPY src src
COPY sist2-vue sist2-vue
COPY sist2-admin sist2-admin
RUN cd sist2-vue/ && npm install && npm run build RUN apt update && apt install -y curl libasan5 libmagic1 && rm -rf /var/lib/apt/lists/*
RUN cd sist2-admin/frontend/ && npm install && npm run build
RUN mkdir build && cd build && cmake -DSIST_PLATFORM=x64_linux_docker -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .. RUN mkdir -p /usr/share/tessdata && \
RUN cd build && make -j$(nproc) cd /usr/share/tessdata/ && \
RUN strip build/sist2 || mv build/sist2_debug build/sist2 curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
FROM --platform="linux/amd64" ubuntu@sha256:965fbcae990b0467ed5657caceaec165018ef44a4d2d46c7cdea80a9dff0d1ea curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
ENTRYPOINT ["/root/sist2"]
ENV LANG C.UTF-8 ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8 ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"] COPY --from=build /build/sist2 /root/sist2
RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libasan5 libmagic1 python3 \
python3-pip git tesseract-ocr && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
curl -o /usr/share/tesseract-ocr/4.00/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/osd.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/osd.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/deu.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/deu.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/equ.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/equ.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/pol.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/pol.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata
# sist2
COPY --from=build /build/build/sist2 /root/sist2
# sist2-admin
WORKDIR /root/sist2-admin
COPY sist2-admin/requirements.txt /root/sist2-admin/
RUN python3 -m pip install --no-cache -r /root/sist2-admin/requirements.txt
COPY --from=build /build/sist2-admin/ /root/sist2-admin/

View File

@ -3,39 +3,26 @@ MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/ WORKDIR /build/
ADD . /build/ ADD . /build/
RUN mkdir build && cd build && cmake -DSIST_PLATFORM=arm64_linux_docker -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .. RUN cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN cd build && make -j$(nproc) RUN make -j$(nproc)
RUN strip build/sist2 || mv build/sist2_debug build/sist2 RUN strip sist2
FROM --platform=linux/arm64/v8 ubuntu@sha256:537da24818633b45fcb65e5285a68c3ec1f3db25f5ae5476a7757bc8dfae92a3 FROM --platform="linux/arm64/v8" ubuntu:21.10
WORKDIR /root RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
ENV LANG C.UTF-8 ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8 ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"] ENTRYPOINT ["/root/sist2"]
RUN apt update && apt install -y curl libasan5 libmagic1 tesseract-ocr python3-pip python3 git && rm -rf /var/lib/apt/lists/* COPY --from=build /build/sist2 /root/sist2
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
curl -o /usr/share/tesseract-ocr/4.00/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/osd.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/osd.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/deu.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/deu.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/equ.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/equ.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/pol.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/pol.traineddata &&\
curl -o /usr/share/tesseract-ocr/4.00/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata
# sist2
COPY --from=build /build/build/sist2 /root/sist2
# sist2-admin
COPY sist2-admin/requirements.txt sist2-admin/
RUN python3 -m pip install --no-cache -r sist2-admin/requirements.txt
COPY --from=build /build/sist2-admin/ sist2-admin/

144
README.md
View File

@ -10,13 +10,13 @@ sist2 (Simple incremental search tool)
*Warning: sist2 is in early development* *Warning: sist2 is in early development*
![search panel](docs/sist2.gif) ![search panel](docs/sist2.png)
## Features ## Features
* Fast, low memory usage, multi-threaded * Fast, low memory usage, multi-threaded
* Manage & schedule scan jobs with simple web interface (Docker only)
* Mobile-friendly Web interface * Mobile-friendly Web interface
* Portable (all its features are packaged in a single executable)
* Extracts text and metadata from common file types \* * Extracts text and metadata from common file types \*
* Generates thumbnails \* * Generates thumbnails \*
* Incremental scanning * Incremental scanning
@ -24,64 +24,47 @@ sist2 (Simple incremental search tool)
* Recursive scan inside archive files \*\* * Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\* * OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization * Stats page & disk utilisation visualization
* Named-entity recognition (client-side) \*\*\*\*
\* See [format support](#format-support) \* See [format support](#format-support)
\*\* See [Archive files](#archive-files) \*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr) \*\*\* See [OCR](#ocr)
\*\*\*\* See [Named-Entity Recognition](#NER)
![stats](docs/stats.png)
## Getting Started ## Getting Started
### Using Docker Compose *(Windows/Linux/Mac)* 1. Have an Elasticsearch (>= 6.8.X, ideally >=7.14.0) instance running
```yaml
version: "3"
services:
elasticsearch:
image: elasticsearch:7.17.9
restart: unless-stopped
environment:
- "discovery.type=single-node"
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
sist2-admin:
image: simon987/sist2:3.1.0-x64-linux
restart: unless-stopped
volumes:
- ./sist2-admin-data/:/sist2-admin/
- /:/host
ports:
- 4090:4090 # sist2
- 8080:8080 # sist2-admin
working_dir: /root/sist2-admin/
entrypoint: python3 /root/sist2-admin/sist2_admin/app.py
```
Navigate to http://localhost:8080/ to configure sist2-admin.
### Using the executable file *(Linux/WSL only)*
1. Choose search backend (See [comparison](#search-backends)):
* **Elasticsearch**: have an Elasticsearch (version >= 6.8.X, ideally >=7.14.0) instance running
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch) 1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
2. *(or)* Run using docker: 1. *(or)* Run using docker:
```bash ```bash
docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.17.9 docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.14.0
``` ```
* **SQLite**: No installation required 1. *(or)* Run using docker-compose:
```yaml
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
environment:
- discovery.type=single-node
- "ES_JAVA_OPTS=-Xms1G -Xmx2G"
```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
3. *(or)* `docker pull simon987/sist2:2.12.1-x64-linux`
2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases). 1. See [Usage guide](docs/USAGE.md)
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
3. See [usage guide](docs/USAGE.md) for command line usage.
Example usage: \* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
1. Scan a directory: `sist2 scan ~/Documents --output ./documents.sist2` ## Example usage
2. Prepare search index:
* **Elasticsearch**: `sist2 index --es-url http://localhost:9200 ./documents.sist2` See [Usage guide](docs/USAGE.md) for more details
* **SQLite**: `sist2 index --search-index ./search.sist2 ./documents.sist2`
3. Start web interface: `sist2 web ./documents.sist2` 1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
1. Start web interface: `sist2 web ./docs_idx`
## Format support ## Format support
@ -98,8 +81,8 @@ Example usage:
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - | | html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no | | tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title | | docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | no | author, title | | doc (MS Word 97-2003) | antiword | yes | yes | author, title |
| mobi, azw, azw3 | libmobi | yes | yes | author, title | | mobi, azw, azw3 | libmobi | yes | no | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* | | wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - | | json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
@ -126,7 +109,7 @@ Download the language data files with your package manager (`apt install tessera
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files). directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed. (hin, jpn, eng, fra, rus, spa) pre-installed.
You can use the `+` separator to specify multiple languages. The language You can use the `+` separator to specify multiple languages. The language
name must be identical to the `*.traineddata` file installed on your system name must be identical to the `*.traineddata` file installed on your system
@ -140,80 +123,39 @@ sist2 scan --ocr-images --ocr-lang eng ~/Images/Screenshots/
sist2 scan --ocr-ebooks --ocr-images --ocr-lang eng+chi_sim ~/Chinese-Bilingual/ sist2 scan --ocr-ebooks --ocr-images --ocr-lang eng+chi_sim ~/Chinese-Bilingual/
``` ```
### Search backends
sist2 v3.0.7+ supports SQLite search backend. The SQLite search backend has
fewer features and generally comparable query performance for medium-size
indices, but it uses much less memory and is easier to set up.
| | SQLite | Elasticsearch |
|----------------------------------------------|:----------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------:|
| Requires separate search engine installation | | ✓ |
| Memory footprint | ~20MB | >500MB |
| Query syntax | [fts5](https://www.sqlite.org/fts5.html) | [query_string](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax) |
| Fuzzy search | | ✓ |
| Media Types tree real-time updating | | ✓ |
| Search in file `path` | | ✓ |
| Manual tagging | ✓ | ✓ |
| User scripts | | ✓ |
| Media Type breakdown for search results | | ✓ |
### NER
sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported repository URL to
**Configuration** > **Machine learning options** > **Model repositories**
to enable it.
The text processing is done in your browser, no data is sent to any third-party services.
See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
#### List of available repositories:
| URL | Maintainer | Purpose |
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
<details>
<summary>Screenshot</summary>
![ner](docs/ner.png)
</details>
## Build from source ## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries
### Using docker ### With docker (recommended)
```bash ```bash
git clone --recursive https://github.com/simon987/sist2/ git clone --recursive https://github.com/simon987/sist2/
cd sist2 cd sist2
docker build . -t my-sist2-image docker build . -f ./Dockerfile -t my-sist2-image
# Copy sist2 executable from docker image
docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
``` ```
### Using a linux computer ### On a linux computer
1. Install compile-time dependencies 1. Install compile-time dependencies
```bash ```bash
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
``` ```
2. Install vcpkg using my fork: https://github.com/simon987/vcpkg 1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
3. Install vcpkg dependencies
1. Install vcpkg dependencies
```bash ```bash
vcpkg install curl[core,openssl] sqlite3[core,fts5] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp] vcpkg install curl[core,openssl]
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw jasper lcms gumbo
``` ```
4. Build 1. Build
```bash ```bash
git clone --recursive https://github.com/simon987/sist2/ git clone --recursive https://github.com/simon987/sist2/
(cd sist2-vue; npm install; npm run build)
(cd sist2-admin/frontend; npm install; npm run build)
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake . cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make make
``` ```

View File

@ -12,7 +12,7 @@ REWRITE_URL=""
sist2 scan \ sist2 scan \
--threads 14 \ --threads 14 \
--mem-throttle 32768 \ --mem-throttle 32768 \
--thumbnail-quality 2 \ --quality 1.0 \
--name $NAME \ --name $NAME \
--ocr-lang=eng+chi_sim \ --ocr-lang=eng+chi_sim \
--ocr-ebooks \ --ocr-ebooks \

View File

@ -12,7 +12,7 @@ REWRITE_URL=""
sist2 scan \ sist2 scan \
--threads 14 \ --threads 14 \
--mem-throttle 32768 \ --mem-throttle 32768 \
--thumbnail-quality 2 \ --quality 1.0 \
--name $NAME \ --name $NAME \
--ocr-lang=eng+chi_sim \ --ocr-lang=eng+chi_sim \
--ocr-ebooks \ --ocr-ebooks \

View File

@ -1,24 +0,0 @@
version: "3"
services:
elasticsearch:
image: elasticsearch:7.17.9
container_name: sist2-es
environment:
- "discovery.type=single-node"
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
sist2-admin:
build:
context: .
container_name: sist2-admin
volumes:
- /mnt/array/sist2-admin-data/:/sist2-admin/
- /:/host
ports:
- 4090:4090
# NOTE: Don't export this port publicly!
- 8080:8080
working_dir: /root/sist2-admin/
entrypoint: python3
command:
- /root/sist2-admin/sist2_admin/app.py

View File

@ -1,97 +1,154 @@
# Usage # Usage
*More examples (specifically with docker/compose) are in progress*
* [scan](#scan)
* [options](#scan-options)
* [examples](#scan-examples)
* [index format](#index-format)
* [index](#index)
* [options](#index-options)
* [examples](#index-examples)
* [web](#web)
* [options](#web-options)
* [examples](#web-examples)
* [rewrite_url](#rewrite_url)
* [elasticsearch](#elasticsearch)
* [exec-script](#exec-script)
* [tagging](#tagging)
* [sidecar files](#sidecar-files)
``` ```
Usage: sist2 scan [OPTION]... PATH Usage: sist2 scan [OPTION]... PATH
or: sist2 index [OPTION]... INDEX or: sist2 index [OPTION]... INDEX
or: sist2 sqlite-index [OPTION]... INDEX
or: sist2 web [OPTION]... INDEX... or: sist2 web [OPTION]... INDEX...
or: sist2 exec-script [OPTION]... INDEX or: sist2 exec-script [OPTION]... INDEX
Lightning-fast file system indexer and search tool. Lightning-fast file system indexer and search tool.
-h, --help show this help message and exit -h, --help show this help message and exit
-v, --version Print version and exit. -v, --version Show version and exit
--verbose Turn on logging. --verbose Turn on logging
--very-verbose Turn on debug messages. --very-verbose Turn on debug messages
--json-logs Output logs in JSON format.
Scan options Scan options
-t, --threads=<int> Number of threads. DEFAULT: 1 -t, --threads=<int> Number of threads. DEFAULT=1
-q, --thumbnail-quality=<int> Thumbnail quality, on a scale of 0 to 100, 100 being the best. DEFAULT: 50 --mem-throttle=<int> Total memory threshold in MiB for scan throttling. DEFAULT=0
--thumbnail-size=<int> Thumbnail size, in pixels. DEFAULT: 552 -q, --thumbnail-quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1
--thumbnail-count=<int> Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1 --thumbnail-size=<int> Thumbnail size, in pixels. DEFAULT=500
--content-size=<int> Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768 --thumbnail-count=<int> Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1
-o, --output=<str> Output index file path. DEFAULT: index.sist2 --content-size=<int> Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768
--incremental If the output file path exists, only scan new or modified files. --incremental=<str> Reuse an existing index and only scan modified files.
--optimize-index Defragment index file after scan to reduce its file size. -o, --output=<str> Output directory. DEFAULT=index.sist2/
--rewrite-url=<str> Serve files from this url instead of from disk. --rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: index --name=<str> Index display name. DEFAULT: (name of the directory)
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1 --depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: don't scan, list: only save file names as text, shallow: don't scan archives inside archives. DEFAULT: recurse --archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--archive-passphrase=<str> Passphrase for encrypted archive files --archive-passphrase=<str> Passphrase for encrypted archive files
--ocr-lang=<str> Tesseract language (use 'tesseract --list-langs' to see which are installed on your machine) --ocr-lang=<str> Tesseract language (use 'tesseract --list-langs' to see which are installed on your machine)
--ocr-images Enable OCR'ing of image files. --ocr-images Enable OCR'ing of image files.
--ocr-ebooks Enable OCR'ing of ebook files. --ocr-ebooks Enable OCR'ing of ebook files.
-e, --exclude=<str> Files that match this regex will not be scanned. -e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type. --fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005 --treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MiB for files inside archives (see USAGE.md). DEFAULT: 2000 --mem-buffer=<int> Maximum memory buffer size per thread in MiB for files inside archives (see USAGE.md). DEFAULT: 2000
--read-subtitles Read subtitles from media files. --read-subtitles Read subtitles from media files.
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata). --fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
--checksums Calculate file checksums when scanning. --checksums Calculate file checksums when scanning.
--list-file=<str> Specify a list of newline-delimited paths to be scanned instead of normal directory traversal. Use '-' to read from stdin. --list-file=<str> Specify a list of newline-delimited paths to be scanned instead of normal directory traversal. Use '-' to read from stdin.
Index options Index options
-t, --threads=<int> Number of threads. DEFAULT: 1 -t, --threads=<int> Number of threads. DEFAULT=1
--es-url=<str> Elasticsearch url with port. DEFAULT: http://localhost:9200 --es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
--es-insecure-ssl Do not verify SSL connections to Elasticsearch. --es-index=<str> Elasticsearch index name. DEFAULT=sist2
--es-index=<str> Elasticsearch index name. DEFAULT: sist2 -p, --print Just print JSON documents to stdout.
-p, --print Print JSON documents to stdout instead of indexing to elasticsearch. --incremental-index Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch.
--incremental-index Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
--script-file=<str> Path to user script. --script-file=<str> Path to user script.
--mappings-file=<str> Path to Elasticsearch mappings. --mappings-file=<str> Path to Elasticsearch mappings.
--settings-file=<str> Path to Elasticsearch settings. --settings-file=<str> Path to Elasticsearch settings.
--async-script Execute user script asynchronously. --async-script Execute user script asynchronously.
--batch-size=<int> Index batch size. DEFAULT: 70 --batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. -f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
sqlite-index options
--search-index=<str> Path to search index. Will be created if it does not exist yet.
Web options Web options
--es-url=<str> Elasticsearch url. DEFAULT: http://localhost:9200 --es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--es-insecure-ssl Do not verify SSL connections to Elasticsearch. --es-index=<str> Elasticsearch index name. DEFAULT=sist2
--search-index=<str> Path to SQLite search index. --bind=<str> Listen on this address. DEFAULT=localhost:4090
--es-index=<str> Elasticsearch index name. DEFAULT: sist2
--bind=<str> Listen for connections on this address. DEFAULT: localhost:4090
--auth=<str> Basic auth in user:password format --auth=<str> Basic auth in user:password format
--auth0-audience=<str> API audience/identifier
--auth0-domain=<str> Application domain
--auth0-client-id=<str> Application client ID
--auth0-public-key-file=<str> Path to Auth0 public key file extracted from <domain>/pem
--tag-auth=<str> Basic auth in user:password format for tagging --tag-auth=<str> Basic auth in user:password format for tagging
--tagline=<str> Tagline in navbar --tagline=<str> Tagline in navbar
--dev Serve html & js files from disk (for development) --dev Serve html & js files from disk (for development)
--lang=<str> Default UI language. Can be changed by the user --lang=<str> Default UI language. Can be changed by the user
Exec-script options Exec-script options
--es-url=<str> Elasticsearch url. DEFAULT: http://localhost:9200 --es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--es-insecure-ssl Do not verify SSL connections to Elasticsearch. --es-index=<str> Elasticsearch index name. DEFAULT=sist2
--es-index=<str> Elasticsearch index name. DEFAULT: sist2
--script-file=<str> Path to user script. --script-file=<str> Path to user script.
--async-script Execute user script asynchronously. --async-script Execute user script asynchronously.
Made by simon987 <me@simon987.net>. Released under GPL-3.0 Made by simon987 <me@simon987.net>. Released under GPL-3.0
``` ```
#### Thumbnail database size estimation ## Scan
See chart below for rough estimate of thumbnail size vs. thumbnail size & quality arguments: ### Scan options
For example, `--thumbnail-size=500`, `--thumbnail-quality=50` for a directory with 8 million images will create a thumbnail database * `-t, --threads`
that is about `8000000 * 11.8kB = 94.4GB`. Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
* `--mem-throttle`
Total memory threshold in MiB for scan throttling. Worker threads will not start a new parse job
until the total memory usage of sist2 is below this threshold. Set to 0 to disable. DEFAULT=0
* `-q, --thumbnail-quality`
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best.
* `--thumbnail-size`
Thumbnail size in pixels.
* `--thumbnail-count`
Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image
every ~7s). Set to 0 to completely disable thumbnails.
* `--content-size`
Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
Repeated whitespace and special characters do not count toward this limit.
Set to 0 to completely disable content parsing.
* `--incremental`
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
will be copied to the new index and will not be parsed again.
* `-o, --output` Output directory.
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--name` Set the `name` option for the web module
* `--depth` Maximum scan dept. Set to 0 only scan files directly in the root directory, set to -1 for infinite depth
* `--archive` Archive file mode.
* skip: Don't parse
* list: Only get file names as text
* shallow: Don't parse archives inside archives.
* recurse: Scan archives recursively (default)
* `--ocr-lang`, `--ocr-ebooks`, `--ocr-images` See [OCR](../README.md#OCR)
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
part of the full absolute path.
![thumbnail_size](thumbnail_size.png) Examples:
* `-e ".*\.ttf"`: Ignore ttf files
* `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
* `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
* `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directory
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
* `--fast` Only index file names and mime type
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
will not be considered for the disk utilisation visualization; their size will be added to
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
and so on.
In effect, smaller `treemap-threshold` values will yield a more detailed
(but also a more cluttered and harder to read) visualization.
* `--mem-buffer` Maximum memory buffer size in MiB (per thread) for files inside archives. Media files
larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
* `--checksums` Calculate file checksums (SHA1) when scanning files. This option does not cause any additional read
operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
### Scan examples ### Scan examples
@ -100,70 +157,131 @@ Simple scan
sist2 scan ~/Documents sist2 scan ~/Documents
sist2 scan \ sist2 scan \
--threads 4 --content-size 16000000 --thumbnail-quality 2 --archive shallow \ --threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
--name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \ --name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
~/Documents -o ./documents.sist2 ~/Documents -o ./documents.idx/
``` ```
Incremental scan Incremental scan
```
If the index file does not exist, `--incremental` has no effect. sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```bash
sist scan ~/Documents -o ./documents.sist2
sist scan ~/Documents -o ./documents.sist2 --incremental
# or
sist scan ~/Documents -o ./documents.sist2 --incremental
sist scan ~/Documents -o ./documents.sist2 --incremental
``` ```
### Index documents to Elasticsearch search backend ### Index format
```bash A typical `ndjson` type index structure looks like this:
sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index.sist2 ```
sist2 index ./my_index.sist2 documents.idx/
├── descriptor.json
├── _index_main.ndjson.zst
├── treemap.csv
├── agg_mime.csv
├── agg_date.csv
├── add_size.csv
├── thumbs/
| ├── data.mdb
| └── lock.mdb
├── tags/
| ├── data.mdb
| └── lock.mdb
└── meta/
├── data.mdb
└── lock.mdb
``` ```
#### Index documents to SQLite search backend The `_index_*.ndjson.zst` files contain the document data in JSON format, in a compressed newline-delemited file.
The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
database containing the thumbnails.
The `descriptor.json` file contains general information about the index. The
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
The `.csv` are pre-computed aggregations necessary for the stats page.
*thumbs/*:
LMDB key-value store. Keys are **binary** 16-byte md5 hash* (`_id` field)
and values are raw image bytes.
*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
## Index
### Index options
* `--es-url`
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
same network.
* `--es-index`
Elasticsearch index name. DEFAULT=sist2
* `-p, --print`
Print index in JSON format to stdout.
* `--incremental-index`
Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
Only the new changes since the last scan will be sent.
* `--script-file`
Path to user script. See [Scripting](scripting.md).
* `--mappings-file`
Path to custom Elasticsearch mappings. If none is specified, [the bundled mappings](https://github.com/simon987/sist2/tree/master/schema) will be used.
* `--settings-file`
Path to custom Elasticsearch settings. *(See above)*
* `--async-script`
Use `wait_for_completion=false` elasticsearch option while executing user script.
(See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
* `--batch-size=<int>`
Index batch size. Indexing is generally faster with larger batches, but payloads that
are too large will fail and additional overhead for retrying with smaller sizes may slow
down the process.
* `-f, --force-reset`
Reset Elasticsearch mappings and settings.
* `-t, --threads` Number of threads to use. Ideally, choose a number equal to the number of logical cores of the machine hosting Elasticsearch.
### Index examples
**Push to elasticsearch**
```bash ```bash
# The search index will be created if it does not exist already sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index/
sist2 sqlite-index ./index1.sist2 --search-index search.sist2 sist2 index ./my_index/
sist2 sqlite-index ./index2.sist2 --search-index search.sist2
``` ```
**Save index in JSON format** **Save index in JSON format**
```bash ```bash
sist2 index --print ./my_index.sist2 > my_index.ndjson sist2 index --print ./my_index/ > my_index.ndjson
``` ```
**Inspect contents of an index** **Inspect contents of an index**
```bash ```bash
sist2 index --print ./my_index.sist2 | jq | less sist2 index --print ./my_index/ | jq | less
``` ```
## Web ## Web
### Web options
* `--es-url=<str>` Elasticsearch url.
* `--es-index`
Elasticsearch index name. DEFAULT=sist2
* `--bind=<str>` Listen on this address.
* `--auth=<str>` Basic auth in user:password format
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
`--auth` argument, but authentication is only applied the `/tag/` endpoint.
* `--tagline=<str>` When specified, will replace the default tagline in the navbar.
* `--dev` Serve html & js files from disk (for development, used to modify frontend files without having to recompile)
* `--lang=<str>` Set the default web UI language (See #180 for a list of supported languages, default
is `en`). The user can change the language in the configuration page
### Web examples ### Web examples
**Single index (Elasticsearch backend)** **Single index**
```bash ```bash
sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index.sist2 sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
``` ```
**Multiple indices (Elasticsearch backend)** **Multiple indices**
```bash ```bash
# Indices will be displayed in this order in the web interface # Indices will be displayed in this order in the web interface
sist2 web index1.sist2 index2.sist2 index3.sist2 index4.sist2 sist2 web index1 index2 index3 index4
``` ```
**SQLite search backend**
```bash
sist2 web --search-index search.sist2 index1.sist2
```
#### Auth0 authentication
See [auth0.md](auth0.md)
### rewrite_url ### rewrite_url
When the `rewrite_url` field is not empty, the web module ignores the `root` When the `rewrite_url` field is not empty, the web module ignores the `root`
@ -245,8 +363,8 @@ The sidecar file must have exactly the same file path and the `.s2meta` suffix.
``` ```
``` ```
sist2 scan ~/Documents -o ./docs.sist2 sist2 scan ~/Documents -o ./docs.idx
sist2 index ./docs.sist2 sist2 index ./docs.idx
``` ```
*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however, *NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,

View File

@ -1,19 +0,0 @@
# Authentication with Auth0
1. Create a new Auth0 application (Single page app)
2. Create a new Auth0 API:
1. Choose `RS256` signing algorithm
2. Set identifier (audience) to `https://sist2`
3. Download the Auth0 certificate from https://<domain>.auth0.com/pem (you can find the domain Applications->Basic information)
4. Extract the public key from the certificate using `openssl x509 -pubkey -noout -in cert.pem > pubkey.txt`
5. Start the sist2 web server
Example options:
```bash
sist2 web \
--auth0-client-id XXX \
--auth0-audience https://sist2 \
--auth0-domain YYY.auth0.com \
--auth0-public-key-file /ZZZ/pubkey.txt
```

Binary file not shown.

Before

Width:  |  Height:  |  Size: 448 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.7 MiB

BIN
docs/sist2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1011 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 169 KiB

View File

@ -67,8 +67,7 @@
"index": false "index": false
}, },
"mtime": { "mtime": {
"type": "date", "type": "integer"
"format": "epoch_millis"
}, },
"size": { "size": {
"type": "long" "type": "long"

View File

@ -1,8 +1,6 @@
#!/usr/bin/env bash #!/usr/bin/env bash
( rm -rf index.sist2/
cd ..
rm -rf index.sist2
python3 scripts/mime.py > src/parsing/mime_generated.c python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c python3 scripts/serve_static.py > src/web/static_generated.c
@ -10,4 +8,3 @@
python3 scripts/magic_static.py > src/magic_generated.c python3 scripts/magic_static.py > src/magic_generated.c
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
)

View File

@ -4,20 +4,14 @@ VCPKG_ROOT="/vcpkg"
git submodule update --init --recursive git submodule update --init --recursive
mkdir build rm -rf CMakeFiles CMakeCache.txt
( cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
cd build
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
make -j $(nproc) make -j $(nproc)
strip sist2 strip sist2
./sist2 -v > VERSION ./sist2 -v > VERSION
) mv sist2 sist2-x64-linux
mv build/sist2 sist2-x64-linux
(
cd build
rm -rf CMakeFiles CMakeCache.txt rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .. cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j $(nproc) make -j $(nproc)
) mv sist2_debug sist2-x64-linux-debug
mv build/sist2_debug sist2-x64-linux-debug

View File

@ -4,19 +4,14 @@ VCPKG_ROOT="/vcpkg"
git submodule update --init --recursive git submodule update --init --recursive
mkdir build rm -rf CMakeFiles CMakeCache.txt
( cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
cd build
cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
make -j $(nproc) make -j $(nproc)
strip sist2 strip sist2
) mv sist2 sist2-arm64-linux
mv build/sist2 sist2-arm64-linux
rm -rf CMakeFiles CMakeCache.txt rm -rf CMakeFiles CMakeCache.txt
( cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
cd build
cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
make -j $(nproc) make -j $(nproc)
) strip sist2
mv build/sist2_debug sist2-arm64-linux-debug mv sist2_debug sist2-arm64-linux-debug

View File

@ -1,4 +1,3 @@
application/x-matlab-data,mat
application/arj, arj application/arj, arj
application/base64, mme application/base64, mme
application/binhex, hqx application/binhex, hqx
@ -30,7 +29,7 @@ application/mime, aps
application/mspowerpoint, ppz application/mspowerpoint, ppz
application/msword, doc|dot|w6w|wiz|word application/msword, doc|dot|w6w|wiz|word
application/netmc, mcp application/netmc, mcp
application/octet-stream, bin|dump|gpg|pack|idx application/octet-stream, bin|dump|gpg
application/oda, oda application/oda, oda
application/ogg, ogv application/ogg, ogv
application/pdf, pdf application/pdf, pdf
@ -244,7 +243,7 @@ audio/make, funk|my|pfunk
audio/midi, kar audio/midi, kar
audio/mid, rmi audio/mid, rmi
audio/mp4, m4b audio/mp4, m4b
audio/mpeg, m2a|mpa|mpga audio/mpeg, m2a|mpa
audio/ogg, ogg audio/ogg, ogg
audio/s3m, s3m audio/s3m, s3m
audio/tsp-audio, tsi audio/tsp-audio, tsi
@ -347,8 +346,6 @@ text/mcf, mcf
text/pascal, pas text/pascal, pas
text/PGP, text/PGP,
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
text/x-script.python, pyx
text/csv,
application/vnd.coffeescript, coffee application/vnd.coffeescript, coffee
text/richtext, rt|rtf|rtx text/richtext, rt|rtf|rtx
text/rtf, text/rtf,
@ -385,7 +382,7 @@ text/x-pascal, p
text/x-perl, pl text/x-perl, pl
text/x-php, php text/x-php, php
text/x-po, po text/x-po, po
text/x-python, py|pyi text/x-python, py
text/x-ruby, rb text/x-ruby, rb
text/x-sass, sass text/x-sass, sass
text/x-scss, scss text/x-scss, scss

1 application/x-matlab-data application/arj mat arj
application/x-matlab-data mat
1 application/arj application/arj arj arj
2 application/base64 application/base64 mme mme
3 application/binhex application/binhex hqx hqx
29 application/mspowerpoint application/mspowerpoint ppz ppz
30 application/msword application/msword doc|dot|w6w|wiz|word doc|dot|w6w|wiz|word
31 application/netmc application/netmc mcp mcp
32 application/octet-stream application/octet-stream bin|dump|gpg|pack|idx bin|dump|gpg
33 application/oda application/oda oda oda
34 application/ogg application/ogg ogv ogv
35 application/pdf application/pdf pdf pdf
243 audio/midi audio/midi kar kar
244 audio/mid audio/mid rmi rmi
245 audio/mp4 audio/mp4 m4b m4b
246 audio/mpeg audio/mpeg m2a|mpa|mpga m2a|mpa
247 audio/ogg audio/ogg ogg ogg
248 audio/s3m audio/s3m s3m s3m
249 audio/tsp-audio audio/tsp-audio tsi tsi
346 text/pascal text/pascal pas pas
347 text/PGP text/PGP
348 text/plain text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
text/x-script.python pyx
text/csv
349 application/vnd.coffeescript application/vnd.coffeescript coffee coffee
350 text/richtext text/richtext rt|rtf|rtx rt|rtf|rtx
351 text/rtf text/rtf
382 text/x-perl text/x-perl pl pl
383 text/x-php text/x-php php php
384 text/x-po text/x-po po po
385 text/x-python text/x-python py|pyi py
386 text/x-ruby text/x-ruby rb rb
387 text/x-sass text/x-sass sass sass
388 text/x-scss text/x-scss scss scss

View File

@ -1,5 +1,3 @@
import zlib
mimes = {} mimes = {}
noparse = set() noparse = set()
ext_in_hash = set() ext_in_hash = set()
@ -137,40 +135,24 @@ def clean(t):
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_") return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
def crc(s):
return zlib.crc32(s.encode()) & 0xffffffff
with open("scripts/mime.csv") as f: with open("scripts/mime.csv") as f:
for l in f: for l in f:
mime, ext_list = l.split(",") mime, ext_list = l.split(",")
if l.startswith("!"): if l.startswith("!"):
mime = mime[1:] mime = mime[1:]
noparse.add(mime) noparse.add(mime)
ext = [x.strip() for x in ext_list.split("|") if x.strip() != ""] ext = [x.strip() for x in ext_list.split("|")]
mimes[mime] = ext mimes[mime] = ext
seen_crc = set()
for ext in mimes.values():
for e in ext:
if crc(e) in seen_crc:
raise Exception("CRC32 collision")
seen_crc.add(crc(e))
seen_crc = set()
for mime in mimes.keys():
if crc(mime) in seen_crc:
raise Exception("CRC32 collision")
seen_crc.add(crc(mime))
print("// **Generated by mime.py**") print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C") print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C") print("#define MIME_GENERATED_C")
print("#include <glib.h>\n")
print("#include <stdlib.h>\n") print("#include <stdlib.h>\n")
# Enum # Enum
print("enum mime {") print("enum mime {")
for mime, ext in sorted(mimes.items()): for mime, ext in sorted(mimes.items()):
print(f"{clean(mime)}={mime_id(mime)},") print(" " + clean(mime) + "=" + mime_id(mime) + ",")
print("};") print("};")
# Enum -> string # Enum -> string
@ -181,20 +163,20 @@ with open("scripts/mime.csv") as f:
print("default: return NULL;}}") print("default: return NULL;}}")
# Ext -> Enum # Ext -> Enum
print("unsigned int mime_extension_lookup(unsigned long extension_crc32) {" print("GHashTable *mime_get_ext_table() {"
"switch (extension_crc32) {") "GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);")
for mime, ext in mimes.items(): for mime, ext in mimes.items():
if len(ext) > 0: for e in [e for e in ext if e]:
for e in ext: print("g_hash_table_insert(ext_table, \"" + e + "\", (gpointer)" + clean(mime) + ");")
print(f"case {crc(e)}:", end="") if e in ext_in_hash:
print(f"return {clean(mime)};") raise Exception("extension already in hash: " + e)
print("default: return 0;}}") ext_in_hash.add(e)
print("return ext_table;}")
# string -> Enum # string -> Enum
print("unsigned int mime_name_lookup(unsigned long mime_crc32) {" print("GHashTable *mime_get_mime_table() {"
"switch (mime_crc32) {") "GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);")
for mime in mimes.keys(): for mime, ext in mimes.items():
print(f"case {crc(mime)}: return {clean(mime)};") print("g_hash_table_insert(mime_table, \"" + mime + "\", (gpointer)" + clean(mime) + ");")
print("return mime_table;}")
print("default: return 0;}}")
print("#endif") print("#endif")

6
scripts/reset.sh Executable file
View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
make clean
rm -rf CMakeFiles/ CMakeCache.txt Makefile \
third-party/libscan/CMakeFiles third-party/libscan/CMakeCache.txt third-party/libscan/third-party/ext_ffmpeg \
third-party/libscan/third-party/ext_libmobi third-party/libscan/Makefile

View File

@ -1,84 +0,0 @@
#include <sqlite3ext.h>
#include <string.h>
#include <stdlib.h>
SQLITE_EXTENSION_INIT1
static int sep_rfind(const char *str) {
for (int i = (int) strlen(str); i >= 0; i--) {
if (str[i] == '/') {
return i;
}
}
return -1;
}
void path_parent_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
const char *value = (const char *) sqlite3_value_text(argv[0]);
int stop = sep_rfind(value);
if (stop == -1) {
sqlite3_result_null(ctx);
return;
}
char parent[4096 * 3];
strncpy(parent, value, stop);
sqlite3_result_text(ctx, parent, stop, SQLITE_TRANSIENT);
}
void random_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_INTEGER) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
char state_buf[32] = {0,};
struct random_data buf;
int result;
long seed = sqlite3_value_int64(argv[0]);
initstate_r((int) seed, state_buf, sizeof(state_buf), &buf);
random_r(&buf, &result);
sqlite3_result_int(ctx, result);
}
int sqlite3_extension_init(
sqlite3 *db,
char **pzErrMsg,
const sqlite3_api_routines *pApi
) {
SQLITE_EXTENSION_INIT2(pApi);
sqlite3_create_function(
db,
"path_parent",
1,
SQLITE_UTF8,
NULL,
path_parent_func,
NULL,
NULL
);
sqlite3_create_function(
db,
"random_seeded",
1,
SQLITE_UTF8,
NULL,
random_func,
NULL,
NULL
);
return SQLITE_OK;
}

View File

@ -1 +0,0 @@
gcc -I/mnt/work/vcpkg/installed/x64-linux/include -g -fPIC -shared sqlite_extension.c -o sist2funcs.so

View File

@ -1,3 +1,3 @@
docker run --rm -it --name "sist2-dev-es"\ docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -e "discovery.type=single-node" \ -p 9200:9200 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.17.9 -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.14.0

View File

@ -1,3 +1,3 @@
docker run --rm -it --name "sist2-dev-es"\ docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \ -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.7.0 -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.1.2

View File

@ -1,5 +0,0 @@
module.exports = {
presets: [
'@vue/cli-plugin-babel/preset'
]
}

File diff suppressed because it is too large Load Diff

View File

@ -1,49 +0,0 @@
{
"name": "sist2-admin-vue",
"version": "0.1.0",
"private": true,
"scripts": {
"serve": "vue-cli-service serve",
"build": "vue-cli-service build",
"watch": "vue-cli-service build --watch"
},
"dependencies": {
"axios": "^0.27.2",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"moment": "^2.29.3",
"socket.io-client": "^4.5.1",
"vue": "^2.6.14",
"vue-i18n": "^8.24.4",
"vue-router": "^3.5.4",
"vuex": "^3.4.0"
},
"devDependencies": {
"@vue/cli-plugin-babel": "~5.0.8",
"@vue/cli-plugin-router": "~5.0.8",
"@vue/cli-plugin-vuex": "~5.0.8",
"@vue/cli-service": "~5.0.8",
"babel-eslint": "^10.1.0",
"bootstrap": "^4.5.2",
"vue-template-compiler": "^2.6.11"
},
"eslintConfig": {
"root": true,
"env": {
"node": true
},
"extends": [
"plugin:vue/essential",
"eslint:recommended"
],
"parserOptions": {
"parser": "babel-eslint"
},
"rules": {}
},
"browserslist": [
"> 1%",
"last 2 versions",
"not dead"
]
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

View File

@ -1,17 +0,0 @@
<!DOCTYPE html>
<html lang="">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<link rel="icon" href="<%= BASE_URL %>serve_favicon_ico.ico">
<title>sist2-admin</title>
</head>
<body>
<noscript>
<strong>We're sorry but <%= htmlWebpackPlugin.options.title %> doesn't work properly without JavaScript enabled. Please enable it to continue.</strong>
</noscript>
<div id="app"></div>
<!-- built files will be auto injected -->
</body>
</html>

View File

@ -1,98 +0,0 @@
<template>
<div id="app">
<NavBar></NavBar>
<b-container class="pt-4">
<b-alert show dismissible variant="info">
This is a beta version of sist2-admin. Please submit bug reports, usability issues and feature requests
to the <a href="https://github.com/simon987/sist2/issues/new/choose" target="_blank">issue tracker on Github</a>. Thank you!
</b-alert>
<router-view/>
</b-container>
</div>
</template>
<script>
import NavBar from "@/components/NavBar";
import Sist2AdminApi from "@/Sist2AdminApi";
export default {
components: {NavBar},
data() {
return {
socket: null
}
},
mounted() {
Sist2AdminApi.getSist2AdminInfo()
.then(resp => this.$store.commit("setSist2AdminInfo", resp.data));
this.$store.dispatch("loadBrowserSettings");
this.connectNotifications();
// this.socket.onclose = this.connectNotifications;
},
methods: {
connectNotifications() {
this.socket = new WebSocket(`ws://${window.location.host}/notifications`);
this.socket.onopen = () => {
this.socket.send("Hello from client");
}
this.socket.onmessage = e => {
const notification = JSON.parse(e.data);
if (notification.message) {
notification.messageString = this.$t(notification.message).toString();
}
this.$store.dispatch("notify", notification)
}
}
}
}
</script>
<style>
html, body {
height: 100%;
}
#app {
/*font-family: Avenir, Helvetica, Arial, sans-serif;*/
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
/*text-align: center;*/
color: #2c3e50;
padding-bottom: 1em;
min-height: 100%;
}
.info-icon {
width: 1rem;
margin-right: 0.2rem;
cursor: pointer;
line-height: 1rem;
height: 1rem;
background-image: url();
filter: brightness(45%);
display: block;
}
.tabs {
margin-top: 10px;
}
.modal-title {
text-overflow: ellipsis;
overflow: hidden;
white-space: nowrap;
}
@media screen and (min-width: 1500px) {
.container {
max-width: 1440px;
}
}
label {
margin-top: 0.5rem;
margin-bottom: 0;
}
</style>

View File

@ -1,117 +0,0 @@
import axios from "axios";
class Sist2AdminApi {
constructor() {
this.baseUrl = window.location.protocol + "//" + window.location.host;
}
getJobs() {
return axios.get(`${this.baseUrl}/api/job/`);
}
getFrontends() {
return axios.get(`${this.baseUrl}/api/frontend/`);
}
getTasks() {
return axios.get(`${this.baseUrl}/api/task/`);
}
killTask(taskId) {
return axios.post(`${this.baseUrl}/api/task/${taskId}/kill`)
}
getTaskHistory() {
return axios.get(`${this.baseUrl}/api/task/history`);
}
/**
* @param {string} name
*/
getJob(name) {
return axios.get(`${this.baseUrl}/api/job/${name}`);
}
/**
* @param {string} name
*/
getFrontend(name) {
return axios.get(`${this.baseUrl}/api/frontend/${name}`);
}
/**
* @param {string} name
*/
startFrontend(name) {
return axios.post(`${this.baseUrl}/api/frontend/${name}/start`);
}
/**
* @param {string} name
*/
stopFrontend(name) {
return axios.post(`${this.baseUrl}/api/frontend/${name}/stop`);
}
/**
* @param {string} name
* @param job
*/
updateJob(name, job) {
return axios.put(`${this.baseUrl}/api/job/${name}`, job);
}
/**
* @param {string} name
* @param frontend
*/
updateFrontend(name, frontend) {
return axios.put(`${this.baseUrl}/api/frontend/${name}`, frontend);
}
/**
* @param {string} name
*/
runJob(name) {
return axios.get(`${this.baseUrl}/api/job/${name}/run`);
}
/**
* @param {string} name
*/
deleteJob(name) {
return axios.delete(`${this.baseUrl}/api/job/${name}`);
}
/**
* @param {string} name
*/
deleteFrontend(name) {
return axios.delete(`${this.baseUrl}/api/frontend/${name}`);
}
/**
* @param {string} name
*/
createJob(name) {
return axios.post(`${this.baseUrl}/api/job/${name}`);
}
/**
* @param {string} name
*/
createFrontend(name) {
return axios.post(`${this.baseUrl}/api/frontend/${name}`);
}
pingEs(url, insecure) {
return axios.get(`${this.baseUrl}/api/ping_es`, {params: {url, insecure}});
}
getSist2AdminInfo() {
return axios.get(`${this.baseUrl}/api/`);
}
}
export default new Sist2AdminApi()

View File

@ -1,31 +0,0 @@
<template>
<b-list-group-item action :to="`/frontend/${frontend.name}`">
<div class="d-flex w-100 justify-content-between">
<h5 class="mb-1" style="display: block">
{{ frontend.name }}
<b-badge variant="light">{{ formatBindAddress(frontend.web_options.bind) }}</b-badge>
</h5>
<div>
<b-badge v-if="frontend.running" variant="success">{{$t("online")}}</b-badge>
<b-badge v-else variant="secondary">{{$t("offline")}}</b-badge>
</div>
</div>
</b-list-group-item>
</template>
<script>
import {formatBindAddress} from "@/util";
export default {
name: "FrontendListItem",
props: ["frontend"],
data() {
return {
formatBindAddress
}
}
}
</script>

View File

@ -1,64 +0,0 @@
<template>
<div>
<label>{{ $t("indexOptions.threads") }}</label>
<b-form-input v-model="options.threads" type="number" min="1" @change="update()"></b-form-input>
<label>{{ $t("webOptions.esUrl") }}</label>
<b-alert :variant="esTestOk ? 'success' : 'danger'" :show="showEsTestAlert" class="mt-1">
{{ esTestMessage }}
</b-alert>
<b-input-group>
<b-form-input v-model="options.es_url" @change="update()"></b-form-input>
<b-input-group-append>
<b-button variant="outline-primary" @click="testEs()">{{ $t("test") }}</b-button>
</b-input-group-append>
</b-input-group>
<label>{{ $t("indexOptions.esIndex") }}</label>
<b-form-input v-model="options.es_index" @change="update()"></b-form-input>
<br>
<b-form-checkbox v-model="options.es_insecure_ssl" :disabled="!options.es_url.startsWith('https')" @change="update()">
{{ $t("webOptions.esInsecure") }}
</b-form-checkbox>
<label>{{ $t("indexOptions.batchSize") }}</label>
<b-form-input v-model="options.batch_size" type="number" min="1" @change="update()"></b-form-input>
<label>{{ $t("indexOptions.script") }}</label>
<b-form-textarea v-model="options.script" rows="6" @change="update()"></b-form-textarea>
</div>
</template>
<script>
import sist2AdminApi from "@/Sist2AdminApi";
export default {
name: "IndexOptions",
props: ["options"],
data() {
return {
showEsTestAlert: false,
esTestOk: false,
esTestMessage: "",
}
},
methods: {
update() {
this.$emit("change", this.options);
},
testEs() {
sist2AdminApi.pingEs(this.options.es_url, this.options.es_insecure_ssl).then((resp) => {
this.showEsTestAlert = true;
this.esTestOk = resp.data.ok;
this.esTestMessage = resp.data.message;
});
}
},
}
</script>
<style scoped>
</style>

View File

@ -1,42 +0,0 @@
<template>
<div>
<h5>{{ $t("selectJobs") }}</h5>
<b-progress v-if="loading" striped animated value="100"></b-progress>
<b-form-group v-else>
<b-form-checkbox-group
v-if="jobs.length > 0"
:checked="frontend.jobs"
@input="frontend.jobs = $event; $emit('input')"
>
<div v-for="job in jobs" :key="job.name">
<b-form-checkbox :disabled="job.status !== 'indexed'" :value="job.name">[{{ job.name }}]</b-form-checkbox>
<br/>
</div>
</b-form-checkbox-group>
<div v-else>
<span class="text-muted">{{ $t('jobOptions.noJobAvailable') }}</span>
&nbsp;<router-link to="/">{{$t("create")}}</router-link>
</div>
</b-form-group>
</div>
</template>
<script>
import Sist2AdminApi from "@/Sist2AdminApi";
export default {
name: "JobCheckboxGroup",
props: ["frontend"],
mounted() {
Sist2AdminApi.getJobs().then(resp => {
this.jobs = resp.data;
this.loading = false;
});
},
data() {
return {
loading: true,
}
}
}
</script>

View File

@ -1,56 +0,0 @@
<template>
<b-list-group-item class="flex-column align-items-start" action :to="`job/${job.name}`">
<div class="d-flex w-100 justify-content-between">
<div>
<h5 class="mb-1">
{{ job.name }}
</h5>
</div>
<div>
<b-row>
<b-col>
<small v-if="job.last_index_date">
{{ $t("scanned") }} {{ formatLastIndexDate(job.last_index_date) }}</small>
<div v-else>&nbsp;</div>
</b-col>
</b-row>
<b-row v-if="job.schedule_enabled">
<b-col>
<small><code>{{job.cron_expression }}</code></small>
</b-col>
</b-row>
<b-row v-else>
<b-col>
&nbsp;
</b-col>
</b-row>
</div>
</div>
</b-list-group-item>
</template>
<script>
import moment from "moment";
export default {
name: "JobListItem",
props: ["job"],
methods: {
formatLastIndexDate(dateString) {
if (dateString === null) {
return "";
}
const date = Date.parse(dateString);
return moment(date).fromNow();
}
}
}
</script>
<style scoped>
</style>

View File

@ -1,57 +0,0 @@
<template>
<div>
<b-form-checkbox :checked="desktopNotificationsEnabled" @change="updateNotifications($event)">
{{ $t("jobOptions.desktopNotifications") }}
</b-form-checkbox>
<b-form-checkbox v-model="job.schedule_enabled" @change="update()">
{{ $t("jobOptions.scheduleEnabled") }}
</b-form-checkbox>
<label>{{ $t("jobOptions.cron") }}</label>
<b-form-input class="text-monospace" :state="cronValid" v-model="job.cron_expression" :disabled="!job.schedule_enabled" @change="update()"></b-form-input>
</div>
</template>
<script>
export default {
name: "JobOptions",
props: ["job"],
data() {
return {
cronValid: undefined
}
},
computed: {
desktopNotificationsEnabled() {
return this.$store.state.jobDesktopNotificationMap[this.job.name];
}
},
mounted() {
this.cronValid = this.checkCron(this.job.cron_expression)
},
methods: {
checkCron(expression) {
return /((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})/.test(expression);
},
updateNotifications(value) {
this.$store.dispatch("setJobDesktopNotification", {
job: this.job.name,
enabled: value
});
},
update() {
if (this.job.schedule_enabled) {
this.cronValid = this.checkCron(this.job.cron_expression);
} else {
this.cronValid = undefined;
}
if (this.cronValid !== false) {
this.$emit("change", this.job);
}
},
},
}
</script>

View File

@ -1,69 +0,0 @@
<template>
<b-navbar>
<b-navbar-brand to="/">
<Sist2Icon></Sist2Icon>
</b-navbar-brand>
<b-button class="ml-auto" to="/task" variant="link">{{ $t("tasks") }}</b-button>
</b-navbar>
</template>
<script>
import Sist2Icon from "@/components/icons/Sist2Icon";
export default {
name: "NavBar",
components: {Sist2Icon},
methods: {
tagline() {
return this.$store.state.sist2Info.tagline;
},
sist2Version() {
return this.$store.state.sist2Info.version;
},
isDebug() {
return this.$store.state.sist2Info.debug;
},
isLegacy() {
return this.$store.state.sist2Info.esVersionLegacy;
},
hideLegacy() {
return this.$store.state.optHideLegacy;
}
}
}
</script>
<style scoped>
.navbar {
box-shadow: 0 0.125rem 0.25rem rgb(0 0 0 / 8%) !important;
border-radius: 0;
}
.theme-black .navbar {
background: #546b7a30;
border-bottom: none;
}
.navbar-brand {
color: #222 !important;
font-size: 1.75rem;
padding: 0;
}
.navbar-brand:hover {
color: #000 !important;
}
.version {
color: #222 !important;
margin-left: -18px;
margin-top: -14px;
font-size: 11px;
font-family: monospace;
}
.btn-link {
color: #222;
}
</style>

View File

@ -1,109 +0,0 @@
<template>
<div>
<label>{{ $t("scanOptions.path") }}</label>
<b-form-input v-model="options.path" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.threads") }}</label>
<b-form-input type="number" min="1" v-model="options.threads" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.thumbnailQuality") }}</label>
<b-form-input type="number" min="0" max="100" v-model="options.thumbnail_quality" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.thumbnailCount") }}</label>
<b-form-input type="number" min="0" max="1000" v-model="options.thumbnail_count" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.thumbnailSize") }}</label>
<b-form-input type="number" min="100" v-model="options.thumbnail_size" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.contentSize") }}</label>
<b-form-input type="number" min="0" v-model="options.content_size" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.rewriteUrl") }}</label>
<b-form-input v-model="options.rewrite_url" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.depth") }}</label>
<b-form-input type="number" min="0" v-model="options.depth" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.archive") }}</label>
<b-form-select :options="['skip', 'list', 'shallow', 'recurse']" v-model="options.archive"
@change="update()"></b-form-select>
<label>{{ $t("scanOptions.archivePassphrase") }}</label>
<b-form-input v-model="options.archive_passphrase" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.ocrLang") }}</label>
<b-alert variant="danger" show v-if="selectedOcrLangs.length === 0 && !disableOcrLang">{{ $t("scanOptions.ocrLangAlert") }}</b-alert>
<b-checkbox-group :disabled="disableOcrLang" v-model="selectedOcrLangs" @input="onOcrLangChange">
<b-checkbox v-for="lang in ocrLangs" :key="lang" :value="lang">{{ lang }}</b-checkbox>
</b-checkbox-group>
<!-- <b-form-input readonly v-model="options.ocr_lang" @change="update()"></b-form-input>-->
<div style="height: 10px"></div>
<b-form-checkbox v-model="options.ocr_images" @change="update()">
{{ $t("scanOptions.ocrImages") }}
</b-form-checkbox>
<b-form-checkbox v-model="options.ocr_ebooks" @change="update()">
{{ $t("scanOptions.ocrEbooks") }}
</b-form-checkbox>
<label>{{ $t("scanOptions.exclude") }}</label>
<b-form-input v-model="options.exclude" @change="update()"
:placeholder="$t('scanOptions.excludePlaceholder')"></b-form-input>
<div style="height: 10px"></div>
<b-form-checkbox v-model="options.fast" @change="update()">
{{ $t("scanOptions.fast") }}
</b-form-checkbox>
<b-form-checkbox v-model="options.checksums" @change="update()">
{{ $t("scanOptions.checksums") }}
</b-form-checkbox>
<b-form-checkbox v-model="options.read_subtitles" @change="update()">
{{ $t("scanOptions.readSubtitles") }}
</b-form-checkbox>
<b-form-checkbox v-model="options.optimize_index" @change="update()">
{{ $t("scanOptions.optimizeIndex") }}
</b-form-checkbox>
<label>{{ $t("scanOptions.treemapThreshold") }}</label>
<b-form-input type="number" min="0" v-model="options.treemap_threshold" @change="update()"></b-form-input>
</div>
</template>
<script>
export default {
name: "ScanOptions",
props: ["options"],
data() {
return {
disableOcrLang: false,
selectedOcrLangs: []
}
},
computed: {
ocrLangs() {
return this.$store.state.sist2AdminInfo?.tesseract_langs || [];
}
},
methods: {
onOcrLangChange() {
this.options.ocr_lang = this.selectedOcrLangs.join("+");
},
update() {
this.disableOcrLang = this.options.ocr_images === false && this.options.ocr_ebooks === false;
this.$emit("change", this.options);
},
},
mounted() {
this.disableOcrLang = this.options.ocr_images === false && this.options.ocr_ebooks === false;
this.selectedOcrLangs = this.options.ocr_lang ? this.options.ocr_lang.split("+") : [];
}
}
</script>

View File

@ -1,57 +0,0 @@
<template>
<b-list-group-item>
<b-row style="height: 50px">
<b-col><h5>{{ task.display_name }}</h5></b-col>
<b-col class="shrink">
<router-link class="btn btn-link" :to="`/log/${task.id}`">{{ $t("logs") }}</router-link>
</b-col>
<b-col class="shrink">
<b-btn variant="link" @click="killTask(task.id)">{{ $t("kill") }}</b-btn>
</b-col>
</b-row>
<b-row>
<b-col>
<b-progress :max="task.progress.count">
<b-progress-bar :value="task.progress.done" :label-html="label" :striped="!task.progress.waiting"/>
</b-progress>
</b-col>
</b-row>
</b-list-group-item>
</template>
<script>
import sist2AdminApi from "@/Sist2AdminApi";
export default {
name: "TaskListItem",
props: ["task"],
computed: {
label() {
const count = this.task.progress.count;
const done = this.task.progress.done;
return `<span>${done}/${count}</span>`
}
},
methods: {
killTask(taskId) {
sist2AdminApi.killTask(taskId).then(() => {
this.$bvToast.toast(this.$t("killConfirmation"), {
title: this.$t("killConfirmationTitle"),
variant: "success",
toaster: "b-toaster-bottom-right"
});
});
}
}
}
</script>
<style scoped>
.shrink {
flex-grow: inherit;
}
</style>

View File

@ -1,91 +0,0 @@
<template>
<div>
<label>{{ $t("webOptions.esUrl") }}</label>
<b-alert :variant="esTestOk ? 'success' : 'danger'" :show="showEsTestAlert" class="mt-1">
{{ esTestMessage }}
</b-alert>
<b-input-group>
<b-form-input v-model="options.es_url" @change="update()"></b-form-input>
<b-input-group-append>
<b-button variant="outline-primary" @click="testEs()">{{ $t("test") }}</b-button>
</b-input-group-append>
</b-input-group>
<b-form-checkbox v-model="options.es_insecure_ssl" :disabled="!this.options.es_url.startsWith('https')" @change="update()">
{{ $t("webOptions.esInsecure") }}
</b-form-checkbox>
<label>{{ $t("webOptions.esIndex") }}</label>
<b-form-input v-model="options.es_index" @change="update()"></b-form-input>
<label>{{ $t("webOptions.lang") }}</label>
<b-form-select v-model="options.lang" :options="['en', 'fr', 'zh-CN']" @change="update()"></b-form-select>
<label>{{ $t("webOptions.bind") }}</label>
<b-form-input v-model="options.bind" @change="update()"></b-form-input>
<label>{{ $t("webOptions.tagline") }}</label>
<b-form-textarea v-model="options.tagline" @change="update()"></b-form-textarea>
<label>{{ $t("webOptions.auth") }}</label>
<b-form-input v-model="options.auth" @change="update()"></b-form-input>
<label>{{ $t("webOptions.tagAuth") }}</label>
<b-form-input v-model="options.tag_auth" @change="update()"></b-form-input>
<br>
<h5>Auth0 options</h5>
<label>{{ $t("webOptions.auth0Audience") }}</label>
<b-form-input v-model="options.auth0_audience" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0Domain") }}</label>
<b-form-input v-model="options.auth0_domain" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0ClientId") }}</label>
<b-form-input v-model="options.auth0_client_id" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0PublicKey") }}</label>
<b-textarea rows="10" v-model="options.auth0_public_key" @change="update()"></b-textarea>
</div>
</template>
<script>
import sist2AdminApi from "@/Sist2AdminApi";
export default {
name: "WebOptions",
props: ["options", "frontendName"],
data() {
return {
showEsTestAlert: false,
esTestOk: false,
esTestMessage: "",
}
},
methods: {
update() {
if (!this.options.es_url.startsWith("https")) {
this.options.es_insecure_ssl = false;
}
this.$emit("change", this.options);
},
testEs() {
sist2AdminApi.pingEs(this.options.es_url, this.options.es_insecure_ssl).then((resp) => {
this.showEsTestAlert = true;
this.esTestOk = resp.data.ok;
this.esTestMessage = resp.data.message;
});
}
}
}
</script>
<style scoped>
</style>

View File

@ -1,40 +0,0 @@
<template>
<svg
xmlns="http://www.w3.org/2000/svg"
width="27.868069mm"
height="7.6446671mm"
viewBox="0 0 27.868069 7.6446671"
>
<g transform="translate(-4.5018313,-4.1849793)">
<g
style="fill: currentColor;fill-opacity:1;stroke:none;stroke-width:0.26458332">
<path
d="m 6.3153296,11.829646 q -0.7717014,0 -1.8134983,-0.337619 v -0.916395 q 1.0128581,0.511252 1.803852,0.511252 0.5643067,0 0.901926,-0.236334 0.3376194,-0.236333 0.3376194,-0.63183 0,-0.3424428 -0.2845649,-0.5498376 Q 6.980922,9.4566645 6.3635609,9.3264399 L 5.9921796,9.2492698 Q 5.2301245,9.0949295 4.8732126,8.7428407 4.5211238,8.3859288 4.5211238,7.7733908 q 0,-0.7765245 0.5305447,-1.1961372 0.5305447,-0.4196126 1.5096409,-0.4196126 0.829579,0 1.6061036,0.3183268 V 7.3441319 Q 7.4101809,6.9004036 6.5854251,6.9004036 q -1.1671984,0 -1.1671984,0.7958171 0,0.2604492 0.1012858,0.4147895 0.1012858,0.1495171 0.3858507,0.2556261 0.2845649,0.1012858 0.8392253,0.2122179 l 0.3569119,0.067524 q 1.3408312,0.2652724 1.3408312,1.4614098 0,0.80064 -0.5691298,1.263661 -0.5691298,0.458197 -1.5578722,0.458197 z"
style="stroke-width:0.26458332"
/>
<path
d="m 11.943927,5.3087694 q -0.144694,0 -0.144694,-0.144694 V 4.3296733 q 0,-0.144694 0.144694,-0.144694 h 0.694531 q 0.144694,0 0.144694,0.144694 v 0.8344021 q 0,0.144694 -0.144694,0.144694 z M 13.5645,11.728361 q -0.795817,0 -1.234722,-0.511253 -0.434082,-0.516075 -0.434082,-1.4469398 V 6.9823969 H 10.714028 V 6.2878656 h 2.069124 v 3.4823026 q 0,0.5884228 0.221864,0.8971028 0.221865,0.308681 0.6463,0.308681 h 1.036974 v 0.752409 z"
style="stroke-width:0.26458332"
/>
<path
d="m 18.209178,11.829646 q -0.771701,0 -1.813498,-0.337619 v -0.916395 q 1.012858,0.511252 1.803852,0.511252 0.564306,0 0.901926,-0.236334 0.337619,-0.236333 0.337619,-0.63183 0,-0.3424428 -0.284565,-0.5498376 Q 18.87477,9.4566645 18.257409,9.3264399 l -0.371381,-0.07717 Q 17.123973,9.0949295 16.767061,8.7428407 16.414972,8.3859288 16.414972,7.7733908 q 0,-0.7765245 0.530545,-1.1961372 0.530545,-0.4196126 1.509641,-0.4196126 0.829579,0 1.606103,0.3183268 v 0.8681641 q -0.757232,-0.4437283 -1.581988,-0.4437283 -1.167198,0 -1.167198,0.7958171 0,0.2604492 0.101286,0.4147895 0.101286,0.1495171 0.385851,0.2556261 0.284565,0.1012858 0.839225,0.2122179 l 0.356912,0.067524 q 1.340831,0.2652724 1.340831,1.4614098 0,0.80064 -0.56913,1.263661 -0.56913,0.458197 -1.557872,0.458197 z"
style="stroke-width:0.26458332"
/>
<path
d="m 25.207545,11.709068 q -0.993565,0 -1.408355,-0.40032 -0.409966,-0.405143 -0.409966,-1.3794164 V 6.9775737 H 21.947107 V 6.2878656 h 1.442117 V 4.8746874 l 0.887457,-0.3858507 v 1.7990289 h 2.016069 v 0.6897081 h -2.016069 v 2.9517579 q 0,0.5932454 0.226687,0.8344024 0.226687,0.236333 0.790994,0.236333 h 0.998388 v 0.709001 z"
style="stroke-width:0.26458332"
/>
<path
d="m 27.995317,11.043476 q 0,-0.178456 0.120578,-0.299035 0.274919,-0.289388 0.651123,-0.684885 0.376205,-0.4003199 0.805464,-0.8681638 0.327973,-0.356912 0.491959,-0.5353679 0.16881,-0.1832791 0.255626,-0.2845649 0.09164,-0.1012858 0.178456,-0.2073948 0.255626,-0.3086805 0.405144,-0.5257215 0.15434,-0.2170411 0.250803,-0.4292589 0.168809,-0.3762045 0.168809,-0.7524089 0,-0.5980686 -0.352089,-0.935688 -0.356911,-0.3424425 -0.979096,-0.3424425 -0.863341,0 -1.938899,0.6414768 V 4.8361023 q 0.491959,-0.2363335 0.979096,-0.3569119 0.47749,-0.1205783 0.945334,-0.1205783 0.501606,0 0.940511,0.1350477 0.438905,0.1350478 0.766878,0.4244358 0.289388,0.2556261 0.463021,0.6270074 0.173633,0.3665582 0.173633,0.829579 0,0.4726671 -0.212218,0.9501574 -0.106109,0.2411567 -0.274919,0.4726671 -0.163986,0.2266873 -0.424435,0.540191 Q 31.270225,8.501684 31.077299,8.718725 30.884374,8.9357661 30.628748,9.2106847 30.445469,9.4084332 30.286305,9.5675966 30.131965,9.72676 29.958332,9.9003928 29.7847,10.069203 29.558012,10.300713 29.336148,10.5274 29.012998,10.869843 h 3.356901 v 0.819932 h -4.374582 z"
style="stroke-width:0.26458332"
/>
</g>
</g>
</svg>
</template>
<script>
export default {
name: "Sist2Icon"
}
</script>

View File

@ -1,114 +0,0 @@
export default {
en: {
start: "Start",
stop: "Stop",
go: "Go",
online: "online",
offline: "offline",
delete: "Delete",
runNow: "Index now",
create: "Create",
test: "Test",
jobTitle: "job configuration",
tasks: "Tasks",
runningTasks: "Running tasks",
frontends: "Frontends",
jobDisabled: "There is no valid index for this job",
status: "Status",
taskHistory: "Task history",
taskName: "Task name",
taskStarted: "Started",
taskDuration: "Duration",
taskStatus: "Status",
logs: "Logs",
kill: "Kill",
killConfirmation: "SIGTERM signal sent to sist2 process",
killConfirmationTitle: "Confirmation",
follow: "Follow",
wholeFile: "Whole file",
logLevel: "Log level",
logMode: "Follow mode",
logFile: "Reading log file",
jobs: "Jobs",
newJobName: "New job name",
newJobHelp: "Create a new job to get started!",
newFrontendName: "New frontend name",
scanned: "last scan",
autoStart: "Start automatically",
runJobConfirmationTitle: "Task queued",
runJobConfirmation: "Check the Tasks page to monitor the status.",
extraQueryArgs: "Extra query arguments when launching from sist2-admin",
customUrl: "Custom URL when launching from sist2-admin",
selectJobs: "Select jobs",
webOptions: {
title: "Web options",
esUrl: "Elasticsearch URL",
esIndex: "Elasticsearch index name",
esInsecure: "Do not verify SSL connections to Elasticsearch.",
lang: "UI Language",
bind: "Listen address",
tagline: "Tagline in navbar",
auth: "Basic auth in user:password format",
tagAuth: "Basic auth in user:password format for tagging",
auth0Audience: "Auth0 audience",
auth0Domain: "Auth0 domain",
auth0ClientId: "Auth0 client ID",
auth0PublicKey: "Auth0 public key",
},
scanOptions: {
title: "Scanning options",
path: "Path",
threads: "Number of threads",
memThrottle: "Total memory threshold in MiB for scan throttling",
thumbnailQuality: "Thumbnail quality, on a scale of 0 to 100, 100 being the best",
thumbnailCount: "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails.",
thumbnailSize: "Thumbnail size, in pixels",
contentSize: "Number of bytes to be extracted from text documents. Set to 0 to disable",
rewriteUrl: "Serve files from this url instead of from disk",
depth: "Scan up to this many subdirectories deep",
archive: "Archive file mode",
archivePassphrase: "Passphrase for encrypted archive files",
ocrLang: "Tesseract language",
ocrLangAlert: "You must select at least one language",
ocrEbooks: "Enable OCR'ing of ebook files",
ocrImages: "Enable OCR'ing of image files",
exclude: "Files that match this regex will not be scanned",
excludePlaceholder: "Exclude",
fast: "Only index file names & mime type",
checksums: "Calculate file checksums when scanning",
readSubtitles: "Read subtitles from media files",
memBuffer: "Maximum memory buffer size per thread in MiB for files inside archives",
treemapThreshold: "Relative size threshold for treemap",
optimizeIndex: "Defragment index file after scan to reduce its file size."
},
indexOptions: {
title: "Indexing options",
threads: "Number of threads",
esUrl: "Elasticsearch URL",
esIndex: "Elasticsearch index name",
esInsecure: "Do not verify SSL connections to Elasticsearch.",
batchSize: "Index batch size",
script: "User script"
},
jobOptions: {
title: "Job options",
cron: "Job schedule",
scheduleEnabled: "Enable scheduled re-scan",
noJobAvailable: "No jobs available.",
desktopNotifications: "Desktop notifications"
},
frontendOptions: {
title: "Frontend options",
noJobSelectedWarning: "You must select at least one job to start this frontend"
},
notifications: {
indexCompleted: "Task completed for [$JOB$]"
}
}
}

View File

@ -1,31 +0,0 @@
import Vue from 'vue'
import { BootstrapVue, IconsPlugin } from 'bootstrap-vue'
import "bootstrap/dist/css/bootstrap.min.css"
import "bootstrap-vue/dist/bootstrap-vue.min.css"
Vue.use(BootstrapVue);
Vue.use(IconsPlugin);
import App from './App.vue';
import router from './router';
import store from './store';
import VueI18n from "vue-i18n";
import messages from "@/i18n/messages";
Vue.use(VueI18n);
const i18n = new VueI18n({
locale: "en",
messages: messages
});
Vue.config.productionTip = false
new Vue({
router,
store,
i18n,
render: h => h(App)
}).$mount('#app')

View File

@ -1,45 +0,0 @@
import Vue from 'vue'
import VueRouter from 'vue-router'
import Home from '../views/Home.vue'
import Job from "@/views/Job";
import Tasks from "@/views/Tasks";
import Frontend from "@/views/Frontend";
import Tail from "@/views/Tail";
Vue.use(VueRouter);
const routes = [
{
path: "/",
name: "Home",
component: Home
},
{
path: "/job/:name",
name: "Job",
component: Job
},
{
path: "/task/",
name: "Tasks",
component: Tasks
},
{
path: "/frontend/:name",
name: "Frontend",
component: Frontend
},
{
path: "/log/:taskId",
name: "Tail",
component: Tail
},
]
const router = new VueRouter({
mode: "hash",
base: process.env.BASE_URL,
routes
})
export default router

View File

@ -1,63 +0,0 @@
import Vue from "vue";
import Vuex from "vuex";
Vue.use(Vuex);
function saveBrowserSettings(state) {
const settings = {
jobDesktopNotificationMap: state.jobDesktopNotificationMap
};
localStorage.setItem("sist2-admin-settings", JSON.stringify(settings));
console.log("SAVED");
console.log(settings);
}
export default new Vuex.Store({
state: {
sist2AdminInfo: null,
jobDesktopNotificationMap: {}
},
mutations: {
setSist2AdminInfo: (state, payload) => state.sist2AdminInfo = payload,
setJobDesktopNotificationMap: (state, payload) => state.jobDesktopNotificationMap = payload,
},
actions: {
notify: async ({state}, notification) => {
if (!state.jobDesktopNotificationMap[notification.job]) {
console.log("pass");
return;
}
new Notification(notification.messageString.replace("$JOB$", notification.job));
},
setJobDesktopNotification: async ({state}, {job, enabled}) => {
if (enabled === true) {
const permission = await Notification.requestPermission()
if (permission !== "granted") {
return false;
}
}
state.jobDesktopNotificationMap[job] = enabled;
saveBrowserSettings(state);
return true;
},
loadBrowserSettings({commit}) {
const settingString = localStorage.getItem("sist2-admin-settings");
if (!settingString) {
return;
}
const settings = JSON.parse(settingString);
commit("setJobDesktopNotificationMap", settings["jobDesktopNotificationMap"]);
}
},
modules: {}
})

View File

@ -1,8 +0,0 @@
export function formatBindAddress(address) {
if (address.startsWith("0.0.0.0")) {
return address.slice("0.0.0.0".length)
}
return address
}

View File

@ -1,129 +0,0 @@
<template>
<b-card>
<b-card-title>
{{ name }}
<small style="vertical-align: top">
<b-badge v-if="!loading && frontend.running" variant="success">{{ $t("online") }}</b-badge>
<b-badge v-else-if="!loading" variant="secondary">{{ $t("offline") }}</b-badge>
</small>
</b-card-title>
<div class="mb-3" v-if="!loading">
<b-button class="mr-1" :disabled="frontend.running || !valid" variant="success" @click="start()">{{
$t("start")
}}
</b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="danger" @click="stop()">{{
$t("stop")
}}
</b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="primary" :href="frontendUrl" target="_blank">
{{ $t("go") }}
</b-button>
<b-button variant="danger" @click="deleteFrontend()">{{ $t("delete") }}</b-button>
</div>
<b-progress v-if="loading" striped animated value="100"></b-progress>
<b-card-body v-else>
<h4>{{ $t("frontendOptions.title") }}</h4>
<b-card>
<b-form-checkbox v-model="frontend.auto_start" @change="update()">
{{ $t("autoStart") }}
</b-form-checkbox>
<label>{{ $t("extraQueryArgs") }}</label>
<b-form-input v-model="frontend.extra_query_args" @change="update()"></b-form-input>
<label>{{ $t("customUrl") }}</label>
<b-form-input v-model="frontend.custom_url" @change="update()" placeholder="http://"></b-form-input>
<br/>
<b-alert v-if="!valid" variant="warning" show>{{ $t("frontendOptions.noJobSelectedWarning") }}</b-alert>
<JobCheckboxGroup :frontend="frontend" @input="update()"></JobCheckboxGroup>
</b-card>
<br/>
<h4>{{ $t("webOptions.title") }}</h4>
<b-card>
<WebOptions :options="frontend.web_options" :frontend-name="$route.params.name" @change="update()"></WebOptions>
</b-card>
</b-card-body>
</b-card>
</template>
<script>
import Sist2AdminApi from "@/Sist2AdminApi";
import JobCheckboxGroup from "@/components/JobCheckboxGroup";
import WebOptions from "@/components/WebOptions";
export default {
name: 'Frontend',
components: {JobCheckboxGroup, WebOptions},
data() {
return {
loading: true,
frontend: null,
}
},
computed: {
valid() {
return !this.loading && this.frontend.jobs.length > 0;
},
frontendUrl() {
if (this.frontend.custom_url) {
return this.frontend.custom_url + this.args;
}
if (this.frontend.web_options.bind.startsWith("0.0.0.0")) {
return window.location.protocol + "//" + window.location.hostname + ":" + this.port + this.args;
}
return window.location.protocol + "//" + this.frontend.web_options.bind + this.args;
},
name() {
return this.$route.params.name;
},
port() {
return this.frontend.web_options.bind.split(":")[1]
},
args() {
const args = this.frontend.extra_query_args;
if (args !== "") {
return "#" + (args.startsWith("?") ? (args) : ("?" + args));
}
return "";
}
},
mounted() {
Sist2AdminApi.getFrontend(this.name).then(resp => {
this.frontend = resp.data;
this.loading = false;
});
},
methods: {
start() {
this.frontend.running = true;
Sist2AdminApi.startFrontend(this.name)
},
stop() {
this.frontend.running = false;
Sist2AdminApi.stopFrontend(this.name)
},
deleteFrontend() {
Sist2AdminApi.deleteFrontend(this.name).then(() => {
this.$router.push("/frontends");
});
},
update() {
Sist2AdminApi.updateFrontend(this.name, this.frontend);
},
}
}
</script>

View File

@ -1,122 +0,0 @@
<template>
<div>
<b-card>
<b-card-title>{{ $t("jobs") }}</b-card-title>
<b-row>
<b-col>
<b-input id="new-job" v-model="newJobName" :placeholder="$t('newJobName')"></b-input>
<b-popover
:show.sync="showHelp"
target="new-job"
placement="top"
triggers="manual"
variant="primary"
:content="$t('newJobHelp')"
></b-popover>
</b-col>
<b-col>
<b-button variant="primary" @click="createJob()" :disabled="!jobNameValid(newJobName)">{{ $t("create") }}
</b-button>
</b-col>
</b-row>
<hr/>
<b-progress v-if="jobsLoading" striped animated value="100"></b-progress>
<b-list-group v-else>
<JobListItem v-for="job in jobs" :key="job.name" :job="job"></JobListItem>
</b-list-group>
</b-card>
<br/>
<b-card>
<b-card-title>{{ $t("frontends") }}</b-card-title>
<b-row>
<b-col>
<b-input v-model="newFrontendName" :placeholder="$t('newFrontendName')"></b-input>
</b-col>
<b-col>
<b-button variant="primary" @click="createFrontend()" :disabled="!frontendNameValid(newFrontendName)">
{{ $t("create") }}
</b-button>
</b-col>
</b-row>
<hr/>
<b-progress v-if="frontendsLoading" striped animated value="100"></b-progress>
<b-list-group v-else>
<FrontendListItem v-for="frontend in frontends"
:key="frontend.name" :frontend="frontend"></FrontendListItem>
</b-list-group>
</b-card>
</div>
</template>
<script>
import JobListItem from "@/components/JobListItem";
import {formatBindAddress} from "@/util";
import Sist2AdminApi from "@/Sist2AdminApi";
import FrontendListItem from "@/components/FrontendListItem";
export default {
name: "Jobs",
components: {JobListItem, FrontendListItem},
data() {
return {
jobsLoading: true,
newJobName: "",
jobs: [],
frontendsLoading: true,
frontends: [],
formatBindAddress,
newFrontendName: "",
showHelp: false
}
},
mounted() {
this.loading = true;
this.reload();
},
methods: {
jobNameValid(name) {
if (this.jobs.some(job => job.name === name)) {
return false;
}
return /^[a-zA-Z0-9-_,.; ]+$/.test(name);
},
frontendNameValid(name) {
if (this.frontends.some(job => job.name === name)) {
return false;
}
return /^[a-zA-Z0-9-_,.; ]+$/.test(name);
},
reload() {
Sist2AdminApi.getJobs().then(resp => {
this.jobs = resp.data;
this.jobsLoading = false;
this.showHelp = this.jobs.length === 0;
});
Sist2AdminApi.getFrontends().then(resp => {
this.frontends = resp.data;
this.frontendsLoading = false;
});
},
createJob() {
Sist2AdminApi.createJob(this.newJobName).then(this.reload);
},
createFrontend() {
Sist2AdminApi.createFrontend(this.newFrontendName).then(this.reload)
}
}
}
</script>

View File

@ -1,92 +0,0 @@
<template>
<b-card>
<b-card-title>
[{{ getName() }}]
{{ $t("jobTitle") }}
</b-card-title>
<div class="mb-3">
<b-button class="mr-1" variant="primary" @click="runJob()">{{ $t("runNow") }}</b-button>
<b-button variant="danger" @click="deleteJob()">{{ $t("delete") }}</b-button>
</div>
<div v-if="job">
{{ $t("status") }}: <code>{{ job.status }}</code>
</div>
<b-progress v-if="loading" striped animated value="100"></b-progress>
<b-card-body v-else>
<h4>{{ $t("jobOptions.title") }}</h4>
<b-card>
<JobOptions :job="job" @change="update"></JobOptions>
</b-card>
<br/>
<h4>{{ $t("scanOptions.title") }}</h4>
<b-card>
<ScanOptions :options="job.scan_options" @change="update()"></ScanOptions>
</b-card>
<br/>
<h4>{{ $t("indexOptions.title") }}</h4>
<b-card>
<IndexOptions :options="job.index_options" @change="update()"></IndexOptions>
</b-card>
</b-card-body>
</b-card>
</template>
<script>
import ScanOptions from "@/components/ScanOptions";
import Sist2AdminApi from "@/Sist2AdminApi";
import IndexOptions from "@/components/IndexOptions";
import JobOptions from "@/components/JobOptions";
export default {
name: "Job",
components: {
IndexOptions,
ScanOptions,
JobOptions
},
data() {
return {
loading: true,
job: null
}
},
methods: {
getName() {
return this.$route.params.name;
},
update() {
Sist2AdminApi.updateJob(this.getName(), this.job);
},
runJob() {
Sist2AdminApi.runJob(this.getName()).then(() => {
this.$bvToast.toast(this.$t("runJobConfirmation"), {
title: this.$t("runJobConfirmationTitle"),
variant: "success",
toaster: "b-toaster-bottom-right"
});
});
},
deleteJob() {
Sist2AdminApi.deleteJob(this.getName()).then(() => {
this.$router.push("/");
})
}
},
mounted() {
Sist2AdminApi.getJob(this.getName()).then(resp => {
this.loading = false;
this.job = resp.data;
})
}
}
</script>

View File

@ -1,168 +0,0 @@
<template>
<b-card>
<b-card-body>
<h4 class="mb-3">{{ taskId }} {{ $t("logs") }}</h4>
<div v-if="$store.state.sist2AdminInfo">
{{ $t("logFile") }}
<code>{{ $store.state.sist2AdminInfo.logs_folder }}/sist2-{{ taskId }}.log</code>
<br/>
<br/>
</div>
<b-row>
<b-col>
<span>{{ $t("logLevel") }}</span>
<b-select :options="levels.slice(0, -1)" v-model="logLevel" @input="connect()"></b-select>
</b-col>
<b-col>
<span>{{ $t("logMode") }}</span>
<b-select :options="modeOptions" v-model="mode" @input="connect()"></b-select>
</b-col>
</b-row>
<div id="log-tail-output" class="mt-3 ml-1"></div>
</b-card-body>
</b-card>
</template>
<script>
export default {
name: "Tail",
data() {
return {
logLevel: "DEBUG",
levels: ["DEBUG", "INFO", "WARNING", "ERROR", "ADMIN", "FATAL"],
socket: null,
mode: "follow",
modeOptions: [
{
"text": this.$t('follow'),
"value": "follow"
},
{
"text": this.$t('wholeFile'),
"value": "wholeFile"
}
]
}
},
computed: {
taskId: function () {
return this.$route.params.taskId;
}
},
methods: {
connect() {
let lineCount = 0;
const outputElem = document.getElementById("log-tail-output")
outputElem.replaceChildren();
if (this.socket !== null) {
this.socket.close();
}
const n = this.mode === "follow" ? 32 : 9999999999;
this.socket = new WebSocket(`ws://${window.location.host}/log/${this.taskId}?n=${n}`);
this.socket.onopen = () => {
this.socket.send("Hello from client");
}
this.socket.onmessage = e => {
let message;
try {
message = JSON.parse(e.data);
} catch {
console.error(e.data)
return;
}
if ("ping" in message) {
return;
}
if (message.level === undefined) {
if ("stderr" in message) {
message.level = "ERROR";
message.message = message["stderr"];
} else {
message.level = "ADMIN";
message.message = message["sist2-admin"];
}
message.datetime = ""
message.filepath = ""
}
if (this.levels.indexOf(message.level) < this.levels.indexOf(this.logLevel)) {
return;
}
const logLine = `${message.datetime} [${message.level} ${message.filepath}] ${message.message}`;
const span = document.createElement("span");
span.setAttribute("class", message.level);
span.appendChild(document.createTextNode(logLine));
outputElem.appendChild(span);
lineCount += 1;
if (this.mode === "follow" && lineCount >= n) {
outputElem.firstChild.remove();
}
}
}
},
mounted() {
this.connect()
}
}
</script>
<style>
#log-tail-output span {
display: block;
}
span.DEBUG {
color: #9E9E9E;
}
span.WARNING {
color: #FFB300;
}
span.INFO {
color: #039BE5;
}
span.ERROR {
color: #F4511E;
}
span.FATAL {
color: #F4511E;
}
span.ADMIN {
color: #ee05ff;
}
#log-tail-output {
font-size: 13px;
font-family: monospace;
padding: 6px;
background-color: #f5f5f5;
border: 1px solid #ccc;
border-radius: 4px;
margin: 3px;
white-space: pre;
color: #000;
overflow: hidden;
}
</style>

View File

@ -1,150 +0,0 @@
<template>
<div>
<b-card v-if="tasks.length > 0">
<h2>{{ $t("runningTasks") }}</h2>
<b-list-group>
<TaskListItem v-for="task in tasks" :key="task.id" :task="task"></TaskListItem>
</b-list-group>
</b-card>
<b-card class="mt-4">
<b-card-title>{{ $t("taskHistory") }}</b-card-title>
<br/>
<b-table
id="task-history"
:items="historyItems"
:fields="historyFields"
:current-page="historyCurrentPage"
:tbody-tr-class="rowClass"
:per-page="10"
>
<template #cell(logs)="data">
<router-link :to="`/log/${data.item.logs}`">{{ $t("logs") }}</router-link>
</template>
</b-table>
<b-pagination limit="20" v-model="historyCurrentPage" :total-rows="historyItems.length"
:per-page="10"></b-pagination>
</b-card>
</div>
</template>
<script>
import TaskListItem from "@/components/TaskListItem";
import Sist2AdminApi from "@/Sist2AdminApi";
import moment from "moment";
const DAY = 3600 * 24;
const HOUR = 3600;
const MINUTE = 60;
function humanDuration(sec_num) {
sec_num = sec_num / 1000;
const days = Math.floor(sec_num / DAY);
sec_num -= days * DAY;
const hours = Math.floor(sec_num / HOUR);
sec_num -= hours * HOUR;
const minutes = Math.floor(sec_num / MINUTE);
sec_num -= minutes * MINUTE;
const seconds = Math.floor(sec_num);
if (days > 0) {
return `${days} days ${hours}h ${minutes}m ${seconds}s`;
}
if (hours > 0) {
return `${hours}h ${minutes}m ${seconds}s`;
}
if (minutes > 0) {
return `${minutes}m ${seconds}s`;
}
if (seconds > 0) {
return `${seconds}s`;
}
return "<0s";
}
export default {
name: 'Tasks',
components: {TaskListItem},
data() {
return {
loading: true,
tasks: [],
taskHistory: [],
timerId: null,
historyFields: [
{key: "name", label: this.$t("taskName")},
{key: "time", label: this.$t("taskStarted")},
{key: "duration", label: this.$t("taskDuration")},
{key: "status", label: this.$t("taskStatus")},
{key: "logs", label: this.$t("logs")},
],
historyCurrentPage: 1,
historyItems: []
}
},
props: {
msg: String
},
mounted() {
this.loading = true;
this.update().then(() => this.loading = false);
this.timerId = window.setInterval(this.update, 1000);
this.updateHistory();
},
destroyed() {
if (this.timerId) {
window.clearInterval(this.timerId);
}
},
methods: {
rowClass(row) {
if (row.status === "failed") {
return "table-danger";
}
return null;
},
updateHistory() {
Sist2AdminApi.getTaskHistory().then(resp => {
this.historyItems = resp.data.map(row => ({
id: row.id,
name: row.name,
duration: this.taskDuration(row),
time: moment(row.started).format("dd, MMM Do YYYY, HH:mm:ss"),
logs: row.id,
status: row.return_code === 0 ? "ok" : "failed"
}));
});
},
update() {
return Sist2AdminApi.getTasks().then(resp => {
this.tasks = resp.data;
})
},
taskDuration(task) {
const start = moment.utc(task.started);
const end = moment.utc(task.ended);
return humanDuration(end.diff(start))
}
}
}
</script>
<style scoped>
#task-history {
font-family: monospace;
font-size: 12px;
}
</style>

View File

@ -1,5 +0,0 @@
module.exports = {
publicPath: "",
filenameHashing: false,
productionSourceMap: false,
};

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +0,0 @@
fastapi
git+https://github.com/simon987/hexlib.git
uvicorn
websockets
pycron

View File

@ -1,390 +0,0 @@
import asyncio
import os
import signal
from datetime import datetime
from urllib.parse import urlparse
import requests
import uvicorn
from fastapi import FastAPI, HTTPException
from hexlib.db import PersistentState
from requests import ConnectionError
from requests.exceptions import SSLError
from starlette.middleware.cors import CORSMiddleware
from starlette.responses import RedirectResponse
from starlette.staticfiles import StaticFiles
from starlette.websockets import WebSocket
from websockets.exceptions import ConnectionClosed
import cron
from config import LOG_FOLDER, logger, WEBSERVER_PORT, DATA_FOLDER, SIST2_BINARY
from jobs import Sist2Job, Sist2ScanTask, TaskQueue, Sist2IndexTask, JobStatus
from notifications import Subscribe, Notifications
from sist2 import Sist2
from state import migrate_v1_to_v2, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEMA_VERSION
from web import Sist2Frontend
sist2 = Sist2(SIST2_BINARY, DATA_FOLDER)
db = PersistentState(dbfile=os.path.join(DATA_FOLDER, "state.db"))
notifications = Notifications()
task_queue = TaskQueue(sist2, db, notifications)
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_credentials=True,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
app.mount("/ui/", StaticFiles(directory="./frontend/dist", html=True), name="static")
@app.get("/")
async def home():
return RedirectResponse("ui")
@app.get("/api")
async def api():
return {
"tesseract_langs": TESSERACT_LANGS,
"logs_folder": LOG_FOLDER
}
@app.get("/api/job/{name:str}")
async def get_job(name: str):
job = db["jobs"][name]
if not job:
raise HTTPException(status_code=404)
return job
@app.get("/api/frontend/{name:str}")
async def get_frontend(name: str):
frontend = db["frontends"][name]
frontend: Sist2Frontend
if frontend:
frontend.running = frontend.name in RUNNING_FRONTENDS
return frontend
raise HTTPException(status_code=404)
@app.get("/api/job/")
async def get_jobs():
return list(db["jobs"])
@app.put("/api/job/{name:str}")
async def update_job(name: str, new_job: Sist2Job):
# TODO: Check etag
new_job.last_modified = datetime.now()
job = db["jobs"][name]
if not job:
raise HTTPException(status_code=404)
args_that_trigger_full_scan = [
"path",
"thumbnail_count",
"thumbnail_quality",
"thumbnail_size",
"content_size",
"depth",
"archive",
"archive_passphrase",
"ocr_lang",
"ocr_images",
"ocr_ebooks",
"fast",
"checksums",
"read_subtitles",
]
for arg in args_that_trigger_full_scan:
if getattr(new_job.scan_options, arg) != getattr(job.scan_options, arg):
new_job.do_full_scan = True
db["jobs"][name] = new_job
@app.put("/api/frontend/{name:str}")
async def update_frontend(name: str, frontend: Sist2Frontend):
db["frontends"][name] = frontend
# TODO: Check etag
return "ok"
@app.get("/api/task/")
async def get_tasks():
return list(map(lambda t: t.json(), task_queue.tasks()))
@app.get("/api/task/history")
async def task_history():
return list(db["task_done"].sql("ORDER BY started DESC"))
@app.post("/api/task/{task_id:str}/kill")
async def kill_job(task_id: str):
return task_queue.kill_task(task_id)
def _run_job(job: Sist2Job):
job.last_modified = datetime.now()
if job.status == JobStatus("created"):
job.status = JobStatus("started")
db["jobs"][job.name] = job
scan_task = Sist2ScanTask(job, f"Scan [{job.name}]")
index_task = Sist2IndexTask(job, f"Index [{job.name}]", depends_on=scan_task)
task_queue.submit(scan_task)
task_queue.submit(index_task)
@app.get("/api/job/{name:str}/run")
async def run_job(name: str):
job = db["jobs"][name]
if not job:
raise HTTPException(status_code=404)
_run_job(job)
return "ok"
@app.delete("/api/job/{name:str}")
async def delete_job(name: str):
job = db["jobs"][name]
if job:
del db["jobs"][name]
else:
raise HTTPException(status_code=404)
@app.delete("/api/frontend/{name:str}")
async def delete_frontend(name: str):
if name in RUNNING_FRONTENDS:
os.kill(RUNNING_FRONTENDS[name], signal.SIGTERM)
del RUNNING_FRONTENDS[name]
frontend = db["frontends"][name]
if frontend:
del db["frontends"][name]
else:
raise HTTPException(status_code=404)
@app.post("/api/job/{name:str}")
async def create_job(name: str):
if db["jobs"][name]:
raise ValueError("Job with the same name already exists")
job = Sist2Job.create_default(name)
db["jobs"][name] = job
return job
@app.post("/api/frontend/{name:str}")
async def create_frontend(name: str):
if db["frontends"][name]:
raise ValueError("Frontend with the same name already exists")
frontend = Sist2Frontend.create_default(name)
db["frontends"][name] = frontend
return frontend
@app.get("/api/ping_es")
async def ping_es(url: str, insecure: bool):
return check_es_version(url, insecure)
def check_es_version(es_url: str, insecure: bool):
try:
url = urlparse(es_url)
if url.username:
auth = (url.username, url.password)
es_url = f"{url.scheme}://{url.hostname}:{url.port}"
else:
auth = None
r = requests.get(es_url, verify=insecure, auth=auth)
except SSLError:
return {
"ok": False,
"message": "Invalid SSL certificate"
}
except ConnectionError as e:
return {
"ok": False,
"message": "Connection refused"
}
except ValueError as e:
return {
"ok": False,
"message": str(e)
}
if r.status_code == 401:
return {
"ok": False,
"message": "Authentication failure"
}
try:
return {
"ok": True,
"message": "Elasticsearch version " + r.json()["version"]["number"]
}
except:
return {
"ok": False,
"message": "Could not read version"
}
def start_frontend_(frontend: Sist2Frontend):
frontend.web_options.indices = list(map(lambda j: db["jobs"][j].index_path, frontend.jobs))
pid = sist2.web(frontend.web_options, frontend.name)
RUNNING_FRONTENDS[frontend.name] = pid
@app.post("/api/frontend/{name:str}/start")
async def start_frontend(name: str):
frontend = db["frontends"][name]
if not frontend:
raise HTTPException(status_code=404)
start_frontend_(frontend)
@app.post("/api/frontend/{name:str}/stop")
async def stop_frontend(name: str):
if name in RUNNING_FRONTENDS:
os.kill(RUNNING_FRONTENDS[name], signal.SIGTERM)
del RUNNING_FRONTENDS[name]
@app.get("/api/frontend/")
async def get_frontends():
res = []
for frontend in db["frontends"]:
frontend: Sist2Frontend
frontend.running = frontend.name in RUNNING_FRONTENDS
res.append(frontend)
return res
def tail(filepath: str, n: int):
with open(filepath) as file:
reached_eof = False
buffer = []
line = ""
while True:
tmp = file.readline()
if tmp:
line += tmp
if line.endswith("\n"):
if reached_eof:
yield line
else:
if len(buffer) > n:
buffer.pop(0)
buffer.append(line)
line = ""
else:
if not reached_eof:
reached_eof = True
yield from buffer
yield None
@app.websocket("/notifications")
async def ws_tail_log(websocket: WebSocket):
await websocket.accept()
try:
await websocket.receive_text()
async with Subscribe(notifications) as ob:
async for notification in ob.notifications():
await websocket.send_json(notification)
print(notification)
except ConnectionClosed:
return
@app.websocket("/log/{task_id}")
async def ws_tail_log(websocket: WebSocket, task_id: str, n: int):
log_file = os.path.join(LOG_FOLDER, f"sist2-{task_id}.log")
await websocket.accept()
try:
await websocket.receive_text()
except ConnectionClosed:
return
while True:
for line in tail(log_file, n):
try:
if line:
await websocket.send_text(line)
else:
await websocket.send_json({"ping": ""})
await asyncio.sleep(0.1)
except ConnectionClosed:
return
def main():
uvicorn.run(app, port=WEBSERVER_PORT, host="0.0.0.0")
def initialize_db():
db["sist2_admin"]["info"] = {"version": DB_SCHEMA_VERSION}
frontend = Sist2Frontend.create_default("default")
db["frontends"]["default"] = frontend
logger.info("Initialized database.")
def start_frontends():
for frontend in db["frontends"]:
frontend: Sist2Frontend
if frontend.auto_start and len(frontend.jobs) > 0:
start_frontend_(frontend)
if __name__ == '__main__':
if not db["sist2_admin"]["info"]:
initialize_db()
if db["sist2_admin"]["info"]["version"] == "1":
logger.info("Migrating to v2 database schema")
migrate_v1_to_v2(db)
if db["sist2_admin"]["info"]["version"] == "2":
logger.error("Cannot migrate database from v2 to v3. Delete state.db to proceed.")
exit(-1)
start_frontends()
cron.initialize(db, _run_job)
logger.info("Started sist2-admin. Hello!")
main()

View File

@ -1,30 +0,0 @@
import os
import logging
import sys
from logging import StreamHandler
from logging.handlers import RotatingFileHandler
MAX_LOG_SIZE = 1 * 1024 * 1024
SIST2_BINARY = os.environ.get("SIST2_BINARY", "/root/sist2")
DATA_FOLDER = os.environ.get("DATA_FOLDER", "/sist2-admin/")
LOG_FOLDER = os.path.join(DATA_FOLDER, "logs")
WEBSERVER_PORT = 8080
os.makedirs(LOG_FOLDER, exist_ok=True)
os.makedirs(DATA_FOLDER, exist_ok=True)
logger = logging.Logger("sist2-admin")
_log_file = os.path.join(LOG_FOLDER, "sist2-admin.log")
_log_fmt = "%(asctime)s [%(levelname)s] %(message)s"
_log_formatter = logging.Formatter(_log_fmt, datefmt='%Y-%m-%d %H:%M:%S')
console_handler = StreamHandler(sys.stdout)
console_handler.setFormatter(_log_formatter)
file_handler = RotatingFileHandler(_log_file, mode="a", maxBytes=MAX_LOG_SIZE, backupCount=1)
file_handler.setFormatter(_log_formatter)
logger.addHandler(console_handler)
logger.addHandler(file_handler)

View File

@ -1,33 +0,0 @@
from threading import Thread
import pycron
import time
from hexlib.db import PersistentState
from config import logger
from jobs import Sist2Job
def _check_schedule(db: PersistentState, run_job):
for job in db["jobs"]:
job: Sist2Job
if job.schedule_enabled:
if pycron.is_now(job.cron_expression):
logger.info(f"Submit scan task to queue for [{job.name}]")
run_job(job)
def _cron_thread(db, run_job):
time.sleep(60 - (time.time() % 60))
start = time.time()
while True:
_check_schedule(db, run_job)
time.sleep(60 - ((time.time() - start) % 60))
def initialize(db, run_job):
t = Thread(target=_cron_thread, args=(db, run_job), daemon=True, name="timer")
t.start()

View File

@ -1,317 +0,0 @@
import json
import logging
import os.path
import signal
import uuid
from datetime import datetime
from enum import Enum
from logging import FileHandler
from threading import Lock, Thread
from time import sleep
from uuid import uuid4, UUID
from hexlib.db import PersistentState
from pydantic import BaseModel
from config import logger, LOG_FOLDER
from notifications import Notifications
from sist2 import ScanOptions, IndexOptions, Sist2
from state import RUNNING_FRONTENDS
from web import Sist2Frontend
class JobStatus(Enum):
CREATED = "created"
STARTED = "started"
INDEXED = "indexed"
FAILED = "failed"
class Sist2Job(BaseModel):
name: str
scan_options: ScanOptions
index_options: IndexOptions
cron_expression: str
schedule_enabled: bool = False
previous_index: str = None
index_path: str = None
previous_index_path: str = None
last_index_date: datetime = None
status: JobStatus = JobStatus("created")
last_modified: datetime
etag: str = None
do_full_scan: bool = False
def __init__(self, **kwargs):
super().__init__(**kwargs)
@staticmethod
def create_default(name: str):
return Sist2Job(
name=name,
scan_options=ScanOptions(path="/"),
index_options=IndexOptions(),
last_modified=datetime.now(),
cron_expression="0 0 * * *"
)
# @validator("etag", always=True)
# def validate_etag(cls, value, values):
# s = values["name"] + values["scan_options"].json() + values["index_options"].json() + values["cron_expression"]
# return md5(s.encode()).hexdigest()
class Sist2TaskProgress:
def __init__(self, done: int = 0, count: int = 0, index_size: int = 0, tn_size: int = 0, waiting: bool = False):
self.done = done
self.count = count
self.index_size = index_size
self.store_size = tn_size
self.waiting = waiting
def percent(self):
return (self.done / self.count) if self.count else 0
class Sist2Task:
def __init__(self, job: Sist2Job, display_name: str, depends_on: uuid.UUID = None):
self.job = job
self.display_name = display_name
self.progress = Sist2TaskProgress()
self.id = uuid4()
self.pid = None
self.started = None
self.ended = None
self.depends_on = depends_on
self._logger = logging.Logger(name=f"{self.id}")
self._logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"sist2-{self.id}.log")))
def json(self):
return {
"id": self.id,
"job": self.job,
"display_name": self.display_name,
"progress": self.progress,
"started": self.started,
"ended": self.ended,
"depends_on": self.depends_on,
}
def log_callback(self, log_json):
if "progress" in log_json:
self.progress = Sist2TaskProgress(**log_json["progress"])
elif self._logger:
self._logger.info(json.dumps(log_json))
def run(self, sist2: Sist2, db: PersistentState):
self.started = datetime.now()
logger.info(f"Started task {self.display_name}")
class Sist2ScanTask(Sist2Task):
def run(self, sist2: Sist2, db: PersistentState):
super().run(sist2, db)
self.job.scan_options.name = self.job.name
if self.job.index_path is not None and not self.job.do_full_scan:
self.job.scan_options.output = self.job.index_path
else:
self.job.scan_options.output = None
def set_pid(pid):
self.pid = pid
return_code = sist2.scan(self.job.scan_options, logs_cb=self.log_callback, set_pid_cb=set_pid)
self.ended = datetime.now()
if return_code != 0:
self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
logger.info(f"Task {self.display_name} failed ({return_code})")
else:
self.job.index_path = self.job.scan_options.output
self.job.last_index_date = datetime.now()
self.job.do_full_scan = False
db["jobs"][self.job.name] = self.job
self._logger.info(json.dumps({"sist2-admin": f"Save last_index_date={self.job.last_index_date}"}))
logger.info(f"Completed {self.display_name} ({return_code=})")
# Remove old index
if return_code == 0:
if self.job.previous_index_path is not None and self.job.previous_index_path != self.job.index_path:
self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index_path=}"}))
try:
os.remove(self.job.previous_index_path)
except FileNotFoundError:
pass
self.job.previous_index_path = self.job.index_path
db["jobs"][self.job.name] = self.job
return return_code
class Sist2IndexTask(Sist2Task):
def __init__(self, job: Sist2Job, display_name: str, depends_on: Sist2Task):
super().__init__(job, display_name, depends_on=depends_on.id)
def run(self, sist2: Sist2, db: PersistentState):
super().run(sist2, db)
self.job.index_options.path = self.job.scan_options.output
return_code = sist2.index(self.job.index_options, logs_cb=self.log_callback)
self.ended = datetime.now()
duration = self.ended - self.started
ok = return_code == 0
if ok:
self.restart_running_frontends(db, sist2)
# Update status
self.job.status = JobStatus("indexed") if ok else JobStatus("failed")
self.job.previous_index_path = self.job.index_path
db["jobs"][self.job.name] = self.job
self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}"}))
logger.info(f"Completed {self.display_name} ({return_code=})")
return return_code
def restart_running_frontends(self, db: PersistentState, sist2: Sist2):
for frontend_name, pid in RUNNING_FRONTENDS.items():
frontend = db["frontends"][frontend_name]
frontend: Sist2Frontend
try:
os.kill(pid, signal.SIGTERM)
except ProcessLookupError:
pass
try:
os.wait()
except ChildProcessError:
pass
frontend.web_options.indices = map(lambda j: db["jobs"][j].index_path, frontend.jobs)
pid = sist2.web(frontend.web_options, frontend.name)
RUNNING_FRONTENDS[frontend_name] = pid
self._logger.info(json.dumps({"sist2-admin": f"Restart frontend {pid=} {frontend_name=}"}))
class TaskQueue:
def __init__(self, sist2: Sist2, db: PersistentState, notifications: Notifications):
self._lock = Lock()
self._sist2 = sist2
self._db = db
self._notifications = notifications
self._tasks = {}
self._queue = []
self._sem = 0
self._thread = Thread(target=self._check_new_task, daemon=True)
self._thread.start()
def _tasks_failed(self):
done = set()
for row in self._db["task_done"].sql("WHERE return_code != 0"):
done.add(uuid.UUID(row["id"]))
return done
def _tasks_done(self):
done = set()
for row in self._db["task_done"]:
done.add(uuid.UUID(row["id"]))
return done
def _check_new_task(self):
while True:
with self._lock:
for task in list(self._queue):
task: Sist2Task
if self._sem >= 1:
break
if not task.depends_on or task.depends_on in self._tasks_done():
self._queue.remove(task)
if task.depends_on in self._tasks_failed():
# The task which we depend on failed, continue
continue
self._sem += 1
t = Thread(target=self._run_task, args=(task,))
self._tasks[task.id] = {
"task": task,
"thread": t,
}
t.start()
break
sleep(1)
def tasks(self):
return list(map(lambda t: t["task"], self._tasks.values()))
def kill_task(self, task_id):
task = self._tasks.get(UUID(task_id))
if task:
pid = task["task"].pid
logger.info(f"Killing task {task_id} (pid={pid})")
os.kill(pid, signal.SIGTERM)
return True
return False
def _run_task(self, task: Sist2Task):
task_result = task.run(self._sist2, self._db)
with self._lock:
del self._tasks[task.id]
self._sem -= 1
self._db["task_done"][task.id] = {
"ended": task.ended,
"started": task.started,
"name": task.display_name,
"return_code": task_result
}
if isinstance(task, Sist2IndexTask):
self._notifications.notify({
"message": "notifications.indexCompleted",
"job": task.job.name
})
def submit(self, task: Sist2Task):
logger.info(f"Submitted task to queue {task.display_name}")
with self._lock:
self._queue.append(task)

View File

@ -1,40 +0,0 @@
import asyncio
from typing import List
class Notifications:
def __init__(self):
self._subscribers: List[Subscribe] = []
def subscribe(self, ob):
self._subscribers.append(ob)
def unsubscribe(self, ob):
self._subscribers.remove(ob)
def notify(self, notification: dict):
for ob in self._subscribers:
ob.notify(notification)
class Subscribe:
def __init__(self, notifications: Notifications):
self._queue = []
self._notifications = notifications
async def __aenter__(self):
self._notifications.subscribe(self)
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
self._notifications.unsubscribe(self)
def notify(self, notification: dict):
self._queue.append(notification)
async def notifications(self):
while True:
try:
yield self._queue.pop(0)
except IndexError:
await asyncio.sleep(0.1)

View File

@ -1,323 +0,0 @@
import datetime
import json
import logging
import os.path
from datetime import datetime
from io import TextIOWrapper
from logging import FileHandler
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
from threading import Thread
from typing import List
from pydantic import BaseModel
from config import logger, LOG_FOLDER
class Sist2Version:
def __init__(self, version: str):
self._version = version
self.major, self.minor, self.patch = [int(x) for x in version.split(".")]
def __str__(self):
return f"{self.major}.{self.minor}.{self.patch}"
class WebOptions(BaseModel):
indices: List[str] = []
es_url: str = "http://elasticsearch:9200"
es_insecure_ssl: bool = False
es_index: str = "sist2"
bind: str = "0.0.0.0:4090"
auth: str = None
tag_auth: str = None
tagline: str = "Lightning-fast file system indexer and search tool"
dev: bool = False
lang: str = "en"
auth0_audience: str = None
auth0_domain: str = None
auth0_client_id: str = None
auth0_public_key: str = None
auth0_public_key_file: str = None
def __init__(self, **kwargs):
super().__init__(**kwargs)
def args(self):
args = ["web", f"--es-url={self.es_url}", f"--es-index={self.es_index}", f"--bind={self.bind}",
f"--tagline={self.tagline}", f"--lang={self.lang}"]
if self.auth0_audience:
args.append(f"--auth0-audience={self.auth0_audience}")
if self.auth0_domain:
args.append(f"--auth0-domain={self.auth0_domain}")
if self.auth0_client_id:
args.append(f"--auth0-client-id={self.auth0_client_id}")
if self.auth0_public_key_file:
args.append(f"--auth0-public-key-file={self.auth0_public_key_file}")
if self.es_insecure_ssl:
args.append(f"--es-insecure-ssl")
if self.auth:
args.append(f"--auth={self.auth}")
if self.tag_auth:
args.append(f"--tag-auth={self.tag_auth}")
if self.dev:
args.append(f"--dev")
args.extend(self.indices)
return args
class IndexOptions(BaseModel):
path: str = None
threads: int = 1
es_url: str = "http://elasticsearch:9200"
es_insecure_ssl: bool = False
es_index: str = "sist2"
incremental_index: bool = True
script: str = ""
script_file: str = None
batch_size: int = 70
def __init__(self, **kwargs):
super().__init__(**kwargs)
def args(self):
args = ["index", self.path, f"--threads={self.threads}", f"--es-url={self.es_url}",
f"--es-index={self.es_index}", f"--batch-size={self.batch_size}"]
if self.script_file:
args.append(f"--script-file={self.script_file}")
if self.es_insecure_ssl:
args.append(f"--es-insecure-ssl")
if self.incremental_index:
args.append(f"--incremental-index")
return args
ARCHIVE_SKIP = "skip"
ARCHIVE_LIST = "list"
ARCHIVE_SHALLOW = "shallow"
ARCHIVE_RECURSE = "recurse"
class ScanOptions(BaseModel):
path: str
threads: int = 1
thumbnail_quality: int = 50
thumbnail_size: int = 552
thumbnail_count: int = 1
content_size: int = 32768
depth: int = -1
archive: str = ARCHIVE_RECURSE
archive_passphrase: str = None
ocr_lang: str = None
ocr_images: bool = False
ocr_ebooks: bool = False
exclude: str = None
fast: bool = False
treemap_threshold: float = 0.0005
mem_buffer: int = 2000
read_subtitles: bool = False
fast_epub: bool = False
checksums: bool = False
incremental: bool = True
optimize_index: bool = False
output: str = None
name: str = None
rewrite_url: str = None
list_file: str = None
def __init__(self, **kwargs):
super().__init__(**kwargs)
def args(self):
args = ["scan", self.path, f"--threads={self.threads}", f"--thumbnail-quality={self.thumbnail_quality}",
f"--thumbnail-count={self.thumbnail_count}", f"--thumbnail-size={self.thumbnail_size}",
f"--content-size={self.content_size}", f"--output={self.output}", f"--depth={self.depth}",
f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"]
if self.incremental:
args.append(f"--incremental")
if self.optimize_index:
args.append(f"--optimize-index")
if self.rewrite_url:
args.append(f"--rewrite-url={self.rewrite_url}")
if self.name:
args.append(f"--name={self.name}")
if self.archive_passphrase:
args.append(f"--archive-passphrase={self.archive_passphrase}")
if self.ocr_lang:
args.append(f"--ocr-lang={self.ocr_lang}")
if self.ocr_ebooks:
args.append(f"--ocr-ebooks")
if self.ocr_images:
args.append(f"--ocr-images")
if self.exclude:
args.append(f"--exclude={self.exclude}")
if self.fast:
args.append(f"--fast")
if self.treemap_threshold:
args.append(f"--treemap-threshold={self.treemap_threshold}")
if self.read_subtitles:
args.append(f"--read-subtitles")
if self.fast_epub:
args.append(f"--fast-epub")
if self.checksums:
args.append(f"--checksums")
if self.list_file:
args.append(f"--list_file={self.list_file}")
return args
class Sist2Index:
def __init__(self, path):
self.path = path
with open(os.path.join(path, "descriptor.json")) as f:
self._descriptor = json.load(f)
def to_json(self):
return {
"path": self.path,
"version": self.version(),
"timestamp": self.timestamp(),
"name": self.name()
}
def version(self) -> Sist2Version:
return Sist2Version(self._descriptor["version"])
def timestamp(self) -> datetime:
return datetime.fromtimestamp(self._descriptor["timestamp"])
def name(self) -> str:
return self._descriptor["name"]
class Sist2:
def __init__(self, bin_path: str, data_directory: str):
self._bin_path = bin_path
self._data_dir = data_directory
def index(self, options: IndexOptions, logs_cb):
if options.script:
with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".painless", delete=False) as f:
f.write(options.script)
options.script_file = f.name
else:
options.script_file = None
args = [
self._bin_path,
*options.args(),
"--json-logs",
"--very-verbose"
]
proc = Popen(args, stdout=PIPE, stderr=PIPE)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start()
self._consume_logs_stdout(logs_cb, proc)
t_stderr.join()
return proc.returncode
def scan(self, options: ScanOptions, logs_cb, set_pid_cb):
if options.output is None:
options.output = os.path.join(
self._data_dir,
f"scan-{options.name.replace('/', '_')}-{datetime.now()}.sist2"
)
args = [
self._bin_path,
*options.args(),
"--json-logs",
"--very-verbose"
]
logs_cb({"sist2-admin": f"Starting sist2 command with args {args}"})
proc = Popen(args, stdout=PIPE, stderr=PIPE)
set_pid_cb(proc.pid)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start()
self._consume_logs_stdout(logs_cb, proc)
t_stderr.join()
return proc.returncode
@staticmethod
def _consume_logs_stderr(logs_cb, proc):
pipe_wrapper = TextIOWrapper(proc.stderr, encoding="utf8", errors="ignore")
try:
for line in pipe_wrapper:
if line.strip() == "":
continue
logs_cb({"stderr": line})
finally:
proc.wait()
pipe_wrapper.close()
@staticmethod
def _consume_logs_stdout(logs_cb, proc):
pipe_wrapper = TextIOWrapper(proc.stdout, encoding="utf8", errors="ignore")
for line in pipe_wrapper:
try:
if line.strip() == "":
continue
log_object = json.loads(line)
logs_cb(log_object)
except Exception as e:
try:
logs_cb({"sist2-admin": f"Could not decode log line: {line}; {e}"})
except NameError:
pass
def web(self, options: WebOptions, name: str):
if options.auth0_public_key:
with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".txt", delete=False) as f:
f.write(options.auth0_public_key)
options.auth0_public_key_file = f.name
else:
options.auth0_public_key_file = None
args = [
self._bin_path,
*options.args()
]
web_logger = logging.Logger(name=f"sist2-frontend-{name}")
web_logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"frontend-{name}.log")))
def logs_cb(message):
web_logger.info(json.dumps(message))
logger.info(f"Starting frontend {' '.join(args)}")
proc = Popen(args, stdout=PIPE, stderr=PIPE)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start()
t_stdout = Thread(target=self._consume_logs_stdout, args=(logs_cb, proc))
t_stdout.start()
return proc.pid

View File

@ -1,79 +0,0 @@
from typing import Dict
import shutil
from hexlib.db import Table, PersistentState
import pickle
from tesseract import get_tesseract_langs
RUNNING_FRONTENDS: Dict[str, int] = {}
TESSERACT_LANGS = get_tesseract_langs()
DB_SCHEMA_VERSION = "3"
from pydantic import BaseModel
def _serialize(item):
if isinstance(item, BaseModel):
return pickle.dumps(item)
if isinstance(item, bytes):
raise Exception("FIXME: bytes in PickleTable")
return item
def _deserialize(item):
if isinstance(item, bytes):
return pickle.loads(item)
return item
class PickleTable(Table):
def __getitem__(self, item):
row = super().__getitem__(item)
if row:
return dict((k, _deserialize(v)) for k, v in row.items())
return row
def __setitem__(self, key, value):
value = dict((k, _serialize(v)) for k, v in value.items())
super().__setitem__(key, value)
def __iter__(self):
for row in super().__iter__():
yield dict((k, _deserialize(v)) for k, v in row.items())
def sql(self, where_clause, *params):
for row in super().sql(where_clause, *params):
yield dict((k, _deserialize(v)) for k, v in row.items())
def migrate_v1_to_v2(db: PersistentState):
shutil.copy(db.dbfile, db.dbfile + "-before-migrate-v2.bak")
# Frontends
db._table_factory = PickleTable
frontends = [row["frontend"] for row in db["frontends"]]
del db["frontends"]
db._table_factory = Table
for frontend in frontends:
db["frontends"][frontend.name] = frontend
list(db["frontends"])
# Jobs
db._table_factory = PickleTable
jobs = [row["job"] for row in db["jobs"]]
del db["jobs"]
db._table_factory = Table
for job in jobs:
db["jobs"][job.name] = job
list(db["jobs"])
db["sist2_admin"]["info"] = {
"version": "2"
}

View File

@ -1,14 +0,0 @@
import subprocess
def get_tesseract_langs():
res = subprocess.check_output([
"tesseract",
"--list-langs"
]).decode()
languages = res.split("\n")[1:]
return list(filter(lambda lang: lang and lang != "osd", languages))

View File

@ -1,28 +0,0 @@
import os.path
from typing import List
from pydantic import BaseModel
from sist2 import WebOptions
class Sist2Frontend(BaseModel):
name: str
jobs: List[str]
web_options: WebOptions
running: bool = False
auto_start: bool = False
extra_query_args: str = ""
custom_url: str = None
def get_log_path(self, log_folder: str):
return os.path.join(log_folder, f"frontend-{self.name}.log")
@staticmethod
def create_default(name: str):
return Sist2Frontend(
name=name,
web_options=WebOptions(),
jobs=[]
)

9
sist2-vue/dist/css/chunk-vendors.css vendored Normal file

File diff suppressed because one or more lines are too long

1
sist2-vue/dist/css/index.css vendored Normal file

File diff suppressed because one or more lines are too long

3
sist2-vue/dist/index.html vendored Normal file
View File

@ -0,0 +1,3 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,user-scalable=no"><title>sist2</title><link href="css/chunk-vendors.css" rel="preload" as="style"><link href="css/index.css" rel="preload" as="style"><link href="js/chunk-vendors.js" rel="preload" as="script"><link href="js/index.js" rel="preload" as="script"><link href="css/chunk-vendors.css" rel="stylesheet"><link href="css/index.css" rel="stylesheet"></head><body><noscript><style>body {
height: initial;
}</style><div style="text-align: center; margin-top: 100px"><strong>We're sorry but sist2 doesn't work properly without JavaScript enabled. Please enable it to continue.</strong><br><strong>Nous sommes désolés mais sist2 ne fonctionne pas correctement si JavaScript est activé. Veuillez l'activer pour continuer.</strong></div></noscript><div id="app"></div><script src="js/chunk-vendors.js"></script><script src="js/index.js"></script></body></html>

140
sist2-vue/dist/js/chunk-vendors.js vendored Normal file

File diff suppressed because one or more lines are too long

1
sist2-vue/dist/js/index.js vendored Normal file

File diff suppressed because one or more lines are too long

Binary file not shown.

24369
sist2-vue/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -7,16 +7,14 @@
"build": "vue-cli-service build --mode production" "build": "vue-cli-service build --mode production"
}, },
"dependencies": { "dependencies": {
"@auth0/auth0-spa-js": "^2.0.2",
"@egjs/vue-infinitegrid": "3.3.0", "@egjs/vue-infinitegrid": "3.3.0",
"@tensorflow/tfjs": "^4.4.0",
"axios": "^0.25.0", "axios": "^0.25.0",
"bootstrap-vue": "^2.21.2", "bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5", "core-js": "^3.6.5",
"d3": "^7.8.4", "d3": "^5.16.0",
"date-fns": "^2.21.3", "date-fns": "^2.21.3",
"dom-to-image": "^2.6.0", "dom-to-image": "^2.6.0",
"fslightbox-vue": "fslightbox-vue.tgz", "fslightbox-vue": "file:../../../mnt/Hatchery/main/projects/sist2/fslightbox-vue-pro-1.3.1.tgz",
"nouislider": "^15.2.0", "nouislider": "^15.2.0",
"underscore": "^1.13.1", "underscore": "^1.13.1",
"vue": "^2.6.12", "vue": "^2.6.12",
@ -28,12 +26,12 @@
"vuex": "^3.4.0" "vuex": "^3.4.0"
}, },
"devDependencies": { "devDependencies": {
"@babel/polyfill": "^7.12.1", "@babel/polyfill": "^7.11.5",
"@vue/cli-plugin-babel": "~5.0.8", "@vue/cli-plugin-babel": "~4.5.0",
"@vue/cli-plugin-router": "~5.0.8", "@vue/cli-plugin-router": "~4.5.0",
"@vue/cli-plugin-typescript": "^5.0.8", "@vue/cli-plugin-typescript": "~4.5.0",
"@vue/cli-plugin-vuex": "~5.0.8", "@vue/cli-plugin-vuex": "~4.5.0",
"@vue/cli-service": "^5.0.8", "@vue/cli-service": "~4.5.0",
"@vue/test-utils": "^1.0.3", "@vue/test-utils": "^1.0.3",
"bootstrap": "^4.5.2", "bootstrap": "^4.5.2",
"inspire-tree": "^4.3.1", "inspire-tree": "^4.3.1",

View File

@ -19,6 +19,12 @@
We're sorry but <%= htmlWebpackPlugin.options.title %> doesn't work properly without JavaScript enabled. We're sorry but <%= htmlWebpackPlugin.options.title %> doesn't work properly without JavaScript enabled.
Please enable it to continue. Please enable it to continue.
</strong> </strong>
<br/>
<strong>
Nous sommes désolés mais <%= htmlWebpackPlugin.options.title %> ne fonctionne pas correctement
si JavaScript est activé.
Veuillez l'activer pour continuer.
</strong>
</div> </div>
</noscript> </noscript>
<div id="app"></div> <div id="app"></div>

View File

@ -1,36 +1,19 @@
<template> <template>
<div id="app" :class="getClass()" v-if="!authLoading"> <div id="app" :class="getClass()">
<NavBar></NavBar> <NavBar></NavBar>
<router-view v-if="!configLoading"/> <router-view v-if="!configLoading"/>
</div> </div>
<div class="loading-page" v-else>
<div class="loading-spinners">
<b-spinner type="grow" variant="primary"></b-spinner>
<b-spinner type="grow" variant="primary"></b-spinner>
<b-spinner type="grow" variant="primary"></b-spinner>
</div>
<div class="loading-text">
Loading Chargement 装载 Wird geladen Ładowanie
</div>
</div>
</template> </template>
<script> <script>
import NavBar from "@/components/NavBar"; import NavBar from "@/components/NavBar";
import {mapActions, mapGetters, mapMutations} from "vuex"; import {mapGetters} from "vuex";
import Sist2Api from "@/Sist2Api";
import ModelsRepo from "@/ml/modelsRepo";
import {setupAuth0} from "@/main";
import Sist2ElasticsearchQuery from "@/Sist2ElasticsearchQuery";
import Sist2SqliteQuery from "@/Sist2SqliteQuery";
export default { export default {
components: {NavBar}, components: {NavBar},
data() { data() {
return { return {
configLoading: false, configLoading: false
authLoading: true,
sist2InfoLoading: true
} }
}, },
computed: { computed: {
@ -39,17 +22,6 @@ export default {
mounted() { mounted() {
this.$store.dispatch("loadConfiguration").then(() => { this.$store.dispatch("loadConfiguration").then(() => {
this.$root.$i18n.locale = this.$store.state.optLang; this.$root.$i18n.locale = this.$store.state.optLang;
ModelsRepo.init(this.$store.getters.mlRepositoryList).catch(err => {
this.$bvToast.toast(
this.$t("ml.repoFetchError"),
{
title: this.$t("ml.repoFetchErrorTitle"),
noAutoHide: true,
toaster: "b-toaster-bottom-right",
headerClass: "toast-header-warning",
bodyClass: "toast-body-warning",
});
});
}); });
this.$store.subscribe((mutation) => { this.$store.subscribe((mutation) => {
@ -58,50 +30,9 @@ export default {
this.configLoading = true; this.configLoading = true;
window.setTimeout(() => this.configLoading = false, 10); window.setTimeout(() => this.configLoading = false, 10);
} }
if (mutation.type === "setAuth0Token") {
this.authLoading = false;
}
});
Sist2Api.getSist2Info().then(data => {
if (data.auth0Enabled) {
this.authLoading = true;
setupAuth0(data.auth0Domain, data.auth0ClientId, data.auth0Audience)
this.$auth.$watch("loading", loading => {
if (loading === false) {
if (!this.$auth.isAuthenticated) {
this.$auth.loginWithRedirect();
return;
}
// Remove "code" param
window.history.replaceState({}, "", "/" + window.location.hash);
this.$store.dispatch("loadAuth0Token");
}
});
} else {
this.authLoading = false;
}
this.setSist2Info(data);
this.setIndices(data.indices)
if (Sist2Api.backend() === "sqlite") {
Sist2Api.init(Sist2SqliteQuery.searchQuery);
this.$store.commit("setUiSqliteMode", true);
} else {
Sist2Api.init(Sist2ElasticsearchQuery.searchQuery);
}
}); });
}, },
methods: { methods: {
...mapActions(["setSist2Info",]),
...mapMutations(["setIndices",]),
getClass() { getClass() {
return { return {
"theme-light": this.optTheme === "light", "theme-light": this.optTheme === "light",
@ -383,22 +314,4 @@ mark {
.pointer { .pointer {
cursor: pointer; cursor: pointer;
} }
.loading-page {
display: flex;
justify-content: center;
align-items: center;
flex-direction: column;
height: 100%;
gap: 15px
}
.loading-spinners {
display: flex;
gap: 10px;
}
.loading-text {
text-align: center;
}
</style> </style>

View File

@ -1,7 +1,5 @@
import axios from "axios"; import axios from "axios";
import {ext, strUnescape, lum} from "./util"; import {ext, strUnescape, lum} from "./util";
import Sist2Query from "@/Sist2ElasticsearchQuery";
import store from "@/store";
export interface EsTag { export interface EsTag {
id: string id: string
@ -63,7 +61,6 @@ export interface EsHit {
isAudio: boolean isAudio: boolean
hasThumbnail: boolean hasThumbnail: boolean
hasVidPreview: boolean hasVidPreview: boolean
imageAspectRatio: number
/** Number of thumbnails available */ /** Number of thumbnails available */
tnNum: number tnNum: number
} }
@ -101,22 +98,12 @@ export interface EsResult {
class Sist2Api { class Sist2Api {
private readonly baseUrl: string private baseUrl: string
private sist2Info: any
private queryfunc: () => EsResult;
constructor(baseUrl: string) { constructor(baseUrl: string) {
this.baseUrl = baseUrl; this.baseUrl = baseUrl;
} }
init(queryFunc: () => EsResult) {
this.queryfunc = queryFunc;
}
backend() {
return this.sist2Info.searchBackend;
}
getSist2Info(): Promise<any> { getSist2Info(): Promise<any> {
return axios.get(`${this.baseUrl}i`).then(resp => { return axios.get(`${this.baseUrl}i`).then(resp => {
const indices = resp.data.indices as Index[]; const indices = resp.data.indices as Index[];
@ -131,8 +118,6 @@ class Sist2Api {
} as Index; } as Index;
}); });
this.sist2Info = resp.data;
return resp.data; return resp.data;
}) })
} }
@ -170,9 +155,6 @@ class Sist2Api {
&& hit._source.videoc !== "raw" && hit._source.videoc !== "ppm") { && hit._source.videoc !== "raw" && hit._source.videoc !== "ppm") {
hit._props.isPlayableImage = true; hit._props.isPlayableImage = true;
} }
if ("width" in hit._source && "height" in hit._source) {
hit._props.imageAspectRatio = hit._source.width / hit._source.height;
}
break; break;
case "video": case "video":
if ("videoc" in hit._source) { if ("videoc" in hit._source) {
@ -205,6 +187,30 @@ class Sist2Api {
setHitTags(hit: EsHit): void { setHitTags(hit: EsHit): void {
const tags = [] as Tag[]; const tags = [] as Tag[];
const mimeCategory = hit._source.mime == null ? null : hit._source.mime.split("/")[0];
switch (mimeCategory) {
case "image":
case "video":
if ("videoc" in hit._source && hit._source.videoc) {
tags.push({
style: "video",
text: hit._source.videoc.replace(" ", ""),
userTag: false
} as Tag);
}
break
case "audio":
if ("audioc" in hit._source && hit._source.audioc) {
tags.push({
style: "audio",
text: hit._source.audioc,
userTag: false
} as Tag);
}
break;
}
// User tags // User tags
if ("tag" in hit._source) { if ("tag" in hit._source) {
hit._source.tag.forEach(tag => { hit._source.tag.forEach(tag => {
@ -233,14 +239,6 @@ class Sist2Api {
} as Tag; } as Tag;
} }
search(): Promise<EsResult> {
if (this.backend() == "sqlite") {
return this.ftsQuery(this.queryfunc())
} else {
return this.esQuery(this.queryfunc());
}
}
esQuery(query: any): Promise<EsResult> { esQuery(query: any): Promise<EsResult> {
return axios.post(`${this.baseUrl}es`, query).then(resp => { return axios.post(`${this.baseUrl}es`, query).then(resp => {
const res = resp.data as EsResult; const res = resp.data as EsResult;
@ -259,30 +257,7 @@ class Sist2Api {
}); });
} }
ftsQuery(query: any): Promise<EsResult> { getMimeTypes(query = undefined) {
return axios.post(`${this.baseUrl}fts/search`, query).then(resp => {
const res = resp.data as any;
if (res.hits.hits) {
res.hits.hits.forEach(hit => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
this.setHitProps(hit);
this.setHitTags(hit);
if ("highlight" in hit) {
hit["highlight"]["name"] = [hit["highlight"]["name"]];
hit["highlight"]["content"] = [hit["highlight"]["content"]];
}
});
}
return res;
});
}
private getMimeTypesEs(query) {
const AGGS = { const AGGS = {
mimeTypes: { mimeTypes: {
terms: { terms: {
@ -303,42 +278,19 @@ class Sist2Api {
} }
return this.esQuery(query).then(resp => { return this.esQuery(query).then(resp => {
return resp["aggregations"]["mimeTypes"]["buckets"].map(bucket => ({
mime: bucket.key,
count: bucket.doc_count
}));
});
}
private getMimeTypesSqlite(): Promise<[{ mime: string, count: number }]> {
return axios.get(`${this.baseUrl}fts/mimetypes`)
.then(resp => {
return resp.data;
});
}
async getMimeTypes(query = undefined) {
let buckets;
if (this.backend() == "sqlite") {
buckets = await this.getMimeTypesSqlite();
} else {
buckets = await this.getMimeTypesEs(query);
}
const mimeMap: any[] = []; const mimeMap: any[] = [];
const buckets = resp["aggregations"]["mimeTypes"]["buckets"];
buckets.sort((a: any, b: any) => a.mime > b.mime).forEach((bucket: any) => { buckets.sort((a: any, b: any) => a.key > b.key).forEach((bucket: any) => {
const tmp = bucket.mime.split("/"); const tmp = bucket["key"].split("/");
const category = tmp[0]; const category = tmp[0];
const mime = tmp[1]; const mime = tmp[1];
let category_exists = false; let category_exists = false;
const child = { const child = {
"id": bucket.mime, "id": bucket["key"],
"text": `${mime} (${bucket.count})` "text": `${mime} (${bucket["doc_count"]})`
}; };
mimeMap.forEach(node => { mimeMap.forEach(node => {
@ -361,12 +313,13 @@ class Sist2Api {
mimeMap.sort((a, b) => a.id.localeCompare(b.id)) mimeMap.sort((a, b) => a.id.localeCompare(b.id))
return {buckets, mimeMap}; return {buckets, mimeMap};
});
} }
_createEsTag(tag: string, count: number): EsTag { _createEsTag(tag: string, count: number): EsTag {
const tokens = tag.split("."); const tokens = tag.split(".");
if (/.*\.#[0-9a-fA-F]{6}/.test(tag)) { if (/.*\.#[0-9a-f]{6}/.test(tag)) {
return { return {
id: tokens.slice(0, -1).join("."), id: tokens.slice(0, -1).join("."),
color: tokens.pop(), color: tokens.pop(),
@ -383,42 +336,25 @@ class Sist2Api {
}; };
} }
private getTagsEs() { getTags() {
return this.esQuery({ return this.esQuery({
aggs: { aggs: {
tags: { tags: {
terms: { terms: {
field: "tag", field: "tag",
size: 65535 size: 10000
} }
} }
}, },
size: 0, size: 0,
}).then(resp => { }).then(resp => {
return resp["aggregations"]["tags"]["buckets"]
.sort((a: any, b: any) => a["key"].localeCompare(b["key"]))
.map((bucket: any) => this._createEsTag(bucket["key"], bucket["doc_count"]));
});
}
private getTagsSqlite() {
return axios.get(`${this.baseUrl}/fts/tags`)
.then(resp => {
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
});
}
async getTags(): Promise<EsTag[]> {
let tags;
if (this.backend() == "sqlite") {
tags = await this.getTagsSqlite();
} else {
tags = await this.getTagsEs();
}
// Remove duplicates (same tag with different color)
const seen = new Set(); const seen = new Set();
const tags = resp["aggregations"]["tags"]["buckets"]
.sort((a: any, b: any) => a["key"].localeCompare(b["key"]))
.map((bucket: any) => this._createEsTag(bucket["key"], bucket["doc_count"]));
// Remove duplicates (same tag with different color)
return tags.filter((t: EsTag) => { return tags.filter((t: EsTag) => {
if (seen.has(t.id)) { if (seen.has(t.id)) {
return false; return false;
@ -426,6 +362,7 @@ class Sist2Api {
seen.add(t.id); seen.add(t.id);
return true; return true;
}); });
});
} }
saveTag(tag: string, hit: EsHit) { saveTag(tag: string, hit: EsHit) {
@ -444,263 +381,20 @@ class Sist2Api {
}); });
} }
searchPaths(indexId, minDepth, maxDepth, prefix = null) { getTreemapCsvUrl(indexId: string) {
if (this.backend() == "sqlite") { return `${this.baseUrl}s/${indexId}/1`;
return this.searchPathsSqlite(indexId, minDepth, minDepth, prefix);
} else {
return this.searchPathsEs(indexId, minDepth, maxDepth, prefix);
}
} }
private searchPathsSqlite(indexId, minDepth, maxDepth, prefix) { getMimeCsvUrl(indexId: string) {
return axios.post(`${this.baseUrl}fts/paths`, { return `${this.baseUrl}s/${indexId}/2`;
indexId, minDepth, maxDepth, prefix
}).then(resp => {
return resp.data;
});
} }
private searchPathsEs(indexId, minDepth, maxDepth, prefix): Promise<[{ path: string, count: number }]> { getSizeCsv(indexId: string) {
return `${this.baseUrl}s/${indexId}/3`;
const query = {
query: {
bool: {
filter: [
{term: {index: indexId}},
{range: {_depth: {gte: minDepth, lte: maxDepth}}},
]
}
},
aggs: {
paths: {
terms: {
field: "path",
size: 10000
}
}
},
size: 0
};
if (prefix != null) {
query["query"]["bool"]["must"] = {
prefix: {
path: prefix,
}
};
} }
return this.esQuery(query).then(resp => { getDateCsv(indexId: string) {
const buckets = resp["aggregations"]["paths"]["buckets"]; return `${this.baseUrl}s/${indexId}/4`;
if (!buckets) {
return [];
}
return buckets
.map(bucket => ({
path: bucket.key,
count: bucket.doc_count
}));
});
}
private getDateRangeSqlite() {
return axios.get(`${this.baseUrl}fts/dateRange`)
.then(resp => ({
min: resp.data.dateMin,
max: resp.data.dateMax,
}));
}
getDateRange(): Promise<{ min: number, max: number }> {
if (this.backend() == "sqlite") {
return this.getDateRangeSqlite();
} else {
return this.getDateRangeEs();
}
}
private getDateRangeEs() {
return this.esQuery({
// TODO: filter current selected indices
aggs: {
dateMin: {min: {field: "mtime"}},
dateMax: {max: {field: "mtime"}},
},
size: 0
}).then(res => {
const range = {
min: res.aggregations.dateMin.value,
max: res.aggregations.dateMax.value,
}
if (range.min == null) {
range.min = 0;
range.max = 1;
} else if (range.min == range.max) {
range.max += 1;
}
return range;
});
}
private getPathSuggestionsSqlite(text: string) {
return axios.post(`${this.baseUrl}fts/paths`, {
prefix: text,
minDepth: 1,
maxDepth: 10000
}).then(resp => {
return resp.data.map(bucket => bucket.path);
})
}
private getPathSuggestionsEs(text) {
return this.esQuery({
suggest: {
path: {
prefix: text,
completion: {
field: "suggest-path",
skip_duplicates: true,
size: 10000
}
}
}
}).then(resp => {
return resp["suggest"]["path"][0]["options"]
.map(opt => opt["_source"]["path"]);
});
}
getPathSuggestions(text: string): Promise<string[]> {
if (this.backend() == "sqlite") {
return this.getPathSuggestionsSqlite(text);
} else {
return this.getPathSuggestionsEs(text)
}
}
getTreemapStat(indexId: string) {
return `${this.baseUrl}s/${indexId}/TMAP`;
}
getMimeStat(indexId: string) {
return `${this.baseUrl}s/${indexId}/MAGG`;
}
getSizeStat(indexId: string) {
return `${this.baseUrl}s/${indexId}/SAGG`;
}
getDateStat(indexId: string) {
return `${this.baseUrl}s/${indexId}/DAGG`;
}
private getDocumentEs(docId: string, highlight: boolean, fuzzy: boolean) {
const query = Sist2Query.searchQuery();
if (highlight) {
const fields = fuzzy
? {"content.nGram": {}}
: {content: {}};
query.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
number_of_fragments: 0,
fields,
};
if (!store.state.sist2Info.esVersionLegacy) {
query.highlight.max_analyzed_offset = 999_999;
}
}
if ("function_score" in query.query) {
query.query = query.query.function_score.query;
}
if (!("must" in query.query.bool)) {
query.query.bool.must = [];
} else if (!Array.isArray(query.query.bool.must)) {
query.query.bool.must = [query.query.bool.must];
}
query.query.bool.must.push({match: {_id: docId}});
delete query["sort"];
delete query["aggs"];
delete query["search_after"];
delete query.query["function_score"];
query._source = {
includes: ["content", "name", "path", "extension"]
}
query.size = 1;
return this.esQuery(query).then(resp => {
if (resp.hits.hits.length === 1) {
return resp.hits.hits[0];
}
return null;
});
}
private getDocumentSqlite(docId: string): Promise<EsHit> {
return axios.get(`${this.baseUrl}/fts/d/${docId}`)
.then(resp => ({
_source: resp.data
} as EsHit));
}
getDocument(docId: string, highlight: boolean, fuzzy: boolean): Promise<EsHit | null> {
if (this.backend() == "sqlite") {
return this.getDocumentSqlite(docId);
} else {
return this.getDocumentEs(docId, highlight, fuzzy);
}
}
getTagSuggestions(prefix: string): Promise<string[]> {
if (this.backend() == "sqlite") {
return this.getTagSuggestionsSqlite(prefix);
} else {
return this.getTagSuggestionsEs(prefix);
}
}
private getTagSuggestionsSqlite(prefix): Promise<string[]> {
return axios.post(`${this.baseUrl}/fts/suggestTags`, prefix)
.then(resp => (resp.data));
}
private getTagSuggestionsEs(prefix): Promise<string[]> {
return this.esQuery({
suggest: {
tag: {
prefix: prefix,
completion: {
field: "suggest-tag",
skip_duplicates: true,
size: 10000
}
}
}
}).then(resp => {
const result = [];
resp["suggest"]["tag"][0]["options"].map(opt => opt["_source"]["tag"]).forEach(tags => {
tags.forEach(tag => {
const t = tag.slice(0, -8);
if (!result.find(x => x.slice(0, -8) === t)) {
result.push(tag);
}
});
});
return result;
});
} }
} }

View File

@ -60,7 +60,14 @@ const SORT_MODES = {
} }
} as any; } as any;
class Sist2ElasticsearchQuery { interface SortMode {
text: string
mode: any[]
key: (hit: EsHit) => any
}
class Sist2Query {
searchQuery(blankSearch: boolean = false): any { searchQuery(blankSearch: boolean = false): any {
@ -170,15 +177,13 @@ class Sist2ElasticsearchQuery {
} }
}, },
sort: SORT_MODES[getters.sortMode].mode, sort: SORT_MODES[getters.sortMode].mode,
size: size, aggs:
} as any; {
if (!after) {
q.aggs = {
total_size: {"sum": {"field": "size"}}, total_size: {"sum": {"field": "size"}},
total_count: {"value_count": {"field": "size"}} total_count: {"value_count": {"field": "size"}}
}; },
} size: size,
} as any;
if (!empty && !blankSearch) { if (!empty && !blankSearch) {
q.query.bool.must = query; q.query.bool.must = query;
@ -244,5 +249,4 @@ class Sist2ElasticsearchQuery {
} }
} }
export default new Sist2Query();
export default new Sist2ElasticsearchQuery();

View File

@ -1,111 +0,0 @@
import store from "./store";
import {EsHit, Index} from "@/Sist2Api";
const SORT_MODES = {
score: {
"sort": "score",
},
random: {
"sort": "random"
},
dateAsc: {
"sort": "mtime"
},
dateDesc: {
"sort": "mtime",
"sortAsc": false
},
sizeAsc: {
"sort": "size",
},
sizeDesc: {
"sort": "size",
"sortAsc": false
},
nameAsc: {
"sort": "name",
},
nameDesc: {
"sort": "name",
"sortAsc": false
}
} as any;
interface SortMode {
text: string
mode: any[]
key: (hit: EsHit) => any
}
class Sist2ElasticsearchQuery {
searchQuery(): any {
const getters = store.getters;
const searchText = getters.searchText;
const pathText = getters.pathText;
const sizeMin = getters.sizeMin;
const sizeMax = getters.sizeMax;
const dateMin = getters.dateMin;
const dateMax = getters.dateMax;
const size = getters.size;
const after = getters.lastDoc;
const selectedIndexIds = getters.selectedIndices.map((idx: Index) => idx.id)
const selectedMimeTypes = getters.selectedMimeTypes;
const selectedTags = getters.selectedTags;
const q = {
"pageSize": size
}
Object.assign(q, SORT_MODES[getters.sortMode]);
if (!after) {
q["fetchAggregations"] = true;
}
if (searchText) {
q["query"] = searchText;
}
if (pathText) {
q["path"] = pathText.endsWith("/") ? pathText.slice(0, -1) : pathText;
}
if (sizeMin) {
q["sizeMin"] = sizeMin;
}
if (sizeMax) {
q["sizeMax"] = sizeMax;
}
if (dateMin) {
q["dateMin"] = dateMin;
}
if (dateMax) {
q["dateMax"] = dateMax;
}
if (after) {
q["after"] = after.sort;
}
if (selectedIndexIds.length > 0) {
q["indexIds"] = selectedIndexIds;
}
if (selectedMimeTypes.length > 0) {
q["mimeTypes"] = selectedMimeTypes;
}
if (selectedTags.length > 0) {
q["tags"] = selectedTags
}
if (getters.sortMode == "random") {
q["seed"] = getters.seed;
}
if (getters.optHighlight) {
q["highlight"] = true;
q["highlightContextSize"] = Number(getters.optFragmentSize);
}
return q;
}
}
export default new Sist2ElasticsearchQuery();

View File

@ -1,21 +0,0 @@
<template>
<span :style="getStyle()">{{span.text}}</span>
</template>
<script>
import ModelsRepo from "@/ml/modelsRepo";
export default {
name: "AnalyzedContentSpan",
props: ["span", "text"],
methods: {
getStyle() {
return ModelsRepo.data[this.$store.getters.mlModel.name].labelStyles[this.span.label];
}
}
}
</script>
<style scoped></style>

View File

@ -1,75 +0,0 @@
<template>
<div>
<b-card class="mb-2">
<AnalyzedContentSpan v-for="span of legend" :key="span.id" :span="span"
class="mr-2"></AnalyzedContentSpan>
</b-card>
<div class="content-div">
<AnalyzedContentSpan v-for="span of mergedSpans" :key="span.id" :span="span"></AnalyzedContentSpan>
</div>
</div>
</template>
<script>
import AnalyzedContentSpan from "@/components/AnalyzedContentSpan.vue";
import ModelsRepo from "@/ml/modelsRepo";
export default {
name: "AnalyzedContentSpanContainer",
components: {AnalyzedContentSpan},
props: ["spans", "text"],
computed: {
legend() {
return Object.entries(ModelsRepo.data[this.$store.state.mlModel.name].legend)
.map(([label, name]) => ({
text: name,
id: label,
label: label
}));
},
mergedSpans() {
const spans = this.spans;
const merged = [];
let lastLabel = null;
let fixSpace = false;
for (let i = 0; i < spans.length; i++) {
if (spans[i].label !== lastLabel) {
let start = spans[i].wordIndex;
const nextSpan = spans.slice(i + 1).find(s => s.label !== spans[i].label)
let end = nextSpan ? nextSpan.wordIndex : undefined;
if (end !== undefined && this.text[end - 1] === " ") {
end -= 1;
fixSpace = true;
}
merged.push({
text: this.text.slice(start, end),
label: spans[i].label,
id: spans[i].wordIndex
});
if (fixSpace) {
merged.push({
text: " ",
label: "O",
id: end
});
fixSpace = false;
}
lastLabel = spans[i].label;
}
}
return merged;
},
},
}
</script>
<style scoped></style>

View File

@ -120,7 +120,7 @@ export default {
update(indexId) { update(indexId) {
const svg = d3.select("#date-histogram"); const svg = d3.select("#date-histogram");
d3.json(Sist2Api.getDateStat(indexId)).then(tabularData => { d3.csv(Sist2Api.getDateCsv(indexId)).then(tabularData => {
dateHistogram(tabularData.slice(), svg, this.$t("d3.dateHistogram")); dateHistogram(tabularData.slice(), svg, this.$t("d3.dateHistogram"));
}); });
} }

View File

@ -91,7 +91,7 @@ export default {
const mimeSvgCount = d3.select("#agg-mime-count"); const mimeSvgCount = d3.select("#agg-mime-count");
const fillOpacity = this.$store.state.optTheme === "black" ? 0.9 : 0.6; const fillOpacity = this.$store.state.optTheme === "black" ? 0.9 : 0.6;
d3.json(Sist2Api.getMimeStat(indexId)).then(tabularData => { d3.csv(Sist2Api.getMimeCsvUrl(indexId)).then(tabularData => {
mimeBarCount(tabularData.slice(), mimeSvgCount, fillOpacity, this.$t("d3.mimeCount")); mimeBarCount(tabularData.slice(), mimeSvgCount, fillOpacity, this.$t("d3.mimeCount"));
}); });
} }

View File

@ -90,7 +90,7 @@ export default {
const mimeSvgSize = d3.select("#agg-mime-size"); const mimeSvgSize = d3.select("#agg-mime-size");
const fillOpacity = this.$store.state.optTheme === "black" ? 0.9 : 0.6; const fillOpacity = this.$store.state.optTheme === "black" ? 0.9 : 0.6;
d3.json(Sist2Api.getMimeStat(indexId)).then(tabularData => { d3.csv(Sist2Api.getMimeCsvUrl(indexId)).then(tabularData => {
mimeBarSize(tabularData.slice(), mimeSvgSize, fillOpacity, this.$t("d3.mimeSize")); mimeBarSize(tabularData.slice(), mimeSvgSize, fillOpacity, this.$t("d3.mimeSize"));
}); });
} }

View File

@ -117,7 +117,7 @@ export default {
update(indexId) { update(indexId) {
const svg = d3.select("#size-histogram"); const svg = d3.select("#size-histogram");
d3.json(Sist2Api.getSizeStat(indexId)).then(tabularData => { d3.csv(Sist2Api.getSizeCsv(indexId)).then(tabularData => {
sizeHistogram(tabularData.slice(), svg, this.$t("d3.sizeHistogram")); sizeHistogram(tabularData.slice(), svg, this.$t("d3.sizeHistogram"));
}); });
} }

View File

@ -240,7 +240,7 @@ export default {
.style("overflow", "visible") .style("overflow", "visible")
.style("font", "10px sans-serif"); .style("font", "10px sans-serif");
d3.json(Sist2Api.getTreemapStat(indexId)).then(tabularData => { d3.csv(Sist2Api.getTreemapCsvUrl(indexId)).then(tabularData => {
tabularData.forEach(row => { tabularData.forEach(row => {
row.taxonomy = row.path.split("/"); row.taxonomy = row.path.split("/");
row.size = Number(row.size); row.size = Number(row.size);

View File

@ -1,9 +1,6 @@
<template> <template>
<b-card v-if="$store.state.sist2Info.showDebugInfo" class="mb-4 mt-4"> <b-card class="mb-4 mt-4">
<b-card-title> <b-card-title><DebugIcon class="mr-1"></DebugIcon>{{ $t("debug") }}</b-card-title>
<DebugIcon class="mr-1"></DebugIcon>
{{ $t("debug") }}
</b-card-title>
<p v-html="$t('debugDescription')"></p> <p v-html="$t('debugDescription')"></p>
<b-card-body> <b-card-body>
@ -11,8 +8,7 @@
<b-table :items="tableItems" small borderless responsive="md" thead-class="hidden" class="mb-0"></b-table> <b-table :items="tableItems" small borderless responsive="md" thead-class="hidden" class="mb-0"></b-table>
<hr /> <hr />
<IndexDebugInfo v-for="idx of $store.state.sist2Info.indices" :key="idx.id" :index="idx" <IndexDebugInfo v-for="idx of $store.state.sist2Info.indices" :key="idx.id" :index="idx" class="mt-2"></IndexDebugInfo>
class="mt-2"></IndexDebugInfo>
</b-card-body> </b-card-body>
</b-card> </b-card>
</template> </template>
@ -20,17 +16,13 @@
<script> <script>
import IndexDebugInfo from "@/components/IndexDebugInfo"; import IndexDebugInfo from "@/components/IndexDebugInfo";
import DebugIcon from "@/components/icons/DebugIcon"; import DebugIcon from "@/components/icons/DebugIcon";
import {mapGetters} from "vuex";
export default { export default {
name: "DebugInfo.vue", name: "DebugInfo.vue",
components: {DebugIcon, IndexDebugInfo}, components: {DebugIcon, IndexDebugInfo},
computed: { computed: {
...mapGetters([
"uiSqliteMode",
]),
tableItems() { tableItems() {
const items = [ return [
{key: "version", value: this.$store.state.sist2Info.version}, {key: "version", value: this.$store.state.sist2Info.version},
{key: "platform", value: this.$store.state.sist2Info.platform}, {key: "platform", value: this.$store.state.sist2Info.platform},
{key: "debugBinary", value: this.$store.state.sist2Info.debug}, {key: "debugBinary", value: this.$store.state.sist2Info.debug},
@ -39,17 +31,10 @@ export default {
{key: "tagline", value: this.$store.state.sist2Info.tagline}, {key: "tagline", value: this.$store.state.sist2Info.tagline},
{key: "dev", value: this.$store.state.sist2Info.dev}, {key: "dev", value: this.$store.state.sist2Info.dev},
{key: "mongooseVersion", value: this.$store.state.sist2Info.mongooseVersion}, {key: "mongooseVersion", value: this.$store.state.sist2Info.mongooseVersion},
];
if (!this.uiSqliteMode) {
items.push(
{key: "esVersion", value: this.$store.state.sist2Info.esVersion}, {key: "esVersion", value: this.$store.state.sist2Info.esVersion},
{key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported}, {key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported},
{key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy} {key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy},
); ]
}
return items;
} }
} }
} }

View File

@ -16,7 +16,7 @@
<!-- Audio player--> <!-- Audio player-->
<audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls <audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls
:type="doc._source.mime" :type="doc._source.mime"
:src="`f/${doc._source.index}/${doc._id}`" :src="`f/${doc._id}`"
@play="onAudioPlay()"></audio> @play="onAudioPlay()"></audio>
<b-card-body class="padding-03"> <b-card-body class="padding-03">
@ -27,11 +27,6 @@
<DocFileTitle :doc="doc"></DocFileTitle> <DocFileTitle :doc="doc"></DocFileTitle>
</div> </div>
<!-- Featured line -->
<div style="display: flex">
<FeaturedFieldsLine :doc="doc"></FeaturedFieldsLine>
</div>
<!-- Tags --> <!-- Tags -->
<div class="card-text"> <div class="card-text">
<TagContainer :hit="doc"></TagContainer> <TagContainer :hit="doc"></TagContainer>
@ -48,11 +43,10 @@ import DocFileTitle from "@/components/DocFileTitle.vue";
import DocInfoModal from "@/components/DocInfoModal.vue"; import DocInfoModal from "@/components/DocInfoModal.vue";
import ContentDiv from "@/components/ContentDiv.vue"; import ContentDiv from "@/components/ContentDiv.vue";
import FullThumbnail from "@/components/FullThumbnail"; import FullThumbnail from "@/components/FullThumbnail";
import FeaturedFieldsLine from "@/components/FeaturedFieldsLine";
export default { export default {
components: {FeaturedFieldsLine, FullThumbnail, ContentDiv, DocInfoModal, DocFileTitle, TagContainer}, components: {FullThumbnail, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
props: ["doc", "width"], props: ["doc", "width"],
data() { data() {
return { return {
@ -76,7 +70,7 @@ export default {
await this.$store.dispatch("showLightbox"); await this.$store.dispatch("showLightbox");
}, },
onAudioPlay() { onAudioPlay() {
Array.prototype.slice.call(document.getElementsByTagName("audio")).forEach((el) => { document.getElementsByTagName("audio").forEach((el) => {
if (el !== this.$refs["audio"]) { if (el !== this.$refs["audio"]) {
el.pause(); el.pause();
} }

View File

@ -1,5 +1,5 @@
<template> <template>
<a :href="`f/${doc._source.index}/${doc._id}`" class="file-title-anchor" target="_blank"> <a :href="`f/${doc._id}`" class="file-title-anchor" target="_blank">
<div class="file-title" :title="doc._source.path + '/' + doc._source.name + ext(doc)" <div class="file-title" :title="doc._source.path + '/' + doc._source.name + ext(doc)"
v-html="fileName() + ext(doc)"></div> v-html="fileName() + ext(doc)"></div>
</a> </a>

View File

@ -17,7 +17,7 @@
</div> </div>
<img v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo" <img v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo"
:src="(doc._props.isGif && hover) ? `f/${doc._source.index}/${doc._id}` : `t/${doc._source.index}/${doc._id}`" :src="(doc._props.isGif && hover) ? `f/${doc._id}` : `t/${doc._source.index}/${doc._id}`"
alt="" alt=""
class="pointer fit-sm" @click="onThumbnailClick()"> class="pointer fit-sm" @click="onThumbnailClick()">
<img v-else :src="`t/${doc._source.index}/${doc._id}`" alt="" <img v-else :src="`t/${doc._source.index}/${doc._id}`" alt=""
@ -50,11 +50,6 @@
<span v-if="doc._source.author && doc._source.pages" class="mx-1">-</span> <span v-if="doc._source.author && doc._source.pages" class="mx-1">-</span>
<span v-if="doc._source.author">{{ doc._source.author }}</span> <span v-if="doc._source.author">{{ doc._source.author }}</span>
</div> </div>
<!-- Featured line -->
<div style="display: flex">
<FeaturedFieldsLine :doc="doc"></FeaturedFieldsLine>
</div>
</div> </div>
</div> </div>
</b-list-group-item> </b-list-group-item>
@ -66,11 +61,10 @@ import DocFileTitle from "@/components/DocFileTitle";
import DocInfoModal from "@/components/DocInfoModal"; import DocInfoModal from "@/components/DocInfoModal";
import ContentDiv from "@/components/ContentDiv"; import ContentDiv from "@/components/ContentDiv";
import FileIcon from "@/components/icons/FileIcon"; import FileIcon from "@/components/icons/FileIcon";
import FeaturedFieldsLine from "@/components/FeaturedFieldsLine";
export default { export default {
name: "DocListItem", name: "DocListItem",
components: {FileIcon, ContentDiv, DocInfoModal, DocFileTitle, TagContainer, FeaturedFieldsLine}, components: {FileIcon, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
props: ["doc"], props: ["doc"],
data() { data() {
return { return {

View File

@ -1,46 +0,0 @@
<template>
<div class="featured-line" v-html="featuredLineHtml"></div>
</template>
<script>
import {humanDate, humanFileSize} from "@/util";
function scopedEval(context, expr) {
const evaluator = Function.apply(null, [...Object.keys(context), "expr", "return eval(expr)"]);
return evaluator.apply(null, [...Object.values(context), expr]);
}
export default {
name: "FeaturedFieldsLine",
props: ["doc"],
computed: {
featuredLineHtml() {
if (this.$store.getters.optFeaturedFields === undefined) {
return "";
}
const scope = {doc: this.doc._source, humanDate: humanDate, humanFileSize: humanFileSize};
return this.$store.getters.optFeaturedFields
.replaceAll(/\$\{([^}]*)}/g, (match, g1) => {
return scopedEval(scope, g1);
});
}
}
}
</script>
<style scoped>
.featured-line {
font-size: 90%;
font-family: 'Source Sans Pro', 'Helvetica Neue', Arial, sans-serif;
color: #424242;
padding-left: 2px;
}
.theme-black .featured-line {
color: #bebebe;
}
</style>

View File

@ -6,13 +6,13 @@
</div> </div>
<div <div
v-if="doc._props.isImage && doc._props.imageAspectRatio < 5" v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5"
class="card-img-overlay" class="card-img-overlay"
:class="{'small-badge': smallBadge}"> :class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span> <span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
</div> </div>
<div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0" <div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
class="card-img-overlay" class="card-img-overlay"
:class="{'small-badge': smallBadge}"> :class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span> <span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
@ -63,20 +63,17 @@ export default {
}, },
computed: { computed: {
tnSrc() { tnSrc() {
return this.getThumbnailSrc(this.currentThumbnailNum);
},
},
methods: {
getThumbnailSrc(thumbnailNum) {
const doc = this.doc; const doc = this.doc;
const props = doc._props; const props = doc._props;
if (props.isGif && this.hover) { if (props.isGif && this.hover) {
return `f/${doc._source.index}/${doc._id}`; return `f/${doc._id}`;
} }
return (this.currentThumbnailNum === 0) return (this.currentThumbnailNum === 0)
? `t/${doc._source.index}/${doc._id}` ? `t/${doc._source.index}/${doc._id}`
: `t/${doc._source.index}/${doc._id}/${String(thumbnailNum).padStart(4, "0")}`; : `t/${doc._source.index}/${doc._id}${String(this.currentThumbnailNum).padStart(4, "0")}`;
}, },
},
methods: {
humanTime: humanTime, humanTime: humanTime,
onThumbnailClick() { onThumbnailClick() {
this.$emit("onThumbnailClick"); this.$emit("onThumbnailClick");
@ -89,14 +86,9 @@ export default {
}, },
onTnEnter() { onTnEnter() {
this.hover = true; this.hover = true;
const start = Date.now()
if (this.doc._props.hasVidPreview) { if (this.doc._props.hasVidPreview) {
let img = new Image();
img.src = this.getThumbnailSrc(this.currentThumbnailNum + 1);
img.onload = () => {
this.currentThumbnailNum += 1; this.currentThumbnailNum += 1;
this.scheduleNextTnNum(Date.now() - start); this.scheduleNextTnNum();
}
} }
}, },
onTnLeave() { onTnLeave() {
@ -107,23 +99,17 @@ export default {
this.timeoutId = null; this.timeoutId = null;
} }
}, },
scheduleNextTnNum(offset = 0) { scheduleNextTnNum() {
const INTERVAL = (this.$store.state.optVidPreviewInterval ?? 700) - offset; const INTERVAL = this.$store.state.optVidPreviewInterval ?? 700;
this.timeoutId = window.setTimeout(() => { this.timeoutId = window.setTimeout(() => {
const start = Date.now();
if (!this.hover) { if (!this.hover) {
return; return;
} }
this.scheduleNextTnNum();
if (this.currentThumbnailNum === this.doc._props.tnNum - 1) { if (this.currentThumbnailNum === this.doc._props.tnNum - 1) {
this.currentThumbnailNum = 0; this.currentThumbnailNum = 0;
this.scheduleNextTnNum();
} else { } else {
let img = new Image();
img.src = this.getThumbnailSrc(this.currentThumbnailNum + 1);
img.onload = () => {
this.currentThumbnailNum += 1; this.currentThumbnailNum += 1;
this.scheduleNextTnNum(Date.now() - start);
}
} }
}, INTERVAL); }, INTERVAL);
}, },
@ -166,18 +152,17 @@ export default {
} }
.badge-resolution { .badge-resolution {
color: #c6c6c6; color: #212529;
background-color: #272727CC; background-color: #FFC107;
padding: 2px 3px;
} }
.card-img-overlay { .card-img-overlay {
pointer-events: none; pointer-events: none;
padding: 2px 6px; padding: 0.75rem;
bottom: 4px; bottom: unset;
top: unset; top: 0;
left: unset; left: unset;
right: 0; right: unset;
} }
.small-badge { .small-badge {

View File

@ -1,103 +1,80 @@
<template> <template>
<Preloader v-if="loading"></Preloader> <Preloader v-if="loading"></Preloader>
<div v-else-if="content"> <div v-else-if="content" class="content-div" v-html="content"></div>
<b-form inline class="my-2" v-if="ModelsRepo.getOptions().length > 0">
<b-checkbox class="ml-auto mr-2" :checked="optAutoAnalyze"
@input="setOptAutoAnalyze($event); $store.dispatch('updateConfiguration')">
{{ $t("ml.auto") }}
</b-checkbox>
<b-button :disabled="mlPredictionsLoading || mlLoading" @click="mlAnalyze" variant="primary"
>{{ $t("ml.analyzeText") }}
</b-button>
<b-select :disabled="mlPredictionsLoading || mlLoading" class="ml-2" v-model="mlModel">
<b-select-option :value="opt.value" v-for="opt of ModelsRepo.getOptions()">{{ opt.text }}
</b-select-option>
</b-select>
</b-form>
<b-progress v-if="mlLoading" variant="warning" show-progress :max="1" class="mb-3"
>
<b-progress-bar :value="modelLoadingProgress">
<strong>{{ ((modelLoadingProgress * modelSize) / (1024 * 1024)).toFixed(1) }}MB / {{
(modelSize / (1024 * 1024)).toFixed(1)
}}MB</strong>
</b-progress-bar>
</b-progress>
<b-progress v-if="mlPredictionsLoading" variant="primary" :value="modelPredictionProgress"
:max="content.length" class="mb-3"></b-progress>
<AnalyzedContentSpansContainer v-if="analyzedContentSpans.length > 0"
:spans="analyzedContentSpans" :text="rawContent"></AnalyzedContentSpansContainer>
<div v-else class="content-div" v-html="content"></div>
</div>
</template> </template>
<script> <script>
import Sist2Api from "@/Sist2Api"; import Sist2Api from "@/Sist2Api";
import Preloader from "@/components/Preloader"; import Preloader from "@/components/Preloader";
import Sist2Query from "@/Sist2ElasticsearchQuery"; import Sist2Query from "@/Sist2Query";
import store from "@/store"; import store from "@/store";
import BertNerModel from "@/ml/BertNerModel";
import AnalyzedContentSpansContainer from "@/components/AnalyzedContentSpanContainer.vue";
import ModelsRepo from "@/ml/modelsRepo";
import {mapGetters, mapMutations} from "vuex";
export default { export default {
name: "LazyContentDiv", name: "LazyContentDiv",
components: {AnalyzedContentSpansContainer, Preloader}, components: {Preloader},
props: ["docId"], props: ["docId"],
data() { data() {
return { return {
ModelsRepo,
content: "", content: "",
rawContent: "", loading: true
loading: true,
modelLoadingProgress: 0,
modelPredictionProgress: 0,
mlPredictionsLoading: false,
mlLoading: false,
mlModel: null,
analyzedContentSpans: []
} }
}, },
mounted() { mounted() {
const query = Sist2Query.searchQuery();
if (this.$store.getters.optMlDefaultModel) { if (this.$store.state.optHighlight) {
this.mlModel = this.$store.getters.optMlDefaultModel
} else { const fields = this.$store.state.fuzzy
this.mlModel = ModelsRepo.getDefaultModel(); ? {"content.nGram": {}}
: {content: {}};
query.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
number_of_fragments: 0,
fields,
};
if (!store.state.sist2Info.esVersionLegacy) {
query.highlight.max_analyzed_offset = 999_999;
}
} }
Sist2Api if ("function_score" in query.query) {
.getDocument(this.docId, this.$store.state.optHighlight, this.$store.state.fuzzy) query.query = query.query.function_score.query;
.then(doc => { }
if (!("must" in query.query.bool)) {
query.query.bool.must = [];
} else if (!Array.isArray(query.query.bool.must)) {
query.query.bool.must = [query.query.bool.must];
}
query.query.bool.must.push({match: {_id: this.docId}});
delete query["sort"];
delete query["aggs"];
delete query["search_after"];
delete query.query["function_score"];
query._source = {
includes: ["content", "name", "path", "extension"]
}
query.size = 1;
Sist2Api.esQuery(query).then(resp => {
this.loading = false; this.loading = false;
if (resp.hits.hits.length === 1) {
if (doc) { this.content = this.getContent(resp.hits.hits[0]);
this.content = this.getContent(doc) } else {
} console.log("FIXME: could not get content")
console.log(resp)
if (this.optAutoAnalyze) {
this.mlAnalyze();
} }
}); });
}, },
computed: {
...mapGetters(["optAutoAnalyze"]),
modelSize() {
const modelData = ModelsRepo.data[this.mlModel];
if (!modelData) {
return 0;
}
return modelData.size;
}
},
methods: { methods: {
...mapMutations(["setOptAutoAnalyze"]),
getContent(doc) { getContent(doc) {
this.rawContent = doc._source.content;
if (!doc.highlight) { if (!doc.highlight) {
return doc._source.content; return doc._source.content;
} }
@ -108,60 +85,10 @@ export default {
if (doc.highlight.content) { if (doc.highlight.content) {
return doc.highlight.content[0]; return doc.highlight.content[0];
} }
},
async getMlModel() {
if (this.$store.getters.mlModel.name !== this.mlModel) {
this.mlLoading = true;
this.modelLoadingProgress = 0;
const modelInfo = ModelsRepo.data[this.mlModel];
const model = new BertNerModel(
modelInfo.vocabUrl,
modelInfo.modelUrl,
modelInfo.id2label,
)
await model.init(progress => this.modelLoadingProgress = progress);
this.$store.commit("setMlModel", {model, name: this.mlModel});
this.mlLoading = false;
return model
}
return this.$store.getters.mlModel.model;
},
async mlAnalyze() {
if (!this.content) {
return;
}
const modelInfo = ModelsRepo.data[this.mlModel];
if (modelInfo === undefined) {
return;
}
this.$store.commit("setOptMlDefaultModel", this.mlModel);
await this.$store.dispatch("updateConfiguration");
const model = await this.getMlModel();
this.analyzedContentSpans = [];
this.mlPredictionsLoading = true;
await model.predict(this.rawContent, results => {
results.forEach(result => result.label = modelInfo.humanLabels[result.label]);
this.analyzedContentSpans.push(...results);
this.modelPredictionProgress = results[results.length - 1].wordIndex;
});
this.mlPredictionsLoading = false;
} }
} }
} }
</script> </script>
<style> <style scoped>
.progress-bar {
transition: none;
}
</style> </style>

View File

@ -160,13 +160,9 @@ export default {
}, },
onSlideChange() { onSlideChange() {
// Pause all videos when changing slide // Pause all videos when changing slide
const videos = document.getElementsByTagName("video"); document.getElementsByTagName("video").forEach((el) => {
if (videos.length === 0) {
return
}
for (let el of videos) {
el.pause(); el.pause();
} });
}, },
} }

View File

@ -9,7 +9,7 @@ import InspireTreeDOM from "inspire-tree-dom";
import "inspire-tree-dom/dist/inspire-tree-light.min.css"; import "inspire-tree-dom/dist/inspire-tree-light.min.css";
import {getSelectedTreeNodes, getTreeNodeAttributes} from "@/util"; import {getSelectedTreeNodes, getTreeNodeAttributes} from "@/util";
import Sist2Api from "@/Sist2Api"; import Sist2Api from "@/Sist2Api";
import Sist2Query from "@/Sist2ElasticsearchQuery"; import Sist2Query from "@/Sist2Query";
export default { export default {
name: "MimePicker", name: "MimePicker",

View File

@ -9,15 +9,13 @@
<span class="badge badge-pill version" v-if="$store && $store.state.sist2Info"> <span class="badge badge-pill version" v-if="$store && $store.state.sist2Info">
v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a
href="https://github.com/simon987/sist2/blob/master/docs/USAGE.md#elasticsearch" href="https://github.com/simon987/sist2/blob/master/docs/USAGE.md#elasticsearch" target="_blank">legacyES</a></span>
target="_blank">legacyES</a></span><span v-if="$store.state.uiSqliteMode">-SQLite</span>
</span> </span>
<span v-if="$store && $store.state.sist2Info" class="tagline" v-html="tagline()"></span> <span v-if="$store && $store.state.sist2Info" class="tagline" v-html="tagline()"></span>
<b-button class="ml-auto" to="stats" variant="link">{{ $t("stats") }}</b-button> <b-button class="ml-auto" to="stats" variant="link">{{ $t("stats") }}</b-button>
<b-button to="config" variant="link">{{ $t("config") }}</b-button> <b-button to="config" variant="link">{{ $t("config") }}</b-button>
<b-button v-if="$auth && $auth.isAuthenticated" variant="link" @click="onLogoutClick()">logout</b-button>
</b-navbar> </b-navbar>
</template> </template>
@ -42,9 +40,6 @@ export default {
}, },
hideLegacy() { hideLegacy() {
return this.$store.state.optHideLegacy; return this.$store.state.optHideLegacy;
},
onLogoutClick() {
this.$auth.logout();
} }
} }
} }
@ -65,6 +60,7 @@ export default {
color: #222 !important; color: #222 !important;
font-size: 1.75rem; font-size: 1.75rem;
padding: 0; padding: 0;
font-family: Hack;
} }
.navbar-brand:hover { .navbar-brand:hover {

Some files were not shown because too many files have changed in this diff Show More