Compare commits

..

1 Commits

Author SHA1 Message Date
9e51d55ca1 Update readme 2019-10-26 15:48:34 -04:00
290 changed files with 4970 additions and 73976 deletions

View File

@@ -1,9 +0,0 @@
# Development container image: the sist2 build image with Node.js 16 added.
FROM simon987/sist2-build
# Declared before any apt call so the package installation below actually
# runs non-interactively.
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
# Register the NodeSource apt repository for Node.js 16.
RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash
# Chained with && (not ;) so a failed `apt update` fails the build instead of
# silently installing from stale package lists.
RUN apt update -y && apt install -y nodejs && rm -rf /var/lib/apt/lists/*

View File

@@ -1,16 +0,0 @@
{
"name": "sist2-dev",
"dockerComposeFile": [
"docker-compose.yml"
],
"service": "sist2-dev",
"customizations": {
"vscode": {
"extensions": [
"ms-vscode.cpptools-extension-pack"
]
}
},
"remoteUser": "root",
"workspaceFolder": "/app/"
}

View File

@@ -1,8 +0,0 @@
# Compose definition for the sist2 development container.
version: "3"
services:
sist2-dev:
# Build the image from the Dockerfile in this directory.
build: .
# Keep the container running indefinitely instead of exiting immediately.
command: sleep infinity
volumes:
# Mount the parent directory into the container at /app.
- ../:/app

View File

@@ -1,40 +0,0 @@
.idea/
*.sist2
docs/
test_i/
test_i_inc/
Testing/
.drone.yml
**/cmake_install.cmake
**/CMakeCache.txt
**/CMakeFiles/
.cmake
LICENSE
Makefile
**/*.md
**/*.cbp
VERSION
**/node_modules/
sist2-*-linux-debug
sist2-*-linux
sist2_debug
sist2
**/libscan-test-files
**/scan_ub_test
**/scan_a_test
**/scan_test
**/ext_ffmpeg
**/ext_libmobi
**/ext_libwpd
**/core
*.a
tmp_scan/
Dockerfile
Dockerfile.arm64
docker-compose.yml
state.db
*-journal
build/
__pycache__/
sist2-vue/dist
sist2-admin/frontend/dist

View File

@@ -1,88 +0,0 @@
# Drone CI pipeline "amd64": build, upload binaries, publish the Docker image.
kind: pipeline
type: docker
name: amd64
platform:
os: linux
arch: amd64
steps:
# Step 1: run the project build script inside the simon987/sist2-build image.
- name: build
image: simon987/sist2-build
commands:
- ./scripts/build.sh
# Step 2: copy VERSION and the release/debug binaries to a per-build directory
# over SCP. Host, port, user and key are read from Drone secrets.
- name: scp files
image: appleboy/drone-scp
settings:
host:
from_secret: SSH_HOST
port:
from_secret: SSH_PORT
user:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./VERSION
- ./sist2-x64-linux
- ./sist2-x64-linux-debug
# Step 3: build ./Dockerfile and push it to simon987/sist2 with automatic
# tags suffixed "x64-linux". Registry credentials come from Drone secrets.
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
# NOTE(review): presumably restricts the docker step to tag events; the
# nesting level is not visible in this view - confirm.
when:
event:
- tag
---
# Drone CI pipeline "arm64": build, upload binaries, publish the Docker image.
kind: pipeline
type: docker
name: arm64
platform:
arch: arm64
steps:
# Step 1: run the arm64 build script inside the simon987/sist2-build-arm64 image.
- name: build
image: simon987/sist2-build-arm64
commands:
- ./scripts/build_arm64.sh
# Step 2: copy the release/debug binaries to a per-build directory (prefixed
# "arm_") over SCP. Host, port, user and key are read from Drone secrets.
- name: scp files
image: appleboy/drone-scp
settings:
host:
from_secret: SSH_HOST
port:
from_secret: SSH_PORT
user:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./sist2-arm64-linux
- ./sist2-arm64-linux-debug
# Step 3: build ./Dockerfile.arm64 and push it to simon987/sist2 with automatic
# tags suffixed "arm64-linux". Registry credentials come from Drone secrets.
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile.arm64
auto_tag: true
auto_tag_suffix: arm64-linux
# NOTE(review): presumably restricts the docker step to tag events; the
# nesting level is not visible in this view - confirm.
when:
event:
- tag

3
.gitattributes vendored Normal file
View File

@@ -0,0 +1,3 @@
CMakeModules/* linguist-vendored
web/js/*.min.js linguist-vendored
web/css/*.min.css linguist-vendored

View File

@@ -1,40 +0,0 @@
---
name: "🐞 Bug Report"
about: Submit a bug report
title: ''
labels: bug
assignees: ''
---
**Device Information (please complete the following information):**
- OS: `[e.g., Ubuntu 20.04, WSL2]`
- Deployment: `[Linux, Linux ARM64 or Docker]`
- Browser *(if relevant)*: `[e.g., chrome, safari]`
- SIST2 Version: `[e.g., v2.9.0]`
- Elasticsearch Version *(if relevant)* : ``
**Command with arguments**
<!-- e.g. `scan ~/Documents -o ./i2 --threads 3 -q 1.0` -->
**Describe the bug**
<!-- A clear and concise description of what the bug is. -->
**Steps To Reproduce**
Please be specific!
1. Go to '...'
2. Click on '....'
3. etc.
**Expected behavior**
<!-- A clear and concise description of what you expected to happen. -->
**Actual Behavior**
<!-- A clear and concise description of what actually happens. -->
**Screenshots**
<!-- If applicable, add screenshots to help explain your problem. -->
**Additional context**
<!-- Add any other context about the problem here. If applicable, please include why you think the bug is occurring and/or troubleshooting you have already performed. -->
<!-- If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net. -->

View File

@@ -1,5 +0,0 @@
blank_issues_enabled: false
contact_links:
- name: SIST2 Documentation
url: https://github.com/simon987/sist2/blob/master/docs/USAGE.md
about: Check out the SIST2 documentation for answers to common questions

View File

@@ -1,18 +0,0 @@
---
name: "🚀 Feature Request"
about: Suggest an idea for SIST2
title: ''
assignees: ''
---
**Which SIST2 component is your Feature Request related to?**
<!-- e.g., Scan, Index, or Web? -->
**Is your feature request related to a problem? Please describe.**
<!-- A clear and concise description of what the problem is. e.g., "I'm always frustrated when [...]" -->
**What would you like to see happen?**
<!-- A clear and concise description of what you want to happen. -->
**Additional context**
<!-- Add any other context or screenshots about the feature request here. -->

View File

@@ -1,18 +0,0 @@
---
name: Issue template
about: General
title: ''
labels: ''
assignees: ''
---
sist2 version:
Platform (Linux or Docker, x86-64 or arm64):
Elasticsearch version:
Command with arguments: `e.g. scan ~/Documents -o ./i2 --threads 3 -q 1.0`
If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net.

37
.gitignore vendored
View File

@@ -1,5 +1,6 @@
.idea
thumbs
test
*.cbp
CMakeCache.txt
CMakeFiles
@@ -9,40 +10,8 @@ Makefile
*.out
LOG
sist2*
!sist2-vue/
!sist2-admin
!sist2_admin
!sist2.py
*.sist2/
bundle*.css
index.sist2/
bundle.css
bundle.js
*.a
vgcore.*
build/
third-party/argparse
*.idx/
VERSION
git_hash.h
Testing/
test_i
test_i_inc
node_modules/
.cmake/
i_inc/
state.db
*.pyc
!sist2-admin/frontend/dist
*.js.map
sist2-vue/dist
sist2-admin/frontend/dist
.ninja_deps
.ninja_log
build.ninja
src/web/static_generated.c
src/magic_generated.c
src/index/static_generated.c
*.sist2
*-shm
*-journal
.vscode
*.fts

33
.gitmodules vendored
View File

@@ -1,15 +1,18 @@
[submodule "third-party/argparse"]
path = third-party/argparse
url = https://github.com/simon987/argparse
[submodule "third-party/libscan/third-party/utf8.h"]
path = third-party/libscan/third-party/utf8.h
url = https://github.com/sheredom/utf8.h
[submodule "third-party/libscan/third-party/antiword"]
path = third-party/libscan/third-party/antiword
url = https://github.com/simon987/antiword
[submodule "third-party/libscan/third-party/libmobi"]
path = third-party/libscan/third-party/libmobi
url = https://github.com/bfabiszewski/libmobi
[submodule "third-party/libscan/libscan-test-files"]
path = third-party/libscan/libscan-test-files
url = https://github.com/simon987/libscan-test-files
[submodule "argparse"]
path = argparse
url = https://github.com/cofyc/argparse
[submodule "cJSON"]
path = cJSON
url = https://github.com/DaveGamble/cJSON
[submodule "lib/mupdf"]
path = lib/mupdf
url = git://git.ghostscript.com/mupdf.git
[submodule "lib/onion"]
path = lib/onion
url = https://github.com/davidmoreno/onion
[submodule "lib/ffmpeg"]
path = lib/ffmpeg
url = https://git.ffmpeg.org/ffmpeg.git
[submodule "lmdb"]
path = lmdb
url = https://github.com/LMDB/lmdb

View File

@@ -1,168 +1,127 @@
cmake_minimum_required(VERSION 3.7)
project(sist2)
set(CMAKE_C_STANDARD 11)
option(SIST_DEBUG "Build a debug executable" on)
option(SIST_FAST "Enable more optimisation flags" off)
option(SIST_DEBUG_INFO "Turn on debug information in web interface" on)
add_compile_definitions(
"SIST_PLATFORM=${SIST_PLATFORM}"
)
if (SIST_DEBUG)
add_compile_definitions(
"SIST_DEBUG=${SIST_DEBUG}"
)
set(VCPKG_BUILD_TYPE debug)
else ()
set(VCPKG_BUILD_TYPE release)
endif ()
if (SIST_DEBUG_INFO)
add_compile_definitions(
"SIST_DEBUG_INFO=${SIST_DEBUG_INFO}"
)
endif ()
add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
project(sist2 C)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
add_executable(
sist2
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/parsing/media.h src/parsing/media.c
src/parsing/pdf.h src/parsing/pdf.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/parsing/magic_util.c src/parsing/magic_util.h
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/parsing/text.h src/parsing/text.c
src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h
src/web/web_util.c src/web/web_util.h
src/index/elastic.c src/index/elastic.h
src/util.c src/util.h
src/ctx.c src/ctx.h
src/types.h
src/log.c src/log.h
src/cli.c src/cli.h
src/parsing/sidecar.c src/parsing/sidecar.h
src/database/database.c src/database/database.h
src/parsing/fs_util.h
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
src/auth0/auth0_c_api.h src/auth0/auth0_c_api.cpp
# argparse
argparse/argparse.h argparse/argparse.c
src/database/database_stats.c
src/database/database_schema.c
src/database/database_fts.c
src/web/web_fts.c
)
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
# cJSON
cJSON/cJSON.h cJSON/cJSON.c
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
# LMDB
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h)
find_package(PkgConfig REQUIRED)
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
find_package(unofficial-sqlite3 CONFIG REQUIRED)
find_package(LibMagic REQUIRED)
find_package(FFmpeg REQUIRED)
find_package(OpenSSL REQUIRED)
find_package(Freetype REQUIRED)
pkg_check_modules(GLIB REQUIRED glib-2.0)
pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
pkg_check_modules(UUID REQUIRED uuid)
target_include_directories(
sist2 PUBLIC
${CMAKE_SOURCE_DIR}/third-party/onion/src/
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/
include_directories(${LIBMAGIC_INCLUDE_DIRS})
link_directories(${LIBMAGIC_LIBRARY_DIRS})
add_definitions(${LIBMAGIC_CFLAGS_OTHER})
link_directories(${UUID_LIBRARY_DIRS})
include_directories(${UUID_INCLUDE_DIRS})
add_definitions(${UUID_CFLAGS_OTHER})
include_directories(${GLIB_INCLUDE_DIRS})
link_directories(${GLIB_LIBRARY_DIRS})
add_definitions(${GLIB_CFLAGS_OTHER})
include_directories(${GOBJECT_INCLUDE_DIRS})
link_directories(${GOBJECT_LIBRARY_DIRS})
add_definitions(${GOBJECT_CFLAGS_OTHER})
link_directories(${FFMPEG_LIBRARY_DIRS})
include_directories(${FFMPEG_INCLUDE_DIRS})
include_directories(${OPENSSL_INCLUDE_DIR})
link_directories(${OPENSSL_CRYPTO_LIBRARY})
list(REMOVE_ITEM GLIB_LIBRARIES pcre)
list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
list(REMOVE_ITEM UUID_LIBRARIES pcre)
include_directories(${FREETYPE_INCLUDE_DIRS})
add_definitions(${FREETYPE_CFLAGS_OTHER})
include_directories(
${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
${PROJECT_SOURCE_DIR}/lib/onion/src/
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
)
target_compile_options(
sist2
target_compile_options(sist2
PRIVATE
-fPIC
)
-O3
# -march=native
-fno-stack-protector
-fomit-frame-pointer
)
if (SIST_DEBUG)
target_compile_options(
sist2
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
# -O2
)
target_link_options(
sist2
PRIVATE
-fsanitize=address
-static-libasan
)
set_target_properties(
sist2
PROPERTIES
OUTPUT_NAME sist2_debug
)
elseif (SIST_FAST)
target_compile_options(
sist2
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else ()
target_compile_options(
sist2
PRIVATE
-Ofast
# -g
-fno-stack-protector
-fomit-frame-pointer
-w
)
endif ()
add_dependencies(
sist2
scan
argparse
)
target_link_libraries(
TARGET_LINK_LIBRARIES(
sist2
z
argparse
unofficial::mongoose::mongoose
CURL::libcurl
${GLIB_LIBRARIES}
${GOBJECT_LIBRARIES}
${UUID_LIBRARIES}
# ffmpeg
# ${PROJECT_SOURCE_DIR}/lib/libavcodec.a
# ${PROJECT_SOURCE_DIR}/lib/libavformat.a
# ${PROJECT_SOURCE_DIR}/lib/libavutil.a
# ${PROJECT_SOURCE_DIR}/lib/libswscale.a
# ${PROJECT_SOURCE_DIR}/lib/libswresample.a
${FFMPEG_LIBRARIES}
swscale
# mupdf
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
# onion
${PROJECT_SOURCE_DIR}/lib/libonion_static.a
pthread
scan
${MAGIC_LIB}
unofficial::sqlite3::sqlite3
curl
m
bz2
magic
)
add_custom_target(
before_sist2
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/scripts/before_build.sh
)
add_dependencies(sist2 before_sist2)

View File

@@ -1,57 +0,0 @@
# ---- Stage 1: build the web frontends and compile sist2 ----
FROM simon987/sist2-build AS build
# LABEL replaces the deprecated MAINTAINER instruction.
LABEL maintainer="simon987 <me@simon987.net>"
ENV DEBIAN_FRONTEND=noninteractive
# Register the NodeSource apt repository for Node.js 16 (needed by npm below).
RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash
# Chained with && (not ;) so a failed `apt update` fails the build instead of
# silently installing from stale package lists.
RUN apt update -y && apt install -y nodejs && rm -rf /var/lib/apt/lists/*
WORKDIR /build/
COPY scripts scripts
COPY schema schema
COPY CMakeLists.txt .
COPY third-party third-party
COPY src src
COPY sist2-vue sist2-vue
COPY sist2-admin sist2-admin
# Both frontends are built before cmake runs.
RUN cd sist2-vue/ && npm install && npm run build
RUN cd sist2-admin/frontend/ && npm install && npm run build
RUN mkdir build && cd build && cmake -DSIST_PLATFORM=x64_linux_docker -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
RUN cd build && make -j$(nproc)
# If stripping build/sist2 fails, fall back to the debug binary under the same name.
RUN strip build/sist2 || mv build/sist2_debug build/sist2

# ---- Stage 2: runtime image (base pinned by digest) ----
FROM --platform="linux/amd64" ubuntu@sha256:965fbcae990b0467ed5657caceaec165018ef44a4d2d46c7cdea80a9dff0d1ea
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENTRYPOINT ["/root/sist2"]
RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libasan5 libmagic1 python3 \
    python3-pip git tesseract-ocr && rm -rf /var/lib/apt/lists/*
# Download the bundled tesseract language data. `curl -f` makes an HTTP error
# fail the build instead of saving the error page as a .traineddata file.
RUN mkdir -p /usr/share/tessdata && \
    cd /usr/share/tessdata/ && \
    curl -f -o /usr/share/tesseract-ocr/4.00/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata && \
    curl -f -o /usr/share/tesseract-ocr/4.00/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata && \
    curl -f -o /usr/share/tesseract-ocr/4.00/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata && \
    curl -f -o /usr/share/tesseract-ocr/4.00/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata && \
    curl -f -o /usr/share/tesseract-ocr/4.00/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata && \
    curl -f -o /usr/share/tesseract-ocr/4.00/tessdata/osd.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/osd.traineddata && \
    curl -f -o /usr/share/tesseract-ocr/4.00/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && \
    curl -f -o /usr/share/tesseract-ocr/4.00/tessdata/deu.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/deu.traineddata && \
    curl -f -o /usr/share/tesseract-ocr/4.00/tessdata/equ.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/equ.traineddata && \
    curl -f -o /usr/share/tesseract-ocr/4.00/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata
# sist2
COPY --from=build /build/build/sist2 /root/sist2
# sist2-admin
WORKDIR /root/sist2-admin
# requirements.txt is copied and installed before the application files so the
# pip layer can be reused when only application code changes.
COPY sist2-admin/requirements.txt /root/sist2-admin/
RUN python3 -m pip install --no-cache-dir -r /root/sist2-admin/requirements.txt
COPY --from=build /build/sist2-admin/ /root/sist2-admin/

View File

@@ -1,40 +0,0 @@
# ---- Stage 1: compile sist2 for arm64 ----
FROM simon987/sist2-build-arm64 AS build
# LABEL replaces the deprecated MAINTAINER instruction.
LABEL maintainer="simon987 <me@simon987.net>"
WORKDIR /build/
# COPY is sufficient for a plain directory copy of the build context.
COPY . /build/
RUN mkdir build && cd build && cmake -DSIST_PLATFORM=arm64_linux_docker -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
RUN cd build && make -j$(nproc)
# If stripping build/sist2 fails, fall back to the debug binary under the same name.
RUN strip build/sist2 || mv build/sist2_debug build/sist2

# ---- Stage 2: runtime image (base pinned by digest) ----
FROM --platform=linux/arm64/v8 ubuntu@sha256:537da24818633b45fcb65e5285a68c3ec1f3db25f5ae5476a7757bc8dfae92a3
WORKDIR /root
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENTRYPOINT ["/root/sist2"]
# DEBIAN_FRONTEND=noninteractive matches the x64 Dockerfile and keeps package
# configuration from prompting during the build.
RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libasan5 libmagic1 tesseract-ocr python3-pip python3 git && rm -rf /var/lib/apt/lists/*
# Download the bundled tesseract language data. `curl -f` makes an HTTP error
# fail the build instead of saving the error page as a .traineddata file.
RUN mkdir -p /usr/share/tessdata && \
    cd /usr/share/tessdata/ && \
    curl -f -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata && \
    curl -f -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata && \
    curl -f -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata && \
    curl -f -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata && \
    curl -f -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata && \
    curl -f -o /usr/share/tessdata/osd.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/osd.traineddata && \
    curl -f -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && \
    curl -f -o /usr/share/tessdata/deu.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/deu.traineddata && \
    curl -f -o /usr/share/tessdata/equ.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/equ.traineddata && \
    curl -f -o /usr/share/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata
# sist2
COPY --from=build /build/build/sist2 /root/sist2
# sist2-admin
# requirements.txt is copied and installed before the application files so the
# pip layer can be reused when only application code changes.
COPY sist2-admin/requirements.txt sist2-admin/
RUN python3 -m pip install --no-cache-dir -r sist2-admin/requirements.txt
COPY --from=build /build/sist2-admin/ sist2-admin/

237
README.md
View File

@@ -1,8 +1,5 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
# sist2
@@ -10,210 +7,84 @@ sist2 (Simple incremental search tool)
*Warning: sist2 is in early development*
![search panel](docs/sist2.gif)
## Features
* Fast, low memory usage, multi-threaded
* Manage & schedule scan jobs with simple web interface (Docker only)
* Mobile-friendly Web interface
* Extracts text and metadata from common file types \*
* Generates thumbnails \*
* Fast, low memory usage
* Portable (all its features are packaged in a single executable)
* Extracts text from common file types\*
* Generates thumbnails\*
* Incremental scanning
* Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
* Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
* Named-entity recognition (client-side) \*\*\*\*
\* See [format support](#format-support)
\*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr)
\*\*\*\* See [Named-Entity Recognition](#NER)
\* See [format support](#format-support)
## Getting Started
### Using Docker Compose *(Windows/Linux/Mac)*
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases)
```yaml
version: "3"
*Windows users*: `sist2` runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
services:
elasticsearch:
image: elasticsearch:7.17.9
restart: unless-stopped
environment:
- "discovery.type=single-node"
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
sist2-admin:
image: simon987/sist2:3.0.7-x64-linux
restart: unless-stopped
volumes:
- ./sist2-admin-data/:/sist2-admin/
- /:/host
ports:
- 4090:4090 # sist2
- 8080:8080 # sist2-admin
working_dir: /root/sist2-admin/
entrypoint: python3 /root/sist2-admin/sist2_admin/app.py
*Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
## Example usage
![demo](demo.gif)
See help page `sist2 --help` for more details.
**Scan a directory**
```bash
sist2 scan ~/Documents -o ./orig_idx/
sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```
Navigate to http://localhost:8080/ to configure sist2-admin.
**Push index to Elasticsearch or file**
```bash
sist2 index --force-reset ./my_idx
sist2 index --print ./my_idx > raw_documents.ndjson
```
### Using the executable file *(Linux/WSL only)*
1. Choose search backend (See [comparison](#search-backends)):
* **Elasticsearch**: have an Elasticsearch (version >= 6.8.X, ideally >=7.14.0) instance running
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
2. *(or)* Run using docker:
```bash
docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.17.9
```
* **SQLite**: No installation required
2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
3. See [usage guide](docs/USAGE.md) for command line usage.
Example usage:
1. Scan a directory: `sist2 scan ~/Documents --output ./documents.sist2`
2. Prepare search index:
* **Elasticsearch**: `sist2 index --es-url http://localhost:9200 ./documents.sist2`
* **SQLite**: `sist2 index --search-index ./search.sist2 ./documents.sist2`
3. Start web interface: `sist2 web ./documents.sist2`
**Start web interface**
```bash
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
```
## Format support
| File type | Library | Content | Thumbnail | Metadata |
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
| `audio/*` | ffmpeg | - | yes | ID3 tags |
| `video/*` | ffmpeg | - | yes | title, comment, artist |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | no | author, title |
| mobi, azw, azw3 | libmobi | yes | yes | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,cbz,cbr,fb2,epub | MuPDF | yes | yes, `png` | *planned* |
`audio/*` | libav | - | yes, `jpeg` | ID3 tags |
`video/*` | libav | - | yes, `jpeg` | *planned* |
`image/*` | libav | - | yes, `jpeg` | *planned* |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
docx, xlsx, pptx | | *planned* | no | *planned* |
\* *See [Archive files](#archive-files)*
### Archive files
**sist2** will scan files stored in archive files (zip, tar, 7z...) as if they were directly in the file system.
Recursive (archives inside archives)
scan is also supported.
**Limitations**:
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
is limited (see the `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split into smaller parts.
### OCR
You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with the
`--ocr-lang <lang>` option in combination with `--ocr-images` and/or `--ocr-ebooks`.
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa, chi_sim, deu) pre-installed.
You can use the `+` separator to specify multiple languages. The language
name must be identical to the `*.traineddata` file installed on your system
(use `chi_sim` rather than `chi-sim`).
Examples:
```bash
sist2 scan --ocr-ebooks --ocr-lang jpn ~/Books/Manga/
sist2 scan --ocr-images --ocr-lang eng ~/Images/Screenshots/
sist2 scan --ocr-ebooks --ocr-images --ocr-lang eng+chi_sim ~/Chinese-Bilingual/
```
### Search backends
sist2 v3.0.7+ supports SQLite search backend. The SQLite search backend has
fewer features and generally comparable query performance for medium-size
indices, but it uses much less memory and is easier to set up.
| | SQLite | Elasticsearch |
|----------------------------------------------|:----------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------:|
| Requires separate search engine installation | | ✓ |
| Memory footprint | ~20MB | >500MB |
| Query syntax | [fts5](https://www.sqlite.org/fts5.html) | [query_string](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax) |
| Fuzzy search | | ✓ |
| Media Types tree real-time updating | | ✓ |
| Search in file `path` | | ✓ |
| Manual tagging | ✓ | ✓ |
| User scripts | | ✓ |
| Media Type breakdown for search results | | ✓ |
### NER
sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported repository URL to
**Configuration** > **Machine learning options** > **Model repositories**
to enable it.
The text processing is done in your browser, no data is sent to any third-party services.
See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
#### List of available repositories:
| URL | Maintainer | Purpose |
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
<details>
<summary>Screenshot</summary>
![ner](docs/ner.png)
</details>
## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries
### Using docker
```bash
git clone --recursive https://github.com/simon987/sist2/
cd sist2
docker build . -t my-sist2-image
# Copy sist2 executable from docker image
docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
```
### Using a linux computer
You can compile **sist2** by yourself if you don't want to use the pre-compiled
binaries.
1. Install compile-time dependencies
```bash
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
```
2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
3. Install vcpkg dependencies
*(Debian)*
```bash
vcpkg install curl[core,openssl] sqlite3[core,fts5] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample]
```
4. Build
apt install git cmake pkg-config libglib2.0-dev\
libssl-dev uuid-dev libavformat-dev libswscale-dev \
python3 libmagic-dev libfreetype6-dev libcurl-dev \
libbz2-dev yasm
2. Build
```bash
git clone --recursive https://github.com/simon987/sist2/
(cd sist2-vue; npm install; npm run build)
(cd sist2-admin/frontend; npm install; npm run build)
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
git clone --recurse-submodules https://github.com/simon987/sist2
./scripts/get_static_libs.sh
cmake .
make
```
```

1
argparse Submodule

Submodule argparse added at fafc503d23

1
cJSON Submodule

Submodule cJSON added at 2de7d04aaf

View File

@@ -1,7 +0,0 @@
# install: copy the three update scripts to /usr/bin and the systemd service
# and timer units to /etc/systemd/system, then reload systemd so it re-reads
# its unit files.
install:
install sist2-update-all.sh /usr/bin/sist2-update-all.sh
install sist2-update-files.sh /usr/bin/sist2-update-files.sh
install sist2-update-nextcloud.sh /usr/bin/sist2-update-nextcloud.sh
install sist2-update.service /etc/systemd/system/sist2-update.service
install sist2-update.timer /etc/systemd/system/sist2-update.timer
systemctl daemon-reload

View File

@@ -1,31 +0,0 @@
# Systemd integration example
This example contains my (yatli) personal configuration for sist2 auto-updating.
The following indices are involved in this configuration:
| Index | Path | Description |
|-----------|------------------|--------------------------------------------|
| files | /zpool/files | Main file repository |
| nextcloud | /zpool/nextcloud | Externally synchronized to a cloud account |
The systemd integration runs sist2 scanning and indexing automatically every day at 3:00 AM.
### Tailoring the configuration for yourself
`sist2-update-all.sh` calls the update script for each sist2 index. Add or remove
update scripts to suit your needs. Each update script (e.g.
`sist2-update-files.sh`) has important parameters laid down at the beginning so
make sure to edit them to point to your files and index locations.
### Installation
```bash
# install the services and scripts
sudo make install
# enable & start the timer
sudo systemctl enable sist2-update.timer
sudo systemctl start sist2-update.timer
# verify that the timer has been enabled
systemctl list-timers --all
```

View File

@@ -1,9 +0,0 @@
#!/bin/bash
# Update every sist2 index, then restart the sist2 container.
#
# The per-index scripts are sourced rather than executed, so they run in this
# shell: their variables and `unset` calls persist here, and with `set -e` the
# first failing command aborts the whole update before the restart.
set -e
# Absolute directory of this script, so the sibling scripts are found
# regardless of the caller's working directory.
__dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
echo "Update index: Files"
# Quoted so an install path containing spaces or glob characters still works.
source "${__dir}/sist2-update-files.sh"
echo "Update index: Nextcloud"
source "${__dir}/sist2-update-nextcloud.sh"
echo "Done. Restarting sist2."
docker restart sist2-sist2-1

View File

@@ -1,34 +0,0 @@
#!/bin/bash
# Incrementally rescan the "Files" content directory into a dated index,
# replace the previous index with it, then run `sist2 index` on the result.
set -e

# --- Parameters: edit these to point at your own content and index paths ---
DATE=$(date +%Y_%m_%d)
CONTENT=/zpool/files
ORIG=/mnt/ssd/sist-index/files.idx
NEW=/mnt/ssd/sist-index/files_$DATE.idx
EXCLUDE='ZArchives|TorrentStore|TorrentDownload|624f0c59-1fef-44f6-95e9-7483296f2833|ubuntu-full-2021-12-07'
NAME=Files
#REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
REWRITE_URL=""

# Every expansion is quoted so values containing spaces or glob characters
# reach sist2 as a single, unmodified argument.
sist2 scan \
    --threads 14 \
    --mem-throttle 32768 \
    --thumbnail-quality 2 \
    --name "$NAME" \
    --ocr-lang=eng+chi_sim \
    --ocr-ebooks \
    --ocr-images \
    --exclude="$EXCLUDE" \
    --rewrite-url="$REWRITE_URL" \
    --incremental="$ORIG" \
    --output="$NEW" \
    "$CONTENT"
echo ">>> Scan complete"

# Swap the fresh index in. With `set -e` this is only reached if the scan
# succeeded. `--` stops option parsing should a path ever start with '-'.
rm -rf -- "$ORIG"
mv -- "$NEW" "$ORIG"

# Clear proxy settings before indexing.
# NOTE(review): presumably so the indexing request reaches the search backend
# directly - confirm.
unset http_proxy
unset https_proxy
unset HTTP_PROXY
unset HTTPS_PROXY
sist2 index "$ORIG" --incremental-index
echo ">>> Index complete"

View File

@@ -1,33 +0,0 @@
#!/bin/bash
# Incrementally rescan the "NextCloud" collection and push the result to
# the search backend.
#
# Edit the parameters below to point to your own files and index locations.
# `set -e` aborts on the first failing command, so the old index is only
# removed after the scan has completed successfully.
set -e
DATE=$(date +%Y_%m_%d)
# Directory to scan.
CONTENT=/zpool/nextcloud/v-yadli
# Current index; used as the incremental baseline and replaced at the end.
ORIG=/mnt/ssd/sist-index/nextcloud.idx
# Scratch location for the freshly scanned, date-stamped index.
NEW=/mnt/ssd/sist-index/nextcloud_$DATE.idx
# Regex of paths that must not be scanned.
# NOTE(review): inside single quotes `\\/` reaches sist2 as two backslashes
# followed by a slash - confirm that this is the intended regex escaping.
EXCLUDE='Yatao|.*263418493\\/Image\\/.*'
NAME=NextCloud
# REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
REWRITE_URL=""
# Every expansion is quoted: EXCLUDE contains `*`, which the shell would
# otherwise treat as a glob pattern, and quoting also prevents word
# splitting of any parameter that contains whitespace.
sist2 scan \
    --threads 14 \
    --mem-throttle 32768 \
    --thumbnail-quality 2 \
    --name "$NAME" \
    --ocr-lang=eng+chi_sim \
    --ocr-ebooks \
    --ocr-images \
    --exclude="$EXCLUDE" \
    --rewrite-url="$REWRITE_URL" \
    --incremental="$ORIG" \
    --output="$NEW" \
    "$CONTENT"
echo ">>> Scan complete"
# Swap the new index into place. `--` guards against a path that starts
# with a dash being parsed as an option.
rm -rf -- "$ORIG"
mv -- "$NEW" "$ORIG"
# Clear proxy settings before indexing.
# NOTE(review): presumably so the connection to the local search backend is
# not routed through a proxy - confirm.
unset http_proxy
unset https_proxy
unset HTTP_PROXY
unset HTTPS_PROXY
sist2 index "$ORIG" --incremental-index
# Same completion message as sist2-update-files.sh, for consistent logs.
echo ">>> Index complete"

View File

@@ -1,6 +0,0 @@
[Unit]
Description=sist2-update
[Service]
User=yatli
ExecStart=/bin/bash /usr/bin/sist2-update-all.sh

View File

@@ -1,10 +0,0 @@
[Unit]
Description=sist2-update
[Timer]
OnCalendar=*-*-* 3:00:00
Persistent=true
Unit=sist2-update.service
[Install]
WantedBy=timers.target

BIN
demo.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 MiB

View File

@@ -1,24 +0,0 @@
version: "3"
services:
elasticsearch:
image: elasticsearch:7.17.9
container_name: sist2-es
environment:
- "discovery.type=single-node"
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
sist2-admin:
build:
context: .
container_name: sist2-admin
volumes:
- /mnt/array/sist2-admin-data/:/sist2-admin/
- /:/host
ports:
- 4090:4090
# NOTE: Don't export this port publicly!
- 8080:8080
working_dir: /root/sist2-admin/
entrypoint: python3
command:
- /root/sist2-admin/sist2_admin/app.py

View File

@@ -1,254 +0,0 @@
# Usage
```
Usage: sist2 scan [OPTION]... PATH
or: sist2 index [OPTION]... INDEX
or: sist2 sqlite-index [OPTION]... INDEX
or: sist2 web [OPTION]... INDEX...
or: sist2 exec-script [OPTION]... INDEX
Lightning-fast file system indexer and search tool.
-h, --help show this help message and exit
-v, --version Print version and exit.
--verbose Turn on logging.
--very-verbose Turn on debug messages.
--json-logs Output logs in JSON format.
Scan options
-t, --threads=<int> Number of threads. DEFAULT: 1
-q, --thumbnail-quality=<int> Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2
--thumbnail-size=<int> Thumbnail size, in pixels. DEFAULT: 552
--thumbnail-count=<int> Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1
--content-size=<int> Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768
-o, --output=<str> Output index file path. DEFAULT: index.sist2
--incremental If the output file path exists, only scan new or modified files.
--optimize-index Defragment index file after scan to reduce its file size.
--rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: index
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: don't scan, list: only save file names as text, shallow: don't scan archives inside archives. DEFAULT: recurse
--archive-passphrase=<str> Passphrase for encrypted archive files
--ocr-lang=<str> Tesseract language (use 'tesseract --list-langs' to see which are installed on your machine)
--ocr-images Enable OCR'ing of image files.
--ocr-ebooks Enable OCR'ing of ebook files.
-e, --exclude=<str> Files that match this regex will not be scanned.
--fast Only index file names & mime type.
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MiB for files inside archives (see USAGE.md). DEFAULT: 2000
--read-subtitles Read subtitles from media files.
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata).
--checksums Calculate file checksums when scanning.
--list-file=<str> Specify a list of newline-delimited paths to be scanned instead of normal directory traversal. Use '-' to read from stdin.
Index options
-t, --threads=<int> Number of threads. DEFAULT: 1
--es-url=<str> Elasticsearch url with port. DEFAULT: http://localhost:9200
--es-insecure-ssl Do not verify SSL connections to Elasticsearch.
--es-index=<str> Elasticsearch index name. DEFAULT: sist2
-p, --print Print JSON documents to stdout instead of indexing to elasticsearch.
--incremental-index Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
--script-file=<str> Path to user script.
--mappings-file=<str> Path to Elasticsearch mappings.
--settings-file=<str> Path to Elasticsearch settings.
--async-script Execute user script asynchronously.
--batch-size=<int> Index batch size. DEFAULT: 70
-f, --force-reset Reset Elasticsearch mappings and settings.
sqlite-index options
--search-index=<str> Path to search index. Will be created if it does not exist yet.
Web options
--es-url=<str> Elasticsearch url. DEFAULT: http://localhost:9200
--es-insecure-ssl Do not verify SSL connections to Elasticsearch.
--search-index=<str> Path to SQLite search index.
--es-index=<str> Elasticsearch index name. DEFAULT: sist2
--bind=<str> Listen for connections on this address. DEFAULT: localhost:4090
--auth=<str> Basic auth in user:password format
--auth0-audience=<str> API audience/identifier
--auth0-domain=<str> Application domain
--auth0-client-id=<str> Application client ID
--auth0-public-key-file=<str> Path to Auth0 public key file extracted from <domain>/pem
--tag-auth=<str> Basic auth in user:password format for tagging
--tagline=<str> Tagline in navbar
--dev Serve html & js files from disk (for development)
--lang=<str> Default UI language. Can be changed by the user
Exec-script options
--es-url=<str> Elasticsearch url. DEFAULT: http://localhost:9200
--es-insecure-ssl Do not verify SSL connections to Elasticsearch.
--es-index=<str> Elasticsearch index name. DEFAULT: sist2
--script-file=<str> Path to user script.
--async-script Execute user script asynchronously.
Made by simon987 <me@simon987.net>. Released under GPL-3.0
```
#### Thumbnail database size estimation
See the chart below for a rough estimate of the size of each thumbnail vs. the `--thumbnail-size` & `--thumbnail-quality` arguments:
For example, `--thumbnail-size=500`, `--thumbnail-quality=2` for a directory with 8 million images will create a thumbnail database
that is about `8000000 * 36kB = 288GB`.
![thumbnail_size](thumbnail_size.png)
### Scan examples
Simple scan
```bash
sist2 scan ~/Documents
sist2 scan \
--threads 4 --content-size 16000000 --thumbnail-quality 2 --archive shallow \
--name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
~/Documents -o ./documents.sist2
```
Incremental scan
If the index file does not exist, `--incremental` has no effect.
```bash
sist2 scan ~/Documents -o ./documents.sist2
sist2 scan ~/Documents -o ./documents.sist2 --incremental
# or
sist2 scan ~/Documents -o ./documents.sist2 --incremental
sist2 scan ~/Documents -o ./documents.sist2 --incremental
```
### Index documents to Elasticsearch search backend
```bash
sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index.sist2
sist2 index ./my_index.sist2
```
#### Index documents to SQLite search backend
```bash
# The search index will be created if it does not exist already
sist2 sqlite-index ./index1.sist2 --search-index search.sist2
sist2 sqlite-index ./index2.sist2 --search-index search.sist2
```
**Save index in JSON format**
```bash
sist2 index --print ./my_index.sist2 > my_index.ndjson
```
**Inspect contents of an index**
```bash
sist2 index --print ./my_index.sist2 | jq | less
```
## Web
### Web examples
**Single index (Elasticsearch backend)**
```bash
sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index.sist2
```
**Multiple indices (Elasticsearch backend)**
```bash
# Indices will be displayed in this order in the web interface
sist2 web index1.sist2 index2.sist2 index3.sist2 index4.sist2
```
**SQLite search backend**
```bash
sist2 web --search-index search.sist2 index1.sist2
```
#### Auth0 authentication
See [auth0.md](auth0.md)
### rewrite_url
When the `rewrite_url` field is not empty, the web module ignores the `root`
field and will return an HTTP redirect to `<rewrite_url><path>/<name><extension>`
instead of serving the file from disk.
Both the `root` and `rewrite_url` fields are safe to manually modify from the
`descriptor.json` file.
# Elasticsearch
Elasticsearch versions >=6.8.0, 7.X.X and 8.X.X are supported by sist2.
Using a version >=7.14.0 is recommended to enable the following features:
- Bug fix for large documents (See #198)
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
If you don't care about the features above, you can ignore it or disable it in the configuration page.
## exec-script
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
# Tagging
### Manual tagging
You can modify tags of individual documents directly from the
`web` interface. Note that you can set up authentication for this feature
with the `--tag-auth` option (see [web options](#web-options)).
![manual_tag](manual_tag.png)
Tags that are manually added are saved both in the
index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing,
they are read from the index and automatically applied.
You can safely copy the `/tags/` database to another index.
See [Automatic tagging](#automatic-tagging) for information about tag
hierarchies and tag colors.
\* *It can take a few seconds to take effect in new search queries.*
### Automatic tagging
See [scripting](scripting.md) documentation.
# Sidecar files
When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
original document's indexed metadata (does not modify the actual file). Sidecar metadata files will also work inside archives.
Sidecar files themselves are not saved in the index.
This feature is useful to leverage third-party applications such as speech-to-text or
OCR to add additional metadata to a file.
**Example**
```
~/Documents/
├── Video.mp4
└── Video.mp4.s2meta
```
The sidecar file must have exactly the same file path and the `.s2meta` suffix.
`Video.mp4.s2meta`:
```json
{
"content": "This sidecar file will overwrite some metadata fields of Video.mp4",
"author": "Some author",
"duration": 12345,
"bitrate": 67890,
"some_arbitrary_field": [1,2,3]
}
```
```
sist2 scan ~/Documents -o ./docs.sist2
sist2 index ./docs.sist2
```
*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,
it is not currently possible to restore both manual tags and sidecar tags without user scripts
while reindexing.

View File

@@ -1,19 +0,0 @@
# Authentication with Auth0
1. Create a new Auth0 application (Single page app)
2. Create a new Auth0 API:
1. Choose `RS256` signing algorithm
2. Set identifier (audience) to `https://sist2`
3. Download the Auth0 certificate from https://<domain>.auth0.com/pem (you can find the domain under Applications -> Basic information)
4. Extract the public key from the certificate using `openssl x509 -pubkey -noout -in cert.pem > pubkey.txt`
5. Start the sist2 web server
Example options:
```bash
sist2 web \
--auth0-client-id XXX \
--auth0-audience https://sist2 \
--auth0-domain YYY.auth0.com \
--auth0-public-key-file /ZZZ/pubkey.txt
```

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 448 KiB

View File

@@ -1,142 +0,0 @@
## User scripts
*This document is under construction, more in-depth guide coming soon*
During the `index` step, you can use the `--script-file <script>` option to
modify documents or add user tags. This option is mainly used to
implement automatic tagging based on file attributes.
The scripting language used
([Painless Scripting Language](https://www.elastic.co/guide/en/elasticsearch/painless/7.4/index.html))
is very similar to Java, but you should be able to create user scripts
without programming experience at all if you're somewhat familiar with
regex.
This is the base structure of the documents we're working with:
```json
{
"_id": "e171405c-fdb5-4feb-bb32-82637bc32084",
"_index": "sist2",
"_type": "_doc",
"_source": {
"index": "206b3050-e821-421a-891d-12fcf6c2db0d",
"mime": "application/json",
"size": 1799,
"mtime": 1545443685,
"extension": "md",
"name": "README",
"path": "sist2/scripting",
"content": "..."
}
}
```
**Example script**
This script checks whether the `genre` attribute exists; if it does,
it adds the `genre.<genre>` tag.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase());
}
```
You can use `.` to create a hierarchical tag tree:
![scripting/genre_example](genre_example.png)
To use regular expressions, you need to add this line in `/etc/elasticsearch/elasticsearch.yml`
```yaml
script.painless.regex.enabled: true
```
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
**Tag color**
You can specify the color for an individual tag by appending a
hexadecimal color code (`#RRGGBBAA`) to the tag name.
### Examples
If `(20XX)` is in the file name, add the `year.<year>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) {
tags.add("year." + m.group(1));
}
```
Use default *Calibre* folder structure to infer author.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
// We expect the book path to look like this:
// /path/to/Calibre Library/Author/Title/Title - Author.pdf
if (ctx._source.name.contains("-") && ctx._source.extension == "pdf") {
String[] names = ctx._source.name.splitOnToken('-');
tags.add("author." + names[1].strip());
}
```
If the file name matches a specific pattern `AAAA-000 fName1 lName1, <fName2 lName2>...`, add the `actress.<actress>` and
`studio.<studio>` tags:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /([A-Z]{4})-[0-9]{3} (.*)/.matcher(ctx._source.name);
if (m.find()) {
tags.add("studio." + m.group(1));
// Take the matched group (.*), and add a tag for
// each name, separated by comma
for (String name : m.group(2).splitOnToken(',')) {
tags.add("actress." + name);
}
}
```
Use the name of the last folder (`/path/to/<studio>/file.mp4`) as the `studio.<studio>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```
Parse `EXIF:F Number` tag
```Java
if (ctx._source?.exif_fnumber != null) {
String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
String aperture = String.valueOf(Float.parseFloat(values[0]) / Float.parseFloat(values[1]));
if (aperture == "NaN") {
aperture = "0,0";
}
tags.add("Aperture.f/" + aperture.replace(".", ","));
}
```
Display year and months from `EXIF:DateTime` tag
```Java
if (ctx._source?.exif_datetime != null) {
SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
Date date = parser.parse(ctx._source.exif_datetime);
SimpleDateFormat yp = new SimpleDateFormat("yyyy");
SimpleDateFormat mp = new SimpleDateFormat("MMMMMMMMM");
String year = yp.format(date);
String month = mp.format(date);
tags.add("Month." + month);
tags.add("Year." + year);
}
```

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.7 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 167 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 180 KiB

1
lib/ffmpeg Submodule

Submodule lib/ffmpeg added at 0481a1f6e5

1
lib/mupdf Submodule

Submodule lib/mupdf added at 91782a4348

1
lib/onion Submodule

Submodule lib/onion added at d8d4cc9290

1
lmdb Submodule

Submodule lmdb added at 5c012bbe03

View File

@@ -1,20 +1,15 @@
application/x-matlab-data,mat
application/arj, arj
application/base64, mme
application/binhex, hqx
application/book, boo|book
application/CDFV2-corrupt,
application/CDFV2, sdv
application/clariscad, ccad
application/commonground, dp
application/csv,
application/dicom, dcm
application/drafting, drw
application/epub+zip, epub
application/freeloader, frl
application/futuresplash, spl
application/groupwise, vew
application/gzip, gz|tgz
application/gzip, gz
application/hta, hta
application/i-deas, unv
application/iges, iges|igs
@@ -22,19 +17,17 @@ application/inf, inf
application/java-archive, jar
application/java, class
application/javascript,
application/x-archive, a
application/json, json
application/ndjson, jsonl|ndjson
application/marc, mrc
application/mbedlet, mbd
application/mime, aps
application/mspowerpoint, ppz
application/msword, doc|dot|w6w|wiz|word
application/netmc, mcp
application/octet-stream, bin|dump|gpg|pack|idx
application/octet-stream, bin|dump|gpg
application/oda, oda
application/ogg, ogv
application/pdf, pdf
application/pgp-keys,
application/pgp-signature, pgp
application/pkcs7-signature, p7s
application/pkix-cert, cer|crt
@@ -50,10 +43,6 @@ application/vda, vda
application/vnd.fdf, fdf
application/vnd.font-fontforge-sfd, sfd
application/vnd.hp-hpgl, hgl|hpg|hpgl
application/vnd.iccprofile, icm
application/vnd.iccprofile, icm
application/vnd.lotus-1-2-3,
application/vnd.ms-cab-compressed, cab
application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
application/vnd.ms-fontobject, eot
application/vnd.ms-opentype, otf
@@ -65,73 +54,45 @@ application/vnd.ms-project, mpp
application/vnd.oasis.opendocument.base, odb
application/vnd.oasis.opendocument.formula, odf
application/vnd.oasis.opendocument.graphics, odg
application/vnd.oasis.opendocument.presentation, odp
application/vnd.oasis.opendocument.spreadsheet, ods
application/vnd.oasis.opendocument.text, odt
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
application/vnd.symbian.install,
application/vnd.tcpdump.pcap, pcap
application/vnd.wap.wmlc, wmlc
application/vnd.wap.wmlscriptc, wmlsc
application/vnd.xara, web
application/vocaltec-media-desc, vmd
application/vocaltec-media-file, vmf
application/warc, warc
application/winhelp, hlp
application/wordperfect, wp|wp5|wp6|wpd|w60|w61
application/wordperfect6.0, w60
application/wordperfect6.1, w61
application/wordperfect, wp|wp5|wp6|wpd
application/x-123, wk1
application/x-7z-compressed, 7z
application/x-aim, aim
application/x-apple-diskimage,
application/x-arc,
application/x-archive, a
application/x-atari-7800-rom, a78
application/x-authorware-bin, aab
application/x-authorware-map, aam
application/x-authorware-seg, aas
application/x-avira-qua,
application/x-bcpio, bcpio
application/x-bittorrent, torrent
application/x-bsh, bsh
application/x-bytecode.python, pyc
application/x-bzip2, boz|bz2
application/x-bzip, bz
application/x-cbr, cbr
application/x-cbz, cbz
application/x-cdlink, vcd
application/x-chat, cha|chat
application/x-chrome-extension,
application/x-cocoa, cco
application/x-conference, nsc
application/x-coredump,
application/x-cpio, cpio
application/x-dbf, dbf
application/x-dbt,
application/x-debian-package, deb
application/x-deepv, deepv
application/x-director, dir|dxr
application/x-dmp, dmp
application/x-dosdriver,
application/x-director, dcr|dir|dxr
application/x-dosexec, dll
application/x-dvi, dvi
application/x-elc, elc
application/x-empty,
application/x-envoy, env|evy
application/x-esrehber, es
application/x-excel, xla|xld|xlk|xlt|xlv
application/x-executable, exe
application/x-font-gdos,
application/x-font-pf2, pf2
application/x-font-pfm, pfm
application/x-font-sfn,
application/x-font-ttf, ttf|ttc
application/x-fptapplication/x-dbt,
application/x-font-ttf, ttf
application/x-freelance, pre
application/x-gamecube-rom,
application/x-gdbm,
application/x-gettext-translation,
application/x-git,
application/x-gsp, gsp
application/x-gss, gss
@@ -141,67 +102,46 @@ application/x-hdf, hdf
application/x-helpfile, help
application/x-httpd-imap, imap
application/x-ima, ima
application/x-innosetup,
application/x-internett-signup, ins
application/x-inventor, iv
application/x-ip2, ip
application/x-java-applet,
application/x-java-commerce, jcm
application/x-java-image,
application/x-java-jmod, jmod
application/x-java-keystore,
application/x-kdelnk,
application/x-koan, skd|skm|skp|skt
application/x-latex, latex|ltx
application/x-livescreen, ivy
application/x-lotus, wq1
application/x-lz4+json, jsonlz4
application/x-lz4, lz4
application/x-lzh-compressed,
application/x-lzh, lzh
application/x-lzip, lz
application/x-lzma, lzma
application/x-lzop, lzo
application/x-lzx, lzx
application/x-mach-binary, jnilib|dylib
application/x-mach-executable,
application/x-magic-cap-package-1.0, mc$
application/x-mathcad, mcd
application/x-maxis-dbpf,
application/x-meme, mm
application/x-midi, midi
application/x-mif, mif
application/x-mix-transfer, nix
application/xml, opf
application/x-mobipocket-ebook, mobi
application/vnd.amazon.mobi8-ebook, azw|azw3
application/x-msaccess, accdb
application/x-ms-compress-szdd, fon
application/x-ms-pdb, pdb
application/x-ms-reader, lit
application/x-n64-rom, z64
application/x-navi-animation, ani
application/x-navidoc, nvd
application/x-navimap, map
application/x-navistyle, stl
application/x-nes-rom, nes
application/x-netcdf, cdf|nc
application/x-newton-compatible-pkg, pkg
application/x-nintendo-ds-rom,
application/x-object, o
application/x-omcdatamaker, omcd
application/x-omc, omc
application/x-omcregerator, omcr
application/x-pagemaker, pm4|pm5
application/x-pcl, pcl
application/x-pgp-keyring,
application/x-pixclscript, plx
application/x-pkcs7-certreqresp, p7r
application/x-pkcs7-signature, p7a
application/x-project, mpc|mpt|mpv|mpx
application/x-qpro, wb1
application/x-rar, rar
application/x-rpm, rpm
application/x-sdp, sdp
application/x-sea, sea
application/x-seelogo, sl
@@ -209,17 +149,12 @@ application/x-setupscript,
application/x-sharedlib, so
application/x-shar, shar
application/x-shockwave-flash, swf
application/x-snappy-framed,
application/x-sprite, spr|sprite
application/x-sqlite3,
application/x-stargallery-thm,
application/x-stuffit, sit
application/x-sv4cpio, sv4cpio
application/x-sv4crc, sv4crc
application/x-tar, tar
application/x-tbook, sbk|tbk
application/x-terminfo,
application/x-terminfo2,
application/x-texinfo, texi|texinfo
application/x-tex-tfm, tfm
application/x-ustar, ustar
@@ -228,33 +163,24 @@ application/x-vnd.audioexplosion.mzz, mzz
application/x-vnd.ls-xpix, xpix
application/x-vrml, vrml
application/x-wais-source, src|wsrc
application/x-wine-extension-ini,
application/x-wintalk, wtk
application/x-world, svr
application/x-wri, wri
application/x-x509-ca-cert, der
application/x-xz, xz
application/x-zip,
application/x-zstd, zst
application/zip, zip
application/zlib, z
!audio/basic, au
audio/it, it
audio/make, funk|my|pfunk
audio/midi, kar
audio/mid, rmi
audio/mp4, m4b
audio/mpeg, m2a|mpa|mpga
audio/mpeg, m2a|mpa
audio/ogg, ogg
audio/s3m, s3m
audio/tsp-audio, tsi
audio/tsplayer, tsp
audio/vnd.qcelp, qcp
audio/voxware, vox
audio/x-aiff, aiff|aif
audio/x-flac, flac
audio/x-gsm, gsd|gsm
audio/x-hx-aac-adts,
audio/x-jam, jam
audio/x-liveaudio, lam
audio/x-m4a, m4a
@@ -268,24 +194,17 @@ audio/x-nspaudio, lma
audio/x-pn-realaudio, ram|rm|rmm|rmp
audio/x-psid, sid
audio/x-realaudio, ra
audio/x-s3m,
audio/x-twinvq-plugin, vqe|vql
audio/x-twinvq, vqf
audio/x-voc, voc
audio/x-wav, wav
!audio/x-xbox360-executable, xex
!audio/x-xbox-executable, xbe
font/otf,
font/sfnt,
font/woff2, woff2
font/woff, woff
image/bmp,
image/cmu-raster, rast
image/fif, fif
image/florian, flo|turbot
image/g3fax, g3
image/gif, gif
image/heic, heic
image/ief, ief|iefs
image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
image/jutvision, jut
@@ -294,9 +213,6 @@ image/pict, pic|pict
image/png, png|x-png
!image/svg, svg
!image/svg+xml,
image/tiff,
!image/vnd.adobe.photoshop, psd
!image/vnd.djvu, djvu
image/vnd.fpx, fpx
image/vnd.microsoft.icon,
image/vnd.rn-realflash, rf
@@ -304,15 +220,9 @@ image/vnd.rn-realpix, rp
image/vnd.wap.wbmp, wbmp
image/vnd.xiff, xif
image/webp, webp
image/wmf,
image/x-3ds, 3ds
image/x-award-bioslogo,
image/x-cmu-raster, ras
image/x-cur, tga
image/x-dwg, dwg|dxf|svf
image/x-eps,
image/x-exr, exr
image/x-gem,
image/x-icns,
!image/x-icon, ico
image/x-jg, art
@@ -326,36 +236,34 @@ image/x-portable-graymap, pgm
image/x-portable-pixmap, ppm
image/x-quicktime, qif|qti|qtif
image/x-rgb, rgb
image/x-tga,
image/x-tiff, tif|tiff
image/x-win-bitmap,
image/tiff,
!image/x-xcf, xcf
!image/x-xpixmap, xpm
image/x-xwindowdump, xwd
message/news,
message/rfc822, mht|mhtml|mime
model/vnd.dwf, dwf
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
model/vrml, wrz
model/x-pov, pov
text/asp, asp
text/css, css
text/x-sass, sass
text/x-scss, scss
text/html, acgi|htm|html|htmls|htx|shtml
text/javascript, js
text/mcf, mcf
text/pascal, pas
text/PGP,
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
text/x-script.python, pyx
text/csv,
application/vnd.coffeescript, coffee
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc
text/x-awk, awk
!video/x-jng, jng
video/x-mng, mng
image/x-cur, tga
image/x-xwindowdump, xwd
!image/vnd.adobe.photoshop, psd
text/tab-separated-values, tsv
text/troff, man|me|ms|roff|t|tr
text/uri-list, uji|unis|uri|uris
text/uri-list, uni|unis|uri|uris
text/vnd.abc, abc
text/vnd.fmi.flexstor, flx
text/vnd.wap.wmlscript, wmls
@@ -364,7 +272,6 @@ text/webviewhtml, htt
text/x-Algol68,
text/x-asm, asm|s
text/x-audiosoft-intra, aip
text/x-awk, awk
text/x-bcpl,
text/x-c, c|cc|h
text/x-c++, cpp|cxx|c++
@@ -379,31 +286,23 @@ text/x-makefile, am|mak
text/xml, xml|pom|iml|plist
text/x-m, m
text/x-msdos-batch, bat
text/x-ms-regedit, reg
text/x-objective-c,
text/x-pascal, p
text/x-perl, pl
text/x-php, php
text/x-po, po
text/x-python, py|pyi
text/x-python, py
text/x-ruby, rb
text/x-sass, sass
text/x-scss, scss
text/x-server-parsed-html, ssi
text/x-setext, etx
text/x-sgml, sgm|sgml
text/x-shellscript, sh
text/x-speech, talk
text/x-tcl,
text/x-tex, tex
text/x-uil, uil
text/x-uuencode, uue
text/x-vcalendar, vcs
text/x-vcard, vcf
video/animaflex, afl
video/avi, avi
video/avs-video, avs
video/MP2T,
video/mp4, mp4
video/mpeg, m1v|m2v|mpe|mpeg|mpg
video/quicktime, moov|mov|qt
@@ -418,36 +317,43 @@ video/x-atomic3d-feature, fmf
video/x-dl, dl
video/x-dv, dif|dv
video/x-fli, fli
video/x-flv, flv
video/x-isvideo, isu
!video/x-jng, jng
video/x-m4v, m4v
video/x-matroska, mkv
video/x-mng, mng
video/x-motion-jpeg, mjpg
video/x-ms-asf, asf|asx|wmv
video/x-msvideo, divx
video/x-ms-asf, asf|asx
video/x-qtc, qtc
video/x-sgi-movie, movie|mv
x-epoc/x-sisx-app,
application/x-zstd-dictionary,
application/vnd.ms-outlook, msg
image/x-olympus-orf, orf
image/x-nikon-nef, nef
image/x-fuji-raf, raf
image/x-panasonic-raw, rw2|raw
image/x-adobe-dng, dng
image/x-canon-cr2, cr2
image/x-canon-crw, crw
image/x-dcraw,
image/x-kodak-dcr, dcr
image/x-kodak-k25, k25
image/x-kodak-kdc, kdc
image/x-minolta-mrw, mrw
image/x-pentax-pef, pef
image/x-sigma-x3f, xf3
image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
sist2/sidecar, s2meta
application/x-7z-compressed, 7z
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
text/x-po, po
application/x-rpm, rpm
application/x-debian-package, deb
application/vnd.iccprofile, icm
application/dicom, dcm
image/x-exr, exr
application/vnd.iccprofile, icm
video/x-matroska, mkv
application/x-empty,
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
font/woff, woff
font/woff2, woff2
application/epub+zip, epub
application/x-mobipocket-ebook, mobi
audio/x-flac, flac
application/x-rar, rar
video/x-msvideo, divx
video/x-flv, flv
application/x-kdelnk,
text/x-tcl,
application/ogg, ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.ms-cab-compressed, cab
audio/mp4, m4b
!image/vnd.djvu, djvu
application/x-ms-reader, lit
application/CDFV2-corrupt,
text/x-vcard, vcf
application/x-innosetup,
application/winhelp, hlp
image/x-tga,
application/x-wine-extension-ini,
1 application/x-matlab-data application/arj mat arj
application/x-matlab-data mat
1 application/arj application/arj arj arj
2 application/base64 application/base64 mme mme
3 application/binhex application/binhex hqx hqx
4 application/book application/book boo|book boo|book
application/CDFV2-corrupt
5 application/CDFV2 application/CDFV2 sdv sdv
6 application/clariscad application/clariscad ccad ccad
7 application/commonground application/commonground dp dp
application/csv
application/dicom dcm
8 application/drafting application/drafting drw drw
application/epub+zip epub
9 application/freeloader application/freeloader frl frl
10 application/futuresplash application/futuresplash spl spl
11 application/groupwise application/groupwise vew vew
12 application/gzip application/gzip gz|tgz gz
13 application/hta application/hta hta hta
14 application/i-deas application/i-deas unv unv
15 application/iges application/iges iges|igs iges|igs
17 application/java-archive application/java-archive jar jar
18 application/java application/java class class
19 application/javascript application/javascript
20 application/x-archive a
21 application/json application/json json json
application/ndjson jsonl|ndjson
22 application/marc application/marc mrc mrc
23 application/mbedlet application/mbedlet mbd mbd
24 application/mime application/mime aps aps
25 application/mspowerpoint application/mspowerpoint ppz ppz
26 application/msword application/msword doc|dot|w6w|wiz|word doc|dot|w6w|wiz|word
27 application/netmc application/netmc mcp mcp
28 application/octet-stream application/octet-stream bin|dump|gpg|pack|idx bin|dump|gpg
29 application/oda application/oda oda oda
application/ogg ogv
30 application/pdf application/pdf pdf pdf
application/pgp-keys
31 application/pgp-signature application/pgp-signature pgp pgp
32 application/pkcs7-signature application/pkcs7-signature p7s p7s
33 application/pkix-cert application/pkix-cert cer|crt cer|crt
43 application/vnd.fdf application/vnd.fdf fdf fdf
44 application/vnd.font-fontforge-sfd application/vnd.font-fontforge-sfd sfd sfd
45 application/vnd.hp-hpgl application/vnd.hp-hpgl hgl|hpg|hpgl hgl|hpg|hpgl
application/vnd.iccprofile icm
application/vnd.iccprofile icm
application/vnd.lotus-1-2-3
application/vnd.ms-cab-compressed cab
46 application/vnd.ms-excel application/vnd.ms-excel xlb|xlc|xll|xlm|xls|xlw xlb|xlc|xll|xlm|xls|xlw
47 application/vnd.ms-fontobject application/vnd.ms-fontobject eot eot
48 application/vnd.ms-opentype application/vnd.ms-opentype otf otf
54 application/vnd.oasis.opendocument.base application/vnd.oasis.opendocument.base odb odb
55 application/vnd.oasis.opendocument.formula application/vnd.oasis.opendocument.formula odf odf
56 application/vnd.oasis.opendocument.graphics application/vnd.oasis.opendocument.graphics odg odg
application/vnd.oasis.opendocument.presentation odp
application/vnd.oasis.opendocument.spreadsheet ods
57 application/vnd.oasis.opendocument.text application/vnd.oasis.opendocument.text odt odt
application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
application/vnd.symbian.install
application/vnd.tcpdump.pcap pcap
58 application/vnd.wap.wmlc application/vnd.wap.wmlc wmlc wmlc
59 application/vnd.wap.wmlscriptc application/vnd.wap.wmlscriptc wmlsc wmlsc
60 application/vnd.xara application/vnd.xara web web
61 application/vocaltec-media-desc application/vocaltec-media-desc vmd vmd
62 application/vocaltec-media-file application/vocaltec-media-file vmf vmf
63 application/warc application/wordperfect6.0 warc w60
64 application/winhelp application/wordperfect6.1 hlp w61
65 application/wordperfect application/wordperfect wp|wp5|wp6|wpd|w60|w61 wp|wp5|wp6|wpd
66 application/x-123 application/x-123 wk1 wk1
application/x-7z-compressed 7z
67 application/x-aim application/x-aim aim aim
application/x-apple-diskimage
application/x-arc
application/x-archive a
application/x-atari-7800-rom a78
68 application/x-authorware-bin application/x-authorware-bin aab aab
69 application/x-authorware-map application/x-authorware-map aam aam
70 application/x-authorware-seg application/x-authorware-seg aas aas
application/x-avira-qua
71 application/x-bcpio application/x-bcpio bcpio bcpio
72 application/x-bittorrent application/x-bittorrent torrent torrent
73 application/x-bsh application/x-bsh bsh bsh
74 application/x-bytecode.python application/x-bytecode.python pyc pyc
75 application/x-bzip2 application/x-bzip2 boz|bz2 boz|bz2
76 application/x-bzip application/x-bzip bz bz
application/x-cbr cbr
application/x-cbz cbz
77 application/x-cdlink application/x-cdlink vcd vcd
78 application/x-chat application/x-chat cha|chat cha|chat
application/x-chrome-extension
79 application/x-cocoa application/x-cocoa cco cco
80 application/x-conference application/x-conference nsc nsc
application/x-coredump
81 application/x-cpio application/x-cpio cpio cpio
82 application/x-dbf application/x-dbf dbf dbf
83 application/x-dbt application/x-dbt
application/x-debian-package deb
84 application/x-deepv application/x-deepv deepv deepv
85 application/x-director application/x-director dir|dxr dcr|dir|dxr
application/x-dmp dmp
application/x-dosdriver
86 application/x-dosexec application/x-dosexec dll dll
87 application/x-dvi application/x-dvi dvi dvi
88 application/x-elc application/x-elc elc elc
application/x-empty
89 application/x-envoy application/x-envoy env|evy env|evy
90 application/x-esrehber application/x-esrehber es es
91 application/x-excel application/x-excel xla|xld|xlk|xlt|xlv xla|xld|xlk|xlt|xlv
92 application/x-executable application/x-executable exe exe
application/x-font-gdos
application/x-font-pf2 pf2
application/x-font-pfm pfm
93 application/x-font-sfn application/x-font-sfn
94 application/x-font-ttf application/x-font-ttf ttf|ttc ttf
application/x-fptapplication/x-dbt
95 application/x-freelance application/x-freelance pre pre
application/x-gamecube-rom
application/x-gdbm
application/x-gettext-translation
96 application/x-git application/x-git
97 application/x-gsp application/x-gsp gsp gsp
98 application/x-gss application/x-gss gss gss
102 application/x-helpfile application/x-helpfile help help
103 application/x-httpd-imap application/x-httpd-imap imap imap
104 application/x-ima application/x-ima ima ima
application/x-innosetup
105 application/x-internett-signup application/x-internett-signup ins ins
106 application/x-inventor application/x-inventor iv iv
107 application/x-ip2 application/x-ip2 ip ip
108 application/x-java-applet application/x-java-applet
109 application/x-java-commerce application/x-java-commerce jcm jcm
110 application/x-java-image application/x-java-image
application/x-java-jmod jmod
111 application/x-java-keystore application/x-java-keystore
application/x-kdelnk
112 application/x-koan application/x-koan skd|skm|skp|skt skd|skm|skp|skt
113 application/x-latex application/x-latex latex|ltx latex|ltx
114 application/x-livescreen application/x-livescreen ivy ivy
115 application/x-lotus application/x-lotus wq1 wq1
application/x-lz4+json jsonlz4
application/x-lz4 lz4
application/x-lzh-compressed
116 application/x-lzh application/x-lzh lzh lzh
application/x-lzip lz
application/x-lzma lzma
application/x-lzop lzo
117 application/x-lzx application/x-lzx lzx lzx
118 application/x-mach-binary application/x-mach-binary jnilib|dylib jnilib|dylib
119 application/x-mach-executable application/x-mach-executable
120 application/x-magic-cap-package-1.0 application/x-magic-cap-package-1.0 mc$ mc$
121 application/x-mathcad application/x-mathcad mcd mcd
application/x-maxis-dbpf
122 application/x-meme application/x-meme mm mm
123 application/x-midi application/x-midi midi midi
124 application/x-mif application/x-mif mif mif
125 application/x-mix-transfer application/x-mix-transfer nix nix
126 application/xml application/xml opf opf
application/x-mobipocket-ebook mobi
application/vnd.amazon.mobi8-ebook azw|azw3
application/x-msaccess accdb
application/x-ms-compress-szdd fon
127 application/x-ms-pdb application/x-ms-pdb pdb pdb
application/x-ms-reader lit
application/x-n64-rom z64
128 application/x-navi-animation application/x-navi-animation ani ani
129 application/x-navidoc application/x-navidoc nvd nvd
130 application/x-navimap application/x-navimap map map
131 application/x-navistyle application/x-navistyle stl stl
application/x-nes-rom nes
132 application/x-netcdf application/x-netcdf cdf|nc cdf|nc
133 application/x-newton-compatible-pkg application/x-newton-compatible-pkg pkg pkg
application/x-nintendo-ds-rom
134 application/x-object application/x-object o o
135 application/x-omcdatamaker application/x-omcdatamaker omcd omcd
136 application/x-omc application/x-omc omc omc
137 application/x-omcregerator application/x-omcregerator omcr omcr
138 application/x-pagemaker application/x-pagemaker pm4|pm5 pm4|pm5
139 application/x-pcl application/x-pcl pcl pcl
application/x-pgp-keyring
140 application/x-pixclscript application/x-pixclscript plx plx
141 application/x-pkcs7-certreqresp application/x-pkcs7-certreqresp p7r p7r
142 application/x-pkcs7-signature application/x-pkcs7-signature p7a p7a
143 application/x-project application/x-project mpc|mpt|mpv|mpx mpc|mpt|mpv|mpx
144 application/x-qpro application/x-qpro wb1 wb1
application/x-rar rar
application/x-rpm rpm
145 application/x-sdp application/x-sdp sdp sdp
146 application/x-sea application/x-sea sea sea
147 application/x-seelogo application/x-seelogo sl sl
149 application/x-sharedlib application/x-sharedlib so so
150 application/x-shar application/x-shar shar shar
151 application/x-shockwave-flash application/x-shockwave-flash swf swf
application/x-snappy-framed
152 application/x-sprite application/x-sprite spr|sprite spr|sprite
153 application/x-sqlite3 application/x-sqlite3
application/x-stargallery-thm
application/x-stuffit sit
154 application/x-sv4cpio application/x-sv4cpio sv4cpio sv4cpio
155 application/x-sv4crc application/x-sv4crc sv4crc sv4crc
156 application/x-tar application/x-tar tar tar
157 application/x-tbook application/x-tbook sbk|tbk sbk|tbk
application/x-terminfo
application/x-terminfo2
158 application/x-texinfo application/x-texinfo texi|texinfo texi|texinfo
159 application/x-tex-tfm application/x-tex-tfm tfm tfm
160 application/x-ustar application/x-ustar ustar ustar
163 application/x-vnd.ls-xpix application/x-vnd.ls-xpix xpix xpix
164 application/x-vrml application/x-vrml vrml vrml
165 application/x-wais-source application/x-wais-source src|wsrc src|wsrc
application/x-wine-extension-ini
166 application/x-wintalk application/x-wintalk wtk wtk
167 application/x-world application/x-world svr svr
168 application/x-wri application/x-wri wri wri
169 application/x-x509-ca-cert application/x-x509-ca-cert der der
170 application/x-xz application/x-xz xz xz
application/x-zip
application/x-zstd zst
171 application/zip application/zip zip zip
application/zlib z
!audio/basic au
172 audio/it audio/it it it
173 audio/make audio/make funk|my|pfunk funk|my|pfunk
174 audio/midi audio/midi kar kar
175 audio/mid audio/mid rmi rmi
176 audio/mp4 audio/mpeg m4b m2a|mpa
audio/mpeg m2a|mpa|mpga
177 audio/ogg audio/ogg ogg ogg
178 audio/s3m audio/s3m s3m s3m
179 audio/tsp-audio audio/tsp-audio tsi tsi
180 audio/tsplayer audio/tsplayer tsp tsp
181 audio/vnd.qcelp audio/vnd.qcelp qcp qcp
182 audio/voxware audio/voxware vox vox
audio/x-aiff aiff|aif
audio/x-flac flac
183 audio/x-gsm audio/x-gsm gsd|gsm gsd|gsm
audio/x-hx-aac-adts
184 audio/x-jam audio/x-jam jam jam
185 audio/x-liveaudio audio/x-liveaudio lam lam
186 audio/x-m4a audio/x-m4a m4a m4a
194 audio/x-pn-realaudio audio/x-pn-realaudio ram|rm|rmm|rmp ram|rm|rmm|rmp
195 audio/x-psid audio/x-psid sid sid
196 audio/x-realaudio audio/x-realaudio ra ra
audio/x-s3m
197 audio/x-twinvq-plugin audio/x-twinvq-plugin vqe|vql vqe|vql
198 audio/x-twinvq audio/x-twinvq vqf vqf
199 audio/x-voc audio/x-voc voc voc
200 audio/x-wav audio/x-wav wav wav
!audio/x-xbox360-executable xex
!audio/x-xbox-executable xbe
201 font/otf font/otf
202 font/sfnt font/sfnt
font/woff2 woff2
font/woff woff
image/bmp
203 image/cmu-raster image/cmu-raster rast rast
204 image/fif image/fif fif fif
205 image/florian image/florian flo|turbot flo|turbot
206 image/g3fax image/g3fax g3 g3
207 image/gif image/gif gif gif
image/heic heic
208 image/ief image/ief ief|iefs ief|iefs
209 image/jpeg image/jpeg jfif|jfif-tbnl|jpe|jpeg|jpg jfif|jfif-tbnl|jpe|jpeg|jpg
210 image/jutvision image/jutvision jut jut
213 image/png image/png png|x-png png|x-png
214 !image/svg !image/svg svg svg
215 !image/svg+xml !image/svg+xml
image/tiff
!image/vnd.adobe.photoshop psd
!image/vnd.djvu djvu
216 image/vnd.fpx image/vnd.fpx fpx fpx
217 image/vnd.microsoft.icon image/vnd.microsoft.icon
218 image/vnd.rn-realflash image/vnd.rn-realflash rf rf
220 image/vnd.wap.wbmp image/vnd.wap.wbmp wbmp wbmp
221 image/vnd.xiff image/vnd.xiff xif xif
222 image/webp image/webp webp webp
image/wmf
image/x-3ds 3ds
image/x-award-bioslogo
223 image/x-cmu-raster image/x-cmu-raster ras ras
image/x-cur tga
224 image/x-dwg image/x-dwg dwg|dxf|svf dwg|dxf|svf
225 image/x-eps image/x-eps
image/x-exr exr
image/x-gem
226 image/x-icns image/x-icns
227 !image/x-icon !image/x-icon ico ico
228 image/x-jg image/x-jg art art
236 image/x-portable-pixmap image/x-portable-pixmap ppm ppm
237 image/x-quicktime image/x-quicktime qif|qti|qtif qif|qti|qtif
238 image/x-rgb image/x-rgb rgb rgb
image/x-tga
239 image/x-tiff image/x-tiff tif|tiff tif|tiff
240 image/x-win-bitmap image/tiff
241 !image/x-xcf !image/x-xcf xcf xcf
242 !image/x-xpixmap !image/x-xpixmap xpm xpm
image/x-xwindowdump xwd
message/news
243 message/rfc822 message/rfc822 mht|mhtml|mime mht|mhtml|mime
244 model/vnd.dwf model/vnd.dwf dwf dwf
model/vnd.gdl gdl
model/vnd.gs.gdl gdsl
245 model/vrml model/vrml wrz wrz
246 model/x-pov model/x-pov pov pov
247 text/asp text/asp asp asp
248 text/css text/css css css
249 text/x-sass sass
250 text/x-scss scss
251 text/html text/html acgi|htm|html|htmls|htx|shtml acgi|htm|html|htmls|htx|shtml
252 text/javascript text/javascript js js
253 text/mcf text/mcf mcf mcf
254 text/pascal text/pascal pas pas
255 text/PGP text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
text/x-script.python pyx
text/csv
application/vnd.coffeescript coffee
256 text/richtext text/richtext rt|rtf|rtx rt|rtf|rtx
text/rtf
257 text/scriplet text/scriplet wsc wsc
258 text/x-awk awk
259 !video/x-jng jng
260 video/x-mng mng
261 image/x-cur tga
262 image/x-xwindowdump xwd
263 !image/vnd.adobe.photoshop psd
264 text/tab-separated-values text/tab-separated-values tsv tsv
265 text/troff text/troff man|me|ms|roff|t|tr man|me|ms|roff|t|tr
266 text/uri-list text/uri-list uji|unis|uri|uris uni|unis|uri|uris
267 text/vnd.abc text/vnd.abc abc abc
268 text/vnd.fmi.flexstor text/vnd.fmi.flexstor flx flx
269 text/vnd.wap.wmlscript text/vnd.wap.wmlscript wmls wmls
272 text/x-Algol68 text/x-Algol68
273 text/x-asm text/x-asm asm|s asm|s
274 text/x-audiosoft-intra text/x-audiosoft-intra aip aip
text/x-awk awk
275 text/x-bcpl text/x-bcpl
276 text/x-c text/x-c c|cc|h c|cc|h
277 text/x-c++ text/x-c++ cpp|cxx|c++ cpp|cxx|c++
286 text/xml text/xml xml|pom|iml|plist xml|pom|iml|plist
287 text/x-m text/x-m m m
288 text/x-msdos-batch text/x-msdos-batch bat bat
text/x-ms-regedit reg
text/x-objective-c
289 text/x-pascal text/x-pascal p p
290 text/x-perl text/x-perl pl pl
291 text/x-php text/x-php php php
292 text/x-po text/x-python po py
text/x-python py|pyi
293 text/x-ruby text/x-ruby rb rb
text/x-sass sass
text/x-scss scss
294 text/x-server-parsed-html text/x-server-parsed-html ssi ssi
295 text/x-setext text/x-setext etx etx
296 text/x-sgml text/x-sgml sgm|sgml sgm|sgml
297 text/x-shellscript text/x-shellscript sh sh
298 text/x-speech text/x-speech talk talk
text/x-tcl
299 text/x-tex text/x-tex tex tex
300 text/x-uil text/x-uil uil uil
301 text/x-uuencode text/x-uuencode uue uue
302 text/x-vcalendar text/x-vcalendar vcs vcs
text/x-vcard vcf
303 video/animaflex video/animaflex afl afl
304 video/avi video/avi avi avi
305 video/avs-video video/avs-video avs avs
video/MP2T
306 video/mp4 video/mp4 mp4 mp4
307 video/mpeg video/mpeg m1v|m2v|mpe|mpeg|mpg m1v|m2v|mpe|mpeg|mpg
308 video/quicktime video/quicktime moov|mov|qt moov|mov|qt
317 video/x-dl video/x-dl dl dl
318 video/x-dv video/x-dv dif|dv dif|dv
319 video/x-fli video/x-fli fli fli
video/x-flv flv
320 video/x-isvideo video/x-isvideo isu isu
!video/x-jng jng
video/x-m4v m4v
video/x-matroska mkv
video/x-mng mng
321 video/x-motion-jpeg video/x-motion-jpeg mjpg mjpg
322 video/x-ms-asf video/x-ms-asf asf|asx|wmv asf|asx
video/x-msvideo divx
323 video/x-qtc video/x-qtc qtc qtc
324 video/x-sgi-movie video/x-sgi-movie movie|mv movie|mv
325 x-epoc/x-sisx-app application/x-7z-compressed 7z
326 application/x-zstd-dictionary application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
327 application/vnd.ms-outlook text/x-po msg po
328 image/x-olympus-orf application/x-rpm orf rpm
329 image/x-nikon-nef application/x-debian-package nef deb
330 image/x-fuji-raf application/vnd.iccprofile raf icm
331 image/x-panasonic-raw application/dicom rw2|raw dcm
332 image/x-adobe-dng image/x-exr dng exr
333 image/x-canon-cr2 application/vnd.iccprofile cr2 icm
334 image/x-canon-crw video/x-matroska crw mkv
335 image/x-dcraw application/x-empty
336 image/x-kodak-dcr model/vnd.gdl dcr gdl
337 image/x-kodak-k25 model/vnd.gs.gdl k25 gdsl
338 image/x-kodak-kdc font/woff kdc woff
339 image/x-minolta-mrw font/woff2 mrw woff2
340 image/x-pentax-pef application/epub+zip pef epub
341 image/x-sigma-x3f application/x-mobipocket-ebook xf3 mobi
342 image/x-sony-arw audio/x-flac arw flac
343 image/x-sony-sr2 application/x-rar sr2 rar
344 image/x-sony-srf video/x-msvideo srf divx
345 image/x-epson-erf video/x-flv erf flv
346 sist2/sidecar application/x-kdelnk s2meta
347 text/x-tcl
348 application/ogg ogv
349 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
350 application/vnd.ms-cab-compressed cab
351 audio/mp4 m4b
352 !image/vnd.djvu djvu
353 application/x-ms-reader lit
354 application/CDFV2-corrupt
355 text/x-vcard vcf
356 application/x-innosetup
357 application/winhelp hlp
358 image/x-tga
359 application/x-wine-extension-ini

View File

@@ -1,74 +1,34 @@
{
"properties": {
"_tie": {
"type": "keyword",
"doc_values": true
},
"checksum": {
"type": "keyword",
"index": false
},
"_depth": {
"type": "integer"
},
"path": {
"type": "text",
"analyzer": "path_analyzer",
"copy_to": "suggest-path",
"fielddata": true,
"fields": {
"nGram": {
"type": "text",
"analyzer": "my_nGram"
},
"text": {
"type": "text",
"analyzer": "content_analyzer"
}
}
"copy_to": "suggest-path"
},
"suggest-path": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
"analyzer": "keyword"
},
"mime": {
"type": "keyword"
},
"parent": {
"type": "keyword",
"index": false
},
"thumbnail": {
"type": "integer",
"index": false
},
"videoc": {
"type": "keyword",
"index": false
"type": "keyword"
},
"audioc": {
"type": "keyword",
"index": false
"type": "keyword"
},
"duration": {
"type": "integer",
"index": false
"type": "float"
},
"width": {
"type": "integer",
"index": false
"type": "integer"
},
"height": {
"type": "integer",
"index": false
},
"pages": {
"type": "integer",
"index": false
"type": "integer"
},
"mtime": {
"type": "date",
"format": "epoch_millis"
"type": "integer"
},
"size": {
"type": "long"
@@ -79,7 +39,6 @@
"name": {
"analyzer": "content_analyzer",
"type": "text",
"fielddata": true,
"fields": {
"nGram": {
"type": "text",
@@ -111,23 +70,6 @@
"analyzer": "my_nGram",
"type": "text"
},
"_keyword.*": {
"type": "keyword"
},
"_text.*": {
"analyzer": "content_analyzer",
"type": "text",
"fields": {
"nGram": {
"type": "text",
"analyzer": "my_nGram"
}
}
},
"_url": {
"type": "keyword",
"index": false
},
"content": {
"analyzer": "content_analyzer",
"type": "text",
@@ -138,70 +80,6 @@
"analyzer": "my_nGram"
}
}
},
"tag": {
"type": "text",
"fielddata": true,
"analyzer": "tag_analyzer",
"copy_to": "suggest-tag"
},
"suggest-tag": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
},
"exif_make": {
"type": "text"
},
"exif_model": {
"type": "text"
},
"exif:software": {
"type": "text"
},
"exif_exposure_time": {
"type": "keyword"
},
"exif_fnumber": {
"type": "keyword"
},
"exif_iso_speed_ratings": {
"type": "keyword"
},
"exif_focal_length": {
"type": "keyword"
},
"exif_user_comment": {
"type": "text"
},
"exif_gps_longitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dec": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dec": {
"type": "keyword",
"index": false
},
"author": {
"type": "text"
},
"modified_by": {
"type": "text"
}
}
}

View File

@@ -1,10 +0,0 @@
{
"description": "Copy _id to _tie, save path depth",
"processors": [
{
"script": {
"source": "ctx._tie = ctx._id; ctx._depth = ctx.path.length() == 0 ? 0 : 1 + ctx.path.length() - ctx.path.replace(\"/\", \"\").length();"
}
}
]
}

View File

@@ -1,22 +1,15 @@
{
"index": {
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0,
"highlight.max_analyzed_offset": 1000000
"refresh_interval": "-1",
"codec": "best_compression"
},
"analysis": {
"tokenizer": {
"path_tokenizer": {
"type": "path_hierarchy",
"delimiter": "/"
},
"tag_tokenizer": {
"type": "path_hierarchy",
"delimiter": "."
"type": "path_hierarchy"
},
"my_nGram_tokenizer": {
"type": "ngram",
"type": "nGram",
"min_gram": 3,
"max_gram": 3
}
@@ -28,30 +21,16 @@
"lowercase"
]
},
"tag_analyzer": {
"tokenizer": "tag_tokenizer",
"filter": [
"lowercase"
]
},
"case_insensitive_kw_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase"
]
},
"my_nGram": {
"tokenizer": "my_nGram_tokenizer",
"filter": [
"lowercase",
"asciifolding"
"lowercase"
]
},
"content_analyzer": {
"tokenizer": "standard",
"filter": [
"lowercase",
"asciifolding"
"lowercase"
]
}
}

View File

@@ -1,58 +0,0 @@
{
"index": {
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0
},
"analysis": {
"tokenizer": {
"path_tokenizer": {
"type": "path_hierarchy",
"delimiter": "/"
},
"tag_tokenizer": {
"type": "path_hierarchy",
"delimiter": "."
},
"my_nGram_tokenizer": {
"type": "nGram",
"min_gram": 3,
"max_gram": 3
}
},
"analyzer": {
"path_analyzer": {
"tokenizer": "path_tokenizer",
"filter": [
"lowercase"
]
},
"tag_analyzer": {
"tokenizer": "tag_tokenizer",
"filter": [
"lowercase"
]
},
"case_insensitive_kw_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase"
]
},
"my_nGram": {
"tokenizer": "my_nGram_tokenizer",
"filter": [
"lowercase",
"asciifolding"
]
},
"content_analyzer": {
"tokenizer": "standard",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
}

View File

@@ -1,13 +1,15 @@
#!/usr/bin/env bash
#!/bin/bash
(
cd ..
rm -rf index.sist2
rm -rf index.sist2/
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c
python3 scripts/magic_static.py > src/magic_generated.c
rm web/js/bundle.js 2> /dev/null
cat `ls -v web/js/*.min.js` > web/js/bundle.js
cat web/js/{util,dom,search}.js >> web/js/bundle.js
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
)
rm web/css/bundle.css 2> /dev/null
cat web/css/*.min.css > web/css/bundle.css
cat web/css/main.css >> web/css/bundle.css
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c

View File

@@ -1,23 +0,0 @@
#!/usr/bin/env bash
VCPKG_ROOT="/vcpkg"
git submodule update --init --recursive
mkdir build
(
cd build
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
make -j $(nproc)
strip sist2
./sist2 -v > VERSION
)
mv build/sist2 sist2-x64-linux
(
cd build
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
make -j $(nproc)
)
mv build/sist2_debug sist2-x64-linux-debug

View File

@@ -1,22 +0,0 @@
#!/usr/bin/env bash
VCPKG_ROOT="/vcpkg"
git submodule update --init --recursive
mkdir build
(
cd build
cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
make -j $(nproc)
strip sist2
)
mv build/sist2 sist2-arm64-linux
rm -rf CMakeFiles CMakeCache.txt
(
cd build
cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
make -j $(nproc)
)
mv build/sist2_debug sist2-arm64-linux-debug

39
scripts/get_static_libs.sh Executable file
View File

@@ -0,0 +1,39 @@
#!/bin/bash
cd lib
cd mupdf
HAVE_X11=no HAVE_GLUT=no make -j 4
cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
--disable-ffprobe --disable-doc\
--disable-manpages --disable-postproc --disable-avfilter \
--disable-alsa --disable-lzma --disable-xlib --disable-debug\
--disable-vdpau --disable-vaapi --disable-sdl2 --disable-network
make -j 4
cd ..
mv ffmpeg/libavcodec/libavcodec.a .
mv ffmpeg/libavformat/libavformat.a .
mv ffmpeg/libavutil/libavutil.a .
mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a .
# onion
cd onion
mkdir build 2> /dev/null
cd build
cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_USE_JPEG=false \
-DONION_USE_JPEG=false -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
-DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
-DONION_USE_BINDINGS_CPP=false ..
make -j 4
cd ../..
mv onion/build/src/onion/libonion_static.a .
cd ..

View File

@@ -1,10 +1,6 @@
import json
files = [
"schema/mappings.json",
"schema/settings.json",
"schema/settings_legacy.json",
"schema/pipeline.json",
]
@@ -13,7 +9,6 @@ def clean(filepath):
for file in files:
with open(file, "r") as f:
data = json.dumps(json.load(f), separators=(",", ":")).encode()
data += b'\0'
with open(file, "rb") as f:
data = f.read()
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -1,8 +0,0 @@
try:
with open("/usr/lib/file/magic.mgc", "rb") as f:
data = f.read()
except:
data = bytes([])
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -1,11 +1,8 @@
import zlib
mimes = {}
noparse = set()
ext_in_hash = set()
major_mime = {
"sist2": 0,
"model": 1,
"example": 2,
"message": 3,
@@ -15,19 +12,18 @@ major_mime = {
"audio": 7,
"image": 8,
"text": 9,
"application": 10,
"x-epoc": 11,
"application": 10
}
pdf = (
"application/pdf",
"application/epub+zip",
"application/x-cbr",
"application/x-cbz",
"application/vnd.ms-xpsdocument",
)
font = (
"application/vnd.ms-opentype",
"application/x-ms-compress-szdd"
"application/x-font-sfn",
"application/x-font-ttf",
"font/otf",
@@ -36,68 +32,6 @@ font = (
"font/woff2"
)
# Archive "formats"
archive = (
"application/x-tar",
"application/zip",
"application/x-rar",
"application/x-arc",
"application/x-warc",
"application/x-7z-compressed",
)
# Archive "filters"
arc_filter = (
"application/gzip",
"application/x-bzip2",
"application/x-xz",
"application/x-zstd",
"application/x-lzma",
"application/x-lz4",
"application/x-lzip",
"application/x-lzop",
)
doc = (
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
)
mobi = (
"application/x-mobipocket-ebook",
"application/vnd.amazon.mobi8-ebook"
)
markup = (
"text/xml",
"text/html",
"text/x-sgml"
)
raw = (
"image/x-olympus-orf",
"image/x-nikon-nef",
"image/x-fuji-raf",
"image/x-panasonic-raw",
"image/x-adobe-dng",
"image/x-canon-cr2",
"image/x-canon-crw",
"image/x-dcraw",
"image/x-kodak-dcr",
"image/x-kodak-k25",
"image/x-kodak-kdc",
"image/x-minolta-mrw",
"image/x-pentax-pef",
"image/x-sigma-x3f",
"image/x-sony-arw",
"image/x-sony-sr2",
"image/x-sony-srf",
"image/x-minolta-mrw",
"image/x-pentax-pef",
"image/x-epson-erf",
)
cnt = 1
@@ -112,24 +46,8 @@ def mime_id(mime):
mime_id += " | 0x40000000"
elif mime in font:
mime_id += " | 0x20000000"
elif mime in archive:
mime_id += " | 0x10000000"
elif mime in arc_filter:
mime_id += " | 0x08000000"
elif mime in doc:
mime_id += " | 0x04000000"
elif mime in mobi:
mime_id += " | 0x02000000"
elif mime in markup:
mime_id += " | 0x01000000"
elif mime in raw:
mime_id += " | 0x00800000"
elif mime == "application/x-empty":
cnt -= 1
return "1"
elif mime == "sist2/sidecar":
cnt -= 1
return "2"
return mime_id
@@ -137,40 +55,24 @@ def clean(t):
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
def crc(s):
return zlib.crc32(s.encode()) & 0xffffffff
with open("scripts/mime.csv") as f:
with open("mime.csv") as f:
for l in f:
mime, ext_list = l.split(",")
if l.startswith("!"):
mime = mime[1:]
noparse.add(mime)
ext = [x.strip() for x in ext_list.split("|") if x.strip() != ""]
ext = [x.strip() for x in ext_list.split("|")]
mimes[mime] = ext
seen_crc = set()
for ext in mimes.values():
for e in ext:
if crc(e) in seen_crc:
raise Exception("CRC32 collision")
seen_crc.add(crc(e))
seen_crc = set()
for mime in mimes.keys():
if crc(mime) in seen_crc:
raise Exception("CRC32 collision")
seen_crc.add(crc(mime))
print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C")
print("#include <glib-2.0/glib.h>\n")
print("#include <stdlib.h>\n")
# Enum
print("enum mime {")
for mime, ext in sorted(mimes.items()):
print(f"{clean(mime)}={mime_id(mime)},")
for mime, ext in mimes.items():
print(" " + clean(mime) + "=" + mime_id(mime) + ",")
print("};")
# Enum -> string
@@ -181,20 +83,20 @@ with open("scripts/mime.csv") as f:
print("default: return NULL;}}")
# Ext -> Enum
print("unsigned int mime_extension_lookup(unsigned long extension_crc32) {"
"switch (extension_crc32) {")
print("GHashTable *mime_get_ext_table() {"
"GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);")
for mime, ext in mimes.items():
if len(ext) > 0:
for e in ext:
print(f"case {crc(e)}:", end="")
print(f"return {clean(mime)};")
print("default: return 0;}}")
for e in [e for e in ext if e]:
print("g_hash_table_insert(ext_table, \"" + e + "\", (gpointer)" + clean(mime) + ");")
if e in ext_in_hash:
raise Exception("extension already in hash: " + e)
ext_in_hash.add(e)
print("return ext_table;}")
# string -> Enum
print("unsigned int mime_name_lookup(unsigned long mime_crc32) {"
"switch (mime_crc32) {")
for mime in mimes.keys():
print(f"case {crc(mime)}: return {clean(mime)};")
print("default: return 0;}}")
print("GHashTable *mime_get_mime_table() {"
"GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);")
for mime, ext in mimes.items():
print("g_hash_table_insert(mime_table, \"" + mime + "\", (gpointer)" + clean(mime) + ");")
print("return mime_table;}")
print("#endif")

View File

@@ -1,10 +1,9 @@
files = [
"sist2-vue/src/assets/favicon.ico",
"sist2-vue/dist/css/chunk-vendors.css",
"sist2-vue/dist/css/index.css",
"sist2-vue/dist/js/chunk-vendors.js",
"sist2-vue/dist/js/index.js",
"sist2-vue/dist/index.html",
"web/css/bundle.css",
"web/js/bundle.js",
"web/img/bg-bars.png",
"web/img/sprite-skin-flat.png",
"web/search.html",
]
@@ -13,10 +12,6 @@ def clean(filepath):
for file in files:
try:
with open(file, "rb") as f:
data = f.read()
except:
data = bytes([])
with open(file, "rb") as f:
data = f.read()
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -1,84 +0,0 @@
#include <sqlite3ext.h>
#include <string.h>
#include <stdlib.h>
SQLITE_EXTENSION_INIT1
/*
 * Return the index of the last '/' in `str`, or -1 when the string
 * contains no '/'. The scan walks backwards from the terminating NUL.
 */
static int sep_rfind(const char *str) {
    int pos = (int) strlen(str);

    while (pos >= 0) {
        if (str[pos] == '/') {
            return pos;
        }
        pos--;
    }

    return -1;
}
/*
 * SQL function path_parent(path).
 *
 * Sets the result to the part of `path` that precedes its last '/'
 * (an empty string when the only '/' is the first character), or to
 * NULL when `path` contains no '/'.
 *
 * Reports "Invalid parameters" unless called with exactly one TEXT argument.
 */
void path_parent_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
        /* Stop here: the argument must not be read after an error result. */
        return;
    }

    const char *value = (const char *) sqlite3_value_text(argv[0]);
    if (value == NULL) {
        /* sqlite3_value_text() returns NULL when it runs out of memory. */
        sqlite3_result_error_nomem(ctx);
        return;
    }

    int stop = sep_rfind(value);
    if (stop == -1) {
        sqlite3_result_null(ctx);
        return;
    }

    /*
     * Hand the prefix straight to SQLite. SQLITE_TRANSIENT makes SQLite take
     * its own copy of the first `stop` bytes, so no intermediate fixed-size
     * stack buffer (and no overflow risk for very long paths) is needed.
     */
    sqlite3_result_text(ctx, value, stop, SQLITE_TRANSIENT);
}
/*
 * SQL function random_seeded(seed).
 *
 * Seeds a private random_r() generator with the integer `seed` and sets the
 * result to the first value it produces, so the same seed always yields the
 * same number.
 *
 * Reports "Invalid parameters" unless called with exactly one INTEGER argument.
 */
void random_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_INTEGER) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
        /* Stop here: the argument must not be read after an error result. */
        return;
    }

    char state_buf[32] = {0,};
    /*
     * initstate_r() inspects buf.state before initialising it, so the struct
     * must be zeroed first; an uninitialised struct is undefined behaviour.
     */
    struct random_data buf = {0};
    int result;

    long seed = sqlite3_value_int64(argv[0]);

    initstate_r((int) seed, state_buf, sizeof(state_buf), &buf);

    random_r(&buf, &result);

    sqlite3_result_int(ctx, result);
}
/*
 * Extension entry point: registers the SQL functions path_parent(1 arg) and
 * random_seeded(1 arg) on `db`.
 *
 * Returns SQLITE_OK when both functions were registered, otherwise the error
 * code of the first registration that failed. `pzErrMsg` is not written to.
 */
int sqlite3_extension_init(
        sqlite3 *db,
        char **pzErrMsg,
        const sqlite3_api_routines *pApi
) {
    SQLITE_EXTENSION_INIT2(pApi);
    (void) pzErrMsg;

    int rc = sqlite3_create_function(
            db,
            "path_parent",
            1,
            SQLITE_UTF8,
            NULL,
            path_parent_func,
            NULL,
            NULL
    );
    if (rc != SQLITE_OK) {
        /* Do not report success if a function is missing. */
        return rc;
    }

    rc = sqlite3_create_function(
            db,
            "random_seeded",
            1,
            SQLITE_UTF8,
            NULL,
            random_func,
            NULL,
            NULL
    );

    return rc;
}

View File

@@ -1 +0,0 @@
gcc -I/mnt/work/vcpkg/installed/x64-linux/include -g -fPIC -shared sqlite_extension.c -o sist2funcs.so

View File

@@ -1,3 +0,0 @@
docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.17.9

View File

@@ -1,3 +0,0 @@
docker run --rm -it --name "sist2-dev-es-6"\
-p 9202:9200 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:6.8.0

View File

@@ -1,3 +0,0 @@
docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.7.0

View File

@@ -1,5 +0,0 @@
module.exports = {
presets: [
'@vue/cli-plugin-babel/preset'
]
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,49 +0,0 @@
{
"name": "sist2-admin-vue",
"version": "0.1.0",
"private": true,
"scripts": {
"serve": "vue-cli-service serve",
"build": "vue-cli-service build",
"watch": "vue-cli-service build --watch"
},
"dependencies": {
"axios": "^0.27.2",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"moment": "^2.29.3",
"socket.io-client": "^4.5.1",
"vue": "^2.6.14",
"vue-i18n": "^8.24.4",
"vue-router": "^3.5.4",
"vuex": "^3.4.0"
},
"devDependencies": {
"@vue/cli-plugin-babel": "~5.0.8",
"@vue/cli-plugin-router": "~5.0.8",
"@vue/cli-plugin-vuex": "~5.0.8",
"@vue/cli-service": "~5.0.8",
"babel-eslint": "^10.1.0",
"bootstrap": "^4.5.2",
"vue-template-compiler": "^2.6.11"
},
"eslintConfig": {
"root": true,
"env": {
"node": true
},
"extends": [
"plugin:vue/essential",
"eslint:recommended"
],
"parserOptions": {
"parser": "babel-eslint"
},
"rules": {}
},
"browserslist": [
"> 1%",
"last 2 versions",
"not dead"
]
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -1,17 +0,0 @@
<!DOCTYPE html>
<html lang="">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<link rel="icon" href="<%= BASE_URL %>serve_favicon_ico.ico">
<title>sist2-admin</title>
</head>
<body>
<noscript>
<strong>We're sorry but <%= htmlWebpackPlugin.options.title %> doesn't work properly without JavaScript enabled. Please enable it to continue.</strong>
</noscript>
<div id="app"></div>
<!-- built files will be auto injected -->
</body>
</html>

View File

@@ -1,98 +0,0 @@
<template>
<div id="app">
<NavBar></NavBar>
<b-container class="pt-4">
<b-alert show dismissible variant="info">
This is a beta version of sist2-admin. Please submit bug reports, usability issues and feature requests
to the <a href="https://github.com/simon987/sist2/issues/new/choose" target="_blank">issue tracker on Github</a>. Thank you!
</b-alert>
<router-view/>
</b-container>
</div>
</template>
<script>
import NavBar from "@/components/NavBar";
import Sist2AdminApi from "@/Sist2AdminApi";
/**
 * Root component: loads the admin info and the browser settings into the
 * store, then opens the notification WebSocket.
 */
export default {
    components: {NavBar},
    data() {
        return {
            // WebSocket used to receive notifications from the server.
            socket: null
        }
    },
    mounted() {
        Sist2AdminApi.getSist2AdminInfo()
            .then(resp => this.$store.commit("setSist2AdminInfo", resp.data));
        this.$store.dispatch("loadBrowserSettings");
        this.connectNotifications();
        // this.socket.onclose = this.connectNotifications;
    },
    methods: {
        /**
         * Open the /notifications WebSocket on the current host and forward
         * every received message to the "notify" store action.
         */
        connectNotifications() {
            // Follow the page scheme: browsers refuse ws:// connections from
            // pages served over https:// (mixed content), so use wss:// there.
            const scheme = window.location.protocol === "https:" ? "wss" : "ws";
            this.socket = new WebSocket(`${scheme}://${window.location.host}/notifications`);
            this.socket.onopen = () => {
                this.socket.send("Hello from client");
            }
            this.socket.onmessage = e => {
                const notification = JSON.parse(e.data);
                if (notification.message) {
                    // Resolve the i18n key into the text shown to the user.
                    notification.messageString = this.$t(notification.message).toString();
                }
                this.$store.dispatch("notify", notification)
            }
        }
    }
}
</script>
<style>
html, body {
height: 100%;
}
#app {
/*font-family: Avenir, Helvetica, Arial, sans-serif;*/
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
/*text-align: center;*/
color: #2c3e50;
padding-bottom: 1em;
min-height: 100%;
}
.info-icon {
width: 1rem;
margin-right: 0.2rem;
cursor: pointer;
line-height: 1rem;
height: 1rem;
background-image: url();
filter: brightness(45%);
display: block;
}
.tabs {
margin-top: 10px;
}
.modal-title {
text-overflow: ellipsis;
overflow: hidden;
white-space: nowrap;
}
@media screen and (min-width: 1500px) {
.container {
max-width: 1440px;
}
}
label {
margin-top: 0.5rem;
margin-bottom: 0;
}
</style>

View File

@@ -1,117 +0,0 @@
import axios from "axios";
/**
 * Thin axios client for the sist2-admin HTTP API.
 *
 * Every method returns the axios promise of the request. Job names, frontend
 * names and task ids are percent-encoded before being placed in the URL path,
 * so characters such as "#", "?", "/" or "%" cannot alter the request URL.
 */
class Sist2AdminApi {

    constructor() {
        // Requests go to the origin that served the page.
        this.baseUrl = window.location.protocol + "//" + window.location.host;
    }

    getJobs() {
        return axios.get(`${this.baseUrl}/api/job/`);
    }

    getFrontends() {
        return axios.get(`${this.baseUrl}/api/frontend/`);
    }

    getTasks() {
        return axios.get(`${this.baseUrl}/api/task/`);
    }

    /**
     * @param taskId
     */
    killTask(taskId) {
        return axios.post(`${this.baseUrl}/api/task/${encodeURIComponent(taskId)}/kill`)
    }

    getTaskHistory() {
        return axios.get(`${this.baseUrl}/api/task/history`);
    }

    /**
     * @param {string} name
     */
    getJob(name) {
        return axios.get(`${this.baseUrl}/api/job/${encodeURIComponent(name)}`);
    }

    /**
     * @param {string} name
     */
    getFrontend(name) {
        return axios.get(`${this.baseUrl}/api/frontend/${encodeURIComponent(name)}`);
    }

    /**
     * @param {string} name
     */
    startFrontend(name) {
        return axios.post(`${this.baseUrl}/api/frontend/${encodeURIComponent(name)}/start`);
    }

    /**
     * @param {string} name
     */
    stopFrontend(name) {
        return axios.post(`${this.baseUrl}/api/frontend/${encodeURIComponent(name)}/stop`);
    }

    /**
     * @param {string} name
     * @param job
     */
    updateJob(name, job) {
        return axios.put(`${this.baseUrl}/api/job/${encodeURIComponent(name)}`, job);
    }

    /**
     * @param {string} name
     * @param frontend
     */
    updateFrontend(name, frontend) {
        return axios.put(`${this.baseUrl}/api/frontend/${encodeURIComponent(name)}`, frontend);
    }

    /**
     * @param {string} name
     */
    runJob(name) {
        return axios.get(`${this.baseUrl}/api/job/${encodeURIComponent(name)}/run`);
    }

    /**
     * @param {string} name
     */
    deleteJob(name) {
        return axios.delete(`${this.baseUrl}/api/job/${encodeURIComponent(name)}`);
    }

    /**
     * @param {string} name
     */
    deleteFrontend(name) {
        return axios.delete(`${this.baseUrl}/api/frontend/${encodeURIComponent(name)}`);
    }

    /**
     * @param {string} name
     */
    createJob(name) {
        return axios.post(`${this.baseUrl}/api/job/${encodeURIComponent(name)}`);
    }

    /**
     * @param {string} name
     */
    createFrontend(name) {
        return axios.post(`${this.baseUrl}/api/frontend/${encodeURIComponent(name)}`);
    }

    /**
     * @param url Elasticsearch URL to test
     * @param insecure sent as the `insecure` query parameter
     */
    pingEs(url, insecure) {
        // axios serialises and encodes `params` itself.
        return axios.get(`${this.baseUrl}/api/ping_es`, {params: {url, insecure}});
    }

    getSist2AdminInfo() {
        return axios.get(`${this.baseUrl}/api/`);
    }
}
export default new Sist2AdminApi()

View File

@@ -1,31 +0,0 @@
<template>
<b-list-group-item action :to="`/frontend/${frontend.name}`">
<div class="d-flex w-100 justify-content-between">
<h5 class="mb-1" style="display: block">
{{ frontend.name }}
<b-badge variant="light">{{ formatBindAddress(frontend.web_options.bind) }}</b-badge>
</h5>
<div>
<b-badge v-if="frontend.running" variant="success">{{$t("online")}}</b-badge>
<b-badge v-else variant="secondary">{{$t("offline")}}</b-badge>
</div>
</div>
</b-list-group-item>
</template>
<script>
import {formatBindAddress} from "@/util";
/**
 * List entry for one frontend. The imported formatBindAddress helper is
 * exposed through the component data so the template can call it.
 */
export default {
    name: "FrontendListItem",
    props: ["frontend"],
    data: () => ({formatBindAddress})
}
</script>

View File

@@ -1,64 +0,0 @@
<template>
<div>
<label>{{ $t("indexOptions.threads") }}</label>
<b-form-input v-model="options.threads" type="number" min="1" @change="update()"></b-form-input>
<label>{{ $t("webOptions.esUrl") }}</label>
<b-alert :variant="esTestOk ? 'success' : 'danger'" :show="showEsTestAlert" class="mt-1">
{{ esTestMessage }}
</b-alert>
<b-input-group>
<b-form-input v-model="options.es_url" @change="update()"></b-form-input>
<b-input-group-append>
<b-button variant="outline-primary" @click="testEs()">{{ $t("test") }}</b-button>
</b-input-group-append>
</b-input-group>
<label>{{ $t("indexOptions.esIndex") }}</label>
<b-form-input v-model="options.es_index" @change="update()"></b-form-input>
<br>
<b-form-checkbox v-model="options.es_insecure_ssl" :disabled="!options.es_url.startsWith('https')" @change="update()">
{{ $t("webOptions.esInsecure") }}
</b-form-checkbox>
<label>{{ $t("indexOptions.batchSize") }}</label>
<b-form-input v-model="options.batch_size" type="number" min="1" @change="update()"></b-form-input>
<label>{{ $t("indexOptions.script") }}</label>
<b-form-textarea v-model="options.script" rows="6" @change="update()"></b-form-textarea>
</div>
</template>
<script>
import sist2AdminApi from "@/Sist2AdminApi";
/**
 * Form for the indexing options of a job. Emits "change" with the options
 * object whenever a field is edited.
 */
export default {
    name: "IndexOptions",
    props: ["options"],
    data() {
        return {
            // State of the alert that shows the result of the last connection test.
            showEsTestAlert: false,
            esTestOk: false,
            esTestMessage: "",
        }
    },
    methods: {
        /** Notify the parent that the options object was edited. */
        update() {
            this.$emit("change", this.options);
        },
        /**
         * Ask the server to ping the configured Elasticsearch URL and show
         * the outcome in the alert.
         */
        testEs() {
            sist2AdminApi.pingEs(this.options.es_url, this.options.es_insecure_ssl)
                .then((resp) => {
                    this.showEsTestAlert = true;
                    this.esTestOk = resp.data.ok;
                    this.esTestMessage = resp.data.message;
                })
                .catch((err) => {
                    // The ping request itself failed: surface it instead of
                    // leaving the user without any feedback.
                    this.showEsTestAlert = true;
                    this.esTestOk = false;
                    this.esTestMessage = (err && err.message) || String(err);
                });
        }
    },
}
</script>
<style scoped>
</style>

View File

@@ -1,42 +0,0 @@
<template>
<div>
<h5>{{ $t("selectJobs") }}</h5>
<b-progress v-if="loading" striped animated value="100"></b-progress>
<b-form-group v-else>
<b-form-checkbox-group
v-if="jobs.length > 0"
:checked="frontend.jobs"
@input="frontend.jobs = $event; $emit('input')"
>
<div v-for="job in jobs" :key="job.name">
<b-form-checkbox :disabled="job.status !== 'indexed'" :value="job.name">[{{ job.name }}]</b-form-checkbox>
<br/>
</div>
</b-form-checkbox-group>
<div v-else>
<span class="text-muted">{{ $t('jobOptions.noJobAvailable') }}</span>
&nbsp;<router-link to="/">{{$t("create")}}</router-link>
</div>
</b-form-group>
</div>
</template>
<script>
import Sist2AdminApi from "@/Sist2AdminApi";
/**
 * Checkbox group used to pick the jobs attached to a frontend. The job list
 * is fetched once when the component is mounted.
 */
export default {
    name: "JobCheckboxGroup",
    props: ["frontend"],
    mounted() {
        Sist2AdminApi.getJobs().then(resp => {
            this.jobs = resp.data;
            this.loading = false;
        });
    },
    data() {
        return {
            loading: true,
            // Declared here so that Vue tracks it reactively; the template
            // reads `jobs` and mounted() assigns it.
            jobs: [],
        }
    }
}
</script>

View File

@@ -1,56 +0,0 @@
<template>
<b-list-group-item class="flex-column align-items-start" action :to="`job/${job.name}`">
<div class="d-flex w-100 justify-content-between">
<div>
<h5 class="mb-1">
{{ job.name }}
</h5>
</div>
<div>
<b-row>
<b-col>
<small v-if="job.last_index_date">
{{ $t("scanned") }} {{ formatLastIndexDate(job.last_index_date) }}</small>
<div v-else>&nbsp;</div>
</b-col>
</b-row>
<b-row v-if="job.schedule_enabled">
<b-col>
<small><code>{{job.cron_expression }}</code></small>
</b-col>
</b-row>
<b-row v-else>
<b-col>
&nbsp;
</b-col>
</b-row>
</div>
</div>
</b-list-group-item>
</template>
<script>
import moment from "moment";
/** List entry for one job. */
export default {
    name: "JobListItem",
    props: ["job"],
    methods: {
        /**
         * Format a date string as a relative time (moment's fromNow()).
         * Returns an empty string when the value is null.
         */
        formatLastIndexDate(dateString) {
            return dateString === null
                ? ""
                : moment(Date.parse(dateString)).fromNow();
        }
    }
}
</script>
<style scoped>
</style>

View File

@@ -1,57 +0,0 @@
<template>
<div>
<b-form-checkbox :checked="desktopNotificationsEnabled" @change="updateNotifications($event)">
{{ $t("jobOptions.desktopNotifications") }}
</b-form-checkbox>
<b-form-checkbox v-model="job.schedule_enabled" @change="update()">
{{ $t("jobOptions.scheduleEnabled") }}
</b-form-checkbox>
<label>{{ $t("jobOptions.cron") }}</label>
<b-form-input class="text-monospace" :state="cronValid" v-model="job.cron_expression" :disabled="!job.schedule_enabled" @change="update()"></b-form-input>
</div>
</template>
<script>
/**
 * Form for the scheduling and notification options of a job. Emits "change"
 * with the job object when it is edited and the cron check has not failed.
 */
export default {
    name: "JobOptions",
    props: ["job"],
    data: () => ({
        // undefined = no validation state, true/false = result of checkCron().
        cronValid: undefined
    }),
    computed: {
        /** Desktop notification flag stored for this job in the Vuex store. */
        desktopNotificationsEnabled() {
            const notificationMap = this.$store.state.jobDesktopNotificationMap;
            return notificationMap[this.job.name];
        }
    },
    mounted() {
        this.cronValid = this.checkCron(this.job.cron_expression)
    },
    methods: {
        /**
         * Test the expression against the cron pattern.
         * NOTE(review): the pattern is not anchored, so it accepts any string
         * that merely contains a matching run - confirm this is intended.
         */
        checkCron(expression) {
            const cronPattern = /((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})/;
            return cronPattern.test(expression);
        },
        /** Store the desktop notification preference for this job. */
        updateNotifications(value) {
            const payload = {
                job: this.job.name,
                enabled: value
            };
            this.$store.dispatch("setJobDesktopNotification", payload);
        },
        /** Re-validate the schedule and emit "change" unless validation failed. */
        update() {
            // The expression is only validated while the schedule is enabled.
            this.cronValid = this.job.schedule_enabled
                ? this.checkCron(this.job.cron_expression)
                : undefined;

            if (this.cronValid === false) {
                return;
            }
            this.$emit("change", this.job);
        },
    },
}
</script>

View File

@@ -1,69 +0,0 @@
<template>
<b-navbar>
<b-navbar-brand to="/">
<Sist2Icon></Sist2Icon>
</b-navbar-brand>
<b-button class="ml-auto" to="/task" variant="link">{{ $t("tasks") }}</b-button>
</b-navbar>
</template>
<script>
import Sist2Icon from "@/components/icons/Sist2Icon";
// Navigation bar component; renders the Sist2Icon brand link.
export default {
name: "NavBar",
components: {Sist2Icon},
// NOTE(review): every helper below reads `sist2Info` or `optHideLegacy` from
// the Vuex state. The store state declared in this project lists only
// `sist2AdminInfo` and `jobDesktopNotificationMap`, and the template of this
// component does not call these helpers - confirm whether they are still needed.
methods: {
tagline() {
return this.$store.state.sist2Info.tagline;
},
sist2Version() {
return this.$store.state.sist2Info.version;
},
isDebug() {
return this.$store.state.sist2Info.debug;
},
isLegacy() {
return this.$store.state.sist2Info.esVersionLegacy;
},
hideLegacy() {
return this.$store.state.optHideLegacy;
}
}
}
</script>
<style scoped>
.navbar {
box-shadow: 0 0.125rem 0.25rem rgb(0 0 0 / 8%) !important;
border-radius: 0;
}
.theme-black .navbar {
background: #546b7a30;
border-bottom: none;
}
.navbar-brand {
color: #222 !important;
font-size: 1.75rem;
padding: 0;
}
.navbar-brand:hover {
color: #000 !important;
}
.version {
color: #222 !important;
margin-left: -18px;
margin-top: -14px;
font-size: 11px;
font-family: monospace;
}
.btn-link {
color: #222;
}
</style>

View File

@@ -1,109 +0,0 @@
<template>
<div>
<label>{{ $t("scanOptions.path") }}</label>
<b-form-input v-model="options.path" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.threads") }}</label>
<b-form-input type="number" min="1" v-model="options.threads" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.thumbnailQuality") }}</label>
<b-form-input type="number" min="1" max="31" v-model="options.thumbnail_quality" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.thumbnailCount") }}</label>
<b-form-input type="number" min="0" max="1000" v-model="options.thumbnail_count" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.thumbnailSize") }}</label>
<b-form-input type="number" min="100" v-model="options.thumbnail_size" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.contentSize") }}</label>
<b-form-input type="number" min="0" v-model="options.content_size" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.rewriteUrl") }}</label>
<b-form-input v-model="options.rewrite_url" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.depth") }}</label>
<b-form-input type="number" min="0" v-model="options.depth" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.archive") }}</label>
<b-form-select :options="['skip', 'list', 'shallow', 'recurse']" v-model="options.archive"
@change="update()"></b-form-select>
<label>{{ $t("scanOptions.archivePassphrase") }}</label>
<b-form-input v-model="options.archive_passphrase" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.ocrLang") }}</label>
<b-alert variant="danger" show v-if="selectedOcrLangs.length === 0 && !disableOcrLang">{{ $t("scanOptions.ocrLangAlert") }}</b-alert>
<b-checkbox-group :disabled="disableOcrLang" v-model="selectedOcrLangs" @input="onOcrLangChange">
<b-checkbox v-for="lang in ocrLangs" :key="lang" :value="lang">{{ lang }}</b-checkbox>
</b-checkbox-group>
<!-- <b-form-input readonly v-model="options.ocr_lang" @change="update()"></b-form-input>-->
<div style="height: 10px"></div>
<b-form-checkbox v-model="options.ocr_images" @change="update()">
{{ $t("scanOptions.ocrImages") }}
</b-form-checkbox>
<b-form-checkbox v-model="options.ocr_ebooks" @change="update()">
{{ $t("scanOptions.ocrEbooks") }}
</b-form-checkbox>
<label>{{ $t("scanOptions.exclude") }}</label>
<b-form-input v-model="options.exclude" @change="update()"
:placeholder="$t('scanOptions.excludePlaceholder')"></b-form-input>
<div style="height: 10px"></div>
<b-form-checkbox v-model="options.fast" @change="update()">
{{ $t("scanOptions.fast") }}
</b-form-checkbox>
<b-form-checkbox v-model="options.checksums" @change="update()">
{{ $t("scanOptions.checksums") }}
</b-form-checkbox>
<b-form-checkbox v-model="options.read_subtitles" @change="update()">
{{ $t("scanOptions.readSubtitles") }}
</b-form-checkbox>
<b-form-checkbox v-model="options.optimize_index" @change="update()">
{{ $t("scanOptions.optimizeIndex") }}
</b-form-checkbox>
<label>{{ $t("scanOptions.treemapThreshold") }}</label>
<b-form-input type="number" min="0" v-model="options.treemap_threshold" @change="update()"></b-form-input>
</div>
</template>
<script>
/**
 * Form for the scan options of a job. Emits "change" with the options object
 * whenever a field bound to update() is edited.
 */
export default {
    name: "ScanOptions",
    props: ["options"],
    data: () => ({
        // True while both OCR switches are explicitly off.
        disableOcrLang: false,
        // Languages ticked in the checkbox group.
        selectedOcrLangs: []
    }),
    computed: {
        /** Tesseract languages reported by the server, or an empty list. */
        ocrLangs() {
            const adminInfo = this.$store.state.sist2AdminInfo;
            return adminInfo?.tesseract_langs || [];
        }
    },
    mounted() {
        const {ocr_images, ocr_ebooks, ocr_lang} = this.options;
        this.disableOcrLang = ocr_images === false && ocr_ebooks === false;
        // ocr_lang is a "+"-separated list of languages.
        if (ocr_lang) {
            this.selectedOcrLangs = ocr_lang.split("+");
        } else {
            this.selectedOcrLangs = [];
        }
    },
    methods: {
        /** Write the ticked languages back as a "+"-separated string. */
        onOcrLangChange() {
            const joined = this.selectedOcrLangs.join("+");
            this.options.ocr_lang = joined;
        },
        /** Refresh the OCR language lock and notify the parent. */
        update() {
            const {ocr_images, ocr_ebooks} = this.options;
            this.disableOcrLang = ocr_images === false && ocr_ebooks === false;
            this.$emit("change", this.options);
        },
    }
}
</script>

View File

@@ -1,57 +0,0 @@
<template>
<b-list-group-item>
<b-row style="height: 50px">
<b-col><h5>{{ task.display_name }}</h5></b-col>
<b-col class="shrink">
<router-link class="btn btn-link" :to="`/log/${task.id}`">{{ $t("logs") }}</router-link>
</b-col>
<b-col class="shrink">
<b-btn variant="link" @click="killTask(task.id)">{{ $t("kill") }}</b-btn>
</b-col>
</b-row>
<b-row>
<b-col>
<b-progress :max="task.progress.count">
<b-progress-bar :value="task.progress.done" :label-html="label" :striped="!task.progress.waiting"/>
</b-progress>
</b-col>
</b-row>
</b-list-group-item>
</template>
<script>
import sist2AdminApi from "@/Sist2AdminApi";
/** List entry for one running task, with its progress bar and kill button. */
export default {
    name: "TaskListItem",
    props: ["task"],
    computed: {
        /** HTML label of the progress bar: "<done>/<count>". */
        label() {
            const {count, done} = this.task.progress;
            return `<span>${done}/${count}</span>`
        }
    },
    methods: {
        /** Request the kill of the task and show a confirmation toast on success. */
        killTask(taskId) {
            const showConfirmation = () => {
                const toastOptions = {
                    title: this.$t("killConfirmationTitle"),
                    variant: "success",
                    toaster: "b-toaster-bottom-right"
                };
                this.$bvToast.toast(this.$t("killConfirmation"), toastOptions);
            };

            sist2AdminApi.killTask(taskId).then(showConfirmation);
        }
    }
}
</script>
<style scoped>
.shrink {
flex-grow: inherit;
}
</style>

View File

@@ -1,91 +0,0 @@
<template>
<div>
<label>{{ $t("webOptions.esUrl") }}</label>
<b-alert :variant="esTestOk ? 'success' : 'danger'" :show="showEsTestAlert" class="mt-1">
{{ esTestMessage }}
</b-alert>
<b-input-group>
<b-form-input v-model="options.es_url" @change="update()"></b-form-input>
<b-input-group-append>
<b-button variant="outline-primary" @click="testEs()">{{ $t("test") }}</b-button>
</b-input-group-append>
</b-input-group>
<b-form-checkbox v-model="options.es_insecure_ssl" :disabled="!this.options.es_url.startsWith('https')" @change="update()">
{{ $t("webOptions.esInsecure") }}
</b-form-checkbox>
<label>{{ $t("webOptions.esIndex") }}</label>
<b-form-input v-model="options.es_index" @change="update()"></b-form-input>
<label>{{ $t("webOptions.lang") }}</label>
<b-form-select v-model="options.lang" :options="['en', 'fr', 'zh-CN']" @change="update()"></b-form-select>
<label>{{ $t("webOptions.bind") }}</label>
<b-form-input v-model="options.bind" @change="update()"></b-form-input>
<label>{{ $t("webOptions.tagline") }}</label>
<b-form-textarea v-model="options.tagline" @change="update()"></b-form-textarea>
<label>{{ $t("webOptions.auth") }}</label>
<b-form-input v-model="options.auth" @change="update()"></b-form-input>
<label>{{ $t("webOptions.tagAuth") }}</label>
<b-form-input v-model="options.tag_auth" @change="update()"></b-form-input>
<br>
<h5>Auth0 options</h5>
<label>{{ $t("webOptions.auth0Audience") }}</label>
<b-form-input v-model="options.auth0_audience" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0Domain") }}</label>
<b-form-input v-model="options.auth0_domain" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0ClientId") }}</label>
<b-form-input v-model="options.auth0_client_id" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0PublicKey") }}</label>
<b-textarea rows="10" v-model="options.auth0_public_key" @change="update()"></b-textarea>
</div>
</template>
<script>
import sist2AdminApi from "@/Sist2AdminApi";
/**
 * Form for the web options of a frontend. Emits "change" with the options
 * object whenever a field is edited.
 */
export default {
    name: "WebOptions",
    props: ["options", "frontendName"],
    data() {
        return {
            // State of the alert that shows the result of the last connection test.
            showEsTestAlert: false,
            esTestOk: false,
            esTestMessage: "",
        }
    },
    methods: {
        /** Normalise the options and notify the parent that they were edited. */
        update() {
            // The "insecure SSL" switch only applies to https URLs.
            if (!this.options.es_url.startsWith("https")) {
                this.options.es_insecure_ssl = false;
            }
            this.$emit("change", this.options);
        },
        /**
         * Ask the server to ping the configured Elasticsearch URL and show
         * the outcome in the alert.
         */
        testEs() {
            sist2AdminApi.pingEs(this.options.es_url, this.options.es_insecure_ssl)
                .then((resp) => {
                    this.showEsTestAlert = true;
                    this.esTestOk = resp.data.ok;
                    this.esTestMessage = resp.data.message;
                })
                .catch((err) => {
                    // The ping request itself failed: surface it instead of
                    // leaving the user without any feedback.
                    this.showEsTestAlert = true;
                    this.esTestOk = false;
                    this.esTestMessage = (err && err.message) || String(err);
                });
        }
    }
}
</script>
<style scoped>
</style>

View File

@@ -1,40 +0,0 @@
<template>
<svg
xmlns="http://www.w3.org/2000/svg"
width="27.868069mm"
height="7.6446671mm"
viewBox="0 0 27.868069 7.6446671"
>
<g transform="translate(-4.5018313,-4.1849793)">
<g
style="fill: currentColor;fill-opacity:1;stroke:none;stroke-width:0.26458332">
<path
d="m 6.3153296,11.829646 q -0.7717014,0 -1.8134983,-0.337619 v -0.916395 q 1.0128581,0.511252 1.803852,0.511252 0.5643067,0 0.901926,-0.236334 0.3376194,-0.236333 0.3376194,-0.63183 0,-0.3424428 -0.2845649,-0.5498376 Q 6.980922,9.4566645 6.3635609,9.3264399 L 5.9921796,9.2492698 Q 5.2301245,9.0949295 4.8732126,8.7428407 4.5211238,8.3859288 4.5211238,7.7733908 q 0,-0.7765245 0.5305447,-1.1961372 0.5305447,-0.4196126 1.5096409,-0.4196126 0.829579,0 1.6061036,0.3183268 V 7.3441319 Q 7.4101809,6.9004036 6.5854251,6.9004036 q -1.1671984,0 -1.1671984,0.7958171 0,0.2604492 0.1012858,0.4147895 0.1012858,0.1495171 0.3858507,0.2556261 0.2845649,0.1012858 0.8392253,0.2122179 l 0.3569119,0.067524 q 1.3408312,0.2652724 1.3408312,1.4614098 0,0.80064 -0.5691298,1.263661 -0.5691298,0.458197 -1.5578722,0.458197 z"
style="stroke-width:0.26458332"
/>
<path
d="m 11.943927,5.3087694 q -0.144694,0 -0.144694,-0.144694 V 4.3296733 q 0,-0.144694 0.144694,-0.144694 h 0.694531 q 0.144694,0 0.144694,0.144694 v 0.8344021 q 0,0.144694 -0.144694,0.144694 z M 13.5645,11.728361 q -0.795817,0 -1.234722,-0.511253 -0.434082,-0.516075 -0.434082,-1.4469398 V 6.9823969 H 10.714028 V 6.2878656 h 2.069124 v 3.4823026 q 0,0.5884228 0.221864,0.8971028 0.221865,0.308681 0.6463,0.308681 h 1.036974 v 0.752409 z"
style="stroke-width:0.26458332"
/>
<path
d="m 18.209178,11.829646 q -0.771701,0 -1.813498,-0.337619 v -0.916395 q 1.012858,0.511252 1.803852,0.511252 0.564306,0 0.901926,-0.236334 0.337619,-0.236333 0.337619,-0.63183 0,-0.3424428 -0.284565,-0.5498376 Q 18.87477,9.4566645 18.257409,9.3264399 l -0.371381,-0.07717 Q 17.123973,9.0949295 16.767061,8.7428407 16.414972,8.3859288 16.414972,7.7733908 q 0,-0.7765245 0.530545,-1.1961372 0.530545,-0.4196126 1.509641,-0.4196126 0.829579,0 1.606103,0.3183268 v 0.8681641 q -0.757232,-0.4437283 -1.581988,-0.4437283 -1.167198,0 -1.167198,0.7958171 0,0.2604492 0.101286,0.4147895 0.101286,0.1495171 0.385851,0.2556261 0.284565,0.1012858 0.839225,0.2122179 l 0.356912,0.067524 q 1.340831,0.2652724 1.340831,1.4614098 0,0.80064 -0.56913,1.263661 -0.56913,0.458197 -1.557872,0.458197 z"
style="stroke-width:0.26458332"
/>
<path
d="m 25.207545,11.709068 q -0.993565,0 -1.408355,-0.40032 -0.409966,-0.405143 -0.409966,-1.3794164 V 6.9775737 H 21.947107 V 6.2878656 h 1.442117 V 4.8746874 l 0.887457,-0.3858507 v 1.7990289 h 2.016069 v 0.6897081 h -2.016069 v 2.9517579 q 0,0.5932454 0.226687,0.8344024 0.226687,0.236333 0.790994,0.236333 h 0.998388 v 0.709001 z"
style="stroke-width:0.26458332"
/>
<path
d="m 27.995317,11.043476 q 0,-0.178456 0.120578,-0.299035 0.274919,-0.289388 0.651123,-0.684885 0.376205,-0.4003199 0.805464,-0.8681638 0.327973,-0.356912 0.491959,-0.5353679 0.16881,-0.1832791 0.255626,-0.2845649 0.09164,-0.1012858 0.178456,-0.2073948 0.255626,-0.3086805 0.405144,-0.5257215 0.15434,-0.2170411 0.250803,-0.4292589 0.168809,-0.3762045 0.168809,-0.7524089 0,-0.5980686 -0.352089,-0.935688 -0.356911,-0.3424425 -0.979096,-0.3424425 -0.863341,0 -1.938899,0.6414768 V 4.8361023 q 0.491959,-0.2363335 0.979096,-0.3569119 0.47749,-0.1205783 0.945334,-0.1205783 0.501606,0 0.940511,0.1350477 0.438905,0.1350478 0.766878,0.4244358 0.289388,0.2556261 0.463021,0.6270074 0.173633,0.3665582 0.173633,0.829579 0,0.4726671 -0.212218,0.9501574 -0.106109,0.2411567 -0.274919,0.4726671 -0.163986,0.2266873 -0.424435,0.540191 Q 31.270225,8.501684 31.077299,8.718725 30.884374,8.9357661 30.628748,9.2106847 30.445469,9.4084332 30.286305,9.5675966 30.131965,9.72676 29.958332,9.9003928 29.7847,10.069203 29.558012,10.300713 29.336148,10.5274 29.012998,10.869843 h 3.356901 v 0.819932 h -4.374582 z"
style="stroke-width:0.26458332"
/>
</g>
</g>
</svg>
</template>
<script>
// Component that only carries a name; its template is the inline SVG logo.
export default {
name: "Sist2Icon"
}
</script>

View File

@@ -1,114 +0,0 @@
export default {
en: {
start: "Start",
stop: "Stop",
go: "Go",
online: "online",
offline: "offline",
delete: "Delete",
runNow: "Index now",
create: "Create",
test: "Test",
jobTitle: "job configuration",
tasks: "Tasks",
runningTasks: "Running tasks",
frontends: "Frontends",
jobDisabled: "There is no valid index for this job",
status: "Status",
taskHistory: "Task history",
taskName: "Task name",
taskStarted: "Started",
taskDuration: "Duration",
taskStatus: "Status",
logs: "Logs",
kill: "Kill",
killConfirmation: "SIGTERM signal sent to sist2 process",
killConfirmationTitle: "Confirmation",
follow: "Follow",
wholeFile: "Whole file",
logLevel: "Log level",
logMode: "Follow mode",
logFile: "Reading log file",
jobs: "Jobs",
newJobName: "New job name",
newJobHelp: "Create a new job to get started!",
newFrontendName: "New frontend name",
scanned: "last scan",
autoStart: "Start automatically",
runJobConfirmationTitle: "Task queued",
runJobConfirmation: "Check the Tasks page to monitor the status.",
extraQueryArgs: "Extra query arguments when launching from sist2-admin",
customUrl: "Custom URL when launching from sist2-admin",
selectJobs: "Select jobs",
webOptions: {
title: "Web options",
esUrl: "Elasticsearch URL",
esIndex: "Elasticsearch index name",
esInsecure: "Do not verify SSL connections to Elasticsearch.",
lang: "UI Language",
bind: "Listen address",
tagline: "Tagline in navbar",
auth: "Basic auth in user:password format",
tagAuth: "Basic auth in user:password format for tagging",
auth0Audience: "Auth0 audience",
auth0Domain: "Auth0 domain",
auth0ClientId: "Auth0 client ID",
auth0PublicKey: "Auth0 public key",
},
scanOptions: {
title: "Scanning options",
path: "Path",
threads: "Number of threads",
memThrottle: "Total memory threshold in MiB for scan throttling",
thumbnailQuality: "Thumbnail quality, on a scale of 2 to 32, 2 being the best",
thumbnailCount: "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails.",
thumbnailSize: "Thumbnail size, in pixels",
contentSize: "Number of bytes to be extracted from text documents. Set to 0 to disable",
rewriteUrl: "Serve files from this url instead of from disk",
depth: "Scan up to this many subdirectories deep",
archive: "Archive file mode",
archivePassphrase: "Passphrase for encrypted archive files",
ocrLang: "Tesseract language",
ocrLangAlert: "You must select at least one language",
ocrEbooks: "Enable OCR'ing of ebook files",
ocrImages: "Enable OCR'ing of image files",
exclude: "Files that match this regex will not be scanned",
excludePlaceholder: "Exclude",
fast: "Only index file names & mime type",
checksums: "Calculate file checksums when scanning",
readSubtitles: "Read subtitles from media files",
memBuffer: "Maximum memory buffer size per thread in MiB for files inside archives",
treemapThreshold: "Relative size threshold for treemap",
optimizeIndex: "Defragment index file after scan to reduce its file size."
},
indexOptions: {
title: "Indexing options",
threads: "Number of threads",
esUrl: "Elasticsearch URL",
esIndex: "Elasticsearch index name",
esInsecure: "Do not verify SSL connections to Elasticsearch.",
batchSize: "Index batch size",
script: "User script"
},
jobOptions: {
title: "Job options",
cron: "Job schedule",
scheduleEnabled: "Enable scheduled re-scan",
noJobAvailable: "No jobs available.",
desktopNotifications: "Desktop notifications"
},
frontendOptions: {
title: "Frontend options",
noJobSelectedWarning: "You must select at least one job to start this frontend"
},
notifications: {
indexCompleted: "Task completed for [$JOB$]"
}
}
}

View File

@@ -1,31 +0,0 @@
import Vue from 'vue'
import { BootstrapVue, IconsPlugin } from 'bootstrap-vue'
import "bootstrap/dist/css/bootstrap.min.css"
import "bootstrap-vue/dist/bootstrap-vue.min.css"
Vue.use(BootstrapVue);
Vue.use(IconsPlugin);
import App from './App.vue';
import router from './router';
import store from './store';
import VueI18n from "vue-i18n";
import messages from "@/i18n/messages";
Vue.use(VueI18n);
// Translation provider; "en" is the locale used by the whole application.
const i18n = new VueI18n({
locale: "en",
messages: messages
});
Vue.config.productionTip = false
// Create the root Vue instance with the router, store and i18n plugins and
// mount it on the #app element.
new Vue({
router,
store,
i18n,
render: h => h(App)
}).$mount('#app')

View File

@@ -1,45 +0,0 @@
// Client-side routing table of the sist2-admin UI.
import Vue from 'vue'
import VueRouter from 'vue-router'
import Home from '../views/Home.vue'
import Job from "@/views/Job";
import Tasks from "@/views/Tasks";
import Frontend from "@/views/Frontend";
import Tail from "@/views/Tail";

Vue.use(VueRouter);

// One entry per view; ":name" / ":taskId" are read by the views through
// this.$route.params.
const routes = [
    {path: "/", name: "Home", component: Home},
    {path: "/job/:name", name: "Job", component: Job},
    {path: "/task/", name: "Tasks", component: Tasks},
    {path: "/frontend/:name", name: "Frontend", component: Frontend},
    {path: "/log/:taskId", name: "Tail", component: Tail},
]

// Hash mode: the route lives after "#" in the URL.
const router = new VueRouter({
    mode: "hash",
    base: process.env.BASE_URL,
    routes
})

export default router

View File

@@ -1,63 +0,0 @@
import Vue from "vue";
import Vuex from "vuex";
Vue.use(Vuex);
/**
 * Persist the browser-local settings (currently only the per-job desktop
 * notification map) to localStorage under the "sist2-admin-settings" key.
 *
 * @param state Vuex state object to read the settings from.
 */
function saveBrowserSettings(state) {
    const {jobDesktopNotificationMap} = state;
    const settings = {jobDesktopNotificationMap};
    const serialized = JSON.stringify(settings);

    localStorage.setItem("sist2-admin-settings", serialized);

    console.log("SAVED");
    console.log(settings);
}
// Global Vuex store of the admin UI.
//   sist2AdminInfo            - payload stored through setSist2AdminInfo (null until set)
//   jobDesktopNotificationMap - job name -> whether desktop notifications are enabled
export default new Vuex.Store({
    state: {
        sist2AdminInfo: null,
        jobDesktopNotificationMap: {}
    },
    mutations: {
        setSist2AdminInfo: (state, payload) => state.sist2AdminInfo = payload,
        setJobDesktopNotificationMap: (state, payload) => state.jobDesktopNotificationMap = payload,
    },
    actions: {
        /**
         * Show a desktop notification for `notification.job`, unless
         * notifications are disabled for that job in this browser.
         * "$JOB$" in `notification.messageString` is replaced by the job name.
         */
        notify: async ({state}, notification) => {
            if (!state.jobDesktopNotificationMap[notification.job]) {
                return;
            }
            new Notification(notification.messageString.replace("$JOB$", notification.job));
        },
        /**
         * Enable or disable desktop notifications for one job and persist the
         * choice. Resolves to false when the user refuses the browser
         * permission, true otherwise.
         */
        setJobDesktopNotification: async ({state, commit}, {job, enabled}) => {
            if (enabled === true) {
                const permission = await Notification.requestPermission()
                if (permission !== "granted") {
                    return false;
                }
            }

            // Replace the whole map through the mutation instead of assigning
            // state.jobDesktopNotificationMap[job] directly: Vue 2 cannot
            // detect a property added to an existing object, and Vuex state
            // must only be changed inside mutations.
            const updatedMap = Object.assign({}, state.jobDesktopNotificationMap, {[job]: enabled});
            commit("setJobDesktopNotificationMap", updatedMap);

            saveBrowserSettings(state);
            return true;
        },
        /**
         * Restore the settings written by saveBrowserSettings(). Missing,
         * corrupt or incomplete data is ignored so the map always stays an
         * object.
         */
        loadBrowserSettings({commit}) {
            const settingString = localStorage.getItem("sist2-admin-settings");
            if (!settingString) {
                return;
            }

            let settings;
            try {
                settings = JSON.parse(settingString);
            } catch (e) {
                // Unparseable value in localStorage: keep the defaults.
                return;
            }

            const storedMap = settings && settings["jobDesktopNotificationMap"];
            if (storedMap && typeof storedMap === "object") {
                commit("setJobDesktopNotificationMap", storedMap);
            }
        }
    },
    modules: {}
})

View File

@@ -1,8 +0,0 @@
/**
 * Drop a leading wildcard host ("0.0.0.0") from a bind address, leaving
 * whatever follows it (e.g. ":4090"). Other addresses are returned unchanged.
 *
 * @param {string} address bind address
 * @returns {string}
 */
export function formatBindAddress(address) {
    const wildcardHost = "0.0.0.0";
    return address.startsWith(wildcardHost)
        ? address.slice(wildcardHost.length)
        : address;
}

View File

@@ -1,129 +0,0 @@
<template>
<b-card>
<b-card-title>
{{ name }}
<small style="vertical-align: top">
<b-badge v-if="!loading && frontend.running" variant="success">{{ $t("online") }}</b-badge>
<b-badge v-else-if="!loading" variant="secondary">{{ $t("offline") }}</b-badge>
</small>
</b-card-title>
<div class="mb-3" v-if="!loading">
<b-button class="mr-1" :disabled="frontend.running || !valid" variant="success" @click="start()">{{
$t("start")
}}
</b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="danger" @click="stop()">{{
$t("stop")
}}
</b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="primary" :href="frontendUrl" target="_blank">
{{ $t("go") }}
</b-button>
<b-button variant="danger" @click="deleteFrontend()">{{ $t("delete") }}</b-button>
</div>
<b-progress v-if="loading" striped animated value="100"></b-progress>
<b-card-body v-else>
<h4>{{ $t("frontendOptions.title") }}</h4>
<b-card>
<b-form-checkbox v-model="frontend.auto_start" @change="update()">
{{ $t("autoStart") }}
</b-form-checkbox>
<label>{{ $t("extraQueryArgs") }}</label>
<b-form-input v-model="frontend.extra_query_args" @change="update()"></b-form-input>
<label>{{ $t("customUrl") }}</label>
<b-form-input v-model="frontend.custom_url" @change="update()" placeholder="http://"></b-form-input>
<br/>
<b-alert v-if="!valid" variant="warning" show>{{ $t("frontendOptions.noJobSelectedWarning") }}</b-alert>
<JobCheckboxGroup :frontend="frontend" @input="update()"></JobCheckboxGroup>
</b-card>
<br/>
<h4>{{ $t("webOptions.title") }}</h4>
<b-card>
<WebOptions :options="frontend.web_options" :frontend-name="$route.params.name" @change="update()"></WebOptions>
</b-card>
</b-card-body>
</b-card>
</template>
<script>
import Sist2AdminApi from "@/Sist2AdminApi";
import JobCheckboxGroup from "@/components/JobCheckboxGroup";
import WebOptions from "@/components/WebOptions";
// View for a single frontend (a sist2 web server instance): start / stop /
// delete it and edit its options. The frontend name comes from the route.
export default {
    name: 'Frontend',
    components: {JobCheckboxGroup, WebOptions},
    data() {
        return {
            loading: true,
            frontend: null,
        }
    },
    computed: {
        // A frontend can only be started once at least one job is attached.
        valid() {
            return !this.loading && this.frontend.jobs.length > 0;
        },
        // URL opened by the "go" button.
        frontendUrl() {
            if (this.frontend.custom_url) {
                return this.frontend.custom_url + this.args;
            }

            if (this.frontend.web_options.bind.startsWith("0.0.0.0")) {
                // Bound on all interfaces: reuse the host the admin UI was reached through.
                return window.location.protocol + "//" + window.location.hostname + ":" + this.port + this.args;
            }

            return window.location.protocol + "//" + this.frontend.web_options.bind + this.args;
        },
        name() {
            return this.$route.params.name;
        },
        port() {
            return this.frontend.web_options.bind.split(":")[1]
        },
        // Extra query arguments formatted as a "#?..." suffix ("" when none).
        args() {
            const args = this.frontend.extra_query_args;
            // Also covers null/undefined, which used to throw on startsWith().
            if (!args) {
                return "";
            }
            return "#" + (args.startsWith("?") ? (args) : ("?" + args));
        }
    },
    mounted() {
        Sist2AdminApi.getFrontend(this.name).then(resp => {
            this.frontend = resp.data;
            this.loading = false;
        });
    },
    methods: {
        start() {
            // The running flag is flipped immediately, without waiting for the API call.
            this.frontend.running = true;
            Sist2AdminApi.startFrontend(this.name)
        },
        stop() {
            this.frontend.running = false;
            Sist2AdminApi.stopFrontend(this.name)
        },
        deleteFrontend() {
            Sist2AdminApi.deleteFrontend(this.name).then(() => {
                // Frontends are listed on the home view; the router defines no
                // "/frontends" route, so navigating there showed an empty page.
                this.$router.push("/");
            });
        },
        update() {
            Sist2AdminApi.updateFrontend(this.name, this.frontend);
        },
    }
}
</script>

View File

@@ -1,122 +0,0 @@
<template>
<div>
<b-card>
<b-card-title>{{ $t("jobs") }}</b-card-title>
<b-row>
<b-col>
<b-input id="new-job" v-model="newJobName" :placeholder="$t('newJobName')"></b-input>
<b-popover
:show.sync="showHelp"
target="new-job"
placement="top"
triggers="manual"
variant="primary"
:content="$t('newJobHelp')"
></b-popover>
</b-col>
<b-col>
<b-button variant="primary" @click="createJob()" :disabled="!jobNameValid(newJobName)">{{ $t("create") }}
</b-button>
</b-col>
</b-row>
<hr/>
<b-progress v-if="jobsLoading" striped animated value="100"></b-progress>
<b-list-group v-else>
<JobListItem v-for="job in jobs" :key="job.name" :job="job"></JobListItem>
</b-list-group>
</b-card>
<br/>
<b-card>
<b-card-title>{{ $t("frontends") }}</b-card-title>
<b-row>
<b-col>
<b-input v-model="newFrontendName" :placeholder="$t('newFrontendName')"></b-input>
</b-col>
<b-col>
<b-button variant="primary" @click="createFrontend()" :disabled="!frontendNameValid(newFrontendName)">
{{ $t("create") }}
</b-button>
</b-col>
</b-row>
<hr/>
<b-progress v-if="frontendsLoading" striped animated value="100"></b-progress>
<b-list-group v-else>
<FrontendListItem v-for="frontend in frontends"
:key="frontend.name" :frontend="frontend"></FrontendListItem>
</b-list-group>
</b-card>
</div>
</template>
<script>
import JobListItem from "@/components/JobListItem";
import {formatBindAddress} from "@/util";
import Sist2AdminApi from "@/Sist2AdminApi";
import FrontendListItem from "@/components/FrontendListItem";
// Home view: lists the jobs and the frontends and lets the user create new ones.
export default {
    name: "Jobs",
    components: {JobListItem, FrontendListItem},
    data() {
        return {
            jobsLoading: true,
            newJobName: "",
            jobs: [],
            frontendsLoading: true,
            frontends: [],
            formatBindAddress,
            newFrontendName: "",
            showHelp: false
        }
    },
    mounted() {
        this.loading = true;
        this.reload();
    },
    methods: {
        // A job name must be unused and made only of letters, digits and "-_,.; ".
        jobNameValid(name) {
            const alreadyUsed = this.jobs.some(job => job.name === name);
            return !alreadyUsed && /^[a-zA-Z0-9-_,.; ]+$/.test(name);
        },
        // Same rule as for job names, checked against the existing frontends.
        frontendNameValid(name) {
            const alreadyUsed = this.frontends.some(job => job.name === name);
            return !alreadyUsed && /^[a-zA-Z0-9-_,.; ]+$/.test(name);
        },
        // Fetch both lists; the help popover is shown while no job exists.
        reload() {
            Sist2AdminApi.getJobs().then(({data}) => {
                this.jobs = data;
                this.jobsLoading = false;
                this.showHelp = this.jobs.length === 0;
            });

            Sist2AdminApi.getFrontends().then(({data}) => {
                this.frontends = data;
                this.frontendsLoading = false;
            });
        },
        createJob() {
            const pending = Sist2AdminApi.createJob(this.newJobName);
            pending.then(this.reload);
        },
        createFrontend() {
            const pending = Sist2AdminApi.createFrontend(this.newFrontendName);
            pending.then(this.reload)
        }
    }
}
</script>

View File

@@ -1,92 +0,0 @@
<template>
<b-card>
<b-card-title>
[{{ getName() }}]
{{ $t("jobTitle") }}
</b-card-title>
<div class="mb-3">
<b-button class="mr-1" variant="primary" @click="runJob()">{{ $t("runNow") }}</b-button>
<b-button variant="danger" @click="deleteJob()">{{ $t("delete") }}</b-button>
</div>
<div v-if="job">
{{ $t("status") }}: <code>{{ job.status }}</code>
</div>
<b-progress v-if="loading" striped animated value="100"></b-progress>
<b-card-body v-else>
<h4>{{ $t("jobOptions.title") }}</h4>
<b-card>
<JobOptions :job="job" @change="update"></JobOptions>
</b-card>
<br/>
<h4>{{ $t("scanOptions.title") }}</h4>
<b-card>
<ScanOptions :options="job.scan_options" @change="update()"></ScanOptions>
</b-card>
<br/>
<h4>{{ $t("indexOptions.title") }}</h4>
<b-card>
<IndexOptions :options="job.index_options" @change="update()"></IndexOptions>
</b-card>
</b-card-body>
</b-card>
</template>
<script>
import ScanOptions from "@/components/ScanOptions";
import Sist2AdminApi from "@/Sist2AdminApi";
import IndexOptions from "@/components/IndexOptions";
import JobOptions from "@/components/JobOptions";
// View for a single job: run it, delete it, and edit its job / scan / index
// options. The job name comes from the ":name" route parameter.
export default {
    name: "Job",
    components: {
        IndexOptions,
        ScanOptions,
        JobOptions
    },
    data() {
        return {
            loading: true,
            job: null
        }
    },
    mounted() {
        const name = this.getName();
        Sist2AdminApi.getJob(name).then(resp => {
            this.loading = false;
            this.job = resp.data;
        })
    },
    methods: {
        getName() {
            return this.$route.params.name;
        },
        // Push the whole (locally edited) job object to the backend.
        update() {
            Sist2AdminApi.updateJob(this.getName(), this.job);
        },
        // Start the job and confirm with a toast once the request succeeded.
        runJob() {
            const showConfirmation = () => {
                const toastOptions = {
                    title: this.$t("runJobConfirmationTitle"),
                    variant: "success",
                    toaster: "b-toaster-bottom-right"
                };
                this.$bvToast.toast(this.$t("runJobConfirmation"), toastOptions);
            };

            Sist2AdminApi.runJob(this.getName()).then(showConfirmation);
        },
        // Delete the job, then return to the home view.
        deleteJob() {
            Sist2AdminApi.deleteJob(this.getName()).then(() => {
                this.$router.push("/");
            })
        }
    }
}
</script>

View File

@@ -1,168 +0,0 @@
<template>
<b-card>
<b-card-body>
<h4 class="mb-3">{{ taskId }} {{ $t("logs") }}</h4>
<div v-if="$store.state.sist2AdminInfo">
{{ $t("logFile") }}
<code>{{ $store.state.sist2AdminInfo.logs_folder }}/sist2-{{ taskId }}.log</code>
<br/>
<br/>
</div>
<b-row>
<b-col>
<span>{{ $t("logLevel") }}</span>
<b-select :options="levels.slice(0, -1)" v-model="logLevel" @input="connect()"></b-select>
</b-col>
<b-col>
<span>{{ $t("logMode") }}</span>
<b-select :options="modeOptions" v-model="mode" @input="connect()"></b-select>
</b-col>
</b-row>
<div id="log-tail-output" class="mt-3 ml-1"></div>
</b-card-body>
</b-card>
</template>
<script>
// Live log viewer for one task: streams log lines from the backend over a
// WebSocket and appends them to #log-tail-output.
export default {
    name: "Tail",
    data() {
        return {
            logLevel: "DEBUG",
            // Ordered by increasing severity; the position is used for filtering.
            levels: ["DEBUG", "INFO", "WARNING", "ERROR", "ADMIN", "FATAL"],
            socket: null,
            mode: "follow",
            modeOptions: [
                {
                    "text": this.$t('follow'),
                    "value": "follow"
                },
                {
                    "text": this.$t('wholeFile'),
                    "value": "wholeFile"
                }
            ]
        }
    },
    computed: {
        taskId: function () {
            return this.$route.params.taskId;
        }
    },
    methods: {
        /**
         * (Re)open the log stream: clears the output, closes any previous
         * socket and requests the last `n` lines (32 in follow mode,
         * effectively the whole file otherwise).
         */
        connect() {
            let lineCount = 0;
            const outputElem = document.getElementById("log-tail-output")
            outputElem.replaceChildren();

            this.disconnect();

            const n = this.mode === "follow" ? 32 : 9999999999;

            // A page served over https may only open secure WebSockets.
            const scheme = window.location.protocol === "https:" ? "wss" : "ws";
            const socket = new WebSocket(`${scheme}://${window.location.host}/log/${this.taskId}?n=${n}`);
            this.socket = socket;

            socket.onopen = () => {
                // The server waits for one text message before it starts streaming.
                socket.send("Hello from client");
            }

            socket.onmessage = e => {
                // Ignore messages from a socket that connect() has already replaced,
                // so they do not end up in the freshly cleared output.
                if (socket !== this.socket) {
                    return;
                }

                let message;
                try {
                    message = JSON.parse(e.data);
                } catch {
                    console.error(e.data)
                    return;
                }

                // Keep-alive message, nothing to display.
                if ("ping" in message) {
                    return;
                }

                // Lines without a level carry either a "stderr" or a
                // "sist2-admin" payload.
                if (message.level === undefined) {
                    if ("stderr" in message) {
                        message.level = "ERROR";
                        message.message = message["stderr"];
                    } else {
                        message.level = "ADMIN";
                        message.message = message["sist2-admin"];
                    }
                    message.datetime = ""
                    message.filepath = ""
                }

                if (this.levels.indexOf(message.level) < this.levels.indexOf(this.logLevel)) {
                    return;
                }

                const logLine = `${message.datetime} [${message.level} ${message.filepath}] ${message.message}`;

                const span = document.createElement("span");
                span.setAttribute("class", message.level);
                span.appendChild(document.createTextNode(logLine));

                outputElem.appendChild(span);
                lineCount += 1;

                // In follow mode, drop the oldest line once the window is full.
                if (this.mode === "follow" && lineCount >= n) {
                    outputElem.firstChild.remove();
                }
            }
        },
        /** Close the current socket, if any. */
        disconnect() {
            if (this.socket !== null) {
                this.socket.close();
                this.socket = null;
            }
        }
    },
    mounted() {
        this.connect()
    },
    beforeDestroy() {
        // Without this the stream stays open after leaving the page.
        this.disconnect();
    }
}
</script>
<style>
#log-tail-output span {
display: block;
}
span.DEBUG {
color: #9E9E9E;
}
span.WARNING {
color: #FFB300;
}
span.INFO {
color: #039BE5;
}
span.ERROR {
color: #F4511E;
}
span.FATAL {
color: #F4511E;
}
span.ADMIN {
color: #ee05ff;
}
#log-tail-output {
font-size: 13px;
font-family: monospace;
padding: 6px;
background-color: #f5f5f5;
border: 1px solid #ccc;
border-radius: 4px;
margin: 3px;
white-space: pre;
color: #000;
overflow: hidden;
}
</style>

View File

@@ -1,150 +0,0 @@
<template>
<div>
<b-card v-if="tasks.length > 0">
<h2>{{ $t("runningTasks") }}</h2>
<b-list-group>
<TaskListItem v-for="task in tasks" :key="task.id" :task="task"></TaskListItem>
</b-list-group>
</b-card>
<b-card class="mt-4">
<b-card-title>{{ $t("taskHistory") }}</b-card-title>
<br/>
<b-table
id="task-history"
:items="historyItems"
:fields="historyFields"
:current-page="historyCurrentPage"
:tbody-tr-class="rowClass"
:per-page="10"
>
<template #cell(logs)="data">
<router-link :to="`/log/${data.item.logs}`">{{ $t("logs") }}</router-link>
</template>
</b-table>
<b-pagination limit="20" v-model="historyCurrentPage" :total-rows="historyItems.length"
:per-page="10"></b-pagination>
</b-card>
</div>
</template>
<script>
import TaskListItem from "@/components/TaskListItem";
import Sist2AdminApi from "@/Sist2AdminApi";
import moment from "moment";
// Durations of the display units, in seconds.
const DAY = 3600 * 24;
const HOUR = 3600;
const MINUTE = 60;

/**
 * Format a duration given in milliseconds as e.g. "1 days 2h 3m 4s",
 * omitting leading units that are zero.
 *
 * @param {number} sec_num duration in milliseconds (converted to seconds internally)
 * @returns {string} "<1s" when the duration rounds down to zero seconds
 */
function humanDuration(sec_num) {
    sec_num = sec_num / 1000;

    const days = Math.floor(sec_num / DAY);
    sec_num -= days * DAY;
    const hours = Math.floor(sec_num / HOUR);
    sec_num -= hours * HOUR;
    const minutes = Math.floor(sec_num / MINUTE);
    sec_num -= minutes * MINUTE;
    const seconds = Math.floor(sec_num);

    if (days > 0) {
        return `${days} days ${hours}h ${minutes}m ${seconds}s`;
    }
    if (hours > 0) {
        return `${hours}h ${minutes}m ${seconds}s`;
    }
    if (minutes > 0) {
        return `${minutes}m ${seconds}s`;
    }
    if (seconds > 0) {
        return `${seconds}s`;
    }

    // Was "<0s", which reads as a negative duration.
    return "<1s";
}
// Tasks view: running tasks (polled every second) and a paginated history of
// finished tasks (loaded once when the view is mounted).
export default {
    name: 'Tasks',
    components: {TaskListItem},
    props: {
        msg: String
    },
    data() {
        // Column descriptor with a translated label.
        const column = (key, labelKey) => ({key, label: this.$t(labelKey)});

        return {
            loading: true,
            tasks: [],
            taskHistory: [],
            timerId: null,
            historyFields: [
                column("name", "taskName"),
                column("time", "taskStarted"),
                column("duration", "taskDuration"),
                column("status", "taskStatus"),
                column("logs", "logs"),
            ],
            historyCurrentPage: 1,
            historyItems: []
        }
    },
    mounted() {
        this.loading = true;
        this.update().then(() => this.loading = false);
        this.timerId = window.setInterval(this.update, 1000);
        this.updateHistory();
    },
    destroyed() {
        // Stop polling once the view is gone.
        if (this.timerId) {
            window.clearInterval(this.timerId);
        }
    },
    methods: {
        // Highlight failed tasks in the history table.
        rowClass(row) {
            return row.status === "failed" ? "table-danger" : null;
        },
        // Load the history and convert each row to the shape the table expects.
        updateHistory() {
            const toItem = row => ({
                id: row.id,
                name: row.name,
                duration: this.taskDuration(row),
                time: moment(row.started).format("dd, MMM Do YYYY, HH:mm:ss"),
                logs: row.id,
                status: row.return_code === 0 ? "ok" : "failed"
            });

            Sist2AdminApi.getTaskHistory().then(resp => {
                this.historyItems = resp.data.map(toItem);
            });
        },
        // Refresh the list of running tasks; returns the request promise.
        update() {
            return Sist2AdminApi.getTasks().then(resp => {
                this.tasks = resp.data;
            })
        },
        // Human-readable time between task.started and task.ended.
        taskDuration(task) {
            const startedAt = moment.utc(task.started);
            const endedAt = moment.utc(task.ended);
            const elapsedMs = endedAt.diff(startedAt);
            return humanDuration(elapsedMs)
        }
    }
}
</script>
<style scoped>
#task-history {
font-family: monospace;
font-size: 12px;
}
</style>

View File

@@ -1,5 +0,0 @@
// Vue CLI build configuration for the admin frontend.
const buildOptions = {
    // Empty public path: asset URLs are emitted relative to index.html.
    publicPath: "",
    // No content hash in the generated file names.
    filenameHashing: false,
    // Do not emit source maps for production builds.
    productionSourceMap: false,
};

module.exports = buildOptions;

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +0,0 @@
fastapi
git+https://github.com/simon987/hexlib.git
uvicorn
websockets
pycron

View File

@@ -1,390 +0,0 @@
import asyncio
import os
import signal
from datetime import datetime
from urllib.parse import urlparse
import requests
import uvicorn
from fastapi import FastAPI, HTTPException
from hexlib.db import PersistentState
from requests import ConnectionError
from requests.exceptions import SSLError
from starlette.middleware.cors import CORSMiddleware
from starlette.responses import RedirectResponse
from starlette.staticfiles import StaticFiles
from starlette.websockets import WebSocket
from websockets.exceptions import ConnectionClosed
import cron
from config import LOG_FOLDER, logger, WEBSERVER_PORT, DATA_FOLDER, SIST2_BINARY
from jobs import Sist2Job, Sist2ScanTask, TaskQueue, Sist2IndexTask, JobStatus
from notifications import Subscribe, Notifications
from sist2 import Sist2
from state import migrate_v1_to_v2, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEMA_VERSION
from web import Sist2Frontend
# Process-wide singletons shared by every request handler below.
sist2 = Sist2(SIST2_BINARY, DATA_FOLDER)
db = PersistentState(dbfile=os.path.join(DATA_FOLDER, "state.db"))
notifications = Notifications()
task_queue = TaskQueue(sist2, db, notifications)

app = FastAPI()

# CORS is left fully open (any origin, method and header).
_cors_options = {
    "allow_credentials": True,
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# The compiled web UI is served as static files under /ui/.
app.mount("/ui/", StaticFiles(directory="./frontend/dist", html=True), name="static")
# Redirect the bare root URL to the web UI mounted under /ui/.
@app.get("/")
async def home():
    ui_location = "ui"
    return RedirectResponse(ui_location)
# General information about this sist2-admin instance: the Tesseract
# languages constant and the folder task logs are written to.
@app.get("/api")
async def api():
    info = {}
    info["tesseract_langs"] = TESSERACT_LANGS
    info["logs_folder"] = LOG_FOLDER
    return info
# Return the stored job called `name`; 404 when the lookup yields nothing.
@app.get("/api/job/{name:str}")
async def get_job(name: str):
    job = db["jobs"][name]
    if job:
        return job
    raise HTTPException(status_code=404)
# Return the stored frontend called `name`, with its `running` flag refreshed
# from the in-memory table of running frontends; 404 when it does not exist.
@app.get("/api/frontend/{name:str}")
async def get_frontend(name: str):
    frontend: Sist2Frontend = db["frontends"][name]
    if not frontend:
        raise HTTPException(status_code=404)

    # `running` is derived at request time from RUNNING_FRONTENDS.
    frontend.running = frontend.name in RUNNING_FRONTENDS
    return frontend
# List every stored job.
@app.get("/api/job/")
async def get_jobs():
    return [job for job in db["jobs"]]
# Replace the stored job `name` with `new_job` (404 when it does not exist).
# If any scan option listed below differs from the stored job, the new job is
# flagged for a full scan.
@app.put("/api/job/{name:str}")
async def update_job(name: str, new_job: Sist2Job):
    # TODO: Check etag

    new_job.last_modified = datetime.now()

    stored_job = db["jobs"][name]
    if not stored_job:
        raise HTTPException(status_code=404)

    args_that_trigger_full_scan = (
        "path",
        "thumbnail_count",
        "thumbnail_quality",
        "thumbnail_size",
        "content_size",
        "depth",
        "archive",
        "archive_passphrase",
        "ocr_lang",
        "ocr_images",
        "ocr_ebooks",
        "fast",
        "checksums",
        "read_subtitles",
    )

    # Every option is compared (no short-circuit), as before.
    changed_args = [
        arg
        for arg in args_that_trigger_full_scan
        if getattr(new_job.scan_options, arg) != getattr(stored_job.scan_options, arg)
    ]
    if changed_args:
        new_job.do_full_scan = True

    db["jobs"][name] = new_job
# Store `frontend` under `name`, overwriting any previous value.
@app.put("/api/frontend/{name:str}")
async def update_frontend(name: str, frontend: Sist2Frontend):
    frontends = db["frontends"]
    frontends[name] = frontend

    # TODO: Check etag

    return "ok"
# JSON description of every task the queue reports through tasks().
@app.get("/api/task/")
async def get_tasks():
    return [task.json() for task in task_queue.tasks()]
# Finished tasks, most recently started first.
@app.get("/api/task/history")
async def task_history():
    finished_tasks = db["task_done"].sql("ORDER BY started DESC")
    return list(finished_tasks)
# Ask the task queue to kill the task `task_id`; returns the queue's answer.
@app.post("/api/task/{task_id:str}/kill")
async def kill_job(task_id: str):
    was_killed = task_queue.kill_task(task_id)
    return was_killed
def _run_job(job: Sist2Job):
    """Queue a scan of ``job`` followed by an index task that depends on it.

    The job's modification date is refreshed, a job that was only "created"
    becomes "started", and the job is saved before the tasks are submitted.
    """
    job.last_modified = datetime.now()
    if job.status == JobStatus("created"):
        job.status = JobStatus("started")
    db["jobs"][job.name] = job

    scan_task = Sist2ScanTask(job, f"Scan [{job.name}]")
    index_task = Sist2IndexTask(job, f"Index [{job.name}]", depends_on=scan_task)

    # Submission order: scan first, then the dependent index task.
    for task in (scan_task, index_task):
        task_queue.submit(task)
# Queue a scan + index run of the job `name`; 404 when it does not exist.
@app.get("/api/job/{name:str}/run")
async def run_job(name: str):
    job = db["jobs"][name]
    if job:
        _run_job(job)
        return "ok"
    raise HTTPException(status_code=404)
# Delete the stored job `name`; 404 when it does not exist.
@app.delete("/api/job/{name:str}")
async def delete_job(name: str):
    jobs = db["jobs"]
    if not jobs[name]:
        raise HTTPException(status_code=404)
    del db["jobs"][name]
# Stop the frontend `name` if it is running, then delete its stored
# definition; 404 when no such frontend is stored.
@app.delete("/api/frontend/{name:str}")
async def delete_frontend(name: str):
    if name in RUNNING_FRONTENDS:
        pid = RUNNING_FRONTENDS[name]
        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            # The process already exited. This used to abort the request and
            # leave a stale entry behind, making the frontend undeletable.
            pass
        del RUNNING_FRONTENDS[name]

    frontend = db["frontends"][name]
    if frontend:
        del db["frontends"][name]
    else:
        raise HTTPException(status_code=404)
# Create a job with default options under `name` and return it.
# Responds with 409 Conflict when a job with that name already exists
# (previously a bare ValueError, which surfaced as a 500).
@app.post("/api/job/{name:str}")
async def create_job(name: str):
    if db["jobs"][name]:
        raise HTTPException(status_code=409, detail="Job with the same name already exists")

    job = Sist2Job.create_default(name)
    db["jobs"][name] = job

    return job
# Create a frontend with default options under `name` and return it.
# Responds with 409 Conflict when a frontend with that name already exists
# (previously a bare ValueError, which surfaced as a 500).
@app.post("/api/frontend/{name:str}")
async def create_frontend(name: str):
    if db["frontends"][name]:
        raise HTTPException(status_code=409, detail="Frontend with the same name already exists")

    frontend = Sist2Frontend.create_default(name)
    db["frontends"][name] = frontend

    return frontend
# Check that an Elasticsearch server is reachable at `url`; see
# check_es_version() for the shape of the answer.
@app.get("/api/ping_es")
async def ping_es(url: str, insecure: bool):
    result = check_es_version(url, insecure)
    return result
def check_es_version(es_url: str, insecure: bool):
    """Probe an Elasticsearch server and report its version.

    Credentials embedded in the URL (``scheme://user:password@host``) are
    sent as HTTP basic auth and removed from the URL that is requested.

    :param es_url: base URL of the Elasticsearch server
    :param insecure: when True, TLS certificates are NOT verified
    :return: ``{"ok": bool, "message": str}``; ``message`` is either the
        version string or a description of what went wrong
    """
    try:
        url = urlparse(es_url)
        if url.username:
            auth = (url.username, url.password)
            # Drop only the "user:password@" part. Rebuilding the URL from
            # hostname/port produced "host:None" when no port was given and
            # discarded any path component.
            host_and_port = url.netloc.rpartition("@")[2]
            es_url = url._replace(netloc=host_and_port).geturl()
        else:
            auth = None
        # `insecure` means "skip verification", so it must be negated here
        # (it used to be passed straight through as `verify`).
        r = requests.get(es_url, verify=not insecure, auth=auth)
    except SSLError:
        return {
            "ok": False,
            "message": "Invalid SSL certificate"
        }
    except ConnectionError:
        return {
            "ok": False,
            "message": "Connection refused"
        }
    except ValueError as e:
        return {
            "ok": False,
            "message": str(e)
        }

    if r.status_code == 401:
        return {
            "ok": False,
            "message": "Authentication failure"
        }

    try:
        message = "Elasticsearch version " + r.json()["version"]["number"]
    except (ValueError, KeyError, TypeError):
        # Not JSON, or not shaped like an Elasticsearch root response.
        return {
            "ok": False,
            "message": "Could not read version"
        }

    return {
        "ok": True,
        "message": message
    }
def start_frontend_(frontend: Sist2Frontend):
    """Launch ``frontend`` over the index of each of its jobs and record its pid."""
    index_paths = [db["jobs"][job_name].index_path for job_name in frontend.jobs]
    frontend.web_options.indices = index_paths

    pid = sist2.web(frontend.web_options, frontend.name)
    RUNNING_FRONTENDS[frontend.name] = pid
# Start the stored frontend `name`; 404 when it does not exist.
@app.post("/api/frontend/{name:str}/start")
async def start_frontend(name: str):
    frontend = db["frontends"][name]
    if frontend:
        start_frontend_(frontend)
    else:
        raise HTTPException(status_code=404)
# Send SIGTERM to the frontend `name` if it is running and forget its pid.
# Does nothing when the frontend is not running.
@app.post("/api/frontend/{name:str}/stop")
async def stop_frontend(name: str):
    if name in RUNNING_FRONTENDS:
        pid = RUNNING_FRONTENDS[name]
        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            # Already gone: still drop the stale entry, otherwise the
            # frontend keeps being reported as running.
            pass
        del RUNNING_FRONTENDS[name]
# List every stored frontend, each with its `running` flag refreshed from
# the in-memory table of running frontends.
@app.get("/api/frontend/")
async def get_frontends():
    def with_running_flag(frontend: Sist2Frontend):
        frontend.running = frontend.name in RUNNING_FRONTENDS
        return frontend

    return [with_running_flag(frontend) for frontend in db["frontends"]]
def tail(filepath: str, n: int):
    """Follow a text file like ``tail -n <n> -f``.

    Yields the last ``n`` complete lines present when the end of the file is
    first reached, then every new complete line as it is appended. Whenever
    no new data is available, ``None`` is yielded so the caller can pause or
    send a keep-alive. The generator never finishes on its own.

    :param filepath: path of the file to follow
    :param n: number of existing lines to replay (values below 0 count as 0)
    """
    import sys
    from collections import deque

    # A bounded deque keeps exactly the last n lines. The previous list
    # trimmed with "len(buffer) > n" kept n + 1 lines and paid O(n) per
    # pop(0). The clamp keeps maxlen inside what deque accepts.
    backlog = deque(maxlen=min(max(n, 0), sys.maxsize))

    with open(filepath) as file:
        reached_eof = False
        line = ""

        while True:
            chunk = file.readline()

            if chunk:
                # A line may arrive in several pieces while it is being written.
                line += chunk
                if line.endswith("\n"):
                    if reached_eof:
                        yield line
                    else:
                        backlog.append(line)
                    line = ""
            else:
                if not reached_eof:
                    reached_eof = True
                    yield from backlog
                    backlog.clear()
                yield None
# WebSocket feed of notifications: after the client has sent one text message,
# every notification delivered to this subscription is forwarded as JSON.
# NOTE(review): this handler shares its name with the /log handler defined
# after it; both routes are registered by their decorators.
@app.websocket("/notifications")
async def ws_tail_log(websocket: WebSocket):
    await websocket.accept()

    try:
        # Wait for the client's first message before subscribing.
        await websocket.receive_text()

        async with Subscribe(notifications) as subscription:
            async for event in subscription.notifications():
                await websocket.send_json(event)
                print(event)

    except ConnectionClosed:
        return
# WebSocket feed of a task's log file: after the client has sent one text
# message, the last `n` lines are sent, followed by new lines as they appear.
# While the file is idle a {"ping": ""} message is sent every 0.1 s.
@app.websocket("/log/{task_id}")
async def ws_tail_log(websocket: WebSocket, task_id: str, n: int):
    log_file = os.path.join(LOG_FOLDER, f"sist2-{task_id}.log")

    await websocket.accept()
    try:
        await websocket.receive_text()
    except ConnectionClosed:
        return

    while True:
        for line in tail(log_file, n):
            try:
                if not line:
                    # tail() produced no new data: keep the connection alive and pause.
                    await websocket.send_json({"ping": ""})
                    await asyncio.sleep(0.1)
                else:
                    await websocket.send_text(line)
            except ConnectionClosed:
                return
def main():
    """Serve the application with uvicorn on every interface, port WEBSERVER_PORT."""
    uvicorn.run(app, host="0.0.0.0", port=WEBSERVER_PORT)
def initialize_db():
    """Write the schema version and a frontend named "default" into the database."""
    db["sist2_admin"]["info"] = {"version": DB_SCHEMA_VERSION}
    db["frontends"]["default"] = Sist2Frontend.create_default("default")

    logger.info("Initialized database.")
def start_frontends():
    """Start every stored frontend that has auto-start enabled and at least one job."""
    for frontend in db["frontends"]:
        frontend: Sist2Frontend
        if not frontend.auto_start:
            continue
        if len(frontend.jobs) > 0:
            start_frontend_(frontend)
if __name__ == '__main__':
    # Startup: create or migrate the database, start the auto-start
    # frontends and the cron scheduler, then serve the API.

    def _schema_version():
        # Re-read on every check: a migration may have changed the value.
        return db["sist2_admin"]["info"]["version"]

    if not db["sist2_admin"]["info"]:
        initialize_db()

    if _schema_version() == "1":
        logger.info("Migrating to v2 database schema")
        migrate_v1_to_v2(db)

    if _schema_version() == "2":
        # There is no v2 -> v3 migration path.
        logger.error("Cannot migrate database from v2 to v3. Delete state.db to proceed.")
        exit(-1)

    start_frontends()
    cron.initialize(db, _run_job)

    logger.info("Started sist2-admin. Hello!")

    main()

View File

@@ -1,30 +0,0 @@
import os
import logging
import sys
from logging import StreamHandler
from logging.handlers import RotatingFileHandler
# Size at which the sist2-admin log file is rotated (1 MiB).
MAX_LOG_SIZE = 1 * 1024 * 1024

# Locations, overridable through environment variables of the same name.
SIST2_BINARY = os.environ.get("SIST2_BINARY", "/root/sist2")
DATA_FOLDER = os.environ.get("DATA_FOLDER", "/sist2-admin/")
LOG_FOLDER = os.path.join(DATA_FOLDER, "logs")
WEBSERVER_PORT = 8080

# Both folders are created at import time.
for _folder in (LOG_FOLDER, DATA_FOLDER):
    os.makedirs(_folder, exist_ok=True)

# Application logger: same format on stdout and in a rotating file that keeps
# one backup.
logger = logging.Logger("sist2-admin")

_log_file = os.path.join(LOG_FOLDER, "sist2-admin.log")
_log_fmt = "%(asctime)s [%(levelname)s] %(message)s"
_log_formatter = logging.Formatter(_log_fmt, datefmt='%Y-%m-%d %H:%M:%S')

console_handler = StreamHandler(sys.stdout)
file_handler = RotatingFileHandler(_log_file, mode="a", maxBytes=MAX_LOG_SIZE, backupCount=1)

for _handler in (console_handler, file_handler):
    _handler.setFormatter(_log_formatter)
    logger.addHandler(_handler)

View File

@@ -1,33 +0,0 @@
from threading import Thread
import pycron
import time
from hexlib.db import PersistentState
from config import logger
from jobs import Sist2Job
def _check_schedule(db: PersistentState, run_job):
for job in db["jobs"]:
job: Sist2Job
if job.schedule_enabled:
if pycron.is_now(job.cron_expression):
logger.info(f"Submit scan task to queue for [{job.name}]")
run_job(job)
def _cron_thread(db, run_job):
    """Check the job schedules once per minute, forever."""
    # Sleep until the next multiple of 60 seconds of the epoch clock.
    seconds_into_minute = time.time() % 60
    time.sleep(60 - seconds_into_minute)

    start = time.time()

    while True:
        _check_schedule(db, run_job)
        # Sleep up to the next 60-second step counted from `start`, so the
        # time spent in _check_schedule does not accumulate as drift.
        elapsed = time.time() - start
        time.sleep(60 - (elapsed % 60))
def initialize(db, run_job):
    """Start the scheduler in a daemon thread named "timer"."""
    scheduler = Thread(
        target=_cron_thread,
        args=(db, run_job),
        daemon=True,
        name="timer",
    )
    scheduler.start()

View File

@@ -1,317 +0,0 @@
import json
import logging
import os.path
import signal
import uuid
from datetime import datetime
from enum import Enum
from logging import FileHandler
from threading import Lock, Thread
from time import sleep
from uuid import uuid4, UUID
from hexlib.db import PersistentState
from pydantic import BaseModel
from config import logger, LOG_FOLDER
from notifications import Notifications
from sist2 import ScanOptions, IndexOptions, Sist2
from state import RUNNING_FRONTENDS
from web import Sist2Frontend
# Status values a job can hold; the string values are what gets stored and
# what JobStatus("...") lookups elsewhere in the code use.
class JobStatus(Enum):
    CREATED = "created"  # initial status of a new job
    STARTED = "started"  # set when the job is run for the first time
    INDEXED = "indexed"  # set when an index task finished with return code 0
    FAILED = "failed"    # set when an index task finished with another return code
# A scan + index job definition: its options, schedule and bookkeeping about
# the indexes it has produced.
class Sist2Job(BaseModel):
    name: str
    scan_options: ScanOptions
    index_options: IndexOptions

    # Schedule used by the cron thread when schedule_enabled is True.
    cron_expression: str
    schedule_enabled: bool = False

    # Index bookkeeping, maintained by the scan and index tasks.
    previous_index: str = None
    index_path: str = None
    previous_index_path: str = None
    last_index_date: datetime = None
    status: JobStatus = JobStatus("created")
    last_modified: datetime
    etag: str = None
    # When True, the next scan does not reuse index_path as its output.
    do_full_scan: bool = False

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @staticmethod
    def create_default(name: str):
        """Build a job called ``name`` that scans "/" with default options and a daily schedule expression."""
        defaults = dict(
            name=name,
            scan_options=ScanOptions(path="/"),
            index_options=IndexOptions(),
            last_modified=datetime.now(),
            cron_expression="0 0 * * *",
        )
        return Sist2Job(**defaults)

    # Disabled etag validator, kept for reference:
    # @validator("etag", always=True)
    # def validate_etag(cls, value, values):
    #     s = values["name"] + values["scan_options"].json() + values["index_options"].json() + values["cron_expression"]
    #     return md5(s.encode()).hexdigest()
class Sist2TaskProgress:
    """Progress counters of a running task.

    ``tn_size`` is stored as the ``store_size`` attribute.
    """

    def __init__(self, done: int = 0, count: int = 0, index_size: int = 0, tn_size: int = 0, waiting: bool = False):
        self.done, self.count = done, count
        self.index_size = index_size
        self.store_size = tn_size
        self.waiting = waiting

    def percent(self):
        """Return ``done / count``, or 0 while ``count`` is zero."""
        if not self.count:
            return 0
        return self.done / self.count
class Sist2Task:
    """Base class of the units of work executed by the task queue.

    Each task gets a random id and its own logger writing to
    ``LOG_FOLDER/sist2-<id>.log``.
    """

    def __init__(self, job: Sist2Job, display_name: str, depends_on: uuid.UUID = None):
        self.job = job
        self.display_name = display_name

        self.progress = Sist2TaskProgress()
        self.id = uuid4()
        self.pid = None
        self.started = None
        self.ended = None
        # Id of a task that must be finished before this one may start.
        self.depends_on = depends_on

        log_path = os.path.join(LOG_FOLDER, f"sist2-{self.id}.log")
        self._logger = logging.Logger(name=f"{self.id}")
        self._logger.addHandler(FileHandler(log_path))

    def json(self):
        """Return the task's public attributes as a dict."""
        public_attributes = ("id", "job", "display_name", "progress", "started", "ended", "depends_on")
        return {attribute: getattr(self, attribute) for attribute in public_attributes}

    def log_callback(self, log_json):
        """Handle one parsed log message: progress updates replace ``self.progress``, anything else is written to the task log."""
        if "progress" in log_json:
            self.progress = Sist2TaskProgress(**log_json["progress"])
            return
        if self._logger:
            self._logger.info(json.dumps(log_json))

    def run(self, sist2: Sist2, db: PersistentState):
        """Record the start time and log the start; subclasses do the actual work."""
        self.started = datetime.now()

        logger.info(f"Started task {self.display_name}")
class Sist2ScanTask(Sist2Task):
    """Task that runs a sist2 scan for its job."""

    def run(self, sist2: Sist2, db: PersistentState):
        """Run the scan and return the process return code.

        On success the job's index path and index date are saved, and a
        previous index stored at a different path is deleted.
        """
        super().run(sist2, db)

        scan_options = self.job.scan_options
        scan_options.name = self.job.name

        # Reuse the existing index as output unless a full scan was requested.
        reuse_index = self.job.index_path is not None and not self.job.do_full_scan
        scan_options.output = self.job.index_path if reuse_index else None

        def set_pid(pid):
            self.pid = pid

        return_code = sist2.scan(self.job.scan_options, logs_cb=self.log_callback, set_pid_cb=set_pid)
        self.ended = datetime.now()

        if return_code == 0:
            self.job.index_path = self.job.scan_options.output
            self.job.last_index_date = datetime.now()
            self.job.do_full_scan = False
            db["jobs"][self.job.name] = self.job
            self._logger.info(json.dumps({"sist2-admin": f"Save last_index_date={self.job.last_index_date}"}))
        else:
            self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
            logger.info(f"Task {self.display_name} failed ({return_code})")

        logger.info(f"Completed {self.display_name} ({return_code=})")

        # Remove old index
        if return_code == 0:
            stale_index_exists = (
                self.job.previous_index_path is not None
                and self.job.previous_index_path != self.job.index_path
            )
            if stale_index_exists:
                self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index_path=}"}))
                try:
                    os.remove(self.job.previous_index_path)
                except FileNotFoundError:
                    pass

            self.job.previous_index_path = self.job.index_path
            db["jobs"][self.job.name] = self.job

        return return_code
class Sist2IndexTask(Sist2Task):
    """Task that indexes the output of a scan task; it depends on that scan task."""

    def __init__(self, job: Sist2Job, display_name: str, depends_on: Sist2Task):
        super().__init__(job, display_name, depends_on=depends_on.id)

    def run(self, sist2: Sist2, db: PersistentState):
        """Index the scan output, restart running frontends on success and save the job status.

        :return: the return code of the index process
        """
        super().run(sist2, db)

        self.job.index_options.path = self.job.scan_options.output

        return_code = sist2.index(self.job.index_options, logs_cb=self.log_callback)
        self.ended = datetime.now()
        duration = self.ended - self.started

        ok = return_code == 0

        if ok:
            self.restart_running_frontends(db, sist2)

        # Update status
        self.job.status = JobStatus("indexed") if ok else JobStatus("failed")
        self.job.previous_index_path = self.job.index_path
        db["jobs"][self.job.name] = self.job

        self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}"}))

        logger.info(f"Completed {self.display_name} ({return_code=})")

        return return_code

    def restart_running_frontends(self, db: PersistentState, sist2: Sist2):
        """Terminate every running frontend and start it again with the current index paths of its jobs."""
        # Iterate over a snapshot: the table is written to inside the loop.
        for frontend_name, pid in list(RUNNING_FRONTENDS.items()):
            frontend: Sist2Frontend = db["frontends"][frontend_name]

            try:
                os.kill(pid, signal.SIGTERM)
            except ProcessLookupError:
                pass
            try:
                os.wait()
            except ChildProcessError:
                pass

            # A real list, as in the regular start path: a bare map() object
            # can only be consumed once.
            frontend.web_options.indices = [db["jobs"][j].index_path for j in frontend.jobs]

            pid = sist2.web(frontend.web_options, frontend.name)
            RUNNING_FRONTENDS[frontend_name] = pid

            self._logger.info(json.dumps({"sist2-admin": f"Restart frontend {pid=} {frontend_name=}"}))
class TaskQueue:
    """Runs submitted tasks one at a time on background threads.

    A daemon thread polls the queue every second. A task is started once no
    other task is running and the task it depends on (if any) is recorded in
    the ``task_done`` table. A task whose dependency failed is dropped.
    """

    def __init__(self, sist2: Sist2, db: PersistentState, notifications: Notifications):
        self._lock = Lock()

        self._sist2 = sist2
        self._db = db
        self._notifications = notifications

        self._tasks = {}   # task id -> {"task": ..., "thread": ...} for running tasks
        self._queue = []   # tasks waiting to be started
        self._sem = 0      # number of tasks currently running

        self._thread = Thread(target=self._check_new_task, daemon=True)
        self._thread.start()

    def _tasks_failed(self):
        """Return the ids of finished tasks whose return code is not 0."""
        return {uuid.UUID(row["id"]) for row in self._db["task_done"].sql("WHERE return_code != 0")}

    def _tasks_done(self):
        """Return the ids of all finished tasks."""
        return {uuid.UUID(row["id"]) for row in self._db["task_done"]}

    def _check_new_task(self):
        """Scheduler loop: start at most one eligible task per pass, then sleep one second."""
        while True:
            with self._lock:
                for task in list(self._queue):
                    task: Sist2Task

                    if self._sem >= 1:
                        break

                    if not task.depends_on or task.depends_on in self._tasks_done():
                        self._queue.remove(task)

                        if task.depends_on in self._tasks_failed():
                            # The task which we depend on failed, continue
                            continue

                        self._sem += 1

                        t = Thread(target=self._run_task, args=(task,))

                        self._tasks[task.id] = {
                            "task": task,
                            "thread": t,
                        }

                        t.start()
                        break
            sleep(1)

    def tasks(self):
        """Return the tasks that are currently running."""
        return [entry["task"] for entry in self._tasks.values()]

    def kill_task(self, task_id):
        """Send SIGTERM to the process of a running task.

        :param task_id: string form of the task's UUID
        :return: True when the signal was sent; False when the id is invalid
            or unknown, the task has no process yet, or the process is gone
        """
        try:
            key = UUID(task_id)
        except (ValueError, TypeError, AttributeError):
            # Not a UUID: there is nothing to kill.
            return False

        entry = self._tasks.get(key)
        if not entry:
            return False

        pid = entry["task"].pid
        if pid is None:
            # The task is registered but no pid has been recorded for it.
            return False

        logger.info(f"Killing task {task_id} (pid={pid})")
        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            return False
        return True

    def _run_task(self, task: Sist2Task):
        """Thread body: run ``task``, free the run slot and record the result."""
        try:
            task_result = task.run(self._sist2, self._db)
        except Exception:
            # A crashing task is recorded as failed instead of killing the thread.
            logger.exception(f"Task {task.display_name} crashed")
            task_result = -1
            if task.ended is None:
                task.ended = datetime.now()
        finally:
            # Always free the slot. It used to stay taken after an exception,
            # which prevented any further task from ever starting.
            with self._lock:
                del self._tasks[task.id]
                self._sem -= 1

        self._db["task_done"][task.id] = {
            "ended": task.ended,
            "started": task.started,
            "name": task.display_name,
            "return_code": task_result
        }

        if isinstance(task, Sist2IndexTask):
            self._notifications.notify({
                "message": "notifications.indexCompleted",
                "job": task.job.name
            })

    def submit(self, task: Sist2Task):
        """Append ``task`` to the waiting queue."""
        logger.info(f"Submitted task to queue {task.display_name}")

        with self._lock:
            self._queue.append(task)

View File

@@ -1,40 +0,0 @@
import asyncio
from typing import List
class Notifications:
    """Hub that forwards each notification to every registered subscriber."""

    def __init__(self):
        self._subscribers: List[Subscribe] = []

    def subscribe(self, ob):
        """Register ``ob``; its ``notify`` method is called for each notification."""
        self._subscribers.append(ob)

    def unsubscribe(self, ob):
        """Remove ``ob`` from the subscribers (raises ValueError if absent)."""
        self._subscribers.remove(ob)

    def notify(self, notification: dict):
        """Deliver ``notification`` to every subscriber registered right now."""
        # Iterate over a snapshot: removing an entry from the list while it is
        # being iterated would make the loop skip the following subscriber.
        for ob in tuple(self._subscribers):
            ob.notify(notification)
class Subscribe:
    """Async context manager that buffers notifications for one consumer.

    Entering the context registers the instance on the hub, leaving it
    unregisters the instance again.
    """

    def __init__(self, notifications: Notifications):
        self._queue = []
        self._notifications = notifications

    async def __aenter__(self):
        self._notifications.subscribe(self)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        self._notifications.unsubscribe(self)

    def notify(self, notification: dict):
        """Buffer ``notification`` until the consumer picks it up."""
        self._queue.append(notification)

    async def notifications(self):
        """Yield buffered notifications in arrival order, forever."""
        while True:
            try:
                pending = self._queue.pop(0)
            except IndexError:
                # Nothing buffered yet: poll again shortly
                await asyncio.sleep(0.1)
            else:
                yield pending

View File

@@ -1,323 +0,0 @@
import datetime
import json
import logging
import os.path
from datetime import datetime
from io import TextIOWrapper
from logging import FileHandler
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
from threading import Thread
from typing import List
from pydantic import BaseModel
from config import logger, LOG_FOLDER
class Sist2Version:
    """Version number parsed from a ``major.minor.patch`` string."""

    def __init__(self, version: str):
        self._version = version

        # Exactly three dot-separated integer components are expected
        major, minor, patch = map(int, version.split("."))
        self.major = major
        self.minor = minor
        self.patch = patch

    def __str__(self):
        return ".".join(str(part) for part in (self.major, self.minor, self.patch))
class WebOptions(BaseModel):
    """Options of the ``sist2 web`` command."""

    indices: List[str] = []

    es_url: str = "http://elasticsearch:9200"
    es_insecure_ssl: bool = False
    es_index: str = "sist2"
    bind: str = "0.0.0.0:4090"
    auth: str = None
    tag_auth: str = None
    tagline: str = "Lightning-fast file system indexer and search tool"
    dev: bool = False
    lang: str = "en"
    auth0_audience: str = None
    auth0_domain: str = None
    auth0_client_id: str = None
    auth0_public_key: str = None
    auth0_public_key_file: str = None

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def args(self):
        """Build the argument list for ``sist2 web`` (without the binary path)."""
        command = [
            "web",
            f"--es-url={self.es_url}",
            f"--es-index={self.es_index}",
            f"--bind={self.bind}",
            f"--tagline={self.tagline}",
            f"--lang={self.lang}",
        ]

        # (condition, argument) pairs; an argument is emitted when its
        # condition is truthy, in the order listed here.
        optional = (
            (self.auth0_audience, f"--auth0-audience={self.auth0_audience}"),
            (self.auth0_domain, f"--auth0-domain={self.auth0_domain}"),
            (self.auth0_client_id, f"--auth0-client-id={self.auth0_client_id}"),
            (self.auth0_public_key_file, f"--auth0-public-key-file={self.auth0_public_key_file}"),
            (self.es_insecure_ssl, "--es-insecure-ssl"),
            (self.auth, f"--auth={self.auth}"),
            (self.tag_auth, f"--tag-auth={self.tag_auth}"),
            (self.dev, "--dev"),
        )
        command.extend(argument for enabled, argument in optional if enabled)

        # The index paths are positional and come last
        command.extend(self.indices)

        return command
class IndexOptions(BaseModel):
    """Options of the ``sist2 index`` command."""

    path: str = None
    threads: int = 1
    es_url: str = "http://elasticsearch:9200"
    es_insecure_ssl: bool = False
    es_index: str = "sist2"
    incremental_index: bool = True
    script: str = ""
    script_file: str = None
    batch_size: int = 70

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def args(self):
        """Build the argument list for ``sist2 index`` (without the binary path)."""
        command = [
            "index",
            self.path,
            f"--threads={self.threads}",
            f"--es-url={self.es_url}",
            f"--es-index={self.es_index}",
            f"--batch-size={self.batch_size}",
        ]

        # Only script_file is forwarded; the inline ``script`` text itself is
        # never put on the command line by this method.
        optional = (
            (self.script_file, f"--script-file={self.script_file}"),
            (self.es_insecure_ssl, "--es-insecure-ssl"),
            (self.incremental_index, "--incremental-index"),
        )
        command.extend(argument for enabled, argument in optional if enabled)

        return command
# Values for ScanOptions.archive, which is forwarded to sist2 as --archive=<value>.
# ARCHIVE_RECURSE is the default; the other three are presumably the remaining
# modes accepted by sist2 (verify against the sist2 usage documentation).
ARCHIVE_SKIP = "skip"
ARCHIVE_LIST = "list"
ARCHIVE_SHALLOW = "shallow"
ARCHIVE_RECURSE = "recurse"
class ScanOptions(BaseModel):
    """Options of the ``sist2 scan`` command."""

    path: str
    threads: int = 1
    thumbnail_quality: int = 2
    thumbnail_size: int = 552
    thumbnail_count: int = 1
    content_size: int = 32768
    depth: int = -1
    archive: str = ARCHIVE_RECURSE
    archive_passphrase: str = None
    ocr_lang: str = None
    ocr_images: bool = False
    ocr_ebooks: bool = False
    exclude: str = None
    fast: bool = False
    treemap_threshold: float = 0.0005
    mem_buffer: int = 2000
    read_subtitles: bool = False
    fast_epub: bool = False
    checksums: bool = False
    incremental: bool = True
    optimize_index: bool = False
    output: str = None
    name: str = None
    rewrite_url: str = None
    list_file: str = None

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def args(self):
        """Build the argument list for ``sist2 scan`` (without the binary path).

        Options that always carry a value are emitted unconditionally; the
        remaining ones are only added when they are set to a truthy value.
        """
        args = ["scan", self.path, f"--threads={self.threads}", f"--thumbnail-quality={self.thumbnail_quality}",
                f"--thumbnail-count={self.thumbnail_count}", f"--thumbnail-size={self.thumbnail_size}",
                f"--content-size={self.content_size}", f"--output={self.output}", f"--depth={self.depth}",
                f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"]

        if self.incremental:
            args.append("--incremental")
        if self.optimize_index:
            args.append("--optimize-index")
        if self.rewrite_url:
            args.append(f"--rewrite-url={self.rewrite_url}")
        if self.name:
            args.append(f"--name={self.name}")
        if self.archive_passphrase:
            args.append(f"--archive-passphrase={self.archive_passphrase}")
        if self.ocr_lang:
            args.append(f"--ocr-lang={self.ocr_lang}")
        if self.ocr_ebooks:
            args.append("--ocr-ebooks")
        if self.ocr_images:
            args.append("--ocr-images")
        if self.exclude:
            args.append(f"--exclude={self.exclude}")
        if self.fast:
            args.append("--fast")
        if self.treemap_threshold:
            args.append(f"--treemap-threshold={self.treemap_threshold}")
        if self.read_subtitles:
            args.append("--read-subtitles")
        if self.fast_epub:
            args.append("--fast-epub")
        if self.checksums:
            args.append("--checksums")
        if self.list_file:
            # Hyphenated like every other option (was "--list_file")
            args.append(f"--list-file={self.list_file}")

        return args
class Sist2Index:
    """Read-only view of an index directory, backed by its ``descriptor.json``."""

    def __init__(self, path):
        self.path = path

        descriptor_path = os.path.join(path, "descriptor.json")
        with open(descriptor_path) as f:
            self._descriptor = json.load(f)

    def to_json(self):
        """Return path, version, timestamp and name of the index as a dict."""
        return dict(
            path=self.path,
            version=self.version(),
            timestamp=self.timestamp(),
            name=self.name(),
        )

    def version(self) -> Sist2Version:
        raw_version = self._descriptor["version"]
        return Sist2Version(raw_version)

    def timestamp(self) -> datetime:
        seconds = self._descriptor["timestamp"]
        return datetime.fromtimestamp(seconds)

    def name(self) -> str:
        return self._descriptor["name"]
class Sist2:
    """Launches the sist2 binary as a subprocess and relays its output to a callback."""

    def __init__(self, bin_path: str, data_directory: str):
        """
        :param bin_path: path of the sist2 executable
        :param data_directory: directory in which scan outputs are created when
            the scan options do not name an output
        """
        self._bin_path = bin_path
        self._data_dir = data_directory

    def index(self, options: IndexOptions, logs_cb):
        """Run ``sist2 index`` and block until the process has exited.

        When ``options.script`` is set, it is written to a temporary file whose
        path is stored in ``options.script_file``; otherwise ``script_file`` is
        reset to None.

        :param options: index options; ``script_file`` is overwritten
        :param logs_cb: callable that receives one dict per log message
        :return: exit code of the process
        """
        script_path = None
        if options.script:
            with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".painless", delete=False) as f:
                f.write(options.script)
            script_path = f.name
        options.script_file = script_path

        try:
            args = [
                self._bin_path,
                *options.args(),
                "--json-logs",
                "--very-verbose"
            ]
            proc = Popen(args, stdout=PIPE, stderr=PIPE)

            # stderr is drained on a helper thread while stdout is drained here
            t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
            t_stderr.start()

            self._consume_logs_stdout(logs_cb, proc)

            # The stderr consumer waits for the process, so returncode is set
            t_stderr.join()
        finally:
            # The script file was created with delete=False; remove it once the
            # process is done so temporary files do not pile up. Note that
            # options.script_file keeps the (now stale) path until the next call.
            if script_path is not None:
                try:
                    os.remove(script_path)
                except OSError as e:
                    logger.warning(f"Could not remove temporary script file {script_path}: {e}")

        return proc.returncode

    def scan(self, options: ScanOptions, logs_cb, set_pid_cb):
        """Run ``sist2 scan`` and block until the process has exited.

        When ``options.output`` is None, an output path inside the data
        directory is derived from ``options.name`` and the current time (this
        requires ``options.name`` to be a string).

        :param options: scan options; ``output`` may be filled in
        :param logs_cb: callable that receives one dict per log message
        :param set_pid_cb: callable that receives the pid of the started process
        :return: exit code of the process
        """
        if options.output is None:
            options.output = os.path.join(
                self._data_dir,
                f"scan-{options.name.replace('/', '_')}-{datetime.now()}.sist2"
            )

        args = [
            self._bin_path,
            *options.args(),
            "--json-logs",
            "--very-verbose"
        ]

        logs_cb({"sist2-admin": f"Starting sist2 command with args {args}"})
        proc = Popen(args, stdout=PIPE, stderr=PIPE)

        set_pid_cb(proc.pid)

        t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
        t_stderr.start()

        self._consume_logs_stdout(logs_cb, proc)

        t_stderr.join()

        return proc.returncode

    @staticmethod
    def _consume_logs_stderr(logs_cb, proc):
        """Forward every non-blank stderr line as ``{"stderr": line}``, then reap the process."""
        pipe_wrapper = TextIOWrapper(proc.stderr, encoding="utf8", errors="ignore")
        try:
            for line in pipe_wrapper:
                if line.strip() == "":
                    continue
                logs_cb({"stderr": line})
        finally:
            proc.wait()
            pipe_wrapper.close()

    @staticmethod
    def _consume_logs_stdout(logs_cb, proc):
        """Parse every non-blank stdout line as JSON and pass it to ``logs_cb``.

        Lines that fail to parse (or whose callback raises) are reported through
        ``logs_cb`` as a ``sist2-admin`` message instead of aborting the loop.
        """
        # The context manager closes the pipe once EOF is reached
        with TextIOWrapper(proc.stdout, encoding="utf8", errors="ignore") as pipe_wrapper:
            for line in pipe_wrapper:
                try:
                    if line.strip() == "":
                        continue
                    log_object = json.loads(line)
                    logs_cb(log_object)
                except Exception as e:
                    # Best effort: one bad line must not stop log consumption
                    try:
                        logs_cb({"sist2-admin": f"Could not decode log line: {line}; {e}"})
                    except NameError:
                        pass

    def web(self, options: WebOptions, name: str):
        """Start ``sist2 web`` in the background and return its pid.

        The process output is written as JSON lines to ``frontend-<name>.log``
        in ``LOG_FOLDER``. This method does not wait for the process.

        :param options: web options; ``auth0_public_key_file`` is overwritten
        :param name: frontend name, used for the logger and the log file name
        :return: pid of the started process
        """
        if options.auth0_public_key:
            # The key file must outlive this call since the frontend keeps
            # running, hence delete=False and no cleanup here.
            with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".txt", delete=False) as f:
                f.write(options.auth0_public_key)
            options.auth0_public_key_file = f.name
        else:
            options.auth0_public_key_file = None

        args = [
            self._bin_path,
            *options.args()
        ]

        web_logger = logging.Logger(name=f"sist2-frontend-{name}")
        web_logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"frontend-{name}.log")))

        def logs_cb(message):
            web_logger.info(json.dumps(message))

        logger.info(f"Starting frontend {' '.join(args)}")

        proc = Popen(args, stdout=PIPE, stderr=PIPE)

        # Both pipes are drained on background threads so this call returns at once
        t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
        t_stderr.start()

        t_stdout = Thread(target=self._consume_logs_stdout, args=(logs_cb, proc))
        t_stdout.start()

        return proc.pid

View File

@@ -1,79 +0,0 @@
from typing import Dict
import shutil
from hexlib.db import Table, PersistentState
import pickle
from tesseract import get_tesseract_langs
# Maps the name of each running frontend to the pid of its sist2 process.
RUNNING_FRONTENDS: Dict[str, int] = {}
# Computed once at import time by calling get_tesseract_langs().
TESSERACT_LANGS = get_tesseract_langs()
# NOTE(review): presumably the schema version the persisted state is expected
# to have after all migrations — confirm against the code that compares it.
DB_SCHEMA_VERSION = "3"
from pydantic import BaseModel
def _serialize(item):
    """Pickle pydantic models, reject raw bytes and pass anything else through.

    Raw bytes are refused because ``_deserialize`` treats every bytes value as
    a pickle payload.
    """
    if isinstance(item, BaseModel):
        return pickle.dumps(item)
    if not isinstance(item, bytes):
        return item
    raise Exception("FIXME: bytes in PickleTable")
def _deserialize(item):
if isinstance(item, bytes):
return pickle.loads(item)
return item
class PickleTable(Table):
    """Table whose pydantic model values are stored pickled.

    Values are passed through ``_serialize`` when written and through
    ``_deserialize`` when read.
    """

    def __getitem__(self, item):
        row = super().__getitem__(item)
        if not row:
            # Falsy results are returned untouched
            return row
        return {column: _deserialize(value) for column, value in row.items()}

    def __setitem__(self, key, value):
        serialized = {column: _serialize(field) for column, field in value.items()}
        super().__setitem__(key, serialized)

    def __iter__(self):
        for row in super().__iter__():
            yield {column: _deserialize(value) for column, value in row.items()}

    def sql(self, where_clause, *params):
        for row in super().sql(where_clause, *params):
            yield {column: _deserialize(value) for column, value in row.items()}
def migrate_v1_to_v2(db: PersistentState):
    """Rewrite the ``frontends`` and ``jobs`` tables and mark the state as version 2.

    A copy of the database file is written next to it before anything changes.
    Each table is read through ``PickleTable``, dropped, and written back
    through the plain ``Table`` factory, keyed by the object's ``name``.
    """
    shutil.copy(db.dbfile, db.dbfile + "-before-migrate-v2.bak")

    # (table name, column that holds the object), migrated in this order
    for table, column in (("frontends", "frontend"), ("jobs", "job")):
        # Read the existing rows with the pickle-aware table
        db._table_factory = PickleTable
        items = [row[column] for row in db[table]]
        del db[table]

        # Write them back with the plain table
        db._table_factory = Table
        for item in items:
            db[table][item.name] = item
        # NOTE(review): iterates the rewritten table once; the purpose is not
        # visible here (possibly a read-back check) — confirm before removing.
        list(db[table])

    db["sist2_admin"]["info"] = {
        "version": "2"
    }

View File

@@ -1,14 +0,0 @@
import subprocess
def get_tesseract_langs():
    """Return the languages reported by ``tesseract --list-langs``, without ``osd``."""
    output = subprocess.check_output(["tesseract", "--list-langs"]).decode()

    # The first output line is skipped (presumably a header line)
    candidates = output.split("\n")[1:]

    return [lang for lang in candidates if lang and lang != "osd"]

View File

@@ -1,28 +0,0 @@
import os.path
from typing import List
from pydantic import BaseModel
from sist2 import WebOptions
class Sist2Frontend(BaseModel):
    """Configuration of one web frontend and the jobs it serves."""

    name: str
    # Names of the jobs whose indices this frontend serves
    jobs: List[str]
    web_options: WebOptions
    running: bool = False

    auto_start: bool = False
    extra_query_args: str = ""
    custom_url: str = None

    def get_log_path(self, log_folder: str):
        """Return the path of this frontend's log file inside ``log_folder``."""
        log_file = f"frontend-{self.name}.log"
        return os.path.join(log_folder, log_file)

    @staticmethod
    def create_default(name: str):
        """Create a frontend named ``name`` with default web options and no jobs."""
        default_options = WebOptions()
        return Sist2Frontend(name=name, web_options=default_options, jobs=[])

23
sist2-vue/.gitignore vendored
View File

@@ -1,23 +0,0 @@
.DS_Store
node_modules
# local env files
.env.local
.env.*.local
# Log files
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
# Editor directories and files
.idea
.vscode
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
*.iml

View File

@@ -1,5 +0,0 @@
// Babel configuration: enables the preset provided by @vue/cli-plugin-babel.
module.exports = {
"presets": [
"@vue/cli-plugin-babel/preset"
]
}

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More