Compare commits

..

1 Commits
dev ... v1.0.11

Author SHA1 Message Date
c762d0de08 Dark theme, pdf meta, de-serialize bugfix 2019-10-30 21:48:58 -04:00
277 changed files with 5290 additions and 111535 deletions

View File

@@ -1,35 +0,0 @@
.idea/
*.sist2
docs/
test_i/
test_i_inc/
Testing/
.drone.yml
**/cmake_install.cmake
**/CMakeCache.txt
**/CMakeFiles/
LICENSE
Makefile
**/*.md
**/*.cbp
VERSION
**/node_modules/
.git/
sist2-*-linux-debug
sist2-*-linux
sist2_debug
sist2
**/libscan-test-files
**/scan_ub_test
**/scan_a_test
**/scan_test
**/ext_ffmpeg
**/ext_libmobi
**/ext_libwpd
**/core
*.a
tmp_scan/
Dockerfile
Dockerfile.arm64
docker-compose.yml
state.db

View File

@@ -1,88 +0,0 @@
kind: pipeline
type: docker
name: amd64
platform:
os: linux
arch: amd64
steps:
- name: build
image: simon987/sist2-build
commands:
- ./scripts/build.sh
- name: scp files
image: appleboy/drone-scp
settings:
host:
from_secret: SSH_HOST
port:
from_secret: SSH_PORT
user:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./VERSION
- ./sist2-x64-linux
- ./sist2-x64-linux-debug
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
---
kind: pipeline
type: docker
name: arm64
platform:
arch: arm64
steps:
- name: build
image: simon987/sist2-build-arm64
commands:
- ./scripts/build_arm64.sh
- name: scp files
image: appleboy/drone-scp
settings:
host:
from_secret: SSH_HOST
port:
from_secret: SSH_PORT
user:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./sist2-arm64-linux
- ./sist2-arm64-linux-debug
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile.arm64
auto_tag: true
auto_tag_suffix: arm64-linux
when:
event:
- tag

4
.gitattributes vendored
View File

@@ -1,3 +1,3 @@
CMakeModules/* linguist-vendored
**/*_generated.c linguist-vendored
**/*_generated.h linguist-vendored
web/js/*.min.js linguist-vendored
web/css/*.min.css linguist-vendored

View File

@@ -1,40 +0,0 @@
---
name: "🐞 Bug Report"
about: Submit a bug report
title: ''
labels: bug
assignees: ''
---
**Device Information (please complete the following information):**
- OS: `[e.g., Ubuntu 20.04, WSL2]`
- Deployment: `[Linux, Linux ARM64 or Docker]`
- Browser *(if relevant)*: `[e.g., chrome, safari]`
- SIST2 Version: `[e.g., v2.9.0]`
- Elasticsearch Version *(if relevant)* : ``
**Command with arguments**
<!-- `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0` -->
**Describe the bug**
<!-- A clear and concise description of what the bug is. -->
**Steps To Reproduce**
Please be specific!
1. Go to '...'
2. Click on '....'
3. etc.
**Expected behavior**
<!-- A clear and concise description of what you expected to happen. -->
**Actual Behavior**
<!-- A clear and concise description of what actually happens. -->
**Screenshots**
<!-- If applicable, add screenshots to help explain your problem. -->
**Additional context**
<!-- Add any other context about the problem here. If applicable, please include why you think the bug is occurring and/or troubleshooting you have already performed. -->
<!-- If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net. -->

View File

@@ -1,5 +0,0 @@
blank_issues_enabled: false
contact_links:
- name: SIST2 Documentation
url: https://github.com/simon987/sist2/blob/master/docs/USAGE.md
about: Check out the SIST2 documentation for answers to common questions

View File

@@ -1,18 +0,0 @@
---
name: "🚀 Feature Request"
about: Suggest an idea for SIST2
title: ''
assignees: ''
---
**Which SIST2 component is your Feature Request related to?**
<!-- e.g., Scan, Index, or Web? -->
**Is your feature request related to a problem? Please describe.**
<!-- A clear and concise description of what the problem is. e.g., "I'm always frustrated when [...]" -->
**What would you like to see happen?**
<!-- A clear and concise description of what you want to happen. -->
**Additional context**
<!-- Add any other context or screenshots about the feature request here. -->

View File

@@ -1,18 +0,0 @@
---
name: Issue template
about: General
title: ''
labels: ''
assignees: ''
---
sist2 version:
Platform (Linux or Docker, x86-64 or arm64):
Elasticsearch version:
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0`
If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net.

22
.gitignore vendored
View File

@@ -1,5 +1,6 @@
.idea
thumbs
test
*.cbp
CMakeCache.txt
CMakeFiles
@@ -9,27 +10,8 @@ Makefile
*.out
LOG
sist2*
!sist2-vue/
!sist2-admin
!sist2_admin
!sist2.py
*.sist2/
index.sist2/
bundle*.css
bundle.js
*.a
vgcore.*
build/
third-party/argparse
*.idx/
VERSION
git_hash.h
Testing/
test_i
test_i_inc
node_modules/
.cmake/
i_inc/
state.db
*.pyc
!sist2-admin/frontend/dist
*.js.map

30
.gitmodules vendored
View File

@@ -1,12 +1,18 @@
[submodule "third-party/argparse"]
path = third-party/argparse
url = https://github.com/simon987/argparse
[submodule "third-party/libscan/third-party/utf8.h"]
path = third-party/libscan/third-party/utf8.h
url = https://github.com/sheredom/utf8.h
[submodule "third-party/libscan/third-party/antiword"]
path = third-party/libscan/third-party/antiword
url = https://github.com/simon987/antiword
[submodule "third-party/libscan/third-party/libmobi"]
path = third-party/libscan/third-party/libmobi
url = https://github.com/bfabiszewski/libmobi
[submodule "argparse"]
path = argparse
url = https://github.com/cofyc/argparse
[submodule "cJSON"]
path = cJSON
url = https://github.com/DaveGamble/cJSON
[submodule "lib/mupdf"]
path = lib/mupdf
url = git://git.ghostscript.com/mupdf.git
[submodule "lib/onion"]
path = lib/onion
url = https://github.com/davidmoreno/onion
[submodule "lib/ffmpeg"]
path = lib/ffmpeg
url = https://git.ffmpeg.org/ffmpeg.git
[submodule "lmdb"]
path = lmdb
url = https://github.com/LMDB/lmdb

View File

@@ -1,154 +1,230 @@
cmake_minimum_required(VERSION 3.7)
set(CMAKE_C_STANDARD 11)
option(WITH_SIST2 "Build main executable" ON)
option(WITH_SIST2_SCAN "Build scan executable" ON)
project(sist2 C)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")
option(SIST_DEBUG "Build a debug executable" on)
option(SIST_FAST "Enable more optimisation flags" off)
option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
add_compile_definitions(
"SIST_PLATFORM=${SIST_PLATFORM}"
)
if (SIST_DEBUG)
add_compile_definitions(
"SIST_DEBUG=${SIST_DEBUG}"
)
endif()
add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
add_executable(sist2
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h
src/index/elastic.c src/index/elastic.h
src/util.c src/util.h
src/ctx.h src/types.h
src/log.c src/log.h
src/cli.c src/cli.h
src/stats.c src/stats.h src/ctx.c
src/parsing/sidecar.c src/parsing/sidecar.h
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(PkgConfig REQUIRED)
pkg_search_module(GLIB REQUIRED glib-2.0)
find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB
NAMES libmagic.so.1 magic
PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
)
target_include_directories(
sist2 PUBLIC
${CMAKE_SOURCE_DIR}/third-party/onion/src/
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/
${GLIB_INCLUDE_DIRS}
)
target_compile_options(
sist2
PRIVATE
-fPIC
)
if (SIST_DEBUG)
target_compile_options(
if (WITH_SIST2)
add_executable(
sist2
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
# -O2
)
target_link_options(
sist2
PRIVATE
-fsanitize=address
-static-libasan
)
set_target_properties(
sist2
PROPERTIES
OUTPUT_NAME sist2_debug
)
elseif (SIST_FAST)
target_compile_options(
sist2
PRIVATE
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/parsing/media.h src/parsing/media.c
src/parsing/pdf.h src/parsing/pdf.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/parsing/text.h src/parsing/text.c
src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h
src/index/elastic.c src/index/elastic.h
src/util.c src/util.h
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else ()
target_compile_options(
sist2
PRIVATE
# argparse
argparse/argparse.h argparse/argparse.c
-Ofast
-fno-stack-protector
-fomit-frame-pointer
# cJSON
cJSON/cJSON.h cJSON/cJSON.c
# LMDB
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h
)
endif ()
add_dependencies(
sist2
scan
argparse
)
if (WITH_SIST2_SCAN)
add_executable(
sist2_scan
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/parsing/media.h src/parsing/media.c
src/parsing/pdf.h src/parsing/pdf.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/parsing/text.h src/parsing/text.c
src/util.c src/util.h
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
target_link_libraries(
sist2
# argparse
argparse/argparse.h argparse/argparse.c
z
lmdb
cjson
argparse
${GLIB_LDFLAGS}
unofficial::mongoose::mongoose
CURL::libcurl
# cJSON
cJSON/cJSON.h cJSON/cJSON.c
pthread
# LMDB
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h
)
endif ()
c
find_package(PkgConfig REQUIRED)
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")
scan
find_package(LibMagic REQUIRED)
find_package(FFmpeg REQUIRED)
find_package(OpenSSL REQUIRED)
find_package(Freetype REQUIRED)
${MAGIC_LIB}
)
pkg_check_modules(GLIB REQUIRED glib-2.0)
pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
pkg_check_modules(UUID REQUIRED uuid)
add_definitions(${LIBMAGIC_CFLAGS_OTHER})
add_definitions(${UUID_CFLAGS_OTHER})
add_definitions(${GLIB_CFLAGS_OTHER})
add_definitions(${GOBJECT_CFLAGS_OTHER})
add_definitions(${FREETYPE_CFLAGS_OTHER})
list(REMOVE_ITEM GLIB_LIBRARIES pcre)
list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
list(REMOVE_ITEM UUID_LIBRARIES pcre)
if (WITH_SIST2)
target_include_directories(
sist2 PUBLIC
${LIBMAGIC_INCLUDE_DIRS}
${GOBJECT_INCLUDE_DIRS}
${OPENSSL_INCLUDE_DIR}
${FFMPEG_INCLUDE_DIRS}
${GLIB_INCLUDE_DIRS}
${FREETYPE_INCLUDE_DIRS}
${UUID_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
${PROJECT_SOURCE_DIR}/lib/onion/src/
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
)
target_link_directories(
sist2 PUBLIC
${UUID_LIBRARY_DIRS}
${FFMPEG_LIBRARY_DIRS}
)
target_compile_options(sist2
PRIVATE
-O3
# -march=native
-fno-stack-protector
-fomit-frame-pointer
)
TARGET_LINK_LIBRARIES(
sist2
${GLIB_LIBRARIES}
${GOBJECT_LIBRARIES}
${UUID_LIBRARIES}
# ffmpeg
${PROJECT_SOURCE_DIR}/lib/libavcodec.a
${PROJECT_SOURCE_DIR}/lib/libavformat.a
${PROJECT_SOURCE_DIR}/lib/libavutil.a
${PROJECT_SOURCE_DIR}/lib/libswscale.a
${PROJECT_SOURCE_DIR}/lib/libswresample.a
# ${FFMPEG_LIBRARIES}
# swscale
# mupdf
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
# onion
${PROJECT_SOURCE_DIR}/lib/libonion_static.a
pthread
curl
m
bz2
magic
)
endif ()
if (WITH_SIST2_SCAN)
set_target_properties(
sist2_scan
PROPERTIES COMPILE_DEFINITIONS SIST_SCAN_ONLY
)
set_target_properties(
sist2_scan
PROPERTIES
COMPILE_DEFINITIONS SIST_SCAN_ONLY
LINK_FLAGS -static
)
target_include_directories(
sist2_scan PUBLIC
${LIBMAGIC_INCLUDE_DIRS}
${GOBJECT_INCLUDE_DIRS}
${OPENSSL_INCLUDE_DIR}
${FFMPEG_INCLUDE_DIRS}
${GLIB_INCLUDE_DIRS}
${UUID_INCLUDE_DIRS}
${FREETYPE_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
${PROJECT_SOURCE_DIR}/lib/onion/src/
${PROJECT_SOURCE_DIR}/lib/mupdf/include/
)
target_link_directories(
sist2_scan PUBLIC
${UUID_LIBRARY_DIRS}
${FFMPEG_LIBRARY_DIRS}
)
target_compile_options(sist2_scan
PRIVATE
-O3
# -march=native
-fno-stack-protector
-fomit-frame-pointer
)
TARGET_LINK_LIBRARIES(
sist2_scan
${GLIB_LIBRARIES}
${GOBJECT_LIBRARIES}
${UUID_LIBRARIES}
# ffmpeg
${PROJECT_SOURCE_DIR}/lib/libavcodec.a
${PROJECT_SOURCE_DIR}/lib/libavformat.a
${PROJECT_SOURCE_DIR}/lib/libavutil.a
${PROJECT_SOURCE_DIR}/lib/libswscale.a
${PROJECT_SOURCE_DIR}/lib/libswresample.a
# mupdf
${PROJECT_SOURCE_DIR}/lib/libmupdf.a
${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
${PROJECT_SOURCE_DIR}/lib/libbz2.a
${PROJECT_SOURCE_DIR}/lib/libmagic.a
pthread
m
)
endif ()
add_custom_target(
before_sist2
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/scripts/before_build.sh
)
add_dependencies(sist2 before_sist2)
IF (WITH_SIST2)
add_dependencies(sist2 before_sist2)
else ()
add_dependencies(sist2_scan before_sist2)
endif ()

80
CMakeModules/FindFFmpeg.cmake vendored Normal file
View File

@@ -0,0 +1,80 @@
# - Try to find ffmpeg libraries (libavcodec, libavformat and libavutil)
# Once done this will define
#
# FFMPEG_FOUND - system has ffmpeg or libav
# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
# FFMPEG_LIBRARIES - Link these to use ffmpeg
# FFMPEG_LIBAVCODEC
# FFMPEG_LIBAVFORMAT
# FFMPEG_LIBAVUTIL
#
# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
# Modified for other libraries by Lasse Kärkkäinen <tronic>
# Modified for Hedgewars by Stepik777
#
# Redistribution and use is allowed according to the terms of the New
# BSD license.
#
if (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
# in cache already
set(FFMPEG_FOUND TRUE)
else (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
# use pkg-config to get the directories and then use these values
# in the FIND_PATH() and FIND_LIBRARY() calls
find_package(PkgConfig)
if (PKG_CONFIG_FOUND)
pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
pkg_check_modules(_FFMPEG_AVFORMAT libavformat)
pkg_check_modules(_FFMPEG_AVUTIL libavutil)
endif (PKG_CONFIG_FOUND)
find_path(FFMPEG_AVCODEC_INCLUDE_DIR
NAMES libavcodec/avcodec.h
PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS} /usr/include /usr/local/include /opt/local/include /sw/include
PATH_SUFFIXES ffmpeg libav
)
find_library(FFMPEG_LIBAVCODEC
NAMES avcodec
PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
)
find_library(FFMPEG_LIBAVFORMAT
NAMES avformat
PATHS ${_FFMPEG_AVFORMAT_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
)
find_library(FFMPEG_LIBAVUTIL
NAMES avutil
PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
)
if (FFMPEG_LIBAVCODEC AND FFMPEG_LIBAVFORMAT)
set(FFMPEG_FOUND TRUE)
endif()
if (FFMPEG_FOUND)
set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})
set(FFMPEG_LIBRARIES
${FFMPEG_LIBAVCODEC}
${FFMPEG_LIBAVFORMAT}
${FFMPEG_LIBAVUTIL}
)
endif (FFMPEG_FOUND)
if (FFMPEG_FOUND)
if (NOT FFMPEG_FIND_QUIETLY)
message(STATUS "Found FFMPEG or Libav: ${FFMPEG_LIBRARIES}, ${FFMPEG_INCLUDE_DIR}")
endif (NOT FFMPEG_FIND_QUIETLY)
else (FFMPEG_FOUND)
if (FFMPEG_FIND_REQUIRED)
message(FATAL_ERROR "Could not find libavcodec or libavformat or libavutil")
endif (FFMPEG_FIND_REQUIRED)
endif (FFMPEG_FOUND)
endif (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)

100
CMakeModules/FindLibMagic.cmake vendored Normal file
View File

@@ -0,0 +1,100 @@
#-------------------------------------------------------------------------------
# Copyright (c) 2013-2013, Lars Baehren <lbaehren@gmail.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#-------------------------------------------------------------------------------
# - Check for the presence of LIBMAGIC
#
# The following variables are set when LIBMAGIC is found:
# LIBMAGIC_FOUND = Set to true, if all components of LIBMAGIC have been
# found.
# LIBMAGIC_INCLUDES = Include path for the header files of LIBMAGIC
# LIBMAGIC_LIBRARIES = Link these to use LIBMAGIC
# LIBMAGIC_LFLAGS = Linker flags (optional)
if (NOT LIBMAGIC_FOUND)
if (NOT LIBMAGIC_ROOT_DIR)
set (LIBMAGIC_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
endif (NOT LIBMAGIC_ROOT_DIR)
##____________________________________________________________________________
## Check for the header files
find_path (LIBMAGIC_FILE_H
NAMES file/file.h
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
PATH_SUFFIXES include
)
if (LIBMAGIC_FILE_H)
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_FILE_H})
endif (LIBMAGIC_FILE_H)
find_path (LIBMAGIC_MAGIC_H
NAMES magic.h
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
PATH_SUFFIXES include include/linux
)
if (LIBMAGIC_MAGIC_H)
list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_MAGIC_H})
endif (LIBMAGIC_MAGIC_H)
list (REMOVE_DUPLICATES LIBMAGIC_INCLUDES)
##____________________________________________________________________________
## Check for the library
find_library (LIBMAGIC_LIBRARIES magic
HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
PATH_SUFFIXES lib
)
##____________________________________________________________________________
## Actions taken when all components have been found
#find_package_handle_standard_args (LIBMAGIC DEFAULT_MSG LIBMAGIC_LIBRARIES LIBMAGIC_INCLUDES)
if (LIBMAGIC_FOUND)
if (NOT LIBMAGIC_FIND_QUIETLY)
message (STATUS "Found components for LIBMAGIC")
message (STATUS "LIBMAGIC_ROOT_DIR = ${LIBMAGIC_ROOT_DIR}")
message (STATUS "LIBMAGIC_INCLUDES = ${LIBMAGIC_INCLUDES}")
message (STATUS "LIBMAGIC_LIBRARIES = ${LIBMAGIC_LIBRARIES}")
endif (NOT LIBMAGIC_FIND_QUIETLY)
else (LIBMAGIC_FOUND)
if (LIBMAGIC_FIND_REQUIRED)
message (FATAL_ERROR "Could not find LIBMAGIC!")
endif (LIBMAGIC_FIND_REQUIRED)
endif (LIBMAGIC_FOUND)
##____________________________________________________________________________
## Mark advanced variables
mark_as_advanced (
LIBMAGIC_ROOT_DIR
LIBMAGIC_INCLUDES
LIBMAGIC_LIBRARIES
)
endif (NOT LIBMAGIC_FOUND)

478
CMakeModules/FindOpenSSL.cmake vendored Normal file
View File

@@ -0,0 +1,478 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
macro(_OpenSSL_test_and_find_dependencies ssl_library crypto_library)
if((CMAKE_SYSTEM_NAME STREQUAL "Linux") AND
(("${ssl_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") OR
("${crypto_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$")))
set(_OpenSSL_has_dependencies TRUE)
find_package(Threads)
else()
set(_OpenSSL_has_dependencies FALSE)
endif()
endmacro()
function(_OpenSSL_add_dependencies libraries_var library)
if(CMAKE_THREAD_LIBS_INIT)
list(APPEND ${libraries_var} ${CMAKE_THREAD_LIBS_INIT})
endif()
list(APPEND ${libraries_var} ${CMAKE_DL_LIBS})
set(${libraries_var} ${${libraries_var}} PARENT_SCOPE)
endfunction()
function(_OpenSSL_target_add_dependencies target)
if(_OpenSSL_has_dependencies)
set_property( TARGET ${target} APPEND PROPERTY INTERFACE_LINK_LIBRARIES Threads::Threads )
set_property( TARGET ${target} APPEND PROPERTY INTERFACE_LINK_LIBRARIES ${CMAKE_DL_LIBS} )
endif()
endfunction()
if (UNIX)
find_package(PkgConfig QUIET)
pkg_check_modules(_OPENSSL QUIET openssl)
endif ()
# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES
if(OPENSSL_USE_STATIC_LIBS)
set(_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
if(WIN32)
set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
else()
set(CMAKE_FIND_LIBRARY_SUFFIXES .a )
endif()
endif()
if (WIN32)
# http://www.slproweb.com/products/Win32OpenSSL.html
set(_OPENSSL_ROOT_HINTS
${OPENSSL_ROOT_DIR}
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (32-bit)_is1;Inno Setup: App Path]"
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (64-bit)_is1;Inno Setup: App Path]"
ENV OPENSSL_ROOT_DIR
)
file(TO_CMAKE_PATH "$ENV{PROGRAMFILES}" _programfiles)
set(_OPENSSL_ROOT_PATHS
"${_programfiles}/OpenSSL"
"${_programfiles}/OpenSSL-Win32"
"${_programfiles}/OpenSSL-Win64"
"C:/OpenSSL/"
"C:/OpenSSL-Win32/"
"C:/OpenSSL-Win64/"
)
unset(_programfiles)
else ()
set(_OPENSSL_ROOT_HINTS
${OPENSSL_ROOT_DIR}
ENV OPENSSL_ROOT_DIR
)
endif ()
set(_OPENSSL_ROOT_HINTS_AND_PATHS
HINTS ${_OPENSSL_ROOT_HINTS}
PATHS ${_OPENSSL_ROOT_PATHS}
)
find_path(OPENSSL_INCLUDE_DIR
NAMES
openssl/ssl.h
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_INCLUDEDIR}
PATH_SUFFIXES
include
)
if(WIN32 AND NOT CYGWIN)
if(MSVC)
# /MD and /MDd are the standard values - if someone wants to use
# others, the libnames have to change here too
# use also ssl and ssleay32 in debug as fallback for openssl < 0.9.8b
# enable OPENSSL_MSVC_STATIC_RT to get the libs build /MT (Multithreaded no-DLL)
# In Visual C++ naming convention each of these four kinds of Windows libraries has it's standard suffix:
# * MD for dynamic-release
# * MDd for dynamic-debug
# * MT for static-release
# * MTd for static-debug
# Implementation details:
# We are using the libraries located in the VC subdir instead of the parent directory even though :
# libeay32MD.lib is identical to ../libeay32.lib, and
# ssleay32MD.lib is identical to ../ssleay32.lib
# enable OPENSSL_USE_STATIC_LIBS to use the static libs located in lib/VC/static
if (OPENSSL_MSVC_STATIC_RT)
set(_OPENSSL_MSVC_RT_MODE "MT")
else ()
set(_OPENSSL_MSVC_RT_MODE "MD")
endif ()
# Since OpenSSL 1.1, lib names are like libcrypto32MTd.lib and libssl32MTd.lib
if( "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" )
set(_OPENSSL_MSVC_ARCH_SUFFIX "64")
else()
set(_OPENSSL_MSVC_ARCH_SUFFIX "32")
endif()
if(OPENSSL_USE_STATIC_LIBS)
set(_OPENSSL_PATH_SUFFIXES
"lib/VC/static"
"VC/static"
"lib"
)
else()
set(_OPENSSL_PATH_SUFFIXES
"lib/VC"
"VC"
"lib"
)
endif ()
find_library(LIB_EAY_DEBUG
NAMES
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
libcrypto${_OPENSSL_MSVC_RT_MODE}d
libcryptod
libeay32${_OPENSSL_MSVC_RT_MODE}d
libeay32d
cryptod
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(LIB_EAY_RELEASE
NAMES
libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
libcrypto${_OPENSSL_MSVC_RT_MODE}
libcrypto
libeay32${_OPENSSL_MSVC_RT_MODE}
libeay32
crypto
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(SSL_EAY_DEBUG
NAMES
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
libssl${_OPENSSL_MSVC_RT_MODE}d
libssld
ssleay32${_OPENSSL_MSVC_RT_MODE}d
ssleay32d
ssld
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
find_library(SSL_EAY_RELEASE
NAMES
libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
libssl${_OPENSSL_MSVC_RT_MODE}
libssl
ssleay32${_OPENSSL_MSVC_RT_MODE}
ssleay32
ssl
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
${_OPENSSL_PATH_SUFFIXES}
)
set(LIB_EAY_LIBRARY_DEBUG "${LIB_EAY_DEBUG}")
set(LIB_EAY_LIBRARY_RELEASE "${LIB_EAY_RELEASE}")
set(SSL_EAY_LIBRARY_DEBUG "${SSL_EAY_DEBUG}")
set(SSL_EAY_LIBRARY_RELEASE "${SSL_EAY_RELEASE}")
include(${CMAKE_CURRENT_LIST_DIR}/SelectLibraryConfigurations.cmake)
select_library_configurations(LIB_EAY)
select_library_configurations(SSL_EAY)
mark_as_advanced(LIB_EAY_LIBRARY_DEBUG LIB_EAY_LIBRARY_RELEASE
SSL_EAY_LIBRARY_DEBUG SSL_EAY_LIBRARY_RELEASE)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY_LIBRARY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY_LIBRARY} )
elseif(MINGW)
# same player, for MinGW
set(LIB_EAY_NAMES crypto libeay32)
set(SSL_EAY_NAMES ssl ssleay32)
find_library(LIB_EAY
NAMES
${LIB_EAY_NAMES}
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
"lib/MinGW"
"lib"
)
find_library(SSL_EAY
NAMES
${SSL_EAY_NAMES}
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
PATH_SUFFIXES
"lib/MinGW"
"lib"
)
mark_as_advanced(SSL_EAY LIB_EAY)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
unset(LIB_EAY_NAMES)
unset(SSL_EAY_NAMES)
else()
# Not sure what to pick for -say- intel, let's use the toplevel ones and hope someone report issues:
find_library(LIB_EAY
NAMES
libcrypto
libeay32
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
find_library(SSL_EAY
NAMES
libssl
ssleay32
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
mark_as_advanced(SSL_EAY LIB_EAY)
set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
endif()
else()
find_library(OPENSSL_SSL_LIBRARY
NAMES
ssl
ssleay32
ssleay32MD
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
find_library(OPENSSL_CRYPTO_LIBRARY
NAMES
crypto
NAMES_PER_DIR
${_OPENSSL_ROOT_HINTS_AND_PATHS}
HINTS
${_OPENSSL_LIBDIR}
PATH_SUFFIXES
lib
)
mark_as_advanced(OPENSSL_CRYPTO_LIBRARY OPENSSL_SSL_LIBRARY)
endif()
# compat defines
set(OPENSSL_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY})
set(OPENSSL_CRYPTO_LIBRARIES ${OPENSSL_CRYPTO_LIBRARY})
_OpenSSL_test_and_find_dependencies("${OPENSSL_SSL_LIBRARY}" "${OPENSSL_CRYPTO_LIBRARY}")
if(_OpenSSL_has_dependencies)
_OpenSSL_add_dependencies( OPENSSL_SSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}" )
_OpenSSL_add_dependencies( OPENSSL_CRYPTO_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}" )
endif()
function(from_hex HEX DEC)
string(TOUPPER "${HEX}" HEX)
set(_res 0)
string(LENGTH "${HEX}" _strlen)
while (_strlen GREATER 0)
math(EXPR _res "${_res} * 16")
string(SUBSTRING "${HEX}" 0 1 NIBBLE)
string(SUBSTRING "${HEX}" 1 -1 HEX)
if (NIBBLE STREQUAL "A")
math(EXPR _res "${_res} + 10")
elseif (NIBBLE STREQUAL "B")
math(EXPR _res "${_res} + 11")
elseif (NIBBLE STREQUAL "C")
math(EXPR _res "${_res} + 12")
elseif (NIBBLE STREQUAL "D")
math(EXPR _res "${_res} + 13")
elseif (NIBBLE STREQUAL "E")
math(EXPR _res "${_res} + 14")
elseif (NIBBLE STREQUAL "F")
math(EXPR _res "${_res} + 15")
else()
math(EXPR _res "${_res} + ${NIBBLE}")
endif()
string(LENGTH "${HEX}" _strlen)
endwhile()
set(${DEC} ${_res} PARENT_SCOPE)
endfunction()
if(OPENSSL_INCLUDE_DIR AND EXISTS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h")
file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" openssl_version_str
REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
if(openssl_version_str)
# The version number is encoded as 0xMNNFFPPS: major minor fix patch status
# The status gives if this is a developer or prerelease and is ignored here.
# Major, minor, and fix directly translate into the version numbers shown in
# the string. The patch field translates to the single character suffix that
# indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
# on.
string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
"\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
# 96 is the ASCII code of 'a' minus 1
math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
unset(_tmp)
# Once anyone knows how OpenSSL would call the patch versions beyond 'z'
# this should be updated to handle that, too. This has not happened yet
# so it is simply ignored here for now.
string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
endif ()
set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
endif ()
endif ()
set(OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARIES} )
list(REMOVE_DUPLICATES OPENSSL_LIBRARIES)
foreach(_comp IN LISTS OpenSSL_FIND_COMPONENTS)
if(_comp STREQUAL "Crypto")
if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
(EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
)
set(OpenSSL_${_comp}_FOUND TRUE)
else()
set(OpenSSL_${_comp}_FOUND FALSE)
endif()
elseif(_comp STREQUAL "SSL")
if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
(EXISTS "${OPENSSL_SSL_LIBRARY}" OR
EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
)
set(OpenSSL_${_comp}_FOUND TRUE)
else()
set(OpenSSL_${_comp}_FOUND FALSE)
endif()
else()
message(WARNING "${_comp} is not a valid OpenSSL component")
set(OpenSSL_${_comp}_FOUND FALSE)
endif()
endforeach()
unset(_comp)
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
find_package_handle_standard_args(OpenSSL
REQUIRED_VARS
OPENSSL_CRYPTO_LIBRARY
OPENSSL_INCLUDE_DIR
VERSION_VAR
OPENSSL_VERSION
HANDLE_COMPONENTS
FAIL_MESSAGE
"Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the system variable OPENSSL_ROOT_DIR"
)
mark_as_advanced(OPENSSL_INCLUDE_DIR OPENSSL_LIBRARIES)
if(OPENSSL_FOUND)
if(NOT TARGET OpenSSL::Crypto AND
(EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
)
add_library(OpenSSL::Crypto UNKNOWN IMPORTED)
set_target_properties(OpenSSL::Crypto PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
if(EXISTS "${OPENSSL_CRYPTO_LIBRARY}")
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}")
endif()
if(EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
IMPORTED_LOCATION_RELEASE "${LIB_EAY_LIBRARY_RELEASE}")
endif()
if(EXISTS "${LIB_EAY_LIBRARY_DEBUG}")
set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
IMPORTED_LOCATION_DEBUG "${LIB_EAY_LIBRARY_DEBUG}")
endif()
_OpenSSL_target_add_dependencies(OpenSSL::Crypto)
endif()
if(NOT TARGET OpenSSL::SSL AND
(EXISTS "${OPENSSL_SSL_LIBRARY}" OR
EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
)
add_library(OpenSSL::SSL UNKNOWN IMPORTED)
set_target_properties(OpenSSL::SSL PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
if(EXISTS "${OPENSSL_SSL_LIBRARY}")
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}")
endif()
if(EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
IMPORTED_LOCATION_RELEASE "${SSL_EAY_LIBRARY_RELEASE}")
endif()
if(EXISTS "${SSL_EAY_LIBRARY_DEBUG}")
set_property(TARGET OpenSSL::SSL APPEND PROPERTY
IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
IMPORTED_LOCATION_DEBUG "${SSL_EAY_LIBRARY_DEBUG}")
endif()
if(TARGET OpenSSL::Crypto)
set_target_properties(OpenSSL::SSL PROPERTIES
INTERFACE_LINK_LIBRARIES OpenSSL::Crypto)
endif()
_OpenSSL_target_add_dependencies(OpenSSL::SSL)
endif()
endif()
# Restore the original find library ordering
if(OPENSSL_USE_STATIC_LIBS)
set(CMAKE_FIND_LIBRARY_SUFFIXES ${_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
endif()

View File

@@ -0,0 +1,268 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
# internal helper macro
macro(_FPHSA_FAILURE_MESSAGE _msg)
set (__msg "${_msg}")
if (FPHSA_REASON_FAILURE_MESSAGE)
string(APPEND __msg "\n Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n")
endif()
if (${_NAME}_FIND_REQUIRED)
message(FATAL_ERROR "${__msg}")
else ()
if (NOT ${_NAME}_FIND_QUIETLY)
message(STATUS "${__msg}")
endif ()
endif ()
endmacro()
# internal helper macro to generate the failure message when used in CONFIG_MODE:
macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
# <PackageName>_CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found:
if(${_NAME}_CONFIG)
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
else()
# If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version.
# List them all in the error message:
if(${_NAME}_CONSIDERED_CONFIGS)
set(configsText "")
list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
math(EXPR configsCount "${configsCount} - 1")
foreach(currentConfigIndex RANGE ${configsCount})
list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
string(APPEND configsText "\n ${filename} (version ${version})")
endforeach()
if (${_NAME}_NOT_FOUND_MESSAGE)
if (FPHSA_REASON_FAILURE_MESSAGE)
string(PREPEND FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n ")
else()
set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}")
endif()
else()
string(APPEND configsText "\n")
endif()
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${configsText}")
else()
# Simple case: No Config-file was found at all:
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
endif()
endif()
endmacro()
function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)
# Set up the arguments for `cmake_parse_arguments`.
set(options CONFIG_MODE HANDLE_COMPONENTS)
set(oneValueArgs FAIL_MESSAGE REASON_FAILURE_MESSAGE VERSION_VAR FOUND_VAR)
set(multiValueArgs REQUIRED_VARS)
# Check whether we are in 'simple' or 'extended' mode:
set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} )
list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)
if(${INDEX} EQUAL -1)
set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
set(FPHSA_REQUIRED_VARS ${ARGN})
set(FPHSA_VERSION_VAR)
else()
cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN})
if(FPHSA_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
endif()
if(NOT FPHSA_FAIL_MESSAGE)
set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG")
endif()
# In config-mode, we rely on the variable <PackageName>_CONFIG, which is set by find_package()
# when it successfully found the config-file, including version checking:
if(FPHSA_CONFIG_MODE)
list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
endif()
if(NOT FPHSA_REQUIRED_VARS)
message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
endif()
endif()
# now that we collected all arguments, process them
if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
endif()
list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)
string(TOUPPER ${_NAME} _NAME_UPPER)
string(TOLOWER ${_NAME} _NAME_LOWER)
if(FPHSA_FOUND_VAR)
if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$" OR FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
set(_FOUND_VAR ${FPHSA_FOUND_VAR})
else()
message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
endif()
else()
set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
endif()
# collect all variables which were not found, so they can be printed, so the
# user knows better what went wrong (#6375)
set(MISSING_VARS "")
set(DETAILS "")
# check if all passed variables are valid
set(FPHSA_FOUND_${_NAME} TRUE)
foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
if(NOT ${_CURRENT_VAR})
set(FPHSA_FOUND_${_NAME} FALSE)
string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
else()
string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
endif()
endforeach()
if(FPHSA_FOUND_${_NAME})
set(${_NAME}_FOUND TRUE)
set(${_NAME_UPPER}_FOUND TRUE)
else()
set(${_NAME}_FOUND FALSE)
set(${_NAME_UPPER}_FOUND FALSE)
endif()
# component handling
unset(FOUND_COMPONENTS_MSG)
unset(MISSING_COMPONENTS_MSG)
if(FPHSA_HANDLE_COMPONENTS)
foreach(comp ${${_NAME}_FIND_COMPONENTS})
if(${_NAME}_${comp}_FOUND)
if(NOT DEFINED FOUND_COMPONENTS_MSG)
set(FOUND_COMPONENTS_MSG "found components:")
endif()
string(APPEND FOUND_COMPONENTS_MSG " ${comp}")
else()
if(NOT DEFINED MISSING_COMPONENTS_MSG)
set(MISSING_COMPONENTS_MSG "missing components:")
endif()
string(APPEND MISSING_COMPONENTS_MSG " ${comp}")
if(${_NAME}_FIND_REQUIRED_${comp})
set(${_NAME}_FOUND FALSE)
string(APPEND MISSING_VARS " ${comp}")
endif()
endif()
endforeach()
set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
string(APPEND DETAILS "[c${COMPONENT_MSG}]")
endif()
# version handling:
set(VERSION_MSG "")
set(VERSION_OK TRUE)
# check with DEFINED here as the requested or found version may be "0"
if (DEFINED ${_NAME}_FIND_VERSION)
if(DEFINED ${FPHSA_VERSION_VAR})
set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})
if(${_NAME}_FIND_VERSION_EXACT) # exact version required
# count the dots in the version string
string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
# add one dot because there is one dot more than there are components
string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
# Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
# is at most 4 here. Therefore a simple lookup table is used.
if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
set(_VERSION_REGEX "[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
set(_VERSION_REGEX "[^.]*\\.[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
else ()
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
endif ()
string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
unset(_VERSION_REGEX)
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
unset(_VERSION_HEAD)
else ()
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
endif ()
unset(_VERSION_DOTS)
else() # minimum version specified:
if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
endif ()
endif()
else()
# if the package was not found, but a version was given, add that to the output:
if(${_NAME}_FIND_VERSION_EXACT)
set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
else()
set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
endif()
endif()
else ()
# Check with DEFINED as the found version may be 0.
if(DEFINED ${FPHSA_VERSION_VAR})
set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
endif()
endif ()
if(VERSION_OK)
string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
else()
set(${_NAME}_FOUND FALSE)
endif()
# print the result:
if (${_NAME}_FOUND)
FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
else ()
if(FPHSA_CONFIG_MODE)
_FPHSA_HANDLE_FAILURE_CONFIG_MODE()
else()
if(NOT VERSION_OK)
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
else()
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
endif()
endif()
endif ()
set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
endfunction()

48
CMakeModules/FindPackageMessage.cmake vendored Normal file
View File

@@ -0,0 +1,48 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#[=======================================================================[.rst:
FindPackageMessage
------------------
.. code-block:: cmake
find_package_message(<name> "message for user" "find result details")
This function is intended to be used in FindXXX.cmake modules files.
It will print a message once for each unique find result. This is
useful for telling the user where a package was found. The first
argument specifies the name (XXX) of the package. The second argument
specifies the message to display. The third argument lists details
about the find result so that if they change the message will be
displayed again. The macro also obeys the QUIET argument to the
find_package command.
Example:
.. code-block:: cmake
if(X11_FOUND)
find_package_message(X11 "Found X11: ${X11_X11_LIB}"
"[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
else()
...
endif()
#]=======================================================================]
function(find_package_message pkg msg details)
# Avoid printing a message repeatedly for the same find result.
if(NOT ${pkg}_FIND_QUIETLY)
string(REPLACE "\n" "" details "${details}")
set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg})
if(NOT "${details}" STREQUAL "${${DETAILS_VAR}}")
# The message has not yet been printed.
message(STATUS "${msg}")
# Save the find details in the cache to avoid printing the same
# message again.
set("${DETAILS_VAR}" "${details}"
CACHE INTERNAL "Details about finding ${pkg}")
endif()
endif()
endfunction()

View File

@@ -1,44 +0,0 @@
FROM simon987/sist2-build as build
MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/
COPY scripts scripts
COPY schema schema
COPY CMakeLists.txt .
COPY third-party third-party
COPY src src
COPY sist2-vue sist2-vue
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2 || mv sist2_debug sist2
FROM --platform="linux/amd64" ubuntu:20.04
WORKDIR /root
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENTRYPOINT ["sist2"]
RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libasan5 libmagic1 python3 \
python3-pip git tesseract-ocr && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
# sist2
COPY --from=build /build/sist2 sist2
# sist2-admin
COPY sist2-admin/requirements.txt sist2-admin/
RUN python3 -m pip install --no-cache -r sist2-admin/requirements.txt
COPY sist2-admin/ sist2-admin/

View File

@@ -1,28 +0,0 @@
FROM simon987/sist2-build-arm64 as build
MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/
ADD . /build/
RUN cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
FROM --platform="linux/arm64/v8" ubuntu:21.10
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]
COPY --from=build /build/sist2 /root/sist2

176
README.md
View File

@@ -1,8 +1,5 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
# sist2
@@ -10,152 +7,89 @@ sist2 (Simple incremental search tool)
*Warning: sist2 is in early development*
![search panel](docs/sist2.png)
## Features
* Fast, low memory usage, multi-threaded
* Mobile-friendly Web interface
* Fast, low memory usage
* Portable (all its features are packaged in a single executable)
* Extracts text and metadata from common file types \*
* Generates thumbnails \*
* Extracts text from common file types\*
* Generates thumbnails\*
* Incremental scanning
* Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
* Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
\* See [format support](#format-support)
\*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr)
![stats](docs/stats.png)
\* See [format support](#format-support)
## Getting Started
1. Have an Elasticsearch (>= 6.8.X, ideally >=7.14.0) instance running
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
1. *(or)* Run using docker:
```bash
docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.14.0
```
1. *(or)* Run using docker-compose:
```yaml
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
environment:
- discovery.type=single-node
- "ES_JAVA_OPTS=-Xms1G -Xmx2G"
```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
3. *(or)* `docker pull simon987/sist2:2.12.1-x64-linux`
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases)
1. See [Usage guide](docs/USAGE.md)
*Windows users*: `sist2` runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
*Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
## Example usage
See [Usage guide](docs/USAGE.md) for more details
![demo](demo.gif)
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
1. Start web interface: `sist2 web ./docs_idx`
See help page `sist2 --help` for more details.
**Scan a directory**
```bash
sist2 scan ~/Documents -o ./orig_idx/
sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```
**Push index to Elasticsearch or file**
```bash
sist2 index --force-reset ./my_idx
sist2 index --print ./my_idx > raw_documents.ndjson
```
**Start web interface**
```bash
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
```
## Format support
| File type | Library | Content | Thumbnail | Metadata |
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
| `audio/*` | ffmpeg | - | yes | ID3 tags |
| `video/*` | ffmpeg | - | yes | title, comment, artist |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | yes | author, title |
| mobi, azw, azw3 | libmobi | yes | no | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,cbz,cbr,fb2,epub | MuPDF | yes | yes, `png` | *planned* |
`audio/*` | libav | - | yes, `jpeg` | ID3 tags |
`video/*` | libav | - | yes, `jpeg` | *planned* |
`image/*` | libav | - | yes, `jpeg` | *planned* |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
docx, xlsx, pptx | | *planned* | no | *planned* |
\* *See [Archive files](#archive-files)*
### Archive files
**sist2** will scan files stored into archive files (zip, tar, 7z...) as if they were directly in the file system.
Recursive (archives inside archives)
scan is also supported.
**Limitations**:
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
is limitted (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split into smaller parts.
### OCR
You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with the
`--ocr-lang <lang>` option in combination with `--ocr-images` and/or `--ocr-ebooks`.
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa) pre-installed.
You can use the `+` separator to specify multiple languages. The language
name must be identical to the `*.traineddata` file installed on your system
(use `chi_sim` rather than `chi-sim`).
Examples:
```bash
sist2 scan --ocr-ebooks --ocr-lang jpn ~/Books/Manga/
sist2 scan --ocr-images --ocr-lang eng ~/Images/Screenshots/
sist2 scan --ocr-ebooks --ocr-images --ocr-lang eng+chi_sim ~/Chinese-Bilingual/
```
## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries
### With docker (recommended)
```bash
git clone --recursive https://github.com/simon987/sist2/
cd sist2
docker build . -f ./Dockerfile -t my-sist2-image
docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
```
### On a linux computer
You can compile **sist2** by yourself if you don't want to use the pre-compiled
binaries.
1. Install compile-time dependencies
```bash
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
*(Debian)*
```bash
apt install git cmake pkg-config libglib2.0-dev\
libssl-dev uuid-dev libavformat-dev libswscale-dev \
python3 libmagic-dev libfreetype6-dev libcurl-dev \
libbz2-dev yasm
```
1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
1. Install vcpkg dependencies
*(FreeBSD)*
```bash
vcpkg install curl[core,openssl]
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw jasper lcms gumbo
```
1. Build
pkg install cmake gcc yasm gmake bash ffmpeg e2fsprogs-uuid
```
__
2. Build
```bash
git clone --recursive https://github.com/simon987/sist2/
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
git clone --recurse-submodules https://github.com/simon987/sist2
./scripts/get_static_libs.sh
cmake .
make
```
```

1
argparse Submodule

Submodule argparse added at fafc503d23

1
cJSON Submodule

Submodule cJSON added at 2de7d04aaf

View File

@@ -1,7 +0,0 @@
install:
install sist2-update-all.sh /usr/bin/sist2-update-all.sh
install sist2-update-files.sh /usr/bin/sist2-update-files.sh
install sist2-update-nextcloud.sh /usr/bin/sist2-update-nextcloud.sh
install sist2-update.service /etc/systemd/system/sist2-update.service
install sist2-update.timer /etc/systemd/system/sist2-update.timer
systemctl daemon-reload

View File

@@ -1,31 +0,0 @@
# Systemd integration example
This example contains my (yatli) personal configuration for sist2 auto-updating.
The following indices are involved in this configuration:
| Index | Path | Description |
|-----------|------------------|--------------------------------------------|
| files | /zpool/files | Main file repository |
| nextcloud | /zpool/nextcloud | Externally synchronized to a cloud account |
The systemd integration achieves automatic sist2 scanning & indexing everyday at 3:00AM.
### Tailoring the configuration for yourself
`sist2-update-all.sh` calls update scripts for each sist2 index. Add or remove
update scripts accordingly to suit your need. Each update script (e.g.
`sist2-update-files.sh`) has important parameters laid down at the beginning so
make sure to edit them to point to your files and index locations.
### Installation
```bash
# install the services and scripts
sudo make install
# enable & start the timer
sudo systemctl enable sist2-update.timer
sudo systemctl start sist2-update.timer
# verify that the timer has been enabled
systemctl list-timers --all
```

View File

@@ -1,9 +0,0 @@
#!/bin/bash
set -e
__dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
echo "Update index: Files"
source ${__dir}/sist2-update-files.sh
echo "Update index: Nextcloud"
source ${__dir}/sist2-update-nextcloud.sh
echo "Done. Restarting sist2."
docker restart sist2-sist2-1

View File

@@ -1,34 +0,0 @@
#!/bin/bash
set -e
DATE=$(date +%Y_%m_%d)
CONTENT=/zpool/files
ORIG=/mnt/ssd/sist-index/files.idx
NEW=/mnt/ssd/sist-index/files_$DATE.idx
EXCLUDE='ZArchives|TorrentStore|TorrentDownload|624f0c59-1fef-44f6-95e9-7483296f2833|ubuntu-full-2021-12-07'
NAME=Files
#REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
REWRITE_URL=""
sist2 scan \
--threads 14 \
--mem-throttle 32768 \
--quality 1.0 \
--name $NAME \
--ocr-lang=eng+chi_sim \
--ocr-ebooks \
--ocr-images \
--exclude=$EXCLUDE \
--rewrite-url=$REWRITE_URL \
--incremental=$ORIG \
--output=$NEW \
$CONTENT
echo ">>> Scan complete"
rm -rf $ORIG
mv $NEW $ORIG
unset http_proxy
unset https_proxy
unset HTTP_PROXY
unset HTTPS_PROXY
sist2 index $ORIG --incremental-index
echo ">>> Index complete"

View File

@@ -1,33 +0,0 @@
#!/bin/bash
set -e
DATE=$(date +%Y_%m_%d)
CONTENT=/zpool/nextcloud/v-yadli
ORIG=/mnt/ssd/sist-index/nextcloud.idx
NEW=/mnt/ssd/sist-index/nextcloud_$DATE.idx
EXCLUDE='Yatao|.*263418493\\/Image\\/.*'
NAME=NextCloud
# REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
REWRITE_URL=""
sist2 scan \
--threads 14 \
--mem-throttle 32768 \
--quality 1.0 \
--name $NAME \
--ocr-lang=eng+chi_sim \
--ocr-ebooks \
--ocr-images \
--exclude=$EXCLUDE \
--rewrite-url=$REWRITE_URL \
--incremental=$ORIG \
--output=$NEW \
$CONTENT
echo ">>> Scan complete"
rm -rf $ORIG
mv $NEW $ORIG
unset http_proxy
unset https_proxy
unset HTTP_PROXY
unset HTTPS_PROXY
sist2 index $ORIG --incremental-index

View File

@@ -1,6 +0,0 @@
[Unit]
Description=sist2-update
[Service]
User=yatli
ExecStart=/bin/bash /usr/bin/sist2-update-all.sh

View File

@@ -1,10 +0,0 @@
[Unit]
Description=sist2-update
[Timer]
OnCalendar=*-*-* 3:00:00
Persistent=true
Unit=sist2-update.service
[Install]
WantedBy=timers.target

BIN
demo.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 MiB

View File

@@ -1,24 +0,0 @@
version: "3"
services:
elasticsearch:
image: elasticsearch:7.14.0
container_name: sist2-es
environment:
- "discovery.type=single-node"
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
sist2-admin:
build:
context: .
container_name: sist2-admin
volumes:
- /mnt/array/sist2-admin-data/:/sist2-admin/
- /:/host
ports:
# NOTE: Don't export this port publicly!
- 8080:8080
- 4090:4090
working_dir: /root/sist2-admin/
entrypoint: python3
command:
- /root/sist2-admin/sist2_admin/app.py

View File

@@ -1,372 +0,0 @@
# Usage
*More examples (specifically with docker/compose) are in progress*
* [scan](#scan)
* [options](#scan-options)
* [examples](#scan-examples)
* [index format](#index-format)
* [index](#index)
* [options](#index-options)
* [examples](#index-examples)
* [web](#web)
* [options](#web-options)
* [examples](#web-examples)
* [rewrite_url](#rewrite_url)
* [elasticsearch](#elasticsearch)
* [exec-script](#exec-script)
* [tagging](#tagging)
* [sidecar files](#sidecar-files)
```
Usage: sist2 scan [OPTION]... PATH
or: sist2 index [OPTION]... INDEX
or: sist2 web [OPTION]... INDEX...
or: sist2 exec-script [OPTION]... INDEX
Lightning-fast file system indexer and search tool.
-h, --help show this help message and exit
-v, --version Show version and exit
--verbose Turn on logging
--very-verbose Turn on debug messages
Scan options
-t, --threads=<int> Number of threads. DEFAULT=1
--mem-throttle=<int> Total memory threshold in MiB for scan throttling. DEFAULT=0
-q, --thumbnail-quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1
--thumbnail-size=<int> Thumbnail size, in pixels. DEFAULT=500
--thumbnail-count=<int> Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1
--content-size=<int> Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768
--incremental=<str> Reuse an existing index and only scan modified files.
-o, --output=<str> Output directory. DEFAULT=index.sist2/
--rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: (name of the directory)
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--archive-passphrase=<str> Passphrase for encrypted archive files
--ocr-lang=<str> Tesseract language (use 'tesseract --list-langs' to see which are installed on your machine)
--ocr-images Enable OCR'ing of image files.
--ocr-ebooks Enable OCR'ing of ebook files.
-e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MiB for files inside archives (see USAGE.md). DEFAULT: 2000
--read-subtitles Read subtitles from media files.
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
--checksums Calculate file checksums when scanning.
--list-file=<str> Specify a list of newline-delimited paths to be scanned instead of normal directory traversal. Use '-' to read from stdin.
Index options
-t, --threads=<int> Number of threads. DEFAULT=1
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
-p, --print Just print JSON documents to stdout.
--incremental-index Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch.
--script-file=<str> Path to user script.
--mappings-file=<str> Path to Elasticsearch mappings.
--settings-file=<str> Path to Elasticsearch settings.
--async-script Execute user script asynchronously.
--batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
Web options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
--bind=<str> Listen on this address. DEFAULT=localhost:4090
--auth=<str> Basic auth in user:password format
--tag-auth=<str> Basic auth in user:password format for tagging
--tagline=<str> Tagline in navbar
--dev Serve html & js files from disk (for development)
--lang=<str> Default UI language. Can be changed by the user
Exec-script options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
--script-file=<str> Path to user script.
--async-script Execute user script asynchronously.
Made by simon987 <me@simon987.net>. Released under GPL-3.0
```
## Scan
### Scan options
* `-t, --threads`
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
* `--mem-throttle`
Total memory threshold in MiB for scan throttling. Worker threads will not start a new parse job
until the total memory usage of sist2 is below this threshold. Set to 0 to disable. DEFAULT=0
* `-q, --thumbnail-quality`
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best.
* `--thumbnail-size`
Thumbnail size in pixels.
* `--thumbnail-count`
Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image
every ~7s). Set to 0 to completely disable thumbnails.
* `--content-size`
Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
Repeated whitespace and special characters do not count toward this limit.
Set to 0 to completely disable content parsing.
* `--incremental`
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
will be copied to the new index and will not be parsed again.
* `-o, --output` Output directory.
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--name` Set the `name` option for the web module
* `--depth` Maximum scan dept. Set to 0 only scan files directly in the root directory, set to -1 for infinite depth
* `--archive` Archive file mode.
* skip: Don't parse
* list: Only get file names as text
* shallow: Don't parse archives inside archives.
* recurse: Scan archives recursively (default)
* `--ocr-lang`, `--ocr-ebooks`, `--ocr-images` See [OCR](../README.md#OCR)
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
part of the full absolute path.
Examples:
* `-e ".*\.ttf"`: Ignore ttf files
* `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
* `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
* `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directory
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
* `--fast` Only index file names and mime type
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
will not be considered for the disk utilisation visualization; their size will be added to
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
and so on.
In effect, smaller `treemap-threshold` values will yield a more detailed
(but also a more cluttered and harder to read) visualization.
* `--mem-buffer` Maximum memory buffer size in MiB (per thread) for files inside archives. Media files
larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
* `--checksums` Calculate file checksums (SHA1) when scanning files. This option does not cause any additional read
operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
### Scan examples
Simple scan
```bash
sist2 scan ~/Documents
sist2 scan \
--threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
--name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
~/Documents -o ./documents.idx/
```
Incremental scan
```
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```
### Index format
A typical `ndjson` type index structure looks like this:
```
documents.idx/
├── descriptor.json
├── _index_main.ndjson.zst
├── treemap.csv
├── agg_mime.csv
├── agg_date.csv
├── add_size.csv
├── thumbs/
| ├── data.mdb
| └── lock.mdb
├── tags/
| ├── data.mdb
| └── lock.mdb
└── meta/
├── data.mdb
└── lock.mdb
```
The `_index_*.ndjson.zst` files contain the document data in JSON format, in a compressed newline-delemited file.
The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
database containing the thumbnails.
The `descriptor.json` file contains general information about the index. The
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
The `.csv` are pre-computed aggregations necessary for the stats page.
*thumbs/*:
LMDB key-value store. Keys are **binary** 16-byte md5 hash* (`_id` field)
and values are raw image bytes.
*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
## Index
### Index options
* `--es-url`
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
same network.
* `--es-index`
Elasticsearch index name. DEFAULT=sist2
* `-p, --print`
Print index in JSON format to stdout.
* `--incremental-index`
Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
Only the new changes since the last scan will be sent.
* `--script-file`
Path to user script. See [Scripting](scripting.md).
* `--mappings-file`
Path to custom Elasticsearch mappings. If none is specified, [the bundled mappings](https://github.com/simon987/sist2/tree/master/schema) will be used.
* `--settings-file`
Path to custom Elasticsearch settings. *(See above)*
* `--async-script`
Use `wait_for_completion=false` elasticsearch option while executing user script.
(See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
* `--batch-size=<int>`
Index batch size. Indexing is generally faster with larger batches, but payloads that
are too large will fail and additional overhead for retrying with smaller sizes may slow
down the process.
* `-f, --force-reset`
Reset Elasticsearch mappings and settings.
* `-t, --threads` Number of threads to use. Ideally, choose a number equal to the number of logical cores of the machine hosting Elasticsearch.
### Index examples
**Push to elasticsearch**
```bash
sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index/
sist2 index ./my_index/
```
**Save index in JSON format**
```bash
sist2 index --print ./my_index/ > my_index.ndjson
```
**Inspect contents of an index**
```bash
sist2 index --print ./my_index/ | jq | less
```
## Web
### Web options
* `--es-url=<str>` Elasticsearch url.
* `--es-index`
Elasticsearch index name. DEFAULT=sist2
* `--bind=<str>` Listen on this address.
* `--auth=<str>` Basic auth in user:password format
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
`--auth` argument, but authentication is only applied the `/tag/` endpoint.
* `--tagline=<str>` When specified, will replace the default tagline in the navbar.
* `--dev` Serve html & js files from disk (for development, used to modify frontend files without having to recompile)
* `--lang=<str>` Set the default web UI language (See #180 for a list of supported languages, default
is `en`). The user can change the language in the configuration page
### Web examples
**Single index**
```bash
sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
```
**Multiple indices**
```bash
# Indices will be displayed in this order in the web interface
sist2 web index1 index2 index3 index4
```
### rewrite_url
When the `rewrite_url` field is not empty, the web module ignores the `root`
field and will return a HTTP redirect to `<rewrite_url><path>/<name><extension>`
instead of serving the file from disk.
Both the `root` and `rewrite_url` fields are safe to manually modify from the
`descriptor.json` file.
# Elasticsearch
Elasticsearch versions >=6.8.0, 7.X.X and 8.X.X are supported by sist2.
Using a version >=7.14.0 is recommended to enable the following features:
- Bug fix for large documents (See #198)
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
If you don't care about the features above, you can ignore it or disable it in the configuration page.
## exec-script
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
# Tagging
### Manual tagging
You can modify tags of individual documents directly from the
`web` interface. Note that you can setup authentication for this feature
with the `--tag-auth` option (See [web options](#web-options))
![manual_tag](manual_tag.png)
Tags that are manually added are saved both in the
index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing,
they are read from the index and automatically applied.
You can safely copy the `/tags/` database to another index.
See [Automatic tagging](#automatic-tagging) for information about tag
hierarchies and tag colors.
\* *It can take a few seconds to take effect in new search queries.*
### Automatic tagging
See [scripting](scripting.md) documentation.
# Sidecar files
When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
original document's indexed metadata (does not modify the actual file). Sidecar metadata files will also work inside archives.
Sidecar files themselves are not saved in the index.
This feature is useful to leverage third-party applications such as speech-to-text or
OCR to add additional metadata to a file.
**Example**
```
~/Documents/
├── Video.mp4
└── Video.mp4.s2meta
```
The sidecar file must have exactly the same file path and the `.s2meta` suffix.
`Video.mp4.s2meta`:
```json
{
"content": "This sidecar file will overwrite some metadata fields of Video.mp4",
"author": "Some author",
"duration": 12345,
"bitrate": 67890,
"some_arbitrary_field": [1,2,3]
}
```
```
sist2 scan ~/Documents -o ./docs.idx
sist2 index ./docs.idx
```
*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,
it is not currently possible to restore both manual tags and sidecar tags without user scripts
while reindexing.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 35 KiB

View File

@@ -1,142 +0,0 @@
## User scripts
*This document is under construction, more in-depth guide coming soon*
During the `index` step, you can use the `--script-file <script>` option to
modify documents or add user tags. This option is mainly used to
implement automatic tagging based on file attributes.
The scripting language used
([Painless Scripting Language](https://www.elastic.co/guide/en/elasticsearch/painless/7.4/index.html))
is very similar to Java, but you should be able to create user scripts
without programming experience at all if you're somewhat familiar with
regex.
This is the base structure of the documents we're working with:
```json
{
"_id": "e171405c-fdb5-4feb-bb32-82637bc32084",
"_index": "sist2",
"_type": "_doc",
"_source": {
"index": "206b3050-e821-421a-891d-12fcf6c2db0d",
"mime": "application/json",
"size": 1799,
"mtime": 1545443685,
"extension": "md",
"name": "README",
"path": "sist2/scripting",
"content": "..."
}
}
```
**Example script**
This script checks if the `genre` attribute exists, if it does
it adds the `genre.<genre>` tag.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase());
}
```
You can use `.` to create a hierarchical tag tree:
![scripting/genre_example](genre_example.png)
To use regular expressions, you need to add this line in `/etc/elasticsearch/elasticsearch.yml`
```yaml
script.painless.regex.enabled: true
```
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
**Tag color**
You can specify the color for an individual tag by appending an
hexadecimal color code (`#RRGGBBAA`) to the tag name.
### Examples
If `(20XX)` is in the file name, add the `year.<year>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) {
tags.add("year." + m.group(1));
}
```
Use default *Calibre* folder structure to infer author.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
// We expect the book path to look like this:
// /path/to/Calibre Library/Author/Title/Title - Author.pdf
if (ctx._source.name.contains("-") && ctx._source.extension == "pdf") {
String[] names = ctx._source.name.splitOnToken('-');
tags.add("author." + names[1].strip());
}
```
If the file matches a specific pattern `AAAA-000 fName1 lName1, <fName2 lName2>...`, add the `actress.<actress>` and
`studio.<studio>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /([A-Z]{4})-[0-9]{3} (.*)/.matcher(ctx._source.name);
if (m.find()) {
tags.add("studio." + m.group(1));
// Take the matched group (.*), and add a tag for
// each name, separated by comma
for (String name : m.group(2).splitOnToken(',')) {
tags.add("actress." + name);
}
}
```
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```
Parse `EXIF:F Number` tag
```Java
if (ctx._source?.exif_fnumber != null) {
String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
String aperture = String.valueOf(Float.parseFloat(values[0]) / Float.parseFloat(values[1]));
if (aperture == "NaN") {
aperture = "0,0";
}
tags.add("Aperture.f/" + aperture.replace(".", ","));
}
```
Display year and months from `EXIF:DateTime` tag
```Java
if (ctx._source?.exif_datetime != null) {
SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
Date date = parser.parse(ctx._source.exif_datetime);
SimpleDateFormat yp = new SimpleDateFormat("yyyy");
SimpleDateFormat mp = new SimpleDateFormat("MMMMMMMMM");
String year = yp.format(date);
String month = mp.format(date);
tags.add("Month." + month);
tags.add("Year." + year);
}
```

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1011 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 167 KiB

1
lib/ffmpeg Submodule

Submodule lib/ffmpeg added at 0481a1f6e5

1
lib/mupdf Submodule

Submodule lib/mupdf added at 91782a4348

1
lib/onion Submodule

Submodule lib/onion added at d8d4cc9290

1
lmdb Submodule

Submodule lmdb added at 5c012bbe03

View File

@@ -2,18 +2,14 @@ application/arj, arj
application/base64, mme
application/binhex, hqx
application/book, boo|book
application/CDFV2-corrupt,
application/CDFV2, sdv
application/clariscad, ccad
application/commonground, dp
application/csv,
application/dicom, dcm
application/drafting, drw
application/epub+zip, epub
application/freeloader, frl
application/futuresplash, spl
application/groupwise, vew
application/gzip, gz|tgz
application/gzip, gz
application/hta, hta
application/i-deas, unv
application/iges, iges|igs
@@ -21,8 +17,8 @@ application/inf, inf
application/java-archive, jar
application/java, class
application/javascript,
application/x-archive, a
application/json, json
application/ndjson, jsonl|ndjson
application/marc, mrc
application/mbedlet, mbd
application/mime, aps
@@ -31,9 +27,7 @@ application/msword, doc|dot|w6w|wiz|word
application/netmc, mcp
application/octet-stream, bin|dump|gpg
application/oda, oda
application/ogg, ogv
application/pdf, pdf
application/pgp-keys,
application/pgp-signature, pgp
application/pkcs7-signature, p7s
application/pkix-cert, cer|crt
@@ -49,10 +43,6 @@ application/vda, vda
application/vnd.fdf, fdf
application/vnd.font-fontforge-sfd, sfd
application/vnd.hp-hpgl, hgl|hpg|hpgl
application/vnd.iccprofile, icm
application/vnd.iccprofile, icm
application/vnd.lotus-1-2-3,
application/vnd.ms-cab-compressed, cab
application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
application/vnd.ms-fontobject, eot
application/vnd.ms-opentype, otf
@@ -64,73 +54,45 @@ application/vnd.ms-project, mpp
application/vnd.oasis.opendocument.base, odb
application/vnd.oasis.opendocument.formula, odf
application/vnd.oasis.opendocument.graphics, odg
application/vnd.oasis.opendocument.presentation, odp
application/vnd.oasis.opendocument.spreadsheet, ods
application/vnd.oasis.opendocument.text, odt
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
application/vnd.symbian.install,
application/vnd.tcpdump.pcap, pcap
application/vnd.wap.wmlc, wmlc
application/vnd.wap.wmlscriptc, wmlsc
application/vnd.xara, web
application/vocaltec-media-desc, vmd
application/vocaltec-media-file, vmf
application/warc, warc
application/winhelp, hlp
application/wordperfect, wp|wp5|wp6|wpd|w60|w61
application/wordperfect6.0, w60
application/wordperfect6.1, w61
application/wordperfect, wp|wp5|wp6|wpd
application/x-123, wk1
application/x-7z-compressed, 7z
application/x-aim, aim
application/x-apple-diskimage,
application/x-arc,
application/x-archive, a
application/x-atari-7800-rom, a78
application/x-authorware-bin, aab
application/x-authorware-map, aam
application/x-authorware-seg, aas
application/x-avira-qua,
application/x-bcpio, bcpio
application/x-bittorrent, torrent
application/x-bsh, bsh
application/x-bytecode.python, pyc
application/x-bzip2, boz|bz2
application/x-bzip, bz
application/x-cbr, cbr
application/x-cbz, cbz
application/x-cdlink, vcd
application/x-chat, cha|chat
application/x-chrome-extension,
application/x-cocoa, cco
application/x-conference, nsc
application/x-coredump,
application/x-cpio, cpio
application/x-dbf, dbf
application/x-dbt,
application/x-debian-package, deb
application/x-deepv, deepv
application/x-director, dir|dxr
application/x-dmp, dmp
application/x-dosdriver,
application/x-director, dcr|dir|dxr
application/x-dosexec, dll
application/x-dvi, dvi
application/x-elc, elc
application/x-empty,
application/x-envoy, env|evy
application/x-esrehber, es
application/x-excel, xla|xld|xlk|xlt|xlv
application/x-executable, exe
application/x-font-gdos,
application/x-font-pf2, pf2
application/x-font-pfm, pfm
application/x-font-sfn,
application/x-font-ttf, ttf|ttc
application/x-fptapplication/x-dbt,
application/x-freelance, pre
application/x-gamecube-rom,
application/x-gdbm,
application/x-gettext-translation,
application/x-git,
application/x-gsp, gsp
application/x-gss, gss
@@ -140,67 +102,46 @@ application/x-hdf, hdf
application/x-helpfile, help
application/x-httpd-imap, imap
application/x-ima, ima
application/x-innosetup,
application/x-internett-signup, ins
application/x-inventor, iv
application/x-ip2, ip
application/x-java-applet,
application/x-java-commerce, jcm
application/x-java-image,
application/x-java-jmod, jmod
application/x-java-keystore,
application/x-kdelnk,
application/x-koan, skd|skm|skp|skt
application/x-latex, latex|ltx
application/x-livescreen, ivy
application/x-lotus, wq1
application/x-lz4+json, jsonlz4
application/x-lz4, lz4
application/x-lzh-compressed,
application/x-lzh, lzh
application/x-lzip, lz
application/x-lzma, lzma
application/x-lzop, lzo
application/x-lzx, lzx
application/x-mach-binary, jnilib|dylib
application/x-mach-executable,
application/x-magic-cap-package-1.0, mc$
application/x-mathcad, mcd
application/x-maxis-dbpf,
application/x-meme, mm
application/x-midi, midi
application/x-mif, mif
application/x-mix-transfer, nix
application/xml, opf
application/x-mobipocket-ebook, mobi
application/vnd.amazon.mobi8-ebook, azw|azw3
application/x-msaccess, accdb
application/x-ms-compress-szdd, fon
application/x-ms-pdb, pdb
application/x-ms-reader, lit
application/x-n64-rom, z64
application/x-navi-animation, ani
application/x-navidoc, nvd
application/x-navimap, map
application/x-navistyle, stl
application/x-nes-rom, nes
application/x-netcdf, cdf|nc
application/x-newton-compatible-pkg, pkg
application/x-nintendo-ds-rom,
application/x-object, o
application/x-omcdatamaker, omcd
application/x-omc, omc
application/x-omcregerator, omcr
application/x-pagemaker, pm4|pm5
application/x-pcl, pcl
application/x-pgp-keyring,
application/x-pixclscript, plx
application/x-pkcs7-certreqresp, p7r
application/x-pkcs7-signature, p7a
application/x-project, mpc|mpt|mpv|mpx
application/x-qpro, wb1
application/x-rar, rar
application/x-rpm, rpm
application/x-sdp, sdp
application/x-sea, sea
application/x-seelogo, sl
@@ -208,17 +149,12 @@ application/x-setupscript,
application/x-sharedlib, so
application/x-shar, shar
application/x-shockwave-flash, swf
application/x-snappy-framed,
application/x-sprite, spr|sprite
application/x-sqlite3,
application/x-stargallery-thm,
application/x-stuffit, sit
application/x-sv4cpio, sv4cpio
application/x-sv4crc, sv4crc
application/x-tar, tar
application/x-tbook, sbk|tbk
application/x-terminfo,
application/x-terminfo2,
application/x-texinfo, texi|texinfo
application/x-tex-tfm, tfm
application/x-ustar, ustar
@@ -227,22 +163,16 @@ application/x-vnd.audioexplosion.mzz, mzz
application/x-vnd.ls-xpix, xpix
application/x-vrml, vrml
application/x-wais-source, src|wsrc
application/x-wine-extension-ini,
application/x-wintalk, wtk
application/x-world, svr
application/x-wri, wri
application/x-x509-ca-cert, der
application/x-xz, xz
application/x-zip,
application/x-zstd, zst
application/zip, zip
application/zlib, z
!audio/basic, au
audio/it, it
audio/make, funk|my|pfunk
audio/midi, kar
audio/mid, rmi
audio/mp4, m4b
audio/mpeg, m2a|mpa
audio/ogg, ogg
audio/s3m, s3m
@@ -250,10 +180,7 @@ audio/tsp-audio, tsi
audio/tsplayer, tsp
audio/vnd.qcelp, qcp
audio/voxware, vox
audio/x-aiff, aiff|aif
audio/x-flac, flac
audio/x-gsm, gsd|gsm
audio/x-hx-aac-adts,
audio/x-jam, jam
audio/x-liveaudio, lam
audio/x-m4a, m4a
@@ -267,24 +194,17 @@ audio/x-nspaudio, lma
audio/x-pn-realaudio, ram|rm|rmm|rmp
audio/x-psid, sid
audio/x-realaudio, ra
audio/x-s3m,
audio/x-twinvq-plugin, vqe|vql
audio/x-twinvq, vqf
audio/x-voc, voc
audio/x-wav, wav
!audio/x-xbox360-executable, xex
!audio/x-xbox-executable, xbe
font/otf,
font/sfnt,
font/woff2, woff2
font/woff, woff
image/bmp,
image/cmu-raster, rast
image/fif, fif
image/florian, flo|turbot
image/g3fax, g3
image/gif, gif
image/heic, heic
image/ief, ief|iefs
image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
image/jutvision, jut
@@ -293,9 +213,6 @@ image/pict, pic|pict
image/png, png|x-png
!image/svg, svg
!image/svg+xml,
image/tiff,
!image/vnd.adobe.photoshop, psd
!image/vnd.djvu, djvu
image/vnd.fpx, fpx
image/vnd.microsoft.icon,
image/vnd.rn-realflash, rf
@@ -303,15 +220,9 @@ image/vnd.rn-realpix, rp
image/vnd.wap.wbmp, wbmp
image/vnd.xiff, xif
image/webp, webp
image/wmf,
image/x-3ds, 3ds
image/x-award-bioslogo,
image/x-cmu-raster, ras
image/x-cur, tga
image/x-dwg, dwg|dxf|svf
image/x-eps,
image/x-exr, exr
image/x-gem,
image/x-icns,
!image/x-icon, ico
image/x-jg, art
@@ -325,34 +236,34 @@ image/x-portable-graymap, pgm
image/x-portable-pixmap, ppm
image/x-quicktime, qif|qti|qtif
image/x-rgb, rgb
image/x-tga,
image/x-tiff, tif|tiff
image/x-win-bitmap,
image/tiff,
!image/x-xcf, xcf
!image/x-xpixmap, xpm
image/x-xwindowdump, xwd
message/news,
message/rfc822, mht|mhtml|mime
model/vnd.dwf, dwf
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
model/vrml, wrz
model/x-pov, pov
text/asp, asp
text/css, css
text/x-sass, sass
text/x-scss, scss
text/html, acgi|htm|html|htmls|htx|shtml
text/javascript, js
text/mcf, mcf
text/pascal, pas
text/PGP,
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
application/vnd.coffeescript, coffee
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc
text/x-awk, awk
!video/x-jng, jng
video/x-mng, mng
image/x-cur, tga
image/x-xwindowdump, xwd
!image/vnd.adobe.photoshop, psd
text/tab-separated-values, tsv
text/troff, man|me|ms|roff|t|tr
text/uri-list, uji|unis|uri|uris
text/uri-list, uni|unis|uri|uris
text/vnd.abc, abc
text/vnd.fmi.flexstor, flx
text/vnd.wap.wmlscript, wmls
@@ -361,7 +272,6 @@ text/webviewhtml, htt
text/x-Algol68,
text/x-asm, asm|s
text/x-audiosoft-intra, aip
text/x-awk, awk
text/x-bcpl,
text/x-c, c|cc|h
text/x-c++, cpp|cxx|c++
@@ -376,31 +286,23 @@ text/x-makefile, am|mak
text/xml, xml|pom|iml|plist
text/x-m, m
text/x-msdos-batch, bat
text/x-ms-regedit, reg
text/x-objective-c,
text/x-pascal, p
text/x-perl, pl
text/x-php, php
text/x-po, po
text/x-python, py
text/x-ruby, rb
text/x-sass, sass
text/x-scss, scss
text/x-server-parsed-html, ssi
text/x-setext, etx
text/x-sgml, sgm|sgml
text/x-shellscript, sh
text/x-speech, talk
text/x-tcl,
text/x-tex, tex
text/x-uil, uil
text/x-uuencode, uue
text/x-vcalendar, vcs
text/x-vcard, vcf
video/animaflex, afl
video/avi, avi
video/avs-video, avs
video/MP2T,
video/mp4, mp4
video/mpeg, m1v|m2v|mpe|mpeg|mpg
video/quicktime, moov|mov|qt
@@ -415,36 +317,46 @@ video/x-atomic3d-feature, fmf
video/x-dl, dl
video/x-dv, dif|dv
video/x-fli, fli
video/x-flv, flv
video/x-isvideo, isu
!video/x-jng, jng
video/x-m4v, m4v
video/x-matroska, mkv
video/x-mng, mng
video/x-motion-jpeg, mjpg
video/x-ms-asf, asf|asx|wmv
video/x-msvideo, divx
video/x-ms-asf, asf|asx
video/x-qtc, qtc
video/x-sgi-movie, movie|mv
x-epoc/x-sisx-app,
application/x-zstd-dictionary,
application/vnd.ms-outlook, msg
image/x-olympus-orf, orf
image/x-nikon-nef, nef
image/x-fuji-raf, raf
image/x-panasonic-raw, rw2|raw
image/x-adobe-dng, dng
image/x-canon-cr2, cr2
image/x-canon-crw, crw
image/x-dcraw,
image/x-kodak-dcr, dcr
image/x-kodak-k25, k25
image/x-kodak-kdc, kdc
image/x-minolta-mrw, mrw
image/x-pentax-pef, pef
image/x-sigma-x3f, xf3
image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
sist2/sidecar, s2meta
application/x-7z-compressed, 7z
application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
text/x-po, po
application/x-rpm, rpm
application/x-debian-package, deb
application/vnd.iccprofile, icm
application/dicom, dcm
image/x-exr, exr
application/vnd.iccprofile, icm
video/x-matroska, mkv
application/x-empty,
model/vnd.gdl, gdl
model/vnd.gs.gdl, gdsl
font/woff, woff
font/woff2, woff2
application/epub+zip, epub
application/x-mobipocket-ebook, mobi
audio/x-flac, flac
application/x-rar, rar
video/x-msvideo, divx
video/x-flv, flv
application/x-kdelnk,
text/x-tcl,
application/ogg, ogv
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
application/vnd.ms-cab-compressed, cab
audio/mp4, m4b
!image/vnd.djvu, djvu
application/x-ms-reader, lit
application/CDFV2-corrupt,
text/x-vcard, vcf
application/x-innosetup,
application/winhelp, hlp
image/x-tga,
application/x-wine-extension-ini,
application/x-cbz, cbz
application/x-cbr, cbr
application/x-ms-compress-szdd, fon
1 application/arj arj
2 application/base64 mme
3 application/binhex hqx
4 application/book boo|book
application/CDFV2-corrupt
5 application/CDFV2 sdv
6 application/clariscad ccad
7 application/commonground dp
application/csv
application/dicom dcm
8 application/drafting drw
application/epub+zip epub
9 application/freeloader frl
10 application/futuresplash spl
11 application/groupwise vew
12 application/gzip gz|tgz gz
13 application/hta hta
14 application/i-deas unv
15 application/iges iges|igs
17 application/java-archive jar
18 application/java class
19 application/javascript
20 application/x-archive a
21 application/json json
application/ndjson jsonl|ndjson
22 application/marc mrc
23 application/mbedlet mbd
24 application/mime aps
27 application/netmc mcp
28 application/octet-stream bin|dump|gpg
29 application/oda oda
application/ogg ogv
30 application/pdf pdf
application/pgp-keys
31 application/pgp-signature pgp
32 application/pkcs7-signature p7s
33 application/pkix-cert cer|crt
43 application/vnd.fdf fdf
44 application/vnd.font-fontforge-sfd sfd
45 application/vnd.hp-hpgl hgl|hpg|hpgl
application/vnd.iccprofile icm
application/vnd.iccprofile icm
application/vnd.lotus-1-2-3
application/vnd.ms-cab-compressed cab
46 application/vnd.ms-excel xlb|xlc|xll|xlm|xls|xlw
47 application/vnd.ms-fontobject eot
48 application/vnd.ms-opentype otf
54 application/vnd.oasis.opendocument.base odb
55 application/vnd.oasis.opendocument.formula odf
56 application/vnd.oasis.opendocument.graphics odg
application/vnd.oasis.opendocument.presentation odp
application/vnd.oasis.opendocument.spreadsheet ods
57 application/vnd.oasis.opendocument.text odt
application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
application/vnd.symbian.install
application/vnd.tcpdump.pcap pcap
58 application/vnd.wap.wmlc wmlc
59 application/vnd.wap.wmlscriptc wmlsc
60 application/vnd.xara web
61 application/vocaltec-media-desc vmd
62 application/vocaltec-media-file vmf
63 application/warc application/wordperfect6.0 warc w60
64 application/winhelp application/wordperfect6.1 hlp w61
65 application/wordperfect wp|wp5|wp6|wpd|w60|w61 wp|wp5|wp6|wpd
66 application/x-123 wk1
application/x-7z-compressed 7z
67 application/x-aim aim
application/x-apple-diskimage
application/x-arc
application/x-archive a
application/x-atari-7800-rom a78
68 application/x-authorware-bin aab
69 application/x-authorware-map aam
70 application/x-authorware-seg aas
application/x-avira-qua
71 application/x-bcpio bcpio
72 application/x-bittorrent torrent
73 application/x-bsh bsh
74 application/x-bytecode.python pyc
75 application/x-bzip2 boz|bz2
76 application/x-bzip bz
application/x-cbr cbr
application/x-cbz cbz
77 application/x-cdlink vcd
78 application/x-chat cha|chat
application/x-chrome-extension
79 application/x-cocoa cco
80 application/x-conference nsc
application/x-coredump
81 application/x-cpio cpio
82 application/x-dbf dbf
83 application/x-dbt
application/x-debian-package deb
84 application/x-deepv deepv
85 application/x-director dir|dxr dcr|dir|dxr
application/x-dmp dmp
application/x-dosdriver
86 application/x-dosexec dll
87 application/x-dvi dvi
88 application/x-elc elc
application/x-empty
89 application/x-envoy env|evy
90 application/x-esrehber es
91 application/x-excel xla|xld|xlk|xlt|xlv
92 application/x-executable exe
application/x-font-gdos
application/x-font-pf2 pf2
application/x-font-pfm pfm
93 application/x-font-sfn
94 application/x-font-ttf ttf|ttc
application/x-fptapplication/x-dbt
95 application/x-freelance pre
application/x-gamecube-rom
application/x-gdbm
application/x-gettext-translation
96 application/x-git
97 application/x-gsp gsp
98 application/x-gss gss
102 application/x-helpfile help
103 application/x-httpd-imap imap
104 application/x-ima ima
application/x-innosetup
105 application/x-internett-signup ins
106 application/x-inventor iv
107 application/x-ip2 ip
108 application/x-java-applet
109 application/x-java-commerce jcm
110 application/x-java-image
application/x-java-jmod jmod
111 application/x-java-keystore
application/x-kdelnk
112 application/x-koan skd|skm|skp|skt
113 application/x-latex latex|ltx
114 application/x-livescreen ivy
115 application/x-lotus wq1
application/x-lz4+json jsonlz4
application/x-lz4 lz4
application/x-lzh-compressed
116 application/x-lzh lzh
application/x-lzip lz
application/x-lzma lzma
application/x-lzop lzo
117 application/x-lzx lzx
118 application/x-mach-binary jnilib|dylib
119 application/x-mach-executable
120 application/x-magic-cap-package-1.0 mc$
121 application/x-mathcad mcd
application/x-maxis-dbpf
122 application/x-meme mm
123 application/x-midi midi
124 application/x-mif mif
125 application/x-mix-transfer nix
126 application/xml opf
application/x-mobipocket-ebook mobi
application/vnd.amazon.mobi8-ebook azw|azw3
application/x-msaccess accdb
application/x-ms-compress-szdd fon
127 application/x-ms-pdb pdb
application/x-ms-reader lit
application/x-n64-rom z64
128 application/x-navi-animation ani
129 application/x-navidoc nvd
130 application/x-navimap map
131 application/x-navistyle stl
application/x-nes-rom nes
132 application/x-netcdf cdf|nc
133 application/x-newton-compatible-pkg pkg
application/x-nintendo-ds-rom
134 application/x-object o
135 application/x-omcdatamaker omcd
136 application/x-omc omc
137 application/x-omcregerator omcr
138 application/x-pagemaker pm4|pm5
139 application/x-pcl pcl
application/x-pgp-keyring
140 application/x-pixclscript plx
141 application/x-pkcs7-certreqresp p7r
142 application/x-pkcs7-signature p7a
143 application/x-project mpc|mpt|mpv|mpx
144 application/x-qpro wb1
application/x-rar rar
application/x-rpm rpm
145 application/x-sdp sdp
146 application/x-sea sea
147 application/x-seelogo sl
149 application/x-sharedlib so
150 application/x-shar shar
151 application/x-shockwave-flash swf
application/x-snappy-framed
152 application/x-sprite spr|sprite
153 application/x-sqlite3
application/x-stargallery-thm
application/x-stuffit sit
154 application/x-sv4cpio sv4cpio
155 application/x-sv4crc sv4crc
156 application/x-tar tar
157 application/x-tbook sbk|tbk
application/x-terminfo
application/x-terminfo2
158 application/x-texinfo texi|texinfo
159 application/x-tex-tfm tfm
160 application/x-ustar ustar
163 application/x-vnd.ls-xpix xpix
164 application/x-vrml vrml
165 application/x-wais-source src|wsrc
application/x-wine-extension-ini
166 application/x-wintalk wtk
167 application/x-world svr
168 application/x-wri wri
169 application/x-x509-ca-cert der
170 application/x-xz xz
application/x-zip
application/x-zstd zst
171 application/zip zip
application/zlib z
!audio/basic au
172 audio/it it
173 audio/make funk|my|pfunk
174 audio/midi kar
175 audio/mid rmi
audio/mp4 m4b
176 audio/mpeg m2a|mpa
177 audio/ogg ogg
178 audio/s3m s3m
180 audio/tsplayer tsp
181 audio/vnd.qcelp qcp
182 audio/voxware vox
audio/x-aiff aiff|aif
audio/x-flac flac
183 audio/x-gsm gsd|gsm
audio/x-hx-aac-adts
184 audio/x-jam jam
185 audio/x-liveaudio lam
186 audio/x-m4a m4a
194 audio/x-pn-realaudio ram|rm|rmm|rmp
195 audio/x-psid sid
196 audio/x-realaudio ra
audio/x-s3m
197 audio/x-twinvq-plugin vqe|vql
198 audio/x-twinvq vqf
199 audio/x-voc voc
200 audio/x-wav wav
!audio/x-xbox360-executable xex
!audio/x-xbox-executable xbe
201 font/otf
202 font/sfnt
font/woff2 woff2
font/woff woff
image/bmp
203 image/cmu-raster rast
204 image/fif fif
205 image/florian flo|turbot
206 image/g3fax g3
207 image/gif gif
image/heic heic
208 image/ief ief|iefs
209 image/jpeg jfif|jfif-tbnl|jpe|jpeg|jpg
210 image/jutvision jut
213 image/png png|x-png
214 !image/svg svg
215 !image/svg+xml
image/tiff
!image/vnd.adobe.photoshop psd
!image/vnd.djvu djvu
216 image/vnd.fpx fpx
217 image/vnd.microsoft.icon
218 image/vnd.rn-realflash rf
220 image/vnd.wap.wbmp wbmp
221 image/vnd.xiff xif
222 image/webp webp
image/wmf
image/x-3ds 3ds
image/x-award-bioslogo
223 image/x-cmu-raster ras
image/x-cur tga
224 image/x-dwg dwg|dxf|svf
225 image/x-eps
image/x-exr exr
image/x-gem
226 image/x-icns
227 !image/x-icon ico
228 image/x-jg art
236 image/x-portable-pixmap ppm
237 image/x-quicktime qif|qti|qtif
238 image/x-rgb rgb
image/x-tga
239 image/x-tiff tif|tiff
240 image/x-win-bitmap image/tiff
241 !image/x-xcf xcf
242 !image/x-xpixmap xpm
image/x-xwindowdump xwd
message/news
243 message/rfc822 mht|mhtml|mime
244 model/vnd.dwf dwf
model/vnd.gdl gdl
model/vnd.gs.gdl gdsl
245 model/vrml wrz
246 model/x-pov pov
247 text/asp asp
248 text/css css
249 text/x-sass sass
250 text/x-scss scss
251 text/html acgi|htm|html|htmls|htx|shtml
252 text/javascript js
253 text/mcf mcf
254 text/pascal pas
255 text/PGP text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
application/vnd.coffeescript coffee
256 text/richtext rt|rtf|rtx
text/rtf
257 text/scriplet wsc
258 text/x-awk awk
259 !video/x-jng jng
260 video/x-mng mng
261 image/x-cur tga
262 image/x-xwindowdump xwd
263 !image/vnd.adobe.photoshop psd
264 text/tab-separated-values tsv
265 text/troff man|me|ms|roff|t|tr
266 text/uri-list uji|unis|uri|uris uni|unis|uri|uris
267 text/vnd.abc abc
268 text/vnd.fmi.flexstor flx
269 text/vnd.wap.wmlscript wmls
272 text/x-Algol68
273 text/x-asm asm|s
274 text/x-audiosoft-intra aip
text/x-awk awk
275 text/x-bcpl
276 text/x-c c|cc|h
277 text/x-c++ cpp|cxx|c++
286 text/xml xml|pom|iml|plist
287 text/x-m m
288 text/x-msdos-batch bat
text/x-ms-regedit reg
text/x-objective-c
289 text/x-pascal p
290 text/x-perl pl
291 text/x-php php
text/x-po po
292 text/x-python py
293 text/x-ruby rb
text/x-sass sass
text/x-scss scss
294 text/x-server-parsed-html ssi
295 text/x-setext etx
296 text/x-sgml sgm|sgml
297 text/x-shellscript sh
298 text/x-speech talk
text/x-tcl
299 text/x-tex tex
300 text/x-uil uil
301 text/x-uuencode uue
302 text/x-vcalendar vcs
text/x-vcard vcf
303 video/animaflex afl
304 video/avi avi
305 video/avs-video avs
video/MP2T
306 video/mp4 mp4
307 video/mpeg m1v|m2v|mpe|mpeg|mpg
308 video/quicktime moov|mov|qt
317 video/x-dl dl
318 video/x-dv dif|dv
319 video/x-fli fli
video/x-flv flv
320 video/x-isvideo isu
!video/x-jng jng
video/x-m4v m4v
video/x-matroska mkv
video/x-mng mng
321 video/x-motion-jpeg mjpg
322 video/x-ms-asf asf|asx|wmv asf|asx
video/x-msvideo divx
323 video/x-qtc qtc
324 video/x-sgi-movie movie|mv
325 x-epoc/x-sisx-app application/x-7z-compressed 7z
326 application/x-zstd-dictionary application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
327 application/vnd.ms-outlook text/x-po msg po
328 image/x-olympus-orf application/x-rpm orf rpm
329 image/x-nikon-nef application/x-debian-package nef deb
330 image/x-fuji-raf application/vnd.iccprofile raf icm
331 image/x-panasonic-raw application/dicom rw2|raw dcm
332 image/x-adobe-dng image/x-exr dng exr
333 image/x-canon-cr2 application/vnd.iccprofile cr2 icm
334 image/x-canon-crw video/x-matroska crw mkv
335 image/x-dcraw application/x-empty
336 image/x-kodak-dcr model/vnd.gdl dcr gdl
337 image/x-kodak-k25 model/vnd.gs.gdl k25 gdsl
338 image/x-kodak-kdc font/woff kdc woff
339 image/x-minolta-mrw font/woff2 mrw woff2
340 image/x-pentax-pef application/epub+zip pef epub
341 image/x-sigma-x3f application/x-mobipocket-ebook xf3 mobi
342 image/x-sony-arw audio/x-flac arw flac
343 image/x-sony-sr2 application/x-rar sr2 rar
344 image/x-sony-srf video/x-msvideo srf divx
345 image/x-epson-erf video/x-flv erf flv
346 sist2/sidecar application/x-kdelnk s2meta
347 text/x-tcl
348 application/ogg ogv
349 application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
350 application/vnd.ms-cab-compressed cab
351 audio/mp4 m4b
352 !image/vnd.djvu djvu
353 application/x-ms-reader lit
354 application/CDFV2-corrupt
355 text/x-vcard vcf
356 application/x-innosetup
357 application/winhelp hlp
358 image/x-tga
359 application/x-wine-extension-ini
360 application/x-cbz cbz
361 application/x-cbr cbr
362 application/x-ms-compress-szdd fon

View File

@@ -1,74 +1,34 @@
{
"properties": {
"_tie": {
"type": "keyword",
"doc_values": true
},
"checksum": {
"type": "keyword",
"index": false
},
"_depth": {
"type": "integer"
},
"path": {
"type": "text",
"analyzer": "path_analyzer",
"copy_to": "suggest-path",
"fielddata": true,
"fields": {
"nGram": {
"type": "text",
"analyzer": "my_nGram"
},
"text": {
"type": "text",
"analyzer": "content_analyzer"
}
}
"copy_to": "suggest-path"
},
"suggest-path": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
"analyzer": "keyword"
},
"mime": {
"type": "keyword"
},
"parent": {
"type": "keyword",
"index": false
},
"thumbnail": {
"type": "integer",
"index": false
},
"videoc": {
"type": "keyword",
"index": false
"type": "keyword"
},
"audioc": {
"type": "keyword",
"index": false
"type": "keyword"
},
"duration": {
"type": "integer",
"index": false
"type": "float"
},
"width": {
"type": "integer",
"index": false
"type": "integer"
},
"height": {
"type": "integer",
"index": false
},
"pages": {
"type": "integer",
"index": false
"type": "integer"
},
"mtime": {
"type": "date",
"format": "epoch_millis"
"type": "integer"
},
"size": {
"type": "long"
@@ -79,7 +39,6 @@
"name": {
"analyzer": "content_analyzer",
"type": "text",
"fielddata": true,
"fields": {
"nGram": {
"type": "text",
@@ -111,23 +70,6 @@
"analyzer": "my_nGram",
"type": "text"
},
"_keyword.*": {
"type": "keyword"
},
"_text.*": {
"analyzer": "content_analyzer",
"type": "text",
"fields": {
"nGram": {
"type": "text",
"analyzer": "my_nGram"
}
}
},
"_url": {
"type": "keyword",
"index": false
},
"content": {
"analyzer": "content_analyzer",
"type": "text",
@@ -138,70 +80,6 @@
"analyzer": "my_nGram"
}
}
},
"tag": {
"type": "text",
"fielddata": true,
"analyzer": "tag_analyzer",
"copy_to": "suggest-tag"
},
"suggest-tag": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
},
"exif_make": {
"type": "text"
},
"exif_model": {
"type": "text"
},
"exif:software": {
"type": "text"
},
"exif_exposure_time": {
"type": "keyword"
},
"exif_fnumber": {
"type": "keyword"
},
"exif_iso_speed_ratings": {
"type": "keyword"
},
"exif_focal_length": {
"type": "keyword"
},
"exif_user_comment": {
"type": "text"
},
"exif_gps_longitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dec": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dec": {
"type": "keyword",
"index": false
},
"author": {
"type": "text"
},
"modified_by": {
"type": "text"
}
}
}

View File

@@ -1,10 +0,0 @@
{
"description": "Copy _id to _tie, save path depth",
"processors": [
{
"script": {
"source": "ctx._tie = ctx._id; ctx._depth = ctx.path.length() == 0 ? 0 : 1 + ctx.path.length() - ctx.path.replace(\"/\", \"\").length();"
}
}
]
}

View File

@@ -1,22 +1,15 @@
{
"index": {
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0,
"highlight.max_analyzed_offset": 1000000
"codec": "best_compression"
},
"analysis": {
"tokenizer": {
"path_tokenizer": {
"type": "path_hierarchy",
"delimiter": "/"
},
"tag_tokenizer": {
"type": "path_hierarchy",
"delimiter": "."
"type": "path_hierarchy"
},
"my_nGram_tokenizer": {
"type": "ngram",
"type": "nGram",
"min_gram": 3,
"max_gram": 3
}
@@ -28,18 +21,6 @@
"lowercase"
]
},
"tag_analyzer": {
"tokenizer": "tag_tokenizer",
"filter": [
"lowercase"
]
},
"case_insensitive_kw_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase"
]
},
"my_nGram": {
"tokenizer": "my_nGram_tokenizer",
"filter": [

View File

@@ -1,58 +0,0 @@
{
"index": {
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0
},
"analysis": {
"tokenizer": {
"path_tokenizer": {
"type": "path_hierarchy",
"delimiter": "/"
},
"tag_tokenizer": {
"type": "path_hierarchy",
"delimiter": "."
},
"my_nGram_tokenizer": {
"type": "nGram",
"min_gram": 3,
"max_gram": 3
}
},
"analyzer": {
"path_analyzer": {
"tokenizer": "path_tokenizer",
"filter": [
"lowercase"
]
},
"tag_analyzer": {
"tokenizer": "tag_tokenizer",
"filter": [
"lowercase"
]
},
"case_insensitive_kw_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase"
]
},
"my_nGram": {
"tokenizer": "my_nGram_tokenizer",
"filter": [
"lowercase",
"asciifolding"
]
},
"content_analyzer": {
"tokenizer": "standard",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
}

View File

@@ -2,9 +2,16 @@
rm -rf index.sist2/
rm web/js/bundle.js 2> /dev/null
cat `ls web/js/*.min.js` > web/js/bundle.js
cat web/js/{util,dom,search}.js >> web/js/bundle.js
rm web/css/bundle*.css 2> /dev/null
cat web/css/*.min.css > web/css/bundle.css
cat web/css/light.css >> web/css/bundle.css
cat web/css/*.min.css > web/css/bundle_dark.css
cat web/css/dark.css >> web/css/bundle_dark.css
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c
python3 scripts/magic_static.py > src/magic_generated.c
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h

View File

@@ -1,17 +0,0 @@
#!/usr/bin/env bash
VCPKG_ROOT="/vcpkg"
git submodule update --init --recursive
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j $(nproc)
strip sist2
./sist2 -v > VERSION
mv sist2 sist2-x64-linux
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j $(nproc)
mv sist2_debug sist2-x64-linux-debug

View File

@@ -1,17 +0,0 @@
#!/usr/bin/env bash
VCPKG_ROOT="/vcpkg"
git submodule update --init --recursive
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j $(nproc)
strip sist2
mv sist2 sist2-arm64-linux
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j $(nproc)
strip sist2
mv sist2_debug sist2-arm64-linux-debug

57
scripts/get_static_libs.sh Executable file
View File

@@ -0,0 +1,57 @@
#!/usr/bin/env bash
cd lib
cd mupdf
HAVE_X11=no HAVE_GLUT=no make -j 4
cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
--disable-ffprobe --disable-doc\
--disable-manpages --disable-postproc --disable-avfilter \
--disable-alsa --disable-lzma --disable-xlib --disable-debug\
--disable-vdpau --disable-vaapi --disable-sdl2 --disable-network
make -j 4
cd ..
mv ffmpeg/libavcodec/libavcodec.a .
mv ffmpeg/libavformat/libavformat.a .
mv ffmpeg/libavutil/libavutil.a .
mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a .
# onion
cd onion
mkdir build 2> /dev/null
cd build
cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_USE_JPEG=false \
-DONION_USE_JPEG=false -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
-DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
-DONION_USE_BINDINGS_CPP=false ..
make -j 4
cd ../..
mv onion/build/src/onion/libonion_static.a .
#bzip2
git clone https://github.com/enthought/bzip2-1.0.6
cd bzip2-1.0.6
make -j 4
cd ..
mv bzip2-1.0.6/libbz2.a .
# magic
git clone https://github.com/threatstack/libmagic
cd libmagic
./autogen.sh
./configure --enable-static --disable-shared
make -j 4
cd ..
mv libmagic/src/.libs/libmagic.a .
cd ..

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env bash
cd lib
# mupdf
cd mupdf
HAVE_X11=no HAVE_GLUT=no gmake -j 4
cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
--disable-ffprobe --disable-doc\
--disable-manpages --disable-postproc --disable-avfilter \
--disable-alsa --disable-lzma --disable-xlib --disable-debug\
--disable-vdpau --disable-vaapi --disable-sdl2 --disable-network
gmake -j 4
cd ..
mv ffmpeg/libavcodec/libavcodec.a .
mv ffmpeg/libavformat/libavformat.a .
mv ffmpeg/libavutil/libavutil.a .
mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a .
#bzip2
git clone https://github.com/enthought/bzip2-1.0.6
cd bzip2-1.0.6
make -j 4
cd ..
mv bzip2-1.0.6/libbz2.a .
# magic
git clone https://github.com/threatstack/libmagic
cd libmagic
./autogen.sh
./configure --enable-static --disable-shared
make -j 4
cd ..
mv libmagic/src/.libs/libmagic.a .
cd ..

View File

@@ -1,10 +1,6 @@
import json
files = [
"schema/mappings.json",
"schema/settings.json",
"schema/settings_legacy.json",
"schema/pipeline.json",
]
@@ -13,7 +9,6 @@ def clean(filepath):
for file in files:
with open(file, "r") as f:
data = json.dumps(json.load(f), separators=(",", ":")).encode()
data += b'\0'
with open(file, "rb") as f:
data = f.read()
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -1,8 +0,0 @@
try:
with open("/usr/lib/file/magic.mgc", "rb") as f:
data = f.read()
except:
data = bytes([])
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -3,7 +3,6 @@ noparse = set()
ext_in_hash = set()
major_mime = {
"sist2": 0,
"model": 1,
"example": 2,
"message": 3,
@@ -13,12 +12,12 @@ major_mime = {
"audio": 7,
"image": 8,
"text": 9,
"application": 10,
"x-epoc": 11,
"application": 10
}
pdf = (
"application/pdf",
"application/x-cbz",
"application/epub+zip",
"application/vnd.ms-xpsdocument",
)
@@ -34,68 +33,6 @@ font = (
"font/woff2"
)
# Archive "formats"
archive = (
"application/x-tar",
"application/zip",
"application/x-rar",
"application/x-arc",
"application/x-warc",
"application/x-7z-compressed",
)
# Archive "filters"
arc_filter = (
"application/gzip",
"application/x-bzip2",
"application/x-xz",
"application/x-zstd",
"application/x-lzma",
"application/x-lz4",
"application/x-lzip",
"application/x-lzop",
)
doc = (
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
)
mobi = (
"application/x-mobipocket-ebook",
"application/vnd.amazon.mobi8-ebook"
)
markup = (
"text/xml",
"text/html",
"text/x-sgml"
)
raw = (
"image/x-olympus-orf",
"image/x-nikon-nef",
"image/x-fuji-raf",
"image/x-panasonic-raw",
"image/x-adobe-dng",
"image/x-canon-cr2",
"image/x-canon-crw",
"image/x-dcraw",
"image/x-kodak-dcr",
"image/x-kodak-k25",
"image/x-kodak-kdc",
"image/x-minolta-mrw",
"image/x-pentax-pef",
"image/x-sigma-x3f",
"image/x-sony-arw",
"image/x-sony-sr2",
"image/x-sony-srf",
"image/x-minolta-mrw",
"image/x-pentax-pef",
"image/x-epson-erf",
)
cnt = 1
@@ -110,24 +47,8 @@ def mime_id(mime):
mime_id += " | 0x40000000"
elif mime in font:
mime_id += " | 0x20000000"
elif mime in archive:
mime_id += " | 0x10000000"
elif mime in arc_filter:
mime_id += " | 0x08000000"
elif mime in doc:
mime_id += " | 0x04000000"
elif mime in mobi:
mime_id += " | 0x02000000"
elif mime in markup:
mime_id += " | 0x01000000"
elif mime in raw:
mime_id += " | 0x00800000"
elif mime == "application/x-empty":
cnt -= 1
return "1"
elif mime == "sist2/sidecar":
cnt -= 1
return "2"
return mime_id
@@ -135,7 +56,7 @@ def clean(t):
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
with open("scripts/mime.csv") as f:
with open("mime.csv") as f:
for l in f:
mime, ext_list = l.split(",")
if l.startswith("!"):
@@ -147,7 +68,7 @@ with open("scripts/mime.csv") as f:
print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C")
print("#include <glib.h>\n")
print("#include <glib-2.0/glib.h>\n")
print("#include <stdlib.h>\n")
# Enum
print("enum mime {")

View File

@@ -1,10 +1,10 @@
files = [
"sist2-vue/src/assets/favicon.ico",
"sist2-vue/dist/css/chunk-vendors.css",
"sist2-vue/dist/css/index.css",
"sist2-vue/dist/js/chunk-vendors.js",
"sist2-vue/dist/js/index.js",
"sist2-vue/dist/index.html",
"web/css/bundle.css",
"web/css/bundle_dark.css",
"web/js/bundle.js",
"web/img/sprite-skin-flat.png",
"web/img/sprite-skin-flat-dark.png",
"web/search.html",
]
@@ -13,10 +13,6 @@ def clean(filepath):
for file in files:
try:
with open(file, "rb") as f:
data = f.read()
except:
data = bytes([])
with open(file, "rb") as f:
data = f.read()
print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -1,3 +0,0 @@
docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.14.0

View File

@@ -1,3 +0,0 @@
docker run --rm -it --name "sist2-dev-es-6"\
-p 9202:9200 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:6.8.0

View File

@@ -1,3 +0,0 @@
docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.1.2

View File

@@ -1,5 +0,0 @@
module.exports = {
presets: [
'@vue/cli-plugin-babel/preset'
]
}

View File

@@ -1 +0,0 @@
.navbar[data-v-27bc1d68]{box-shadow:0 .125rem .25rem rgba(0,0,0,.08)!important;border-radius:0}.theme-black .navbar[data-v-27bc1d68]{background:rgba(84,107,122,.18823529411764706);border-bottom:none}.navbar-brand[data-v-27bc1d68]{color:#222!important;font-size:1.75rem;padding:0}.navbar-brand[data-v-27bc1d68]:hover{color:#000!important}.version[data-v-27bc1d68]{color:#222!important;margin-left:-18px;margin-top:-14px;font-size:11px;font-family:monospace}.btn-link[data-v-27bc1d68]{color:#222}body,html{height:100%}#app{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;color:#2c3e50;padding-bottom:1em;min-height:100%}.info-icon{width:1rem;margin-right:.2rem;cursor:pointer;line-height:1rem;height:1rem;background-image:url();filter:brightness(45%);display:block}.tabs{margin-top:10px}.modal-title{text-overflow:ellipsis;overflow:hidden;white-space:nowrap}@media screen and (min-width:1500px){.container{max-width:1440px}}label{margin-top:.5rem;margin-bottom:0}.shrink[data-v-9b017c42]{flex-grow:inherit}#task-history[data-v-46960281]{font-family:monospace;font-size:12px}#log-tail-output span{display:block}span.DEBUG{color:#9e9e9e}span.WARNING{color:#ffb300}span.INFO{color:#039be5}span.ERROR,span.FATAL{color:#f4511e}span.ADMIN{color:#ee05ff}#log-tail-output{font-size:13px;font-family:monospace;padding:6px;background-color:#f5f5f5;border:1px solid #ccc;border-radius:4px;margin:3px;white-space:pre;color:#000;overflow:hidden}

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -1 +0,0 @@
<!DOCTYPE html><html lang=""><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1"><link rel="icon" href="favicon.ico"><title>sist2-admin</title><link href="css/app.0f0b676b.css" rel="preload" as="style"><link href="css/chunk-vendors.aa66c7e8.css" rel="preload" as="style"><link href="js/app.b34f501e.js" rel="preload" as="script"><link href="js/chunk-vendors.fad0ee6a.js" rel="preload" as="script"><link href="css/chunk-vendors.aa66c7e8.css" rel="stylesheet"><link href="css/app.0f0b676b.css" rel="stylesheet"></head><body><noscript><strong>We're sorry but sist2-admin-vue doesn't work properly without JavaScript enabled. Please enable it to continue.</strong></noscript><div id="app"></div><script src="js/chunk-vendors.fad0ee6a.js"></script><script src="js/app.b34f501e.js"></script></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -1,52 +0,0 @@
{
"name": "sist2-admin-vue",
"version": "0.1.0",
"private": true,
"scripts": {
"serve": "vue-cli-service serve",
"build": "vue-cli-service build",
"watch": "vue-cli-service build --watch"
},
"dependencies": {
"axios": "^0.27.2",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"moment": "^2.29.3",
"socket.io-client": "^4.5.1",
"vue": "^2.6.14",
"vue-i18n": "^8.24.4",
"vue-router": "^3.5.4",
"vuex": "^3.4.0"
},
"devDependencies": {
"@vue/cli-plugin-babel": "~4.5.13",
"@vue/cli-plugin-eslint": "~4.5.13",
"@vue/cli-plugin-router": "~4.5.13",
"@vue/cli-plugin-vuex": "~4.5.13",
"@vue/cli-service": "~4.5.13",
"babel-eslint": "^10.1.0",
"bootstrap": "^4.5.2",
"eslint": "^6.7.2",
"eslint-plugin-vue": "^6.2.2",
"vue-template-compiler": "^2.6.11"
},
"eslintConfig": {
"root": true,
"env": {
"node": true
},
"extends": [
"plugin:vue/essential",
"eslint:recommended"
],
"parserOptions": {
"parser": "babel-eslint"
},
"rules": {}
},
"browserslist": [
"> 1%",
"last 2 versions",
"not dead"
]
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -1,17 +0,0 @@
<!DOCTYPE html>
<html lang="">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<link rel="icon" href="<%= BASE_URL %>favicon.ico">
<title>sist2-admin</title>
</head>
<body>
<noscript>
<strong>We're sorry but <%= htmlWebpackPlugin.options.title %> doesn't work properly without JavaScript enabled. Please enable it to continue.</strong>
</noscript>
<div id="app"></div>
<!-- built files will be auto injected -->
</body>
</html>

View File

@@ -1,98 +0,0 @@
<template>
<div id="app">
<NavBar></NavBar>
<b-container class="pt-4">
<b-alert show dismissible variant="info">
This is a beta version of sist2-admin. Please submit bug reports, usability issues and feature requests
to the <a href="https://github.com/simon987/sist2/issues/new/choose" target="_blank">issue tracker on Github</a>. Thank you!
</b-alert>
<router-view/>
</b-container>
</div>
</template>
<script>
import NavBar from "@/components/NavBar";
import Sist2AdminApi from "@/Sist2AdminApi";
export default {
components: {NavBar},
data() {
return {
socket: null
}
},
mounted() {
Sist2AdminApi.getSist2AdminInfo()
.then(resp => this.$store.commit("setSist2AdminInfo", resp.data));
this.$store.dispatch("loadBrowserSettings");
this.connectNotifications();
// this.socket.onclose = this.connectNotifications;
},
methods: {
connectNotifications() {
this.socket = new WebSocket(`ws://${window.location.host}/notifications`);
this.socket.onopen = () => {
this.socket.send("Hello from client");
}
this.socket.onmessage = e => {
const notification = JSON.parse(e.data);
if (notification.message) {
notification.messageString = this.$t(notification.message).toString();
}
this.$store.dispatch("notify", notification)
}
}
}
}
</script>
<style>
html, body {
height: 100%;
}
#app {
/*font-family: Avenir, Helvetica, Arial, sans-serif;*/
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
/*text-align: center;*/
color: #2c3e50;
padding-bottom: 1em;
min-height: 100%;
}
.info-icon {
width: 1rem;
margin-right: 0.2rem;
cursor: pointer;
line-height: 1rem;
height: 1rem;
background-image: url();
filter: brightness(45%);
display: block;
}
.tabs {
margin-top: 10px;
}
.modal-title {
text-overflow: ellipsis;
overflow: hidden;
white-space: nowrap;
}
@media screen and (min-width: 1500px) {
.container {
max-width: 1440px;
}
}
label {
margin-top: 0.5rem;
margin-bottom: 0;
}
</style>

View File

@@ -1,117 +0,0 @@
import axios from "axios";
class Sist2AdminApi {
constructor() {
this.baseUrl = window.location.protocol + "//" + window.location.host;
}
getJobs() {
return axios.get(`${this.baseUrl}/api/job/`);
}
getFrontends() {
return axios.get(`${this.baseUrl}/api/frontend/`);
}
getTasks() {
return axios.get(`${this.baseUrl}/api/task/`);
}
killTask(taskId) {
return axios.post(`${this.baseUrl}/api/task/${taskId}/kill`)
}
getTaskHistory() {
return axios.get(`${this.baseUrl}/api/task/history`);
}
/**
* @param {string} name
*/
getJob(name) {
return axios.get(`${this.baseUrl}/api/job/${name}`);
}
/**
* @param {string} name
*/
getFrontend(name) {
return axios.get(`${this.baseUrl}/api/frontend/${name}`);
}
/**
* @param {string} name
*/
startFrontend(name) {
return axios.post(`${this.baseUrl}/api/frontend/${name}/start`);
}
/**
* @param {string} name
*/
stopFrontend(name) {
return axios.post(`${this.baseUrl}/api/frontend/${name}/stop`);
}
/**
* @param {string} name
* @param job
*/
updateJob(name, job) {
return axios.put(`${this.baseUrl}/api/job/${name}`, job);
}
/**
* @param {string} name
* @param frontend
*/
updateFrontend(name, frontend) {
return axios.put(`${this.baseUrl}/api/frontend/${name}`, frontend);
}
/**
* @param {string} name
*/
runJob(name) {
return axios.get(`${this.baseUrl}/api/job/${name}/run`);
}
/**
* @param {string} name
*/
deleteJob(name) {
return axios.delete(`${this.baseUrl}/api/job/${name}`);
}
/**
* @param {string} name
*/
deleteFrontend(name) {
return axios.delete(`${this.baseUrl}/api/frontend/${name}`);
}
/**
* @param {string} name
*/
createJob(name) {
return axios.post(`${this.baseUrl}/api/job/${name}`);
}
/**
* @param {string} name
*/
createFrontend(name) {
return axios.post(`${this.baseUrl}/api/frontend/${name}`);
}
pingEs(url, insecure) {
return axios.get(`${this.baseUrl}/api/ping_es`, {params: {url, insecure}});
}
getSist2AdminInfo() {
return axios.get(`${this.baseUrl}/api/`);
}
}
export default new Sist2AdminApi()

View File

@@ -1,31 +0,0 @@
<template>
<b-list-group-item action :to="`/frontend/${frontend.name}`">
<div class="d-flex w-100 justify-content-between">
<h5 class="mb-1" style="display: block">
{{ frontend.name }}
<b-badge variant="light">{{ formatBindAddress(frontend.web_options.bind) }}</b-badge>
</h5>
<div>
<b-badge v-if="frontend.running" variant="success">{{$t("online")}}</b-badge>
<b-badge v-else variant="secondary">{{$t("offline")}}</b-badge>
</div>
</div>
</b-list-group-item>
</template>
<script>
import {formatBindAddress} from "@/util";
export default {
name: "FrontendListItem",
props: ["frontend"],
data() {
return {
formatBindAddress
}
}
}
</script>

View File

@@ -1,64 +0,0 @@
<template>
<div>
<label>{{ $t("indexOptions.threads") }}</label>
<b-form-input v-model="options.threads" type="number" min="1" @change="update()"></b-form-input>
<label>{{ $t("webOptions.esUrl") }}</label>
<b-alert :variant="esTestOk ? 'success' : 'danger'" :show="showEsTestAlert" class="mt-1">
{{ esTestMessage }}
</b-alert>
<b-input-group>
<b-form-input v-model="options.es_url" @change="update()"></b-form-input>
<b-input-group-append>
<b-button variant="outline-primary" @click="testEs()">{{ $t("test") }}</b-button>
</b-input-group-append>
</b-input-group>
<label>{{ $t("indexOptions.esIndex") }}</label>
<b-form-input v-model="options.es_index" @change="update()"></b-form-input>
<br>
<b-form-checkbox v-model="options.es_insecure_ssl" :disabled="!options.es_url.startsWith('https')" @change="update()">
{{ $t("webOptions.esInsecure") }}
</b-form-checkbox>
<label>{{ $t("indexOptions.batchSize") }}</label>
<b-form-input v-model="options.batch_size" type="number" min="1" @change="update()"></b-form-input>
<label>{{ $t("indexOptions.script") }}</label>
<b-form-textarea v-model="options.script" rows="6" @change="update()"></b-form-textarea>
</div>
</template>
<script>
import sist2AdminApi from "@/Sist2AdminApi";
export default {
name: "IndexOptions",
props: ["options"],
data() {
return {
showEsTestAlert: false,
esTestOk: false,
esTestMessage: "",
}
},
methods: {
update() {
this.$emit("change", this.options);
},
testEs() {
sist2AdminApi.pingEs(this.options.es_url, this.options.es_insecure_ssl).then((resp) => {
this.showEsTestAlert = true;
this.esTestOk = resp.data.ok;
this.esTestMessage = resp.data.message;
});
}
},
}
</script>
<style scoped>
</style>

View File

@@ -1,42 +0,0 @@
<template>
<div>
<h5>{{ $t("selectJobs") }}</h5>
<b-progress v-if="loading" striped animated value="100"></b-progress>
<b-form-group v-else>
<b-form-checkbox-group
v-if="jobs.length > 0"
:checked="frontend.jobs"
@input="frontend.jobs = $event; $emit('input')"
>
<div v-for="job in jobs" :key="job.name">
<b-form-checkbox :disabled="job.status !== 'indexed'" :value="job.name">[{{ job.name }}]</b-form-checkbox>
<br/>
</div>
</b-form-checkbox-group>
<div v-else>
<span class="text-muted">{{ $t('jobOptions.noJobAvailable') }}</span>
&nbsp;<router-link to="/">{{$t("create")}}</router-link>
</div>
</b-form-group>
</div>
</template>
<script>
import Sist2AdminApi from "@/Sist2AdminApi";
export default {
name: "JobCheckboxGroup",
props: ["frontend"],
mounted() {
Sist2AdminApi.getJobs().then(resp => {
this.jobs = resp.data;
this.loading = false;
});
},
data() {
return {
loading: true,
}
}
}
</script>

View File

@@ -1,56 +0,0 @@
<template>
<b-list-group-item class="flex-column align-items-start" action :to="`job/${job.name}`">
<div class="d-flex w-100 justify-content-between">
<div>
<h5 class="mb-1">
{{ job.name }}
</h5>
</div>
<div>
<b-row>
<b-col>
<small v-if="job.last_index_date">
{{ $t("scanned") }} {{ formatLastIndexDate(job.last_index_date) }}</small>
<div v-else>&nbsp;</div>
</b-col>
</b-row>
<b-row v-if="job.schedule_enabled">
<b-col>
<small><code>{{job.cron_expression }}</code></small>
</b-col>
</b-row>
<b-row v-else>
<b-col>
&nbsp;
</b-col>
</b-row>
</div>
</div>
</b-list-group-item>
</template>
<script>
import moment from "moment";
export default {
name: "JobListItem",
props: ["job"],
methods: {
formatLastIndexDate(dateString) {
if (dateString === null) {
return "";
}
const date = Date.parse(dateString);
return moment(date).fromNow();
}
}
}
</script>
<style scoped>
</style>

View File

@@ -1,51 +0,0 @@
<template>
<div>
<b-form-checkbox :checked="desktopNotificationsEnabled" @change="updateNotifications($event)">
{{ $t("jobOptions.desktopNotifications") }}
</b-form-checkbox>
<b-form-checkbox v-model="job.schedule_enabled" @change="update()">
{{ $t("jobOptions.scheduleEnabled") }}
</b-form-checkbox>
<label>{{ $t("jobOptions.cron") }}</label>
<b-form-input class="text-monospace" :state="cronValid" v-model="job.cron_expression" :disabled="!job.schedule_enabled" @change="update()"></b-form-input>
</div>
</template>
<script>
export default {
name: "JobOptions",
props: ["job"],
data() {
return {
cronValid: undefined
}
},
computed: {
desktopNotificationsEnabled() {
return this.$store.state.jobDesktopNotificationMap[this.job.name];
}
},
methods: {
updateNotifications(value) {
this.$store.dispatch("setJobDesktopNotification", {
job: this.job.name,
enabled: value
})
},
update() {
if (this.job.schedule_enabled) {
this.cronValid = /((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})/.test(this.job.cron_expression);
} else {
this.cronValid = undefined;
}
if (this.cronValid !== false) {
this.$emit("change", this.job);
}
},
},
}
</script>

View File

@@ -1,69 +0,0 @@
<template>
<b-navbar>
<b-navbar-brand to="/">
<Sist2Icon></Sist2Icon>
</b-navbar-brand>
<b-button class="ml-auto" to="/task" variant="link">{{ $t("tasks") }}</b-button>
</b-navbar>
</template>
<script>
import Sist2Icon from "@/components/icons/Sist2Icon";
export default {
name: "NavBar",
components: {Sist2Icon},
methods: {
tagline() {
return this.$store.state.sist2Info.tagline;
},
sist2Version() {
return this.$store.state.sist2Info.version;
},
isDebug() {
return this.$store.state.sist2Info.debug;
},
isLegacy() {
return this.$store.state.sist2Info.esVersionLegacy;
},
hideLegacy() {
return this.$store.state.optHideLegacy;
}
}
}
</script>
<style scoped>
.navbar {
box-shadow: 0 0.125rem 0.25rem rgb(0 0 0 / 8%) !important;
border-radius: 0;
}
.theme-black .navbar {
background: #546b7a30;
border-bottom: none;
}
.navbar-brand {
color: #222 !important;
font-size: 1.75rem;
padding: 0;
}
.navbar-brand:hover {
color: #000 !important;
}
.version {
color: #222 !important;
margin-left: -18px;
margin-top: -14px;
font-size: 11px;
font-family: monospace;
}
.btn-link {
color: #222;
}
</style>

View File

@@ -1,111 +0,0 @@
<template>
<div>
<label>{{ $t("scanOptions.path") }}</label>
<b-form-input v-model="options.path" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.threads") }}</label>
<b-form-input type="number" min="1" v-model="options.threads" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.memThrottle") }}</label>
<b-form-input type="number" min="0" v-model="options.mem_throttle" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.thumbnailQuality") }}</label>
<b-form-input type="number" min="1" max="31" v-model="options.thumbnail_quality" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.thumbnailCount") }}</label>
<b-form-input type="number" min="0" max="1000" v-model="options.thumbnail_count" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.thumbnailSize") }}</label>
<b-form-input type="number" min="100" v-model="options.thumbnail_size" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.contentSize") }}</label>
<b-form-input type="number" min="0" v-model="options.content_size" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.rewriteUrl") }}</label>
<b-form-input v-model="options.rewrite_url" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.depth") }}</label>
<b-form-input type="number" min="0" v-model="options.depth" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.archive") }}</label>
<b-form-select :options="['skip', 'list', 'shallow', 'recurse']" v-model="options.archive"
@change="update()"></b-form-select>
<label>{{ $t("scanOptions.archivePassphrase") }}</label>
<b-form-input v-model="options.archive_passphrase" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.ocrLang") }}</label>
<b-alert variant="danger" show v-if="selectedOcrLangs.length === 0 && !disableOcrLang">{{ $t("scanOptions.ocrLangAlert") }}</b-alert>
<b-checkbox-group :disabled="disableOcrLang" v-model="selectedOcrLangs" @input="onOcrLangChange">
<b-checkbox v-for="lang in ocrLangs" :key="lang" :value="lang">{{ lang }}</b-checkbox>
</b-checkbox-group>
<!-- <b-form-input readonly v-model="options.ocr_lang" @change="update()"></b-form-input>-->
<div style="height: 10px"></div>
<b-form-checkbox v-model="options.ocr_images" @change="update()">
{{ $t("scanOptions.ocrImages") }}
</b-form-checkbox>
<b-form-checkbox v-model="options.ocr_ebooks" @change="update()">
{{ $t("scanOptions.ocrEbooks") }}
</b-form-checkbox>
<label>{{ $t("scanOptions.exclude") }}</label>
<b-form-input v-model="options.exclude" @change="update()"
:placeholder="$t('scanOptions.excludePlaceholder')"></b-form-input>
<div style="height: 10px"></div>
<b-form-checkbox v-model="options.fast" @change="update()">
{{ $t("scanOptions.fast") }}
</b-form-checkbox>
<b-form-checkbox v-model="options.checksums" @change="update()">
{{ $t("scanOptions.checksums") }}
</b-form-checkbox>
<b-form-checkbox v-model="options.read_subtitles" @change="update()">
{{ $t("scanOptions.readSubtitles") }}
</b-form-checkbox>
<label>{{ $t("scanOptions.memBuffer") }}</label>
<b-form-input type="number" min="0" v-model="options.mem_buffer" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.treemapThreshold") }}</label>
<b-form-input type="number" min="0" v-model="options.treemap_threshold" @change="update()"></b-form-input>
</div>
</template>
<script>
export default {
name: "ScanOptions",
props: ["options"],
data() {
return {
disableOcrLang: false,
selectedOcrLangs: []
}
},
computed: {
ocrLangs() {
return this.$store.state.sist2AdminInfo?.tesseract_langs || [];
}
},
methods: {
onOcrLangChange() {
this.options.ocr_lang = this.selectedOcrLangs.join("+");
},
update() {
this.disableOcrLang = this.options.ocr_images === false && this.options.ocr_ebooks === false;
this.$emit("change", this.options);
},
},
mounted() {
this.disableOcrLang = this.options.ocr_images === false && this.options.ocr_ebooks === false;
this.selectedOcrLangs = this.options.ocr_lang ? this.options.ocr_lang.split("+") : [];
}
}
</script>

View File

@@ -1,57 +0,0 @@
<template>
<b-list-group-item>
<b-row style="height: 50px">
<b-col><h5>{{ task.display_name }}</h5></b-col>
<b-col class="shrink">
<router-link class="btn btn-link" :to="`/log/${task.id}`">{{ $t("logs") }}</router-link>
</b-col>
<b-col class="shrink">
<b-btn variant="link" @click="killTask(task.id)">{{ $t("kill") }}</b-btn>
</b-col>
</b-row>
<b-row>
<b-col>
<b-progress :max="task.progress.count">
<b-progress-bar :value="task.progress.done" :label-html="label" :striped="!task.progress.waiting"/>
</b-progress>
</b-col>
</b-row>
</b-list-group-item>
</template>
<script>
import sist2AdminApi from "@/Sist2AdminApi";
export default {
name: "TaskListItem",
props: ["task"],
computed: {
label() {
const count = this.task.progress.count;
const done = this.task.progress.done;
return `<span>${done}/${count}</span>`
}
},
methods: {
killTask(taskId) {
sist2AdminApi.killTask(taskId).then(() => {
this.$bvToast.toast(this.$t("killConfirmation"), {
title: this.$t("killConfirmationTitle"),
variant: "success",
toaster: "b-toaster-bottom-right"
});
});
}
}
}
</script>
<style scoped>
.shrink {
flex-grow: inherit;
}
</style>

View File

@@ -1,75 +0,0 @@
<template>
<div>
<label>{{ $t("webOptions.esUrl") }}</label>
<b-alert :variant="esTestOk ? 'success' : 'danger'" :show="showEsTestAlert" class="mt-1">
{{ esTestMessage }}
</b-alert>
<b-input-group>
<b-form-input v-model="options.es_url" @change="update()"></b-form-input>
<b-input-group-append>
<b-button variant="outline-primary" @click="testEs()">{{ $t("test") }}</b-button>
</b-input-group-append>
</b-input-group>
<b-form-checkbox v-model="options.es_insecure_ssl" :disabled="!this.options.es_url.startsWith('https')" @change="update()">
{{ $t("webOptions.esInsecure") }}
</b-form-checkbox>
<label>{{ $t("webOptions.esIndex") }}</label>
<b-form-input v-model="options.es_index" @change="update()"></b-form-input>
<label>{{ $t("webOptions.lang") }}</label>
<b-form-select v-model="options.lang" :options="['en', 'fr', 'zh-CN']" @change="update()"></b-form-select>
<label>{{ $t("webOptions.bind") }}</label>
<b-form-input v-model="options.bind" @change="update()"></b-form-input>
<label>{{ $t("webOptions.tagline") }}</label>
<b-form-textarea v-model="options.tagline" @change="update()"></b-form-textarea>
<label>{{ $t("webOptions.auth") }}</label>
<b-form-input v-model="options.auth" @change="update()"></b-form-input>
<label>{{ $t("webOptions.tagAuth") }}</label>
<b-form-input v-model="options.tag_auth" @change="update()"></b-form-input>
</div>
</template>
<script>
import sist2AdminApi from "@/Sist2AdminApi";
export default {
name: "WebOptions",
props: ["options", "frontendName"],
data() {
return {
showEsTestAlert: false,
esTestOk: false,
esTestMessage: "",
}
},
methods: {
update() {
if (!this.options.es_url.startsWith("https")) {
this.options.es_insecure_ssl = false;
}
this.$emit("change", this.options);
},
testEs() {
sist2AdminApi.pingEs(this.options.es_url, this.options.es_insecure_ssl).then((resp) => {
this.showEsTestAlert = true;
this.esTestOk = resp.data.ok;
this.esTestMessage = resp.data.message;
});
}
}
}
</script>
<style scoped>
</style>

View File

@@ -1,40 +0,0 @@
<template>
<svg
xmlns="http://www.w3.org/2000/svg"
width="27.868069mm"
height="7.6446671mm"
viewBox="0 0 27.868069 7.6446671"
>
<g transform="translate(-4.5018313,-4.1849793)">
<g
style="fill: currentColor;fill-opacity:1;stroke:none;stroke-width:0.26458332">
<path
d="m 6.3153296,11.829646 q -0.7717014,0 -1.8134983,-0.337619 v -0.916395 q 1.0128581,0.511252 1.803852,0.511252 0.5643067,0 0.901926,-0.236334 0.3376194,-0.236333 0.3376194,-0.63183 0,-0.3424428 -0.2845649,-0.5498376 Q 6.980922,9.4566645 6.3635609,9.3264399 L 5.9921796,9.2492698 Q 5.2301245,9.0949295 4.8732126,8.7428407 4.5211238,8.3859288 4.5211238,7.7733908 q 0,-0.7765245 0.5305447,-1.1961372 0.5305447,-0.4196126 1.5096409,-0.4196126 0.829579,0 1.6061036,0.3183268 V 7.3441319 Q 7.4101809,6.9004036 6.5854251,6.9004036 q -1.1671984,0 -1.1671984,0.7958171 0,0.2604492 0.1012858,0.4147895 0.1012858,0.1495171 0.3858507,0.2556261 0.2845649,0.1012858 0.8392253,0.2122179 l 0.3569119,0.067524 q 1.3408312,0.2652724 1.3408312,1.4614098 0,0.80064 -0.5691298,1.263661 -0.5691298,0.458197 -1.5578722,0.458197 z"
style="stroke-width:0.26458332"
/>
<path
d="m 11.943927,5.3087694 q -0.144694,0 -0.144694,-0.144694 V 4.3296733 q 0,-0.144694 0.144694,-0.144694 h 0.694531 q 0.144694,0 0.144694,0.144694 v 0.8344021 q 0,0.144694 -0.144694,0.144694 z M 13.5645,11.728361 q -0.795817,0 -1.234722,-0.511253 -0.434082,-0.516075 -0.434082,-1.4469398 V 6.9823969 H 10.714028 V 6.2878656 h 2.069124 v 3.4823026 q 0,0.5884228 0.221864,0.8971028 0.221865,0.308681 0.6463,0.308681 h 1.036974 v 0.752409 z"
style="stroke-width:0.26458332"
/>
<path
d="m 18.209178,11.829646 q -0.771701,0 -1.813498,-0.337619 v -0.916395 q 1.012858,0.511252 1.803852,0.511252 0.564306,0 0.901926,-0.236334 0.337619,-0.236333 0.337619,-0.63183 0,-0.3424428 -0.284565,-0.5498376 Q 18.87477,9.4566645 18.257409,9.3264399 l -0.371381,-0.07717 Q 17.123973,9.0949295 16.767061,8.7428407 16.414972,8.3859288 16.414972,7.7733908 q 0,-0.7765245 0.530545,-1.1961372 0.530545,-0.4196126 1.509641,-0.4196126 0.829579,0 1.606103,0.3183268 v 0.8681641 q -0.757232,-0.4437283 -1.581988,-0.4437283 -1.167198,0 -1.167198,0.7958171 0,0.2604492 0.101286,0.4147895 0.101286,0.1495171 0.385851,0.2556261 0.284565,0.1012858 0.839225,0.2122179 l 0.356912,0.067524 q 1.340831,0.2652724 1.340831,1.4614098 0,0.80064 -0.56913,1.263661 -0.56913,0.458197 -1.557872,0.458197 z"
style="stroke-width:0.26458332"
/>
<path
d="m 25.207545,11.709068 q -0.993565,0 -1.408355,-0.40032 -0.409966,-0.405143 -0.409966,-1.3794164 V 6.9775737 H 21.947107 V 6.2878656 h 1.442117 V 4.8746874 l 0.887457,-0.3858507 v 1.7990289 h 2.016069 v 0.6897081 h -2.016069 v 2.9517579 q 0,0.5932454 0.226687,0.8344024 0.226687,0.236333 0.790994,0.236333 h 0.998388 v 0.709001 z"
style="stroke-width:0.26458332"
/>
<path
d="m 27.995317,11.043476 q 0,-0.178456 0.120578,-0.299035 0.274919,-0.289388 0.651123,-0.684885 0.376205,-0.4003199 0.805464,-0.8681638 0.327973,-0.356912 0.491959,-0.5353679 0.16881,-0.1832791 0.255626,-0.2845649 0.09164,-0.1012858 0.178456,-0.2073948 0.255626,-0.3086805 0.405144,-0.5257215 0.15434,-0.2170411 0.250803,-0.4292589 0.168809,-0.3762045 0.168809,-0.7524089 0,-0.5980686 -0.352089,-0.935688 -0.356911,-0.3424425 -0.979096,-0.3424425 -0.863341,0 -1.938899,0.6414768 V 4.8361023 q 0.491959,-0.2363335 0.979096,-0.3569119 0.47749,-0.1205783 0.945334,-0.1205783 0.501606,0 0.940511,0.1350477 0.438905,0.1350478 0.766878,0.4244358 0.289388,0.2556261 0.463021,0.6270074 0.173633,0.3665582 0.173633,0.829579 0,0.4726671 -0.212218,0.9501574 -0.106109,0.2411567 -0.274919,0.4726671 -0.163986,0.2266873 -0.424435,0.540191 Q 31.270225,8.501684 31.077299,8.718725 30.884374,8.9357661 30.628748,9.2106847 30.445469,9.4084332 30.286305,9.5675966 30.131965,9.72676 29.958332,9.9003928 29.7847,10.069203 29.558012,10.300713 29.336148,10.5274 29.012998,10.869843 h 3.356901 v 0.819932 h -4.374582 z"
style="stroke-width:0.26458332"
/>
</g>
</g>
</svg>
</template>
<script>
export default {
name: "Sist2Icon"
}
</script>

View File

@@ -1,110 +0,0 @@
export default {
en: {
start: "Start",
stop: "Stop",
go: "Go",
online: "online",
offline: "offline",
delete: "Delete",
runNow: "Index now",
create: "Create",
test: "Test",
jobTitle: "job configuration",
tasks: "Tasks",
runningTasks: "Running tasks",
frontends: "Frontends",
jobDisabled: "There is no valid index for this job",
status: "Status",
taskHistory: "Task history",
taskName: "Task name",
taskStarted: "Started",
taskDuration: "Duration",
taskStatus: "Status",
logs: "Logs",
kill: "Kill",
killConfirmation: "SIGTERM signal sent to sist2 process",
killConfirmationTitle: "Confirmation",
follow: "Follow",
wholeFile: "Whole file",
logLevel: "Log level",
logMode: "Follow mode",
logFile: "Reading log file",
jobs: "Jobs",
newJobName: "New job name",
newJobHelp: "Create a new job to get started!",
newFrontendName: "New frontend name",
scanned: "last scan",
autoStart: "Start automatically",
runJobConfirmationTitle: "Task queued",
runJobConfirmation: "Check the Tasks page to monitor the status.",
monitoring: "Monitoring",
enableMonitoring: "Enable monitoring",
extraQueryArgs: "Extra query arguments when launching from sist2-admin",
customUrl: "Custom URL when launching from sist2-admin",
selectJobs: "Select jobs",
webOptions: {
esUrl: "Elasticsearch URL",
esIndex: "Elasticsearch index name",
esInsecure: "Do not verify SSL connections to Elasticsearch.",
lang: "UI Language",
bind: "Listen address",
tagline: "Tagline in navbar",
auth: "Basic auth in user:password format",
tagAuth: "Basic auth in user:password format for tagging",
},
scanOptions: {
title: "Scanning options",
path: "Path",
threads: "Number of threads",
memThrottle: "Total memory threshold in MiB for scan throttling",
thumbnailQuality: "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best",
thumbnailCount: "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails.",
thumbnailSize: "Thumbnail size, in pixels",
contentSize: "Number of bytes to be extracted from text documents. Set to 0 to disable",
rewriteUrl: "Serve files from this url instead of from disk",
depth: "Scan up to this many subdirectories deep",
archive: "Archive file mode",
archivePassphrase: "Passphrase for encrypted archive files",
ocrLang: "Tesseract language",
ocrLangAlert: "You must select at least one language",
ocrEbooks: "Enable OCR'ing of ebook files",
ocrImages: "Enable OCR'ing of image files",
exclude: "Files that match this regex will not be scanned",
excludePlaceholder: "Exclude",
fast: "Only index file names & mime type",
checksums: "Calculate file checksums when scanning",
readSubtitles: "Read subtitles from media files",
memBuffer: "Maximum memory buffer size per thread in MiB for files inside archives",
treemapThreshold: "Relative size threshold for treemap"
},
indexOptions: {
title: "Indexing options",
threads: "Number of threads",
esUrl: "Elasticsearch URL",
esIndex: "Elasticsearch index name",
esInsecure: "Do not verify SSL connections to Elasticsearch.",
batchSize: "Index batch size",
script: "User script"
},
jobOptions: {
title: "Job options",
cron: "Job schedule",
scheduleEnabled: "Enable scheduled re-scan",
noJobAvailable: "No jobs available.",
desktopNotifications: "Desktop notifications"
},
frontendOptions: {
title: "Frontend options",
noJobSelectedWarning: "You must select at least one job to start this frontend"
},
notifications: {
indexCompleted: "Task completed for [$JOB$]"
}
}
}

View File

@@ -1,31 +0,0 @@
import Vue from 'vue'
import { BootstrapVue, IconsPlugin } from 'bootstrap-vue'
import "bootstrap/dist/css/bootstrap.min.css"
import "bootstrap-vue/dist/bootstrap-vue.min.css"
Vue.use(BootstrapVue);
Vue.use(IconsPlugin);
import App from './App.vue';
import router from './router';
import store from './store';
import VueI18n from "vue-i18n";
import messages from "@/i18n/messages";
Vue.use(VueI18n);
const i18n = new VueI18n({
locale: "en",
messages: messages
});
Vue.config.productionTip = false
new Vue({
router,
store,
i18n,
render: h => h(App)
}).$mount('#app')

View File

@@ -1,45 +0,0 @@
import Vue from 'vue'
import VueRouter from 'vue-router'
import Home from '../views/Home.vue'
import Job from "@/views/Job";
import Tasks from "@/views/Tasks";
import Frontend from "@/views/Frontend";
import Tail from "@/views/Tail";
Vue.use(VueRouter);
const routes = [
{
path: "/",
name: "Home",
component: Home
},
{
path: "/job/:name",
name: "Job",
component: Job
},
{
path: "/task/",
name: "Tasks",
component: Tasks
},
{
path: "/frontend/:name",
name: "Frontend",
component: Frontend
},
{
path: "/log/:taskId",
name: "Tail",
component: Tail
},
]
const router = new VueRouter({
mode: "hash",
base: process.env.BASE_URL,
routes
})
export default router

View File

@@ -1,63 +0,0 @@
import Vue from "vue";
import Vuex from "vuex";
Vue.use(Vuex);
function saveBrowserSettings(state) {
const settings = {
jobDesktopNotificationMap: state.jobDesktopNotificationMap
};
localStorage.setItem("sist2-admin-settings", JSON.stringify(settings));
console.log("SAVED");
console.log(settings);
}
export default new Vuex.Store({
state: {
sist2AdminInfo: null,
jobDesktopNotificationMap: {}
},
mutations: {
setSist2AdminInfo: (state, payload) => state.sist2AdminInfo = payload,
setJobDesktopNotificationMap: (state, payload) => state.jobDesktopNotificationMap = payload,
},
actions: {
notify: async ({state}, notification) => {
if (!state.jobDesktopNotificationMap[notification.job]) {
console.log("pass");
return;
}
new Notification(notification.messageString.replace("$JOB$", notification.job));
},
setJobDesktopNotification: async ({state}, {job, enabled}) => {
if (enabled === true) {
const permission = await Notification.requestPermission()
if (permission !== "granted") {
return false;
}
}
state.jobDesktopNotificationMap[job] = enabled;
saveBrowserSettings(state);
return true;
},
loadBrowserSettings({commit}) {
const settingString = localStorage.getItem("sist2-admin-settings");
if (!settingString) {
return;
}
const settings = JSON.parse(settingString);
commit("setJobDesktopNotificationMap", settings["jobDesktopNotificationMap"]);
}
},
modules: {}
})

View File

@@ -1,8 +0,0 @@
export function formatBindAddress(address) {
if (address.startsWith("0.0.0.0")) {
return address.slice("0.0.0.0".length)
}
return address
}

View File

@@ -1,133 +0,0 @@
<template>
<b-card>
<b-card-title>
{{ name }}
<small style="vertical-align: top">
<b-badge v-if="!loading && frontend.running" variant="success">{{ $t("online") }}</b-badge>
<b-badge v-else-if="!loading" variant="secondary">{{ $t("offline") }}</b-badge>
</small>
</b-card-title>
<div class="mb-3" v-if="!loading">
<b-button class="mr-1" :disabled="frontend.running || !valid" variant="success" @click="start()">{{
$t("start")
}}
</b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="danger" @click="stop()">{{
$t("stop")
}}
</b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="primary" :href="frontendUrl" target="_blank">
{{ $t("go") }}
</b-button>
<b-button variant="danger" @click="deleteFrontend()">{{ $t("delete") }}</b-button>
</div>
<b-progress v-if="loading" striped animated value="100"></b-progress>
<b-card-body v-else>
<h4>{{ $t("frontendOptions.title") }}</h4>
<b-card>
<b-form-checkbox v-model="frontend.auto_start" @change="update()">
{{ $t("autoStart") }}
</b-form-checkbox>
<b-form-checkbox v-model="frontend.enable_monitoring" @change="update()">
{{ $t("enableMonitoring") }}
</b-form-checkbox>
<label>{{ $t("extraQueryArgs") }}</label>
<b-form-input v-model="frontend.extra_query_args" @change="update()"></b-form-input>
<label>{{ $t("customUrl") }}</label>
<b-form-input v-model="frontend.custom_url" @change="update()" placeholder="http://"></b-form-input>
<br/>
<b-alert v-if="!valid" variant="warning" show>{{ $t("frontendOptions.noJobSelectedWarning") }}</b-alert>
<JobCheckboxGroup :frontend="frontend" @input="update()"></JobCheckboxGroup>
</b-card>
<br/>
<h4>{{ $t("jobOptions.title") }}</h4>
<b-card>
<WebOptions :options="frontend.web_options" :frontend-name="$route.params.name" @change="update()"></WebOptions>
</b-card>
</b-card-body>
</b-card>
</template>
<script>
import Sist2AdminApi from "@/Sist2AdminApi";
import JobCheckboxGroup from "@/components/JobCheckboxGroup";
import WebOptions from "@/components/WebOptions";
export default {
name: 'Frontend',
components: {JobCheckboxGroup, WebOptions},
data() {
return {
loading: true,
frontend: null,
}
},
computed: {
valid() {
return !this.loading && this.frontend.jobs.length > 0;
},
frontendUrl() {
if (this.frontend.custom_url) {
return this.frontend.custom_url + this.args;
}
if (this.frontend.web_options.bind.startsWith("0.0.0.0")) {
return window.location.protocol + "//" + window.location.hostname + ":" + this.port + this.args;
}
return window.location.protocol + "//" + this.frontend.web_options.bind + this.args;
},
name() {
return this.$route.params.name;
},
port() {
return this.frontend.web_options.bind.split(":")[1]
},
args() {
const args = this.frontend.extra_query_args;
if (args !== "") {
return "#" + (args.startsWith("?") ? (args) : ("?" + args));
}
return "";
}
},
mounted() {
Sist2AdminApi.getFrontend(this.name).then(resp => {
this.frontend = resp.data;
this.loading = false;
});
},
methods: {
start() {
this.frontend.running = true;
Sist2AdminApi.startFrontend(this.name)
},
stop() {
this.frontend.running = false;
Sist2AdminApi.stopFrontend(this.name)
},
deleteFrontend() {
Sist2AdminApi.deleteFrontend(this.name).then(() => {
this.$router.push("/frontends");
});
},
update() {
Sist2AdminApi.updateFrontend(this.name, this.frontend);
},
}
}
</script>

View File

@@ -1,122 +0,0 @@
<template>
<div>
<b-card>
<b-card-title>{{ $t("jobs") }}</b-card-title>
<b-row>
<b-col>
<b-input id="new-job" v-model="newJobName" :placeholder="$t('newJobName')"></b-input>
<b-popover
:show.sync="showHelp"
target="new-job"
placement="top"
triggers="manual"
variant="primary"
:content="$t('newJobHelp')"
></b-popover>
</b-col>
<b-col>
<b-button variant="primary" @click="createJob()" :disabled="!jobNameValid(newJobName)">{{ $t("create") }}
</b-button>
</b-col>
</b-row>
<hr/>
<b-progress v-if="jobsLoading" striped animated value="100"></b-progress>
<b-list-group v-else>
<JobListItem v-for="job in jobs" :key="job.name" :job="job"></JobListItem>
</b-list-group>
</b-card>
<br/>
<b-card>
<b-card-title>{{ $t("frontends") }}</b-card-title>
<b-row>
<b-col>
<b-input v-model="newFrontendName" :placeholder="$t('newFrontendName')"></b-input>
</b-col>
<b-col>
<b-button variant="primary" @click="createFrontend()" :disabled="!frontendNameValid(newFrontendName)">
{{ $t("create") }}
</b-button>
</b-col>
</b-row>
<hr/>
<b-progress v-if="frontendsLoading" striped animated value="100"></b-progress>
<b-list-group v-else>
<FrontendListItem v-for="frontend in frontends"
:key="frontend.name" :frontend="frontend"></FrontendListItem>
</b-list-group>
</b-card>
</div>
</template>
<script>
import JobListItem from "@/components/JobListItem";
import {formatBindAddress} from "@/util";
import Sist2AdminApi from "@/Sist2AdminApi";
import FrontendListItem from "@/components/FrontendListItem";
export default {
name: "Jobs",
components: {JobListItem, FrontendListItem},
data() {
return {
jobsLoading: true,
newJobName: "",
jobs: [],
frontendsLoading: true,
frontends: [],
formatBindAddress,
newFrontendName: "",
showHelp: false
}
},
mounted() {
this.loading = true;
this.reload();
},
methods: {
jobNameValid(name) {
if (this.jobs.some(job => job.name === name)) {
return false;
}
return /^[a-zA-Z0-9-_,.; ]+$/.test(name);
},
frontendNameValid(name) {
if (this.frontends.some(job => job.name === name)) {
return false;
}
return /^[a-zA-Z0-9-_,.; ]+$/.test(name);
},
reload() {
Sist2AdminApi.getJobs().then(resp => {
this.jobs = resp.data;
this.jobsLoading = false;
this.showHelp = this.jobs.length === 0;
});
Sist2AdminApi.getFrontends().then(resp => {
this.frontends = resp.data;
this.frontendsLoading = false;
});
},
createJob() {
Sist2AdminApi.createJob(this.newJobName).then(this.reload);
},
createFrontend() {
Sist2AdminApi.createFrontend(this.newFrontendName).then(this.reload)
}
}
}
</script>

View File

@@ -1,92 +0,0 @@
<template>
<b-card>
<b-card-title>
[{{ getName() }}]
{{ $t("jobTitle") }}
</b-card-title>
<div class="mb-3">
<b-button class="mr-1" variant="primary" @click="runJob()">{{ $t("runNow") }}</b-button>
<b-button variant="danger" @click="deleteJob()">{{ $t("delete") }}</b-button>
</div>
<div v-if="job">
{{ $t("status") }}: <code>{{ job.status }}</code>
</div>
<b-progress v-if="loading" striped animated value="100"></b-progress>
<b-card-body v-else>
<h4>{{ $t("jobOptions.title") }}</h4>
<b-card>
<JobOptions :job="job" @change="update"></JobOptions>
</b-card>
<br/>
<h4>{{ $t("scanOptions.title") }}</h4>
<b-card>
<ScanOptions :options="job.scan_options" @change="update()"></ScanOptions>
</b-card>
<br/>
<h4>{{ $t("indexOptions.title") }}</h4>
<b-card>
<IndexOptions :options="job.index_options" @change="update()"></IndexOptions>
</b-card>
</b-card-body>
</b-card>
</template>
<script>
import ScanOptions from "@/components/ScanOptions";
import Sist2AdminApi from "@/Sist2AdminApi";
import IndexOptions from "@/components/IndexOptions";
import JobOptions from "@/components/JobOptions";
export default {
name: "Job",
components: {
IndexOptions,
ScanOptions,
JobOptions
},
data() {
return {
loading: true,
job: null
}
},
methods: {
getName() {
return this.$route.params.name;
},
update() {
Sist2AdminApi.updateJob(this.getName(), this.job);
},
runJob() {
Sist2AdminApi.runJob(this.getName()).then(() => {
this.$bvToast.toast(this.$t("runJobConfirmation"), {
title: this.$t("runJobConfirmationTitle"),
variant: "success",
toaster: "b-toaster-bottom-right"
});
});
},
deleteJob() {
Sist2AdminApi.deleteJob(this.getName()).then(() => {
this.$router.push("/");
})
}
},
mounted() {
Sist2AdminApi.getJob(this.getName()).then(resp => {
this.loading = false;
this.job = resp.data;
})
}
}
</script>

View File

@@ -1,168 +0,0 @@
<template>
<b-card>
<b-card-body>
<h4 class="mb-3">{{ taskId }} {{ $t("logs") }}</h4>
<div v-if="$store.state.sist2AdminInfo">
{{ $t("logFile") }}
<code>{{ $store.state.sist2AdminInfo.logs_folder }}/sist2-{{ taskId }}.log</code>
<br/>
<br/>
</div>
<b-row>
<b-col>
<span>{{ $t("logLevel") }}</span>
<b-select :options="levels.slice(0, -1)" v-model="logLevel" @input="connect()"></b-select>
</b-col>
<b-col>
<span>{{ $t("logMode") }}</span>
<b-select :options="modeOptions" v-model="mode" @input="connect()"></b-select>
</b-col>
</b-row>
<div id="log-tail-output" class="mt-3 ml-1"></div>
</b-card-body>
</b-card>
</template>
<script>
export default {
name: "Tail",
data() {
return {
logLevel: "DEBUG",
levels: ["DEBUG", "INFO", "WARNING", "ERROR", "ADMIN", "FATAL"],
socket: null,
mode: "follow",
modeOptions: [
{
"text": this.$t('follow'),
"value": "follow"
},
{
"text": this.$t('wholeFile'),
"value": "wholeFile"
}
]
}
},
computed: {
taskId: function () {
return this.$route.params.taskId;
}
},
methods: {
connect() {
let lineCount = 0;
const outputElem = document.getElementById("log-tail-output")
outputElem.replaceChildren();
if (this.socket !== null) {
this.socket.close();
}
const n = this.mode === "follow" ? 32 : 9999999999;
this.socket = new WebSocket(`ws://${window.location.host}/log/${this.taskId}?n=${n}`);
this.socket.onopen = () => {
this.socket.send("Hello from client");
}
this.socket.onmessage = e => {
let message;
try {
message = JSON.parse(e.data);
} catch {
console.error(e.data)
return;
}
if ("ping" in message) {
return;
}
if (message.level === undefined) {
if ("stderr" in message) {
message.level = "ERROR";
message.message = message["stderr"];
} else {
message.level = "ADMIN";
message.message = message["sist2-admin"];
}
message.datetime = ""
message.filepath = ""
}
if (this.levels.indexOf(message.level) < this.levels.indexOf(this.logLevel)) {
return;
}
const logLine = `${message.datetime} [${message.level} ${message.filepath}] ${message.message}`;
const span = document.createElement("span");
span.setAttribute("class", message.level);
span.appendChild(document.createTextNode(logLine));
outputElem.appendChild(span);
lineCount += 1;
if (this.mode === "follow" && lineCount >= n) {
outputElem.firstChild.remove();
}
}
}
},
mounted() {
this.connect()
}
}
</script>
<style>
#log-tail-output span {
display: block;
}
span.DEBUG {
color: #9E9E9E;
}
span.WARNING {
color: #FFB300;
}
span.INFO {
color: #039BE5;
}
span.ERROR {
color: #F4511E;
}
span.FATAL {
color: #F4511E;
}
span.ADMIN {
color: #ee05ff;
}
#log-tail-output {
font-size: 13px;
font-family: monospace;
padding: 6px;
background-color: #f5f5f5;
border: 1px solid #ccc;
border-radius: 4px;
margin: 3px;
white-space: pre;
color: #000;
overflow: hidden;
}
</style>

View File

@@ -1,124 +0,0 @@
<template>
<div>
<b-card v-if="tasks.length > 0">
<h2>{{ $t("runningTasks") }}</h2>
<b-list-group>
<TaskListItem v-for="task in tasks" :key="task.id" :task="task"></TaskListItem>
</b-list-group>
</b-card>
<b-card class="mt-4">
<b-card-title>{{ $t("taskHistory") }}</b-card-title>
<br/>
<b-table
id="task-history"
:items="historyItems"
:fields="historyFields"
:current-page="historyCurrentPage"
:tbody-tr-class="rowClass"
:per-page="10"
>
<template #cell(logs)="data">
<router-link :to="`/log/${data.item.logs}`">{{ $t("logs") }}</router-link>
</template>
</b-table>
<b-pagination limit="20" v-model="historyCurrentPage" :total-rows="historyItems.length"
:per-page="10"></b-pagination>
</b-card>
</div>
</template>
<script>
import TaskListItem from "@/components/TaskListItem";
import Sist2AdminApi from "@/Sist2AdminApi";
import moment from "moment";
export default {
name: 'Tasks',
components: {TaskListItem},
data() {
return {
loading: true,
tasks: [],
taskHistory: [],
timerId: null,
historyFields: [
{key: "name", label: this.$t("taskName")},
{key: "time", label: this.$t("taskStarted")},
{key: "duration", label: this.$t("taskDuration")},
{key: "status", label: this.$t("taskStatus")},
{key: "logs", label: this.$t("logs")},
],
historyCurrentPage: 1,
historyItems: []
}
},
props: {
msg: String
},
mounted() {
this.loading = true;
this.update().then(() => this.loading = false);
this.timerId = window.setInterval(this.update, 1000);
this.updateHistory();
},
destroyed() {
if (this.timerId) {
window.clearInterval(this.timerId);
}
},
methods: {
rowClass(row) {
if (row.status === "failed") {
return "table-danger";
}
return null;
},
updateHistory() {
Sist2AdminApi.getTaskHistory().then(resp => {
this.historyItems = resp.data.map(row => ({
id: row.id,
name: row.name,
duration: this.taskDuration(row),
time: moment(row.started).format("dd, MMM Do YYYY, HH:mm:ss"),
logs: row.id,
status: row.return_code === 0 ? "ok" : "failed"
}));
});
},
update() {
return Sist2AdminApi.getTasks().then(resp => {
this.tasks = resp.data;
})
},
taskDuration(task) {
const start = moment(task.started);
const end = moment(task.ended);
let duration = moment.utc(end.diff(start)).format("HH[h] mm[m] ss[s]");
duration = duration.replace("00h ", "");
duration = duration.replace(/^00m /, "");
duration = duration.replace(/00s/, "<1s");
duration = duration.replace(/^0/, "");
return duration;
}
}
}
</script>
<style scoped>
#task-history {
font-family: monospace;
font-size: 12px;
}
</style>

View File

@@ -1,3 +0,0 @@
module.exports = {
publicPath: ""
};

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +0,0 @@
fastapi
git+https://github.com/simon987/hexlib.git
uvicorn
websockets
pycron

View File

@@ -1,392 +0,0 @@
import asyncio
import os
import signal
from datetime import datetime
from urllib.parse import urlparse
import requests
import uvicorn
from fastapi import FastAPI, HTTPException
from hexlib.db import PersistentState
from requests import ConnectionError
from requests.exceptions import SSLError
from starlette.middleware.cors import CORSMiddleware
from starlette.responses import RedirectResponse
from starlette.staticfiles import StaticFiles
from starlette.websockets import WebSocket
from websockets.exceptions import ConnectionClosed
import cron
from config import LOG_FOLDER, logger, WEBSERVER_PORT, DATA_FOLDER, SIST2_BINARY
from jobs import Sist2Job, Sist2ScanTask, TaskQueue, Sist2IndexTask, JobStatus
from notifications import Subscribe, Notifications
from sist2 import Sist2
from state import PickleTable, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEMA_VERSION
from web import Sist2Frontend
VERSION = "1.0"
sist2 = Sist2(SIST2_BINARY, DATA_FOLDER)
db = PersistentState(table_factory=PickleTable, dbfile=os.path.join(DATA_FOLDER, "state.db"))
notifications = Notifications()
task_queue = TaskQueue(sist2, db, notifications)
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_credentials=True,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
app.mount("/ui/", StaticFiles(directory="./frontend/dist", html=True), name="static")
@app.get("/")
async def home():
return RedirectResponse("ui")
@app.get("/api")
async def api():
return {
"version": VERSION,
"tesseract_langs": TESSERACT_LANGS,
"logs_folder": LOG_FOLDER
}
@app.get("/api/job/{name:str}")
async def get_job(name: str):
row = db["jobs"][name]
if row:
return row["job"]
raise HTTPException(status_code=404)
@app.get("/api/frontend/{name:str}")
async def get_frontend(name: str):
row = db["frontends"][name]
if row:
frontend = row["frontend"]
frontend: Sist2Frontend
frontend.running = frontend.name in RUNNING_FRONTENDS
return frontend
raise HTTPException(status_code=404)
@app.get("/api/job/")
async def get_jobs():
return [row["job"] for row in db["jobs"]]
@app.put("/api/job/{name:str}")
async def update_job(name: str, job: Sist2Job):
# TODO: Check etag
job.last_modified = datetime.now()
row = db["jobs"][name]
if not row:
raise HTTPException(status_code=404)
args_that_trigger_full_scan = [
"path",
"thumbnail_count",
"thumbnail_quality",
"thumbnail_size",
"content_size",
"depth",
"archive",
"archive_passphrase",
"ocr_lang",
"ocr_images",
"ocr_ebooks",
"fast",
"checksums",
"read_subtitles",
]
for arg in args_that_trigger_full_scan:
if getattr(row["job"].scan_options, arg) != getattr(job.scan_options, arg):
job.do_full_scan = True
db["jobs"][name] = {"job": job}
@app.put("/api/frontend/{name:str}")
async def update_frontend(name: str, frontend: Sist2Frontend):
db["frontends"][name] = {"frontend": frontend}
# TODO: Check etag
return "ok"
@app.get("/api/task/")
async def get_tasks():
return list(map(lambda t: t.json(), task_queue.tasks()))
@app.get("/api/task/history")
async def task_history():
return list(db["task_done"].sql("ORDER BY started DESC"))
@app.post("/api/task/{task_id:str}/kill")
async def kill_job(task_id: str):
return task_queue.kill_task(task_id)
def _run_job(job: Sist2Job):
job.last_modified = datetime.now()
if job.status == JobStatus("created"):
job.status = JobStatus("started")
db["jobs"][job.name] = {"job": job}
scan_task = Sist2ScanTask(job, f"Scan [{job.name}]")
index_task = Sist2IndexTask(job, f"Index [{job.name}]", depends_on=scan_task)
task_queue.submit(scan_task)
task_queue.submit(index_task)
@app.get("/api/job/{name:str}/run")
async def run_job(name: str):
row = db["jobs"][name]
if not row:
raise HTTPException(status_code=404)
_run_job(row["job"])
return "ok"
@app.delete("/api/job/{name:str}")
async def delete_job(name: str):
row = db["jobs"][name]
if row:
del db["jobs"][name]
else:
raise HTTPException(status_code=404)
@app.delete("/api/frontend/{name:str}")
async def delete_frontend(name: str):
if name in RUNNING_FRONTENDS:
os.kill(RUNNING_FRONTENDS[name], signal.SIGTERM)
del RUNNING_FRONTENDS[name]
row = db["frontends"][name]
if row:
del db["frontends"][name]
else:
raise HTTPException(status_code=404)
@app.post("/api/job/{name:str}")
async def create_job(name: str):
if db["jobs"][name]:
raise ValueError("Job with the same name already exists")
job = Sist2Job.create_default(name)
db["jobs"][name] = {"job": job}
return job
@app.post("/api/frontend/{name:str}")
async def create_frontend(name: str):
if db["frontend"][name]:
raise ValueError("Frontend with the same name already exists")
frontend = Sist2Frontend.create_default(name)
db["frontends"][name] = {"frontend": frontend}
return frontend
@app.get("/api/ping_es")
async def ping_es(url: str, insecure: bool):
return check_es_version(url, insecure)
def check_es_version(es_url: str, insecure: bool):
try:
url = urlparse(es_url)
if url.username:
auth = (url.username, url.password)
es_url = f"{url.scheme}://{url.hostname}:{url.port}"
else:
auth = None
r = requests.get(es_url, verify=insecure, auth=auth)
except SSLError:
return {
"ok": False,
"message": "Invalid SSL certificate"
}
except ConnectionError as e:
return {
"ok": False,
"message": "Connection refused"
}
except ValueError as e:
return {
"ok": False,
"message": str(e)
}
if r.status_code == 401:
return {
"ok": False,
"message": "Authentication failure"
}
try:
return {
"ok": True,
"message": "Elasticsearch version " + r.json()["version"]["number"]
}
except:
return {
"ok": False,
"message": "Could not read version"
}
def start_frontend_(frontend: Sist2Frontend):
frontend.web_options.indices = list(map(lambda j: db["jobs"][j]["job"].last_index, frontend.jobs))
pid = sist2.web(frontend.web_options, frontend.name)
RUNNING_FRONTENDS[frontend.name] = pid
@app.post("/api/frontend/{name:str}/start")
async def start_frontend(name: str):
row = db["frontends"][name]
if not row:
raise HTTPException(status_code=404)
start_frontend_(row["frontend"])
@app.post("/api/frontend/{name:str}/stop")
async def stop_frontend(name: str):
if name in RUNNING_FRONTENDS:
os.kill(RUNNING_FRONTENDS[name], signal.SIGTERM)
del RUNNING_FRONTENDS[name]
@app.get("/api/frontend/")
async def get_frontends():
res = []
for row in db["frontends"]:
frontend = row["frontend"]
frontend: Sist2Frontend
frontend.running = frontend.name in RUNNING_FRONTENDS
res.append(frontend)
return res
def tail(filepath: str, n: int):
with open(filepath) as file:
reached_eof = False
buffer = []
line = ""
while True:
tmp = file.readline()
if tmp:
line += tmp
if line.endswith("\n"):
if reached_eof:
yield line
else:
if len(buffer) > n:
buffer.pop(0)
buffer.append(line)
line = ""
else:
if not reached_eof:
reached_eof = True
yield from buffer
yield None
@app.websocket("/notifications")
async def ws_tail_log(websocket: WebSocket):
await websocket.accept()
try:
await websocket.receive_text()
async with Subscribe(notifications) as ob:
async for notification in ob.notifications():
await websocket.send_json(notification)
print(notification)
except ConnectionClosed:
return
@app.websocket("/log/{task_id}")
async def ws_tail_log(websocket: WebSocket, task_id: str, n: int):
log_file = os.path.join(LOG_FOLDER, f"sist2-{task_id}.log")
await websocket.accept()
try:
await websocket.receive_text()
except ConnectionClosed:
return
while True:
for line in tail(log_file, n):
try:
if line:
await websocket.send_text(line)
else:
await websocket.send_json({"ping": ""})
await asyncio.sleep(0.1)
except ConnectionClosed:
return
def main():
uvicorn.run(app, port=WEBSERVER_PORT, host="0.0.0.0")
def initialize_db():
db["sist2_admin"]["info"] = {"version": DB_SCHEMA_VERSION}
frontend = Sist2Frontend.create_default("default")
db["frontends"]["default"] = {"frontend": frontend}
logger.info("Initialized database.")
def start_frontends():
for row in db["frontends"]:
frontend: Sist2Frontend = row["frontend"]
if frontend.auto_start and len(frontend.jobs) > 0:
start_frontend_(frontend)
if __name__ == '__main__':
if not db["sist2_admin"]["info"]:
initialize_db()
elif db["sist2_admin"]["info"]["version"] != DB_SCHEMA_VERSION:
print("Database has incompatible schema version! Delete state.db to continue.")
exit(-1)
start_frontends()
cron.initialize(db, _run_job)
logger.info("Started sist2-admin. Hello!")
main()

View File

@@ -1,30 +0,0 @@
import os
import logging
import sys
from logging import StreamHandler
from logging.handlers import RotatingFileHandler
MAX_LOG_SIZE = 1 * 1024 * 1024
SIST2_BINARY = os.environ.get("SIST2_BINARY", "/root/sist2")
DATA_FOLDER = os.environ.get("DATA_FOLDER", "/sist2-admin/")
LOG_FOLDER = os.path.join(DATA_FOLDER, "logs")
WEBSERVER_PORT = 8080
os.makedirs(LOG_FOLDER, exist_ok=True)
os.makedirs(DATA_FOLDER, exist_ok=True)
logger = logging.Logger("sist2-admin")
_log_file = os.path.join(LOG_FOLDER, "sist2-admin.log")
_log_fmt = "%(asctime)s [%(levelname)s] %(message)s"
_log_formatter = logging.Formatter(_log_fmt, datefmt='%Y-%m-%d %H:%M:%S')
console_handler = StreamHandler(sys.stdout)
console_handler.setFormatter(_log_formatter)
file_handler = RotatingFileHandler(_log_file, mode="a", maxBytes=MAX_LOG_SIZE, backupCount=1)
file_handler.setFormatter(_log_formatter)
logger.addHandler(console_handler)
logger.addHandler(file_handler)

View File

@@ -1,33 +0,0 @@
from threading import Thread
import pycron
import time
from hexlib.db import PersistentState
from config import logger
from jobs import Sist2Job
def _check_schedule(db: PersistentState, run_job):
for job in (row["job"] for row in db["jobs"]):
job: Sist2Job
if job.schedule_enabled:
if pycron.is_now(job.cron_expression):
logger.info(f"Submit scan task to queue for [{job.name}]")
run_job(job)
def _cron_thread(db, run_job):
time.sleep(60 - (time.time() % 60))
start = time.time()
while True:
_check_schedule(db, run_job)
time.sleep(60 - ((time.time() - start) % 60))
def initialize(db, run_job):
t = Thread(target=_cron_thread, args=(db, run_job), daemon=True, name="timer")
t.start()

View File

@@ -1,315 +0,0 @@
import json
import logging
import os.path
import shutil
import signal
import uuid
from datetime import datetime
from enum import Enum
from hashlib import md5
from logging import FileHandler
from threading import Lock, Thread
from time import sleep
from uuid import uuid4, UUID
from hexlib.db import PersistentState
from pydantic import BaseModel, validator
from config import logger, LOG_FOLDER
from notifications import Notifications
from sist2 import ScanOptions, IndexOptions, Sist2, Sist2Index
from state import RUNNING_FRONTENDS
from web import Sist2Frontend
class JobStatus(Enum):
CREATED = "created"
STARTED = "started"
INDEXED = "indexed"
FAILED = "failed"
class Sist2Job(BaseModel):
name: str
scan_options: ScanOptions
index_options: IndexOptions
cron_expression: str
schedule_enabled: bool = False
previous_index: str = None
last_index: str = None
last_index_date: datetime = None
status: JobStatus = JobStatus("created")
last_modified: datetime
etag: str = None
do_full_scan: bool = False
def __init__(self, **kwargs):
super().__init__(**kwargs)
@staticmethod
def create_default(name: str):
return Sist2Job(
name=name,
scan_options=ScanOptions(path="/"),
index_options=IndexOptions(),
last_modified=datetime.now(),
cron_expression="0 0 * * *"
)
@validator("etag", always=True)
def validate_etag(cls, value, values):
s = values["name"] + values["scan_options"].json() + values["index_options"].json() + values["cron_expression"]
return md5(s.encode()).hexdigest()
class Sist2TaskProgress:
def __init__(self, done: int = 0, count: int = 0, index_size: int = 0, tn_size: int = 0, waiting: bool = False):
self.done = done
self.count = count
self.index_size = index_size
self.store_size = tn_size
self.waiting = waiting
def percent(self):
return (self.done / self.count) if self.count else 0
class Sist2Task:
def __init__(self, job: Sist2Job, display_name: str, depends_on: uuid.UUID = None):
self.job = job
self.display_name = display_name
self.progress = Sist2TaskProgress()
self.id = uuid4()
self.pid = None
self.started = None
self.ended = None
self.depends_on = depends_on
self._logger = logging.Logger(name=f"{self.id}")
self._logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"sist2-{self.id}.log")))
def json(self):
return {
"id": self.id,
"job": self.job,
"display_name": self.display_name,
"progress": self.progress,
"started": self.started,
"ended": self.ended,
"depends_on": self.depends_on,
}
def log_callback(self, log_json):
if "progress" in log_json:
self.progress = Sist2TaskProgress(**log_json["progress"])
elif self._logger:
self._logger.info(json.dumps(log_json))
def run(self, sist2: Sist2, db: PersistentState):
self.started = datetime.now()
logger.info(f"Started task {self.display_name}")
class Sist2ScanTask(Sist2Task):
def run(self, sist2: Sist2, db: PersistentState):
super().run(sist2, db)
self.job.scan_options.name = self.job.name
if self.job.last_index and os.path.exists(self.job.last_index) and not self.job.do_full_scan:
self.job.scan_options.incremental = self.job.last_index
else:
self.job.scan_options.incremental = None
def set_pid(pid):
self.pid = pid
return_code = sist2.scan(self.job.scan_options, logs_cb=self.log_callback, set_pid_cb=set_pid)
self.ended = datetime.now()
if return_code != 0:
self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
logger.info(f"Task {self.display_name} failed ({return_code})")
else:
index = Sist2Index(self.job.scan_options.output)
# Save latest index
self.job.previous_index = self.job.last_index
self.job.last_index = index.path
self.job.last_index_date = datetime.now()
self.job.do_full_scan = False
db["jobs"][self.job.name] = {"job": self.job}
self._logger.info(json.dumps({"sist2-admin": f"Save last_index={self.job.last_index}"}))
logger.info(f"Completed {self.display_name} ({return_code=})")
return return_code
class Sist2IndexTask(Sist2Task):
def __init__(self, job: Sist2Job, display_name: str, depends_on: Sist2Task):
super().__init__(job, display_name, depends_on=depends_on.id)
def run(self, sist2: Sist2, db: PersistentState):
super().run(sist2, db)
self.job.index_options.path = self.job.scan_options.output
return_code = sist2.index(self.job.index_options, logs_cb=self.log_callback)
self.ended = datetime.now()
duration = self.ended - self.started
ok = return_code == 0
if ok:
# Remove old index
if self.job.previous_index is not None:
self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index=}"}))
try:
shutil.rmtree(self.job.previous_index)
except FileNotFoundError:
pass
self.restart_running_frontends(db, sist2)
# Update status
self.job.status = JobStatus("indexed") if ok else JobStatus("failed")
db["jobs"][self.job.name] = {"job": self.job}
self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}"}))
logger.info(f"Completed {self.display_name} ({return_code=})")
return return_code
def restart_running_frontends(self, db: PersistentState, sist2: Sist2):
for frontend_name, pid in RUNNING_FRONTENDS.items():
frontend = db["frontends"][frontend_name]["frontend"]
frontend: Sist2Frontend
os.kill(pid, signal.SIGTERM)
try:
os.wait()
except ChildProcessError:
pass
frontend.web_options.indices = map(lambda j: db["jobs"][j]["job"].last_index, frontend.jobs)
pid = sist2.web(frontend.web_options, frontend.name)
RUNNING_FRONTENDS[frontend_name] = pid
self._logger.info(json.dumps({"sist2-admin": f"Restart frontend {pid=} {frontend_name=}"}))
class TaskQueue:
def __init__(self, sist2: Sist2, db: PersistentState, notifications: Notifications):
self._lock = Lock()
self._sist2 = sist2
self._db = db
self._notifications = notifications
self._tasks = {}
self._queue = []
self._sem = 0
self._thread = Thread(target=self._check_new_task, daemon=True)
self._thread.start()
def _tasks_failed(self):
done = set()
for row in self._db["task_done"].sql("WHERE return_code != 0"):
done.add(uuid.UUID(row["id"]))
return done
def _tasks_done(self):
done = set()
for row in self._db["task_done"]:
done.add(uuid.UUID(row["id"]))
return done
def _check_new_task(self):
while True:
with self._lock:
for task in list(self._queue):
task: Sist2Task
if self._sem >= 1:
break
if not task.depends_on or task.depends_on in self._tasks_done():
self._queue.remove(task)
if task.depends_on in self._tasks_failed():
# The task which we depend on failed, continue
continue
self._sem += 1
t = Thread(target=self._run_task, args=(task,))
self._tasks[task.id] = {
"task": task,
"thread": t,
}
t.start()
break
sleep(1)
def tasks(self):
return list(map(lambda t: t["task"], self._tasks.values()))
def kill_task(self, task_id):
task = self._tasks.get(UUID(task_id))
if task:
pid = task["task"].pid
logger.info(f"Killing task {task_id} (pid={pid})")
os.kill(pid, signal.SIGTERM)
return True
return False
def _run_task(self, task: Sist2Task):
task_result = task.run(self._sist2, self._db)
with self._lock:
del self._tasks[task.id]
self._sem -= 1
self._db["task_done"][task.id] = {
"ended": task.ended,
"started": task.started,
"name": task.display_name,
"return_code": task_result
}
if isinstance(task, Sist2IndexTask):
self._notifications.notify({
"message": "notifications.indexCompleted",
"job": task.job.name
})
def submit(self, task: Sist2Task):
logger.info(f"Submitted task to queue {task.display_name}")
with self._lock:
self._queue.append(task)

View File

@@ -1,40 +0,0 @@
import asyncio
from typing import List
class Notifications:
def __init__(self):
self._subscribers: List[Subscribe] = []
def subscribe(self, ob):
self._subscribers.append(ob)
def unsubscribe(self, ob):
self._subscribers.remove(ob)
def notify(self, notification: dict):
for ob in self._subscribers:
ob.notify(notification)
class Subscribe:
def __init__(self, notifications: Notifications):
self._queue = []
self._notifications = notifications
async def __aenter__(self):
self._notifications.subscribe(self)
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
self._notifications.unsubscribe(self)
def notify(self, notification: dict):
self._queue.append(notification)
async def notifications(self):
while True:
try:
yield self._queue.pop(0)
except IndexError:
await asyncio.sleep(0.1)

View File

@@ -1,307 +0,0 @@
import datetime
import json
import logging
import os.path
import traceback
from datetime import datetime
from io import TextIOWrapper
from logging import FileHandler
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
from threading import Thread
from typing import List
from pydantic import BaseModel
from config import logger, LOG_FOLDER
class Sist2Version:
def __init__(self, version: str):
self._version = version
self.major, self.minor, self.patch = [int(x) for x in version.split(".")]
def __str__(self):
return f"{self.major}.{self.minor}.{self.patch}"
class WebOptions(BaseModel):
indices: List[str] = []
es_url: str = "http://elasticsearch:9200"
es_insecure_ssl: bool = False
es_index: str = "sist2"
bind: str = "0.0.0.0:4090"
auth: str = None
tag_auth: str = None
tagline: str = "Lightning-fast file system indexer and search tool"
dev: bool = False
lang: str = "en"
def __init__(self, **kwargs):
super().__init__(**kwargs)
def args(self):
args = ["web", f"--es-url={self.es_url}", f"--bind={self.bind}",
f"--tagline={self.tagline}", f"--lang={self.lang}"]
if self.es_insecure_ssl:
args.append(f"--es-insecure-ssl")
if self.auth:
args.append(f"--auth={self.auth}")
if self.tag_auth:
args.append(f"--tag_auth={self.tag_auth}")
if self.dev:
args.append(f"--dev")
args.extend(self.indices)
return args
class IndexOptions(BaseModel):
path: str = None
threads: int = 1
es_url: str = "http://elasticsearch:9200"
es_insecure_ssl: bool = False
es_index: str = "sist2"
incremental_index: bool = False
script: str = ""
script_file: str = None
batch_size: int = 100
def __init__(self, **kwargs):
super().__init__(**kwargs)
def args(self):
args = ["index", self.path, f"--threads={self.threads}", f"--es-url={self.es_url}",
f"--es-index={self.es_index}", f"--batch-size={self.batch_size}"]
if self.script_file:
args.append(f"--script-file={self.script_file}")
if self.es_insecure_ssl:
args.append(f"--es-insecure-ssl")
if self.incremental_index:
args.append(f"--incremental-index")
return args
ARCHIVE_SKIP = "skip"
ARCHIVE_LIST = "list"
ARCHIVE_SHALLOW = "shallow"
ARCHIVE_RECURSE = "recurse"
class ScanOptions(BaseModel):
path: str
threads: int = 1
mem_throttle: int = 0
thumbnail_quality: float = 1.0
thumbnail_size: int = 500
thumbnail_count: int = 1
content_size: int = 32768
depth: int = -1
archive: str = ARCHIVE_RECURSE
archive_passphrase: str = None
ocr_lang: bool = None
ocr_images: bool = False
ocr_ebooks: bool = False
exclude: str = None
fast: bool = False
treemap_threshold: float = 0.0005
mem_buffer: int = 2000
read_subtitles: bool = False
fast_epub: bool = False
checksums: bool = False
incremental: str = None
output: str = None
name: str = None
rewrite_url: str = None
list_file: str = None
def __init__(self, **kwargs):
super().__init__(**kwargs)
def args(self):
args = ["scan", self.path, f"--threads={self.threads}", f"--mem-throttle={self.mem_throttle}",
f"--thumbnail-quality={self.thumbnail_quality}", f"--thumbnail-count={self.thumbnail_count}",
f"--content-size={self.content_size}", f"--output={self.output}", f"--depth={self.depth}",
f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"]
if self.incremental:
args.append(f"--incremental={self.incremental}")
if self.rewrite_url:
args.append(f"--rewrite-url={self.rewrite_url}")
if self.name:
args.append(f"--name={self.name}")
if self.archive_passphrase:
args.append(f"--archive-passphrase={self.archive_passphrase}")
if self.ocr_lang:
args.append(f"--ocr-lang={self.ocr_lang}")
if self.ocr_ebooks:
args.append(f"--ocr-ebooks")
if self.ocr_images:
args.append(f"--ocr-images")
if self.exclude:
args.append(f"--exclude={self.exclude}")
if self.fast:
args.append(f"--fast")
if self.treemap_threshold:
args.append(f"--treemap-threshold={self.treemap_threshold}")
if self.read_subtitles:
args.append(f"--read-subtitles")
if self.fast_epub:
args.append(f"--fast-epub")
if self.checksums:
args.append(f"--checksums")
if self.list_file:
args.append(f"--list_file={self.list_file}")
return args
class Sist2Index:
def __init__(self, path):
self.path = path
with open(os.path.join(path, "descriptor.json")) as f:
self._descriptor = json.load(f)
def to_json(self):
return {
"path": self.path,
"version": self.version(),
"timestamp": self.timestamp(),
"name": self.name()
}
def version(self) -> Sist2Version:
return Sist2Version(self._descriptor["version"])
def timestamp(self) -> datetime:
return datetime.fromtimestamp(self._descriptor["timestamp"])
def name(self) -> str:
return self._descriptor["name"]
class Sist2:
def __init__(self, bin_path: str, data_directory: str):
self._bin_path = bin_path
self._data_dir = data_directory
def index(self, options: IndexOptions, logs_cb):
if options.script:
with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".painless", delete=False) as f:
f.write(options.script)
options.script_file = f.name
else:
options.script_file = None
args = [
self._bin_path,
*options.args(),
"--json-logs",
"--very-verbose"
]
proc = Popen(args, stdout=PIPE, stderr=PIPE)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start()
self._consume_logs_stdout(logs_cb, proc)
t_stderr.join()
return proc.returncode
def scan(self, options: ScanOptions, logs_cb, set_pid_cb):
output_dir = os.path.join(
self._data_dir,
f"scan-{datetime.now()}.sist2"
)
options.output = output_dir
args = [
self._bin_path,
*options.args(),
"--json-logs",
"--very-verbose"
]
logs_cb({"sist2-admin": f"Starting sist2 command with args {args}"})
proc = Popen(args, stdout=PIPE, stderr=PIPE)
set_pid_cb(proc.pid)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start()
self._consume_logs_stdout(logs_cb, proc)
t_stderr.join()
return proc.returncode
@staticmethod
def _consume_logs_stderr(logs_cb, proc):
pipe_wrapper = TextIOWrapper(proc.stderr, encoding="utf8")
try:
for line in pipe_wrapper:
if line.strip() == "":
continue
logs_cb({"stderr": line})
finally:
proc.wait()
pipe_wrapper.close()
@staticmethod
def _consume_logs_stdout(logs_cb, proc):
pipe_wrapper = TextIOWrapper(proc.stdout, encoding="utf8")
try:
for line in pipe_wrapper:
if line.strip() == "":
continue
log_object = json.loads(line)
logs_cb(log_object)
except Exception as e:
proc.kill()
try:
print(line)
except NameError:
pass
print(traceback.format_exc())
finally:
pass
# proc.wait()
# pipe_wrapper.close()
def web(self, options: WebOptions, name: str):
args = [
self._bin_path,
*options.args()
]
web_logger = logging.Logger(name=f"sist2-frontend-{name}")
web_logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"frontend-{name}.log")))
def logs_cb(message):
web_logger.info(json.dumps(message))
logger.info(f"Starting frontend {' '.join(args)}")
proc = Popen(args, stdout=PIPE, stderr=PIPE)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start()
t_stdout = Thread(target=self._consume_logs_stdout, args=(logs_cb, proc))
t_stdout.start()
return proc.pid

View File

@@ -1,50 +0,0 @@
from typing import Dict
from hexlib.db import Table
import pickle
from tesseract import get_tesseract_langs
RUNNING_FRONTENDS: Dict[str, int] = {}
TESSERACT_LANGS = get_tesseract_langs()
DB_SCHEMA_VERSION = "1"
from pydantic import BaseModel
def _serialize(item):
if isinstance(item, BaseModel):
return pickle.dumps(item)
if isinstance(item, bytes):
raise Exception("FIXME: bytes in PickleTable")
return item
def _deserialize(item):
if isinstance(item, bytes):
return pickle.loads(item)
return item
class PickleTable(Table):
def __getitem__(self, item):
row = super().__getitem__(item)
if row:
return dict((k, _deserialize(v)) for k, v in row.items())
return row
def __setitem__(self, key, value):
value = dict((k, _serialize(v)) for k, v in value.items())
super().__setitem__(key, value)
def __iter__(self):
for row in super().__iter__():
yield dict((k, _deserialize(v)) for k, v in row.items())
def sql(self, where_clause, *params):
for row in super().sql(where_clause, *params):
yield dict((k, _deserialize(v)) for k, v in row.items())

View File

@@ -1,14 +0,0 @@
import subprocess
def get_tesseract_langs():
res = subprocess.check_output([
"tesseract",
"--list-langs"
]).decode()
languages = res.split("\n")[1:]
return list(filter(lambda lang: lang and lang != "osd", languages))

Some files were not shown because too many files have changed in this diff Show More