Mirror of https://github.com/simon987/sist2.git (synced 2025-12-12 15:08:53 +00:00)

Compare commits

64 Commits
| Author | SHA1 | Date |
|---|---|---|
| | d32bda0d68 | |
| | 499ed0be79 | |
| | dc39c0ec4b | |
| | b5cdd9a5df | |
| | a8b6886f7b | |
| | a7e9b6af96 | |
| | 0710dc6d3d | |
| | 75b66b5982 | |
| | 9813646c11 | |
| | ebc9468251 | |
| | 7baaca5078 | |
| | 6c4bdc87cf | |
| | 1ea78887c3 | |
| | 886fa720ec | |
| | d43aac735f | |
| | faf438a798 | |
| | 5b3b9911bd | |
| | 237d55ec9c | |
| | ced4c7de88 | |
| | 90ee318981 | |
| | 785121e46c | |
| | 585c57a2ad | |
| | 42abbbce95 | |
| | e8607df26f | |
| | f1726ca0a9 | |
| | 3ef675abcf | |
| | 01490d1cbf | |
| | 6182338f29 | |
| | 300c70883d | |
| | fc36f33d52 | |
| | 81658efb19 | |
| | ca973d63a4 | |
| | f8abffba81 | |
| | 60c77678b4 | |
| | bf1d2f7d55 | |
| | 8c662bb8f8 | |
| | 9c40dddd41 | |
| | d259b95017 | |
| | 707bac86b3 | |
| | 8b9b067c06 | |
| | b17f3ff924 | |
| | e44fbf741c | |
| | fa14efbeb6 | |
| | c510162dd9 | |
| | f5c664507f | |
| | 2805fd509f | |
| | 20adcce4a9 | |
| | 1e6e24111b | |
| | 5a76b855c9 | |
| | 6f759642fc | |
| | 587c9a2c90 | |
| | 821a571ecf | |
| | 9020246a01 | |
| | 200c000c5a | |
| | a43f930d00 | |
| | abe120197a | |
| | 9e0d7bf992 | |
| | 959d4b4386 | |
| | 742a50be03 | |
| | 87ecc5ef6d | |
| | 2e3d648796 | |
| | 9972e21fcc | |
| | c625c03552 | |
| | 5863b9cd6e | |
.dockerignore

@@ -15,7 +15,6 @@ Makefile
**/*.cbp
VERSION
**/node_modules/
.git/
sist2-*-linux-debug
sist2-*-linux
sist2_debug
@@ -33,4 +32,9 @@ tmp_scan/
Dockerfile
Dockerfile.arm64
docker-compose.yml
state.db
state.db
*-journal
build/
__pycache__/
sist2-vue/dist
sist2-admin/frontend/dist
3 .gitattributes vendored

@@ -1,3 +0,0 @@
CMakeModules/* linguist-vendored
**/*_generated.c linguist-vendored
**/*_generated.h linguist-vendored
11 .gitignore vendored

@@ -33,3 +33,14 @@ state.db
*.pyc
!sist2-admin/frontend/dist
*.js.map
sist2-vue/dist
sist2-admin/frontend/dist
.ninja_deps
.ninja_log
build.ninja
src/web/static_generated.c
src/magic_generated.c
src/index/static_generated.c
*.sist2
*-shm
*-journal
3 .gitmodules vendored

@@ -10,3 +10,6 @@
[submodule "third-party/libscan/third-party/libmobi"]
path = third-party/libscan/third-party/libmobi
url = https://github.com/bfabiszewski/libmobi
[submodule "third-party/libscan/libscan-test-files"]
path = third-party/libscan/libscan-test-files
url = https://github.com/simon987/libscan-test-files
CMakeLists.txt

@@ -5,7 +5,7 @@ set(CMAKE_C_STANDARD 11)

option(SIST_DEBUG "Build a debug executable" on)
option(SIST_FAST "Enable more optimisation flags" off)
option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
option(SIST_DEBUG_INFO "Turn on debug information in web interface" on)

add_compile_definitions(
        "SIST_PLATFORM=${SIST_PLATFORM}"
@@ -15,36 +15,50 @@ if (SIST_DEBUG)
    add_compile_definitions(
            "SIST_DEBUG=${SIST_DEBUG}"
    )
endif()
    set(VCPKG_BUILD_TYPE debug)
else ()
    set(VCPKG_BUILD_TYPE release)
endif ()

if (SIST_DEBUG_INFO)
    add_compile_definitions(
            "SIST_DEBUG_INFO=${SIST_DEBUG_INFO}"
    )
endif ()

add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)

add_executable(sist2
        # argparse
        third-party/argparse/argparse.h third-party/argparse/argparse.c

        src/main.c
        src/sist.h
        src/io/walk.h src/io/walk.c
        src/io/store.h src/io/store.c
        src/tpool.h src/tpool.c
        src/parsing/parse.h src/parsing/parse.c
        src/parsing/magic_util.c src/parsing/magic_util.h
        src/io/serialize.h src/io/serialize.c
        src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
        src/index/web.c src/index/web.h
        src/web/serve.c src/web/serve.h
        src/web/web_util.c src/web/web_util.h
        src/index/elastic.c src/index/elastic.h
        src/util.c src/util.h
        src/ctx.h src/types.h
        src/ctx.c src/ctx.h
        src/types.h
        src/log.c src/log.h
        src/cli.c src/cli.h
        src/stats.c src/stats.h src/ctx.c
        src/parsing/sidecar.c src/parsing/sidecar.h
        src/database/database.c src/database/database.h
        src/parsing/fs_util.h

        src/auth0/auth0_c_api.h src/auth0/auth0_c_api.cpp

        # argparse
        third-party/argparse/argparse.h third-party/argparse/argparse.c
        )
        src/database/database_stats.c src/database/database_schema.c)
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)

target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
@@ -52,16 +66,11 @@ set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)

find_package(PkgConfig REQUIRED)

pkg_search_module(GLIB REQUIRED glib-2.0)

find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB
        NAMES libmagic.so.1 magic
        PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
        )
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
find_package(unofficial-sqlite3 CONFIG REQUIRED)

target_include_directories(
@@ -70,7 +79,6 @@ target_include_directories(
        ${CMAKE_SOURCE_DIR}/third-party/utf8.h/
        ${CMAKE_SOURCE_DIR}/third-party/libscan/
        ${CMAKE_SOURCE_DIR}/
        ${GLIB_INCLUDE_DIRS}
)

target_compile_options(
@@ -88,7 +96,7 @@ if (SIST_DEBUG)
            -fno-omit-frame-pointer
            -fsanitize=address
            -fno-inline
            # -O2
            # -O2
    )
    target_link_options(
            sist2
@@ -120,6 +128,7 @@ else ()
            -Ofast
            -fno-stack-protector
            -fomit-frame-pointer
            -w
    )
endif ()

@@ -133,20 +142,16 @@ target_link_libraries(
        sist2

        z
        lmdb
        cjson
        argparse
        ${GLIB_LDFLAGS}
        unofficial::mongoose::mongoose
        CURL::libcurl

        pthread

        c

        scan

        ${MAGIC_LIB}
        unofficial::sqlite3::sqlite3
)

add_custom_target(
29 Dockerfile
@@ -1,6 +1,11 @@
FROM simon987/sist2-build as build
MAINTAINER simon987 <me@simon987.net>

ENV DEBIAN_FRONTEND=noninteractive

RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash
RUN apt update -y; apt install -y nodejs && rm -rf /var/lib/apt/lists/*

WORKDIR /build/

COPY scripts scripts
@@ -9,14 +14,17 @@ COPY CMakeLists.txt .
COPY third-party third-party
COPY src src
COPY sist2-vue sist2-vue
COPY sist2-admin sist2-admin

RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2 || mv sist2_debug sist2
RUN cd sist2-vue/ && npm install && npm run build
RUN cd sist2-admin/frontend/ && npm install && npm run build

RUN mkdir build && cd build && cmake -DSIST_PLATFORM=x64_linux_docker -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
RUN cd build && make -j$(nproc)
RUN strip build/sist2 || mv build/sist2_debug build/sist2

FROM --platform="linux/amd64" ubuntu@sha256:965fbcae990b0467ed5657caceaec165018ef44a4d2d46c7cdea80a9dff0d1ea

WORKDIR /root

ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
@@ -24,7 +32,7 @@ ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]

RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libasan5 libmagic1 python3 \
    python3-pip git tesseract-ocr libpq-dev && rm -rf /var/lib/apt/lists/*
    python3-pip git tesseract-ocr && rm -rf /var/lib/apt/lists/*

RUN mkdir -p /usr/share/tessdata && \
    cd /usr/share/tessdata/ && \
@@ -35,12 +43,15 @@ RUN mkdir -p /usr/share/tessdata && \
    curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
    curl -o /usr/share/tessdata/osd.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/osd.traineddata &&\
    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata &&\
    curl -o /usr/share/tessdata/deu.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/deu.traineddata &&\
    curl -o /usr/share/tessdata/equ.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/equ.traineddata &&\
    curl -o /usr/share/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata

# sist2
COPY --from=build /build/sist2 /root/sist2
COPY --from=build /build/build/sist2 /root/sist2

# sist2-admin
COPY sist2-admin/requirements.txt sist2-admin/
RUN python3 -m pip install --no-cache -r sist2-admin/requirements.txt
COPY sist2-admin/ sist2-admin/
WORKDIR /root/sist2-admin
COPY sist2-admin/requirements.txt /root/sist2-admin/
RUN python3 -m pip install --no-cache -r /root/sist2-admin/requirements.txt
COPY --from=build /build/sist2-admin/ /root/sist2-admin/
Dockerfile.arm64

@@ -3,13 +3,20 @@ MAINTAINER simon987 <me@simon987.net>

WORKDIR /build/
ADD . /build/
RUN cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
RUN mkdir build && cd build && cmake -DSIST_PLATFORM=arm64_linux_docker -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
RUN cd build && make -j$(nproc)
RUN strip build/sist2 || mv build/sist2_debug build/sist2

FROM --platform="linux/arm64/v8" ubuntu:20.04
FROM --platform=linux/arm64/v8 ubuntu@sha256:537da24818633b45fcb65e5285a68c3ec1f3db25f5ae5476a7757bc8dfae92a3

RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
WORKDIR /root

ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

ENTRYPOINT ["/root/sist2"]

RUN apt update && apt install -y curl libasan5 libmagic1 tesseract-ocr python3-pip python3 git && rm -rf /var/lib/apt/lists/*

RUN mkdir -p /usr/share/tessdata && \
    cd /usr/share/tessdata/ && \
@@ -18,11 +25,16 @@ RUN mkdir -p /usr/share/tessdata && \
    curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
    curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
    curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
    curl -o /usr/share/tessdata/osd.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/osd.traineddata &&\
    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata &&\
    curl -o /usr/share/tessdata/deu.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/deu.traineddata &&\
    curl -o /usr/share/tessdata/equ.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/equ.traineddata &&\
    curl -o /usr/share/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata

ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
# sist2
COPY --from=build /build/build/sist2 /root/sist2

ENTRYPOINT ["/root/sist2"]

COPY --from=build /build/sist2 /root/sist2
# sist2-admin
COPY sist2-admin/requirements.txt sist2-admin/
RUN python3 -m pip install --no-cache -r sist2-admin/requirements.txt
COPY --from=build /build/sist2-admin/ sist2-admin/
123 README.md
@@ -10,13 +10,13 @@ sist2 (Simple incremental search tool)

*Warning: sist2 is in early development*

![sist2.png](docs/sist2.png)
![sist2.gif](docs/sist2.gif)

## Features

* Fast, low memory usage, multi-threaded
* Manage & schedule scan jobs with simple web interface (Docker only)
* Mobile-friendly Web interface
* Portable (all its features are packaged in a single executable)
* Extracts text and metadata from common file types \*
* Generates thumbnails \*
* Incremental scanning
@@ -24,47 +24,60 @@ sist2 (Simple incremental search tool)
* Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
* Named-entity recognition (client-side) \*\*\*\*

\* See [format support](#format-support)
\*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr)

![stats.png](docs/stats.png)
\*\*\*\* See [Named-Entity Recognition](#NER)

## Getting Started

### Using Docker Compose *(Windows/Linux/Mac)*

```yaml
version: "3"

services:
  elasticsearch:
    image: elasticsearch:7.17.9
    restart: unless-stopped
    environment:
      - "discovery.type=single-node"
      - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
  sist2-admin:
    image: simon987/sist2:3.0.3
    restart: unless-stopped
    volumes:
      - ./sist2-admin-data/:/sist2-admin/
      - /:/host
    ports:
      - 4090:4090 # sist2
      - 8080:8080 # sist2-admin
    working_dir: /root/sist2-admin/
    entrypoint: python3 /root/sist2-admin/sist2_admin/app.py
```

Navigate to http://localhost:8080/ to configure sist2-admin.

### Using the executable file *(Linux/WSL only)*

1. Have an Elasticsearch (>= 6.8.X, ideally >=7.14.0) instance running
    1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
    1. *(or)* Run using docker:
    2. *(or)* Run using docker:
       ```bash
       docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.14.0
       docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.17.9
       ```
    1. *(or)* Run using docker-compose:
       ```yaml
       elasticsearch:
         image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
         environment:
           - discovery.type=single-node
           - "ES_JAVA_OPTS=-Xms1G -Xmx2G"
       ```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
   Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
   recommended!)*
3. *(or)* `docker pull simon987/sist2:2.12.1-x64-linux`

1. See [Usage guide](docs/USAGE.md)
2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
   Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
3. See [usage guide](docs/USAGE.md) for command line usage.

\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
Example usage:

## Example usage

See [Usage guide](docs/USAGE.md) for more details

1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
1. Start web interface: `sist2 web ./docs_idx`
1. Scan a directory: `sist2 scan ~/Documents --output ./documents.sist2`
2. Push index to Elasticsearch: `sist2 index ./documents.sist2`
3. Start web interface: `sist2 web ./documents.sist2`

## Format support

@@ -81,8 +94,8 @@ See [Usage guide](docs/USAGE.md) for more details
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | yes | author, title |
| mobi, azw, azw3 | libmobi | yes | no | author, title |
| doc (MS Word 97-2003) | antiword | yes | no | author, title |
| mobi, azw, azw3 | libmobi | yes | yes | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |

@@ -109,10 +122,10 @@ Download the language data files with your package manager (`apt install tessera
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).

The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa) pre-installed.
(hin, jpn, eng, fra, rus, spa, chi_sim, deu) pre-installed.

You can use the `+` separator to specify multiple languages. The language
name must be identical to the `*.traineddata` file installed on your system
name must be identical to the `*.traineddata` file installed on your system
(use `chi_sim` rather than `chi-sim`).

Examples:
@@ -123,39 +136,63 @@ sist2 scan --ocr-images --ocr-lang eng ~/Images/Screenshots/
sist2 scan --ocr-ebooks --ocr-images --ocr-lang eng+chi_sim ~/Chinese-Bilingual/
```

### NER

sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported repository URL to
**Configuration** > **Machine learning options** > **Model repositories**
to enable it.

The text processing is done in your browser; no data is sent to any third-party services.
See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.

#### List of available repositories:

| URL | Maintainer | Purpose |
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |

<details>
<summary>Screenshot</summary>

![ner.png](docs/ner.png)

</details>

## Build from source

You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries.

### With docker (recommended)
### Using docker

```bash
git clone --recursive https://github.com/simon987/sist2/
cd sist2
docker build . -f ./Dockerfile -t my-sist2-image
docker build . -t my-sist2-image
# Copy sist2 executable from docker image
docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
```

### On a linux computer
### Using a linux computer

1. Install compile-time dependencies

   ```bash
   apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
   apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
   ```

1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile

1. Install vcpkg dependencies
2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
3. Install vcpkg dependencies

   ```bash
   vcpkg install curl[core,openssl]
   vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw jasper lcms gumbo
   vcpkg install curl[core,openssl] sqlite3 cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample]
   ```

1. Build
4. Build
   ```bash
   git clone --recursive https://github.com/simon987/sist2/
   (cd sist2-vue; npm install; npm run build)
   (cd sist2-admin/frontend; npm install; npm run build)
   cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
   make
   ```
@@ -12,7 +12,7 @@ REWRITE_URL=""
|
||||
sist2 scan \
|
||||
--threads 14 \
|
||||
--mem-throttle 32768 \
|
||||
--quality 1.0 \
|
||||
--thumbnail-quality 2 \
|
||||
--name $NAME \
|
||||
--ocr-lang=eng+chi_sim \
|
||||
--ocr-ebooks \
|
||||
|
||||
@@ -12,7 +12,7 @@ REWRITE_URL=""
|
||||
sist2 scan \
|
||||
--threads 14 \
|
||||
--mem-throttle 32768 \
|
||||
--quality 1.0 \
|
||||
--thumbnail-quality 2 \
|
||||
--name $NAME \
|
||||
--ocr-lang=eng+chi_sim \
|
||||
--ocr-ebooks \
|
||||
|
||||
@@ -2,7 +2,7 @@ version: "3"
|
||||
|
||||
services:
|
||||
elasticsearch:
|
||||
image: elasticsearch:7.14.0
|
||||
image: elasticsearch:7.17.9
|
||||
container_name: sist2-es
|
||||
environment:
|
||||
- "discovery.type=single-node"
|
||||
@@ -15,9 +15,9 @@ services:
|
||||
- /mnt/array/sist2-admin-data/:/sist2-admin/
|
||||
- /:/host
|
||||
ports:
|
||||
- 4090:4090
|
||||
# NOTE: Don't export this port publicly!
|
||||
- 8080:8080
|
||||
- 4090:4090
|
||||
working_dir: /root/sist2-admin/
|
||||
entrypoint: python3
|
||||
command:
|
||||
|
||||
231 docs/USAGE.md
@@ -1,78 +1,64 @@
# Usage

*More examples (specifically with docker/compose) are in progress*

* [scan](#scan)
    * [options](#scan-options)
    * [examples](#scan-examples)
    * [index format](#index-format)
* [index](#index)
    * [options](#index-options)
    * [examples](#index-examples)
* [web](#web)
    * [options](#web-options)
    * [examples](#web-examples)
    * [rewrite_url](#rewrite_url)
* [elasticsearch](#elasticsearch)
* [exec-script](#exec-script)
* [tagging](#tagging)
* [sidecar files](#sidecar-files)

```
Usage: sist2 scan [OPTION]... PATH
   or: sist2 index [OPTION]... INDEX
   or: sist2 web [OPTION]... INDEX...
   or: sist2 exec-script [OPTION]... INDEX

Lightning-fast file system indexer and search tool.

-h, --help                    show this help message and exit
-v, --version                 Show version and exit
--verbose                     Turn on logging
--very-verbose                Turn on debug messages
-v, --version                 Print version and exit.
--verbose                     Turn on logging.
--very-verbose                Turn on debug messages.
--json-logs                   Output logs in JSON format.

Scan options
-t, --threads=<int>           Number of threads. DEFAULT=1
--mem-throttle=<int>          Total memory threshold in MiB for scan throttling. DEFAULT=0
-q, --thumbnail-quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1
--thumbnail-size=<int>        Thumbnail size, in pixels. DEFAULT=500
--thumbnail-count=<int>       Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1
--content-size=<int>          Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768
--incremental=<str>           Reuse an existing index and only scan modified files.
-o, --output=<str>            Output directory. DEFAULT=index.sist2/
-t, --threads=<int>           Number of threads. DEFAULT: 1
-q, --thumbnail-quality=<int> Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2
--thumbnail-size=<int>        Thumbnail size, in pixels. DEFAULT: 552
--thumbnail-count=<int>       Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1
--content-size=<int>          Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768
-o, --output=<str>            Output index file path. DEFAULT: index.sist2
--incremental                 If the output file path exists, only scan new or modified files.
--optimize-index              Defragment index file after scan to reduce its file size.
--rewrite-url=<str>           Serve files from this url instead of from disk.
--name=<str>                  Index display name. DEFAULT: (name of the directory)
--name=<str>                  Index display name. DEFAULT: index
--depth=<int>                 Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str>               Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--archive=<str>               Archive file mode (skip|list|shallow|recurse). skip: don't scan, list: only save file names as text, shallow: don't scan archives inside archives. DEFAULT: recurse
--archive-passphrase=<str>    Passphrase for encrypted archive files
--ocr-lang=<str>              Tesseract language (use 'tesseract --list-langs' to see which are installed on your machine)
--ocr-images                  Enable OCR'ing of image files.
--ocr-ebooks                  Enable OCR'ing of ebook files.
-e, --exclude=<str>           Files that match this regex will not be scanned
--fast                        Only index file names & mime type
-e, --exclude=<str>           Files that match this regex will not be scanned.
--fast                        Only index file names & mime type.
--treemap-threshold=<str>     Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int>            Maximum memory buffer size per thread in MiB for files inside archives (see USAGE.md). DEFAULT: 2000
--read-subtitles              Read subtitles from media files.
--fast-epub                   Faster but less accurate EPUB parsing (no thumbnails, metadata)
--fast-epub                   Faster but less accurate EPUB parsing (no thumbnails, metadata).
--checksums                   Calculate file checksums when scanning.
--list-file=<str>             Specify a list of newline-delimited paths to be scanned instead of normal directory traversal. Use '-' to read from stdin.

Index options
-t, --threads=<int>           Number of threads. DEFAULT=1
--es-url=<str>                Elasticsearch url with port. DEFAULT=http://localhost:9200
--es-index=<str>              Elasticsearch index name. DEFAULT=sist2
-p, --print                   Just print JSON documents to stdout.
--incremental-index           Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch.
-t, --threads=<int>           Number of threads. DEFAULT: 1
--es-url=<str>                Elasticsearch url with port. DEFAULT: http://localhost:9200
--es-insecure-ssl             Do not verify SSL connections to Elasticsearch.
--es-index=<str>              Elasticsearch index name. DEFAULT: sist2
-p, --print                   Print JSON documents to stdout instead of indexing to elasticsearch.
--incremental-index           Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
--script-file=<str>           Path to user script.
--mappings-file=<str>         Path to Elasticsearch mappings.
--settings-file=<str>         Path to Elasticsearch settings.
--async-script                Execute user script asynchronously.
--batch-size=<int>            Index batch size. DEFAULT: 100
-f, --force-reset             Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
--batch-size=<int>            Index batch size. DEFAULT: 70
-f, --force-reset             Reset Elasticsearch mappings and settings.

Web options
--es-url=<str>                Elasticsearch url. DEFAULT=http://localhost:9200
--es-index=<str>              Elasticsearch index name. DEFAULT=sist2
--bind=<str>                  Listen on this address. DEFAULT=localhost:4090
--es-url=<str>                Elasticsearch url. DEFAULT: http://localhost:9200
--es-insecure-ssl             Do not verify SSL connections to Elasticsearch.
--es-index=<str>              Elasticsearch index name. DEFAULT: sist2
--bind=<str>                  Listen for connections on this address. DEFAULT: localhost:4090
--auth=<str>                  Basic auth in user:password format
--auth0-audience=<str>        API audience/identifier
--auth0-domain=<str>          Application domain
@@ -84,75 +70,23 @@ Web options
--lang=<str>                  Default UI language. Can be changed by the user

Exec-script options
--es-url=<str>                Elasticsearch url. DEFAULT=http://localhost:9200
--es-index=<str>              Elasticsearch index name. DEFAULT=sist2
--es-url=<str>                Elasticsearch url. DEFAULT: http://localhost:9200
--es-insecure-ssl             Do not verify SSL connections to Elasticsearch.
--es-index=<str>              Elasticsearch index name. DEFAULT: sist2
--script-file=<str>           Path to user script.
--async-script                Execute user script asynchronously.

Made by simon987 <me@simon987.net>. Released under GPL-3.0
```
## Scan
#### Thumbnail database size estimation

### Scan options
See chart below for rough estimate of thumbnail size vs. thumbnail size & quality arguments:

* `-t, --threads`
  Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
* `--mem-throttle`
  Total memory threshold in MiB for scan throttling. Worker threads will not start a new parse job
  until the total memory usage of sist2 is below this threshold. Set to 0 to disable. DEFAULT=0
* `-q, --thumbnail-quality`
  Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best.
* `--thumbnail-size`
  Thumbnail size in pixels.
* `--thumbnail-count`
  Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
  will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image
  every ~7s). Set to 0 to completely disable thumbnails.
* `--content-size`
  Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
  Repeated whitespace and special characters do not count toward this limit.
  Set to 0 to completely disable content parsing.
* `--incremental`
  Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
  will be copied to the new index and will not be parsed again.
* `-o, --output` Output directory.
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--name` Set the `name` option for the web module
* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
* `--archive` Archive file mode.
    * skip: Don't parse
    * list: Only get file names as text
    * shallow: Don't parse archives inside archives.
    * recurse: Scan archives recursively (default)
* `--ocr-lang`, `--ocr-ebooks`, `--ocr-images` See [OCR](../README.md#OCR)
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
  part of the full absolute path.

  Examples:
    * `-e ".*\.ttf"`: Ignore ttf files
    * `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
    * `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
    * `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directories
    * `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"`: Exclude the
      `/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
* `--fast` Only index file names and mime type
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
  will not be considered for the disk utilisation visualization; their size will be added to
  the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards",
  and so on.

  In effect, smaller `treemap-threshold` values will yield a more detailed
  (but also a more cluttered and harder to read) visualization.

* `--mem-buffer` Maximum memory buffer size in MiB (per thread) for files inside archives. Media files
  larger than this number will be read sequentially and no *seek* operations will be supported.
For example, `--thumbnail-size=500`, `--thumbnail-quality=2` for a directory with 8 million images will create a thumbnail database
that is about `8000000 * 36kB = 288GB`.

  To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
* `--checksums` Calculate file checksums (SHA1) when scanning files. This option does not cause any additional read
  operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
  files are hidden in the web UI (this behaviour can be toggled in the Configuration page).

![thumbnail_size](docs/thumbnail_size.png)
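As a sanity check, the `8000000 * 36kB = 288GB` figure above can be reproduced in a few lines of Python. This is a rough sketch: the ~36 kB average per thumbnail is an assumption read off the chart and will vary with your media and quality settings.

```python
# Back-of-envelope estimate of the thumbnail database size.
# avg_thumb_kb (~36 kB) is an assumed average taken from the chart above,
# not a value reported by sist2 itself.
def thumbnail_db_size_gb(file_count: int, avg_thumb_kb: float = 36.0) -> float:
    return file_count * avg_thumb_kb / 1e6  # kB -> GB

print(thumbnail_db_size_gb(8_000_000))  # 288.0, matching the example above
```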
### Scan examples

@@ -161,85 +95,22 @@ Simple scan
sist2 scan ~/Documents

sist2 scan \
    --threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
    --threads 4 --content-size 16000000 --thumbnail-quality 2 --archive shallow \
    --name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
    ~/Documents -o ./documents.idx/
    ~/Documents -o ./documents.sist2
```

Incremental scan
```
sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
```

If the index file does not exist, `--incremental` has no effect.
```bash
sist2 scan ~/Documents -o ./documents.sist2
sist2 scan ~/Documents -o ./documents.sist2 --incremental
# or
sist2 scan ~/Documents -o ./documents.sist2 --incremental
sist2 scan ~/Documents -o ./documents.sist2 --incremental
```
### Index format

A typical `ndjson` type index structure looks like this:
```
documents.idx/
├── descriptor.json
├── _index_main.ndjson.zst
├── treemap.csv
├── agg_mime.csv
├── agg_date.csv
├── add_size.csv
├── thumbs/
|   ├── data.mdb
|   └── lock.mdb
├── tags/
|   ├── data.mdb
|   └── lock.mdb
└── meta/
    ├── data.mdb
    └── lock.mdb
```

The `_index_*.ndjson.zst` files contain the document data in JSON format, in a compressed newline-delimited file.

The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
database containing the thumbnails.

The `descriptor.json` file contains general information about the index. The
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.

The `.csv` are pre-computed aggregations necessary for the stats page.

*thumbs/*:

LMDB key-value store. Keys are **binary** 16-byte md5 hash* (`_id` field)
and values are raw image bytes.

*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
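For illustration, the key derivation described above is easy to reproduce outside of sist2. A minimal sketch, assuming the relative path is hashed as a UTF-8 string; the example path is hypothetical.

```python
import hashlib

def thumbnail_key(path_relative_to_index_root: str) -> bytes:
    # 16-byte binary MD5 of the file's path (including extension),
    # relative to the index root; used as the LMDB key in thumbs/
    return hashlib.md5(path_relative_to_index_root.encode("utf-8")).digest()

print(thumbnail_key("photos/cat.jpg").hex())  # hypothetical example path
```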
## Index
### Index options
* `--es-url`
  Elasticsearch url and port. If you are using docker, make sure that both containers are on the
  same network.
* `--es-index`
  Elasticsearch index name. DEFAULT=sist2
* `-p, --print`
  Print index in JSON format to stdout.
* `--incremental-index`
  Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
  Only the new changes since the last scan will be sent.
* `--script-file`
  Path to user script. See [Scripting](scripting.md).
* `--mappings-file`
  Path to custom Elasticsearch mappings. If none is specified, [the bundled mappings](https://github.com/simon987/sist2/tree/master/schema) will be used.
* `--settings-file`
  Path to custom Elasticsearch settings. *(See above)*
* `--async-script`
  Use `wait_for_completion=false` elasticsearch option while executing user script.
  (See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
* `--batch-size=<int>`
  Index batch size. Indexing is generally faster with larger batches, but payloads that
  are too large will fail and additional overhead for retrying with smaller sizes may slow
  down the process.
* `-f, --force-reset`
  Reset Elasticsearch mappings and settings.
* `-t, --threads` Number of threads to use. Ideally, choose a number equal to the number of logical cores of the machine hosting Elasticsearch.

### Index examples

**Push to elasticsearch**
@@ -368,8 +239,8 @@ The sidecar file must have exactly the same file path and the `.s2meta` suffix.
```

```
sist2 scan ~/Documents -o ./docs.idx
sist2 index ./docs.idx
sist2 scan ~/Documents -o ./docs.sist2
sist2 index ./docs.sist2
```

*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,
BIN docs/ner.png (normal file). Binary file not shown. After size: 448 KiB
BIN docs/sist2.gif (normal file). Binary file not shown. After size: 3.7 MiB
BIN docs/sist2.png. Binary file not shown. Before size: 1011 KiB
BIN docs/thumbnail_size.png (normal file). Binary file not shown. After size: 180 KiB
@@ -1,10 +1,13 @@
#!/usr/bin/env bash

rm -rf index.sist2/
(
  cd ..
  rm -rf index.sist2

  python3 scripts/mime.py > src/parsing/mime_generated.c
  python3 scripts/serve_static.py > src/web/static_generated.c
  python3 scripts/index_static.py > src/index/static_generated.c
  python3 scripts/magic_static.py > src/magic_generated.c
  python3 scripts/mime.py > src/parsing/mime_generated.c
  python3 scripts/serve_static.py > src/web/static_generated.c
  python3 scripts/index_static.py > src/index/static_generated.c
  python3 scripts/magic_static.py > src/magic_generated.c

  printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
  printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
)
@@ -4,14 +4,20 @@ VCPKG_ROOT="/vcpkg"
|
||||
|
||||
git submodule update --init --recursive
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j $(nproc)
|
||||
strip sist2
|
||||
./sist2 -v > VERSION
|
||||
mv sist2 sist2-x64-linux
|
||||
mkdir build
|
||||
(
|
||||
cd build
|
||||
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
|
||||
make -j $(nproc)
|
||||
strip sist2
|
||||
./sist2 -v > VERSION
|
||||
)
|
||||
mv build/sist2 sist2-x64-linux
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j $(nproc)
|
||||
mv sist2_debug sist2-x64-linux-debug
|
||||
(
|
||||
cd build
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
|
||||
make -j $(nproc)
|
||||
)
|
||||
mv build/sist2_debug sist2-x64-linux-debug
|
||||
@@ -4,14 +4,19 @@ VCPKG_ROOT="/vcpkg"
|
||||
|
||||
git submodule update --init --recursive
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j $(nproc)
|
||||
strip sist2
|
||||
mv sist2 sist2-arm64-linux
|
||||
mkdir build
|
||||
(
|
||||
cd build
|
||||
cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
|
||||
make -j $(nproc)
|
||||
strip sist2
|
||||
)
|
||||
mv build/sist2 sist2-arm64-linux
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j $(nproc)
|
||||
strip sist2
|
||||
mv sist2_debug sist2-arm64-linux-debug
|
||||
(
|
||||
cd build
|
||||
cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
|
||||
make -j $(nproc)
|
||||
)
|
||||
mv build/sist2_debug sist2-arm64-linux-debug
|
||||
@@ -1,3 +1,4 @@
application/x-matlab-data,mat
application/arj, arj
application/base64, mme
application/binhex, hqx
@@ -29,7 +30,7 @@ application/mime, aps
application/mspowerpoint, ppz
application/msword, doc|dot|w6w|wiz|word
application/netmc, mcp
application/octet-stream, bin|dump|gpg
application/octet-stream, bin|dump|gpg|pack|idx
application/oda, oda
application/ogg, ogv
application/pdf, pdf
@@ -243,7 +244,7 @@ audio/make, funk|my|pfunk
audio/midi, kar
audio/mid, rmi
audio/mp4, m4b
audio/mpeg, m2a|mpa
audio/mpeg, m2a|mpa|mpga
audio/ogg, ogg
audio/s3m, s3m
audio/tsp-audio, tsi
@@ -346,6 +347,8 @@ text/mcf, mcf
text/pascal, pas
text/PGP,
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
text/x-script.python, pyx
text/csv,
application/vnd.coffeescript, coffee
text/richtext, rt|rtf|rtx
text/rtf,
@@ -382,7 +385,7 @@ text/x-pascal, p
text/x-perl, pl
text/x-php, php
text/x-po, po
text/x-python, py
text/x-python, py|pyi
text/x-ruby, rb
text/x-sass, sass
text/x-scss, scss
@@ -1,3 +1,5 @@
import zlib

mimes = {}
noparse = set()
ext_in_hash = set()
@@ -135,24 +137,40 @@ def clean(t):
    return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")


def crc(s):
    return zlib.crc32(s.encode()) & 0xffffffff


with open("scripts/mime.csv") as f:
    for l in f:
        mime, ext_list = l.split(",")
        if l.startswith("!"):
            mime = mime[1:]
            noparse.add(mime)
        ext = [x.strip() for x in ext_list.split("|")]
        ext = [x.strip() for x in ext_list.split("|") if x.strip() != ""]
        mimes[mime] = ext

seen_crc = set()
for ext in mimes.values():
    for e in ext:
        if crc(e) in seen_crc:
            raise Exception("CRC32 collision")
        seen_crc.add(crc(e))

seen_crc = set()
for mime in mimes.keys():
    if crc(mime) in seen_crc:
        raise Exception("CRC32 collision")
    seen_crc.add(crc(mime))

print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C")
print("#include <glib.h>\n")
print("#include <stdlib.h>\n")
# Enum
print("enum mime {")
for mime, ext in sorted(mimes.items()):
    print(" " + clean(mime) + "=" + mime_id(mime) + ",")
    print(f"{clean(mime)}={mime_id(mime)},")
print("};")

# Enum -> string
@@ -163,20 +181,20 @@ with open("scripts/mime.csv") as f:
print("default: return NULL;}}")

# Ext -> Enum
print("GHashTable *mime_get_ext_table() {"
      "GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);")
print("unsigned int mime_extension_lookup(unsigned long extension_crc32) {"
      "switch (extension_crc32) {")
for mime, ext in mimes.items():
    for e in [e for e in ext if e]:
        print("g_hash_table_insert(ext_table, \"" + e + "\", (gpointer)" + clean(mime) + ");")
        if e in ext_in_hash:
            raise Exception("extension already in hash: " + e)
        ext_in_hash.add(e)
print("return ext_table;}")
    if len(ext) > 0:
        for e in ext:
            print(f"case {crc(e)}:", end="")
        print(f"return {clean(mime)};")
print("default: return 0;}}")

# string -> Enum
print("GHashTable *mime_get_mime_table() {"
      "GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);")
for mime, ext in mimes.items():
    print("g_hash_table_insert(mime_table, \"" + mime + "\", (gpointer)" + clean(mime) + ");")
print("return mime_table;}")
print("unsigned int mime_name_lookup(unsigned long mime_crc32) {"
      "switch (mime_crc32) {")
for mime in mimes.keys():
    print(f"case {crc(mime)}: return {clean(mime)};")

print("default: return 0;}}")
print("#endif")
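The generated lookup functions switch on CRC32 values computed exactly like `crc()` in the script above. A minimal sketch of the same computation, runnable on its own; the `pdf` value is only an illustration:

```python
import zlib

def crc(s: str) -> int:
    # Same masking as mime.py, so the value matches the generated C case label
    return zlib.crc32(s.encode()) & 0xffffffff

print(crc("pdf"))  # the case label emitted for the "pdf" extension
```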
@@ -1,3 +1,3 @@
docker run --rm -it --name "sist2-dev-es"\
  -p 9200:9200 -e "discovery.type=single-node" \
  -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.14.0
  -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.17.9

@@ -1,3 +1,3 @@
docker run --rm -it --name "sist2-dev-es"\
  -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
  -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.1.2
  -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.7.0
1 sist2-admin/frontend/dist/css/app.css vendored

@@ -1 +0,0 @@
.navbar[data-v-27bc1d68]{box-shadow:0 .125rem .25rem rgba(0,0,0,.08)!important;border-radius:0}.theme-black .navbar[data-v-27bc1d68]{background:rgba(84,107,122,.18823529411764706);border-bottom:none}.navbar-brand[data-v-27bc1d68]{color:#222!important;font-size:1.75rem;padding:0}.navbar-brand[data-v-27bc1d68]:hover{color:#000!important}.version[data-v-27bc1d68]{color:#222!important;margin-left:-18px;margin-top:-14px;font-size:11px;font-family:monospace}.btn-link[data-v-27bc1d68]{color:#222}body,html{height:100%}#app{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;color:#2c3e50;padding-bottom:1em;min-height:100%}.info-icon{width:1rem;margin-right:.2rem;cursor:pointer;line-height:1rem;height:1rem;background-image:url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA0MjYuNjY3IDQyNi42NjciIGZpbGw9IiNmZmYiPjxwYXRoIGQ9Ik0xOTIgMTkyaDQyLjY2N3YxMjhIMTkyeiIvPjxwYXRoIGQ9Ik0yMTMuMzMzIDBDOTUuNDY3IDAgMCA5NS40NjcgMCAyMTMuMzMzczk1LjQ2NyAyMTMuMzMzIDIxMy4zMzMgMjEzLjMzM1M0MjYuNjY3IDMzMS4yIDQyNi42NjcgMjEzLjMzMyAzMzEuMiAwIDIxMy4zMzMgMHptMCAzODRjLTk0LjA4IDAtMTcwLjY2Ny03Ni41ODctMTcwLjY2Ny0xNzAuNjY3UzExOS4yNTMgNDIuNjY3IDIxMy4zMzMgNDIuNjY3IDM4NCAxMTkuMjUzIDM4NCAyMTMuMzMzIDMwNy40MTMgMzg0IDIxMy4zMzMgMzg0eiIvPjxwYXRoIGQ9Ik0xOTIgMTA2LjY2N2g0Mi42Njd2NDIuNjY3SDE5MnoiLz48L3N2Zz4=);filter:brightness(45%);display:block}.tabs{margin-top:10px}.modal-title{text-overflow:ellipsis;overflow:hidden;white-space:nowrap}@media screen and (min-width:1500px){.container{max-width:1440px}}label{margin-top:.5rem;margin-bottom:0}.shrink[data-v-9b017c42]{flex-grow:inherit}#task-history[data-v-46960281]{font-family:monospace;font-size:12px}#log-tail-output span{display:block}span.DEBUG{color:#9e9e9e}span.WARNING{color:#ffb300}span.INFO{color:#039be5}span.ERROR,span.FATAL{color:#f4511e}span.ADMIN{color:#ee05ff}#log-tail-output{font-size:13px;font-family:monospace;padding:6px;background-color:#f5f5f5;border:1px solid #ccc;border-radius:4px;margin:3px;white-space:pre;color:#000;overflow:hidden}

File diff suppressed because one or more lines are too long.
BIN sist2-admin/frontend/dist/favicon.ico vendored. Binary file not shown. Before size: 15 KiB
1 sist2-admin/frontend/dist/index.html vendored

@@ -1 +0,0 @@
<!DOCTYPE html><html lang=""><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1"><link rel="icon" href="favicon.ico"><title>sist2-admin</title><link href="css/app.css" rel="preload" as="style"><link href="css/chunk-vendors.css" rel="preload" as="style"><link href="js/app.js" rel="preload" as="script"><link href="js/chunk-vendors.js" rel="preload" as="script"><link href="css/chunk-vendors.css" rel="stylesheet"><link href="css/app.css" rel="stylesheet"></head><body><noscript><strong>We're sorry but sist2-admin-vue doesn't work properly without JavaScript enabled. Please enable it to continue.</strong></noscript><div id="app"></div><script src="js/chunk-vendors.js"></script><script src="js/app.js"></script></body></html>

1 sist2-admin/frontend/dist/js/app.js vendored. File diff suppressed because one or more lines are too long.

344 sist2-admin/frontend/dist/js/chunk-vendors.js vendored. File diff suppressed because one or more lines are too long.

788 sist2-admin/frontend/package-lock.json generated. File diff suppressed because it is too large.
@@ -20,14 +20,11 @@
  },
  "devDependencies": {
    "@vue/cli-plugin-babel": "~5.0.8",
    "@vue/cli-plugin-eslint": "~5.0.8",
    "@vue/cli-plugin-router": "~5.0.8",
    "@vue/cli-plugin-vuex": "~5.0.8",
    "@vue/cli-service": "~5.0.8",
    "babel-eslint": "^10.1.0",
    "bootstrap": "^4.5.2",
    "eslint": "^6.7.2",
    "eslint-plugin-vue": "^6.2.2",
    "vue-template-compiler": "^2.6.11"
  },
  "eslintConfig": {
@@ -4,7 +4,7 @@
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width,initial-scale=1.0">
  <link rel="icon" href="<%= BASE_URL %>favicon.ico">
  <link rel="icon" href="<%= BASE_URL %>serve_favicon_ico.ico">
  <title>sist2-admin</title>
</head>
<body>
@@ -28,16 +28,22 @@ export default {
      return this.$store.state.jobDesktopNotificationMap[this.job.name];
    }
  },
  methods: {
  mounted() {
    this.cronValid = this.checkCron(this.job.cron_expression)
  },
  methods: {
    checkCron(expression) {
      return /((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})/.test(expression);
    },
    updateNotifications(value) {
      this.$store.dispatch("setJobDesktopNotification", {
        job: this.job.name,
        enabled: value
      })
      });
    },
    update() {
      if (this.job.schedule_enabled) {
        this.cronValid = /((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})/.test(this.job.cron_expression);
        this.cronValid = this.checkCron(this.job.cron_expression);
      } else {
        this.cronValid = undefined;
      }
@@ -6,9 +6,6 @@
<label>{{ $t("scanOptions.threads") }}</label>
<b-form-input type="number" min="1" v-model="options.threads" @change="update()"></b-form-input>

<label>{{ $t("scanOptions.memThrottle") }}</label>
<b-form-input type="number" min="0" v-model="options.mem_throttle" @change="update()"></b-form-input>

<label>{{ $t("scanOptions.thumbnailQuality") }}</label>
<b-form-input type="number" min="1" max="31" v-model="options.thumbnail_quality" @change="update()"></b-form-input>

@@ -70,8 +67,9 @@
{{ $t("scanOptions.readSubtitles") }}
</b-form-checkbox>

<label>{{ $t("scanOptions.memBuffer") }}</label>
<b-form-input type="number" min="0" v-model="options.mem_buffer" @change="update()"></b-form-input>
<b-form-checkbox v-model="options.optimize_index" @change="update()">
{{ $t("scanOptions.optimizeIndex") }}
</b-form-checkbox>

<label>{{ $t("scanOptions.treemapThreshold") }}</label>
<b-form-input type="number" min="0" v-model="options.treemap_threshold" @change="update()"></b-form-input>
@@ -56,13 +56,17 @@ export default {
      tagline: "Tagline in navbar",
      auth: "Basic auth in user:password format",
      tagAuth: "Basic auth in user:password format for tagging",
      auth0Audience: "Auth0 audience",
      auth0Domain: "Auth0 domain",
      auth0ClientId: "Auth0 client ID",
      auth0PublicKey: "Auth0 public key",
    },
    scanOptions: {
      title: "Scanning options",
      path: "Path",
      threads: "Number of threads",
      memThrottle: "Total memory threshold in MiB for scan throttling",
      thumbnailQuality: "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best",
      thumbnailQuality: "Thumbnail quality, on a scale of 2 to 32, 2 being the best",
      thumbnailCount: "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails.",
      thumbnailSize: "Thumbnail size, in pixels",
      contentSize: "Number of bytes to be extracted from text documents. Set to 0 to disable",
@@ -80,7 +84,8 @@ export default {
      checksums: "Calculate file checksums when scanning",
      readSubtitles: "Read subtitles from media files",
      memBuffer: "Maximum memory buffer size per thread in MiB for files inside archives",
      treemapThreshold: "Relative size threshold for treemap"
      treemapThreshold: "Relative size threshold for treemap",
      optimizeIndex: "Defragment index file after scan to reduce its file size."
    },
    indexOptions: {
      title: "Indexing options",
@@ -40,6 +40,39 @@ import TaskListItem from "@/components/TaskListItem";
|
||||
import Sist2AdminApi from "@/Sist2AdminApi";
|
||||
import moment from "moment";
|
||||
|
||||
const DAY = 3600 * 24;
|
||||
const HOUR = 3600;
|
||||
const MINUTE = 60;
|
||||
|
||||
function humanDuration(sec_num) {
|
||||
sec_num = sec_num / 1000;
|
||||
const days = Math.floor(sec_num / DAY);
|
||||
sec_num -= days * DAY;
|
||||
const hours = Math.floor(sec_num / HOUR);
|
||||
sec_num -= hours * HOUR;
|
||||
const minutes = Math.floor(sec_num / MINUTE);
|
||||
sec_num -= minutes * MINUTE;
|
||||
const seconds = Math.floor(sec_num);
|
||||
|
||||
if (days > 0) {
|
||||
return `${days} days ${hours}h ${minutes}m ${seconds}s`;
|
||||
}
|
||||
|
||||
if (hours > 0) {
|
||||
return `${hours}h ${minutes}m ${seconds}s`;
|
||||
}
|
||||
|
||||
if (minutes > 0) {
|
||||
return `${minutes}m ${seconds}s`;
|
||||
}
|
||||
|
||||
if (seconds > 0) {
|
||||
return `${seconds}s`;
|
||||
}
|
||||
|
||||
return "<0s";
|
||||
}
|
||||
|
||||
export default {
|
||||
name: 'Tasks',
|
||||
components: {TaskListItem},
|
||||
@@ -100,17 +133,10 @@ export default {
|
||||
})
|
||||
},
|
||||
taskDuration(task) {
|
||||
const start = moment(task.started);
|
||||
const end = moment(task.ended);
|
||||
const start = moment.utc(task.started);
|
||||
const end = moment.utc(task.ended);
|
||||
|
||||
let duration = moment.utc(end.diff(start)).format("HH[h] mm[m] ss[s]");
|
||||
|
||||
duration = duration.replace("00h ", "");
|
||||
duration = duration.replace(/^00m /, "");
|
||||
duration = duration.replace(/00s/, "<1s");
|
||||
duration = duration.replace(/^0/, "");
|
||||
|
||||
return duration;
|
||||
return humanDuration(end.diff(start))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -21,13 +21,11 @@ from config import LOG_FOLDER, logger, WEBSERVER_PORT, DATA_FOLDER, SIST2_BINARY
 from jobs import Sist2Job, Sist2ScanTask, TaskQueue, Sist2IndexTask, JobStatus
 from notifications import Subscribe, Notifications
 from sist2 import Sist2
-from state import PickleTable, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEMA_VERSION
+from state import migrate_v1_to_v2, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEMA_VERSION
 from web import Sist2Frontend

 VERSION = "1.0"

 sist2 = Sist2(SIST2_BINARY, DATA_FOLDER)
-db = PersistentState(table_factory=PickleTable, dbfile=os.path.join(DATA_FOLDER, "state.db"))
+db = PersistentState(dbfile=os.path.join(DATA_FOLDER, "state.db"))
 notifications = Notifications()
 task_queue = TaskQueue(sist2, db, notifications)

@@ -52,7 +50,6 @@ async def home():
 @app.get("/api")
 async def api():
     return {
         "version": VERSION,
         "tesseract_langs": TESSERACT_LANGS,
         "logs_folder": LOG_FOLDER
     }

@@ -60,18 +57,17 @@ async def api():

 @app.get("/api/job/{name:str}")
 async def get_job(name: str):
-    row = db["jobs"][name]
-    if row:
-        return row["job"]
-    raise HTTPException(status_code=404)
+    job = db["jobs"][name]
+    if not job:
+        raise HTTPException(status_code=404)
+    return job


 @app.get("/api/frontend/{name:str}")
 async def get_frontend(name: str):
-    row = db["frontends"][name]
-    if row:
-        frontend = row["frontend"]
-        frontend: Sist2Frontend
+    frontend = db["frontends"][name]
+    frontend: Sist2Frontend
+    if frontend:
         frontend.running = frontend.name in RUNNING_FRONTENDS
         return frontend
     raise HTTPException(status_code=404)
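All of the handler rewrites in this file are the same mechanical change: under the old schema a table row wrapped its model in a dict, under the new schema the row is the pydantic model itself. A minimal before/after sketch (names taken from the hunks; not a complete program):

    # before (schema v1): every read had to unwrap the stored dict
    db["jobs"][name] = {"job": job}
    job = db["jobs"][name]["job"]

    # after (schema v3): the row holds the model directly
    db["jobs"][name] = job
    job = db["jobs"][name]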
@@ -79,16 +75,16 @@ async def get_frontend(name: str):

 @app.get("/api/job/")
 async def get_jobs():
-    return [row["job"] for row in db["jobs"]]
+    return list(db["jobs"])


 @app.put("/api/job/{name:str}")
-async def update_job(name: str, job: Sist2Job):
+async def update_job(name: str, new_job: Sist2Job):
     # TODO: Check etag

-    job.last_modified = datetime.now()
-    row = db["jobs"][name]
-    if not row:
+    new_job.last_modified = datetime.now()
+    job = db["jobs"][name]
+    if not job:
         raise HTTPException(status_code=404)

     args_that_trigger_full_scan = [

@@ -108,15 +104,15 @@ async def update_job(name: str, job: Sist2Job):
         "read_subtitles",
     ]
     for arg in args_that_trigger_full_scan:
-        if getattr(row["job"].scan_options, arg) != getattr(job.scan_options, arg):
-            job.do_full_scan = True
+        if getattr(new_job.scan_options, arg) != getattr(job.scan_options, arg):
+            new_job.do_full_scan = True

-    db["jobs"][name] = {"job": job}
+    db["jobs"][name] = new_job
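The loop above forces a full (non-incremental) scan whenever one of the listed scan options differs between the stored job and the submitted one. For illustration only, it is equivalent to (same names as the hunk):

    if any(getattr(job.scan_options, arg) != getattr(new_job.scan_options, arg)
           for arg in args_that_trigger_full_scan):
        new_job.do_full_scan = True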
 @app.put("/api/frontend/{name:str}")
 async def update_frontend(name: str, frontend: Sist2Frontend):
-    db["frontends"][name] = {"frontend": frontend}
+    db["frontends"][name] = frontend

     # TODO: Check etag

@@ -142,7 +138,7 @@ def _run_job(job: Sist2Job):
     job.last_modified = datetime.now()
     if job.status == JobStatus("created"):
         job.status = JobStatus("started")
-    db["jobs"][job.name] = {"job": job}
+    db["jobs"][job.name] = job

     scan_task = Sist2ScanTask(job, f"Scan [{job.name}]")
     index_task = Sist2IndexTask(job, f"Index [{job.name}]", depends_on=scan_task)

@@ -153,19 +149,19 @@ def _run_job(job: Sist2Job):

 @app.get("/api/job/{name:str}/run")
 async def run_job(name: str):
-    row = db["jobs"][name]
-    if not row:
+    job = db["jobs"][name]
+    if not job:
         raise HTTPException(status_code=404)

-    _run_job(row["job"])
+    _run_job(job)

     return "ok"


 @app.delete("/api/job/{name:str}")
 async def delete_job(name: str):
-    row = db["jobs"][name]
-    if row:
+    job = db["jobs"][name]
+    if job:
         del db["jobs"][name]
     else:
         raise HTTPException(status_code=404)

@@ -177,8 +173,8 @@ async def delete_frontend(name: str):
         os.kill(RUNNING_FRONTENDS[name], signal.SIGTERM)
         del RUNNING_FRONTENDS[name]

-    row = db["frontends"][name]
-    if row:
+    frontend = db["frontends"][name]
+    if frontend:
         del db["frontends"][name]
     else:
         raise HTTPException(status_code=404)

@@ -190,18 +186,18 @@ async def create_job(name: str):
         raise ValueError("Job with the same name already exists")

     job = Sist2Job.create_default(name)
-    db["jobs"][name] = {"job": job}
+    db["jobs"][name] = job

     return job


 @app.post("/api/frontend/{name:str}")
 async def create_frontend(name: str):
-    if db["frontend"][name]:
+    if db["frontends"][name]:
         raise ValueError("Frontend with the same name already exists")

     frontend = Sist2Frontend.create_default(name)
-    db["frontends"][name] = {"frontend": frontend}
+    db["frontends"][name] = frontend

     return frontend

@@ -255,7 +251,7 @@ def check_es_version(es_url: str, insecure: bool):


 def start_frontend_(frontend: Sist2Frontend):
-    frontend.web_options.indices = list(map(lambda j: db["jobs"][j]["job"].last_index, frontend.jobs))
+    frontend.web_options.indices = list(map(lambda j: db["jobs"][j].index_path, frontend.jobs))

     pid = sist2.web(frontend.web_options, frontend.name)
     RUNNING_FRONTENDS[frontend.name] = pid

@@ -263,11 +259,11 @@ def start_frontend_(frontend: Sist2Frontend):

 @app.post("/api/frontend/{name:str}/start")
 async def start_frontend(name: str):
-    row = db["frontends"][name]
-    if not row:
+    frontend = db["frontends"][name]
+    if not frontend:
         raise HTTPException(status_code=404)

-    start_frontend_(row["frontend"])
+    start_frontend_(frontend)


 @app.post("/api/frontend/{name:str}/stop")

@@ -280,8 +276,7 @@ async def stop_frontend(name: str):
 @app.get("/api/frontend/")
 async def get_frontends():
     res = []
-    for row in db["frontends"]:
-        frontend = row["frontend"]
+    for frontend in db["frontends"]:
+        frontend: Sist2Frontend
         frontend.running = frontend.name in RUNNING_FRONTENDS
         res.append(frontend)

@@ -364,14 +359,14 @@ def initialize_db():
     db["sist2_admin"]["info"] = {"version": DB_SCHEMA_VERSION}

     frontend = Sist2Frontend.create_default("default")
-    db["frontends"]["default"] = {"frontend": frontend}
+    db["frontends"]["default"] = frontend

     logger.info("Initialized database.")


 def start_frontends():
-    for row in db["frontends"]:
-        frontend: Sist2Frontend = row["frontend"]
+    for frontend in db["frontends"]:
+        frontend: Sist2Frontend
         if frontend.auto_start and len(frontend.jobs) > 0:
             start_frontend_(frontend)

@@ -380,8 +375,11 @@ if __name__ == '__main__':

     if not db["sist2_admin"]["info"]:
         initialize_db()
-    elif db["sist2_admin"]["info"]["version"] != DB_SCHEMA_VERSION:
-        print("Database has incompatible schema version! Delete state.db to continue.")
+    if db["sist2_admin"]["info"]["version"] == "1":
+        logger.info("Migrating to v2 database schema")
+        migrate_v1_to_v2(db)
+    if db["sist2_admin"]["info"]["version"] == "2":
+        logger.error("Cannot migrate database from v2 to v3. Delete state.db to proceed.")
         exit(-1)

     start_frontends()

@@ -10,7 +10,7 @@ from jobs import Sist2Job


 def _check_schedule(db: PersistentState, run_job):
-    for job in (row["job"] for row in db["jobs"]):
+    for job in db["jobs"]:
         job: Sist2Job

         if job.schedule_enabled:
@@ -1,23 +1,21 @@
 import json
 import logging
 import os.path
 import shutil
 import signal
 import uuid
 from datetime import datetime
 from enum import Enum
 from hashlib import md5
 from logging import FileHandler
 from threading import Lock, Thread
 from time import sleep
 from uuid import uuid4, UUID

 from hexlib.db import PersistentState
-from pydantic import BaseModel, validator
+from pydantic import BaseModel

 from config import logger, LOG_FOLDER
 from notifications import Notifications
-from sist2 import ScanOptions, IndexOptions, Sist2, Sist2Index
+from sist2 import ScanOptions, IndexOptions, Sist2
 from state import RUNNING_FRONTENDS
 from web import Sist2Frontend

@@ -38,7 +36,8 @@ class Sist2Job(BaseModel):
     schedule_enabled: bool = False

-    previous_index: str = None
-    last_index: str = None
+    index_path: str = None
+    previous_index_path: str = None
     last_index_date: datetime = None
     status: JobStatus = JobStatus("created")
     last_modified: datetime

@@ -58,10 +57,10 @@ class Sist2Job(BaseModel):
             cron_expression="0 0 * * *"
         )

-    @validator("etag", always=True)
-    def validate_etag(cls, value, values):
-        s = values["name"] + values["scan_options"].json() + values["index_options"].json() + values["cron_expression"]
-        return md5(s.encode()).hexdigest()
+    # @validator("etag", always=True)
+    # def validate_etag(cls, value, values):
+    #     s = values["name"] + values["scan_options"].json() + values["index_options"].json() + values["cron_expression"]
+    #     return md5(s.encode()).hexdigest()


 class Sist2TaskProgress:

@@ -124,10 +123,10 @@ class Sist2ScanTask(Sist2Task):

         self.job.scan_options.name = self.job.name

-        if self.job.last_index and os.path.exists(self.job.last_index) and not self.job.do_full_scan:
-            self.job.scan_options.incremental = self.job.last_index
+        if self.job.index_path is not None and not self.job.do_full_scan:
+            self.job.scan_options.output = self.job.index_path
         else:
-            self.job.scan_options.incremental = None
+            self.job.scan_options.output = None

         def set_pid(pid):
             self.pid = pid

@@ -139,19 +138,26 @@ class Sist2ScanTask(Sist2Task):
             self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
             logger.info(f"Task {self.display_name} failed ({return_code})")
         else:
-            index = Sist2Index(self.job.scan_options.output)
-
-            # Save latest index
-            self.job.previous_index = self.job.last_index
-
-            self.job.last_index = index.path
+            self.job.index_path = self.job.scan_options.output
             self.job.last_index_date = datetime.now()
             self.job.do_full_scan = False
-            db["jobs"][self.job.name] = {"job": self.job}
-            self._logger.info(json.dumps({"sist2-admin": f"Save last_index={self.job.last_index}"}))
+            db["jobs"][self.job.name] = self.job
+            self._logger.info(json.dumps({"sist2-admin": f"Save last_index_date={self.job.last_index_date}"}))

         logger.info(f"Completed {self.display_name} ({return_code=})")

+        # Remove old index
+        if return_code == 0:
+            if self.job.previous_index_path is not None and self.job.previous_index_path != self.job.index_path:
+                self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index_path=}"}))
+                try:
+                    os.remove(self.job.previous_index_path)
+                except FileNotFoundError:
+                    pass
+
+            self.job.previous_index_path = self.job.index_path
+            db["jobs"][self.job.name] = self.job

         return return_code
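Taken together, these hunks replace the old two-directory model (last_index / previous_index) with a single index file that is reused in place. A condensed sketch of the new flow with simplified stand-in classes (not the real models) and a made-up path:

    class Opts:
        output = None

    class Job:
        index_path = None          # no index yet
        do_full_scan = False
        scan_options = Opts()

    job = Job()

    # First scan: output stays None, so Sist2.scan() picks a fresh file name.
    if job.index_path is not None and not job.do_full_scan:
        job.scan_options.output = job.index_path
    else:
        job.scan_options.output = None

    # After a successful scan the job remembers where the index lives;
    # the next scan then updates the same .sist2 file incrementally.
    job.index_path = "/sist2-admin/scan-docs.sist2"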
@@ -173,19 +179,12 @@ class Sist2IndexTask(Sist2Task):
         ok = return_code == 0

         if ok:
-            # Remove old index
-            if self.job.previous_index is not None:
-                self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index=}"}))
-                try:
-                    shutil.rmtree(self.job.previous_index)
-                except FileNotFoundError:
-                    pass
-
             self.restart_running_frontends(db, sist2)

         # Update status
         self.job.status = JobStatus("indexed") if ok else JobStatus("failed")
-        db["jobs"][self.job.name] = {"job": self.job}
+        self.job.previous_index_path = self.job.index_path
+        db["jobs"][self.job.name] = self.job

         self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}"}))

@@ -195,16 +194,19 @@ class Sist2IndexTask(Sist2Task):

     def restart_running_frontends(self, db: PersistentState, sist2: Sist2):
         for frontend_name, pid in RUNNING_FRONTENDS.items():
-            frontend = db["frontends"][frontend_name]["frontend"]
+            frontend = db["frontends"][frontend_name]
             frontend: Sist2Frontend

-            os.kill(pid, signal.SIGTERM)
+            try:
+                os.kill(pid, signal.SIGTERM)
+            except ProcessLookupError:
+                pass
             try:
                 os.wait()
             except ChildProcessError:
                 pass

-            frontend.web_options.indices = map(lambda j: db["jobs"][j]["job"].last_index, frontend.jobs)
+            frontend.web_options.indices = map(lambda j: db["jobs"][j].index_path, frontend.jobs)

             pid = sist2.web(frontend.web_options, frontend.name)
             RUNNING_FRONTENDS[frontend_name] = pid
@@ -2,7 +2,6 @@ import datetime
 import json
 import logging
 import os.path
 import traceback
 from datetime import datetime
 from io import TextIOWrapper
 from logging import FileHandler

@@ -63,7 +62,7 @@ class WebOptions(BaseModel):
         if self.auth:
             args.append(f"--auth={self.auth}")
         if self.tag_auth:
-            args.append(f"--tag_auth={self.tag_auth}")
+            args.append(f"--tag-auth={self.tag_auth}")
         if self.dev:
             args.append(f"--dev")

@@ -78,10 +77,10 @@ class IndexOptions(BaseModel):
     es_url: str = "http://elasticsearch:9200"
     es_insecure_ssl: bool = False
     es_index: str = "sist2"
-    incremental_index: bool = False
+    incremental_index: bool = True
     script: str = ""
     script_file: str = None
-    batch_size: int = 100
+    batch_size: int = 70

     def __init__(self, **kwargs):
         super().__init__(**kwargs)

@@ -110,15 +109,14 @@ ARCHIVE_RECURSE = "recurse"
 class ScanOptions(BaseModel):
     path: str
     threads: int = 1
     mem_throttle: int = 0
-    thumbnail_quality: float = 1.0
-    thumbnail_size: int = 500
+    thumbnail_quality: int = 2
+    thumbnail_size: int = 552
     thumbnail_count: int = 1
     content_size: int = 32768
     depth: int = -1
     archive: str = ARCHIVE_RECURSE
     archive_passphrase: str = None
-    ocr_lang: bool = None
+    ocr_lang: str = None
     ocr_images: bool = False
     ocr_ebooks: bool = False
     exclude: str = None

@@ -128,7 +126,8 @@ class ScanOptions(BaseModel):
     read_subtitles: bool = False
     fast_epub: bool = False
     checksums: bool = False
-    incremental: str = None
+    incremental: bool = True
+    optimize_index: bool = False
     output: str = None
     name: str = None
     rewrite_url: str = None

@@ -138,13 +137,15 @@ class ScanOptions(BaseModel):
         super().__init__(**kwargs)

     def args(self):
-        args = ["scan", self.path, f"--threads={self.threads}", f"--mem-throttle={self.mem_throttle}",
-                f"--thumbnail-quality={self.thumbnail_quality}", f"--thumbnail-count={self.thumbnail_count}",
+        args = ["scan", self.path, f"--threads={self.threads}", f"--thumbnail-quality={self.thumbnail_quality}",
+                f"--thumbnail-count={self.thumbnail_count}", f"--thumbnail-size={self.thumbnail_size}",
                 f"--content-size={self.content_size}", f"--output={self.output}", f"--depth={self.depth}",
                 f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"]

         if self.incremental:
-            args.append(f"--incremental={self.incremental}")
+            args.append(f"--incremental")
+        if self.optimize_index:
+            args.append(f"--optimize-index")
         if self.rewrite_url:
             args.append(f"--rewrite-url={self.rewrite_url}")
         if self.name:
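The net effect on the command line: v2 passed --mem-throttle and pointed --incremental=<previous index> at the old index, while v3 emits a bare --incremental flag and relies on --output pointing at the existing .sist2 file. A rough illustration (the option values, the mem-buffer default, and the --name handling below the visible hunk are assumptions, so the output shown is approximate):

    opts = ScanOptions(path="/data", name="docs", output="/sist2-admin/scan-docs.sist2")
    print(" ".join(opts.args()))
    # scan /data --threads=1 --thumbnail-quality=2 --thumbnail-count=1
    #     --thumbnail-size=552 --content-size=32768
    #     --output=/sist2-admin/scan-docs.sist2 --depth=-1 --archive=recurse
    #     --mem-buffer=... --incremental --name=docs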
@@ -234,11 +235,11 @@ class Sist2:

     def scan(self, options: ScanOptions, logs_cb, set_pid_cb):

-        output_dir = os.path.join(
-            self._data_dir,
-            f"scan-{datetime.now()}.sist2"
-        )
-        options.output = output_dir
+        if options.output is None:
+            options.output = os.path.join(
+                self._data_dir,
+                f"scan-{options.name.replace('/', '_')}-{datetime.now()}.sist2"
+            )

         args = [
             self._bin_path,

@@ -277,23 +278,17 @@ class Sist2:
     @staticmethod
     def _consume_logs_stdout(logs_cb, proc):
         pipe_wrapper = TextIOWrapper(proc.stdout, encoding="utf8", errors="ignore")
-        try:
-            for line in pipe_wrapper:
+        for line in pipe_wrapper:
+            try:
                 if line.strip() == "":
                     continue
                 log_object = json.loads(line)
                 logs_cb(log_object)
-        except Exception as e:
-            proc.kill()
-            try:
-                print(line)
-            except NameError:
-                pass
-            print(traceback.format_exc())
-        finally:
-            pass
-            # proc.wait()
-            # pipe_wrapper.close()
+            except Exception as e:
+                try:
+                    logs_cb({"sist2-admin": f"Could not decode log line: {line}; {e}"})
+                except NameError:
+                    pass

     def web(self, options: WebOptions, name: str):
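The reworked log reader moves the try/except inside the loop, so a single malformed line is reported through logs_cb and skipped instead of killing the subprocess and aborting the whole read. A self-contained sketch of the same pattern (input lines made up for the example):

    import json

    def consume(lines, logs_cb):
        for line in lines:
            try:
                if line.strip() == "":
                    continue
                logs_cb(json.loads(line))
            except Exception as e:
                logs_cb({"sist2-admin": f"Could not decode log line: {line}; {e}"})

    consume(['{"level": "info"}', 'not json'], print)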
@@ -1,6 +1,7 @@
 from typing import Dict
+import shutil

-from hexlib.db import Table
+from hexlib.db import Table, PersistentState
 import pickle

 from tesseract import get_tesseract_langs

@@ -9,7 +10,7 @@ RUNNING_FRONTENDS: Dict[str, int] = {}

 TESSERACT_LANGS = get_tesseract_langs()

-DB_SCHEMA_VERSION = "1"
+DB_SCHEMA_VERSION = "3"

 from pydantic import BaseModel

@@ -48,3 +49,31 @@ class PickleTable(Table):
         for row in super().sql(where_clause, *params):
             yield dict((k, _deserialize(v)) for k, v in row.items())


+def migrate_v1_to_v2(db: PersistentState):
+
+    shutil.copy(db.dbfile, db.dbfile + "-before-migrate-v2.bak")
+
+    # Frontends
+    db._table_factory = PickleTable
+    frontends = [row["frontend"] for row in db["frontends"]]
+    del db["frontends"]
+
+    db._table_factory = Table
+    for frontend in frontends:
+        db["frontends"][frontend.name] = frontend
+    list(db["frontends"])
+
+    # Jobs
+    db._table_factory = PickleTable
+    jobs = [row["job"] for row in db["jobs"]]
+    del db["jobs"]
+
+    db._table_factory = Table
+    for job in jobs:
+        db["jobs"][job.name] = job
+    list(db["jobs"])
+
+    db["sist2_admin"]["info"] = {
+        "version": "2"
+    }
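This is the function invoked by the startup version check added to app.py earlier in this diff. A minimal sketch of how it is driven (the state.db path is an example):

    from hexlib.db import PersistentState
    from state import migrate_v1_to_v2

    db = PersistentState(dbfile="/sist2-admin/state.db")

    info = db["sist2_admin"]["info"]
    if info and info["version"] == "1":
        # backs up state.db, unwraps the pickled {"job": ...} / {"frontend": ...}
        # rows, and rewrites them as plain rows under schema version "2"
        migrate_v1_to_v2(db)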
sist2-vue/dist/css/chunk-vendors.css (vendored, 8 lines): diff suppressed because one or more lines are too long.
sist2-vue/dist/css/index.css (vendored, 1 line): diff suppressed because one or more lines are too long.
sist2-vue/dist/index.html (vendored, 3 lines removed):
@@ -1,3 +0,0 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,user-scalable=no"/><title>sist2</title><script defer="defer" src="js/chunk-vendors.js"></script><script defer="defer" src="js/index.js"></script><link href="css/chunk-vendors.css" rel="stylesheet"><link href="css/index.css" rel="stylesheet"></head><body><noscript><style>body {
-    height: initial;
-}</style><div style="text-align: center; margin-top: 100px"><strong>We're sorry but sist2 doesn't work properly without JavaScript enabled. Please enable it to continue.</strong><br/><strong>Nous sommes désolés mais sist2 ne fonctionne pas correctement si JavaScript est activé. Veuillez l'activer pour continuer.</strong></div></noscript><div id="app"></div></body></html>
sist2-vue/dist/js/chunk-vendors.js (vendored, 99 lines): diff suppressed because one or more lines are too long.
sist2-vue/dist/js/index.js (vendored, 1 line): diff suppressed because one or more lines are too long.
sist2-vue/fslightbox-vue.tgz (new file): binary file not shown.
sist2-vue/package-lock.json (generated, 1621 lines): diff suppressed because it is too large.
@@ -9,13 +9,14 @@
     "dependencies": {
         "@auth0/auth0-spa-js": "^2.0.2",
         "@egjs/vue-infinitegrid": "3.3.0",
+        "@tensorflow/tfjs": "^4.4.0",
         "axios": "^0.25.0",
         "bootstrap-vue": "^2.21.2",
         "core-js": "^3.6.5",
-        "d3": "^5.6.1",
+        "d3": "^7.8.4",
        "date-fns": "^2.21.3",
        "dom-to-image": "^2.6.0",
-        "fslightbox-vue": "file:../../../../mnt/Hatchery/projects/sist2/fslightbox-vue-pro-1.3.1.tgz",
+        "fslightbox-vue": "fslightbox-vue.tgz",
        "nouislider": "^15.2.0",
        "underscore": "^1.13.1",
        "vue": "^2.6.12",
|
||||
<template>
|
||||
<div id="app" :class="getClass()" v-if="!authLoading">
|
||||
<NavBar></NavBar>
|
||||
<router-view v-if="!configLoading"/>
|
||||
</div>
|
||||
<div class="loading-page" v-else>
|
||||
<div class="loading-spinners">
|
||||
<b-spinner type="grow" variant="primary"></b-spinner>
|
||||
<b-spinner type="grow" variant="primary"></b-spinner>
|
||||
<b-spinner type="grow" variant="primary"></b-spinner>
|
||||
<div id="app" :class="getClass()" v-if="!authLoading">
|
||||
<NavBar></NavBar>
|
||||
<router-view v-if="!configLoading"/>
|
||||
</div>
|
||||
<div class="loading-text">
|
||||
Loading • Chargement • 装载
|
||||
<div class="loading-page" v-else>
|
||||
<div class="loading-spinners">
|
||||
<b-spinner type="grow" variant="primary"></b-spinner>
|
||||
<b-spinner type="grow" variant="primary"></b-spinner>
|
||||
<b-spinner type="grow" variant="primary"></b-spinner>
|
||||
</div>
|
||||
<div class="loading-text">
|
||||
Loading • Chargement • 装载 • Wird geladen
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import NavBar from "@/components/NavBar";
|
||||
import {mapActions, mapGetters, mapMutations} from "vuex";
|
||||
import Sist2Api from "@/Sist2Api";
|
||||
import ModelsRepo from "@/ml/modelsRepo";
|
||||
import {setupAuth0} from "@/main";
|
||||
|
||||
export default {
|
||||
components: {NavBar},
|
||||
data() {
|
||||
return {
|
||||
configLoading: false,
|
||||
authLoading: true,
|
||||
sist2InfoLoading: true
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
...mapGetters(["optTheme"]),
|
||||
},
|
||||
mounted() {
|
||||
this.$store.dispatch("loadConfiguration").then(() => {
|
||||
this.$root.$i18n.locale = this.$store.state.optLang;
|
||||
});
|
||||
components: {NavBar},
|
||||
data() {
|
||||
return {
|
||||
configLoading: false,
|
||||
authLoading: true,
|
||||
sist2InfoLoading: true
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
...mapGetters(["optTheme"]),
|
||||
},
|
||||
mounted() {
|
||||
this.$store.dispatch("loadConfiguration").then(() => {
|
||||
this.$root.$i18n.locale = this.$store.state.optLang;
|
||||
ModelsRepo.init(this.$store.getters.mlRepositoryList).catch(err => {
|
||||
this.$bvToast.toast(
|
||||
this.$t("ml.repoFetchError"),
|
||||
{
|
||||
title: this.$t("ml.repoFetchErrorTitle"),
|
||||
noAutoHide: true,
|
||||
toaster: "b-toaster-bottom-right",
|
||||
headerClass: "toast-header-warning",
|
||||
bodyClass: "toast-body-warning",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
this.$store.subscribe((mutation) => {
|
||||
if (mutation.type === "setOptLang") {
|
||||
this.$root.$i18n.locale = mutation.payload;
|
||||
this.configLoading = true;
|
||||
window.setTimeout(() => this.configLoading = false, 10);
|
||||
}
|
||||
|
||||
if (mutation.type === "setAuth0Token") {
|
||||
this.authLoading = false;
|
||||
}
|
||||
});
|
||||
|
||||
Sist2Api.getSist2Info().then(data => {
|
||||
|
||||
if (data.auth0Enabled) {
|
||||
this.authLoading = true;
|
||||
setupAuth0(data.auth0Domain, data.auth0ClientId, data.auth0Audience)
|
||||
|
||||
this.$auth.$watch("loading", loading => {
|
||||
if (loading === false) {
|
||||
|
||||
if (!this.$auth.isAuthenticated) {
|
||||
this.$auth.loginWithRedirect();
|
||||
return;
|
||||
this.$store.subscribe((mutation) => {
|
||||
if (mutation.type === "setOptLang") {
|
||||
this.$root.$i18n.locale = mutation.payload;
|
||||
this.configLoading = true;
|
||||
window.setTimeout(() => this.configLoading = false, 10);
|
||||
}
|
||||
|
||||
// Remove "code" param
|
||||
window.history.replaceState({}, "", "/" + window.location.hash);
|
||||
|
||||
this.$store.dispatch("loadAuth0Token");
|
||||
}
|
||||
if (mutation.type === "setAuth0Token") {
|
||||
this.authLoading = false;
|
||||
}
|
||||
});
|
||||
} else {
|
||||
this.authLoading = false;
|
||||
}
|
||||
|
||||
this.setSist2Info(data);
|
||||
this.setIndices(data.indices)
|
||||
});
|
||||
},
|
||||
methods: {
|
||||
...mapActions(["setSist2Info",]),
|
||||
...mapMutations(["setIndices",]),
|
||||
getClass() {
|
||||
return {
|
||||
"theme-light": this.optTheme === "light",
|
||||
"theme-black": this.optTheme === "black",
|
||||
}
|
||||
Sist2Api.getSist2Info().then(data => {
|
||||
|
||||
if (data.auth0Enabled) {
|
||||
this.authLoading = true;
|
||||
setupAuth0(data.auth0Domain, data.auth0ClientId, data.auth0Audience)
|
||||
|
||||
this.$auth.$watch("loading", loading => {
|
||||
if (loading === false) {
|
||||
|
||||
if (!this.$auth.isAuthenticated) {
|
||||
this.$auth.loginWithRedirect();
|
||||
return;
|
||||
}
|
||||
|
||||
// Remove "code" param
|
||||
window.history.replaceState({}, "", "/" + window.location.hash);
|
||||
|
||||
this.$store.dispatch("loadAuth0Token");
|
||||
}
|
||||
});
|
||||
} else {
|
||||
this.authLoading = false;
|
||||
}
|
||||
|
||||
this.setSist2Info(data);
|
||||
this.setIndices(data.indices)
|
||||
});
|
||||
},
|
||||
methods: {
|
||||
...mapActions(["setSist2Info",]),
|
||||
...mapMutations(["setIndices",]),
|
||||
getClass() {
|
||||
return {
|
||||
"theme-light": this.optTheme === "light",
|
||||
"theme-black": this.optTheme === "black",
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
,
|
||||
,
|
||||
}
|
||||
</script>
|
||||
|
||||
<style>
|
||||
html, body {
|
||||
height: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
#app {
|
||||
/*font-family: Avenir, Helvetica, Arial, sans-serif;*/
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
/*text-align: center;*/
|
||||
color: #2c3e50;
|
||||
padding-bottom: 1em;
|
||||
min-height: 100%;
|
||||
/*font-family: Avenir, Helvetica, Arial, sans-serif;*/
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
/*text-align: center;*/
|
||||
color: #2c3e50;
|
||||
padding-bottom: 1em;
|
||||
min-height: 100%;
|
||||
}
|
||||
|
||||
/*Black theme*/
|
||||
.theme-black {
|
||||
background-color: #000;
|
||||
background-color: #000;
|
||||
}
|
||||
|
||||
.theme-black .card, .theme-black .modal-content {
|
||||
background: #212121;
|
||||
color: #e0e0e0;
|
||||
border-radius: 1px;
|
||||
border: none;
|
||||
background: #212121;
|
||||
color: #e0e0e0;
|
||||
border-radius: 1px;
|
||||
border: none;
|
||||
}
|
||||
|
||||
|
||||
.theme-black .table {
|
||||
color: #e0e0e0;
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
.theme-black .table td, .theme-black .table th {
|
||||
border: none;
|
||||
border: none;
|
||||
}
|
||||
|
||||
.theme-black .table thead th {
|
||||
border-bottom: 1px solid #646464;
|
||||
border-bottom: 1px solid #646464;
|
||||
}
|
||||
|
||||
.theme-black .custom-select {
|
||||
overflow: auto;
|
||||
background-color: #37474F;
|
||||
border: 1px solid #616161;
|
||||
color: #bdbdbd;
|
||||
overflow: auto;
|
||||
background-color: #37474F;
|
||||
border: 1px solid #616161;
|
||||
color: #bdbdbd;
|
||||
}
|
||||
|
||||
.theme-black .custom-select:focus {
|
||||
border-color: #757575;
|
||||
outline: 0;
|
||||
box-shadow: 0 0 0 .2rem rgba(0, 123, 255, .25);
|
||||
border-color: #757575;
|
||||
outline: 0;
|
||||
box-shadow: 0 0 0 .2rem rgba(0, 123, 255, .25);
|
||||
}
|
||||
|
||||
.theme-black .inspire-tree .selected > .wholerow, .theme-black .inspire-tree .selected > .title-wrap:hover + .wholerow {
|
||||
background: none !important;
|
||||
background: none !important;
|
||||
}
|
||||
|
||||
.theme-black .inspire-tree .icon-expand::before, .theme-black .inspire-tree .icon-collapse::before {
|
||||
background-color: black !important;
|
||||
background-color: black !important;
|
||||
}
|
||||
|
||||
.theme-black .inspire-tree .title {
|
||||
color: #eee;
|
||||
color: #eee;
|
||||
}
|
||||
|
||||
.theme-black .inspire-tree {
|
||||
font-weight: 400;
|
||||
font-size: 14px;
|
||||
font-family: Helvetica, Nueue, Verdana, sans-serif;
|
||||
max-height: 350px;
|
||||
overflow: auto;
|
||||
font-weight: 400;
|
||||
font-size: 14px;
|
||||
font-family: Helvetica, Nueue, Verdana, sans-serif;
|
||||
max-height: 350px;
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
.inspire-tree [type=checkbox] {
|
||||
left: 22px !important;
|
||||
top: 7px !important;
|
||||
left: 22px !important;
|
||||
top: 7px !important;
|
||||
}
|
||||
|
||||
.theme-black .form-control {
|
||||
background-color: #37474F;
|
||||
border: 1px solid #616161;
|
||||
color: #dbdbdb !important;
|
||||
background-color: #37474F;
|
||||
border: 1px solid #616161;
|
||||
color: #dbdbdb !important;
|
||||
}
|
||||
|
||||
.theme-black .form-control:focus {
|
||||
background-color: #546E7A;
|
||||
color: #fff;
|
||||
background-color: #546E7A;
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.theme-black .input-group-text, .theme-black .default-input {
|
||||
background: #37474F !important;
|
||||
border: 1px solid #616161 !important;
|
||||
color: #dbdbdb !important;
|
||||
background: #37474F !important;
|
||||
border: 1px solid #616161 !important;
|
||||
color: #dbdbdb !important;
|
||||
}
|
||||
|
||||
.theme-black ::placeholder {
|
||||
color: #BDBDBD !important;
|
||||
opacity: 1;
|
||||
color: #BDBDBD !important;
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.theme-black .nav-tabs .nav-link {
|
||||
color: #e0e0e0;
|
||||
border-radius: 0;
|
||||
color: #e0e0e0;
|
||||
border-radius: 0;
|
||||
}
|
||||
|
||||
.theme-black .nav-tabs .nav-item.show .nav-link, .theme-black .nav-tabs .nav-link.active {
|
||||
background-color: #212121;
|
||||
border-color: #616161 #616161 #212121;
|
||||
color: #e0e0e0;
|
||||
background-color: #212121;
|
||||
border-color: #616161 #616161 #212121;
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
.theme-black .nav-tabs .nav-link:focus, .theme-black .nav-tabs .nav-link:focus {
|
||||
border-color: #616161 #616161 #212121;
|
||||
color: #e0e0e0;
|
||||
border-color: #616161 #616161 #212121;
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
.theme-black .nav-tabs .nav-link:focus, .theme-black .nav-tabs .nav-link:hover {
|
||||
border-color: #e0e0e0 #e0e0e0 #212121;
|
||||
color: #e0e0e0;
|
||||
border-color: #e0e0e0 #e0e0e0 #212121;
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
.theme-black .nav-tabs {
|
||||
border-bottom: #616161;
|
||||
border-bottom: #616161;
|
||||
}
|
||||
|
||||
.theme-black a:hover, .theme-black .btn:hover {
|
||||
color: #fff;
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.theme-black .b-dropdown a:hover {
|
||||
color: inherit;
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.theme-black .btn {
|
||||
color: #eee;
|
||||
color: #eee;
|
||||
}
|
||||
|
||||
.theme-black .modal-header .close {
|
||||
color: #e0e0e0;
|
||||
text-shadow: none;
|
||||
color: #e0e0e0;
|
||||
text-shadow: none;
|
||||
}
|
||||
|
||||
.theme-black .modal-header {
|
||||
border-bottom: 1px solid #646464;
|
||||
border-bottom: 1px solid #646464;
|
||||
}
|
||||
|
||||
/* -------------------------- */
|
||||
|
||||
#nav {
|
||||
padding: 30px;
|
||||
padding: 30px;
|
||||
}
|
||||
|
||||
#nav a {
|
||||
font-weight: bold;
|
||||
color: #2c3e50;
|
||||
font-weight: bold;
|
||||
color: #2c3e50;
|
||||
}
|
||||
|
||||
#nav a.router-link-exact-active {
|
||||
color: #42b983;
|
||||
color: #42b983;
|
||||
}
|
||||
|
||||
.mobile {
|
||||
display: none;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.container {
|
||||
padding-top: 1em;
|
||||
padding-top: 1em;
|
||||
}
|
||||
|
||||
@media (max-width: 650px) {
|
||||
.mobile {
|
||||
display: initial;
|
||||
}
|
||||
.mobile {
|
||||
display: initial;
|
||||
}
|
||||
|
||||
.not-mobile {
|
||||
display: none;
|
||||
}
|
||||
.not-mobile {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.grid-single-column .fit {
|
||||
max-height: none !important;
|
||||
}
|
||||
.grid-single-column .fit {
|
||||
max-height: none !important;
|
||||
}
|
||||
|
||||
.container {
|
||||
padding-left: 0;
|
||||
padding-right: 0;
|
||||
padding-top: 0
|
||||
}
|
||||
.container {
|
||||
padding-left: 0;
|
||||
padding-right: 0;
|
||||
padding-top: 0
|
||||
}
|
||||
|
||||
.lightbox-caption {
|
||||
display: none;
|
||||
}
|
||||
.lightbox-caption {
|
||||
display: none;
|
||||
}
|
||||
}
|
||||
|
||||
.info-icon {
|
||||
width: 1rem;
|
||||
margin-right: 0.2rem;
|
||||
cursor: pointer;
|
||||
line-height: 1rem;
|
||||
height: 1rem;
|
||||
background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB4PSIwcHgiIHk9IjBweCIKICAgICB2aWV3Qm94PSIwIDAgNDI2LjY2NyA0MjYuNjY3IiBzdHlsZT0iZW5hYmxlLWJhY2tncm91bmQ6bmV3IDAgMCA0MjYuNjY3IDQyNi42Njc7IiBmaWxsPSIjZmZmIj4KPGc+CiAgICA8Zz4KICAgICAgICA8Zz4KICAgICAgICAgICAgPHJlY3QgeD0iMTkyIiB5PSIxOTIiIHdpZHRoPSI0Mi42NjciIGhlaWdodD0iMTI4Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik0yMTMuMzMzLDBDOTUuNDY3LDAsMCw5NS40NjcsMCwyMTMuMzMzczk1LjQ2NywyMTMuMzMzLDIxMy4zMzMsMjEzLjMzM1M0MjYuNjY3LDMzMS4yLDQyNi42NjcsMjEzLjMzMwogICAgICAgICAgICAgICAgUzMzMS4yLDAsMjEzLjMzMywweiBNMjEzLjMzMywzODRjLTk0LjA4LDAtMTcwLjY2Ny03Ni41ODctMTcwLjY2Ny0xNzAuNjY3UzExOS4yNTMsNDIuNjY3LDIxMy4zMzMsNDIuNjY3CiAgICAgICAgICAgICAgICBTMzg0LDExOS4yNTMsMzg0LDIxMy4zMzNTMzA3LjQxMywzODQsMjEzLjMzMywzODR6Ii8+CiAgICAgICAgICAgIDxyZWN0IHg9IjE5MiIgeT0iMTA2LjY2NyIgd2lkdGg9IjQyLjY2NyIgaGVpZ2h0PSI0Mi42NjciLz4KICAgICAgICA8L2c+CiAgICA8L2c+CjwvZz4KPC9zdmc+Cg==);
|
||||
filter: brightness(45%);
|
||||
display: block;
|
||||
width: 1rem;
|
||||
margin-right: 0.2rem;
|
||||
cursor: pointer;
|
||||
line-height: 1rem;
|
||||
height: 1rem;
|
||||
background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB4PSIwcHgiIHk9IjBweCIKICAgICB2aWV3Qm94PSIwIDAgNDI2LjY2NyA0MjYuNjY3IiBzdHlsZT0iZW5hYmxlLWJhY2tncm91bmQ6bmV3IDAgMCA0MjYuNjY3IDQyNi42Njc7IiBmaWxsPSIjZmZmIj4KPGc+CiAgICA8Zz4KICAgICAgICA8Zz4KICAgICAgICAgICAgPHJlY3QgeD0iMTkyIiB5PSIxOTIiIHdpZHRoPSI0Mi42NjciIGhlaWdodD0iMTI4Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik0yMTMuMzMzLDBDOTUuNDY3LDAsMCw5NS40NjcsMCwyMTMuMzMzczk1LjQ2NywyMTMuMzMzLDIxMy4zMzMsMjEzLjMzM1M0MjYuNjY3LDMzMS4yLDQyNi42NjcsMjEzLjMzMwogICAgICAgICAgICAgICAgUzMzMS4yLDAsMjEzLjMzMywweiBNMjEzLjMzMywzODRjLTk0LjA4LDAtMTcwLjY2Ny03Ni41ODctMTcwLjY2Ny0xNzAuNjY3UzExOS4yNTMsNDIuNjY3LDIxMy4zMzMsNDIuNjY3CiAgICAgICAgICAgICAgICBTMzg0LDExOS4yNTMsMzg0LDIxMy4zMzNTMzA3LjQxMywzODQsMjEzLjMzMywzODR6Ii8+CiAgICAgICAgICAgIDxyZWN0IHg9IjE5MiIgeT0iMTA2LjY2NyIgd2lkdGg9IjQyLjY2NyIgaGVpZ2h0PSI0Mi42NjciLz4KICAgICAgICA8L2c+CiAgICA8L2c+CjwvZz4KPC9zdmc+Cg==);
|
||||
filter: brightness(45%);
|
||||
display: block;
|
||||
}
|
||||
|
||||
.tabs {
|
||||
margin-top: 10px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
.modal-title {
|
||||
text-overflow: ellipsis;
|
||||
overflow: hidden;
|
||||
white-space: nowrap;
|
||||
text-overflow: ellipsis;
|
||||
overflow: hidden;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
@media screen and (min-width: 1500px) {
|
||||
.container {
|
||||
max-width: 1440px;
|
||||
}
|
||||
.container {
|
||||
max-width: 1440px;
|
||||
}
|
||||
}
|
||||
|
||||
.noUi-connects {
|
||||
border-radius: 1px !important;
|
||||
border-radius: 1px !important;
|
||||
}
|
||||
|
||||
mark {
|
||||
background: #fff217;
|
||||
border-radius: 0;
|
||||
padding: 1px 0;
|
||||
color: inherit;
|
||||
background: #fff217;
|
||||
border-radius: 0;
|
||||
padding: 1px 0;
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.theme-black mark {
|
||||
background: rgba(251, 191, 41, 0.25);
|
||||
border-radius: 0;
|
||||
padding: 1px 0;
|
||||
color: inherit;
|
||||
background: rgba(251, 191, 41, 0.25);
|
||||
border-radius: 0;
|
||||
padding: 1px 0;
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.theme-black .content-div mark {
|
||||
background: rgba(251, 191, 41, 0.40);
|
||||
color: white;
|
||||
background: rgba(251, 191, 41, 0.40);
|
||||
color: white;
|
||||
}
|
||||
|
||||
.content-div {
|
||||
font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
|
||||
font-size: 13px;
|
||||
padding: 1em;
|
||||
background-color: #f5f5f5;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 4px;
|
||||
margin: 3px;
|
||||
white-space: normal;
|
||||
color: #000;
|
||||
overflow: hidden;
|
||||
font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
|
||||
font-size: 13px;
|
||||
padding: 1em;
|
||||
background-color: #f5f5f5;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 4px;
|
||||
margin: 3px;
|
||||
white-space: normal;
|
||||
color: #000;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.theme-black .content-div {
|
||||
background-color: #37474F;
|
||||
border: 1px solid #616161;
|
||||
color: #E0E0E0FF;
|
||||
background-color: #37474F;
|
||||
border: 1px solid #616161;
|
||||
color: #E0E0E0FF;
|
||||
}
|
||||
|
||||
.graph {
|
||||
display: inline-block;
|
||||
width: 40%;
|
||||
display: inline-block;
|
||||
width: 40%;
|
||||
}
|
||||
|
||||
.pointer {
|
||||
cursor: pointer;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.loading-page {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
flex-direction: column;
|
||||
height: 100%;
|
||||
gap: 15px
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
flex-direction: column;
|
||||
height: 100%;
|
||||
gap: 15px
|
||||
}
|
||||
|
||||
.loading-spinners {
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.loading-text {
|
||||
text-align: center;
|
||||
text-align: center;
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -61,6 +61,7 @@ export interface EsHit {
     isAudio: boolean
     hasThumbnail: boolean
     hasVidPreview: boolean
+    imageAspectRatio: number
     /** Number of thumbnails available */
     tnNum: number
 }

@@ -155,6 +156,9 @@ class Sist2Api {
                     && hit._source.videoc !== "raw" && hit._source.videoc !== "ppm") {
                     hit._props.isPlayableImage = true;
                 }
+                if ("width" in hit._source && "height" in hit._source) {
+                    hit._props.imageAspectRatio = hit._source.width / hit._source.height;
+                }
                 break;
             case "video":
                 if ("videoc" in hit._source) {

@@ -187,30 +191,6 @@ class Sist2Api {
     setHitTags(hit: EsHit): void {
         const tags = [] as Tag[];

-        const mimeCategory = hit._source.mime == null ? null : hit._source.mime.split("/")[0];
-
-        switch (mimeCategory) {
-            case "image":
-            case "video":
-                if ("videoc" in hit._source && hit._source.videoc) {
-                    tags.push({
-                        style: "video",
-                        text: hit._source.videoc.replace(" ", ""),
-                        userTag: false
-                    } as Tag);
-                }
-                break
-            case "audio":
-                if ("audioc" in hit._source && hit._source.audioc) {
-                    tags.push({
-                        style: "audio",
-                        text: hit._source.audioc,
-                        userTag: false
-                    } as Tag);
-                }
-                break;
-        }
-
         // User tags
         if ("tag" in hit._source) {
             hit._source.tag.forEach(tag => {

@@ -381,20 +361,20 @@ class Sist2Api {
         });
     }

-    getTreemapCsvUrl(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/1`;
+    getTreemapStat(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/TMAP`;
     }

-    getMimeCsvUrl(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/2`;
+    getMimeStat(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/MAGG`;
     }

-    getSizeCsv(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/3`;
+    getSizeStat(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/SAGG`;
     }

-    getDateCsv(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/4`;
+    getDateStat(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/DAGG`;
     }
 }
sist2-vue/src/components/AnalyzedContentSpan.vue (new file, 21 lines):
@@ -0,0 +1,21 @@
+<template>
+    <span :style="getStyle()">{{span.text}}</span>
+</template>
+
+<script>
+
+import ModelsRepo from "@/ml/modelsRepo";
+
+export default {
+    name: "AnalyzedContentSpan",
+    props: ["span", "text"],
+    methods: {
+        getStyle() {
+            return ModelsRepo.data[this.$store.getters.mlModel.name].labelStyles[this.span.label];
+        }
+    }
+}
+</script>
+
+<style scoped></style>
sist2-vue/src/components/AnalyzedContentSpanContainer.vue (new file, 75 lines):
@@ -0,0 +1,75 @@
+<template>
+    <div>
+        <b-card class="mb-2">
+            <AnalyzedContentSpan v-for="span of legend" :key="span.id" :span="span"
+                                 class="mr-2"></AnalyzedContentSpan>
+        </b-card>
+        <div class="content-div">
+            <AnalyzedContentSpan v-for="span of mergedSpans" :key="span.id" :span="span"></AnalyzedContentSpan>
+        </div>
+    </div>
+</template>
+
+<script>
+
+import AnalyzedContentSpan from "@/components/AnalyzedContentSpan.vue";
+import ModelsRepo from "@/ml/modelsRepo";
+
+export default {
+    name: "AnalyzedContentSpanContainer",
+    components: {AnalyzedContentSpan},
+    props: ["spans", "text"],
+    computed: {
+        legend() {
+            return Object.entries(ModelsRepo.data[this.$store.state.mlModel.name].legend)
+                .map(([label, name]) => ({
+                    text: name,
+                    id: label,
+                    label: label
+                }));
+        },
+        mergedSpans() {
+            const spans = this.spans;
+
+            const merged = [];
+
+            let lastLabel = null;
+            let fixSpace = false;
+            for (let i = 0; i < spans.length; i++) {
+
+                if (spans[i].label !== lastLabel) {
+                    let start = spans[i].wordIndex;
+                    const nextSpan = spans.slice(i + 1).find(s => s.label !== spans[i].label)
+                    let end = nextSpan ? nextSpan.wordIndex : undefined;
+
+                    if (end !== undefined && this.text[end - 1] === " ") {
+                        end -= 1;
+                        fixSpace = true;
+                    }
+
+                    merged.push({
+                        text: this.text.slice(start, end),
+                        label: spans[i].label,
+                        id: spans[i].wordIndex
+                    });
+
+                    if (fixSpace) {
+                        merged.push({
+                            text: " ",
+                            label: "O",
+                            id: end
+                        });
+                        fixSpace = false;
+                    }
+                    lastLabel = spans[i].label;
+                }
+            }
+
+            return merged;
+        },
+    },
+}
+</script>
+
+<style scoped></style>
@@ -120,7 +120,7 @@ export default {
     update(indexId) {
         const svg = d3.select("#date-histogram");

-        d3.csv(Sist2Api.getDateCsv(indexId)).then(tabularData => {
+        d3.json(Sist2Api.getDateStat(indexId)).then(tabularData => {
             dateHistogram(tabularData.slice(), svg, this.$t("d3.dateHistogram"));
         });
     }

@@ -91,7 +91,7 @@ export default {
         const mimeSvgCount = d3.select("#agg-mime-count");
         const fillOpacity = this.$store.state.optTheme === "black" ? 0.9 : 0.6;

-        d3.csv(Sist2Api.getMimeCsvUrl(indexId)).then(tabularData => {
+        d3.json(Sist2Api.getMimeStat(indexId)).then(tabularData => {
             mimeBarCount(tabularData.slice(), mimeSvgCount, fillOpacity, this.$t("d3.mimeCount"));
         });
     }

@@ -90,7 +90,7 @@ export default {
         const mimeSvgSize = d3.select("#agg-mime-size");
         const fillOpacity = this.$store.state.optTheme === "black" ? 0.9 : 0.6;

-        d3.csv(Sist2Api.getMimeCsvUrl(indexId)).then(tabularData => {
+        d3.json(Sist2Api.getMimeStat(indexId)).then(tabularData => {
             mimeBarSize(tabularData.slice(), mimeSvgSize, fillOpacity, this.$t("d3.mimeSize"));
         });
     }

@@ -117,7 +117,7 @@ export default {
     update(indexId) {
         const svg = d3.select("#size-histogram");

-        d3.csv(Sist2Api.getSizeCsv(indexId)).then(tabularData => {
+        d3.json(Sist2Api.getSizeStat(indexId)).then(tabularData => {
             sizeHistogram(tabularData.slice(), svg, this.$t("d3.sizeHistogram"));
         });
     }

@@ -240,7 +240,7 @@ export default {
             .style("overflow", "visible")
             .style("font", "10px sans-serif");

-        d3.csv(Sist2Api.getTreemapCsvUrl(indexId)).then(tabularData => {
+        d3.json(Sist2Api.getTreemapStat(indexId)).then(tabularData => {
             tabularData.forEach(row => {
                 row.taxonomy = row.path.split("/");
                 row.size = Number(row.size);
@@ -1,5 +1,5 @@
 <template>
-    <b-card class="mb-4 mt-4">
+    <b-card v-if="$store.state.sist2Info.showDebugInfo" class="mb-4 mt-4">
         <b-card-title><DebugIcon class="mr-1"></DebugIcon>{{ $t("debug") }}</b-card-title>
         <p v-html="$t('debugDescription')"></p>
@@ -27,6 +27,11 @@
     <DocFileTitle :doc="doc"></DocFileTitle>
 </div>

+<!-- Featured line -->
+<div style="display: flex">
+    <FeaturedFieldsLine :doc="doc"></FeaturedFieldsLine>
+</div>
+
 <!-- Tags -->
 <div class="card-text">
     <TagContainer :hit="doc"></TagContainer>

@@ -43,10 +48,11 @@ import DocFileTitle from "@/components/DocFileTitle.vue";
 import DocInfoModal from "@/components/DocInfoModal.vue";
 import ContentDiv from "@/components/ContentDiv.vue";
 import FullThumbnail from "@/components/FullThumbnail";
+import FeaturedFieldsLine from "@/components/FeaturedFieldsLine";


 export default {
-    components: {FullThumbnail, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
+    components: {FeaturedFieldsLine, FullThumbnail, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
     props: ["doc", "width"],
     data() {
         return {

@@ -50,6 +50,11 @@
             <span v-if="doc._source.author && doc._source.pages" class="mx-1">-</span>
             <span v-if="doc._source.author">{{ doc._source.author }}</span>
         </div>
+
+        <!-- Featured line -->
+        <div style="display: flex">
+            <FeaturedFieldsLine :doc="doc"></FeaturedFieldsLine>
+        </div>
     </div>
 </div>
 </b-list-group-item>

@@ -61,10 +66,11 @@ import DocFileTitle from "@/components/DocFileTitle";
 import DocInfoModal from "@/components/DocInfoModal";
 import ContentDiv from "@/components/ContentDiv";
 import FileIcon from "@/components/icons/FileIcon";
+import FeaturedFieldsLine from "@/components/FeaturedFieldsLine";

 export default {
     name: "DocListItem",
-    components: {FileIcon, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
+    components: {FileIcon, ContentDiv, DocInfoModal, DocFileTitle, TagContainer, FeaturedFieldsLine},
     props: ["doc"],
     data() {
         return {
sist2-vue/src/components/FeaturedFieldsLine.vue (new file, 46 lines):
@@ -0,0 +1,46 @@
+<template>
+    <div class="featured-line" v-html="featuredLineHtml"></div>
+</template>
+
+<script>
+import {humanDate, humanFileSize} from "@/util";
+
+function scopedEval(context, expr) {
+    const evaluator = Function.apply(null, [...Object.keys(context), "expr", "return eval(expr)"]);
+    return evaluator.apply(null, [...Object.values(context), expr]);
+}
+
+export default {
+    name: "FeaturedFieldsLine",
+    props: ["doc"],
+    computed: {
+        featuredLineHtml() {
+            if (this.$store.getters.optFeaturedFields === undefined) {
+                return "";
+            }
+
+            const scope = {doc: this.doc._source, humanDate: humanDate, humanFileSize: humanFileSize};
+
+            return this.$store.getters.optFeaturedFields
+                .replaceAll(/\$\{([^}]*)}/g, (match, g1) => {
+                    return scopedEval(scope, g1);
+                });
+        }
+    }
+}
+</script>
+
+<style scoped>
+
+.featured-line {
+    font-size: 90%;
+    font-family: 'Source Sans Pro', 'Helvetica Neue', Arial, sans-serif;
+    color: #424242;
+    padding-left: 2px;
+}
+
+.theme-black .featured-line {
+    color: #bebebe;
+}
+</style>
@@ -6,13 +6,13 @@
     </div>

     <div
-        v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5"
+        v-if="doc._props.isImage && doc._props.imageAspectRatio < 5"
         class="card-img-overlay"
        :class="{'small-badge': smallBadge}">
        <span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
     </div>

-    <div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
+    <div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0"
         class="card-img-overlay"
        :class="{'small-badge': smallBadge}">
        <span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>

@@ -63,6 +63,11 @@ export default {
     },
     computed: {
         tnSrc() {
             return this.getThumbnailSrc(this.currentThumbnailNum);
         },
     },
     methods: {
         getThumbnailSrc(thumbnailNum) {
             const doc = this.doc;
             const props = doc._props;
             if (props.isGif && this.hover) {

@@ -70,10 +75,8 @@ export default {
             }
             return (this.currentThumbnailNum === 0)
                 ? `t/${doc._source.index}/${doc._id}`
-                : `t/${doc._source.index}/${doc._id}${String(this.currentThumbnailNum).padStart(4, "0")}`;
+                : `t/${doc._source.index}/${doc._id}/${String(thumbnailNum).padStart(4, "0")}`;
         },
     },
     methods: {
         humanTime: humanTime,
         onThumbnailClick() {
             this.$emit("onThumbnailClick");

@@ -86,9 +89,14 @@ export default {
         },
         onTnEnter() {
             this.hover = true;
+            const start = Date.now()
             if (this.doc._props.hasVidPreview) {
-                this.currentThumbnailNum += 1;
-                this.scheduleNextTnNum();
+                let img = new Image();
+                img.src = this.getThumbnailSrc(this.currentThumbnailNum + 1);
+                img.onload = () => {
+                    this.currentThumbnailNum += 1;
+                    this.scheduleNextTnNum(Date.now() - start);
+                }
             }
         },
         onTnLeave() {

@@ -99,17 +107,23 @@ export default {
                 this.timeoutId = null;
             }
         },
-        scheduleNextTnNum() {
-            const INTERVAL = this.$store.state.optVidPreviewInterval ?? 700;
+        scheduleNextTnNum(offset = 0) {
+            const INTERVAL = (this.$store.state.optVidPreviewInterval ?? 700) - offset;
             this.timeoutId = window.setTimeout(() => {
+                const start = Date.now();
                 if (!this.hover) {
                     return;
                 }
-                this.scheduleNextTnNum();
                 if (this.currentThumbnailNum === this.doc._props.tnNum - 1) {
                     this.currentThumbnailNum = 0;
                     this.scheduleNextTnNum();
                 } else {
-                    this.currentThumbnailNum += 1;
+                    let img = new Image();
+                    img.src = this.getThumbnailSrc(this.currentThumbnailNum + 1);
+                    img.onload = () => {
+                        this.currentThumbnailNum += 1;
+                        this.scheduleNextTnNum(Date.now() - start);
+                    }
                 }
             }, INTERVAL);
         },

@@ -152,17 +166,18 @@ export default {
 }

 .badge-resolution {
-    color: #212529;
-    background-color: #FFC107;
+    color: #c6c6c6;
+    background-color: #272727CC;
     padding: 2px 3px;
 }

 .card-img-overlay {
     pointer-events: none;
-    padding: 0.75rem;
-    bottom: unset;
-    top: 0;
+    padding: 2px 6px;
+    bottom: 4px;
+    top: unset;
     left: unset;
-    right: unset;
+    right: 0;
 }

 .small-badge {
@@ -1,6 +1,36 @@
 <template>
   <Preloader v-if="loading"></Preloader>
-  <div v-else-if="content" class="content-div" v-html="content"></div>
+  <div v-else-if="content">
+    <b-form inline class="my-2" v-if="ModelsRepo.getOptions().length > 0">
+      <b-checkbox class="ml-auto mr-2" :checked="optAutoAnalyze"
+                  @input="setOptAutoAnalyze($event); $store.dispatch('updateConfiguration')">
+        {{ $t("ml.auto") }}
+      </b-checkbox>
+      <b-button :disabled="mlPredictionsLoading || mlLoading" @click="mlAnalyze" variant="primary"
+      >{{ $t("ml.analyzeText") }}
+      </b-button>
+      <b-select :disabled="mlPredictionsLoading || mlLoading" class="ml-2" v-model="mlModel">
+        <b-select-option :value="opt.value" v-for="opt of ModelsRepo.getOptions()">{{ opt.text }}
+        </b-select-option>
+      </b-select>
+    </b-form>
+
+    <b-progress v-if="mlLoading" variant="warning" show-progress :max="1" class="mb-3"
+    >
+      <b-progress-bar :value="modelLoadingProgress">
+        <strong>{{ ((modelLoadingProgress * modelSize) / (1024*1024)).toFixed(1) }}MB / {{
+            (modelSize / (1024 * 1024)).toFixed(1)
+          }}MB</strong>
+      </b-progress-bar>
+    </b-progress>
+
+    <b-progress v-if="mlPredictionsLoading" variant="primary" :value="modelPredictionProgress"
+                :max="content.length" class="mb-3"></b-progress>
+
+    <AnalyzedContentSpansContainer v-if="analyzedContentSpans.length > 0"
+                                   :spans="analyzedContentSpans" :text="rawContent"></AnalyzedContentSpansContainer>
+    <div v-else class="content-div" v-html="content"></div>
+  </div>
 </template>

 <script>
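The warning progress bar above turns the 0-to-1 fraction reported by the model loader into megabytes by scaling it against the model's declared byte size. The same arithmetic in isolation (values are illustrative):

const modelLoadingProgress = 0.4;    // fraction reported by the loader's onProgress
const modelSize = 43 * 1024 * 1024;  // declared model size in bytes

const loadedMb = (modelLoadingProgress * modelSize) / (1024 * 1024);
const totalMb = modelSize / (1024 * 1024);
console.log(`${loadedMb.toFixed(1)}MB / ${totalMb.toFixed(1)}MB`);  // "17.2MB / 43.0MB"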
@@ -8,87 +38,169 @@ import Sist2Api from "@/Sist2Api";
 import Preloader from "@/components/Preloader";
 import Sist2Query from "@/Sist2Query";
 import store from "@/store";
+import BertNerModel from "@/ml/BertNerModel";
+import AnalyzedContentSpansContainer from "@/components/AnalyzedContentSpanContainer.vue";
+import ModelsRepo from "@/ml/modelsRepo";
+import {mapGetters, mapMutations} from "vuex";

 export default {
   name: "LazyContentDiv",
-  components: {Preloader},
+  components: {AnalyzedContentSpansContainer, Preloader},
   props: ["docId"],
   data() {
     return {
+      ModelsRepo,
       content: "",
-      loading: true
+      rawContent: "",
+      loading: true,
+      modelLoadingProgress: 0,
+      modelPredictionProgress: 0,
+      mlPredictionsLoading: false,
+      mlLoading: false,
+      mlModel: null,
+      analyzedContentSpans: []
     }
   },
   mounted() {
+    if (this.$store.getters.optMlDefaultModel) {
+      this.mlModel = this.$store.getters.optMlDefaultModel
+    } else {
+      this.mlModel = ModelsRepo.getDefaultModel();
+    }
+
     const query = Sist2Query.searchQuery();

     if (this.$store.state.optHighlight) {
       const fields = this.$store.state.fuzzy
         ? {"content.nGram": {}}
         : {content: {}};

       query.highlight = {
         pre_tags: ["<mark>"],
         post_tags: ["</mark>"],
         number_of_fragments: 0,
         fields,
       };

       if (!store.state.sist2Info.esVersionLegacy) {
         query.highlight.max_analyzed_offset = 999_999;
       }
     }

     if ("function_score" in query.query) {
       query.query = query.query.function_score.query;
     }

     if (!("must" in query.query.bool)) {
       query.query.bool.must = [];
     } else if (!Array.isArray(query.query.bool.must)) {
       query.query.bool.must = [query.query.bool.must];
     }

     query.query.bool.must.push({match: {_id: this.docId}});

     delete query["sort"];
     delete query["aggs"];
     delete query["search_after"];
     delete query.query["function_score"];

     query._source = {
       includes: ["content", "name", "path", "extension"]
     }

     query.size = 1;

     Sist2Api.esQuery(query).then(resp => {
       this.loading = false;
       if (resp.hits.hits.length === 1) {
         this.content = this.getContent(resp.hits.hits[0]);
-      } else {
-        console.log("FIXME: could not get content")
-        console.log(resp)
+      }
+
+      if (this.optAutoAnalyze) {
+        this.mlAnalyze();
       }
     });
   },
+  computed: {
+    ...mapGetters(["optAutoAnalyze"]),
+    modelSize() {
+      const modelData = ModelsRepo.data[this.mlModel];
+      if (!modelData) {
+        return 0;
+      }
+      return modelData.size;
+    }
+  },
   methods: {
+    ...mapMutations(["setOptAutoAnalyze"]),
     getContent(doc) {
+      this.rawContent = doc._source.content;
+
       if (!doc.highlight) {
         return doc._source.content;
       }

       if (doc.highlight["content.nGram"]) {
         return doc.highlight["content.nGram"][0];
       }
       if (doc.highlight.content) {
         return doc.highlight.content[0];
       }
     },
+    async getMlModel() {
+      if (this.$store.getters.mlModel.name !== this.mlModel) {
+        this.mlLoading = true;
+        this.modelLoadingProgress = 0;
+        const modelInfo = ModelsRepo.data[this.mlModel];
+
+        const model = new BertNerModel(
+          modelInfo.vocabUrl,
+          modelInfo.modelUrl,
+          modelInfo.id2label,
+        )
+
+        await model.init(progress => this.modelLoadingProgress = progress);
+        this.$store.commit("setMlModel", {model, name: this.mlModel});
+
+        this.mlLoading = false;
+        return model
+      }
+
+      return this.$store.getters.mlModel.model;
+    },
+    async mlAnalyze() {
+      if (!this.content) {
+        return;
+      }
+
+      const modelInfo = ModelsRepo.data[this.mlModel];
+      if (modelInfo === undefined) {
+        return;
+      }
+
+      this.$store.commit("setOptMlDefaultModel", this.mlModel);
+      await this.$store.dispatch("updateConfiguration");
+
+      const model = await this.getMlModel();
+
+      this.analyzedContentSpans = [];
+
+      this.mlPredictionsLoading = true;
+
+      await model.predict(this.rawContent, results => {
+        results.forEach(result => result.label = modelInfo.humanLabels[result.label]);
+        this.analyzedContentSpans.push(...results);
+        this.modelPredictionProgress = results[results.length - 1].wordIndex;
+      });
+      this.mlPredictionsLoading = false;
+    }
   }
 }
 </script>

-<style scoped>
+<style>
+.progress-bar {
+  transition: none;
+}
 </style>
@@ -160,9 +160,13 @@ export default {
     },
     onSlideChange() {
       // Pause all videos when changing slide
-      document.getElementsByTagName("video").forEach((el) => {
+      const videos = document.getElementsByTagName("video");
+      if (videos.length === 0) {
+        return
+      }
+      for (let el of videos) {
         el.pause();
-      });
+      }
     }
   },
 }
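The fix above is needed because getElementsByTagName returns an HTMLCollection, which has no .forEach method (calling it throws a TypeError); a for...of loop works because the collection is iterable. Spreading into an array is an equivalent alternative:

// HTMLCollection is iterable but is not an Array; spread it to use array methods.
[...document.getElementsByTagName("video")].forEach((el) => el.pause());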
@@ -40,6 +40,7 @@

   <template v-for="tag in hit._tags">
     <!-- User tag-->
+    <div v-if="tag.userTag" :key="tag.rawText" style="display: inline-block">
       <span
         :id="hit._id+tag.rawText"
@@ -51,7 +52,7 @@
       >{{ tag.text.split(".").pop() }}</span>

       <b-popover :target="hit._id+tag.rawText" triggers="focus blur" placement="top">
-        <b-button variant="danger" @click="onTagDeleteClick(tag, $event)">{{$t("deleteTag")}}</b-button>
+        <b-button variant="danger" @click="onTagDeleteClick(tag, $event)">{{ $t("deleteTag") }}</b-button>
       </b-popover>
     </div>

@@ -66,7 +67,7 @@
   <small v-if="showAddButton" class="badge add-tag-button" @click="tagAdd()">{{$t("addTag")}}</small>

   <!-- Size tag-->
-  <small v-else class="text-muted badge-size">{{
+  <small v-else class="text-muted badge-size" style="padding-left: 2px">{{
     humanFileSize(hit._source.size)
   }}</small>
 </div>
@@ -211,7 +212,7 @@ export default Vue.extend({

   return matches.sort().map(match => {
     return {
-      title: match.split(".").slice(0,-1).join("."),
+      title: match.split(".").slice(0, -1).join("."),
       id: match
     }
   });
@@ -8,7 +8,7 @@ export default {
     advanced: "Advanced search",
     fuzzy: "Fuzzy"
   },
-  addTag: "Add",
+  addTag: "Tag",
   deleteTag: "Delete",
   download: "Download",
   and: "and",
@@ -17,6 +17,7 @@ export default {
   mimeTypes: "Media types",
   tags: "Tags",
   tagFilter: "Filter tags",
+  forExample: "For example:",
   help: {
     simpleSearch: "Simple search",
     advancedSearch: "Advanced search",
@@ -48,6 +49,7 @@ export default {
   configReset: "Reset configuration",
   searchOptions: "Search options",
   treemapOptions: "Treemap options",
+  mlOptions: "Machine learning options",
   displayOptions: "Display options",
   opt: {
     lang: "Language",
@@ -75,7 +77,12 @@ export default {
     useDatePicker: "Use a Date Picker component rather than a slider",
     vidPreviewInterval: "Video preview frame duration in ms",
     simpleLightbox: "Disable animations in image viewer",
-    showTagPickerFilter: "Display the tag filter bar"
+    showTagPickerFilter: "Display the tag filter bar",
+    featuredFields: "Featured fields Javascript template string. Will appear in the search results.",
+    featuredFieldsList: "Available variables",
+    autoAnalyze: "Automatically analyze text",
+    defaultModel: "Default model",
+    mlRepositories: "Model repositories (one per line)"
   },
   queryMode: {
     simple: "Simple",
@@ -83,6 +90,7 @@ export default {
   },
   lang: {
     en: "English",
+    de: "Deutsch",
     fr: "Français",
     "zh-CN": "简体中文",
   },
@@ -167,6 +175,185 @@ export default {
     selectedIndex: "selected index",
     selectedIndices: "selected indices",
   },
+  ml: {
+    analyzeText: "Analyze",
+    auto: "Auto",
+    repoFetchError: "Failed to get list of models. Check browser console for more details.",
+    repoFetchErrorTitle: "Could not fetch model repositories",
+  }
 },
+de: {
+  filePage: {
+    notFound: "Nicht gefunden"
+  },
+  searchBar: {
+    simple: "Suche",
+    advanced: "Erweiterte Suche",
+    fuzzy: "Fuzzy"
+  },
+  addTag: "Tag",
+  deleteTag: "Löschen",
+  download: "Herunterladen",
+  and: "und",
+  page: "Seite",
+  pages: "Seiten",
+  mimeTypes: "Medientypen",
+  tags: "Tags",
+  tagFilter: "Tags filtern",
+  forExample: "Zum Beispiel:",
+  help: {
+    simpleSearch: "Einfache Suche",
+    advancedSearch: "Erweiterte Suche",
+    help: "Hilfe",
+    term: "<BEGRIFF>",
+    and: "UND-Operator",
+    or: "ODER-Operator",
+    not: "negiert einen einzelnen Begriff",
+    quotes: "liefert Treffer, wenn die Abfolge in der genauen Reihenfolge gefunden wird",
+    prefix: "liefert Treffer, wenn die Abfolge einen solchen Präfix hat",
+    parens: "gruppiert Ausdrücke",
+    tildeTerm: "liefert Treffer im gegebenen 'Editierabstand'",
+    tildePhrase: "liefert Treffer mit dem Ausdruck, erlaubt die gegebene Anzahl zwischenstehender Nicht-Treffer-Wörter.",
+    example1:
+      "Zum Beispiel: <code>\"fried eggs\" +(eggplant | potato) -frittata</code> wird " +
+      "<i>fried eggs</i> und <i>eggplant</i> oder <i>potato</i> finden, aber keine Ergebnisse, " +
+      "die <i>frittata</i> enthalten.",
+    defaultOperator:
+      "Wenn weder <code>+</code> noch <code>|</code> angegeben sind, ist " +
+      "<code>+</code> (and) der Standard.",
+    fuzzy:
+      "Wenn <b>Fuzzy</b> aktiviert ist, werden Teil-Treffer (3-grams) ebenfalls akzeptiert.",
+    moreInfoSimple: "Für weitere Informationen s. <a target=\"_blank\" " +
+      "rel=\"noreferrer\" href=\"//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html\">Elasticsearch-Dokumentation</a>",
+    moreInfoAdvanced: "Für die Dokumentation der erweiterten Suche s. <a target=\"_blank\" rel=\"noreferrer\" href=\"//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax\">Elasticsearch-Dokumentation</a>"
+  },
+  config: "Konfiguration",
+  configDescription: "Konfiguration wird in Echtzeit für diesen Browser gespeichert.",
+  configReset: "Konfiguration zurücksetzen",
+  searchOptions: "Such-Optionen",
+  treemapOptions: "Kacheldiagramm-Optionen",
+  displayOptions: "Anzeige-Optionen",
+  opt: {
+    lang: "Sprache",
+    highlight: "Aktiviere Hervorhebung von Treffern",
+    fuzzy: "Aktiviere Fuzzy-Suche standardmäßig",
+    searchInPath: "Abgleich der Abfrage mit dem Dokumentpfad aktivieren",
+    suggestPath: "Aktiviere Auto-Vervollständigung in der Pfadfilter-Leiste",
+    fragmentSize: "Kontextgröße der Hervorhebung in Zeichen",
+    queryMode: "Such-Modus",
+    displayMode: "Ansicht",
+    columns: "Anzahl Spalten",
+    treemapType: "Kacheldiagramm-Typ",
+    treemapTiling: "Kacheldiagramm-Tiling",
+    treemapColorGroupingDepth: "Kacheldiagramm-Gruppierungsfarbe Tiefe (flach)",
+    treemapColor: "Kacheldiagramm-Farbe (kaskadiert)",
+    treemapSize: "Kacheldiagramm-Größe",
+    theme: "Theme",
+    lightboxLoadOnlyCurrent: "Keine Bilder in voller Größe für benachbarte Slides im Image-Viewer vorab laden.",
+    slideDuration: "Slide-Dauer",
+    resultSize: "Anzahl Treffer pro Seite",
+    tagOrOperator: "Verwende ODER-Operator bei der Angabe mehrerer Tags.",
+    hideDuplicates: "Verstecke Duplikate basierend auf der Prüfsumme",
+    hideLegacy: "Verstecke die 'legacyES' Elasticsearch-Notiz",
+    updateMimeMap: "Aktualisiere Medientyp-Baum in Echtzeit",
+    useDatePicker: "Benutze Datumswähler statt Schieber",
+    vidPreviewInterval: "Videovorschau-Framedauer in ms",
+    simpleLightbox: "Schalte Animationen im Image-Viewer ab",
+    showTagPickerFilter: "Zeige die Tag-Filter-Leiste",
+    featuredFields: "Variablen, welche zusätzlich in den Suchergebnissen angezeigt werden können.",
+    featuredFieldsList: "Verfügbare Variablen"
+  },
+  queryMode: {
+    simple: "Einfach",
+    advanced: "Erweitert",
+  },
+  lang: {
+    en: "English",
+    de: "Deutsch",
+    fr: "Français",
+    "zh-CN": "简体中文",
+  },
+  displayMode: {
+    grid: "Gitter",
+    list: "Liste",
+  },
+  columns: {
+    auto: "Auto"
+  },
+  treemapType: {
+    cascaded: "kaskadiert",
+    flat: "flach (kompakt)"
+  },
+  treemapSize: {
+    small: "klein",
+    medium: "mittel",
+    large: "groß",
+    xLarge: "sehr groß",
+    xxLarge: "riesig",
+    custom: "eigene",
+  },
+  treemapTiling: {
+    binary: "binär",
+    squarify: "quadratisch",
+    slice: "Slice",
+    dice: "Dice",
+    sliceDice: "Slice & Dice",
+  },
+  theme: {
+    light: "Hell",
+    black: "Dunkel"
+  },
+  hit: "Treffer",
+  hits: "Treffer",
+  details: "Details",
+  stats: "Statistiken",
+  queryTime: "Abfragedauer",
+  totalSize: "Gesamtgröße",
+  pathBar: {
+    placeholder: "Pfad filtern",
+    modalTitle: "Pfad wählen"
+  },
+  debug: "Debug-Informationen",
+  debugDescription: "Informationen für das Debugging. Wenn du Bugs gefunden oder Anregungen für " +
+    "neue Features hast, poste sie bitte <a href='https://github.com/simon987/sist2/issues/new/choose'>hier</a>.",
+  tagline: "Tagline",
+  toast: {
+    esConnErrTitle: "Elasticsearch-Verbindungsfehler",
+    esConnErr: "Das sist2-Web-Modul stellte einen Fehler beim Verbinden mit Elasticsearch fest. " +
+      "Schau in die Server-Logs für weitere Informationen.",
+    esQueryErrTitle: "Query-Fehler",
+    esQueryErr: "Konnte die Query nicht verarbeiten/ausführen, bitte schaue in die Dokumentation zur erweiterten Suche. " +
+      "Schau in die Server-Logs für weitere Informationen.",
+    dupeTagTitle: "Tag-Duplikat",
+    dupeTag: "Dieser Tag existiert bereits für das Dokument.",
+    copiedToClipboard: "In die Zwischenablage kopiert."
+  },
+  saveTagModalTitle: "Tag hinzufügen",
+  saveTagPlaceholder: "Tag-Name",
+  confirm: "Bestätigen",
+  indexPickerPlaceholder: "Index auswählen",
+  sort: {
+    relevance: "Relevanz",
+    dateAsc: "Datum (älteste zuerst)",
+    dateDesc: "Datum (neueste zuerst)",
+    sizeAsc: "Größe (kleinste zuerst)",
+    sizeDesc: "Größe (größte zuerst)",
+    nameAsc: "Name (A-z)",
+    nameDesc: "Name (Z-a)",
+    random: "zufällig",
+  },
+  d3: {
+    mimeCount: "Anzahl nach Medientyp",
+    mimeSize: "Größen nach Medientyp",
+    dateHistogram: "Änderungszeiten",
+    sizeHistogram: "Dateigrößen",
+  },
+  indexPicker: {
+    selectNone: "keinen auswählen",
+    selectAll: "alle auswählen",
+    selectedIndex: "ausgewählter Index",
+    selectedIndices: "ausgewählte Indizes",
+  },
+},
 fr: {
   filePage: {
@@ -177,7 +364,7 @@ export default {
     advanced: "Recherche avancée",
     fuzzy: "Approximatif"
   },
-  addTag: "Ajouter",
+  addTag: "Taguer",
   deleteTag: "Supprimer",
   download: "Télécharger",
   and: "et",
@@ -186,6 +373,7 @@ export default {
   mimeTypes: "Types de médias",
   tags: "Tags",
   tagFilter: "Filtrer les tags",
+  forExample: "Par exemple:",
   help: {
     simpleSearch: "Recherche simple",
     advancedSearch: "Recherche avancée",
@@ -245,7 +433,9 @@ export default {
     useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
     vidPreviewInterval: "Durée des images d'aperçu video en millisecondes",
     simpleLightbox: "Désactiver les animations du visualiseur d'images",
-    showTagPickerFilter: "Afficher le filtre dans l'onglet Tags"
+    showTagPickerFilter: "Afficher le filtre dans l'onglet Tags",
+    featuredFields: "Expression Javascript pour les variables mises en évidence. Sera affichée dans les résultats de recherche.",
+    featuredFieldsList: "Variables disponibles"
   },
   queryMode: {
     simple: "Simple",
@@ -253,6 +443,7 @@ export default {
   },
   lang: {
     en: "English",
+    de: "Deutsch",
     fr: "Français",
     "zh-CN": "简体中文",
   },
@@ -348,7 +539,7 @@ export default {
     advanced: "高级搜索",
     fuzzy: "模糊搜索"
   },
-  addTag: "添加",
+  addTag: "签条",
   deleteTag: "删除",
   download: "下载",
   and: "与",
@@ -357,6 +548,7 @@ export default {
   mimeTypes: "文件类型",
   tags: "标签",
   tagFilter: "筛选标签",
+  forExample: "例如:",
   help: {
     simpleSearch: "简易搜索",
     advancedSearch: "高级搜索",
@@ -415,7 +607,9 @@ export default {
     useDatePicker: "使用日期选择器组件而不是滑块",
     vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位",
     simpleLightbox: "在图片查看器中,禁用动画",
-    showTagPickerFilter: "显示标签过滤栏"
+    showTagPickerFilter: "显示标签过滤栏",
+    featuredFields: "特色领域的Javascript模板字符串。将出现在搜索结果中。",
+    featuredFieldsList: "可利用的变量"
   },
   queryMode: {
     simple: "简单",
@@ -423,6 +617,7 @@ export default {
   },
   lang: {
     en: "English",
+    de: "Deutsch",
     fr: "Français",
     "zh-CN": "简体中文",
   },
sist2-vue/src/ml/BertNerModel.js (new file, 77 lines)
@@ -0,0 +1,77 @@
import BertTokenizer from "@/ml/BertTokenizer";
import * as tf from "@tensorflow/tfjs";
import axios from "axios";

export default class BertNerModel {
    vocabUrl;
    modelUrl;

    id2label;
    _tokenizer;
    _model;
    inputSize = 128;

    _previousWordId = null;

    constructor(vocabUrl, modelUrl, id2label) {
        this.vocabUrl = vocabUrl;
        this.modelUrl = modelUrl;
        this.id2label = id2label;
    }

    async init(onProgress) {
        await Promise.all([this.loadTokenizer(), this.loadModel(onProgress)]);
    }

    async loadTokenizer() {
        const vocab = (await axios.get(this.vocabUrl)).data;
        this._tokenizer = new BertTokenizer(vocab);
    }

    async loadModel(onProgress) {
        this._model = await tf.loadGraphModel(this.modelUrl, {onProgress});
    }

    alignLabels(labels, wordIds, words) {
        const result = [];

        for (let i = 0; i < this.inputSize; i++) {
            const label = labels[i];
            const wordId = wordIds[i];

            if (wordId === -1) {
                continue;
            }
            if (wordId === this._previousWordId) {
                continue;
            }

            result.push({
                word: words[wordId].text, wordIndex: words[wordId].index, label: label
            });
            this._previousWordId = wordId;
        }

        return result;
    }

    async predict(text, callback) {
        this._previousWordId = null;
        const encoded = this._tokenizer.encodeText(text, this.inputSize)

        for (let chunk of encoded.inputChunks) {
            const rawResult = tf.tidy(() => this._model.execute({
                input_ids: tf.tensor2d(chunk.inputIds, [1, this.inputSize], "int32"),
                token_type_ids: tf.tensor2d(chunk.segmentIds, [1, this.inputSize], "int32"),
                attention_mask: tf.tensor2d(chunk.inputMask, [1, this.inputSize], "int32"),
            }));

            const labelIds = await tf.argMax(rawResult, -1);
            const labelIdsArray = await labelIds.array();
            const labels = labelIdsArray[0].map(id => this.id2label[id]);
            rawResult.dispose()

            callback(this.alignLabels(labels, chunk.wordIds, encoded.words))
        }
    }
}
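A usage sketch for BertNerModel as added above; the URLs and the label map below are made-up placeholders, not a published model repository (in sist2 they come from a repo.json entry):

import BertNerModel from "@/ml/BertNerModel";

// Hypothetical model assets.
const model = new BertNerModel(
    "https://example.com/bert-ner/vocab.json",
    "https://example.com/bert-ner/model.json",
    {0: "O", 1: "B-PER", 2: "I-PER", 3: "B-LOC", 4: "I-LOC"},
);

await model.init(p => console.log(`loading: ${Math.round(p * 100)}%`));

// predict() fires the callback once per 128-token chunk of the input.
await model.predict("Jane Doe visited Montreal.", spans => {
    spans.forEach(s => console.log(s.word, s.label, s.wordIndex));
});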
sist2-vue/src/ml/BertTokenizer.js (new file, 184 lines)
@@ -0,0 +1,184 @@
import {zip, chunk} from "underscore";

const UNK_INDEX = 100;
const CLS_INDEX = 101;
const SEP_INDEX = 102;
const CONTINUING_SUBWORD_PREFIX = "##";

function isWhitespace(ch) {
    return /\s/.test(ch);
}

function isInvalid(ch) {
    return (ch.charCodeAt(0) === 0 || ch.charCodeAt(0) === 0xfffd);
}

const punctuations = '[~`!@#$%^&*(){}[];:"\'<,.>?/\\|-_+=';

/** To judge whether it's a punctuation. */
function isPunctuation(ch) {
    return punctuations.indexOf(ch) !== -1;
}

export default class BertTokenizer {
    vocab;

    constructor(vocab) {
        this.vocab = vocab;
    }

    tokenize(text) {
        const charOriginalIndex = [];
        const cleanedText = this.cleanText(text, charOriginalIndex);
        const origTokens = cleanedText.split(' ');

        let charCount = 0;
        const tokens = origTokens.map((token) => {
            token = token.toLowerCase();
            const tokens = this.runSplitOnPunctuation(token, charCount, charOriginalIndex);
            charCount += token.length + 1;
            return tokens;
        });

        let flattenTokens = [];
        for (let index = 0; index < tokens.length; index++) {
            flattenTokens = flattenTokens.concat(tokens[index]);
        }
        return flattenTokens;
    }

    /* Performs invalid character removal and whitespace cleanup on text. */
    cleanText(text, charOriginalIndex) {
        text = text.replace(/\?/g, "").trim();

        const stringBuilder = [];
        let originalCharIndex = 0;
        let newCharIndex = 0;

        for (const ch of text) {
            // Skip the characters that cannot be used.
            if (isInvalid(ch)) {
                originalCharIndex += ch.length;
                continue;
            }
            if (isWhitespace(ch)) {
                if (stringBuilder.length > 0 && stringBuilder[stringBuilder.length - 1] !== ' ') {
                    stringBuilder.push(' ');
                    charOriginalIndex[newCharIndex] = originalCharIndex;
                    originalCharIndex += ch.length;
                } else {
                    originalCharIndex += ch.length;
                    continue;
                }
            } else {
                stringBuilder.push(ch);
                charOriginalIndex[newCharIndex] = originalCharIndex;
                originalCharIndex += ch.length;
            }
            newCharIndex++;
        }
        return stringBuilder.join('');
    }

    /* Splits punctuation on a piece of text. */
    runSplitOnPunctuation(text, count, charOriginalIndex) {
        const tokens = [];
        let startNewWord = true;
        for (const ch of text) {
            if (isPunctuation(ch)) {
                tokens.push({text: ch, index: charOriginalIndex[count]});
                count += ch.length;
                startNewWord = true;
            } else {
                if (startNewWord) {
                    tokens.push({text: '', index: charOriginalIndex[count]});
                    startNewWord = false;
                }
                tokens[tokens.length - 1].text += ch;
                count += ch.length;
            }
        }
        return tokens;
    }

    encode(words) {
        let outputTokens = [];
        const wordIds = [];

        for (let i = 0; i < words.length; i++) {
            let chars = [...words[i].text];

            let isUnknown = false;
            let start = 0;
            let subTokens = [];

            while (start < chars.length) {
                let end = chars.length;
                let currentSubstring = null;
                while (start < end) {
                    let substr = chars.slice(start, end).join('');

                    if (start > 0) {
                        substr = CONTINUING_SUBWORD_PREFIX + substr;
                    }
                    if (this.vocab.includes(substr)) {
                        currentSubstring = this.vocab.indexOf(substr);
                        break;
                    }

                    --end;
                }
                if (currentSubstring == null) {
                    isUnknown = true;
                    break;
                }
                subTokens.push(currentSubstring);
                start = end;
            }

            if (isUnknown) {
                outputTokens.push(UNK_INDEX);
                wordIds.push(i);
            } else {
                subTokens.forEach(tok => {
                    outputTokens.push(tok);
                    wordIds.push(i)
                });
            }
        }

        return {tokens: outputTokens, wordIds};
    }

    encodeText(inputText, inputSize) {

        const tokenized = this.tokenize(inputText);
        const encoded = this.encode(tokenized);

        const encodedTokenChunks = chunk(encoded.tokens, inputSize - 2);
        const encodedWordIdChunks = chunk(encoded.wordIds, inputSize - 2);

        const chunks = [];

        zip(encodedTokenChunks, encodedWordIdChunks).forEach(([tokens, wordIds]) => {
            const inputIds = [CLS_INDEX, ...tokens, SEP_INDEX];
            const segmentIds = Array(inputIds.length).fill(0);
            const inputMask = Array(inputIds.length).fill(1);
            wordIds = [-1, ...wordIds, -1];

            while (inputIds.length < inputSize) {
                inputIds.push(0);
                inputMask.push(0);
                segmentIds.push(0);
                wordIds.push(-1);
            }

            chunks.push({inputIds, inputMask, segmentIds, wordIds})
        });

        return {
            inputChunks: chunks,
            words: tokenized
        };
    }
}
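To make the WordPiece logic above concrete, here is roughly what encodeText yields for a one-word input, assuming a vocabulary that contains "play" and "##ing" but not "playing" (an assumed example, using an input size of 8):

// tokenize("playing")  -> [{text: "playing", index: 0}]
// encode([...])        -> {tokens: [vocab.indexOf("play"), vocab.indexOf("##ing")],
//                          wordIds: [0, 0]}          // both sub-tokens map to word 0
// encodeText("playing", 8) -> one chunk:
//   inputIds  = [101, <play>, <##ing>, 102, 0, 0, 0, 0]   // CLS ... SEP, then padding
//   inputMask = [1, 1, 1, 1, 0, 0, 0, 0]
//   wordIds   = [-1, 0, 0, -1, -1, -1, -1, -1]            // -1 marks CLS/SEP/padding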
sist2-vue/src/ml/modelsRepo.js (new file, 43 lines)
@@ -0,0 +1,43 @@
import axios from "axios";

class ModelsRepo {
    _repositories;
    data = {};

    async init(repositories) {
        this._repositories = repositories;

        const data = await Promise.all(this._repositories.map(this._loadRepository));

        data.forEach(models => {
            models.forEach(model => {
                this.data[model.name] = model;
            })
        });
    }

    async _loadRepository(repository) {
        const data = (await axios.get(repository)).data;
        data.forEach(model => {
            model["modelUrl"] = new URL(model["modelPath"], repository).href;
            model["vocabUrl"] = new URL(model["vocabPath"], repository).href;
        });
        return data;
    }

    getOptions() {
        return Object.values(this.data).map(model => ({
            text: `${model.name} (${Math.round(model.size / (1024*1024))}MB)`,
            value: model.name
        }));
    }

    getDefaultModel() {
        if (Object.values(this.data).length === 0) {
            return null;
        }
        return Object.values(this.data).find(model => model.default).name;
    }
}

export default new ModelsRepo();
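_loadRepository expects each repository URL to serve a JSON array of model descriptors. Judging by the fields read here and in LazyContentDiv, an entry plausibly looks like the following (the field names are inferred from usage and the values are illustrative):

// One element of a hypothetical repo.json:
const exampleModel = {
    name: "bert-ner-en",                  // key used in ModelsRepo.data
    default: true,                        // picked by getDefaultModel()
    size: 43 * 1024 * 1024,               // bytes, shown by getOptions()
    modelPath: "bert-ner-en/model.json",  // resolved against the repository URL
    vocabPath: "bert-ner-en/vocab.json",
    id2label: {0: "O", 1: "B-PER"},       // consumed by BertNerModel
    humanLabels: {"B-PER": "Person"},     // applied when displaying spans
};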
@@ -33,6 +33,7 @@ export default new Vuex.Store({
     optHideDuplicates: true,
     optTheme: "light",
     optDisplay: "grid",
+    optFeaturedFields: "",

     optSize: 60,
     optHighlight: true,
@@ -56,6 +57,9 @@ export default new Vuex.Store({
     optVidPreviewInterval: 700,
     optSimpleLightbox: true,
     optShowTagPickerFilter: true,
+    optMlRepositories: "https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json",
+    optAutoAnalyze: false,
+    optMlDefaultModel: null,

     _onLoadSelectedIndices: [] as string[],
     _onLoadSelectedMimeTypes: [] as string[],
@@ -85,7 +89,11 @@ export default new Vuex.Store({

     uiMimeMap: [] as any[],

-    auth0Token: null
+    auth0Token: null,
+    mlModel: {
+      model: null,
+      name: null
+    },
   },
   mutations: {
     setUiShowDetails: (state, val) => state.uiShowDetails = val,
@@ -158,6 +166,7 @@ export default new Vuex.Store({
     setOptQueryMode: (state, val) => state.optQueryMode = val,
     setOptResultSize: (state, val) => state.optSize = val,
     setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
+    setOptFeaturedFields: (state, val) => state.optFeaturedFields = val,

     setOptTreemapType: (state, val) => state.optTreemapType = val,
     setOptTreemapTiling: (state, val) => state.optTreemapTiling = val,
@@ -170,6 +179,9 @@ export default new Vuex.Store({
     setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
     setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val,
     setOptShowTagPickerFilter: (state, val) => state.optShowTagPickerFilter = val,
+    setOptAutoAnalyze: (state, val) => {state.optAutoAnalyze = val},
+    setOptMlRepositories: (state, val) => {state.optMlRepositories = val},
+    setOptMlDefaultModel: (state, val) => {state.optMlDefaultModel = val},

     setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
     setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@@ -192,6 +204,7 @@ export default new Vuex.Store({
       // noop
     },
     setAuth0Token: (state, val) => state.auth0Token = val,
+    setMlModel: (state, val) => state.mlModel = val,
   },
   actions: {
     setSist2Info: (store, val) => {
@@ -348,6 +361,7 @@ export default new Vuex.Store({
   },
   modules: {},
   getters: {
+    mlModel: (state) => state.mlModel,
     seed: (state) => state.seed,
     getPathText: (state) => state.pathText,
     indices: state => state.indices,
@@ -413,5 +427,13 @@ export default new Vuex.Store({
     optVidPreviewInterval: state => state.optVidPreviewInterval,
     optSimpleLightbox: state => state.optSimpleLightbox,
     optShowTagPickerFilter: state => state.optShowTagPickerFilter,
+    optFeaturedFields: state => state.optFeaturedFields,
+    optMlRepositories: state => state.optMlRepositories,
+    mlRepositoryList: state => {
+      const repos = state.optMlRepositories.split("\n")
+      return repos[0] == "" ? [] : repos;
+    },
+    optMlDefaultModel: state => state.optMlDefaultModel,
+    optAutoAnalyze: state => state.optAutoAnalyze,
   }
 })
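One subtlety in the mlRepositoryList getter added above: String.prototype.split never returns an empty array, so an empty textarea value has to be special-cased:

// "".split("\n")                      -> [""]  (not []), hence the repos[0] check
// "https://a\nhttps://b".split("\n")  -> ["https://a", "https://b"]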
@@ -57,6 +57,14 @@ export function humanTime(sec_num: number): string {
   const minutes = Math.floor((sec_num - (hours * 3600)) / 60);
   const seconds = sec_num - (hours * 3600) - (minutes * 60);

+  if (sec_num < 60) {
+    return `${sec_num}s`
+  }
+
+  if (sec_num < 3600) {
+    return `${minutes < 10 ? "0" : ""}${minutes}:${seconds < 10 ? "0" : ""}${seconds}`;
+  }
+
   return `${hours < 10 ? "0" : ""}${hours}:${minutes < 10 ? "0" : ""}${minutes}:${seconds < 10 ? "0" : ""}${seconds}`;
 }
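Worked examples of the new humanTime short-circuits, computed from the code above:

// humanTime(42)   -> "42s"       (under a minute)
// humanTime(125)  -> "02:05"     (under an hour: MM:SS)
// humanTime(3725) -> "01:02:05"  (HH:MM:SS)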
@@ -1,144 +1,218 @@
<template>
<!--  <div :style="{width: `${$store.getters.optContainerWidth}px`}"-->
  <div
      v-if="!configLoading"
      style="margin-left: auto; margin-right: auto;" class="container">

    <b-card>
      <b-card-title>
        <GearIcon></GearIcon>
        {{ $t("config") }}
      </b-card-title>
      <p>{{ $t("configDescription") }}</p>

      <b-card-body>
        <h4>{{ $t("displayOptions") }}</h4>

        <b-card>

          <label>
            <LanguageIcon/>
            <span style="vertical-align: middle"> {{ $t("opt.lang") }}</span></label>
          <b-form-select :options="langOptions" :value="optLang" @input="setOptLang"></b-form-select>

          <label>{{ $t("opt.theme") }}</label>
          <b-form-select :options="themeOptions" :value="optTheme" @input="setOptTheme"></b-form-select>

          <label>{{ $t("opt.displayMode") }}</label>
          <b-form-select :options="displayModeOptions" :value="optDisplay"
                         @input="setOptDisplay"></b-form-select>

          <label>{{ $t("opt.columns") }}</label>
          <b-form-select :options="columnsOptions" :value="optColumns" @input="setOptColumns"></b-form-select>

          <div style="height: 10px"></div>

          <b-form-checkbox :checked="optLightboxLoadOnlyCurrent" @input="setOptLightboxLoadOnlyCurrent">
            {{ $t("opt.lightboxLoadOnlyCurrent") }}
          </b-form-checkbox>

          <b-form-checkbox :checked="optHideLegacy" @input="setOptHideLegacy">
            {{ $t("opt.hideLegacy") }}
          </b-form-checkbox>

          <b-form-checkbox :checked="optUpdateMimeMap" @input="setOptUpdateMimeMap">
            {{ $t("opt.updateMimeMap") }}
          </b-form-checkbox>

          <b-form-checkbox :checked="optUseDatePicker" @input="setOptUseDatePicker">
            {{ $t("opt.useDatePicker") }}
          </b-form-checkbox>

          <b-form-checkbox :checked="optSimpleLightbox" @input="setOptSimpleLightbox">{{
              $t("opt.simpleLightbox")
            }}
          </b-form-checkbox>

          <b-form-checkbox :checked="optShowTagPickerFilter" @input="setOptShowTagPickerFilter">{{
              $t("opt.showTagPickerFilter")
            }}
          </b-form-checkbox>

          <br/>
          <label>{{ $t("opt.featuredFields") }}</label>

          <br>
          <b-button v-b-toggle.collapse-1 variant="secondary" class="dropdown-toggle">{{
              $t("opt.featuredFieldsList")
            }}
          </b-button>
          <b-collapse id="collapse-1" class="mt-2">
            <ul>
              <li><code>doc.checksum</code></li>
              <li><code>doc.path</code></li>
              <li><code>doc.mime</code></li>
              <li><code>doc.videoc</code></li>
              <li><code>doc.audioc</code></li>
              <li><code>doc.pages</code></li>
              <li><code>doc.mtime</code></li>
              <li><code>doc.font_name</code></li>
              <li><code>doc.album</code></li>
              <li><code>doc.artist</code></li>
              <li><code>doc.title</code></li>
              <li><code>doc.genre</code></li>
              <li><code>doc.album_artist</code></li>
              <li><code>doc.exif_make</code></li>
              <li><code>doc.exif_model</code></li>
              <li><code>doc.exif_software</code></li>
              <li><code>doc.exif_exposure_time</code></li>
              <li><code>doc.exif_fnumber</code></li>
              <li><code>doc.exif_iso_speed_ratings</code></li>
              <li><code>doc.exif_focal_length</code></li>
              <li><code>doc.exif_user_comment</code></li>
              <li><code>doc.exif_gps_longitude_ref</code></li>
              <li><code>doc.exif_gps_longitude_dms</code></li>
              <li><code>doc.exif_gps_longitude_dec</code></li>
              <li><code>doc.exif_gps_latitude_ref</code></li>
              <li><code>doc.exif_gps_latitude_dec</code></li>
              <li><code>humanDate()</code></li>
              <li><code>humanFileSize()</code></li>
            </ul>

            <p>{{ $t("forExample") }}</p>

            <ul>
              <li>
                <code><b>${humanDate(doc.mtime)}</b> • ${doc.videoc || ''}</code>
              </li>
              <li>
                <code>${doc.pages ? (doc.pages + ' pages') : ''}</code>
              </li>
            </ul>
          </b-collapse>
          <br/>
          <b-textarea rows="3" :value="optFeaturedFields" @input="setOptFeaturedFields"></b-textarea>
        </b-card>

        <br/>
        <h4>{{ $t("searchOptions") }}</h4>
        <b-card>
          <b-form-checkbox :checked="optHideDuplicates" @input="setOptHideDuplicates">{{
              $t("opt.hideDuplicates")
            }}
          </b-form-checkbox>

          <b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{
              $t("opt.highlight")
            }}
          </b-form-checkbox>
          <b-form-checkbox :checked="optTagOrOperator" @input="setOptTagOrOperator">{{
              $t("opt.tagOrOperator")
            }}
          </b-form-checkbox>
          <b-form-checkbox :checked="optFuzzy" @input="setOptFuzzy">{{ $t("opt.fuzzy") }}</b-form-checkbox>
          <b-form-checkbox :checked="optSearchInPath" @input="setOptSearchInPath">{{
              $t("opt.searchInPath")
            }}
          </b-form-checkbox>
          <b-form-checkbox :checked="optSuggestPath" @input="setOptSuggestPath">{{
              $t("opt.suggestPath")
            }}
          </b-form-checkbox>

          <br/>
          <label>{{ $t("opt.fragmentSize") }}</label>
          <b-form-input :value="optFragmentSize" step="10" type="number" min="0"
                        @input="setOptFragmentSize"></b-form-input>

          <label>{{ $t("opt.resultSize") }}</label>
          <b-form-input :value="optResultSize" type="number" min="10"
                        @input="setOptResultSize"></b-form-input>

          <label>{{ $t("opt.queryMode") }}</label>
          <b-form-select :options="queryModeOptions" :value="optQueryMode"
                         @input="setOptQueryMode"></b-form-select>

          <label>{{ $t("opt.slideDuration") }}</label>
          <b-form-input :value="optLightboxSlideDuration" type="number" min="1"
                        @input="setOptLightboxSlideDuration"></b-form-input>

          <label>{{ $t("opt.vidPreviewInterval") }}</label>
          <b-form-input :value="optVidPreviewInterval" type="number" min="50"
                        @input="setOptVidPreviewInterval"></b-form-input>
        </b-card>

        <h4 class="mt-3">{{ $t("mlOptions") }}</h4>
        <b-card>
          <label>{{ $t("opt.mlRepositories") }}</label>
          <b-textarea rows="3" :value="optMlRepositories" @input="setOptMlRepositories"></b-textarea>
          <br>
          <b-form-checkbox :checked="optAutoAnalyze" @input="setOptAutoAnalyze">{{
              $t("opt.autoAnalyze")
            }}
          </b-form-checkbox>
        </b-card>

        <h4 class="mt-3">{{ $t("treemapOptions") }}</h4>
        <b-card>
          <label>{{ $t("opt.treemapType") }}</label>
          <b-form-select :value="optTreemapType" :options="treemapTypeOptions"
                         @input="setOptTreemapType"></b-form-select>

          <label>{{ $t("opt.treemapTiling") }}</label>
          <b-form-select :value="optTreemapTiling" :options="treemapTilingOptions"
                         @input="setOptTreemapTiling"></b-form-select>

          <label>{{ $t("opt.treemapColorGroupingDepth") }}</label>
          <b-form-input :value="optTreemapColorGroupingDepth" type="number" min="1"
                        @input="setOptTreemapColorGroupingDepth"></b-form-input>

          <label>{{ $t("opt.treemapSize") }}</label>
          <b-form-select :value="optTreemapSize" :options="treemapSizeOptions"
                         @input="setOptTreemapSize"></b-form-select>

          <template v-if="$store.getters.optTreemapSize === 'custom'">
            <!-- TODO Width/Height input -->
            <b-form-input type="number" min="0" step="10"></b-form-input>
            <b-form-input type="number" min="0" step="10"></b-form-input>
          </template>

          <label>{{ $t("opt.treemapColor") }}</label>
          <b-form-select :value="optTreemapColor" :options="treemapColorOptions"
                         @input="setOptTreemapColor"></b-form-select>
        </b-card>

        <b-button variant="danger" class="mt-4" @click="onResetClick()">{{ $t("configReset") }}</b-button>
      </b-card-body>
    </b-card>

    <b-card v-if="loading" class="mt-4">
      <Preloader></Preloader>
    </b-card>
    <DebugInfo v-else></DebugInfo>
  </div>
</template>

<script>
@@ -150,161 +224,168 @@ import GearIcon from "@/components/icons/GearIcon.vue";
import LanguageIcon from "@/components/icons/LanguageIcon";

export default {
  components: {LanguageIcon, GearIcon, DebugInfo, Preloader},
  data() {
    return {
      loading: false,
      configLoading: false,
      langOptions: [
        {value: "en", text: this.$t("lang.en")},
        {value: "fr", text: this.$t("lang.fr")},
        {value: "zh-CN", text: this.$t("lang.zh-CN")},
        {value: "de", text: this.$t("lang.de")},
      ],
      queryModeOptions: [
        {value: "simple", text: this.$t("queryMode.simple")},
        {value: "advanced", text: this.$t("queryMode.advanced")}
      ],
      displayModeOptions: [
        {value: "grid", text: this.$t("displayMode.grid")},
        {value: "list", text: this.$t("displayMode.list")}
      ],
      columnsOptions: [
        {value: "auto", text: this.$t("columns.auto")},
        {value: 1, text: "1"},
        {value: 2, text: "2"},
        {value: 3, text: "3"},
        {value: 4, text: "4"},
        {value: 5, text: "5"},
        {value: 6, text: "6"},
        {value: 7, text: "7"},
        {value: 8, text: "8"},
        {value: 9, text: "9"},
        {value: 10, text: "10"},
        {value: 11, text: "11"},
        {value: 12, text: "12"},
      ],
      treemapTypeOptions: [
        {value: "cascaded", text: this.$t("treemapType.cascaded")},
        {value: "flat", text: this.$t("treemapType.flat")}
      ],
      treemapTilingOptions: [
        {value: "binary", text: this.$t("treemapTiling.binary")},
        {value: "squarify", text: this.$t("treemapTiling.squarify")},
        {value: "slice", text: this.$t("treemapTiling.slice")},
        {value: "dice", text: this.$t("treemapTiling.dice")},
        {value: "sliceDice", text: this.$t("treemapTiling.sliceDice")},
      ],
      treemapSizeOptions: [
        {value: "small", text: this.$t("treemapSize.small")},
        {value: "medium", text: this.$t("treemapSize.medium")},
        {value: "large", text: this.$t("treemapSize.large")},
        {value: "x-large", text: this.$t("treemapSize.xLarge")},
        {value: "xx-large", text: this.$t("treemapSize.xxLarge")},
        // {value: "custom", text: this.$t("treemapSize.custom")},
      ],
      treemapColorOptions: [
        {value: "PuBuGn", text: "Purple-Blue-Green"},
        {value: "PuRd", text: "Purple-Red"},
        {value: "PuBu", text: "Purple-Blue"},
        {value: "YlOrBr", text: "Yellow-Orange-Brown"},
        {value: "YlOrRd", text: "Yellow-Orange-Red"},
        {value: "YlGn", text: "Yellow-Green"},
        {value: "YlGnBu", text: "Yellow-Green-Blue"},
        {value: "Plasma", text: "Plasma"},
        {value: "Magma", text: "Magma"},
        {value: "Inferno", text: "Inferno"},
        {value: "Viridis", text: "Viridis"},
        {value: "Turbo", text: "Turbo"},
      ],
      themeOptions: [
        {value: "light", text: this.$t("theme.light")},
        {value: "black", text: this.$t("theme.black")}
      ]

    }
  },
  computed: {
    ...mapGetters([
      "optTheme",
      "optDisplay",
      "optColumns",
      "optHighlight",
      "optFuzzy",
      "optSearchInPath",
      "optSuggestPath",
      "optFragmentSize",
      "optQueryMode",
      "optTreemapType",
      "optTreemapTiling",
      "optTreemapColorGroupingDepth",
      "optTreemapColor",
      "optTreemapSize",
      "optLightboxLoadOnlyCurrent",
      "optLightboxSlideDuration",
      "optResultSize",
      "optTagOrOperator",
      "optLang",
      "optHideDuplicates",
      "optHideLegacy",
      "optUpdateMimeMap",
      "optUseDatePicker",
      "optVidPreviewInterval",
      "optSimpleLightbox",
      "optShowTagPickerFilter",
      "optFeaturedFields",
      "optMlRepositories",
      "optAutoAnalyze",
    ]),
    clientWidth() {
      return window.innerWidth;
    }
  },
  mounted() {
    this.$store.subscribe((mutation) => {
      if (mutation.type.startsWith("setOpt")) {
        this.$store.dispatch("updateConfiguration");
      }
    });
  },
  methods: {
    ...mapActions({
      setSist2Info: "setSist2Info",
    }),
    ...mapMutations([
      "setOptTheme",
      "setOptDisplay",
      "setOptColumns",
      "setOptHighlight",
      "setOptFuzzy",
      "setOptSearchInPath",
      "setOptSuggestPath",
      "setOptFragmentSize",
      "setOptQueryMode",
      "setOptTreemapType",
      "setOptTreemapTiling",
      "setOptTreemapColorGroupingDepth",
      "setOptTreemapColor",
      "setOptTreemapSize",
      "setOptLightboxLoadOnlyCurrent",
      "setOptLightboxSlideDuration",
      "setOptResultSize",
      "setOptTagOrOperator",
      "setOptLang",
      "setOptHideDuplicates",
      "setOptHideLegacy",
      "setOptUpdateMimeMap",
      "setOptUseDatePicker",
      "setOptVidPreviewInterval",
      "setOptSimpleLightbox",
      "setOptShowTagPickerFilter",
      "setOptFeaturedFields",
      "setOptMlRepositories",
      "setOptAutoAnalyze",
    ]),
    onResetClick() {
      localStorage.removeItem("sist2_configuration");
      window.location.reload();
    }
  },
}
</script>

<style>
.shrink {
  flex-grow: inherit;
}
</style>
@@ -1,57 +1,61 @@
 <template>
     <div class="container">
         <Lightbox></Lightbox>
         <HelpDialog :show="showHelp" @close="showHelp = false"></HelpDialog>

         <b-card v-if="uiLoading">
             <Preloader></Preloader>
         </b-card>

-        <b-card v-show="!uiLoading" id="search-panel">
+        <b-alert v-show="!uiLoading && showEsConnectionError" show variant="danger" class="mt-2">
+            {{ $t("toast.esConnErr") }}
+        </b-alert>
+
+        <b-card v-show="!uiLoading && !showEsConnectionError" id="search-panel">
             <SearchBar @show-help="showHelp=true"></SearchBar>
             <b-row>
                 <b-col style="height: 70px;" sm="6">
                     <SizeSlider></SizeSlider>
                 </b-col>
                 <b-col>
                     <PathTree @search="search(true)"></PathTree>
                 </b-col>
             </b-row>
             <b-row>
                 <b-col sm="6">
                     <DateSlider></DateSlider>
                     <b-row>
                         <b-col>
                             <IndexPicker></IndexPicker>
                         </b-col>
                     </b-row>
                 </b-col>
                 <b-col>
                     <b-tabs justified>
                         <b-tab :title="$t('mimeTypes')">
                             <MimePicker></MimePicker>
                         </b-tab>
                         <b-tab :title="$t('tags')">
                             <TagPicker :show-search-bar="$store.state.optShowTagPickerFilter"></TagPicker>
                         </b-tab>
                     </b-tabs>
                 </b-col>
             </b-row>
         </b-card>

         <div v-show="docs.length === 0 && !uiLoading">
             <Preloader v-if="searchBusy" class="mt-3"></Preloader>

             <ResultsCard></ResultsCard>
         </div>

         <div v-if="docs.length > 0">
             <ResultsCard></ResultsCard>

             <DocCardWall v-if="optDisplay==='grid'" :docs="docs" :append="appendFunc"></DocCardWall>
             <DocList v-else :docs="docs" :append="appendFunc"></DocList>
         </div>
     </div>
 </template>

 <script lang="ts">
@@ -78,234 +82,253 @@ import HelpDialog from "@/components/HelpDialog.vue";


 export default Vue.extend({
     components: {
         HelpDialog,
         DocList,
         TagPicker,
         DateSlider,
         SizeSlider, PathTree, ResultsCard, MimePicker, Lightbox, DocCardWall, IndexPicker, SearchBar, Preloader
     },
     data: () => ({
         loading: false,
         uiLoading: true,
         search: undefined as any,
         docs: [] as EsHit[],
         docIds: new Set(),
         docChecksums: new Set(),
         searchBusy: false,
         Sist2Query: Sist2Query,
-        showHelp: false
+        showHelp: false,
+        showEsConnectionError: false
     }),
     computed: {
         ...mapGetters(["indices", "optDisplay"]),
     },
     mounted() {
         // Handle touch events
         window.ontouchend = () => this.$store.commit("busTouchEnd");
         window.ontouchcancel = this.$store.commit("busTouchEnd");

         this.search = _debounce(async (clear: boolean) => {
             if (clear) {
                 await this.clearResults();
             }

             await this.searchNow(Sist2Query.searchQuery());
         }, 350, {leading: false});

         this.$store.dispatch("loadFromArgs", this.$route).then(() => {
             this.$store.subscribe(() => this.$store.dispatch("updateArgs", this.$router));
             this.$store.subscribe((mutation) => {
                 if ([
                     "setSizeMin", "setSizeMax", "setDateMin", "setDateMax", "setSearchText", "setPathText",
                     "setSortMode", "setOptHighlight", "setOptFragmentSize", "setFuzzy", "setSize", "setSelectedIndices",
                     "setSelectedMimeTypes", "setSelectedTags", "setOptQueryMode", "setOptSearchInPath",
                 ].includes(mutation.type)) {
                     if (this.searchBusy) {
                         return;
                     }

                     this.search(true);
                 }
             });
         });

         this.setIndices(this.$store.getters["sist2Info"].indices)

         this.getDateRange().then((range: { min: number, max: number }) => {
             this.setDateBoundsMin(range.min);
             this.setDateBoundsMax(range.max);

             const doBlankSearch = !this.$store.state.optUpdateMimeMap;

             Sist2Api.getMimeTypes(Sist2Query.searchQuery(doBlankSearch)).then(({mimeMap}) => {
                 this.$store.commit("setUiMimeMap", mimeMap);
                 this.uiLoading = false;
                 this.search(true);
             });
-        });
+        }).catch(error => {
+            console.log(error);
+
+            if (error.response.status == 503 || error.response.status == 500) {
+                this.showEsConnectionError = true;
+                this.uiLoading = false;
+            } else {
+                this.showErrorToast();
+            }
+        });
     },
     methods: {
         ...mapActions({
             setSist2Info: "setSist2Info",
         }),
         ...mapMutations({
             setIndices: "setIndices",
             setDateBoundsMin: "setDateBoundsMin",
             setDateBoundsMax: "setDateBoundsMax",
             setTags: "setTags",
         }),
         showErrorToast() {
             this.$bvToast.toast(
                 this.$t("toast.esConnErr"),
                 {
                     title: this.$t("toast.esConnErrTitle"),
                     noAutoHide: true,
                     toaster: "b-toaster-bottom-right",
                     headerClass: "toast-header-error",
                     bodyClass: "toast-body-error",
                 });
         },
         showSyntaxErrorToast: function (): void {
             this.$bvToast.toast(
                 this.$t("toast.esQueryErr"),
                 {
                     title: this.$t("toast.esQueryErrTitle"),
                     noAutoHide: true,
                     toaster: "b-toaster-bottom-right",
                     headerClass: "toast-header-warning",
                     bodyClass: "toast-body-warning",
                 });
         },
         async searchNow(q: any) {
             this.searchBusy = true;
             await this.$store.dispatch("incrementQuerySequence");
             this.$store.commit("busSearch");

             Sist2Api.esQuery(q).then(async (resp: EsResult) => {
                 await this.handleSearch(resp);
                 this.searchBusy = false;
             }).catch(err => {
                 if (err.response.status === 500 && this.$store.state.optQueryMode === "advanced") {
                     this.showSyntaxErrorToast();
                 } else {
                     this.showErrorToast();
                 }
             });
         },
         async clearResults() {
             this.docs = [];
             this.docIds.clear();
             this.docChecksums.clear();
             await this.$store.dispatch("clearResults");
             this.$store.commit("setUiReachedScrollEnd", false);
         },
         async handleSearch(resp: EsResult) {
             if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
                 this.$store.commit("setUiReachedScrollEnd", true);
             }

             resp.hits.hits = resp.hits.hits.filter(hit => !this.docIds.has(hit._id));

             if (this.$store.state.optHideDuplicates) {
                 resp.hits.hits = resp.hits.hits.filter(hit => {

                     if (!("checksum" in hit._source)) {
                         return true;
                     }

                     const isDupe = !this.docChecksums.has(hit._source.checksum);
                     this.docChecksums.add(hit._source.checksum);
                     return isDupe;
                 });
             }

             for (const hit of resp.hits.hits) {
                 if (hit._props.isPlayableImage || hit._props.isPlayableVideo) {
                     hit._seq = await this.$store.dispatch("getKeySequence");
                     this.$store.commit("addLightboxSource", {
                         source: `f/${hit._id}`,
                         thumbnail: hit._props.hasThumbnail
                             ? `t/${hit._source.index}/${hit._id}`
                             : null,
                         caption: {
                             component: LightboxCaption,
                             props: {hit: hit}
                         },
                         type: hit._props.isVideo ? "video" : "image"
                     });
                 }
             }

             await this.$store.dispatch("remountLightbox");
             this.$store.commit("setLastQueryResult", resp);

             this.docs.push(...resp.hits.hits);

             resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
         },
         getDateRange(): Promise<{ min: number, max: number }> {
             return sist2.esQuery({
                 // TODO: filter current selected indices
                 aggs: {
                     dateMin: {min: {field: "mtime"}},
                     dateMax: {max: {field: "mtime"}},
                 },
                 size: 0
             }).then(res => {
-                return {
+                const range = {
                     min: res.aggregations.dateMin.value,
                     max: res.aggregations.dateMax.value,
                 }
+
+                if (range.min == null) {
+                    range.min = 0;
+                    range.max = 1;
+                } else if (range.min == range.max) {
+                    range.max += 1;
+                }
+
+                return range;
             });
         },
         appendFunc() {
             if (!this.$store.state.uiReachedScrollEnd && this.search && !this.searchBusy) {
                 this.searchNow(Sist2Query.searchQuery());
             }
         }
     },
     beforeRouteUpdate(to, from, next) {
         if (this.$store.state.uiLightboxIsOpen) {
             this.$store.commit("_setUiShowLightbox", false);
             next(false);
         } else {
             next();
         }
     },
 })
 </script>

<style>

#search-panel {
    box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
    border-radius: 0;
    border: none;
}

.toast-header-info, .toast-body-info {
    background: #2196f3;
    color: #fff !important;
}

.toast-header-error, .toast-body-error {
    background: #a94442;
    color: #f2dede !important;
}

.toast-header-error {
    color: #fff !important;
    border-bottom: none;
    margin-bottom: -1em;
}

.toast-header-error .close {
    text-shadow: none;
}

.toast-header-warning, .toast-body-warning {
    background: #FF8F00;
    color: #FFF3E0 !important;
}
</style>
@@ -1,12 +1,13 @@
 #ifndef SIST2_AUTH0_C_API_H
 #define SIST2_AUTH0_C_API_H

-#include "stdlib.h"
-
 #ifdef __cplusplus
 #define EXTERNC extern "C"
+#include "cstdlib"
 #else
 #define EXTERNC
+#include "stdlib.h"
 #endif

 #define AUTH0_OK (0)
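The EXTERNC macro above lets one header declare C-linkage symbols for both C and C++ consumers. A minimal usage sketch; the function below is hypothetical and not part of the sist2 API:

    #include "auth0_c_api.h"

    // Under a C++ compiler, EXTERNC expands to `extern "C"` and disables
    // name mangling; under plain C it expands to nothing.
    EXTERNC int auth0_example_check(const char *token);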
177 src/cli.c
@@ -2,16 +2,17 @@
 #include "ctx.h"
 #include <tesseract/capi.h>

-#define DEFAULT_OUTPUT "index.sist2/"
+#define DEFAULT_OUTPUT "index.sist2"
 #define DEFAULT_NAME "index"
 #define DEFAULT_CONTENT_SIZE 32768
-#define DEFAULT_QUALITY 1
-#define DEFAULT_THUMBNAIL_SIZE 500
+#define DEFAULT_QUALITY 2
+#define DEFAULT_THUMBNAIL_SIZE 552
 #define DEFAULT_THUMBNAIL_COUNT 1
 #define DEFAULT_REWRITE_URL ""

 #define DEFAULT_ES_URL "http://localhost:9200"
 #define DEFAULT_ES_INDEX "sist2"
-#define DEFAULT_BATCH_SIZE 100
+#define DEFAULT_BATCH_SIZE 70
 #define DEFAULT_TAGLINE "Lightning-fast file system indexer and search tool"
 #define DEFAULT_LANG "en"

@@ -20,8 +21,6 @@

 #define DEFAULT_MAX_MEM_BUFFER 2000

-#define DEFAULT_THROTTLE_MEMORY_THRESHOLD 0
-
 const char *TESS_DATAPATHS[] = {
         "/usr/share/tessdata/",
         "/usr/share/tesseract-ocr/tessdata/",
@@ -48,9 +47,6 @@ void scan_args_destroy(scan_args_t *args) {
     if (args->name != NULL) {
         free(args->name);
     }
-    if (args->incremental != NULL) {
-        free(args->incremental);
-    }
     if (args->path != NULL) {
         free(args->path);
     }
@@ -61,7 +57,6 @@ void scan_args_destroy(scan_args_t *args) {
 }

 void index_args_destroy(index_args_t *args) {
-    //todo
     if (args->es_mappings_path) {
         free(args->es_mappings);
     }
@@ -76,7 +71,6 @@ void index_args_destroy(index_args_t *args) {
 }

 void web_args_destroy(web_args_t *args) {
-    //todo
     free(args);
 }

@@ -97,23 +91,17 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {

     char *abs_path = abspath(argv[1]);
     if (abs_path == NULL) {
-        LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1])
+        LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]);
     } else {
         abs_path = realloc(abs_path, strlen(abs_path) + 2);
         strcat(abs_path, "/");
         args->path = abs_path;
     }

-    if (args->incremental != OPTION_VALUE_UNSPECIFIED) {
-        args->incremental = abspath(args->incremental);
-        if (abs_path == NULL) {
-            sist_log("main.c", LOG_SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
-            args->incremental = NULL;
-        }
-    }
-
     if (args->tn_quality == OPTION_VALUE_UNSPECIFIED) {
         args->tn_quality = DEFAULT_QUALITY;
-    } else if (args->tn_quality < 1.0f || args->tn_quality > 31.0f) {
-        fprintf(stderr, "Invalid value for --thumbnail-quality argument: %f. Must be within [1.0, 31.0].\n",
+    } else if (args->tn_quality < 2 || args->tn_quality > 31) {
+        fprintf(stderr, "Invalid value for --thumbnail-quality argument: %d. Must be within [2, 31].\n",
                 args->tn_quality);
         return 1;
     }
@@ -140,8 +128,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {

     if (args->threads == 0) {
         args->threads = 1;
-    } else if (args->threads < 0) {
-        fprintf(stderr, "Invalid value for --threads: %d. Must be a positive number\n", args->threads);
+    } else if (args->threads < 0 || args->threads > 256) {
+        fprintf(stderr, "Invalid value for --threads: %d. Must be a positive number <= 256\n", args->threads);
         return 1;
     }

@@ -152,20 +140,24 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
         args->output = expandpath(args->output);
     }

-    int ret = mkdir(args->output, S_IRUSR | S_IWUSR | S_IXUSR);
-    if (ret != 0) {
-        fprintf(stderr, "Invalid output: '%s' (%s).\n", args->output, strerror(errno));
-        return 1;
+    char *abs_output = abspath(args->output);
+    if (args->incremental && abs_output == NULL) {
+        LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", args->output);
+        args->incremental = FALSE;
+    } else if (!args->incremental && abs_output != NULL) {
+        LOG_FATALF("main.c", "Index already exists: %s. If you wish to perform incremental scan, you must specify --incremental", abs_output);
     }
+    free(abs_output);

     if (args->depth <= 0) {
-        args->depth = G_MAXINT32;
+        args->depth = 2147483647;
     } else {
         args->depth += 1;
     }

     if (args->name == OPTION_VALUE_UNSPECIFIED) {
-        args->name = g_path_get_basename(args->output);
+        args->name = malloc(strlen(DEFAULT_NAME) + 1);
+        strcpy(args->name, DEFAULT_NAME);
     } else {
         char *tmp = malloc(strlen(args->name) + 1);
         strcpy(tmp, args->name);
@@ -224,7 +216,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
         }
         if (trained_data_path != NULL && path != trained_data_path) {
             LOG_FATAL("cli.c", "When specifying more than one tesseract language, all the traineddata "
-                               "files must be in the same folder")
+                               "files must be in the same folder");
         }
         trained_data_path = path;

@@ -232,7 +224,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
         }
         free(lang);

-        ret = TessBaseAPIInit3(api, trained_data_path, args->tesseract_lang);
+        int ret = TessBaseAPIInit3(api, trained_data_path, args->tesseract_lang);
         if (ret != 0) {
             fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
             return 1;
@@ -249,12 +241,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {

         pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
         if (error != NULL) {
-            LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
+            LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset);
         }

         pcre_extra *re_extra = pcre_study(re, 0, &error);
         if (error != NULL) {
-            LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
+            LOG_FATALF("cli.c", "pcre_study returned error: %s", error);
         }

         ScanCtx.exclude = re;
@@ -273,14 +265,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
         args->max_memory_buffer_mib = DEFAULT_MAX_MEM_BUFFER;
     }

-    if (args->scan_mem_limit_mib == OPTION_VALUE_UNSPECIFIED || args->scan_mem_limit_mib == OPTION_VALUE_DISABLE) {
-        args->scan_mem_limit_mib = DEFAULT_THROTTLE_MEMORY_THRESHOLD;
-    }
-
     if (args->list_path != OPTION_VALUE_UNSPECIFIED) {
         if (strcmp(args->list_path, "-") == 0) {
             args->list_file = stdin;
-            LOG_DEBUG("cli.c", "Using stdin as list file")
+            LOG_DEBUG("cli.c", "Using stdin as list file");
         } else {
             args->list_file = fopen(args->list_path, "r");

@@ -290,27 +278,27 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
         }
     }

-    LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality)
-    LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size)
-    LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count)
-    LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
-    LOG_DEBUGF("cli.c", "arg threads=%d", args->threads)
-    LOG_DEBUGF("cli.c", "arg incremental=%s", args->incremental)
-    LOG_DEBUGF("cli.c", "arg output=%s", args->output)
-    LOG_DEBUGF("cli.c", "arg rewrite_url=%s", args->rewrite_url)
-    LOG_DEBUGF("cli.c", "arg name=%s", args->name)
-    LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
-    LOG_DEBUGF("cli.c", "arg path=%s", args->path)
-    LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
-    LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase)
-    LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
-    LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
-    LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
-    LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
-    LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub)
-    LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
-    LOG_DEBUGF("cli.c", "arg max_memory_buffer_mib=%d", args->max_memory_buffer_mib)
-    LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path)
+    LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality);
+    LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size);
+    LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count);
+    LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size);
+    LOG_DEBUGF("cli.c", "arg threads=%d", args->threads);
+    LOG_DEBUGF("cli.c", "arg incremental=%d", args->incremental);
+    LOG_DEBUGF("cli.c", "arg output=%s", args->output);
+    LOG_DEBUGF("cli.c", "arg rewrite_url=%s", args->rewrite_url);
+    LOG_DEBUGF("cli.c", "arg name=%s", args->name);
+    LOG_DEBUGF("cli.c", "arg depth=%d", args->depth);
+    LOG_DEBUGF("cli.c", "arg path=%s", args->path);
+    LOG_DEBUGF("cli.c", "arg archive=%s", args->archive);
+    LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase);
+    LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang);
+    LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path);
+    LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex);
+    LOG_DEBUGF("cli.c", "arg fast=%d", args->fast);
+    LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub);
+    LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold);
+    LOG_DEBUGF("cli.c", "arg max_memory_buffer_mib=%d", args->max_memory_buffer_mib);
+    LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path);

     return 0;
 }
@@ -320,20 +308,20 @@ int load_external_file(const char *file_path, char **dst) {
     int res = stat(file_path, &info);

     if (res == -1) {
-        LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
+        LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno));
         return 1;
     }

     int fd = open(file_path, O_RDONLY);
     if (fd == -1) {
-        LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
+        LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno));
         return 1;
     }

     *dst = malloc(info.st_size + 1);
     res = read(fd, *dst, info.st_size);
     if (res < 0) {
-        LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno))
+        LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno));
         return 1;
     }

@@ -361,7 +349,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {

     char *index_path = abspath(argv[1]);
     if (index_path == NULL) {
-        LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1])
+        LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]);
     } else {
         args->index_path = index_path;
     }
@@ -396,28 +384,28 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
         args->batch_size = DEFAULT_BATCH_SIZE;
     }

-    LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
-    LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
-    LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
-    LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
-    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
-    LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script)
+    LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url);
+    LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index);
+    LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl);
+    LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path);
+    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path);
+    LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script);

     if (args->script) {
         char log_buf[5000];

         strncpy(log_buf, args->script, sizeof(log_buf));
         *(log_buf + sizeof(log_buf) - 1) = '\0';
-        LOG_DEBUGF("cli.c", "arg script=%s", log_buf)
+        LOG_DEBUGF("cli.c", "arg script=%s", log_buf);
     }

-    LOG_DEBUGF("cli.c", "arg print=%d", args->print)
-    LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path)
-    LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings)
-    LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path)
-    LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings)
-    LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
-    LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset)
+    LOG_DEBUGF("cli.c", "arg print=%d", args->print);
+    LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path);
+    LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings);
+    LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path);
+    LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings);
+    LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size);
+    LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset);

     return 0;
 }
@@ -538,23 +526,24 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
     for (int i = 0; i < args->index_count; i++) {
         char *abs_path = abspath(args->indices[i]);
         if (abs_path == NULL) {
-            LOG_FATALF("cli.c", "Index not found: %s", args->indices[i])
+            LOG_FATALF("cli.c", "Index not found: %s", args->indices[i]);
         }
         free(abs_path);
     }

-    LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
-    LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
-    LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
-    LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline)
-    LOG_DEBUGF("cli.c", "arg dev=%d", args->dev)
-    LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
-    LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
-    LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
-    LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
-    LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
-    LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
+    LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url);
+    LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index);
+    LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl);
+    LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline);
+    LOG_DEBUGF("cli.c", "arg dev=%d", args->dev);
+    LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address);
+    LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials);
+    LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials);
+    LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user);
+    LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass);
+    LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count);
     for (int i = 0; i < args->index_count; i++) {
-        LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])
+        LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i]);
     }

     return 0;
@@ -579,7 +568,7 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {

     char *index_path = abspath(argv[1]);
     if (index_path == NULL) {
-        LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1])
+        LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1]);
     } else {
         args->index_path = index_path;
     }
@@ -600,12 +589,12 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
         return 1;
     }

-    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
+    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path);

     char log_buf[5000];
     strncpy(log_buf, args->script, sizeof(log_buf));
     *(log_buf + sizeof(log_buf) - 1) = '\0';
-    LOG_DEBUGF("cli.c", "arg script=%s", log_buf)
+    LOG_DEBUGF("cli.c", "arg script=%s", log_buf);

     return 0;
 }
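Most of the cli.c changes above simply append a semicolon to LOG_DEBUG/LOG_DEBUGF/LOG_FATALF calls. A minimal sketch of why statement-like macros are normally written so the trailing semicolon is required; this is the common do/while(0) idiom, not necessarily sist2's exact macro definition:

    #include <stdio.h>

    #define LOG_DEBUGF(tag, fmt, ...) \
        do { fprintf(stderr, "[%s] " fmt "\n", tag, __VA_ARGS__); } while (0)

    int main(void) {
        int verbose = 1;
        // The do/while(0) wrapper makes the expansion parse as one statement,
        // so the call is safe in an un-braced if/else and the ';' is required.
        if (verbose)
            LOG_DEBUGF("cli.c", "arg threads=%d", 4);
        else
            LOG_DEBUGF("cli.c", "quiet=%d", 1);
        return 0;
    }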
@@ -9,12 +9,12 @@
 #define OPTION_VALUE_UNSPECIFIED (0)

 typedef struct scan_args {
-    float tn_quality;
+    int tn_quality;
     int tn_size;
     int content_size;
     int threads;
-    int scan_mem_limit_mib;
-    char *incremental;
+    int incremental;
+    int optimize_database;
     char *output;
     char *rewrite_url;
     char *name;

@@ -3,9 +3,10 @@
 ScanCtx_t ScanCtx = {
         .stat_index_size = 0,
         .stat_tn_size = 0,
-        .dbg_current_files = NULL,
-        .pool = NULL
+        .pool = NULL,
+        .index.path = {0,},
 };
 WebCtx_t WebCtx;
 IndexCtx_t IndexCtx;
 LogCtx_t LogCtx;
+__thread ProcData_t ProcData;

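The new `.index.path = {0,}` initializer above is a nested designated initializer. A minimal sketch of its semantics; the types here are illustrative, not sist2's definitions:

    #include <stdio.h>

    struct index_t { char path[64]; };
    struct ctx_t { struct index_t index; int threads; };

    int main(void) {
        // A designator can reach into a nested member; every member not
        // named (threads, the rest of path) is implicitly zero-initialized.
        struct ctx_t ctx = { .index.path = {0,}, };
        printf("path='%s' threads=%d\n", ctx.index.path, ctx.threads);
        return 0;
    }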
35 src/ctx.h
@@ -16,47 +16,28 @@
 #include "libscan/msdoc/msdoc.h"
 #include "libscan/wpd/wpd.h"
 #include "libscan/json/json.h"
-#include "src/io/store.h"
+#include "src/database/database.h"
 #include "src/index/elastic.h"
+#include "sqlite3.h"

 #include <glib.h>
 #include <pcre.h>

 typedef struct {
     struct index_t index;

     GHashTable *mime_table;
     GHashTable *ext_table;

     tpool_t *pool;

-    tpool_t *writer_pool;
-
     int threads;
     int depth;
     int calculate_checksums;
-    size_t mem_limit;

     size_t stat_tn_size;
     size_t stat_index_size;

-    GHashTable *original_table;
-    GHashTable *copy_table;
-    GHashTable *new_table;
-    pthread_mutex_t copy_table_mu;
-
     pcre *exclude;
     pcre_extra *exclude_extra;
     int fast;

-    GHashTable *dbg_current_files;
-    pthread_mutex_t dbg_current_files_mu;
-
-    int dbg_failed_files_count;
-    int dbg_skipped_files_count;
-    int dbg_excluded_files_count;
-    pthread_mutex_t dbg_file_counts_mu;
-
     scan_arc_ctx_t arc_ctx;
     scan_comic_ctx_t comic_ctx;
     scan_ebook_ctx_t ebook_ctx;
@@ -85,10 +66,6 @@ typedef struct {
     char *es_index;
     int batch_size;
     tpool_t *pool;
-    store_t *tag_store;
-    GHashTable *tags;
-    store_t *meta_store;
-    GHashTable *meta;
     /**
      * Set to false when using --print
      */
@@ -118,10 +95,18 @@ typedef struct {
     int dev;
 } WebCtx_t;


+typedef struct {
+    int thread_id;
+    database_t *ipc_db;
+    database_t *index_db;
+} ProcData_t;
+
 extern ScanCtx_t ScanCtx;
 extern WebCtx_t WebCtx;
 extern IndexCtx_t IndexCtx;
 extern LogCtx_t LogCtx;
+extern __thread ProcData_t ProcData;


 #endif

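ProcData above is declared `__thread`, so each worker thread gets its own thread_id and database handles without any locking. A minimal sketch of the pattern; the code below is illustrative, not sist2's:

    #include <pthread.h>
    #include <stdio.h>

    __thread int thread_id = -1;  // one independent copy per thread

    static void *worker(void *arg) {
        thread_id = (int) (long) arg;  // writes only this thread's copy
        printf("worker sees thread_id=%d\n", thread_id);
        return NULL;
    }

    int main(void) {
        pthread_t a, b;
        pthread_create(&a, NULL, worker, (void *) 1L);
        pthread_create(&b, NULL, worker, (void *) 2L);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
    }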
644 src/database/database.c (new file)
@@ -0,0 +1,644 @@
#include "database.h"
#include "malloc.h"
#include "src/ctx.h"
#include <string.h>
#include <pthread.h>
#include "src/util.h"

#include <time.h>


database_t *database_create(const char *filename, database_type_t type) {
    database_t *db = malloc(sizeof(database_t));

    strcpy(db->filename, filename);
    db->type = type;
    db->select_thumbnail_stmt = NULL;

    db->ipc_ctx = NULL;

    return db;
}

__always_inline
static int sep_rfind(const char *str) {
    for (int i = (int) strlen(str); i >= 0; i--) {
        if (str[i] == '/') {
            return i;
        }
    }
    return -1;
}

void path_parent_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
    }

    const char *value = (const char *) sqlite3_value_text(argv[0]);

    int stop = sep_rfind(value);
    if (stop == -1) {
        sqlite3_result_null(ctx);
        return;
    }
    char parent[PATH_MAX * 3];
    strncpy(parent, value, stop);

    sqlite3_result_text(ctx, parent, stop, SQLITE_TRANSIENT);
}


void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
    }

    database_ipc_ctx_t *ipc_ctx = sqlite3_user_data(ctx);

    const char *current_job = (const char *) sqlite3_value_text(argv[0]);

    char buf[PATH_MAX];
    strcpy(buf, current_job);

    strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job);

    sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
}

void database_initialize(database_t *db) {
    CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));

    LOG_DEBUGF("database.c", "Initializing database %s", db->filename);
    if (db->type == INDEX_DATABASE) {
        CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, IndexDatabaseSchema, NULL, NULL, NULL));
    } else if (db->type == IPC_CONSUMER_DATABASE || db->type == IPC_PRODUCER_DATABASE) {
        CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, IpcDatabaseSchema, NULL, NULL, NULL));
    }

    sqlite3_close(db->db);
}

void database_open(database_t *db) {
    LOG_DEBUGF("database.c", "Opening database %s (%d)", db->filename, db->type);

    CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
    sqlite3_busy_timeout(db->db, 1000);

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA cache_size = -200000;", NULL, NULL, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA synchronous = OFF;", NULL, NULL, NULL));

    if (db->type == INDEX_DATABASE) {
        CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA temp_store = memory;", NULL, NULL, NULL));
    }

    if (db->type == INDEX_DATABASE) {
        // Prepare statements;
        CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                db->db,
                "SELECT data FROM thumbnail WHERE id=? AND num=? LIMIT 1;", -1,
                &db->select_thumbnail_stmt, NULL));
        CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                db->db,
                "UPDATE document SET marked=1 WHERE id=? AND mtime=? RETURNING id",
                -1,
                &db->mark_document_stmt, NULL));
        CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                db->db,
                "REPLACE INTO document_sidecar (id, json_data) VALUES (?,?)", -1,
                &db->write_document_sidecar_stmt, NULL));
        CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                db->db,
                "REPLACE INTO document (id, mtime, size, json_data) VALUES (?, ?, ?, ?);", -1,
                &db->write_document_stmt, NULL));
        CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                db->db,
                "INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;",
                -1,
                &db->write_thumbnail_stmt, NULL));

        // Create functions
        sqlite3_create_function(
                db->db,
                "path_parent",
                1,
                SQLITE_UTF8,
                NULL,
                path_parent_func,
                NULL,
                NULL
        );
    } else if (db->type == IPC_CONSUMER_DATABASE) {

        sqlite3_create_function(
                db->db,
                "save_current_job_info",
                1,
                SQLITE_UTF8,
                db->ipc_ctx,
                save_current_job_info,
                NULL,
                NULL
        );

        CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                db->db,
                "DELETE FROM parse_job WHERE id = (SELECT MIN(id) FROM parse_job)"
                " RETURNING filepath,mtime,st_size,save_current_job_info(filepath);",
                -1, &db->pop_parse_job_stmt, NULL
        ));
        CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                db->db,
                "DELETE FROM index_job WHERE id = (SELECT MIN(id) FROM index_job)"
                " RETURNING doc_id,type,line;",
                -1, &db->pop_index_job_stmt, NULL
        ));

    } else if (db->type == IPC_PRODUCER_DATABASE) {
        char sql[40];
        int max_size_mb = 10; // TODO: read from args.

        snprintf(sql, sizeof(sql), "PRAGMA max_page_count=%d", (max_size_mb * 1024 * 1024) / 4096);
        CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, sql, NULL, NULL, NULL));

        CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                db->db, "INSERT INTO parse_job (filepath,mtime,st_size) VALUES (?,?,?);", -1,
                &db->insert_parse_job_stmt, NULL));
        CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                db->db, "INSERT INTO index_job (doc_id,type,line) VALUES (?,?,?);", -1,
                &db->insert_index_job_stmt, NULL));

        sqlite3_create_function(
                db->db,
                "path_parent",
                1,
                SQLITE_UTF8,
                NULL,
                path_parent_func,
                NULL,
                NULL
        );
    }

}

void database_close(database_t *db, int optimize) {
    LOG_DEBUGF("database.c", "Closing database %s", db->filename);

    if (optimize) {
        LOG_DEBUG("database.c", "Optimizing database");
        CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL));
        CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA optimize;", NULL, NULL, NULL));
    }

    sqlite3_close(db->db);

    if (db->type == IPC_PRODUCER_DATABASE) {
        remove(db->filename);
    }

    free(db);
    db = NULL;
}

void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *return_value_len) {
    sqlite3_bind_text(db->select_thumbnail_stmt, 1, id, -1, SQLITE_STATIC);
    sqlite3_bind_int(db->select_thumbnail_stmt, 2, num);

    int ret = sqlite3_step(db->select_thumbnail_stmt);

    if (ret == SQLITE_DONE) {
        CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->select_thumbnail_stmt));
        *return_value_len = 0;
        return NULL;
    }

    CRASH_IF_STMT_FAIL(ret);

    const void *blob = sqlite3_column_blob(db->select_thumbnail_stmt, 0);
    const int blob_size = sqlite3_column_bytes(db->select_thumbnail_stmt, 0);

    *return_value_len = blob_size;
    void *return_data = malloc(blob_size);
    memcpy(return_data, blob, blob_size);

    CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->select_thumbnail_stmt));

    return return_data;
}

void database_write_index_descriptor(database_t *db, index_descriptor_t *desc) {

    sqlite3_exec(db->db, "DELETE FROM descriptor;", NULL, NULL, NULL);

    sqlite3_stmt *stmt;

    sqlite3_prepare_v2(db->db, "INSERT INTO descriptor (id, version_major, version_minor, version_patch,"
                               " root, name, rewrite_url, timestamp) VALUES (?,?,?,?,?,?,?,?);", -1, &stmt, NULL);
    sqlite3_bind_text(stmt, 1, desc->id, -1, SQLITE_STATIC);
    sqlite3_bind_int(stmt, 2, desc->version_major);
    sqlite3_bind_int(stmt, 3, desc->version_minor);
    sqlite3_bind_int(stmt, 4, desc->version_patch);
    sqlite3_bind_text(stmt, 5, desc->root, -1, SQLITE_STATIC);
    sqlite3_bind_text(stmt, 6, desc->name, -1, SQLITE_STATIC);
    sqlite3_bind_text(stmt, 7, desc->rewrite_url, -1, SQLITE_STATIC);
    sqlite3_bind_int64(stmt, 8, desc->timestamp);

    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));

    sqlite3_finalize(stmt);
}

index_descriptor_t *database_read_index_descriptor(database_t *db) {

    sqlite3_stmt *stmt;

    sqlite3_prepare_v2(db->db, "SELECT id, version_major, version_minor, version_patch,"
                               " root, name, rewrite_url, timestamp FROM descriptor;", -1, &stmt, NULL);

    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));

    const char *id = (char *) sqlite3_column_text(stmt, 0);
    int v_major = sqlite3_column_int(stmt, 1);
    int v_minor = sqlite3_column_int(stmt, 2);
    int v_patch = sqlite3_column_int(stmt, 3);
    const char *root = (char *) sqlite3_column_text(stmt, 4);
    const char *name = (char *) sqlite3_column_text(stmt, 5);
    const char *rewrite_url = (char *) sqlite3_column_text(stmt, 6);
    int timestamp = sqlite3_column_int(stmt, 7);

    index_descriptor_t *desc = malloc(sizeof(index_descriptor_t));
    strcpy(desc->id, id);
    snprintf(desc->version, sizeof(desc->version), "%d.%d.%d", v_major, v_minor, v_patch);
    desc->version_major = v_major;
    desc->version_minor = v_minor;
    desc->version_patch = v_patch;
    strcpy(desc->root, root);
    strcpy(desc->name, name);
    strcpy(desc->rewrite_url, rewrite_url);
    desc->timestamp = timestamp;

    CRASH_IF_NOT_SQLITE_OK(sqlite3_finalize(stmt));

    return desc;
}

database_iterator_t *database_create_delete_list_iterator(database_t *db) {

    sqlite3_stmt *stmt;
    sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list;", -1, &stmt, NULL);

    database_iterator_t *iter = malloc(sizeof(database_iterator_t));

    iter->stmt = stmt;
    iter->db = db;

    return iter;
}

char *database_delete_list_iter(database_iterator_t *iter) {
    int ret = sqlite3_step(iter->stmt);

    if (ret == SQLITE_ROW) {
        const char *id = (const char *) sqlite3_column_text(iter->stmt, 0);
        char *id_heap = malloc(strlen(id) + 1);
        strcpy(id_heap, id);
        return id_heap;
    }

    if (ret != SQLITE_DONE) {
        LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
    }

    if (sqlite3_finalize(iter->stmt) != SQLITE_OK) {
        LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
    }

    iter->stmt = NULL;

    return NULL;
}

database_iterator_t *database_create_document_iterator(database_t *db) {

    sqlite3_stmt *stmt;

    // TODO optimization: remove mtime, size, _id from json_data

    sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE"
                               " WHEN sc.json_data IS NULL THEN"
                               "  CASE"
                               "   WHEN t.tag IS NULL THEN"
                               "    json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime)"
                               "   ELSE"
                               "    json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime, '$.tag', json_group_array(t.tag))"
                               "  END"
                               " ELSE"
                               "  CASE"
                               "   WHEN t.tag IS NULL THEN"
                               "    json_patch(json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime), sc.json_data)"
                               "   ELSE"
                               // This will overwrite any tags specified in the sidecar file!
                               // TODO: concatenate the two arrays?
                               "    json_set(json_patch(document.json_data, sc.json_data), '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime, '$.tag', json_group_array(t.tag))"
                               "  END"
                               " END"
                               " FROM document"
                               " LEFT JOIN document_sidecar sc ON document.id = sc.id"
                               " LEFT JOIN tag t ON document.id = t.id"
                               " GROUP BY document.id)"
                               " SELECT json_set(j, '$.index', (SELECT id FROM descriptor)) FROM doc", -1, &stmt, NULL);

    database_iterator_t *iter = malloc(sizeof(database_iterator_t));

    iter->stmt = stmt;
    iter->db = db;

    return iter;
}

cJSON *database_document_iter(database_iterator_t *iter) {

    if (iter->stmt == NULL) {
        LOG_ERROR("database.c", "FIXME: database_document_iter() called after iteration stopped");
        return NULL;
    }

    int ret = sqlite3_step(iter->stmt);

    if (ret == SQLITE_ROW) {
        const char *json_string = (const char *) sqlite3_column_text(iter->stmt, 0);
        return cJSON_Parse(json_string);
    }

    if (ret != SQLITE_DONE) {
        LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db));
    }

    if (sqlite3_finalize(iter->stmt) != SQLITE_OK) {
        LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db));
    }

    iter->stmt = NULL;

    return NULL;
}

cJSON *database_incremental_scan_begin(database_t *db) {
    LOG_DEBUG("database.c", "Preparing database for incremental scan");
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "UPDATE document SET marked=0;", NULL, NULL, NULL));
}

cJSON *database_incremental_scan_end(database_t *db) {
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "DELETE FROM delete_list WHERE id IN (SELECT id FROM document WHERE marked=1);",
            NULL, NULL, NULL
    ));

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "DELETE FROM thumbnail WHERE id IN (SELECT id FROM document WHERE marked=0);",
            NULL, NULL, NULL
    ));

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "INSERT INTO delete_list (id) SELECT id FROM document WHERE marked=0;",
            NULL, NULL, NULL
    ));

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "DELETE FROM document_sidecar WHERE id IN (SELECT id FROM document WHERE marked=0);",
            NULL, NULL, NULL
    ));

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "DELETE FROM document WHERE marked=0;",
            NULL, NULL, NULL
    ));
}

int database_mark_document(database_t *db, const char *id, int mtime) {
    sqlite3_bind_text(db->mark_document_stmt, 1, id, -1, SQLITE_STATIC);
    sqlite3_bind_int(db->mark_document_stmt, 2, mtime);

    pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
    int ret = sqlite3_step(db->mark_document_stmt);

    if (ret == SQLITE_ROW) {
        CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->mark_document_stmt));
        pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
        return TRUE;
    }

    if (ret == SQLITE_DONE) {
        CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->mark_document_stmt));
        pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
        return FALSE;
    }
    pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);

    CRASH_IF_STMT_FAIL(ret);
}

void database_write_document(database_t *db, document_t *doc, const char *json_data) {
    sqlite3_bind_text(db->write_document_stmt, 1, doc->doc_id, -1, SQLITE_STATIC);
    sqlite3_bind_int(db->write_document_stmt, 2, doc->mtime);
    sqlite3_bind_int64(db->write_document_stmt, 3, (long) doc->size);
    sqlite3_bind_text(db->write_document_stmt, 4, json_data, -1, SQLITE_STATIC);

    pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
    CRASH_IF_STMT_FAIL(sqlite3_step(db->write_document_stmt));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_document_stmt));
    pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
}


void database_write_document_sidecar(database_t *db, const char *id, const char *json_data) {
    sqlite3_bind_text(db->write_document_sidecar_stmt, 1, id, -1, SQLITE_STATIC);
    sqlite3_bind_text(db->write_document_sidecar_stmt, 2, json_data, -1, SQLITE_STATIC);

    pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
    CRASH_IF_STMT_FAIL(sqlite3_step(db->write_document_sidecar_stmt));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_document_sidecar_stmt));
    pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
}

void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size) {
    sqlite3_bind_text(db->write_thumbnail_stmt, 1, id, -1, SQLITE_STATIC);
    sqlite3_bind_int(db->write_thumbnail_stmt, 2, num);
    sqlite3_bind_blob(db->write_thumbnail_stmt, 3, data, (int) data_size, SQLITE_STATIC);

    pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
    CRASH_IF_STMT_FAIL(sqlite3_step(db->write_thumbnail_stmt));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_thumbnail_stmt));
    pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
}


//void database_create_fts_index(database_t *db, database_t *fts_db) {
//    // In a separate file,
//
//    // use database_initialize() to create FTS schema
//    // if --force-reset, then truncate the tables first
//
//    /*
//     * create/append fts table
//     *
//     * create/append scalar index table with
//     * id,index,size,mtime,mime
//     *
//     * create/append path index table with
//     * index,path,depth
//     *
//     * content table is a view with SELECT UNION for all attached tables
//     * random_seed column
//     */
//
//    // INSERT INTO ft(ft) VALUES('optimize');
//}

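// [Editor's note, not in the original file] database_get_work() below first
// waits on has_work_cond (with a short timed wait) until the producer reports
// queued jobs or signals no_more_jobs, then pops one row atomically with
// DELETE ... RETURNING while holding db_mutex.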
job_t *database_get_work(database_t *db, job_type_t job_type) {
    job_t *job;

    pthread_mutex_lock(&db->ipc_ctx->mutex);
    while (db->ipc_ctx->job_count == 0 && !db->ipc_ctx->no_more_jobs) {
        pthread_cond_timedwait_ms(&db->ipc_ctx->has_work_cond, &db->ipc_ctx->mutex, 10);
    }
    pthread_mutex_unlock(&db->ipc_ctx->mutex);

    pthread_mutex_lock(&db->ipc_ctx->db_mutex);

    if (job_type == JOB_PARSE_JOB) {
        int ret = sqlite3_step(db->pop_parse_job_stmt);
        if (ret == SQLITE_DONE) {
            CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_parse_job_stmt));
            pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
            return NULL;
        } else {
            CRASH_IF_STMT_FAIL(ret);
        }

        job = malloc(sizeof(*job));

        job->parse_job = create_parse_job(
                (const char *) sqlite3_column_text(db->pop_parse_job_stmt, 0),
                sqlite3_column_int(db->pop_parse_job_stmt, 1),
                sqlite3_column_int64(db->pop_parse_job_stmt, 2));

        CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_parse_job_stmt));
    } else {

        int ret = sqlite3_step(db->pop_index_job_stmt);

        if (ret == SQLITE_DONE) {
            CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
            pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
            return NULL;
        }

        CRASH_IF_STMT_FAIL(ret);

        job = malloc(sizeof(*job));

        const char *line = (const char *) sqlite3_column_text(db->pop_index_job_stmt, 2);
        if (line != NULL) {
            job->bulk_line = malloc(sizeof(es_bulk_line_t) + strlen(line) + 1);
            strcpy(job->bulk_line->line, line);
        } else {
            job->bulk_line = malloc(sizeof(es_bulk_line_t));
        }
        strcpy(job->bulk_line->doc_id, (const char *) sqlite3_column_text(db->pop_index_job_stmt, 0));
        job->bulk_line->type = sqlite3_column_int(db->pop_index_job_stmt, 1);
        job->bulk_line->next = NULL;

        CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
    }

    pthread_mutex_unlock(&db->ipc_ctx->db_mutex);

    pthread_mutex_lock(&db->ipc_ctx->mutex);
    db->ipc_ctx->job_count -= 1;
    pthread_mutex_unlock(&db->ipc_ctx->mutex);

    job->type = job_type;
    return job;
}

void database_add_work(database_t *db, job_t *job) {
|
||||
int ret;
|
||||
|
||||
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
|
||||
|
||||
if (job->type == JOB_PARSE_JOB) {
|
||||
do {
|
||||
sqlite3_bind_text(db->insert_parse_job_stmt, 1, job->parse_job->filepath, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(db->insert_parse_job_stmt, 2, job->parse_job->vfile.mtime);
|
||||
sqlite3_bind_int64(db->insert_parse_job_stmt, 3, (long) job->parse_job->vfile.st_size);
|
||||
|
||||
ret = sqlite3_step(db->insert_parse_job_stmt);
|
||||
|
||||
if (ret == SQLITE_FULL) {
|
||||
sqlite3_reset(db->insert_parse_job_stmt);
|
||||
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
|
||||
usleep(1000000);
|
||||
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
|
||||
continue;
|
||||
} else {
|
||||
CRASH_IF_STMT_FAIL(ret);
|
||||
}
|
||||
|
||||
ret = sqlite3_reset(db->insert_parse_job_stmt);
|
||||
if (ret == SQLITE_FULL) {
|
||||
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
|
||||
usleep(100000);
|
||||
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
|
||||
} else if (ret != SQLITE_OK) {
|
||||
LOG_FATALF("database.c", "sqlite3_reset returned error %d", ret);
|
||||
}
|
||||
} while (ret != SQLITE_DONE && ret != SQLITE_OK);
|
||||
} else if (job->type == JOB_BULK_LINE) {
|
||||
do {
|
||||
sqlite3_bind_text(db->insert_index_job_stmt, 1, job->bulk_line->doc_id, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(db->insert_index_job_stmt, 2, job->bulk_line->type);
|
||||
if (job->bulk_line->type != ES_BULK_LINE_DELETE) {
|
||||
sqlite3_bind_text(db->insert_index_job_stmt, 3, job->bulk_line->line, -1, SQLITE_STATIC);
|
||||
} else {
|
||||
sqlite3_bind_null(db->insert_index_job_stmt, 3);
|
||||
}
|
||||
|
||||
ret = sqlite3_step(db->insert_index_job_stmt);
|
||||
|
||||
if (ret == SQLITE_FULL) {
|
||||
sqlite3_reset(db->insert_index_job_stmt);
|
||||
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
|
||||
usleep(100000);
|
||||
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
|
||||
continue;
|
||||
} else {
|
||||
CRASH_IF_STMT_FAIL(ret);
|
||||
}
|
||||
|
||||
ret = sqlite3_reset(db->insert_index_job_stmt);
|
||||
if (ret == SQLITE_FULL) {
|
||||
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
|
||||
usleep(100000);
|
||||
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
|
||||
} else if (ret != SQLITE_OK) {
|
||||
LOG_FATALF("database.c", "sqlite3_reset returned error %d", ret);
|
||||
}
|
||||
|
||||
} while (ret != SQLITE_DONE && ret != SQLITE_OK);
|
||||
} else {
|
||||
LOG_FATAL("database.c", "FIXME: invalid job type");
|
||||
}
|
||||
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
|
||||
|
||||
pthread_mutex_lock(&db->ipc_ctx->mutex);
|
||||
db->ipc_ctx->job_count += 1;
|
||||
pthread_cond_signal(&db->ipc_ctx->has_work_cond);
|
||||
pthread_mutex_unlock(&db->ipc_ctx->mutex);
|
||||
}
|
||||
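A minimal sketch of how a consumer thread could drain this SQLite-backed queue (the stop condition and cleanup are assumptions, not sist2's actual worker loop):

// Sketch only -- the real worker loop lives in the thread pool code.
void worker_loop_sketch(database_t *ipc_db) {
    while (TRUE) {
        job_t *job = database_get_work(ipc_db, JOB_PARSE_JOB);
        if (job == NULL) {
            break; // queue drained (and producers set no_more_jobs)
        }
        // ... parse job->parse_job here ...
        free(job->parse_job);
        free(job);
    }
}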
167 src/database/database.h Normal file
@@ -0,0 +1,167 @@
#ifndef SIST2_DATABASE_H
#define SIST2_DATABASE_H

#include <sqlite3.h>
#include <cjson/cJSON.h>
#include "src/sist.h"
#include "src/index/elastic.h"

typedef struct index_descriptor index_descriptor_t;

extern const char *IpcDatabaseSchema;
extern const char *IndexDatabaseSchema;

typedef enum {
    INDEX_DATABASE,
    IPC_CONSUMER_DATABASE,
    IPC_PRODUCER_DATABASE,
    FTS_DATABASE
} database_type_t;

typedef enum {
    DATABASE_STAT_INVALID,
    DATABASE_STAT_TREEMAP,
    DATABASE_STAT_MIME_AGG,
    DATABASE_STAT_SIZE_AGG,
    DATABASE_STAT_DATE_AGG,
} database_stat_type_d;

typedef enum {
    JOB_UNDEFINED,
    JOB_BULK_LINE,
    JOB_PARSE_JOB
} job_type_t;

typedef struct {
    job_type_t type;
    union {
        parse_job_t *parse_job;
        es_bulk_line_t *bulk_line;
    };
} job_t;

typedef struct {
    int job_count;
    int no_more_jobs;
    int completed_job_count;

    pthread_mutex_t mutex;
    pthread_mutex_t db_mutex;
    pthread_mutex_t index_db_mutex;
    pthread_cond_t has_work_cond;
    char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t;

typedef struct database {
    char filename[PATH_MAX];
    database_type_t type;
    sqlite3 *db;

    // Prepared statements
    sqlite3_stmt *select_thumbnail_stmt;
    sqlite3_stmt *treemap_merge_up_update_stmt;
    sqlite3_stmt *treemap_merge_up_delete_stmt;

    sqlite3_stmt *mark_document_stmt;
    sqlite3_stmt *write_document_stmt;
    sqlite3_stmt *write_document_sidecar_stmt;
    sqlite3_stmt *write_thumbnail_stmt;

    sqlite3_stmt *insert_parse_job_stmt;
    sqlite3_stmt *insert_index_job_stmt;
    sqlite3_stmt *pop_parse_job_stmt;
    sqlite3_stmt *pop_index_job_stmt;

    database_ipc_ctx_t *ipc_ctx;
} database_t;

typedef struct {
    database_t *db;
    sqlite3_stmt *stmt;
} database_iterator_t;

typedef struct {
    const char *path;
    const char *parent;
    long size;
} treemap_row_t;

static treemap_row_t null_treemap_row = {0, 0, 0};


database_t *database_create(const char *filename, database_type_t type);

void database_initialize(database_t *db);

void database_open(database_t *db);

void database_close(database_t *, int optimize);

void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size);

void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *return_value_len);

void database_write_index_descriptor(database_t *db, index_descriptor_t *desc);

index_descriptor_t *database_read_index_descriptor(database_t *db);

void database_write_document(database_t *db, document_t *doc, const char *json_data);

database_iterator_t *database_create_document_iterator(database_t *db);

cJSON *database_document_iter(database_iterator_t *);

#define database_document_iter_foreach(element, iter) \
    for (cJSON *(element) = database_document_iter(iter); (element) != NULL; (element) = database_document_iter(iter))

database_iterator_t *database_create_delete_list_iterator(database_t *db);

char *database_delete_list_iter(database_iterator_t *iter);

#define database_delete_list_iter_foreach(element, iter) \
    for (char *(element) = database_delete_list_iter(iter); (element) != NULL; (element) = database_delete_list_iter(iter))


cJSON *database_incremental_scan_begin(database_t *db);

cJSON *database_incremental_scan_end(database_t *db);

int database_mark_document(database_t *db, const char *id, int mtime);

void database_write_document_sidecar(database_t *db, const char *id, const char *json_data);

database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold);

treemap_row_t database_treemap_iter(database_iterator_t *iter);

#define database_treemap_iter_foreach(element, iter) \
    for (treemap_row_t element = database_treemap_iter(iter); element.path != NULL; element = database_treemap_iter(iter))


void database_generate_stats(database_t *db, double treemap_threshold);

database_stat_type_d database_get_stat_type_by_mnemonic(const char *name);

job_t *database_get_work(database_t *db, job_type_t job_type);

void database_add_work(database_t *db, job_t *job);

//void database_index(database_t *db);

cJSON *database_get_stats(database_t *db, database_stat_type_d type);

#define CRASH_IF_STMT_FAIL(x) do { \
    int return_value = x; \
    if (return_value != SQLITE_DONE && return_value != SQLITE_ROW) { \
        LOG_FATALF("database.c", "Sqlite error @ database.c:%d : (%d) %s", __LINE__, return_value, sqlite3_errmsg(db->db)); \
    } \
} while (0)

#define CRASH_IF_NOT_SQLITE_OK(x) do { \
    int return_value = x; \
    if (return_value != SQLITE_OK) { \
        LOG_FATALF("database.c", "Sqlite error @ database.c:%d : (%d) %s", __LINE__, return_value, sqlite3_errmsg(db->db)); \
    } \
} while (0)

#endif //SIST2_DATABASE_H
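A minimal usage sketch for the iterator macros declared above (the filename is a placeholder, and the sketch assumes the caller owns the cJSON object returned by each iteration):

// Sketch only -- dump every document of an index database as JSON.
void dump_documents_sketch(void) {
    database_t *db = database_create("example.sist2", INDEX_DATABASE); // placeholder path
    database_open(db);

    database_iterator_t *iter = database_create_document_iterator(db);
    database_document_iter_foreach(doc, iter) {
        char *str = cJSON_PrintUnformatted(doc);
        printf("%s\n", str);
        cJSON_free(str);
        cJSON_Delete(doc); // assumption: iterator hands over ownership
    }
    free(iter);

    database_close(db, FALSE);
}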
78 src/database/database_schema.c Normal file
@@ -0,0 +1,78 @@

const char *IpcDatabaseSchema =
        "CREATE TABLE parse_job ("
        "  id INTEGER PRIMARY KEY,"
        "  filepath TEXT NOT NULL,"
        "  mtime INTEGER NOT NULL,"
        "  st_size INTEGER NOT NULL"
        ");"
        ""
        "CREATE TABLE index_job ("
        "  id INTEGER PRIMARY KEY,"
        "  doc_id TEXT NOT NULL CHECK ( length(doc_id) = 32 ),"
        "  type INTEGER NOT NULL,"
        "  line TEXT"
        ");";

const char *IndexDatabaseSchema =
        "CREATE TABLE thumbnail ("
        "  id TEXT NOT NULL CHECK ( length(id) = 32 ),"
        "  num INTEGER NOT NULL,"
        "  data BLOB NOT NULL,"
        "  PRIMARY KEY(id, num)"
        ") WITHOUT ROWID;"
        ""
        "CREATE TABLE document ("
        "  id TEXT PRIMARY KEY CHECK ( length(id) = 32 ),"
        "  marked INTEGER NOT NULL DEFAULT (1),"
        "  mtime INTEGER NOT NULL,"
        "  size INTEGER NOT NULL,"
        "  json_data TEXT NOT NULL CHECK ( json_valid(json_data) )"
        ") WITHOUT ROWID;"
        ""
        "CREATE TABLE delete_list ("
        "  id TEXT PRIMARY KEY CHECK ( length(id) = 32 )"
        ") WITHOUT ROWID;"
        ""
        "CREATE TABLE tag ("
        "  id TEXT NOT NULL,"
        "  tag TEXT NOT NULL"
        ");"
        ""
        "CREATE TABLE document_sidecar ("
        "  id TEXT PRIMARY KEY NOT NULL,"
        "  json_data TEXT NOT NULL"
        ") WITHOUT ROWID;"
        ""
        "CREATE TABLE descriptor ("
        "  id TEXT NOT NULL,"
        "  version_major INTEGER NOT NULL,"
        "  version_minor INTEGER NOT NULL,"
        "  version_patch INTEGER NOT NULL,"
        "  root TEXT NOT NULL,"
        "  name TEXT NOT NULL,"
        "  rewrite_url TEXT,"
        "  timestamp INTEGER NOT NULL"
        ");"
        ""
        "CREATE TABLE stats_treemap ("
        "  path TEXT NOT NULL,"
        "  size INTEGER NOT NULL"
        ");"
        ""
        "CREATE TABLE stats_size_agg ("
        "  bucket INTEGER NOT NULL,"
        "  count INTEGER NOT NULL"
        ");"
        ""
        "CREATE TABLE stats_date_agg ("
        "  bucket INTEGER NOT NULL,"
        "  count INTEGER NOT NULL"
        ");"
        ""
        "CREATE TABLE stats_mime_agg ("
        "  mime TEXT NOT NULL,"
        "  size INTEGER NOT NULL,"
        "  count INTEGER NOT NULL"
        ");";

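Presumably these schema strings get applied in one shot inside database_initialize(); a minimal sketch of how such a string is executed with the standard SQLite API:

// Sketch only -- error handling reduced to a single check.
void apply_schema_sketch(sqlite3 *db, const char *schema) {
    char *err = NULL;
    // sqlite3_exec runs every ";"-separated statement in the string
    if (sqlite3_exec(db, schema, NULL, NULL, &err) != SQLITE_OK) {
        fprintf(stderr, "schema error: %s\n", err);
        sqlite3_free(err);
    }
}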
242 src/database/database_stats.c Normal file
@@ -0,0 +1,242 @@
#include "database.h"
#include "src/sist.h"
#include "src/ctx.h"

#define TREEMAP_MINIMUM_MERGES_TO_CONTINUE (100)
#define SIZE_BUCKET (long)(5 * 1000 * 1000)
#define DATE_BUCKET (long)(2629800) // ~30 days


database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold) {

    sqlite3_stmt *stmt;

    sqlite3_prepare_v2(db->db,
                       "SELECT path, path_parent(path), size FROM tm"
                       " WHERE path_parent(path) IN (SELECT path FROM tm)"
                       " AND size<?",
                       -1, &stmt, NULL);

    sqlite3_bind_int64(stmt, 1, threshold);

    database_iterator_t *iter = malloc(sizeof(database_iterator_t));

    iter->stmt = stmt;
    iter->db = db;

    return iter;
}

treemap_row_t database_treemap_iter(database_iterator_t *iter) {

    if (iter->stmt == NULL) {
        LOG_FATAL("database.c", "FIXME: database_treemap_iter() called after iteration stopped");
    }

    int ret = sqlite3_step(iter->stmt);

    if (ret == SQLITE_ROW) {
        treemap_row_t row = {
                .path = (const char *) sqlite3_column_text(iter->stmt, 0),
                .parent = (const char *) sqlite3_column_text(iter->stmt, 1),
                .size = sqlite3_column_int64(iter->stmt, 2)
        };

        return row;
    }

    if (ret != SQLITE_DONE) {
        LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db));
    }

    sqlite3_finalize(iter->stmt);
    iter->stmt = NULL;

    return (treemap_row_t) {NULL, NULL, 0};
}

void database_generate_stats(database_t *db, double treemap_threshold) {

    LOG_INFO("database.c", "Generating stats");

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_size_agg;", NULL, NULL, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_date_agg;", NULL, NULL, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_mime_agg;", NULL, NULL, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_treemap;", NULL, NULL, NULL));

    CRASH_IF_NOT_SQLITE_OK(
            sqlite3_exec(db->db, "CREATE TEMP TABLE tm(path TEXT PRIMARY KEY, size INT);", NULL, NULL, NULL));

    sqlite3_prepare_v2(db->db, "UPDATE tm SET size=size+? WHERE path=?;", -1, &db->treemap_merge_up_update_stmt, NULL);
    sqlite3_prepare_v2(db->db, "DELETE FROM tm WHERE path = ?;", -1, &db->treemap_merge_up_delete_stmt, NULL);

    // size aggregation
    sqlite3_stmt *stmt;
    sqlite3_prepare_v2(db->db, "INSERT INTO stats_size_agg"
                               " SELECT"
                               "  cast(size / ?1 as int) * ?1 as bucket,"
                               "  count(*) as count"
                               " FROM document"
                               " GROUP BY bucket", -1, &stmt, NULL);
    sqlite3_bind_int(stmt, 1, SIZE_BUCKET);
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));

    sqlite3_finalize(stmt);

    // date aggregation
    sqlite3_prepare_v2(db->db, "INSERT INTO stats_date_agg"
                               " SELECT"
                               "  cast(mtime / ?1 as int) * ?1 as bucket,"
                               "  count(*) as count"
                               " FROM document"
                               " GROUP BY bucket", -1, &stmt, NULL);
    sqlite3_bind_int(stmt, 1, DATE_BUCKET);
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));

    sqlite3_finalize(stmt);

    // mime aggregation
    sqlite3_prepare_v2(db->db, "INSERT INTO stats_mime_agg"
                               " SELECT"
                               "  (json_data->>'mime') as bucket,"
                               "  sum(size),"
                               "  count(*)"
                               " FROM document"
                               " WHERE bucket IS NOT NULL"
                               " GROUP BY bucket", -1, &stmt, NULL);
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));

    sqlite3_finalize(stmt);

    // Treemap
    sqlite3_prepare_v2(db->db, "SELECT SUM(size) FROM document;", -1, &stmt, NULL);
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
    long total_size = sqlite3_column_int64(stmt, 0);
    long threshold = (long) ((double) total_size * treemap_threshold);
    sqlite3_finalize(stmt);

    // flat map
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
                                        "INSERT INTO tm (path, size) SELECT json_data->>'path' as path, sum(size)"
                                        " FROM document WHERE json_data->>'parent' IS NULL GROUP BY path;",
                                        NULL, NULL, NULL));

    // Merge up
    int merged_rows = 0;
    do {
        if (merged_rows) {
            LOG_INFOF("database.c", "Treemap merge iteration (%d rows changed)", merged_rows);
        }
        merged_rows = 0;

        sqlite3_prepare_v2(db->db,
                           "INSERT INTO tm (path, size) SELECT path_parent(path) as parent, 0 "
                           " FROM tm WHERE parent not IN (SELECT path FROM tm) AND size<?"
                           " ON CONFLICT DO NOTHING;", -1, &stmt, NULL);
        sqlite3_bind_int64(stmt, 1, threshold);
        CRASH_IF_STMT_FAIL(sqlite3_step(stmt));

        database_iterator_t *iter = database_create_treemap_iterator(db, threshold);
        database_treemap_iter_foreach(row, iter) {
            sqlite3_bind_int64(db->treemap_merge_up_update_stmt, 1, row.size);
            sqlite3_bind_text(db->treemap_merge_up_update_stmt, 2, row.parent, -1, SQLITE_STATIC);
            CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_update_stmt));
            CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_update_stmt));

            sqlite3_bind_text(db->treemap_merge_up_delete_stmt, 1, row.path, -1, SQLITE_STATIC);
            CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_delete_stmt));
            CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_delete_stmt));

            merged_rows += 1;
        }
    } while (merged_rows > TREEMAP_MINIMUM_MERGES_TO_CONTINUE);

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
                                        "INSERT INTO stats_treemap (path, size) SELECT path,size FROM tm;",
                                        NULL, NULL, NULL));

    LOG_INFO("database.c", "Done!");
}

database_stat_type_d database_get_stat_type_by_mnemonic(const char *name) {
    if (strcmp(name, "TMAP") == 0) {
        return DATABASE_STAT_TREEMAP;
    }
    if (strcmp(name, "MAGG") == 0) {
        return DATABASE_STAT_MIME_AGG;
    }
    if (strcmp(name, "SAGG") == 0) {
        return DATABASE_STAT_SIZE_AGG;
    }
    if (strcmp(name, "DAGG") == 0) {
        return DATABASE_STAT_DATE_AGG;
    }

    return DATABASE_STAT_INVALID;
}

cJSON *database_get_stats(database_t *db, database_stat_type_d type) {

    sqlite3_stmt *stmt;

    switch (type) {
        case DATABASE_STAT_TREEMAP:
            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                    db->db, "SELECT path,size FROM stats_treemap", -1, &stmt, NULL
            ));
            break;
        case DATABASE_STAT_DATE_AGG:
            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                    db->db, "SELECT bucket,count FROM stats_date_agg", -1, &stmt, NULL
            ));
            break;
        case DATABASE_STAT_SIZE_AGG:
            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                    db->db, "SELECT bucket,count FROM stats_size_agg", -1, &stmt, NULL
            ));
            break;
        case DATABASE_STAT_MIME_AGG:
            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
                    db->db, "SELECT mime,size,count FROM stats_mime_agg", -1, &stmt, NULL
            ));
            break;
        case DATABASE_STAT_INVALID:
        default:
            LOG_FATALF("database_stats.c", "Invalid stat type: %d", type);
    }

    cJSON *json = cJSON_CreateArray();

    int ret;
    do {
        ret = sqlite3_step(stmt);
        CRASH_IF_STMT_FAIL(ret);

        if (ret == SQLITE_DONE) {
            break;
        }

        cJSON *row = cJSON_CreateObject();

        switch (type) {
            case DATABASE_STAT_TREEMAP:
                cJSON_AddStringToObject(row, "path", (const char *) sqlite3_column_text(stmt, 0));
                cJSON_AddNumberToObject(row, "size", (double) sqlite3_column_int64(stmt, 1));
                break;
            case DATABASE_STAT_DATE_AGG:
            case DATABASE_STAT_SIZE_AGG:
                cJSON_AddNumberToObject(row, "bucket", (double) sqlite3_column_int64(stmt, 0));
                cJSON_AddNumberToObject(row, "count", (double) sqlite3_column_int64(stmt, 1));
                break;
            case DATABASE_STAT_MIME_AGG:
                cJSON_AddStringToObject(row, "mime", (const char *) sqlite3_column_text(stmt, 0));
                cJSON_AddNumberToObject(row, "size", (double) sqlite3_column_int64(stmt, 1));
                cJSON_AddNumberToObject(row, "count", (double) sqlite3_column_int64(stmt, 2));
                break;
        }

        cJSON_AddItemToArray(json, row);
    } while (TRUE);

    return json;
}
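A minimal sketch of generating and reading back the aggregations above (the 5% treemap threshold is an arbitrary example value):

// Sketch only.
void print_mime_stats_sketch(database_t *db) {
    database_generate_stats(db, 0.05); // merge treemap nodes under 5% of total size

    cJSON *stats = database_get_stats(db, DATABASE_STAT_MIME_AGG);
    char *str = cJSON_Print(stats);
    printf("%s\n", str);

    cJSON_free(str);
    cJSON_Delete(stats);
}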
@@ -29,7 +29,7 @@ void destroy_indexer(es_indexer_t *indexer) {
        return;
    }

    LOG_DEBUG("elastic.c", "Destroying indexer")
    LOG_DEBUG("elastic.c", "Destroying indexer");

    if (indexer->es_url != NULL) {
        free(indexer->es_url);
@@ -64,18 +64,17 @@ void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
    cJSON_Delete(line);
}

void index_json_func(void *arg) {
    es_bulk_line_t *line = arg;
    elastic_index_line(line);
}
void delete_document(const char* document_id_str, void* UNUSED(_data)) {
    es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t));
    bulk_line->type = ES_BULK_LINE_DELETE;
    bulk_line->next = NULL;
    strcpy(bulk_line->doc_id, document_id_str);
    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}

void delete_document(const char *document_id) {
    es_bulk_line_t bulk_line;
    bulk_line.type = ES_BULK_LINE_DELETE;
    bulk_line.next = NULL;
    strcpy(bulk_line.doc_id, document_id);

    tpool_add_work(IndexCtx.pool, &(job_t) {
            .type = JOB_BULK_LINE,
            .bulk_line = &bulk_line,
    });
}

@@ -92,7 +91,11 @@ void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
    bulk_line->next = NULL;

    cJSON_free(json);
    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
    tpool_add_work(IndexCtx.pool, &(job_t) {
            .type = JOB_BULK_LINE,
            .bulk_line = bulk_line,
    });
    free(bulk_line);
}

void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {
@@ -266,7 +269,7 @@ void print_error(response_t *r) {
void _elastic_flush(int max) {

    if (max == 0) {
        LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
        LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue");
        return;
    }

@@ -279,13 +282,13 @@ void _elastic_flush(int max) {
    response_t *r = web_post(bulk_url, buf, IndexCtx.es_insecure_ssl);

    if (r->status_code == 0) {
        LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
        LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url);
    }

    if (r->status_code == 413) {

        if (max <= 1) {
            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id)
            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id);
            free_response(r);
            free(buf);
            free_queue(1);
@@ -306,7 +309,7 @@ void _elastic_flush(int max) {

    free_response(r);
    free(buf);
    LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
    LOG_WARNING("elastic.c", "Got 429 status, will retry after delay");
    usleep(1000000 * 20);
    _elastic_flush(max);
    return;
@@ -441,7 +444,7 @@ es_version_t *elastic_get_version(const char *es_url, int insecure) {
    }

    if (cJSON_GetObjectItem(response, "error") != NULL) {
        LOG_WARNING("elastic.c", "Could not get Elasticsearch version")
        LOG_WARNING("elastic.c", "Could not get Elasticsearch version");
        print_error(r);
        free_response(r);
        return NULL;
@@ -477,7 +480,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
    IndexCtx.es_version = es_version;

    if (es_version == NULL) {
        LOG_FATAL("elastic.c", "Could not get ES version")
        LOG_FATAL("elastic.c", "Could not get ES version");
    }

    LOG_INFOF("elastic.c",
@@ -485,7 +488,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
              format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), IS_LEGACY_VERSION(es_version));

    if (!IS_SUPPORTED_ES_VERSION(es_version)) {
        LOG_FATAL("elastic.c", "This elasticsearch version is not supported!")
        LOG_FATAL("elastic.c", "This elasticsearch version is not supported!");
    }

    char *settings = NULL;
@@ -512,7 +515,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s

    if (r->status_code != 200) {
        print_error(r);
        LOG_FATAL("elastic.c", "Could not create index")
        LOG_FATAL("elastic.c", "Could not create index");
    }

    LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
@@ -533,12 +536,13 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
    LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
    if (r->status_code != 200) {
        print_error(r);
        LOG_FATAL("elastic.c", "Could not update user settings")
        LOG_FATAL("elastic.c", "Could not update user settings");
    }
    free_response(r);

    if (IS_LEGACY_VERSION(es_version)) {
        snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
        snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url,
                 IndexCtx.es_index);
    } else {
        snprintf(url, sizeof(url), "%s/%s/_mappings", IndexCtx.es_url, IndexCtx.es_index);
    }
@@ -547,7 +551,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
    LOG_INFOF("elastic.c", "Update ES mappings <%d>", r->status_code);
    if (r->status_code != 200) {
        print_error(r);
        LOG_FATAL("elastic.c", "Could not update user mappings")
        LOG_FATAL("elastic.c", "Could not update user mappings");
    }
    free_response(r);

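The enqueue calls in these hunks switch from passing a bare es_bulk_line_t to a job_t tagged union built as a compound literal; a minimal sketch of the pattern (tpool_add_work is assumed to copy what it needs before returning, since the compound literal dies at the end of the statement):

// Sketch only -- illustrates the tagged-union job submission pattern.
es_bulk_line_t *line = make_bulk_line(); // hypothetical helper
tpool_add_work(IndexCtx.pool, &(job_t) {
        .type = JOB_BULK_LINE,
        .bulk_line = line,
});
free(line); // safe only if tpool_add_work deep-copies the line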
@@ -46,7 +46,7 @@ void print_json(cJSON *document, const char index_id_str[SIST_INDEX_ID_LEN]);

void index_json(cJSON *document, const char doc_id[SIST_INDEX_ID_LEN]);

void delete_document(const char *document_id_str, void* data);
void delete_document(const char *document_id);

es_indexer_t *create_indexer(const char *url, const char *index);

File diff suppressed because one or more lines are too long
@@ -65,7 +65,7 @@ void web_post_async_poll(subreq_ctx_t *req) {
    curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);

    if (req->response->status_code == 0) {
        LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer)
        LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer);
    }

    curl_multi_cleanup(req->multi);
@@ -104,7 +104,7 @@ subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
    curl_multi_add_handle(req->multi, curl);
    curl_multi_perform(req->multi, &req->running_handles);

    LOG_DEBUGF("web.c", "async request POST %s", url)
    LOG_DEBUGF("web.c", "async request POST %s", url);

    return req;
}
@@ -136,7 +136,7 @@ response_t *web_get(const char *url, int timeout, int insecure) {
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);

    if (resp->status_code == 0) {
        LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
        LOG_ERRORF("web.c", "CURL Error: %s", err_buffer);
    }

    curl_easy_cleanup(curl);
@@ -180,7 +180,7 @@ response_t *web_post(const char *url, const char *data, int insecure) {
    resp->size = buffer.cur;

    if (resp->status_code == 0) {
        LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
        LOG_ERRORF("web.c", "CURL Error: %s", err_buffer);
    }

    curl_easy_cleanup(curl);

@@ -1,9 +1,7 @@
#include "src/ctx.h"
#include "serialize.h"
#include "src/parsing/parse.h"
#include "src/parsing/mime.h"

#include <zstd.h>

char *get_meta_key_text(enum metakey meta_key) {

@@ -79,7 +77,7 @@ char *get_meta_key_text(enum metakey meta_key) {
        case MetaChecksum:
            return "checksum";
        default:
            LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
            LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key);
    }
}

@@ -93,8 +91,6 @@ char *build_json_string(document_t *doc) {
    } else {
        cJSON_AddStringToObject(json, "mime", mime_text);
    }
    cJSON_AddNumberToObject(json, "size", (double) doc->size);
    cJSON_AddNumberToObject(json, "mtime", doc->mtime);

    // Ignore root directory in the file path
    doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
@@ -124,8 +120,6 @@ char *build_json_string(document_t *doc) {
        cJSON_AddStringToObject(json, "path", "");
    }

    cJSON_AddStringToObject(json, "_id", doc->doc_id);

    // Metadata
    meta_line_t *meta = doc->meta_head;
    while (meta != NULL) {
@@ -175,7 +169,7 @@ char *build_json_string(document_t *doc) {
            break;
        }
        default:
            LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key))
            LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key));
    }

    meta_line_t *tmp = meta;
@@ -189,391 +183,10 @@ char *build_json_string(document_t *doc) {
    return json_str;
}

static struct {
    FILE *out_file;
    size_t buf_out_size;

    void *buf_out;

    ZSTD_CCtx *cctx;
} WriterCtx = {
        .out_file = NULL
};

#define ZSTD_COMPRESSION_LEVEL 10

void initialize_writer_ctx(const char *file_path) {
    WriterCtx.out_file = fopen(file_path, "wb");

    WriterCtx.buf_out_size = ZSTD_CStreamOutSize();
    WriterCtx.buf_out = malloc(WriterCtx.buf_out_size);

    WriterCtx.cctx = ZSTD_createCCtx();

    ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_compressionLevel, ZSTD_COMPRESSION_LEVEL);
    ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_checksumFlag, FALSE);

    LOG_DEBUGF("serialize.c", "Open index file for writing %s", file_path)
}

void zstd_write_string(const char *string, const size_t len) {
    ZSTD_inBuffer input = {string, len, 0};

    do {
        ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0};
        ZSTD_compressStream2(WriterCtx.cctx, &output, &input, ZSTD_e_continue);

        if (output.pos > 0) {
            ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file);
        }
    } while (input.pos != input.size);
}

void write_document_func(void *arg) {

    if (WriterCtx.out_file == NULL) {
        char dstfile[PATH_MAX];
        snprintf(dstfile, PATH_MAX, "%s_index_main.ndjson.zst", ScanCtx.index.path);
        initialize_writer_ctx(dstfile);
    }

    document_t *doc = arg;

    char *json_str = build_json_string(doc);
    const size_t json_str_len = strlen(json_str);

    json_str = realloc(json_str, json_str_len + 1);
    *(json_str + json_str_len) = '\n';

    zstd_write_string(json_str, json_str_len + 1);

    free(json_str);
    free(doc->filepath);
}

void zstd_close() {
    if (WriterCtx.out_file == NULL) {
        LOG_DEBUG("serialize.c", "No zstd stream to close, skipping cleanup")
        return;
    }

    size_t remaining;
    do {
        ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0};
        remaining = ZSTD_endStream(WriterCtx.cctx, &output);

        if (output.pos > 0) {
            ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file);
        }
    } while (remaining != 0);

    ZSTD_freeCCtx(WriterCtx.cctx);
    free(WriterCtx.buf_out);
    fclose(WriterCtx.out_file);

    LOG_DEBUG("serialize.c", "End zstd stream & close index file")
}

void writer_cleanup() {
    zstd_close();
    WriterCtx.out_file = NULL;
}

void write_index_descriptor(char *path, index_descriptor_t *desc) {
    cJSON *json = cJSON_CreateObject();
    cJSON_AddStringToObject(json, "id", desc->id);
    cJSON_AddStringToObject(json, "version", desc->version);
    cJSON_AddStringToObject(json, "root", desc->root);
    cJSON_AddStringToObject(json, "name", desc->name);
    cJSON_AddStringToObject(json, "type", desc->type);
    cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url);
    cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);

    int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
    if (fd < 0) {
        LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
    }
    char *str = cJSON_Print(json);
    size_t ret = write(fd, str, strlen(str));
    if (ret == -1) {
        LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
    }
    free(str);
    close(fd);

    cJSON_Delete(json);
}

index_descriptor_t read_index_descriptor(char *path) {

    struct stat info;
    stat(path, &info);
    int fd = open(path, O_RDONLY);

    if (fd == -1) {
        LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
    }

    char *buf = malloc(info.st_size + 1);
    size_t ret = read(fd, buf, info.st_size);
    if (ret == -1) {
        LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
    }
    *(buf + info.st_size) = '\0';
    close(fd);

    cJSON *json = cJSON_Parse(buf);

    index_descriptor_t descriptor;
    descriptor.timestamp = (long) cJSON_GetObjectItem(json, "timestamp")->valuedouble;
    strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
    strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
    strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
    descriptor.root_len = (short) strlen(descriptor.root);
    strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
    strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
    if (cJSON_GetObjectItem(json, "type") == NULL) {
        strcpy(descriptor.type, INDEX_TYPE_NDJSON);
    } else {
        strcpy(descriptor.type, cJSON_GetObjectItem(json, "type")->valuestring);
    }

    cJSON_Delete(json);
    free(buf);

    return descriptor;
}


void write_document(document_t *doc) {
    tpool_add_work(ScanCtx.writer_pool, write_document_func, doc);
}

void thread_cleanup() {
    cleanup_parse();
    cleanup_font();
}

void read_index_bin_handle_line(const char *line, const char *index_id, index_func func) {

    cJSON *document = cJSON_Parse(line);
    const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;

    cJSON_AddStringToObject(document, "index", index_id);

    // Load meta from sidecar files
    cJSON *meta_obj = NULL;
    if (IndexCtx.meta != NULL) {
        const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
        if (meta_string != NULL) {
            meta_obj = cJSON_Parse(meta_string);

            cJSON *child;
            for (child = meta_obj->child; child != NULL; child = child->next) {
                char meta_key[4096];
                strcpy(meta_key, child->string);
                cJSON_DeleteItemFromObject(document, meta_key);
                cJSON_AddItemReferenceToObject(document, meta_key, child);
            }
        }
    }

    // Load tags from tags DB
    if (IndexCtx.tags != NULL) {
        const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
        if (tags_string != NULL) {
            cJSON *tags_arr = cJSON_Parse(tags_string);
            cJSON_DeleteItemFromObject(document, "tag");
            cJSON_AddItemToObject(document, "tag", tags_arr);
        }
    }

    func(document, path_md5_str);
    cJSON_DeleteItemFromObject(document, "_id");
    cJSON_Delete(document);
    if (meta_obj) {
        cJSON_Delete(meta_obj);
    }
}

void read_lines(const char *path, const line_processor_t processor) {
    dyn_buffer_t buf = dyn_buffer_create();

    // Initialize zstd things
    FILE *file = fopen(path, "rb");

    size_t const buf_in_size = ZSTD_DStreamInSize();
    void *const buf_in = malloc(buf_in_size);

    size_t const buf_out_size = ZSTD_DStreamOutSize();
    void *const buf_out = malloc(buf_out_size);

    ZSTD_DCtx *const dctx = ZSTD_createDCtx();

    size_t read;
    size_t last_ret = 0;
    while ((read = fread(buf_in, 1, buf_in_size, file))) {
        ZSTD_inBuffer input = {buf_in, read, 0};

        while (input.pos < input.size) {
            ZSTD_outBuffer output = {buf_out, buf_out_size, 0};

            size_t const ret = ZSTD_decompressStream(dctx, &output, &input);

            for (int i = 0; i < output.pos; i++) {
                char c = ((char *) output.dst)[i];

                if (c == '\n') {
                    dyn_buffer_write_char(&buf, '\0');
                    processor.func(buf.buf, processor.data);
                    buf.cur = 0;
                } else {
                    dyn_buffer_write_char(&buf, c);
                }
            }

            last_ret = ret;
        }
    }

    if (last_ret != 0) {
        /* The last return value from ZSTD_decompressStream did not end on a
         * frame, but we reached the end of the file! We assume this is an
         * error, and the input was truncated.
         */
        LOG_FATALF("serialize.c", "EOF before end of stream: %zu", last_ret)
    }

    ZSTD_freeDCtx(dctx);
    free(buf_in);
    free(buf_out);

    dyn_buffer_destroy(&buf);
    fclose(file);
}

void read_index_ndjson(const char *line, void *_data) {
    void **data = _data;
    const char *index_id = data[0];
    index_func func = data[1];
    read_index_bin_handle_line(line, index_id, func);
}

void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) {
    if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
        read_lines(path, (line_processor_t) {
                .data = (void *[2]) {(void *) index_id, func},
                .func = read_index_ndjson,
        });
    }
}

static __thread GHashTable *IncrementalReadTable = NULL;

void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) {
    const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
    const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;

    incremental_put(IncrementalReadTable, path_md5_str, mtime);
}

void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
    IncrementalReadTable = table;
    read_index(filepath, desc->id, desc->type, json_put_incremental);
}

static __thread GHashTable *IncrementalCopyTable = NULL;
static __thread GHashTable *IncrementalNewTable = NULL;
static __thread store_t *IncrementalCopySourceStore = NULL;
static __thread store_t *IncrementalCopyDestinationStore = NULL;

void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {

    const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;

    if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) {
        // Copy index line
        cJSON_DeleteItemFromObject(document, "index");
        char *json_str = cJSON_PrintUnformatted(document);
        const size_t json_str_len = strlen(json_str);

        json_str = realloc(json_str, json_str_len + 1);
        *(json_str + json_str_len) = '\n';

        // Copy tn store contents
        size_t buf_len;
        char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
        if (buf_len != 0) {
            store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
            free(buf);
        }

        // Also copy additional thumbnails
        if (cJSON_GetObjectItem(document, "thumbnail") != NULL) {
            const int thumbnail_count = cJSON_GetObjectItem(document, "thumbnail")->valueint;

            for (int i = 1; i < thumbnail_count; i++) {
                char tn_key[SIST_DOC_ID_LEN + sizeof(char) * 4];

                snprintf(tn_key, sizeof(tn_key), "%s%04d", doc_id, i);

                buf = store_read(IncrementalCopySourceStore, tn_key, sizeof(tn_key), &buf_len);
                if (buf_len != 0) {
                    store_write(IncrementalCopyDestinationStore, tn_key, sizeof(tn_key), buf, buf_len);
                    free(buf);
                }
            }
        }

        zstd_write_string(json_str, json_str_len + 1);
        free(json_str);
    }
}

/**
 * Copy items from an index that are in the copy_table. Also copies from
 * the store.
 */
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                      const char *dst_filepath, GHashTable *copy_table) {

    if (WriterCtx.out_file == NULL) {
        initialize_writer_ctx(dst_filepath);
    }

    IncrementalCopyTable = copy_table;
    IncrementalCopySourceStore = store;
    IncrementalCopyDestinationStore = dst_store;

    read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
}

void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {

    char doc_id_n[SIST_DOC_ID_LEN + 1];
    doc_id_n[SIST_DOC_ID_LEN] = '\0';
    doc_id_n[SIST_DOC_ID_LEN - 1] = '\n';
    const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;

    // do not delete archive virtual entries
    if (cJSON_GetObjectItem(document, "parent") == NULL
        && !incremental_get(IncrementalCopyTable, doc_id)
        && !incremental_get(IncrementalNewTable, doc_id)
            ) {
        memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1);
        zstd_write_string(doc_id, sizeof(doc_id_n));
    }
}

void incremental_delete(const char *del_filepath, const char *index_filepath,
                        GHashTable *copy_table, GHashTable *new_table) {

    if (WriterCtx.out_file == NULL) {
        initialize_writer_ctx(del_filepath);
    }

    IncrementalCopyTable = copy_table;
    IncrementalNewTable = new_table;

    read_index(index_filepath, "", INDEX_TYPE_NDJSON, incremental_delete_handle_doc);
}

void write_document(document_t *doc) {
    char *json_str = build_json_string(doc);

    database_write_document(ProcData.index_db, doc, json_str);
    free(doc);
    free(json_str);
}
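With the zstd writer pool gone, write_document now serializes synchronously into the index SQLite database. A minimal sketch of the producer side under the new scheme (the document setup is assumed, only write_document is from this diff):

// Sketch only -- how a parser could emit a document after this commit.
document_t *doc = make_document(); // hypothetical helper that fills
                                   // doc->doc_id, doc->size, doc->mtime, metadata
write_document(doc);               // builds the JSON line and writes the row via
                                   // database_write_document(); also frees doc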
@@ -2,55 +2,7 @@
#define SIST2_SERIALIZE_H

#include "src/sist.h"
#include "store.h"

#include <sys/syscall.h>
#include <glib.h>

typedef struct line_processor {
    void *data;
    void (*func)(const char *, void *);
} line_processor_t;

typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]);

void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                      const char *dst_filepath, GHashTable *copy_table);

void incremental_delete(const char *del_filepath, const char *index_filepath,
                        GHashTable *copy_table, GHashTable *new_table);

void write_document(document_t *doc);

void read_lines(const char *path, const line_processor_t processor);

void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func);

void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);

/**
 * Must be called after write_document
 */
void thread_cleanup();

void writer_cleanup();

void write_index_descriptor(char *path, index_descriptor_t *desc);

index_descriptor_t read_index_descriptor(char *path);

// caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
    snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
    if (access(file_path, R_OK) == 0) { \
        action_ok; \
    } else { \
        action_main_fail; \
    } \
    snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
    if ((cond_original) && access(file_path, R_OK) == 0) { \
        action_ok; \
    } \


#endif
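A minimal usage sketch for the READ_INDICES macro above (the log message and processor variable are placeholders):

// Sketch only.
char file_path[PATH_MAX];
READ_INDICES(file_path, index_path,
             read_lines(file_path, processor),                          // run for each readable index file
             LOG_FATALF("example.c", "Main index missing: %s", file_path),
             TRUE /* also consider the _original index */);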
195 src/io/store.c
@@ -1,195 +0,0 @@
#include "store.h"
#include "src/ctx.h"

store_t *store_create(const char *path, size_t chunk_size) {
    store_t *store = malloc(sizeof(struct store_t));
    mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
    strcpy(store->path, path);

#if (SIST_FAKE_STORE != 1)
    store->chunk_size = chunk_size;
    pthread_rwlock_init(&store->lock, NULL);

    mdb_env_create(&store->env);

    int open_ret = mdb_env_open(store->env,
                                path,
                                MDB_WRITEMAP | MDB_MAPASYNC,
                                S_IRUSR | S_IWUSR
    );

    if (open_ret != 0) {
        LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
    }

    store->size = (size_t) store->chunk_size;
    mdb_env_set_mapsize(store->env, store->size);

    // Open dbi
    MDB_txn *txn;
    mdb_txn_begin(store->env, NULL, 0, &txn);
    mdb_dbi_open(txn, NULL, 0, &store->dbi);
    mdb_txn_commit(txn);
#endif

    return store;
}

void store_destroy(store_t *store) {

#if (SIST_FAKE_STORE != 1)
    pthread_rwlock_destroy(&store->lock);
    mdb_dbi_close(store->env, store->dbi);
    mdb_env_close(store->env);
#endif
    free(store);
}

void store_flush(store_t *store) {
    mdb_env_sync(store->env, TRUE);
}

void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {

    if (LogCtx.very_verbose) {
        LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
    }

#if (SIST_FAKE_STORE != 1)

    MDB_val mdb_key;
    mdb_key.mv_data = key;
    mdb_key.mv_size = key_len;

    MDB_val mdb_value;
    mdb_value.mv_data = buf;
    mdb_value.mv_size = buf_len;

    MDB_txn *txn;
    pthread_rwlock_rdlock(&store->lock);
    mdb_txn_begin(store->env, NULL, 0, &txn);

    int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
    ScanCtx.stat_tn_size += buf_len;

    int db_full = FALSE;
    int should_abort_transaction = FALSE;

    if (put_ret == MDB_MAP_FULL) {
        db_full = TRUE;
        should_abort_transaction = TRUE;
    } else {
        int commit_ret = mdb_txn_commit(txn);

        if (commit_ret == MDB_MAP_FULL) {
            db_full = TRUE;
        }
    }

    if (db_full) {
        LOG_DEBUGF("store.c", "Updating mdb mapsize to %lu bytes", store->size)

        if (should_abort_transaction) {
            mdb_txn_abort(txn);
        }

        pthread_rwlock_unlock(&store->lock);

        // Cannot resize when there is a opened transaction.
        // Resize take effect on the next commit.
        pthread_rwlock_wrlock(&store->lock);
        store->size += store->chunk_size;
        int resize_ret = mdb_env_set_mapsize(store->env, store->size);
        if (resize_ret != 0) {
            LOG_ERROR("store.c", mdb_strerror(put_ret))
        }
        mdb_txn_begin(store->env, NULL, 0, &txn);
        int put_ret_retry = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);

        if (put_ret_retry != 0) {
            LOG_ERROR("store.c", mdb_strerror(put_ret))
        }

        int ret = mdb_txn_commit(txn);
        if (ret != 0) {
            LOG_FATALF("store.c", "FIXME: Could not commit to store %s: %s (%d), %d, %d %d",
                       store->path, mdb_strerror(ret), ret,
                       put_ret, put_ret_retry);
        }
        LOG_DEBUGF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
    } else if (put_ret != 0) {
        LOG_ERROR("store.c", mdb_strerror(put_ret))
    }

    pthread_rwlock_unlock(&store->lock);

#endif
}

char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
    char *buf = NULL;

#if (SIST_FAKE_STORE != 1)
    MDB_val mdb_key;
    mdb_key.mv_data = key;
    mdb_key.mv_size = key_len;

    MDB_val mdb_value;

    MDB_txn *txn;
    mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);

    int get_ret = mdb_get(txn, store->dbi, &mdb_key, &mdb_value);

    if (get_ret == MDB_NOTFOUND) {
        *ret_vallen = 0;
    } else {
        *ret_vallen = mdb_value.mv_size;
        buf = malloc(mdb_value.mv_size);
        memcpy(buf, mdb_value.mv_data, mdb_value.mv_size);
    }

    mdb_txn_abort(txn);
#endif
    return buf;
}

GHashTable *store_read_all(store_t *store) {

    int count = 0;

    GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);

    MDB_txn *txn = NULL;
    mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);

    MDB_cursor *cur = NULL;
    mdb_cursor_open(txn, store->dbi, &cur);

    MDB_val key;
    MDB_val value;

    while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
        char *key_str = malloc(key.mv_size);
        memcpy(key_str, key.mv_data, key.mv_size);
        char *val_str = malloc(value.mv_size);
        memcpy(val_str, value.mv_data, value.mv_size);

        g_hash_table_insert(table, key_str, val_str);
        count += 1;
    }

    const char *path;
    mdb_env_get_path(store->env, &path);
    LOG_DEBUGF("store.c", "Read %d entries from %s", count, path);

    mdb_cursor_close(cur);
    mdb_txn_abort(txn);
    return table;
}


void store_copy(store_t *store, const char *destination) {
    mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);
    mdb_env_copy(store->env, destination);
}
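This whole LMDB-backed key/value store is deleted by the commit; judging from the rest of the diff, thumbnails now go through database_write_thumbnail() into the index SQLite file instead. For comparison, a minimal sketch of the replacement call (the variables are placeholders):

// Sketch only -- thumbnail storage after this commit.
database_write_thumbnail(db, doc_id /* 32-char document id */,
                         0 /* thumbnail number */, jpeg_buf, jpeg_buf_len);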
@@ -1,37 +0,0 @@
#ifndef SIST2_STORE_H
#define SIST2_STORE_H

#include <pthread.h>
#include <lmdb.h>

#include <glib.h>

#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 1024)
#define STORE_SIZE_META STORE_SIZE_TAG

typedef struct store_t {
    char path[PATH_MAX];
    char *tmp_path;
    MDB_dbi dbi;
    MDB_env *env;
    size_t size;
    size_t chunk_size;
    pthread_rwlock_t lock;
} store_t;

store_t *store_create(const char *path, size_t chunk_size);

void store_destroy(store_t *store);

void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);

void store_flush(store_t *store);

char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);

GHashTable *store_read_all(store_t *store);

void store_copy(store_t *store, const char *destination);

#endif
@@ -1,44 +1,12 @@
|
||||
#include "walk.h"
|
||||
#include "src/ctx.h"
|
||||
#include "src/parsing/parse.h"
|
||||
#include "src/parsing/fs_util.h"
|
||||
|
||||
#include <ftw.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#define STR_STARTS_WITH(x, y) (strncmp(y, x, strlen(y) - 1) == 0)
|
||||
|
||||
__always_inline
|
||||
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
|
||||
int len = (int) strlen(filepath);
|
||||
parse_job_t *job = malloc(sizeof(parse_job_t) + len);
|
||||
|
||||
strcpy(job->filepath, filepath);
|
||||
job->base = base;
|
||||
char *p = strrchr(filepath + base, '.');
|
||||
if (p != NULL) {
|
||||
job->ext = (int) (p - filepath + 1);
|
||||
} else {
|
||||
job->ext = len;
|
||||
}
|
||||
|
||||
job->vfile.info = *info;
|
||||
|
||||
job->parent[0] = '\0';
|
||||
|
||||
job->vfile.filepath = job->filepath;
|
||||
job->vfile.read = fs_read;
|
||||
// Filesystem reads are always rewindable
|
||||
job->vfile.read_rewindable = fs_read;
|
||||
job->vfile.reset = fs_reset;
|
||||
job->vfile.close = fs_close;
|
||||
job->vfile.fd = -1;
|
||||
job->vfile.is_fs_file = TRUE;
|
||||
job->vfile.has_checksum = FALSE;
|
||||
job->vfile.rewind_buffer_size = 0;
|
||||
job->vfile.rewind_buffer = NULL;
|
||||
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
|
||||
|
||||
return job;
|
||||
}
|
||||
|
||||
int sub_strings[30];
|
||||
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, str, strlen(str), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
|
||||
@@ -53,12 +21,9 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
|
||||
}
|
||||
|
||||
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_excluded_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
} else if (typeflag == FTW_D) {
|
||||
return FTW_SKIP_SUBTREE;
|
||||
}
|
||||
@@ -67,8 +32,13 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
|
||||
}
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
|
||||
tpool_add_work(ScanCtx.pool, parse, job);
|
||||
parse_job_t *job = create_parse_job(filepath, (int) info->st_mtim.tv_sec, info->st_size);
|
||||
|
||||
tpool_add_work(ScanCtx.pool, &(job_t) {
|
||||
.type = JOB_PARSE_JOB,
|
||||
.parse_job = job
|
||||
});
|
||||
free(job);
|
||||
}
|
||||
|
||||
return FTW_CONTINUE;
|
||||
@@ -109,14 +79,7 @@ int iterate_file_list(void *input_file) {
        }

        if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) {
            LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path)

            if (S_ISREG(info.st_mode)) {
                pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
                ScanCtx.dbg_excluded_files_count += 1;
                pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
            }

            LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path);
            continue;
        }

@@ -124,11 +87,14 @@ int iterate_file_list(void *input_file) {
            LOG_FATALF("walk.c", "File is not a children of root folder (%s): %s", ScanCtx.index.desc.root, buf);
        }

        int base = (int) (strrchr(buf, '/') - buf) + 1;

        parse_job_t *job = create_fs_parse_job(absolute_path, &info, base);
        parse_job_t *job = create_parse_job(absolute_path, (int) info.st_mtim.tv_sec, info.st_size);
        free(absolute_path);
        tpool_add_work(ScanCtx.pool, parse, job);

        tpool_add_work(ScanCtx.pool, &(job_t) {
                .type = JOB_PARSE_JOB,
                .parse_job = job
        });
        free(job);
    }

    return 0;

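Note the calling-convention change in both call sites above: the job is now passed as a stack-allocated compound literal, and the heap-allocated parse_job is freed immediately after tpool_add_work() returns. That is only safe if the pool deep-copies the job into its queue before returning. A sketch of the contract this implies (the queue internals and the type layouts below are assumptions for illustration, not the actual tpool code):

    #include <pthread.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical, simplified types -- not the real sist2 definitions. */
    typedef struct { char filepath[4096]; } parse_job_t;
    typedef enum { JOB_PARSE_JOB } job_type_t;
    typedef struct { job_type_t type; parse_job_t *parse_job; } job_t;

    typedef struct {
        pthread_mutex_t mutex;
        job_t queue[64];
        int tail;
    } tpool_t;

    /* The caller passes &(job_t){...} from its own stack and frees the
     * parse_job right away, so everything must be copied here. */
    void tpool_add_work(tpool_t *pool, job_t *job) {
        pthread_mutex_lock(&pool->mutex);
        job_t *slot = &pool->queue[pool->tail++];
        *slot = *job;                      // copy the job header
        if (job->type == JOB_PARSE_JOB) {  // deep-copy the payload
            slot->parse_job = malloc(sizeof(parse_job_t));
            memcpy(slot->parse_job, job->parse_job, sizeof(parse_job_t));
        }
        pthread_mutex_unlock(&pool->mutex);
    }
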
28
src/log.c
@@ -21,8 +21,6 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {

    char log_str[LOG_MAX_LENGTH];

    unsigned long long pid = (unsigned long long) pthread_self();

    char datetime[32];
    time_t t;
    struct tm result;

@@ -42,8 +40,8 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {

    log_len = snprintf(
            log_str, sizeof(log_str),
            "{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
            pid, datetime, log_levels[level], filepath_json_str, log_str_json_str
            "{\"thread\":\"T%d\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
            ProcData.thread_id, datetime, log_levels[level], filepath_json_str, log_str_json_str
    );

    cJSON_Delete(filepath_json);

@@ -58,15 +56,15 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
    if (is_tty) {
        log_len = snprintf(
                log_str, sizeof(log_str),
                "\033[%dm[%04llX]%s [%s] [%s %s] ",
                31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
                "\033[%dmT%d%s [%s] [%s %s] ",
                31 + ProcData.thread_id % 7, ProcData.thread_id, log_colors[level],
                datetime, log_levels[level], filepath
        );
    } else {
        log_len = snprintf(
                log_str, sizeof(log_str),
                "[%04llX] [%s] [%s %s] ",
                pid, datetime, log_levels[level], filepath
                "T%d [%s] [%s %s] ",
                ProcData.thread_id, datetime, log_levels[level], filepath
        );
    }

@@ -112,8 +110,6 @@ void sist_log(const char *filepath, int level, char *str) {

    char log_str[LOG_MAX_LENGTH];

    unsigned long long pid = (unsigned long long) pthread_self();

    char datetime[32];
    time_t t;
    struct tm result;

@@ -132,8 +128,8 @@ void sist_log(const char *filepath, int level, char *str) {

    log_len = snprintf(
            log_str, sizeof(log_str),
            "{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
            pid, datetime, log_levels[level], filepath_json_str, log_str_json_str
            "{\"thread\":\"T%d\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
            ProcData.thread_id, datetime, log_levels[level], filepath_json_str, log_str_json_str
    );

    cJSON_Delete(log_str_json);

@@ -147,16 +143,16 @@ void sist_log(const char *filepath, int level, char *str) {
    if (is_tty) {
        log_len = snprintf(
                log_str, sizeof(log_str),
                "\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n",
                31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
                "\033[%dmT%d%s [%s] [%s %s] %s \033[0m\n",
                31 + ProcData.thread_id % 7, ProcData.thread_id, log_colors[level],
                datetime, log_levels[level], filepath,
                str
        );
    } else {
        log_len = snprintf(
                log_str, sizeof(log_str),
                "[%04llX] [%s] [%s %s] %s \n",
                pid, datetime, log_levels[level], filepath,
                "T%d [%s] [%s %s] %s \n",
                ProcData.thread_id, datetime, log_levels[level], filepath,
                str
        );
    }

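The TTY format strings above color each thread's prefix by cycling the thread id through the seven standard ANSI foreground colors (codes 31 to 37), so output from the same thread always gets the same color. The same trick in isolation:

    #include <stdio.h>

    // Same scheme as log.c: pick one of the 7 standard ANSI foreground
    // colors (31..37) deterministically from the thread id.
    static void print_thread_label(int thread_id) {
        printf("\033[%dmT%d\033[0m\n", 31 + thread_id % 7, thread_id);
    }

    int main(void) {
        for (int i = 0; i < 10; i++) {
            print_thread_label(i);
        }
        return 0;
    }
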
44
src/log.h
@@ -2,6 +2,7 @@
#define SIST2_LOG_H


#include <signal.h>
#define LOG_MAX_LENGTH 8192

#define LOG_SIST_DEBUG 0

@@ -10,32 +11,37 @@
#define LOG_SIST_ERROR 3
#define LOG_SIST_FATAL 4

#define LOG_DEBUGF(filepath, fmt, ...) \
    if (LogCtx.very_verbose) {sist_logf(filepath, LOG_SIST_DEBUG, fmt, __VA_ARGS__);}
#define LOG_DEBUG(filepath, str) \
    if (LogCtx.very_verbose) {sist_log(filepath, LOG_SIST_DEBUG, str);}
#define LOG_DEBUGF(filepath, fmt, ...) do{\
    if (LogCtx.very_verbose) {sist_logf(filepath, LOG_SIST_DEBUG, fmt, __VA_ARGS__);}}while(0)
#define LOG_DEBUG(filepath, str) do{\
    if (LogCtx.very_verbose) {sist_log(filepath, LOG_SIST_DEBUG, str);}}while(0)

#define LOG_INFOF(filepath, fmt, ...) \
    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_INFO, fmt, __VA_ARGS__);}
#define LOG_INFO(filepath, str) \
    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_INFO, str);}
#define LOG_INFOF(filepath, fmt, ...) do {\
    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_INFO, fmt, __VA_ARGS__);}} while(0)
#define LOG_INFO(filepath, str) do {\
    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_INFO, str);}} while(0)

#define LOG_WARNINGF(filepath, fmt, ...) \
    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_WARNING, fmt, __VA_ARGS__);}
#define LOG_WARNING(filepath, str) \
    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_WARNING, str);}
#define LOG_WARNINGF(filepath, fmt, ...) do {\
    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_WARNING, fmt, __VA_ARGS__);}}while(0)
#define LOG_WARNING(filepath, str) do{\
    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_WARNING, str);}}while(0)

#define LOG_ERRORF(filepath, fmt, ...) \
    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_ERROR, fmt, __VA_ARGS__);}
#define LOG_ERROR(filepath, str) \
    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_ERROR, str);}
#define LOG_ERRORF(filepath, fmt, ...) do {\
    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_ERROR, fmt, __VA_ARGS__);}}while(0)
#define LOG_ERROR(filepath, str) do{\
    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_ERROR, str);}}while(0)

#define LOG_FATALF(filepath, fmt, ...) \
#define LOG_FATALF(filepath, fmt, ...)\
    sist_logf(filepath, LOG_SIST_FATAL, fmt, __VA_ARGS__);\
    exit(-1);
    raise(SIGUSR1)
#define LOG_FATAL(filepath, str) \
    sist_log(filepath, LOG_SIST_FATAL, str);\
    exit(-1);
    exit(SIGUSR1)

#define LOG_FATALF_NO_EXIT(filepath, fmt, ...) \
    sist_logf(filepath, LOG_SIST_FATAL, fmt, __VA_ARGS__)
#define LOG_FATAL_NO_EXIT(filepath, str) \
    sist_log(filepath, LOG_SIST_FATAL, str)

#include "sist.h"


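The main change in log.h is that every conditional logging macro is now wrapped in do { ... } while(0). Without the wrapper, the macro expands to a bare if statement, and the semicolon the caller writes after it orphans any following else. A small reproduction of the hazard:

    #include <stdio.h>

    static int verbose = 1;

    // Old style: expands to a bare `if` block.
    #define LOG_OLD(msg)  if (verbose) { puts(msg); }
    // New style: do/while(0) consumes the caller's ';' and acts like one statement.
    #define LOG_NEW(msg)  do { if (verbose) { puts(msg); } } while (0)

    int main(void) {
        int found = 0;

        // if (found) LOG_OLD("found"); else puts("not found");
        //                            ^ does not compile: the ';' after the
        //                              expanded block orphans the `else`.

        if (found) LOG_NEW("found"); else puts("not found");  // works as intended
        return 0;
    }
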
File diff suppressed because one or more lines are too long

571
src/main.c
@@ -5,8 +5,6 @@
#include <locale.h>

#include "cli.h"
#include "io/serialize.h"
#include "io/store.h"
#include "tpool.h"
#include "io/walk.h"
#include "index/elastic.h"

@@ -16,13 +14,9 @@
#include "auth0/auth0_c_api.h"

#include <signal.h>
#include <unistd.h>
#include <pthread.h>

#include "stats.h"

#define DESCRIPTION "Lightning-fast file system indexer and search tool."

#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
#include "src/database/database.h"


static const char *const usage[] = {
@@ -34,109 +28,62 @@ static const char *const usage[] = {
};


static __sighandler_t sigsegv_handler = NULL;
static __sighandler_t sigabrt_handler = NULL;
void database_scan_begin(scan_args_t *args) {
    index_descriptor_t *desc = &ScanCtx.index.desc;

void sig_handler(int signum) {
    database_t *db = database_create(args->output, INDEX_DATABASE);

    LogCtx.verbose = TRUE;
    LogCtx.very_verbose = TRUE;
    if (args->incremental) {
        // Update existing descriptor
        database_open(db);
        index_descriptor_t *original_desc = database_read_index_descriptor(db);

    LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
    LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
        // copy original index id
        strcpy(desc->id, original_desc->id);

    if (ScanCtx.dbg_current_files != NULL) {
        GHashTableIter iter;
        g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);

        void *key;
        void *value;
        while (g_hash_table_iter_next(&iter, &key, &value)) {
            parse_job_t *job = value;

            if (isatty(STDERR_FILENO)) {
                LOG_DEBUGF(
                        "*SIGNAL HANDLER*",
                        "Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
                        31 + ((unsigned int) key) % 7, key, job->filepath
                );
            } else {
                LOG_DEBUGF(
                        "*SIGNAL HANDLER*",
                        "THREAD [%04llX] was working on job %s",
                        key, job->filepath
                );
            }
        if (original_desc->version_major != VersionMajor) {
            LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc->version, Version);
        }
    }

    if (ScanCtx.pool != NULL) {
        tpool_dump_debug_info(ScanCtx.pool);
    }
        strcpy(original_desc->root, desc->root);
        original_desc->root_len = desc->root_len;
        strcpy(original_desc->rewrite_url, desc->rewrite_url);
        strcpy(original_desc->name, desc->name);

    if (IndexCtx.pool != NULL) {
        tpool_dump_debug_info(IndexCtx.pool);
    }
        time(&original_desc->timestamp);

    LOG_INFO(
            "*SIGNAL HANDLER*",
            "Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
    )
    LOG_INFO(
            "*SIGNAL HANDLER*",
            "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
    )
        database_write_index_descriptor(db, original_desc);
        free(original_desc);

#ifndef SIST_DEBUG
    LOG_WARNING(
            "*SIGNAL HANDLER*",
            "You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
            "releases page to provide additional information when submitting a bug report."
    )
#endif
        database_incremental_scan_begin(db);

    if (signum == SIGSEGV && sigsegv_handler != NULL) {
        sigsegv_handler(signum);
    } else if (signum == SIGABRT && sigabrt_handler != NULL) {
        sigabrt_handler(signum);
    }

    exit(-1);
}

void init_dir(const char *dirpath, scan_args_t *args) {
    char path[PATH_MAX];
    snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);

    time(&ScanCtx.index.desc.timestamp);
    strcpy(ScanCtx.index.desc.version, Version);
    strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);

    if (args->incremental != NULL) {
        // copy old index id
        char descriptor_path[PATH_MAX];
        snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
        index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
        memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
    } else {
        // Create new descriptor

        time(&desc->timestamp);
        strcpy(desc->version, Version);
        desc->version_major = VersionMajor;
        desc->version_minor = VersionMinor;
        desc->version_patch = VersionPatch;

        // generate new index id based on timestamp
        unsigned char index_md5[MD5_DIGEST_LENGTH];
        MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
        buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);

        database_initialize(db);
        database_open(db);
        database_write_index_descriptor(db, desc);
    }

    write_index_descriptor(path, &ScanCtx.index.desc);
    database_close(db, FALSE);
}

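As the hunk above shows, a new index id is simply the MD5 of the creation timestamp rendered as hex. buf2hex() is a sist2 helper; a rough standalone equivalent (a hypothetical reimplementation, assuming OpenSSL's MD5) would be:

    #include <openssl/md5.h>
    #include <stdio.h>
    #include <time.h>

    // Hypothetical stand-in for sist2's buf2hex(): hex-encode len bytes into out.
    static void buf2hex_sketch(const unsigned char *buf, size_t len, char *out) {
        for (size_t i = 0; i < len; i++) {
            sprintf(out + i * 2, "%02x", buf[i]);
        }
        out[len * 2] = '\0';
    }

    int main(void) {
        time_t timestamp = time(NULL);
        unsigned char index_md5[MD5_DIGEST_LENGTH];
        char index_id[MD5_DIGEST_LENGTH * 2 + 1];

        // Same scheme as database_scan_begin(): id = hex(MD5(timestamp)).
        MD5((unsigned char *) &timestamp, sizeof(timestamp), index_md5);
        buf2hex_sketch(index_md5, MD5_DIGEST_LENGTH, index_id);
        printf("index id: %s\n", index_id);
        return 0;
    }
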
void scan_print_header() {
    LOG_INFOF("main.c", "sist2 v%s", Version)
void write_thumbnail_callback(char *key, int num, void *buf, size_t buf_len) {
    database_write_thumbnail(ProcData.index_db, key, num, buf, buf_len);
}

void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
    store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
}

void _log(const char *filepath, int level, char *str) {
void log_callback(const char *filepath, int level, char *str) {
    if (level == LEVEL_FATAL) {
        sist_log(filepath, level, str);
        exit(-1);

@@ -153,7 +100,7 @@ void _log(const char *filepath, int level, char *str) {
    }
}

void _logf(const char *filepath, int level, char *format, ...) {
void logf_callback(const char *filepath, int level, char *format, ...) {

    va_list args;

@@ -177,17 +124,12 @@ void _logf(const char *filepath, int level, char *format, ...) {

void initialize_scan_context(scan_args_t *args) {

    ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
    pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
    pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
    pthread_mutex_init(&ScanCtx.copy_table_mu, NULL);

    ScanCtx.calculate_checksums = args->calculate_checksums;

    // Archive
    ScanCtx.arc_ctx.mode = args->archive_mode;
    ScanCtx.arc_ctx.log = _log;
    ScanCtx.arc_ctx.logf = _logf;
    ScanCtx.arc_ctx.log = log_callback;
    ScanCtx.arc_ctx.logf = logf_callback;
    ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
    if (args->archive_passphrase != NULL) {
        strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);

@@ -196,17 +138,16 @@ void initialize_scan_context(scan_args_t *args) {
    }

    // Comic
    ScanCtx.comic_ctx.log = _log;
    ScanCtx.comic_ctx.logf = _logf;
    ScanCtx.comic_ctx.store = _store;
    ScanCtx.comic_ctx.log = log_callback;
    ScanCtx.comic_ctx.logf = logf_callback;
    ScanCtx.comic_ctx.store = write_thumbnail_callback;
    ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
    ScanCtx.comic_ctx.tn_size = args->tn_size;
    ScanCtx.comic_ctx.tn_qscale = args->tn_quality;
    ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
    ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
    ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string("application/x-cbr");
    ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string("application/x-cbz");

    // Ebook
    pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
    ScanCtx.ebook_ctx.content_size = args->content_size;
    ScanCtx.ebook_ctx.enable_tn = args->tn_count > 0;
    ScanCtx.ebook_ctx.tn_size = args->tn_size;

@@ -214,25 +155,25 @@ void initialize_scan_context(scan_args_t *args) {
    ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
    ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
    }
    ScanCtx.ebook_ctx.log = _log;
    ScanCtx.ebook_ctx.logf = _logf;
    ScanCtx.ebook_ctx.store = _store;
    ScanCtx.ebook_ctx.log = log_callback;
    ScanCtx.ebook_ctx.logf = logf_callback;
    ScanCtx.ebook_ctx.store = write_thumbnail_callback;
    ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
    ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;

    // Font
    ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
    ScanCtx.font_ctx.log = _log;
    ScanCtx.font_ctx.logf = _logf;
    ScanCtx.font_ctx.store = _store;
    ScanCtx.font_ctx.log = log_callback;
    ScanCtx.font_ctx.logf = logf_callback;
    ScanCtx.font_ctx.store = write_thumbnail_callback;

    // Media
    ScanCtx.media_ctx.tn_qscale = args->tn_quality;
    ScanCtx.media_ctx.tn_size = args->tn_size;
    ScanCtx.media_ctx.tn_count = args->tn_count;
    ScanCtx.media_ctx.log = _log;
    ScanCtx.media_ctx.logf = _logf;
    ScanCtx.media_ctx.store = _store;
    ScanCtx.media_ctx.log = log_callback;
    ScanCtx.media_ctx.logf = logf_callback;
    ScanCtx.media_ctx.store = write_thumbnail_callback;
    ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
    ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
    ScanCtx.media_ctx.read_subtitles = args->tn_count;

@@ -246,32 +187,33 @@ void initialize_scan_context(scan_args_t *args) {
    // OOXML
    ScanCtx.ooxml_ctx.enable_tn = args->tn_count > 0;
    ScanCtx.ooxml_ctx.content_size = args->content_size;
    ScanCtx.ooxml_ctx.log = _log;
    ScanCtx.ooxml_ctx.logf = _logf;
    ScanCtx.ooxml_ctx.store = _store;
    ScanCtx.ooxml_ctx.log = log_callback;
    ScanCtx.ooxml_ctx.logf = logf_callback;
    ScanCtx.ooxml_ctx.store = write_thumbnail_callback;

    // MOBI
    ScanCtx.mobi_ctx.content_size = args->content_size;
    ScanCtx.mobi_ctx.log = _log;
    ScanCtx.mobi_ctx.logf = _logf;
    ScanCtx.mobi_ctx.log = log_callback;
    ScanCtx.mobi_ctx.logf = logf_callback;
    ScanCtx.mobi_ctx.store = write_thumbnail_callback;
    ScanCtx.mobi_ctx.enable_tn = args->tn_count > 0;
    ScanCtx.mobi_ctx.tn_size = args->tn_size;
    ScanCtx.mobi_ctx.tn_qscale = args->tn_quality;

    // TEXT
    ScanCtx.text_ctx.content_size = args->content_size;
    ScanCtx.text_ctx.log = _log;
    ScanCtx.text_ctx.logf = _logf;
    ScanCtx.text_ctx.log = log_callback;
    ScanCtx.text_ctx.logf = logf_callback;

    // MSDOC
    ScanCtx.msdoc_ctx.enable_tn = args->tn_count > 0;
    ScanCtx.msdoc_ctx.tn_size = args->tn_size;
    ScanCtx.msdoc_ctx.content_size = args->content_size;
    ScanCtx.msdoc_ctx.log = _log;
    ScanCtx.msdoc_ctx.logf = _logf;
    ScanCtx.msdoc_ctx.store = _store;
    ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");
    ScanCtx.msdoc_ctx.log = log_callback;
    ScanCtx.msdoc_ctx.logf = logf_callback;
    ScanCtx.msdoc_ctx.store = write_thumbnail_callback;
    ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string("application/msword");

    ScanCtx.threads = args->threads;
    ScanCtx.depth = args->depth;
    ScanCtx.mem_limit = (size_t) args->scan_mem_limit_mib * 1024 * 1024;

    strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
    strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));

@@ -284,176 +226,66 @@ void initialize_scan_context(scan_args_t *args) {
    ScanCtx.raw_ctx.tn_qscale = args->tn_quality;
    ScanCtx.raw_ctx.enable_tn = args->tn_count > 0;
    ScanCtx.raw_ctx.tn_size = args->tn_size;
    ScanCtx.raw_ctx.log = _log;
    ScanCtx.raw_ctx.logf = _logf;
    ScanCtx.raw_ctx.store = _store;
    ScanCtx.raw_ctx.log = log_callback;
    ScanCtx.raw_ctx.logf = logf_callback;
    ScanCtx.raw_ctx.store = write_thumbnail_callback;

    // Wpd
    ScanCtx.wpd_ctx.content_size = args->content_size;
    ScanCtx.wpd_ctx.log = _log;
    ScanCtx.wpd_ctx.logf = _logf;
    ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect");
    ScanCtx.wpd_ctx.log = log_callback;
    ScanCtx.wpd_ctx.logf = logf_callback;
    ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string("application/wordperfect");

    // Json
    ScanCtx.json_ctx.content_size = args->content_size;
    ScanCtx.json_ctx.log = _log;
    ScanCtx.json_ctx.logf = _logf;
    ScanCtx.json_ctx.json_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/json");
    ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
    ScanCtx.json_ctx.log = log_callback;
    ScanCtx.json_ctx.logf = logf_callback;
    ScanCtx.json_ctx.json_mime = mime_get_mime_by_string("application/json");
    ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string("application/ndjson");
}

/**
 * Loads an existing index as the baseline for incremental scanning.
 * 1. load old index files (original+main) => original_table
 * 2. allocate empty table => copy_table
 * 3. allocate empty table => new_table
 * the original_table/copy_table/new_table will be populated in parsing/parse.c:parse
 * and consumed in main.c:save_incremental_index
 *
 * Note: the existing index may or may not be of incremental index form.
 */
void load_incremental_index(const scan_args_t *args) {
    char file_path[PATH_MAX];

    ScanCtx.original_table = incremental_get_table();
    ScanCtx.copy_table = incremental_get_table();
    ScanCtx.new_table = incremental_get_table();

    char descriptor_path[PATH_MAX];
    snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
    index_descriptor_t original_desc = read_index_descriptor(descriptor_path);

    if (strcmp(original_desc.version, Version) != 0) {
        LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
    }

    READ_INDICES(
            file_path,
            args->incremental,
            incremental_read(ScanCtx.original_table, file_path, &original_desc),
            LOG_DEBUG("main.c", "The base index for incremental scan does not have a main index"),
            TRUE
    );

    LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}

/**
 * Saves an incremental index.
 * Before calling this function, the scanner should have finished writing the main index.
 * 1. Build original_table - new_table => delete_table
 * 2. Incrementally copy from old index files [(original+main) /\ copy_table] => index_original.ndjson.zst & store
 */
void save_incremental_index(scan_args_t *args) {
    char dst_path[PATH_MAX];
    char store_path[PATH_MAX];
    char file_path[PATH_MAX];
    char del_path[PATH_MAX];
    snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
    snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
    store_t *source = store_create(store_path, STORE_SIZE_TN);

    LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u",
              g_hash_table_size(ScanCtx.original_table),
              g_hash_table_size(ScanCtx.copy_table),
              g_hash_table_size(ScanCtx.new_table));
    snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
    READ_INDICES(file_path, args->incremental,
                 incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
                 perror("incremental_delete"), 1);
    writer_cleanup();

    READ_INDICES(file_path, args->incremental,
                 incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
                 perror("incremental_copy"), 1);
    writer_cleanup();

    store_destroy(source);

    snprintf(store_path, PATH_MAX, "%stags", args->incremental);
    snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
    store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
    store_copy(source_tags, dst_path);
    store_destroy(source_tags);
}

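The delete step above is plain set arithmetic over the tables: any entry present in original_table but absent from new_table must have been deleted since the previous scan. A toy illustration of that difference with GLib hash tables (the table contents are made up; the real tables are keyed differently):

    #include <glib.h>
    #include <stdio.h>

    // Toy model of incremental_delete: delete set = original - new.
    int main(void) {
        GHashTable *original = g_hash_table_new(g_str_hash, g_str_equal);
        GHashTable *new_scan = g_hash_table_new(g_str_hash, g_str_equal);

        g_hash_table_add(original, "a.txt");
        g_hash_table_add(original, "b.txt");
        g_hash_table_add(new_scan, "a.txt");  // b.txt disappeared since last scan

        GHashTableIter iter;
        gpointer key;
        g_hash_table_iter_init(&iter, original);
        while (g_hash_table_iter_next(&iter, &key, NULL)) {
            if (!g_hash_table_contains(new_scan, key)) {
                printf("deleted since last index: %s\n", (char *) key);
            }
        }
        return 0;
    }
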
/**
 * An index can be either incremental or non-incremental (initial index).
 * For an initial index, there is only the "main" index.
 * For an incremental index, there are, additionally:
 * - An "original" index, referencing all files unchanged since the previous index.
 * - A "delete" index, referencing all files that exist in the previous index, but have been deleted since then.
 * Therefore, for an incremental index, "main"+"original" covers all the current files in the live filesystem,
 * and is orthogonal to the "delete" index. For example, if file A is unchanged, file B was modified and file C
 * was deleted since the previous scan, then A is referenced by "original", B by "main" and C by "delete".
 * When building an incremental index upon an old incremental index, the old "delete" index can be safely ignored.
 */
void sist2_scan(scan_args_t *args) {

    ScanCtx.mime_table = mime_get_mime_table();
    ScanCtx.ext_table = mime_get_ext_table();

    initialize_scan_context(args);

    init_dir(ScanCtx.index.path, args);
    database_scan_begin(args);

    char store_path[PATH_MAX];
    snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
    ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
    LOG_INFOF("main.c", "sist2 v%s", Version);

    snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
    ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);

    scan_print_header();

    if (args->incremental != NULL) {
        load_incremental_index(args);
    }

    ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE, TRUE, ScanCtx.mem_limit);
    ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE);
    tpool_start(ScanCtx.pool);

    ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE, 0);
    tpool_start(ScanCtx.writer_pool);

    if (args->list_path) {
        // Scan using file list
        int list_ret = iterate_file_list(args->list_file);
        if (list_ret != 0) {
            LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret)
            LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret);
        }
    } else {
        // Scan directory recursively
        int walk_ret = walk_directory_tree(ScanCtx.index.desc.root);
        if (walk_ret == -1) {
            LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno)
            LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno);
        }
    }

    tpool_wait(ScanCtx.pool);
    tpool_destroy(ScanCtx.pool);

    tpool_wait(ScanCtx.writer_pool);
    tpool_destroy(ScanCtx.writer_pool);
    LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size);
    LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size);

    LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
    LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
    LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
    LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size)
    LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size)
    database_t *db = database_create(args->output, INDEX_DATABASE);
    database_open(db);

    if (args->incremental != NULL) {
        save_incremental_index(args);
    if (args->incremental != FALSE) {
        database_incremental_scan_end(db);
    }

    generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);

    store_destroy(ScanCtx.index.store);
    store_destroy(ScanCtx.index.meta_store);
    database_generate_stats(db, args->treemap_threshold);
    database_close(db, args->optimize_database);
}

void sist2_index(index_args_t *args) {
    char file_path[PATH_MAX];

    IndexCtx.es_url = args->es_url;
    IndexCtx.es_index = args->es_index;
    IndexCtx.es_insecure_ssl = args->es_insecure_ssl;

@@ -464,91 +296,78 @@ void sist2_index(index_args_t *args) {
        elastic_init(args->force_reset, args->es_mappings, args->es_settings);
    }

    char descriptor_path[PATH_MAX];
    snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
    database_t *db = database_create(args->index_path, INDEX_DATABASE);
    database_open(db);
    index_descriptor_t *desc = database_read_index_descriptor(db);
    database_close(db, FALSE);

    index_descriptor_t desc = read_index_descriptor(descriptor_path);
    LOG_DEBUGF("main.c", "Index version %s", desc->version);

    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)

    if (strcmp(desc.version, Version) != 0) {
        LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc.version, Version)
    if (desc->version_major != VersionMajor) {
        LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc->version, Version);
    }

    DIR *dir = opendir(args->index_path);
    if (dir == NULL) {
        LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
    }

    char path_tmp[PATH_MAX];
    snprintf(path_tmp, sizeof(path_tmp), "%stags", args->index_path);
    IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
    IndexCtx.tags = store_read_all(IndexCtx.tag_store);

    snprintf(path_tmp, sizeof(path_tmp), "%smeta", args->index_path);
    IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
    IndexCtx.meta = store_read_all(IndexCtx.meta_store);

    index_func f;
    if (args->print) {
        f = print_json;
    } else {
        f = index_json;
    }

    IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0, 0);
    IndexCtx.pool = tpool_create(args->threads, args->print == FALSE);
    tpool_start(IndexCtx.pool);

    READ_INDICES(file_path, args->index_path, {
        read_index(file_path, desc.id, desc.type, f);
        LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type);
    }, {}, !args->incremental);
    int cnt = 0;

    // Only read the _delete index if we're sending data to ES
    if (!args->print) {
        snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", args->index_path);
        if (0 == access(file_path, R_OK)) {
            read_lines(file_path, (line_processor_t) {
                    .data = NULL,
                    .func = delete_document
            });
            LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type)
    db = database_create(args->index_path, INDEX_DATABASE);
    database_open(db);
    database_iterator_t *iterator = database_create_document_iterator(db);
    database_document_iter_foreach(json, iterator) {
        char doc_id[SIST_DOC_ID_LEN];
        strcpy(doc_id, cJSON_GetObjectItem(json, "_id")->valuestring);
        cJSON_DeleteItemFromObject(json, "_id");

        if (args->print) {
            print_json(json, doc_id);
        } else {
            index_json(json, doc_id);
            cnt += 1;
        }
        cJSON_Delete(json);
    }

    closedir(dir);
    free(iterator);

    if (!args->print) {
        database_iterator_t *del_iter = database_create_delete_list_iterator(db);
        database_delete_list_iter_foreach(id, del_iter) {
            delete_document(id);
            free(id);
        }
        free(del_iter);
    }

    database_close(db, FALSE);

    tpool_wait(IndexCtx.pool);

    tpool_destroy(IndexCtx.pool);

    if (IndexCtx.needs_es_connection) {
        finish_indexer(args->script, args->async_script, desc.id);
        finish_indexer(args->script, args->async_script, desc->id);
    }

    store_destroy(IndexCtx.tag_store);
    store_destroy(IndexCtx.meta_store);
    g_hash_table_remove_all(IndexCtx.tags);
    g_hash_table_destroy(IndexCtx.tags);
    free(desc);
}

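database_document_iter_foreach() and database_delete_list_iter_foreach() read like custom foreach macros over a database cursor. Their real definitions live in src/database/database.h; the typical shape of such a macro (shown here with hypothetical stand-in types, purely to illustrate the pattern) is a for loop that owns its own loop variable:

    #include <stdio.h>

    /* Hypothetical stand-ins -- the real types live in src/database/database.h. */
    typedef struct { int remaining; } database_iterator_t;
    typedef struct { int id; } doc_t;

    static doc_t docs[3] = {{1}, {2}, {3}};

    static doc_t *iter_next(database_iterator_t *iter) {
        if (iter->remaining == 0) return NULL;
        return &docs[3 - iter->remaining--];
    }

    /* The macro declares the loop variable itself and keeps
     * fetching rows until the cursor is exhausted: */
    #define document_iter_foreach(doc, iter) \
        for (doc_t *(doc) = iter_next(iter); (doc) != NULL; (doc) = iter_next(iter))

    int main(void) {
        database_iterator_t it = {.remaining = 3};
        document_iter_foreach(json, &it) {
            printf("doc %d\n", json->id);
        }
        return 0;
    }
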
void sist2_exec_script(exec_args_t *args) {

    LogCtx.verbose = TRUE;

    char descriptor_path[PATH_MAX];
    snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
    index_descriptor_t desc = read_index_descriptor(descriptor_path);

    IndexCtx.es_url = args->es_url;
    IndexCtx.es_index = args->es_index;
    IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
    IndexCtx.needs_es_connection = TRUE;

    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
    database_t *db = database_create(args->index_path, INDEX_DATABASE);
    database_open(db);

    execute_update_script(args->script, args->async_script, desc.id);
    index_descriptor_t *desc = database_read_index_descriptor(db);
    LOG_DEBUGF("main.c", "Index version %s", desc->version);

    execute_update_script(args->script, args->async_script, desc->id);
    free(args->script);
    database_close(db, FALSE);
}

void sist2_web(web_args_t *args) {

@@ -572,23 +391,17 @@ void sist2_web(web_args_t *args) {

    for (int i = 0; i < args->index_count; i++) {
        char *abs_path = abspath(args->indices[i]);
        if (abs_path == NULL) {
            return;
        }
        char path_tmp[PATH_MAX];

        snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
        WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);

        snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
        mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
        WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);

        snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
        WebCtx.indices[i].desc = read_index_descriptor(path_tmp);

        strcpy(WebCtx.indices[i].path, abs_path);
        LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name)

        WebCtx.indices[i].db = database_create(abs_path, INDEX_DATABASE);
        database_open(WebCtx.indices[i].db);

        index_descriptor_t *desc = database_read_index_descriptor(WebCtx.indices[i].db);
        WebCtx.indices[i].desc = *desc;
        free(desc);

        LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name);
        free(abs_path);
    }

@@ -603,7 +416,7 @@ void sist2_web(web_args_t *args) {
 * Negative number -> Raise error
 * Specified a valid number -> Continue as normal
 */
int set_to_negative_if_value_is_zero(struct argparse *self, const struct argparse_option *option) {
int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct argparse_option *option) {
    int specified_value = *(int *) option->value;

    if (specified_value == 0) {

@@ -616,11 +429,7 @@ int set_to_negative_if_value_is_zero(struct argparse *self, const struct argpars
    }
}


int main(int argc, const char *argv[]) {
    sigsegv_handler = signal(SIGSEGV, sig_handler);
    sigabrt_handler = signal(SIGABRT, sig_handler);

    setlocale(LC_ALL, "");

    scan_args_t *scan_args = scan_args_create();

@@ -640,38 +449,37 @@ int main(int argc, const char *argv[]) {
    struct argparse_option options[] = {
            OPT_HELP(),

            OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
            OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"),
            OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
            OPT_BOOLEAN('v', "version", &arg_version, "Print version and exit."),
            OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging."),
            OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages."),
            OPT_BOOLEAN(0, "json-logs", &LogCtx.json_logs, "Output logs in JSON format."),

            OPT_GROUP("Scan options"),
            OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
            OPT_INTEGER(0, "mem-throttle", &scan_args->scan_mem_limit_mib,
                        "Total memory threshold in MiB for scan throttling. DEFAULT=0",
                        set_to_negative_if_value_is_zero, (intptr_t) &scan_args->scan_mem_limit_mib),
            OPT_FLOAT('q', "thumbnail-quality", &scan_args->tn_quality,
                      "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1",
                      set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
            OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
            OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality,
                        "Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2",
                        set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
            OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
                        "Thumbnail size, in pixels. DEFAULT=500",
                        "Thumbnail size, in pixels. DEFAULT: 552",
                        set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
            OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count,
                        "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1",
                        "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1",
                        set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count),
            OPT_INTEGER(0, "content-size", &scan_args->content_size,
                        "Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
                        "Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768",
                        set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
            OPT_STRING(0, "incremental", &scan_args->incremental,
                       "Reuse an existing index and only scan modified files."),
            OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
            OPT_STRING('o', "output", &scan_args->output, "Output index file path. DEFAULT: index.sist2"),
            OPT_BOOLEAN(0, "incremental", &scan_args->incremental,
                        "If the output file path exists, only scan new or modified files."),
            OPT_BOOLEAN(0, "optimize-index", &scan_args->optimize_database,
                        "Defragment index file after scan to reduce its file size."),
            OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
            OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
            OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: index"),
            OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
                                                       "Use 0 to only scan files in PATH. DEFAULT: -1"),
            OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
                                                          "skip: Don't parse, list: only get file names as text, "
                                                          "shallow: Don't parse archives inside archives. DEFAULT: recurse"),
                                                          "skip: don't scan, list: only save file names as text, "
                                                          "shallow: don't scan archives inside archives. DEFAULT: recurse"),
            OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
                       "Passphrase for encrypted archive files"),

@@ -680,8 +488,8 @@ int main(int argc, const char *argv[]) {
                       "which are installed on your machine)"),
            OPT_BOOLEAN(0, "ocr-images", &scan_args->ocr_images, "Enable OCR'ing of image files."),
            OPT_BOOLEAN(0, "ocr-ebooks", &scan_args->ocr_ebooks, "Enable OCR'ing of ebook files."),
            OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
            OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
            OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned."),
            OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type."),
            OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
                                                                                  "(see USAGE.md). DEFAULT: 0.0005"),
            OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer_mib,

@@ -689,47 +497,52 @@ int main(int argc, const char *argv[]) {
                        "(see USAGE.md). DEFAULT: 2000"),
            OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
            OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
                        "Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
                        "Faster but less accurate EPUB parsing (no thumbnails, metadata)."),
            OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
            OPT_STRING(0, "list-file", &scan_args->list_path, "Specify a list of newline-delimited paths to be scanned"
                                                              " instead of normal directory traversal. Use '-' to read"
                                                              " from stdin."),

            OPT_GROUP("Index options"),
            OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
            OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
            OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
            OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
            OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT: http://localhost:9200"),
            OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
                        "Do not verify SSL connections to Elasticsearch."),
            OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
            OPT_BOOLEAN('p', "print", &index_args->print,
                        "Print JSON documents to stdout instead of indexing to elasticsearch."),
            OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
                        "Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch."),
            OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
            OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
            OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
            OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
            OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
            OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
                                                                      "(You must use this option the first time you use the index command)"),
            OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"),
            OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."),

            OPT_GROUP("Web options"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
            OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
            OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
            OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
            OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
                        "Do not verify SSL connections to Elasticsearch."),
            OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
            OPT_STRING(0, "bind", &web_args->listen_address,
                       "Listen for connections on this address. DEFAULT: localhost:4090"),
            OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
            OPT_STRING(0, "auth0-audience", &web_args->auth0_audience, "API audience/identifier"),
            OPT_STRING(0, "auth0-domain", &web_args->auth0_domain, "Application domain"),
            OPT_STRING(0, "auth0-client-id", &web_args->auth0_client_id, "Application client ID"),
            OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path, "Path to Auth0 public key file extracted from <domain>/pem"),
            OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path,
                       "Path to Auth0 public key file extracted from <domain>/pem"),
            OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
            OPT_STRING(0, "tagline", &web_args->tagline, "Tagline in navbar"),
            OPT_BOOLEAN(0, "dev", &web_args->dev, "Serve html & js files from disk (for development)"),
            OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"),

            OPT_GROUP("Exec-script options"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
            OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
            OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
            OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
                        "Do not verify SSL connections to Elasticsearch."),
            OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
            OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
            OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),

@@ -738,7 +551,11 @@ int main(int argc, const char *argv[]) {

    struct argparse argparse;
    argparse_init(&argparse, options, usage, 0);
    argparse_describe(&argparse, DESCRIPTION, EPILOG);
    argparse_describe(
            &argparse,
            "\nLightning-fast file system indexer and search tool.",
            "\nMade by simon987 <me@simon987.net>. Released under GPL-3.0"
    );
    argc = argparse_parse(&argparse, argc, argv);

    if (arg_version) {

@@ -806,7 +623,7 @@ int main(int argc, const char *argv[]) {

    } else {
        argparse_usage(&argparse);
        LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
        LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0]);
    }
    printf("\n");


41
src/parsing/fs_util.h
Normal file
@@ -0,0 +1,41 @@
#ifndef SIST2_FS_UTIL_H
#define SIST2_FS_UTIL_H

#include "src/sist.h"

#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};

static int fs_read(struct vfile *f, void *buf, size_t size) {
    if (f->fd == -1) {
        SHA1_Init(&f->sha1_ctx);

        f->fd = open(f->filepath, O_RDONLY);
        if (f->fd == -1) {
            return -1;
        }
    }

    int ret = (int) read(f->fd, buf, size);

    if (ret != 0 && f->calculate_checksum) {
        f->has_checksum = TRUE;
        safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret);
    }

    return ret;
}

static void fs_close(struct vfile *f) {
    if (f->fd != -1) {
        SHA1_Final(f->sha1_digest, &f->sha1_ctx);
        close(f->fd);
    }
}

static void fs_reset(struct vfile *f) {
    if (f->fd != -1) {
        lseek(f->fd, 0, SEEK_SET);
    }
}

#endif

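The three static functions above plug into the vfile callback interface, so a parser can stream a file without knowing it came from the filesystem, while the SHA1 is accumulated as a side effect of reading. A minimal usage sketch against the header shown above:

    #include "src/parsing/fs_util.h"  // the header shown above

    // Drain a vfile initialized as in create_fs_parse_job(): fs_read() feeds
    // the running SHA1, and closing finalizes f->sha1_digest.
    static int drain_vfile(struct vfile *f) {
        char buf[4096];
        int ret;
        while ((ret = f->read(f, buf, sizeof(buf))) > 0) {
            // consume buf[0..ret) here
        }
        CLOSE_FILE(*f)
        return ret;  // 0 on EOF, -1 if open() or read() failed
    }
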
32
src/parsing/magic_util.c
Normal file
@@ -0,0 +1,32 @@
#include "magic_util.h"
#include "src/log.h"
#include "mime.h"
#include <magic.h>
#include "src/magic_generated.c"


char *magic_buffer_embedded(void *buffer, size_t buffer_size) {

    magic_t magic = magic_open(MAGIC_MIME_TYPE);

    const char *magic_buffers[1] = {magic_database_buffer,};
    size_t sizes[1] = {sizeof(magic_database_buffer),};

    // TODO optimisation: check if we can reuse the magic instance
    int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);

    if (load_ret != 0) {
        LOG_FATALF("parse.c", "Could not load libmagic database: (%d)", load_ret);
    }

    const char *magic_mime_str = magic_buffer(magic, buffer, buffer_size);
    char *return_value = NULL;

    if (magic_mime_str != NULL) {
        return_value = malloc(strlen(magic_mime_str) + 1);
        strcpy(return_value, magic_mime_str);
    }

    magic_close(magic);
    return return_value;
}

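Typical use of magic_buffer_embedded(): sniff the MIME type of a buffer that was just read, then free the returned copy, since the helper duplicates libmagic's internal string before closing the handle. A short sketch:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #include "magic_util.h"  // declares magic_buffer_embedded()

    int main(void) {
        char buf[] = "%PDF-1.7";  // pretend this was read from a file
        char *mime = magic_buffer_embedded(buf, strlen(buf));
        if (mime != NULL) {
            printf("detected: %s\n", mime);  // e.g. application/pdf
            free(mime);  // the caller owns the returned string
        }
        return 0;
    }
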
8
src/parsing/magic_util.h
Normal file
@@ -0,0 +1,8 @@
#ifndef SIST2_MAGIC_UTIL_H
#define SIST2_MAGIC_UTIL_H

#include <stdio.h>

char *magic_buffer_embedded(void *buffer, size_t buffer_size);

#endif //SIST2_MAGIC_UTIL_H