mirror of
https://github.com/simon987/sist2.git
synced 2025-12-12 15:08:53 +00:00
Compare commits
191 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3da2c8cae3 | |||
| 2f0e999b06 | |||
| bf28dc8993 | |||
| c6fee7f6e2 | |||
| 201c2a1a47 | |||
| 7c46ad632a | |||
| 5b8c13fd13 | |||
| efa4a06e56 | |||
| 81670ee107 | |||
| f9dac80905 | |||
| f8d9b718c0 | |||
| 6f5fdc2935 | |||
| a01f6dff1f | |||
| 22dd58e140 | |||
| f3e07fb7f7 | |||
| 7990e5cd2e | |||
| e3ca660983 | |||
| b87fb25458 | |||
| c7a77869ad | |||
| 523c123e2e | |||
| fc7f30d670 | |||
| 152fe11669 | |||
| 33f97f6bfb | |||
| 71f9dfcfe0 | |||
| 5f657d61b3 | |||
| 908def1016 | |||
| db3d312835 | |||
| 32c9cb28a3 | |||
| f839127129 | |||
| 8111a6c143 | |||
| 707a570828 | |||
|
|
5073b00225 | ||
|
|
4923d1b51f | ||
|
|
097e332015 | ||
|
|
d4babe216b | ||
|
|
44511a2202 | ||
| 50771bd1dc | |||
| bc884e137c | |||
| ce1e241dea | |||
| 5fe9c9efa3 | |||
| 75e4e93ddd | |||
| 013c54daa0 | |||
| 54308ef5e2 | |||
| 638c2a5c1a | |||
| 9587caddd9 | |||
| f5bbe0dc97 | |||
| f87eac1f90 | |||
| ddafbab6a6 | |||
| b91d574756 | |||
| 576140e542 | |||
| 050c1283a3 | |||
| c6e1ba03bc | |||
| 10e32f707f | |||
| 86e83bafaf | |||
| 51a40c8819 | |||
|
|
36281a5108 | ||
|
|
76a0bda48b | ||
| 0cf29a660c | |||
| 6cd0741848 | |||
| bc120f349d | |||
| 8cac8c98d7 | |||
| 30921ac52e | |||
| 95bbe39afc | |||
| 72ce217f9c | |||
| 641a8ec90c | |||
| 7a505c2287 | |||
| 12f162d760 | |||
| 4b4ab12fac | |||
| ae283f77ad | |||
| d3bd53a5ea | |||
| f7887f24d1 | |||
| 5c8de19188 | |||
| d861d278a4 | |||
| b6ddeee0e0 | |||
| 0cd2523b05 | |||
| 5e798f9367 | |||
| 5da6c1488b | |||
| 9568e25f84 | |||
| 6a8027789a | |||
| b1d16d8abf | |||
| b2a157e24d | |||
| 9aead9389a | |||
| a32c68cba8 | |||
| d116cf9d91 | |||
|
|
a020a8b32c | ||
| 5d5d9c3092 | |||
| 3379d5ce71 | |||
| a0ff4a1f01 | |||
| 4589f3bde7 | |||
| 1c898640cf | |||
| a0739d5177 | |||
| 8f9d29dbc6 | |||
| 3ff4b70223 | |||
| 02ad035b09 | |||
| c11feb213d | |||
| 72902947cd | |||
| a18bb81222 | |||
| 1520288f19 | |||
| e507de194b | |||
| 0e517d5e2b | |||
| 8223ef3860 | |||
| 995a196690 | |||
| 465d017e18 | |||
| ca994d3914 | |||
| db2285973f | |||
| 61de9e9f14 | |||
| 3015ef0ff4 | |||
| b55d432841 | |||
| ed90a140ce | |||
| 052df82373 | |||
| 5676136777 | |||
| c061613302 | |||
| d0325fd9b9 | |||
| e05a6f3863 | |||
| f1690a9cca | |||
| 100a264413 | |||
| 29390bb454 | |||
| 4d43036ded | |||
| 0b5cdbd130 | |||
| 53d7695f66 | |||
| 8d53456404 | |||
| cbc08a7cc9 | |||
| e629b4d7d3 | |||
| 22f7073b39 | |||
| 1781a74960 | |||
| db96c95ac7 | |||
| 7b9fa4cc0a | |||
| 5cc1fa86a9 | |||
| 649689ce30 | |||
| c8536f65a8 | |||
| 75b5e249c1 | |||
|
|
f49e03ac79 | ||
| a6d2afc8dc | |||
| 8f8f66ba05 | |||
| 1d9fcf7105 | |||
| 8127745f2b | |||
| 230988d6d1 | |||
| 13f4dbed2d | |||
| ed15e89f45 | |||
| c636d3d921 | |||
| 7e92d4b7d1 | |||
| 8ffe780ab2 | |||
| d3c8928fe8 | |||
| d9f628fca4 | |||
| 68289268c1 | |||
| 649c50c465 | |||
| 7b49a0dc49 | |||
| eb559b53aa | |||
| 6d01f9c0df | |||
| e724fec668 | |||
| fe5e93b300 | |||
| ecad85fd7d | |||
| 74cc898259 | |||
| dc2e4443c4 | |||
| 1a64431b52 | |||
|
|
9bad515e06 | ||
| 648559cedb | |||
| 3e6cd9cd5c | |||
| f249992798 | |||
|
|
e9645ecdaa | ||
| 046edea0e2 | |||
| a011b7e97b | |||
| 8c1c1697e0 | |||
| 018b49fa4c | |||
| 27b4e6403e | |||
| 13fdbd9e69 | |||
| 5e7fdaf8dd | |||
| 19d5c8ac9f | |||
| 99497049a8 | |||
|
|
1a3181d78b | ||
| 449aa77c8f | |||
| 3058c55510 | |||
| dedf9287b2 | |||
| ab199b0c0c | |||
| c4fbae123e | |||
| dd2397ef5c | |||
| ee0f71f4d3 | |||
| 0bbb96b149 | |||
| 78f6e16701 | |||
| 4625bca9a9 | |||
| f2ae653886 | |||
| 5686bc864d | |||
| cf513b4ad8 | |||
| 013423424e | |||
| 16514fd6b0 | |||
| 27509f97e1 | |||
| 4c540eae1c | |||
| d2b53ff6fc | |||
| 0ef4292abf | |||
| e6fde38c24 | |||
| 5fa343d40f |
25
.dockerignore
Normal file
25
.dockerignore
Normal file
@@ -0,0 +1,25 @@
|
||||
.idea
|
||||
*/thumbs
|
||||
*.cbp
|
||||
CMakeCache.txt
|
||||
CMakeFiles
|
||||
cmake-build-debug
|
||||
cmake_install.cmake
|
||||
Makefile
|
||||
*.out
|
||||
LOG
|
||||
sist2*
|
||||
index.sist2/
|
||||
bundle*.css
|
||||
bundle.js
|
||||
**/*.a
|
||||
**/vgcore.*
|
||||
build/
|
||||
.git/
|
||||
third-party/libscan/libscan-test-files/
|
||||
**/ext_ffmpeg
|
||||
**/ext_libmobi
|
||||
**/scan_a_test
|
||||
Dockerfile
|
||||
*.idx/
|
||||
VERSION
|
||||
72
.drone.yml
Normal file
72
.drone.yml
Normal file
@@ -0,0 +1,72 @@
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: amd64
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: amd64
|
||||
|
||||
steps:
|
||||
- name: build
|
||||
image: simon987/sist2-build
|
||||
commands:
|
||||
- ./ci/build.sh
|
||||
- name: docker
|
||||
image: plugins/docker
|
||||
settings:
|
||||
username:
|
||||
from_secret: DOCKER_USER
|
||||
password:
|
||||
from_secret: DOCKER_PASSWORD
|
||||
repo: simon987/sist2
|
||||
context: ./
|
||||
dockerfile: ./Dockerfile
|
||||
auto_tag: true
|
||||
auto_tag_suffix: x64-linux
|
||||
when:
|
||||
event:
|
||||
- tag
|
||||
- name: scp files
|
||||
image: appleboy/drone-scp
|
||||
settings:
|
||||
host:
|
||||
from_secret: SSH_HOST
|
||||
port:
|
||||
from_secret: SSH_PORT
|
||||
user:
|
||||
from_secret: SSH_USER
|
||||
key:
|
||||
from_secret: SSH_KEY
|
||||
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
|
||||
source:
|
||||
- ./VERSION
|
||||
- ./sist2-x64-linux
|
||||
- ./sist2-x64-linux-debug
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: arm64
|
||||
|
||||
platform:
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: build
|
||||
image: simon987/sist2-build-arm64
|
||||
commands:
|
||||
- ./ci/build_arm64.sh
|
||||
- name: scp files
|
||||
image: appleboy/drone-scp
|
||||
settings:
|
||||
host:
|
||||
from_secret: SSH_HOST
|
||||
port:
|
||||
from_secret: SSH_PORT
|
||||
user:
|
||||
from_secret: SSH_USER
|
||||
key:
|
||||
from_secret: SSH_KEY
|
||||
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
|
||||
source:
|
||||
- ./sist2-arm64-linux
|
||||
40
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
40
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
---
|
||||
name: "🐞 Bug Report"
|
||||
about: Submit a bug report
|
||||
title: ''
|
||||
labels: bug
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Device Information (please complete the following information):**
|
||||
- OS: `[e.g., Ubuntu 20.04, WSL2]`
|
||||
- Deployment: `[Linux, Linux ARM64 or Docker]`
|
||||
- Browser *(if relevant)*: `[e.g., chrome, safari]`
|
||||
- SIST2 Version: `[e.g., v2.9.0]`
|
||||
- Elasticsearch Version *(if relevant)* : ``
|
||||
|
||||
**Command with arguments**
|
||||
<!-- e.g. `scan ~/Documents -o ./i2 --threads 3 -q 1.0` -->
|
||||
|
||||
**Describe the bug**
|
||||
<!-- A clear and concise description of what the bug is. -->
|
||||
|
||||
**Steps To Reproduce**
|
||||
Please be specific!
|
||||
1. Go to '...'
|
||||
2. Click on '....'
|
||||
3. etc.
|
||||
|
||||
**Expected behavior**
|
||||
<!-- A clear and concise description of what you expected to happen. -->
|
||||
|
||||
**Actual Behavior**
|
||||
<!-- A clear and concise description of what actually happens. -->
|
||||
|
||||
**Screenshots**
|
||||
<!-- If applicable, add screenshots to help explain your problem. -->
|
||||
|
||||
**Additional context**
|
||||
<!-- Add any other context about the problem here. If applicable, please include why you think the bug is occurring and/or troubleshooting you have already performed. -->
|
||||
<!-- If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net. -->
|
||||
5
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
5
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
blank_issues_enabled: false
|
||||
contact_links:
|
||||
- name: SIST2 Documentation
|
||||
url: https://github.com/simon987/sist2/blob/master/docs/USAGE.md
|
||||
about: Check out the SIST2 documentation for answers to common questions
|
||||
18
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
18
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
name: "🚀 Feature Request"
|
||||
about: Suggest an idea for SIST2
|
||||
title: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
**Which SIST2 component is your Feature Request related to?**
|
||||
<!-- e.g., Scan, Index, or Web? -->
|
||||
|
||||
**Is your feature request related to a problem? Please describe.**
|
||||
<!-- A clear and concise description of what the problem is. e.g., "I'm always frustrated when [...]" -->
|
||||
|
||||
**What would you like to see happen?**
|
||||
<!-- A clear and concise description of what you want to happen. -->
|
||||
|
||||
**Additional context**
|
||||
<!-- Add any other context or screenshots about the feature request here. -->
|
||||
4
.github/ISSUE_TEMPLATE/issue-template.md
vendored
4
.github/ISSUE_TEMPLATE/issue-template.md
vendored
@@ -9,7 +9,9 @@ assignees: ''
|
||||
|
||||
sist2 version:
|
||||
|
||||
Platform (please indicate if you're using Docker):
|
||||
Platform (Linux or Docker, x86-64 or arm64):
|
||||
|
||||
Elasticsearch version:
|
||||
|
||||
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0`
|
||||
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,6 +1,5 @@
|
||||
.idea
|
||||
thumbs
|
||||
test
|
||||
*.cbp
|
||||
CMakeCache.txt
|
||||
CMakeFiles
|
||||
@@ -17,3 +16,5 @@ bundle.js
|
||||
vgcore.*
|
||||
build/
|
||||
third-party/
|
||||
*.idx/
|
||||
VERSION
|
||||
69
.teamcity/settings.kts
vendored
69
.teamcity/settings.kts
vendored
@@ -1,69 +0,0 @@
|
||||
import jetbrains.buildServer.configs.kotlin.v2019_2.*
|
||||
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.ExecBuildStep
|
||||
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.exec
|
||||
import jetbrains.buildServer.configs.kotlin.v2019_2.triggers.vcs
|
||||
import jetbrains.buildServer.configs.kotlin.v2019_2.vcs.GitVcsRoot
|
||||
|
||||
/*
|
||||
The settings script is an entry point for defining a TeamCity
|
||||
project hierarchy. The script should contain a single call to the
|
||||
project() function with a Project instance or an init function as
|
||||
an argument.
|
||||
|
||||
VcsRoots, BuildTypes, Templates, and subprojects can be
|
||||
registered inside the project using the vcsRoot(), buildType(),
|
||||
template(), and subProject() methods respectively.
|
||||
|
||||
To debug settings scripts in command-line, run the
|
||||
|
||||
mvnDebug org.jetbrains.teamcity:teamcity-configs-maven-plugin:generate
|
||||
|
||||
command and attach your debugger to the port 8000.
|
||||
|
||||
To debug in IntelliJ Idea, open the 'Maven Projects' tool window (View
|
||||
-> Tool Windows -> Maven Projects), find the generate task node
|
||||
(Plugins -> teamcity-configs -> teamcity-configs:generate), the
|
||||
'Debug' option is available in the context menu for the task.
|
||||
*/
|
||||
|
||||
version = "2019.2"
|
||||
|
||||
project {
|
||||
|
||||
vcsRoot(HttpsGithubComSimon987sist2refsHeadsMaster)
|
||||
|
||||
buildType(Build)
|
||||
}
|
||||
|
||||
object Build : BuildType({
|
||||
name = "Build"
|
||||
|
||||
artifactRules = """
|
||||
sist2
|
||||
sist2_scan
|
||||
""".trimIndent()
|
||||
|
||||
vcs {
|
||||
root(HttpsGithubComSimon987sist2refsHeadsMaster)
|
||||
}
|
||||
|
||||
steps {
|
||||
exec {
|
||||
name = "Build"
|
||||
path = "./ci/build.sh"
|
||||
dockerImage = "simon987/general_ci"
|
||||
dockerImagePlatform = ExecBuildStep.ImagePlatform.Linux
|
||||
dockerPull = true
|
||||
}
|
||||
}
|
||||
|
||||
triggers {
|
||||
vcs {
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
object HttpsGithubComSimon987sist2refsHeadsMaster : GitVcsRoot({
|
||||
name = "https://github.com/simon987/sist2#refs/heads/master"
|
||||
url = "https://github.com/simon987/sist2"
|
||||
})
|
||||
@@ -5,12 +5,16 @@ project(sist2 C)
|
||||
|
||||
option(SIST_DEBUG "Build a debug executable" on)
|
||||
|
||||
set(BUILD_TESTS on)
|
||||
add_subdirectory(third-party/libscan)
|
||||
set(ARGPARSE_SHARED off)
|
||||
add_subdirectory(third-party/argparse)
|
||||
|
||||
add_executable(
|
||||
sist2
|
||||
add_executable(sist2
|
||||
|
||||
# argparse
|
||||
third-party/argparse/argparse.h third-party/argparse/argparse.c
|
||||
|
||||
src/main.c
|
||||
src/sist.h
|
||||
src/io/walk.h src/io/walk.c
|
||||
@@ -25,23 +29,21 @@ add_executable(
|
||||
src/util.c src/util.h
|
||||
src/ctx.h src/types.h
|
||||
src/log.c src/log.h
|
||||
|
||||
# argparse
|
||||
third-party/argparse/argparse.h third-party/argparse/argparse.c
|
||||
|
||||
src/cli.c src/cli.h
|
||||
)
|
||||
src/stats.c src/stats.h src/ctx.c
|
||||
src/parsing/sidecar.c src/parsing/sidecar.h)
|
||||
|
||||
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
|
||||
|
||||
find_package(PkgConfig REQUIRED)
|
||||
|
||||
pkg_search_module(GLIB REQUIRED glib-2.0)
|
||||
|
||||
find_package(lmdb CONFIG REQUIRED)
|
||||
find_package(cJSON CONFIG REQUIRED)
|
||||
find_package(unofficial-glib CONFIG REQUIRED)
|
||||
find_package(unofficial-mongoose CONFIG REQUIRED)
|
||||
find_library(UUID_LIB NAMES uuid)
|
||||
|
||||
#find_package(OpenSSL REQUIRED)
|
||||
find_package(CURL CONFIG REQUIRED)
|
||||
|
||||
|
||||
target_include_directories(
|
||||
@@ -50,13 +52,13 @@ target_include_directories(
|
||||
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
|
||||
${CMAKE_SOURCE_DIR}/third-party/libscan/
|
||||
${CMAKE_SOURCE_DIR}/
|
||||
${GLIB_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
target_compile_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
-fPIC
|
||||
-Werror
|
||||
)
|
||||
|
||||
if (SIST_DEBUG)
|
||||
@@ -67,12 +69,13 @@ if (SIST_DEBUG)
|
||||
-fstack-protector
|
||||
-fno-omit-frame-pointer
|
||||
-fsanitize=address
|
||||
-fno-inline
|
||||
# -O2
|
||||
)
|
||||
target_link_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
-fsanitize=address
|
||||
# -static
|
||||
)
|
||||
set_target_properties(
|
||||
sist2
|
||||
@@ -102,14 +105,15 @@ target_link_libraries(
|
||||
lmdb
|
||||
cjson
|
||||
argparse
|
||||
unofficial::glib::glib
|
||||
${GLIB_LDFLAGS}
|
||||
unofficial::mongoose::mongoose
|
||||
# OpenSSL::SSL OpenSSL::Crypto
|
||||
CURL::libcurl
|
||||
|
||||
${UUID_LIB}
|
||||
pthread
|
||||
magic
|
||||
|
||||
c
|
||||
|
||||
scan
|
||||
)
|
||||
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
rm ./sist2 sist2_debug
|
||||
cp ../sist2.gz .
|
||||
gzip -d sist2.gz
|
||||
strip sist2
|
||||
|
||||
version=$(./sist2 --version)
|
||||
|
||||
echo "Version ${version}"
|
||||
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
|
||||
|
||||
docker push simon987/sist2:${version}
|
||||
docker push simon987/sist2:latest
|
||||
|
||||
docker run --rm simon987/sist2 -v
|
||||
@@ -1,9 +1,15 @@
|
||||
FROM ubuntu:19.10
|
||||
FROM simon987/sist2-build as build
|
||||
MAINTAINER simon987 <me@simon987.net>
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
|
||||
curl libtiff5 libpng16-16 libpcre3
|
||||
WORKDIR /build/
|
||||
ADD . /build/
|
||||
RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||
RUN make -j$(nproc)
|
||||
RUN strip sist2
|
||||
|
||||
FROM ubuntu:20.10
|
||||
|
||||
RUN apt update && apt install -y curl
|
||||
|
||||
RUN mkdir -p /usr/share/tessdata && \
|
||||
cd /usr/share/tessdata/ && \
|
||||
@@ -12,9 +18,9 @@ RUN mkdir -p /usr/share/tessdata && \
|
||||
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
|
||||
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
|
||||
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
||||
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
|
||||
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
|
||||
|
||||
ADD sist2 /root/sist2
|
||||
COPY --from=build /build/sist2 /root/sist2
|
||||
|
||||
ENV LANG C.UTF-8
|
||||
ENV LC_ALL C.UTF-8
|
||||
28
Dockerfile.arm64
Normal file
28
Dockerfile.arm64
Normal file
@@ -0,0 +1,28 @@
|
||||
FROM simon987/sist2-build-arm64 as build
|
||||
MAINTAINER simon987 <me@simon987.net>
|
||||
|
||||
WORKDIR /build/
|
||||
ADD . /build/
|
||||
RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||
RUN make -j$(nproc)
|
||||
RUN strip sist2
|
||||
|
||||
FROM ubuntu:20.10
|
||||
|
||||
RUN apt update && apt install -y curl
|
||||
|
||||
RUN mkdir -p /usr/share/tessdata && \
|
||||
cd /usr/share/tessdata/ && \
|
||||
curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
|
||||
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
|
||||
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
|
||||
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
|
||||
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
|
||||
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
|
||||
|
||||
COPY --from=build /build/sist2 /root/sist2
|
||||
|
||||
ENV LANG C.UTF-8
|
||||
ENV LC_ALL C.UTF-8
|
||||
|
||||
ENTRYPOINT ["/root/sist2"]
|
||||
92
README.md
92
README.md
@@ -1,6 +1,8 @@
|
||||

|
||||
[](https://www.codefactor.io/repository/github/simon987/sist2)
|
||||
[/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
|
||||
[](https://files.simon987.net/.gate/sist2/simon987_sist2/)
|
||||
|
||||
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)
|
||||
|
||||
# sist2
|
||||
|
||||
@@ -15,18 +17,20 @@ sist2 (Simple incremental search tool)
|
||||
* Fast, low memory usage, multi-threaded
|
||||
* Mobile-friendly Web interface
|
||||
* Portable (all its features are packaged in a single executable)
|
||||
* Extracts text from common file types \*
|
||||
* Extracts text and metadata from common file types \*
|
||||
* Generates thumbnails \*
|
||||
* Incremental scanning
|
||||
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
|
||||
* Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
|
||||
* Recursive scan inside archive files \*\*
|
||||
* OCR support with tesseract \*\*\*
|
||||
|
||||
* Stats page & disk utilisation visualization
|
||||
|
||||
\* See [format support](#format-support)
|
||||
\*\* See [Archive files](#archive-files)
|
||||
\*\*\* See [OCR](#ocr)
|
||||
|
||||

|
||||
|
||||
## Getting Started
|
||||
|
||||
1. Have an Elasticsearch (>= 6.X.X) instance running
|
||||
@@ -46,85 +50,103 @@ sist2 (Simple incremental search tool)
|
||||
```
|
||||
1. Download sist2 executable
|
||||
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
|
||||
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
|
||||
1. *(or)* `docker pull simon987/sist2:latest`
|
||||
|
||||
1. See [Usage guide](DOCS/USAGE.md)
|
||||
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)*
|
||||
1. *(or)* `docker pull simon987/sist2:2.10.1-x64-linux`
|
||||
|
||||
1. See [Usage guide](docs/USAGE.md)
|
||||
|
||||
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
|
||||
|
||||
|
||||
## Example usage
|
||||
|
||||
See [Usage guide](DOCS/USAGE.md) for more details
|
||||
See [Usage guide](docs/USAGE.md) for more details
|
||||
|
||||
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
|
||||
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
|
||||
1. Start web interface: `sist2 web ./docs_idx`
|
||||
|
||||
|
||||
## Format support
|
||||
|
||||
File type | Library | Content | Thumbnail | Metadata
|
||||
File type | Library | Content | Thumbnail | Metadata
|
||||
:---|:---|:---|:---|:---
|
||||
pdf,xps,cbz,cbr,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
|
||||
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
|
||||
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
|
||||
`image/*` | ffmpeg | - | yes, `jpeg` | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
|
||||
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
|
||||
cbz,cbr | *(none)* | - | yes | - |
|
||||
`audio/*` | ffmpeg | - | yes | ID3 tags |
|
||||
`video/*` | ffmpeg | - | yes | title, comment, artist |
|
||||
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
|
||||
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
|
||||
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
||||
`text/plain` | *(none)* | yes | no | - |
|
||||
html, xml | *(none)* | yes | no | - |
|
||||
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
||||
docx, xlsx, pptx | *(none)* | yes | no | creator, modified_by, title |
|
||||
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
|
||||
doc (MS Word 97-2003) | antiword | yes | yes | author, title |
|
||||
mobi, azw, azw3 | libmobi | yes | no | author, title |
|
||||
|
||||
\* *See [Archive files](#archive-files)*
|
||||
|
||||
### Archive files
|
||||
**sist2** will scan files stored into archive files (zip, tar, 7z...) as if
|
||||
they were directly in the file system. Recursive (archives inside archives)
|
||||
|
||||
**sist2** will scan files stored in archive files (zip, tar, 7z...) as if they were directly in the file system.
|
||||
Recursive (archives inside archives)
|
||||
scan is also supported.
|
||||
|
||||
**Limitations**:
|
||||
* Parsing media files with formats that require
|
||||
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
|
||||
|
||||
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
|
||||
is limited (see `--mem-buffer` option)
|
||||
* Archive files are scanned sequentially, by a single thread. On systems where
|
||||
**sist2** is not I/O bound, scans might be faster when larger archives are split
|
||||
into smaller parts.
|
||||
|
||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||
|
||||
**sist2** is not I/O bound, scans might be faster when larger archives are split into smaller parts.
|
||||
|
||||
### OCR
|
||||
|
||||
You can enable OCR support for pdf,xps,cbz,cbr,fb2,epub file types with the
|
||||
`--ocr <lang>` option. Download the language data files with your
|
||||
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
|
||||
You can enable OCR support for pdf,xps,fb2,epub file types with the
|
||||
`--ocr <lang>` option. Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
|
||||
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
|
||||
|
||||
The `simon987/sist2` image comes with common languages
|
||||
(hin, jpn, eng, fra, rus, spa) pre-installed.
|
||||
|
||||
Examples
|
||||
|
||||
```bash
|
||||
sist2 scan --ocr jpn ~/Books/Manga/
|
||||
sist2 scan --ocr eng ~/Books/Textbooks/
|
||||
```
|
||||
|
||||
|
||||
## Build from source
|
||||
|
||||
You can compile **sist2** by yourself if you don't want to use the pre-compiled
|
||||
binaries (GCC 7+ required).
|
||||
You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries
|
||||
|
||||
### With docker (recommended)
|
||||
|
||||
```bash
|
||||
git clone --recursive https://github.com/simon987/sist2/
|
||||
cd sist2
|
||||
docker build . -f ./Dockerfile -t my-sist2-image
|
||||
docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
|
||||
```
|
||||
|
||||
### On a linux computer
|
||||
|
||||
1. Install compile-time dependencies
|
||||
|
||||
```bash
|
||||
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic
|
||||
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
|
||||
```
|
||||
|
||||
2. Build
|
||||
1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
|
||||
|
||||
1. Install vcpkg dependencies
|
||||
|
||||
```bash
|
||||
vcpkg install curl[core,openssl]
|
||||
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libuuid libmagic libraw jasper lcms gumbo
|
||||
```
|
||||
|
||||
1. Build
|
||||
```bash
|
||||
git clone --recursive https://github.com/simon987/sist2/
|
||||
cmake -D <VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
|
||||
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
|
||||
make
|
||||
```
|
||||
|
||||
19
ci/build.sh
19
ci/build.sh
@@ -1,16 +1,19 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
VCPKG_ROOT="/vcpkg"
|
||||
|
||||
rm *.gz
|
||||
rm *.gz &>/dev/null
|
||||
|
||||
git submodule update --init --recursive
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||
make
|
||||
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j $(nproc)
|
||||
strip sist2
|
||||
gzip -9 sist2
|
||||
./sist2 -v > VERSION
|
||||
mv sist2 sist2-x64-linux
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||
make
|
||||
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
|
||||
tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2
|
||||
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j $(nproc)
|
||||
mv sist2_debug sist2-x64-linux-debug
|
||||
13
ci/build_arm64.sh
Executable file
13
ci/build_arm64.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
VCPKG_ROOT="/vcpkg"
|
||||
|
||||
rm *.gz &>/dev/null
|
||||
|
||||
git submodule update --init --recursive
|
||||
|
||||
rm -rf CMakeFiles CMakeCache.txt
|
||||
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
|
||||
make -j $(nproc)
|
||||
strip sist2
|
||||
mv sist2 sist2-arm64-linux
|
||||
205
docs/USAGE.md
205
docs/USAGE.md
@@ -14,47 +14,65 @@
|
||||
* [examples](#web-examples)
|
||||
* [rewrite_url](#rewrite_url)
|
||||
* [link to specific indices](#link-to-specific-indices)
|
||||
* [exec-script](#exec-script)
|
||||
* [tagging](#tagging)
|
||||
* [sidecar files](#sidecar-files)
|
||||
|
||||
```
|
||||
Usage: sist2 scan [OPTION]... PATH
|
||||
or: sist2 index [OPTION]... INDEX
|
||||
or: sist2 web [OPTION]... INDEX...
|
||||
or: sist2 exec-script [OPTION]... INDEX
|
||||
Lightning-fast file system indexer and search tool.
|
||||
|
||||
-h, --help show this help message and exit
|
||||
-v, --version Show version and exit
|
||||
--verbose Turn on logging
|
||||
--very-verbose Turn on debug messages
|
||||
-h, --help show this help message and exit
|
||||
-v, --version Show version and exit
|
||||
--verbose Turn on logging
|
||||
--very-verbose Turn on debug messages
|
||||
|
||||
Scan options
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
|
||||
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
|
||||
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
|
||||
--incremental=<str> Reuse an existing index and only scan modified files.
|
||||
-o, --output=<str> Output directory. DEFAULT=index.sist2/
|
||||
--rewrite-url=<str> Serve files from this url instead of from disk.
|
||||
--name=<str> Index display name. DEFAULT: (name of the directory)
|
||||
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
|
||||
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
|
||||
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
|
||||
-e, --exclude=<str> Files that match this regex will not be scanned
|
||||
--fast Only index file names & mime type
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
|
||||
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
|
||||
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
|
||||
--incremental=<str> Reuse an existing index and only scan modified files.
|
||||
-o, --output=<str> Output directory. DEFAULT=index.sist2/
|
||||
--rewrite-url=<str> Serve files from this url instead of from disk.
|
||||
--name=<str> Index display name. DEFAULT: (name of the directory)
|
||||
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
|
||||
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
|
||||
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
|
||||
-e, --exclude=<str> Files that match this regex will not be scanned
|
||||
--fast Only index file names & mime type
|
||||
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
|
||||
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
|
||||
--read-subtitles Read subtitles from media files
|
||||
|
||||
Index options
|
||||
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
|
||||
-p, --print Just print JSON documents to stdout.
|
||||
--script-file=<str> Path to user script.
|
||||
--batch-size=<int> Index batch size. DEFAULT: 100
|
||||
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
-p, --print Just print JSON documents to stdout.
|
||||
--script-file=<str> Path to user script.
|
||||
--mappings-file=<str> Path to Elasticsearch mappings.
|
||||
--settings-file=<str> Path to Elasticsearch settings.
|
||||
--async-script Execute user script asynchronously.
|
||||
--batch-size=<int> Index batch size. DEFAULT: 100
|
||||
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
|
||||
|
||||
Web options
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--bind=<str> Listen on this address. DEFAULT=localhost
|
||||
--port=<str> Listen on this port. DEFAULT=4090
|
||||
--auth=<str> Basic auth in user:password format
|
||||
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
--bind=<str> Listen on this address. DEFAULT=localhost:4090
|
||||
--auth=<str> Basic auth in user:password format
|
||||
--tag-auth=<str> Basic auth in user:password format for tagging
|
||||
|
||||
Exec-script options
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
--script-file=<str> Path to user script.
|
||||
--async-script Execute user script asynchronously.
|
||||
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
```
|
||||
|
||||
## Scan
|
||||
@@ -62,7 +80,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
### Scan options
|
||||
|
||||
* `-t, --threads`
|
||||
Number of threads for file parsing. **Do not set a number higher than `$(nproc)`!**.
|
||||
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-WmiObject Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
|
||||
* `-q, --quality`
|
||||
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
|
||||
* `--size`
|
||||
@@ -74,7 +92,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
|
||||
will be copied to the new index and will not be parsed again.
|
||||
* `-o, --output` Output directory.
|
||||
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
|
||||
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
|
||||
* `--name` Set the `name` option for the web module
|
||||
* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
|
||||
* `--archive` Archive file mode.
|
||||
@@ -94,6 +112,19 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
|
||||
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
|
||||
* `--fast` Only index file names and mime type
|
||||
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
|
||||
will not be considered for the disk utilisation visualization; their size will be added to
|
||||
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
|
||||
and so on.
|
||||
|
||||
In effect, smaller `treemap-threshold` values will yield a more detailed
|
||||
(but also a more cluttered and harder to read) visualization.
|
||||
|
||||
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
|
||||
larger than this number will be read sequentially and no *seek* operations will be supported.
|
||||
|
||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
|
||||
|
||||
### Scan examples
|
||||
|
||||
@@ -122,7 +153,18 @@ documents.idx/
|
||||
├── _index_139965425223424
|
||||
├── _index_139965433616128
|
||||
├── _index_139965442008832
|
||||
└── thumbs
|
||||
├── _index_139965442008832
|
||||
├── treemap.csv
|
||||
├── agg_mime.csv
|
||||
├── agg_date.csv
|
||||
├── agg_size.csv
|
||||
├── thumbs/
|
||||
| ├── data.mdb
|
||||
| └── lock.mdb
|
||||
├── tags/
|
||||
| ├── data.mdb
|
||||
| └── lock.mdb
|
||||
└── meta/
|
||||
├── data.mdb
|
||||
└── lock.mdb
|
||||
```
|
||||
@@ -137,6 +179,8 @@ database containing the thumbnails.
|
||||
The `descriptor.json` file contains general information about the index. The
|
||||
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
|
||||
|
||||
The `.csv` files are pre-computed aggregations necessary for the stats page.
|
||||
|
||||
|
||||
*Advanced usage*
|
||||
|
||||
@@ -147,9 +191,11 @@ by a third party application. The 'external' index must have the following forma
|
||||
my_index/
|
||||
├── descriptor.json
|
||||
├── _index_0
|
||||
└── thumbs
|
||||
├── data.mdb
|
||||
└── lock.mdb
|
||||
└── thumbs/
|
||||
| ├── data.mdb
|
||||
| └── lock.mdb
|
||||
└── meta/
|
||||
└── <empty>
|
||||
```
|
||||
|
||||
*descriptor.json*:
|
||||
@@ -197,9 +243,11 @@ The `_text.*` items will be indexed and searchable as **text** fields (fuzzy sea
|
||||
|
||||
*thumbs/*:
|
||||
|
||||
LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
|
||||
LMDB key-value store. Keys are **binary** 16-byte md5 hashes* (`_id` field)
|
||||
and values are raw image bytes.
|
||||
|
||||
*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
|
||||
|
||||
Importing an external `binary` type index is technically possible but
|
||||
it is currently unsupported and has no guarantees of back/forward compatibility.
|
||||
|
||||
@@ -209,17 +257,25 @@ it is currently unsupported and has no guaranties of back/forward compatibility.
|
||||
* `--es-url`
|
||||
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
|
||||
same network.
|
||||
* `--es-index`
|
||||
Elasticsearch index name. DEFAULT=sist2
|
||||
* `-p, --print`
|
||||
Print index in JSON format to stdout.
|
||||
* `--script-file`
|
||||
Path to user script. See [Scripting](scripting/README.md).
|
||||
Path to user script. See [Scripting](scripting.md).
|
||||
* `--mappings-file`
|
||||
Path to custom Elasticsearch mappings. If none is specified, [the bundled mappings](https://github.com/simon987/sist2/tree/master/schema) will be used.
|
||||
* `--settings-file`
|
||||
Path to custom Elasticsearch settings. *(See above)*
|
||||
* `--async-script`
|
||||
Use `wait_for_completion=false` elasticsearch option while executing user script.
|
||||
(See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
|
||||
* `--batch-size=<int>`
|
||||
Index batch size. Indexing is generally faster with larger batches, but payloads that
|
||||
are too large will fail and additional overhead for retrying with smaller sizes may slow
|
||||
down the process.
|
||||
* `-f, --force-reset`
|
||||
Reset Elasticsearch mappings and settings.
|
||||
**(You must use this option the first time you use the index command)**.
|
||||
|
||||
### Index examples
|
||||
|
||||
@@ -243,15 +299,18 @@ sist2 index --print ./my_index/ | jq | less
|
||||
|
||||
### Web options
|
||||
* `--es-url=<str>` Elasticsearch url.
|
||||
* `--es-index`
|
||||
Elasticsearch index name. DEFAULT=sist2
|
||||
* `--bind=<str>` Listen on this address.
|
||||
* `--port=<str>` Listen on this port.
|
||||
* `--auth=<str>` Basic auth in user:password format
|
||||
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
|
||||
`--auth` argument, but authentication is only applied to the `/tag/` endpoint.
|
||||
|
||||
### Web examples
|
||||
|
||||
**Single index**
|
||||
```bash
|
||||
sist2 web --auth admin:hunter2 --bind 0.0.0.0 --port 8888 my_index
|
||||
sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
|
||||
```
|
||||
|
||||
**Multiple indices**
|
||||
@@ -273,3 +332,73 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
|
||||
To link to specific indices, you can add a list of comma-separated index names to
|
||||
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
|
||||
not displayed.
|
||||
|
||||
## exec-script
|
||||
|
||||
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
|
||||
|
||||
|
||||
# Tagging
|
||||
|
||||
### Manual tagging
|
||||
|
||||
You can modify tags of individual documents directly from the
|
||||
`web` interface. Note that you can set up authentication for this feature
|
||||
with the `--tag-auth` option (See [web options](#web-options))
|
||||
|
||||

|
||||
|
||||
Tags that are manually added are saved both in the
|
||||
index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing,
|
||||
they are read from the index and automatically applied.
|
||||
|
||||
You can safely copy the `/tags/` database to another index.
|
||||
|
||||
See [Automatic tagging](#automatic-tagging) for information about tag
|
||||
hierarchies and tag colors.
|
||||
|
||||
\* *It can take a few seconds to take effect in new search queries.*
|
||||
|
||||
|
||||
### Automatic tagging
|
||||
|
||||
See [scripting](scripting.md) documentation.
|
||||
|
||||
# Sidecar files
|
||||
|
||||
When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
|
||||
original document's metadata. Sidecar metadata files will also work inside archives.
|
||||
Sidecar files themselves are not saved in the index.
|
||||
|
||||
This feature is useful to leverage third-party applications such as speech-to-text or
|
||||
OCR to add additional metadata to a file.
|
||||
|
||||
**Example**
|
||||
|
||||
```
|
||||
~/Documents/
|
||||
├── Video.mp4
|
||||
└── Video.mp4.s2meta
|
||||
```
|
||||
|
||||
The sidecar file must have exactly the same file path and the `.s2meta` suffix.
|
||||
|
||||
`Video.mp4.s2meta`:
|
||||
```json
|
||||
{
|
||||
"content": "This sidecar file will overwrite some metadata fields of Video.mp4",
|
||||
"author": "Some author",
|
||||
"duration": 12345,
|
||||
"bitrate": 67890,
|
||||
"some_arbitrary_field": [1,2,3]
|
||||
}
|
||||
```
|
||||
|
||||
```
|
||||
sist2 scan ~/Documents -o ./docs.idx
|
||||
sist2 index ./docs.idx
|
||||
```
|
||||
|
||||
*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,
|
||||
it is not currently possible to restore both manual tags and sidecar tags without user scripts
|
||||
while reindexing.
|
||||
|
||||
BIN
docs/manual_tag.png
Normal file
BIN
docs/manual_tag.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.9 KiB |
@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
|
||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
if (ctx._source?.genre != null) {
|
||||
tags.add("genre." + ctx._source.genre.toLowerCase())
|
||||
tags.add("genre." + ctx._source.genre.toLowerCase());
|
||||
}
|
||||
```
|
||||
|
||||
@@ -67,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
|
||||
if (m.find()) {
|
||||
tags.add("year." + m.group(1))
|
||||
tags.add("year." + m.group(1));
|
||||
}
|
||||
```
|
||||
|
||||
@@ -111,16 +111,6 @@ if (ctx._source.path != "") {
|
||||
}
|
||||
```
|
||||
|
||||
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
|
||||
```Java
|
||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
if (ctx._source.path != "") {
|
||||
String[] names = ctx._source.path.splitOnToken('/');
|
||||
tags.add("studio." + names[names.length-1]);
|
||||
}
|
||||
```
|
||||
|
||||
Parse `EXIF:F Number` tag
|
||||
```Java
|
||||
if (ctx._source?.exif_fnumber != null) {
|
||||
|
||||
BIN
docs/stats.png
Normal file
BIN
docs/stats.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 167 KiB |
@@ -10,12 +10,30 @@
|
||||
"path": {
|
||||
"type": "text",
|
||||
"analyzer": "path_analyzer",
|
||||
"copy_to": "suggest-path",
|
||||
"fielddata": true,
|
||||
"index_prefixes": {}
|
||||
"fields": {
|
||||
"nGram": {
|
||||
"type": "text",
|
||||
"analyzer": "my_nGram"
|
||||
},
|
||||
"text": {
|
||||
"type": "text",
|
||||
"analyzer": "content_analyzer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"suggest-path": {
|
||||
"type": "completion",
|
||||
"analyzer": "case_insensitive_kw_analyzer"
|
||||
},
|
||||
"mime": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"parent": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"thumbnail": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
@@ -40,6 +58,10 @@
|
||||
"type": "integer",
|
||||
"index": false
|
||||
},
|
||||
"pages": {
|
||||
"type": "integer",
|
||||
"index": false
|
||||
},
|
||||
"mtime": {
|
||||
"type": "integer"
|
||||
},
|
||||
@@ -83,10 +105,10 @@
|
||||
"analyzer": "my_nGram",
|
||||
"type": "text"
|
||||
},
|
||||
"_keyword.*": {
|
||||
"_keyword.*": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"_text.*": {
|
||||
"_text.*": {
|
||||
"analyzer": "content_analyzer",
|
||||
"type": "text",
|
||||
"fields": {
|
||||
@@ -112,7 +134,12 @@
|
||||
}
|
||||
},
|
||||
"tag": {
|
||||
"type": "keyword"
|
||||
"type": "keyword",
|
||||
"copy_to": "suggest-tag"
|
||||
},
|
||||
"suggest-tag": {
|
||||
"type": "completion",
|
||||
"analyzer": "case_insensitive_kw_analyzer"
|
||||
},
|
||||
"exif_make": {
|
||||
"type": "text"
|
||||
@@ -138,6 +165,30 @@
|
||||
"exif_user_comment": {
|
||||
"type": "text"
|
||||
},
|
||||
"exif_gps_longitude_ref": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"exif_gps_longitude_dms": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"exif_gps_longitude_dec": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"exif_gps_latitude_ref": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"exif_gps_latitude_dms": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"exif_gps_latitude_dec": {
|
||||
"type": "keyword",
|
||||
"index": false
|
||||
},
|
||||
"author": {
|
||||
"type": "text"
|
||||
},
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
{
|
||||
"index": {
|
||||
"refresh_interval": "30s",
|
||||
"codec": "best_compression"
|
||||
"codec": "best_compression",
|
||||
"number_of_replicas": 0
|
||||
},
|
||||
"analysis": {
|
||||
"tokenizer": {
|
||||
|
||||
@@ -4,7 +4,7 @@ rm -rf index.sist2/
|
||||
|
||||
rm src/static/js/bundle.js 2> /dev/null
|
||||
cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
|
||||
cat src/static/js/{util,dom,search}.js >> src/static/js/bundle.js
|
||||
cat src/static/js/{util,dom}.js >> src/static/js/bundle.js
|
||||
|
||||
rm src/static/css/bundle*.css 2> /dev/null
|
||||
cat src/static/css/*.min.css > src/static/css/bundle.css
|
||||
|
||||
@@ -13,7 +13,7 @@ application/epub+zip, epub
|
||||
application/freeloader, frl
|
||||
application/futuresplash, spl
|
||||
application/groupwise, vew
|
||||
application/gzip, gz
|
||||
application/gzip, gz|tgz
|
||||
application/hta, hta
|
||||
application/i-deas, unv
|
||||
application/iges, iges|igs
|
||||
@@ -111,7 +111,7 @@ application/x-dbf, dbf
|
||||
application/x-dbt,
|
||||
application/x-debian-package, deb
|
||||
application/x-deepv, deepv
|
||||
application/x-director, dcr|dir|dxr
|
||||
application/x-director, dir|dxr
|
||||
application/x-dmp, dmp
|
||||
application/x-dosdriver,
|
||||
application/x-dosexec, dll
|
||||
@@ -347,7 +347,8 @@ text/javascript, js
|
||||
text/mcf, mcf
|
||||
text/pascal, pas
|
||||
text/PGP,
|
||||
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
|
||||
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
|
||||
application/vnd.coffeescript, coffee
|
||||
text/richtext, rt|rtf|rtx
|
||||
text/rtf,
|
||||
text/scriplet, wsc
|
||||
@@ -429,3 +430,23 @@ video/x-qtc, qtc
|
||||
video/x-sgi-movie, movie|mv
|
||||
x-epoc/x-sisx-app,
|
||||
application/x-zstd-dictionary,
|
||||
application/vnd.ms-outlook, msg
|
||||
image/x-olympus-orf, orf
|
||||
image/x-nikon-nef, nef
|
||||
image/x-fuji-raf, raf
|
||||
image/x-panasonic-raw, rw2|raw
|
||||
image/x-adobe-dng, dng
|
||||
image/x-canon-cr2, cr2
|
||||
image/x-canon-crw, crw
|
||||
image/x-dcraw,
|
||||
image/x-kodak-dcr, dcr
|
||||
image/x-kodak-k25, k25
|
||||
image/x-kodak-kdc, kdc
|
||||
image/x-minolta-mrw, mrw
|
||||
image/x-pentax-pef, pef
|
||||
image/x-sigma-x3f, x3f
|
||||
image/x-sony-arw, arw
|
||||
image/x-sony-sr2, sr2
|
||||
image/x-sony-srf, srf
|
||||
image/x-epson-erf, erf
|
||||
sist2/sidecar, s2meta
|
||||
|
@@ -3,6 +3,7 @@ noparse = set()
|
||||
ext_in_hash = set()
|
||||
|
||||
major_mime = {
|
||||
"sist2": 0,
|
||||
"model": 1,
|
||||
"example": 2,
|
||||
"message": 3,
|
||||
@@ -18,7 +19,6 @@ major_mime = {
|
||||
|
||||
pdf = (
|
||||
"application/pdf",
|
||||
"application/x-cbz",
|
||||
"application/epub+zip",
|
||||
"application/vnd.ms-xpsdocument",
|
||||
)
|
||||
@@ -73,6 +73,29 @@ markup = (
|
||||
"text/x-sgml"
|
||||
)
|
||||
|
||||
raw = (
|
||||
"image/x-olympus-orf",
|
||||
"image/x-nikon-nef",
|
||||
"image/x-fuji-raf",
|
||||
"image/x-panasonic-raw",
|
||||
"image/x-adobe-dng",
|
||||
"image/x-canon-cr2",
|
||||
"image/x-canon-crw",
|
||||
"image/x-dcraw",
|
||||
"image/x-kodak-dcr",
|
||||
"image/x-kodak-k25",
|
||||
"image/x-kodak-kdc",
|
||||
"image/x-minolta-mrw",
|
||||
"image/x-pentax-pef",
|
||||
"image/x-sigma-x3f",
|
||||
"image/x-sony-arw",
|
||||
"image/x-sony-sr2",
|
||||
"image/x-sony-srf",
|
||||
"image/x-minolta-mrw",
|
||||
"image/x-pentax-pef",
|
||||
"image/x-epson-erf",
|
||||
)
|
||||
|
||||
cnt = 1
|
||||
|
||||
|
||||
@@ -97,8 +120,14 @@ def mime_id(mime):
|
||||
mime_id += " | 0x02000000"
|
||||
elif mime in markup:
|
||||
mime_id += " | 0x01000000"
|
||||
elif mime in raw:
|
||||
mime_id += " | 0x00800000"
|
||||
elif mime == "application/x-empty":
|
||||
cnt -= 1
|
||||
return "1"
|
||||
elif mime == "sist2/sidecar":
|
||||
cnt -= 1
|
||||
return "2"
|
||||
return mime_id
|
||||
|
||||
|
||||
|
||||
6
scripts/reset.sh
Executable file
6
scripts/reset.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
make clean
|
||||
rm -rf CMakeFiles/ CMakeCache.txt Makefile \
|
||||
third-party/libscan/CMakeFiles third-party/libscan/CMakeCache.txt third-party/libscan/third-party/ext_ffmpeg \
|
||||
third-party/libscan/third-party/ext_libmobi third-party/libscan/Makefile
|
||||
@@ -2,9 +2,11 @@ files = [
|
||||
"src/static/css/bundle.css",
|
||||
"src/static/css/bundle_dark.css",
|
||||
"src/static/js/bundle.js",
|
||||
"src/static/js/search.js",
|
||||
"src/static/img/sprite-skin-flat.png",
|
||||
"src/static/img/sprite-skin-flat-dark.png",
|
||||
"src/static/search.html",
|
||||
"src/static/stats.html",
|
||||
]
|
||||
|
||||
|
||||
|
||||
185
src/cli.c
185
src/cli.c
@@ -9,11 +9,15 @@
|
||||
#define DEFAULT_REWRITE_URL ""
|
||||
|
||||
#define DEFAULT_ES_URL "http://localhost:9200"
|
||||
#define DEFAULT_ES_INDEX "sist2"
|
||||
#define DEFAULT_BATCH_SIZE 100
|
||||
|
||||
#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
|
||||
#define DEFAULT_TREEMAP_THRESHOLD 0.0005
|
||||
|
||||
const char* TESS_DATAPATHS[] = {
|
||||
#define DEFAULT_MAX_MEM_BUFFER 2000
|
||||
|
||||
const char *TESS_DATAPATHS[] = {
|
||||
"/usr/share/tessdata/",
|
||||
"/usr/share/tesseract-ocr/tessdata/",
|
||||
"./",
|
||||
@@ -29,10 +33,18 @@ scan_args_t *scan_args_create() {
|
||||
return args;
|
||||
}
|
||||
|
||||
exec_args_t *exec_args_create() {
|
||||
exec_args_t *args = calloc(sizeof(exec_args_t), 1);
|
||||
return args;
|
||||
}
|
||||
|
||||
void scan_args_destroy(scan_args_t *args) {
|
||||
if (args->name != NULL) {
|
||||
free(args->name);
|
||||
}
|
||||
if (args->incremental != NULL) {
|
||||
free(args->incremental);
|
||||
}
|
||||
if (args->path != NULL) {
|
||||
free(args->path);
|
||||
}
|
||||
@@ -44,6 +56,12 @@ void scan_args_destroy(scan_args_t *args) {
|
||||
|
||||
void index_args_destroy(index_args_t *args) {
|
||||
//todo
|
||||
if (args->es_mappings_path) {
|
||||
free(args->es_mappings);
|
||||
}
|
||||
if (args->es_settings_path) {
|
||||
free(args->es_settings);
|
||||
}
|
||||
free(args);
|
||||
}
|
||||
|
||||
@@ -52,6 +70,10 @@ void web_args_destroy(web_args_t *args) {
|
||||
free(args);
|
||||
}
|
||||
|
||||
void exec_args_destroy(exec_args_t *args) {
|
||||
free(args);
|
||||
}
|
||||
|
||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "Required positional argument: PATH.\n");
|
||||
@@ -67,7 +89,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
if (args->incremental != NULL) {
|
||||
abs_path = abspath(args->incremental);
|
||||
args->incremental = abspath(args->incremental);
|
||||
if (abs_path == NULL) {
|
||||
sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
|
||||
args->incremental = NULL;
|
||||
@@ -112,7 +134,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->depth < 0) {
|
||||
if (args->depth <= 0) {
|
||||
args->depth = G_MAXINT32;
|
||||
} else {
|
||||
args->depth += 1;
|
||||
@@ -120,6 +142,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
if (args->name == NULL) {
|
||||
args->name = g_path_get_basename(args->output);
|
||||
} else {
|
||||
char* tmp = malloc(strlen(args->name) + 1);
|
||||
strcpy(tmp, args->name);
|
||||
args->name = tmp;
|
||||
}
|
||||
|
||||
if (args->rewrite_url == NULL) {
|
||||
@@ -144,7 +170,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
char filename[128];
|
||||
sprintf(filename, "%s.traineddata", args->tesseract_lang);
|
||||
const char * path = find_file_in_paths(TESS_DATAPATHS, filename);
|
||||
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
|
||||
if (path == NULL) {
|
||||
LOG_FATAL("cli.c", "Could not find tesseract language file!");
|
||||
}
|
||||
@@ -180,6 +206,16 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
ScanCtx.exclude = NULL;
|
||||
}
|
||||
|
||||
if (args->treemap_threshold_str == 0) {
|
||||
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
|
||||
} else {
|
||||
args->treemap_threshold = atof(args->treemap_threshold_str);
|
||||
}
|
||||
|
||||
if (args->max_memory_buffer == 0) {
|
||||
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
|
||||
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
|
||||
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
|
||||
@@ -191,10 +227,41 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
|
||||
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
|
||||
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
|
||||
LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase)
|
||||
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
|
||||
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
|
||||
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
|
||||
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
|
||||
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
|
||||
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int load_external_file(const char *file_path, char **dst) {
|
||||
struct stat info;
|
||||
int res = stat(file_path, &info);
|
||||
|
||||
if (res == -1) {
|
||||
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
|
||||
return 1;
|
||||
}
|
||||
|
||||
int fd = open(file_path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
|
||||
return 1;
|
||||
}
|
||||
|
||||
*dst = malloc(info.st_size + 1);
|
||||
res = read(fd, *dst, info.st_size);
|
||||
if (res < 0) {
|
||||
LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno))
|
||||
return 1;
|
||||
}
|
||||
|
||||
*(*dst + info.st_size) = '\0';
|
||||
close(fd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -208,6 +275,13 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->threads == 0) {
|
||||
args->threads = 1;
|
||||
} else if (args->threads < 0) {
|
||||
fprintf(stderr, "Invalid threads: %d\n", args->threads);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *index_path = abspath(argv[1]);
|
||||
if (index_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||
@@ -221,30 +295,26 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
args->es_url = DEFAULT_ES_URL;
|
||||
}
|
||||
|
||||
if (args->es_index == NULL) {
|
||||
args->es_index = DEFAULT_ES_INDEX;
|
||||
}
|
||||
|
||||
if (args->script_path != NULL) {
|
||||
struct stat info;
|
||||
int res = stat(args->script_path, &info);
|
||||
|
||||
if (res == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
if (load_external_file(args->script_path, &args->script) != 0) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int fd = open(args->script_path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
if (args->es_settings_path != NULL) {
|
||||
if (load_external_file(args->es_settings_path, &args->es_settings) != 0) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
args->script = malloc(info.st_size + 1);
|
||||
res = read(fd, args->script, info.st_size);
|
||||
if (res < 0) {
|
||||
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
if (args->es_mappings_path != NULL) {
|
||||
if (load_external_file(args->es_mappings_path, &args->es_mappings) != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
*(args->script + info.st_size) = '\0';
|
||||
close(fd);
|
||||
}
|
||||
|
||||
if (args->batch_size == 0) {
|
||||
@@ -252,10 +322,16 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
||||
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
|
||||
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
|
||||
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
|
||||
LOG_DEBUGF("cli.c", "arg async_script=%s", args->async_script)
|
||||
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
|
||||
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
|
||||
LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path)
|
||||
LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings)
|
||||
LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path)
|
||||
LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings)
|
||||
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
|
||||
LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset)
|
||||
|
||||
@@ -279,15 +355,19 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
args->listen_address = DEFAULT_LISTEN_ADDRESS;
|
||||
}
|
||||
|
||||
if (args->es_index == NULL) {
|
||||
args->es_index = DEFAULT_ES_INDEX;
|
||||
}
|
||||
|
||||
if (args->credentials != NULL) {
|
||||
char * ptr = strstr(args->credentials, ":");
|
||||
char *ptr = strstr(args->credentials, ":");
|
||||
if (ptr == NULL) {
|
||||
fprintf(stderr, "Invalid --auth format, see usage\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
|
||||
strncpy(args->auth_pass, ptr + 1, strlen(ptr + 1));
|
||||
strcpy(args->auth_pass, ptr + 1);
|
||||
|
||||
if (strlen(args->auth_user) == 0) {
|
||||
fprintf(stderr, "--auth username must be at least one character long");
|
||||
@@ -299,6 +379,31 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
args->auth_enabled = FALSE;
|
||||
}
|
||||
|
||||
if (args->tag_credentials != NULL && args->credentials != NULL) {
|
||||
fprintf(stderr, "--auth and --tag-auth are mutually exclusive");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->tag_credentials != NULL) {
|
||||
char *ptr = strstr(args->tag_credentials, ":");
|
||||
if (ptr == NULL) {
|
||||
fprintf(stderr, "Invalid --tag-auth format, see usage\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
strncpy(args->auth_user, args->tag_credentials, (ptr - args->tag_credentials));
|
||||
strcpy(args->auth_pass, ptr + 1);
|
||||
|
||||
if (strlen(args->auth_user) == 0) {
|
||||
fprintf(stderr, "--tag-auth username must be at least one character long");
|
||||
return 1;
|
||||
}
|
||||
|
||||
args->tag_auth_enabled = TRUE;
|
||||
} else {
|
||||
args->tag_auth_enabled = FALSE;
|
||||
}
|
||||
|
||||
args->index_count = argc - 1;
|
||||
args->indices = argv + 1;
|
||||
|
||||
@@ -311,8 +416,10 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
||||
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
|
||||
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
|
||||
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
|
||||
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
|
||||
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
|
||||
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
|
||||
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
|
||||
@@ -333,3 +440,39 @@ web_args_t *web_args_create() {
|
||||
return args;
|
||||
}
|
||||
|
||||
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
|
||||
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "Required positional argument: PATH.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *index_path = abspath(argv[1]);
|
||||
if (index_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||
return 1;
|
||||
} else {
|
||||
args->index_path = argv[1];
|
||||
free(index_path);
|
||||
}
|
||||
|
||||
if (args->es_url == NULL) {
|
||||
args->es_url = DEFAULT_ES_URL;
|
||||
}
|
||||
|
||||
if (args->es_index == NULL) {
|
||||
args->es_index = DEFAULT_ES_INDEX;
|
||||
}
|
||||
|
||||
if (args->script_path == NULL) {
|
||||
LOG_FATAL("cli.c", "--script-file argument is required");
|
||||
}
|
||||
|
||||
if (load_external_file(args->script_path, &args->script) != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
|
||||
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
|
||||
return 0;
|
||||
}
|
||||
|
||||
30
src/cli.h
30
src/cli.h
@@ -18,10 +18,15 @@ typedef struct scan_args {
|
||||
char *path;
|
||||
char *archive;
|
||||
archive_mode_t archive_mode;
|
||||
char *archive_passphrase;
|
||||
char *tesseract_lang;
|
||||
const char *tesseract_path;
|
||||
char *exclude_regex;
|
||||
int fast;
|
||||
const char* treemap_threshold_str;
|
||||
double treemap_threshold;
|
||||
int max_memory_buffer;
|
||||
int read_subtitles;
|
||||
} scan_args_t;
|
||||
|
||||
scan_args_t *scan_args_create();
|
||||
@@ -32,25 +37,44 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
|
||||
|
||||
typedef struct index_args {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
const char *index_path;
|
||||
const char *script_path;
|
||||
char *script;
|
||||
const char *es_settings_path;
|
||||
char *es_settings;
|
||||
const char *es_mappings_path;
|
||||
char *es_mappings;
|
||||
int print;
|
||||
int batch_size;
|
||||
int async_script;
|
||||
int force_reset;
|
||||
int threads;
|
||||
} index_args_t;
|
||||
|
||||
typedef struct web_args {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
char *listen_address;
|
||||
char *credentials;
|
||||
char *tag_credentials;
|
||||
char auth_user[256];
|
||||
char auth_pass[256];
|
||||
int auth_enabled;
|
||||
int tag_auth_enabled;
|
||||
int index_count;
|
||||
const char **indices;
|
||||
} web_args_t;
|
||||
|
||||
typedef struct exec_args {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
const char *index_path;
|
||||
const char *script_path;
|
||||
int async_script;
|
||||
char *script;
|
||||
} exec_args_t;
|
||||
|
||||
index_args_t *index_args_create();
|
||||
|
||||
void index_args_destroy(index_args_t *args);
|
||||
@@ -63,4 +87,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv);
|
||||
|
||||
int web_args_validate(web_args_t *args, int argc, const char **argv);
|
||||
|
||||
exec_args_t *exec_args_create();
|
||||
|
||||
void exec_args_destroy(exec_args_t *args);
|
||||
|
||||
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
|
||||
|
||||
#endif
|
||||
|
||||
6
src/ctx.c
Normal file
6
src/ctx.c
Normal file
@@ -0,0 +1,6 @@
|
||||
#include "ctx.h"
|
||||
|
||||
ScanCtx_t ScanCtx;
|
||||
WebCtx_t WebCtx;
|
||||
IndexCtx_t IndexCtx;
|
||||
LogCtx_t LogCtx;
|
||||
45
src/ctx.h
45
src/ctx.h
@@ -5,19 +5,21 @@
|
||||
#include "tpool.h"
|
||||
#include "libscan/scan.h"
|
||||
#include "libscan/arc/arc.h"
|
||||
#include "libscan/cbr/cbr.h"
|
||||
#include "libscan/comic/comic.h"
|
||||
#include "libscan/ebook/ebook.h"
|
||||
#include "libscan/font/font.h"
|
||||
#include "libscan/media/media.h"
|
||||
#include "libscan/ooxml/ooxml.h"
|
||||
#include "libscan/text/text.h"
|
||||
#include "libscan/mobi/scan_mobi.h"
|
||||
#include "libscan/raw/raw.h"
|
||||
#include "libscan/msdoc/msdoc.h"
|
||||
#include "src/io/store.h"
|
||||
|
||||
#include <glib.h>
|
||||
#include <pcre.h>
|
||||
|
||||
//TODO Move to individual scan ctx
|
||||
struct {
|
||||
typedef struct {
|
||||
struct index_t index;
|
||||
|
||||
GHashTable *mime_table;
|
||||
@@ -27,8 +29,6 @@ struct {
|
||||
|
||||
int threads;
|
||||
int depth;
|
||||
int verbose;
|
||||
int very_verbose;
|
||||
|
||||
size_t stat_tn_size;
|
||||
size_t stat_index_size;
|
||||
@@ -40,35 +40,52 @@ struct {
|
||||
pcre_extra *exclude_extra;
|
||||
int fast;
|
||||
|
||||
GHashTable *dbg_current_files;
|
||||
|
||||
scan_arc_ctx_t arc_ctx;
|
||||
scan_cbr_ctx_t cbr_ctx;
|
||||
scan_comic_ctx_t comic_ctx;
|
||||
scan_ebook_ctx_t ebook_ctx;
|
||||
scan_font_ctx_t font_ctx;
|
||||
scan_media_ctx_t media_ctx;
|
||||
scan_ooxml_ctx_t ooxml_ctx;
|
||||
scan_text_ctx_t text_ctx;
|
||||
scan_mobi_ctx_t mobi_ctx;
|
||||
} ScanCtx;
|
||||
scan_raw_ctx_t raw_ctx;
|
||||
scan_msdoc_ctx_t msdoc_ctx;
|
||||
} ScanCtx_t;
|
||||
|
||||
struct {
|
||||
typedef struct {
|
||||
int verbose;
|
||||
int very_verbose;
|
||||
int no_color;
|
||||
} LogCtx;
|
||||
} LogCtx_t;
|
||||
|
||||
struct {
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
int batch_size;
|
||||
} IndexCtx;
|
||||
tpool_t *pool;
|
||||
store_t *tag_store;
|
||||
GHashTable *tags;
|
||||
store_t *meta_store;
|
||||
GHashTable *meta;
|
||||
} IndexCtx_t;
|
||||
|
||||
struct {
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
int index_count;
|
||||
char *auth_user;
|
||||
char *auth_pass;
|
||||
int auth_enabled;
|
||||
struct index_t indices[16];
|
||||
} WebCtx;
|
||||
int tag_auth_enabled;
|
||||
struct index_t indices[64];
|
||||
} WebCtx_t;
|
||||
|
||||
extern ScanCtx_t ScanCtx;
|
||||
extern WebCtx_t WebCtx;
|
||||
extern IndexCtx_t IndexCtx;
|
||||
extern LogCtx_t LogCtx;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -9,21 +9,33 @@
|
||||
typedef struct es_indexer {
|
||||
int queued;
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
es_bulk_line_t *line_head;
|
||||
es_bulk_line_t *line_tail;
|
||||
} es_indexer_t;
|
||||
|
||||
|
||||
static es_indexer_t *Indexer;
|
||||
static __thread es_indexer_t *Indexer;
|
||||
|
||||
void delete_queue(int max);
|
||||
|
||||
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
void elastic_flush();
|
||||
|
||||
void elastic_cleanup() {
|
||||
elastic_flush();
|
||||
if (Indexer != NULL) {
|
||||
free(Indexer->es_index);
|
||||
free(Indexer->es_url);
|
||||
free(Indexer);
|
||||
}
|
||||
}
|
||||
|
||||
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
|
||||
|
||||
cJSON *line = cJSON_CreateObject();
|
||||
|
||||
cJSON_AddStringToObject(line, "_id", uuid_str);
|
||||
cJSON_AddStringToObject(line, "_index", "sist2");
|
||||
cJSON_AddStringToObject(line, "_id", id_str);
|
||||
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
|
||||
cJSON_AddStringToObject(line, "_type", "_doc");
|
||||
cJSON_AddItemReferenceToObject(line, "_source", document);
|
||||
|
||||
@@ -35,23 +47,31 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
cJSON_Delete(line);
|
||||
}
|
||||
|
||||
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
void index_json_func(void *arg) {
|
||||
es_bulk_line_t *line = arg;
|
||||
elastic_index_line(line);
|
||||
}
|
||||
|
||||
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
|
||||
char *json = cJSON_PrintUnformatted(document);
|
||||
|
||||
size_t json_len = strlen(json);
|
||||
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
|
||||
memcpy(bulk_line->line, json, json_len);
|
||||
memcpy(bulk_line->uuid_str, uuid_str, UUID_STR_LEN);
|
||||
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
|
||||
*(bulk_line->line + json_len) = '\n';
|
||||
*(bulk_line->line + json_len + 1) = '\0';
|
||||
bulk_line->next = NULL;
|
||||
|
||||
cJSON_free(json);
|
||||
elastic_index_line(bulk_line);
|
||||
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
|
||||
}
|
||||
|
||||
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
|
||||
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||
}
|
||||
|
||||
cJSON *body = cJSON_CreateObject();
|
||||
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
|
||||
@@ -65,9 +85,16 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
|
||||
char *str = cJSON_Print(body);
|
||||
|
||||
char bulk_url[4096];
|
||||
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
|
||||
if (async) {
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
|
||||
Indexer->es_index);
|
||||
} else {
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
|
||||
}
|
||||
response_t *r = web_post(bulk_url, str);
|
||||
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
|
||||
if (!async) {
|
||||
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
|
||||
}
|
||||
cJSON *resp = cJSON_Parse(r->body);
|
||||
|
||||
cJSON_free(str);
|
||||
@@ -82,6 +109,11 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
|
||||
cJSON_free(error_str);
|
||||
}
|
||||
|
||||
if (async) {
|
||||
cJSON *task = cJSON_GetObjectItem(resp, "task");
|
||||
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
|
||||
}
|
||||
|
||||
cJSON_Delete(resp);
|
||||
}
|
||||
|
||||
@@ -91,16 +123,25 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
|
||||
|
||||
size_t buf_size = 0;
|
||||
size_t buf_cur = 0;
|
||||
char *buf = malloc(1);
|
||||
char *buf = malloc(8192);
|
||||
size_t buf_capacity = 8192;
|
||||
|
||||
while (line != NULL && *count < max) {
|
||||
char action_str[512];
|
||||
snprintf(action_str, 512,
|
||||
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
|
||||
size_t action_str_len = strlen(action_str);
|
||||
char action_str[256];
|
||||
snprintf(
|
||||
action_str, sizeof(action_str),
|
||||
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
|
||||
line->path_md5_str, Indexer->es_index
|
||||
);
|
||||
|
||||
size_t action_str_len = strlen(action_str);
|
||||
size_t line_len = strlen(line->line);
|
||||
buf = realloc(buf, buf_size + line_len + action_str_len);
|
||||
|
||||
while (buf_size + line_len + action_str_len > buf_capacity) {
|
||||
buf_capacity *= 2;
|
||||
buf = realloc(buf, buf_capacity);
|
||||
}
|
||||
|
||||
buf_size += line_len + action_str_len;
|
||||
|
||||
memcpy(buf + buf_cur, action_str, action_str_len);
|
||||
@@ -111,7 +152,11 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
|
||||
line = line->next;
|
||||
(*count)++;
|
||||
}
|
||||
buf = realloc(buf, buf_size + 1);
|
||||
|
||||
if (buf_size + 1 > buf_capacity) {
|
||||
buf = realloc(buf, buf_capacity + 1);
|
||||
}
|
||||
|
||||
*(buf + buf_cur) = '\0';
|
||||
|
||||
*buf_len = buf_cur;
|
||||
@@ -119,7 +164,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
|
||||
}
|
||||
|
||||
void print_errors(response_t *r) {
|
||||
char * tmp = malloc(r->size + 1);
|
||||
char *tmp = malloc(r->size + 1);
|
||||
memcpy(tmp, r->body, r->size);
|
||||
*(tmp + r->size) = '\0';
|
||||
|
||||
@@ -138,6 +183,21 @@ void print_errors(response_t *r) {
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
void print_error(response_t *r) {
|
||||
char *tmp = malloc(r->size + 1);
|
||||
memcpy(tmp, r->body, r->size);
|
||||
*(tmp + r->size) = '\0';
|
||||
|
||||
cJSON *ret_json = cJSON_Parse(tmp);
|
||||
if (cJSON_GetObjectItem(ret_json, "error") != NULL) {
|
||||
char *str = cJSON_Print(cJSON_GetObjectItem(ret_json, "error"));
|
||||
LOG_ERRORF("elastic.c", "%s\n", str);
|
||||
cJSON_free(str);
|
||||
}
|
||||
cJSON_Delete(ret_json);
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
void _elastic_flush(int max) {
|
||||
|
||||
if (max == 0) {
|
||||
@@ -150,7 +210,7 @@ void _elastic_flush(int max) {
|
||||
void *buf = create_bulk_buffer(max, &count, &buf_len);
|
||||
|
||||
char bulk_url[4096];
|
||||
snprintf(bulk_url, 4096, "%s/sist2/_bulk?pipeline=tie", Indexer->es_url);
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
|
||||
response_t *r = web_post(bulk_url, buf);
|
||||
|
||||
if (r->status_code == 0) {
|
||||
@@ -160,7 +220,7 @@ void _elastic_flush(int max) {
|
||||
if (r->status_code == 413) {
|
||||
|
||||
if (max <= 1) {
|
||||
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
|
||||
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
|
||||
free_response(r);
|
||||
free(buf);
|
||||
delete_queue(1);
|
||||
@@ -177,6 +237,15 @@ void _elastic_flush(int max) {
|
||||
_elastic_flush(max / 2);
|
||||
return;
|
||||
|
||||
} else if (r->status_code == 429) {
|
||||
|
||||
free_response(r);
|
||||
free(buf);
|
||||
LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
|
||||
usleep(1000000 * 20);
|
||||
_elastic_flush(max);
|
||||
return;
|
||||
|
||||
} else if (r->status_code != 200) {
|
||||
print_errors(r);
|
||||
delete_queue(Indexer->queued);
|
||||
@@ -202,9 +271,8 @@ void delete_queue(int max) {
|
||||
Indexer->line_head = tmp->next;
|
||||
if (Indexer->line_head == NULL) {
|
||||
Indexer->line_tail = NULL;
|
||||
} else {
|
||||
free(tmp);
|
||||
}
|
||||
free(tmp);
|
||||
Indexer->queued -= 1;
|
||||
}
|
||||
}
|
||||
@@ -212,7 +280,7 @@ void delete_queue(int max) {
|
||||
void elastic_flush() {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url);
|
||||
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||
}
|
||||
|
||||
_elastic_flush(Indexer->queued);
|
||||
@@ -221,7 +289,7 @@ void elastic_flush() {
|
||||
void elastic_index_line(es_bulk_line_t *line) {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url);
|
||||
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||
}
|
||||
|
||||
if (Indexer->line_head == NULL) {
|
||||
@@ -239,14 +307,18 @@ void elastic_index_line(es_bulk_line_t *line) {
|
||||
}
|
||||
}
|
||||
|
||||
es_indexer_t *create_indexer(const char *url) {
|
||||
es_indexer_t *create_indexer(const char *url, const char *index) {
|
||||
|
||||
char *es_url = malloc(strlen(url) + 1);
|
||||
strcpy(es_url, url);
|
||||
|
||||
char *es_index = malloc(strlen(index) + 1);
|
||||
strcpy(es_index, index);
|
||||
|
||||
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
|
||||
|
||||
indexer->es_url = es_url;
|
||||
indexer->es_index = es_index;
|
||||
indexer->queued = 0;
|
||||
indexer->line_head = NULL;
|
||||
indexer->line_tail = NULL;
|
||||
@@ -254,41 +326,42 @@ es_indexer_t *create_indexer(const char *url) {
|
||||
return indexer;
|
||||
}
|
||||
|
||||
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
|
||||
void finish_indexer(char *script, int async_script, char *index_id) {
|
||||
|
||||
char url[4096];
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
|
||||
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
|
||||
response_t *r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
if (script != NULL) {
|
||||
execute_update_script(script, index_id);
|
||||
execute_update_script(script, async_script, index_id);
|
||||
free(script);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
|
||||
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
if (Indexer != NULL) {
|
||||
free(Indexer->es_url);
|
||||
free(Indexer);
|
||||
}
|
||||
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
|
||||
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
|
||||
void elastic_init(int force_reset) {
|
||||
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings) {
|
||||
|
||||
// Check if index exists
|
||||
char url[4096];
|
||||
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
|
||||
response_t *r = web_get(url);
|
||||
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
|
||||
response_t *r = web_get(url, 30);
|
||||
int index_exists = r->status_code == 200;
|
||||
free_response(r);
|
||||
|
||||
@@ -297,46 +370,56 @@ void elastic_init(int force_reset) {
|
||||
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
|
||||
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, "");
|
||||
|
||||
if (r->status_code != 200) {
|
||||
print_error(r);
|
||||
LOG_FATAL("elastic.c", "Could not create index")
|
||||
}
|
||||
|
||||
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
|
||||
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/_ingest/pipeline/tie", IndexCtx.es_url);
|
||||
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
|
||||
r = web_put(url, pipeline_json);
|
||||
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
|
||||
r = web_put(url, settings_json);
|
||||
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
|
||||
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, user_settings ? user_settings : settings_json);
|
||||
LOG_INFOF("elastic.c", "Update user_settings <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
|
||||
r = web_put(url, mappings_json);
|
||||
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
|
||||
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, user_mappings ? user_mappings : mappings_json);
|
||||
LOG_INFOF("elastic.c", "Update user_mappings <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
|
||||
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
}
|
||||
|
||||
cJSON *elastic_get_document(const char *uuid_str) {
|
||||
cJSON *elastic_get_document(const char *id_str) {
|
||||
char url[4096];
|
||||
snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);
|
||||
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
|
||||
|
||||
response_t *r = web_get(url);
|
||||
response_t *r = web_get(url, 3);
|
||||
cJSON *json = NULL;
|
||||
if (r->status_code == 200) {
|
||||
json = cJSON_Parse(r->body);
|
||||
char *tmp = malloc(r->size + 1);
|
||||
memcpy(tmp, r->body, r->size);
|
||||
*(tmp + r->size) = '\0';
|
||||
json = cJSON_Parse(tmp);
|
||||
free(tmp);
|
||||
}
|
||||
free_response(r);
|
||||
return json;
|
||||
@@ -344,21 +427,25 @@ cJSON *elastic_get_document(const char *uuid_str) {
|
||||
|
||||
char *elastic_get_status() {
|
||||
char url[4096];
|
||||
snprintf(url, 4096,
|
||||
"%s/_cluster/state/metadata/sist2?filter_path=metadata.indices.*.state", WebCtx.es_url);
|
||||
snprintf(url, sizeof(url),
|
||||
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
|
||||
|
||||
response_t *r = web_get(url);
|
||||
response_t *r = web_get(url, 30);
|
||||
cJSON *json = NULL;
|
||||
char *status = malloc(128 * sizeof(char));
|
||||
status[0] = '\0';
|
||||
|
||||
if (r->status_code == 200) {
|
||||
json = cJSON_Parse(r->body);
|
||||
char *tmp = malloc(r->size + 1);
|
||||
memcpy(tmp, r->body, r->size);
|
||||
*(tmp + r->size) = '\0';
|
||||
json = cJSON_Parse(tmp);
|
||||
free(tmp);
|
||||
const cJSON *metadata = cJSON_GetObjectItem(json, "metadata");
|
||||
if (metadata != NULL) {
|
||||
const cJSON *indices = cJSON_GetObjectItem(metadata, "indices");
|
||||
const cJSON *sist2 = cJSON_GetObjectItem(indices, "sist2");
|
||||
const cJSON *state = cJSON_GetObjectItem(sist2, "state");
|
||||
const cJSON *index = cJSON_GetObjectItem(indices, WebCtx.es_index);
|
||||
const cJSON *state = cJSON_GetObjectItem(index, "state");
|
||||
strcpy(status, state->valuestring);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
typedef struct es_bulk_line {
|
||||
struct es_bulk_line *next;
|
||||
char uuid_str[UUID_STR_LEN];
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
char line[0];
|
||||
} es_bulk_line_t;
|
||||
|
||||
@@ -16,20 +16,21 @@ typedef struct es_indexer es_indexer_t;
|
||||
|
||||
void elastic_index_line(es_bulk_line_t *line);
|
||||
|
||||
void elastic_flush();
|
||||
void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
|
||||
|
||||
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
|
||||
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
|
||||
|
||||
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
|
||||
es_indexer_t *create_indexer(const char *url, const char *index);
|
||||
|
||||
es_indexer_t *create_indexer(const char* es_url);
|
||||
void elastic_cleanup();
|
||||
void finish_indexer(char *script, int async_script, char *index_id);
|
||||
|
||||
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
|
||||
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);
|
||||
|
||||
void elastic_init(int force_reset);
|
||||
|
||||
cJSON *elastic_get_document(const char *uuid_str);
|
||||
cJSON *elastic_get_document(const char *id_str);
|
||||
|
||||
char *elastic_get_status();
|
||||
|
||||
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because one or more lines are too long
295
src/index/web.c
295
src/index/web.c
@@ -4,8 +4,17 @@
|
||||
|
||||
#include <mongoose.h>
|
||||
#include <pthread.h>
|
||||
#include <curl/curl.h>
|
||||
|
||||
|
||||
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
|
||||
|
||||
size_t real_size = size * nmemb;
|
||||
dyn_buffer_t *buf = user_data;
|
||||
dyn_buffer_write(buf, ptr, real_size);
|
||||
return real_size;
|
||||
}
|
||||
|
||||
void free_response(response_t *resp) {
|
||||
if (resp->body != NULL) {
|
||||
free(resp->body);
|
||||
@@ -13,142 +22,204 @@ void free_response(response_t *resp) {
|
||||
free(resp);
|
||||
}
|
||||
|
||||
#define SIST2_HEADERS "User-Agent: sist2\r\nContent-Type: application/json\r\n"
|
||||
void web_post_async_poll(subreq_ctx_t* req) {
|
||||
fd_set fdread;
|
||||
fd_set fdwrite;
|
||||
fd_set fdexcep;
|
||||
int maxfd = -1;
|
||||
|
||||
FD_ZERO(&fdread);
|
||||
FD_ZERO(&fdwrite);
|
||||
FD_ZERO(&fdexcep);
|
||||
|
||||
void http_req_ev(struct mg_connection *nc, int ev, void *ptr) {
|
||||
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
|
||||
|
||||
http_ev_data_t *ev_data = (http_ev_data_t *) nc->user_data;
|
||||
if(mc != CURLM_OK) {
|
||||
req->done = TRUE;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (ev) {
|
||||
case MG_EV_CONNECT: {
|
||||
int connect_status = *(int *) ptr;
|
||||
if (connect_status != 0) {
|
||||
ev_data->done = TRUE;
|
||||
ev_data->resp->status_code = 0;
|
||||
}
|
||||
if (maxfd == -1) {
|
||||
// no fds ready yet
|
||||
return;
|
||||
}
|
||||
|
||||
struct timeval timeout = {1, 0};
|
||||
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
|
||||
|
||||
switch(rc) {
|
||||
case -1:
|
||||
req->done = TRUE;
|
||||
break;
|
||||
}
|
||||
case MG_EV_HTTP_REPLY: {
|
||||
struct http_message *hm = (struct http_message *) ptr;
|
||||
|
||||
//TODO: Check errors?
|
||||
|
||||
ev_data->resp->size = hm->body.len;
|
||||
ev_data->resp->status_code = hm->resp_code;
|
||||
ev_data->resp->body = malloc(hm->body.len + 1);
|
||||
memcpy(ev_data->resp->body, hm->body.p, hm->body.len);
|
||||
*(ev_data->resp->body + hm->body.len) = '\0';
|
||||
|
||||
ev_data->done = TRUE;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
case MG_EV_CLOSE: {
|
||||
ev_data->done = TRUE;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
curl_multi_perform(req->multi, &req->running_handles);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
subreq_ctx_t *http_req(const char *url, const char *extra_headers, const char *post_data, const char *method) {
|
||||
if (req->running_handles == 0) {
|
||||
req->done = TRUE;
|
||||
req->response->body = req->response_buf.buf;
|
||||
req->response->size = req->response_buf.cur;
|
||||
curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);
|
||||
|
||||
struct mg_str scheme;
|
||||
struct mg_str user_info;
|
||||
struct mg_str host;
|
||||
unsigned int port;
|
||||
struct mg_str path;
|
||||
struct mg_str query;
|
||||
struct mg_str fragment;
|
||||
|
||||
if (post_data == NULL) post_data = "";
|
||||
if (extra_headers == NULL) extra_headers = "";
|
||||
if (path.len == 0) path = mg_mk_str("/");
|
||||
if (host.len == 0) host = mg_mk_str("");
|
||||
|
||||
// [scheme://[user_info@]]host[:port][/path][?query][#fragment]
|
||||
mg_parse_uri(mg_mk_str(url), &scheme, &user_info, &host, &port, &path, &query, &fragment);
|
||||
|
||||
if (query.len > 0) path.len += query.len + 1;
|
||||
|
||||
subreq_ctx_t *ctx = malloc(sizeof(subreq_ctx_t));
|
||||
mg_mgr_init(&ctx->mgr, NULL);
|
||||
|
||||
char address[8196];
|
||||
snprintf(address, sizeof(address), "tcp://%.*s:%u", (int) host.len, host.p, port);
|
||||
struct mg_connection *nc = mg_connect(&ctx->mgr, address, http_req_ev);
|
||||
nc->user_data = &ctx->ev_data;
|
||||
mg_set_protocol_http_websocket(nc);
|
||||
|
||||
ctx->ev_data.resp = calloc(1, sizeof(response_t));
|
||||
ctx->ev_data.done = FALSE;
|
||||
|
||||
mg_printf(
|
||||
nc, "%s %.*s HTTP/1.1\r\n"
|
||||
"Host: %.*s\r\n"
|
||||
"Content-Length: %zu\r\n"
|
||||
"%s\r\n"
|
||||
"%s",
|
||||
method, (int) path.len, path.p,
|
||||
(int) (path.p - host.p), host.p,
|
||||
strlen(post_data),
|
||||
extra_headers,
|
||||
post_data
|
||||
);
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
response_t *web_get(const char *url) {
|
||||
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "GET");
|
||||
while (ctx->ev_data.done == FALSE) {
|
||||
mg_mgr_poll(&ctx->mgr, 50);
|
||||
curl_multi_cleanup(req->multi);
|
||||
curl_easy_cleanup(req->handle);
|
||||
curl_slist_free_all(req->headers);
|
||||
return;
|
||||
}
|
||||
mg_mgr_free(&ctx->mgr);
|
||||
|
||||
response_t *ret = ctx->ev_data.resp;
|
||||
free(ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
subreq_ctx_t *web_post_async(const char *url, const char *data) {
|
||||
return http_req(url, SIST2_HEADERS, data, "POST");
|
||||
subreq_ctx_t *web_post_async(const char *url, char *data) {
|
||||
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
|
||||
req->response = calloc(1, sizeof(response_t));
|
||||
req->data = data;
|
||||
req->response_buf = dyn_buffer_create();
|
||||
|
||||
req->handle = curl_easy_init();
|
||||
CURL *curl = req->handle;
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&req->response_buf));
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||
|
||||
req->multi = curl_multi_init();
|
||||
curl_multi_add_handle(req->multi, curl);
|
||||
curl_multi_perform(req->multi, &req->running_handles);
|
||||
|
||||
LOG_DEBUGF("web.c", "async request POST %s", url)
|
||||
|
||||
return req;
|
||||
}
|
||||
|
||||
response_t *web_get(const char *url, int timeout) {
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
CURL *curl;
|
||||
dyn_buffer_t buffer = dyn_buffer_create();
|
||||
|
||||
curl = curl_easy_init();
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_perform(curl);
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
return resp;
|
||||
}
|
||||
|
||||
response_t *web_post(const char *url, const char *data) {
|
||||
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "POST");
|
||||
|
||||
while (ctx->ev_data.done == FALSE) {
|
||||
mg_mgr_poll(&ctx->mgr, 50);
|
||||
}
|
||||
mg_mgr_free(&ctx->mgr);
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
response_t *ret = ctx->ev_data.resp;
|
||||
free(ctx);
|
||||
return ret;
|
||||
CURL *curl;
|
||||
dyn_buffer_t buffer = dyn_buffer_create();
|
||||
|
||||
curl = curl_easy_init();
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||
|
||||
curl_easy_perform(curl);
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
|
||||
return resp;
|
||||
}
|
||||
|
||||
response_t *web_put(const char *url, const char *data) {
|
||||
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "PUT");
|
||||
while (ctx->ev_data.done == FALSE) {
|
||||
mg_mgr_poll(&ctx->mgr, 50);
|
||||
}
|
||||
mg_mgr_free(&ctx->mgr);
|
||||
|
||||
response_t *ret = ctx->ev_data.resp;
|
||||
free(ctx);
|
||||
return ret;
|
||||
response_t *web_put(const char *url, const char *data) {
|
||||
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
CURL *curl;
|
||||
dyn_buffer_t buffer = dyn_buffer_create();
|
||||
|
||||
curl = curl_easy_init();
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
|
||||
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
|
||||
|
||||
curl_easy_perform(curl);
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
return resp;
|
||||
}
|
||||
|
||||
response_t *web_delete(const char *url) {
|
||||
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "DELETE");
|
||||
while (ctx->ev_data.done == FALSE) {
|
||||
mg_mgr_poll(&ctx->mgr, 50);
|
||||
}
|
||||
mg_mgr_free(&ctx->mgr);
|
||||
|
||||
response_t *ret = ctx->ev_data.resp;
|
||||
free(ctx);
|
||||
return ret;
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
CURL *curl;
|
||||
dyn_buffer_t buffer = dyn_buffer_create();
|
||||
|
||||
curl = curl_easy_init();
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
curl_easy_perform(curl);
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
return resp;
|
||||
}
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
#include "src/sist.h"
|
||||
#include <mongoose.h>
|
||||
#include <curl/curl.h>
|
||||
|
||||
typedef struct response {
|
||||
char *body;
|
||||
@@ -16,13 +17,20 @@ typedef struct {
|
||||
} http_ev_data_t;
|
||||
|
||||
typedef struct {
|
||||
http_ev_data_t ev_data;
|
||||
struct mg_mgr mgr;
|
||||
char* data;
|
||||
dyn_buffer_t response_buf;
|
||||
struct curl_slist *headers;
|
||||
CURL *handle;
|
||||
CURLM *multi;
|
||||
response_t *response;
|
||||
int running_handles;
|
||||
int done;
|
||||
} subreq_ctx_t;
|
||||
|
||||
response_t *web_get(const char *url);
|
||||
response_t *web_get(const char *url, int timeout);
|
||||
response_t *web_post(const char * url, const char * data);
|
||||
subreq_ctx_t *web_post_async(const char *url, const char *data);
|
||||
void web_post_async_poll(subreq_ctx_t* req);
|
||||
subreq_ctx_t *web_post_async(const char *url, char *data);
|
||||
response_t *web_put(const char *url, const char *data);
|
||||
response_t *web_delete(const char *url);
|
||||
|
||||
|
||||
@@ -6,18 +6,22 @@
|
||||
static __thread int index_fd = -1;
|
||||
|
||||
typedef struct {
|
||||
unsigned char uuid[16];
|
||||
unsigned long ino;
|
||||
unsigned char path_md5[MD5_DIGEST_LENGTH];
|
||||
unsigned long size;
|
||||
unsigned int mime;
|
||||
int mtime;
|
||||
short base;
|
||||
short ext;
|
||||
char has_parent;
|
||||
} line_t;
|
||||
|
||||
#define META_NEXT 0xFFFF
|
||||
|
||||
void skip_meta(FILE *file) {
|
||||
enum metakey key = getc(file);
|
||||
while (key != '\n') {
|
||||
enum metakey key = 0;
|
||||
fread(&key, sizeof(uint16_t), 1, file);
|
||||
|
||||
while (key != META_NEXT) {
|
||||
if (IS_META_INT(key)) {
|
||||
fseek(file, sizeof(int), SEEK_CUR);
|
||||
} else if (IS_META_LONG(key)) {
|
||||
@@ -26,13 +30,13 @@ void skip_meta(FILE *file) {
|
||||
while ((getc(file))) {}
|
||||
}
|
||||
|
||||
key = getc(file);
|
||||
fread(&key, sizeof(uint16_t), 1, file);
|
||||
}
|
||||
}
|
||||
|
||||
void write_index_descriptor(char *path, index_descriptor_t *desc) {
|
||||
cJSON *json = cJSON_CreateObject();
|
||||
cJSON_AddStringToObject(json, "uuid", desc->uuid);
|
||||
cJSON_AddStringToObject(json, "id", desc->id);
|
||||
cJSON_AddStringToObject(json, "version", desc->version);
|
||||
cJSON_AddStringToObject(json, "root", desc->root);
|
||||
cJSON_AddStringToObject(json, "name", desc->name);
|
||||
@@ -62,11 +66,11 @@ index_descriptor_t read_index_descriptor(char *path) {
|
||||
int fd = open(path, O_RDONLY);
|
||||
|
||||
if (fd == -1) {
|
||||
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path ,strerror(errno))
|
||||
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
|
||||
}
|
||||
|
||||
char *buf = malloc(info.st_size + 1);
|
||||
int ret = read(fd, buf, info.st_size);
|
||||
size_t ret = read(fd, buf, info.st_size);
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
|
||||
}
|
||||
@@ -82,7 +86,7 @@ index_descriptor_t read_index_descriptor(char *path) {
|
||||
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
|
||||
descriptor.root_len = (short) strlen(descriptor.root);
|
||||
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
|
||||
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
|
||||
strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
|
||||
if (cJSON_GetObjectItem(json, "type") == NULL) {
|
||||
strcpy(descriptor.type, INDEX_TYPE_BIN);
|
||||
} else {
|
||||
@@ -150,8 +154,22 @@ char *get_meta_key_text(enum metakey meta_key) {
|
||||
return "modified_by";
|
||||
case MetaThumbnail:
|
||||
return "thumbnail";
|
||||
case MetaPages:
|
||||
return "pages";
|
||||
case MetaExifGpsLongitudeRef:
|
||||
return "exif_gps_longitude_ref";
|
||||
case MetaExifGpsLongitudeDMS:
|
||||
return "exif_gps_longitude_dms";
|
||||
case MetaExifGpsLongitudeDec:
|
||||
return "exif_gps_longitude_dec";
|
||||
case MetaExifGpsLatitudeRef:
|
||||
return "exif_gps_latitude_ref";
|
||||
case MetaExifGpsLatitudeDMS:
|
||||
return "exif_gps_latitude_dms";
|
||||
case MetaExifGpsLatitudeDec:
|
||||
return "exif_gps_latitude_dec";
|
||||
default:
|
||||
return NULL;
|
||||
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -172,8 +190,8 @@ void write_document(document_t *doc) {
|
||||
dyn_buffer_t buf = dyn_buffer_create();
|
||||
|
||||
// Ignore root directory in the file path
|
||||
doc->ext = doc->ext - ScanCtx.index.desc.root_len;
|
||||
doc->base = doc->base - ScanCtx.index.desc.root_len;
|
||||
doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
|
||||
doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
|
||||
doc->filepath += ScanCtx.index.desc.root_len;
|
||||
|
||||
dyn_buffer_write(&buf, doc, sizeof(line_t));
|
||||
@@ -181,7 +199,7 @@ void write_document(document_t *doc) {
|
||||
|
||||
meta_line_t *meta = doc->meta_head;
|
||||
while (meta != NULL) {
|
||||
dyn_buffer_write_char(&buf, meta->key);
|
||||
dyn_buffer_write_short(&buf, (uint16_t) meta->key);
|
||||
|
||||
if (IS_META_INT(meta->key)) {
|
||||
dyn_buffer_write_int(&buf, meta->int_val);
|
||||
@@ -195,7 +213,7 @@ void write_document(document_t *doc) {
|
||||
meta = meta->next;
|
||||
free(tmp);
|
||||
}
|
||||
dyn_buffer_write_char(&buf, '\n');
|
||||
dyn_buffer_write_short(&buf, META_NEXT);
|
||||
|
||||
int res = write(index_fd, buf.buf, buf.cur);
|
||||
if (res == -1) {
|
||||
@@ -217,9 +235,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
dyn_buffer_t buf = dyn_buffer_create();
|
||||
|
||||
FILE *file = fopen(path, "rb");
|
||||
while (1) {
|
||||
while (TRUE) {
|
||||
buf.cur = 0;
|
||||
size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
|
||||
size_t _ = fread((void *) &line, sizeof(line_t), 1, file);
|
||||
if (feof(file)) {
|
||||
break;
|
||||
}
|
||||
@@ -227,10 +245,10 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
cJSON *document = cJSON_CreateObject();
|
||||
cJSON_AddStringToObject(document, "index", index_id);
|
||||
|
||||
char uuid_str[UUID_STR_LEN];
|
||||
uuid_unparse(line.uuid, uuid_str);
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(line.path_md5, sizeof(line.path_md5), path_md5_str);
|
||||
|
||||
const char* mime_text = mime_get_mime_text(line.mime);
|
||||
const char *mime_text = mime_get_mime_text(line.mime);
|
||||
if (mime_text == NULL) {
|
||||
cJSON_AddNullToObject(document, "mime");
|
||||
} else {
|
||||
@@ -239,7 +257,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
cJSON_AddNumberToObject(document, "size", (double) line.size);
|
||||
cJSON_AddNumberToObject(document, "mtime", line.mtime);
|
||||
|
||||
int c;
|
||||
int c = 0;
|
||||
while ((c = getc(file)) != 0) {
|
||||
dyn_buffer_write_char(&buf, (char) c);
|
||||
}
|
||||
@@ -251,18 +269,27 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
} else {
|
||||
*(buf.buf + line.ext) = '\0';
|
||||
}
|
||||
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
|
||||
|
||||
char tmp[PATH_MAX * 3];
|
||||
|
||||
str_escape(tmp, buf.buf + line.base);
|
||||
cJSON_AddStringToObject(document, "name", tmp);
|
||||
|
||||
if (line.base > 0) {
|
||||
*(buf.buf + line.base - 1) = '\0';
|
||||
cJSON_AddStringToObject(document, "path", buf.buf);
|
||||
|
||||
str_escape(tmp, buf.buf);
|
||||
cJSON_AddStringToObject(document, "path", tmp);
|
||||
} else {
|
||||
cJSON_AddStringToObject(document, "path", "");
|
||||
}
|
||||
|
||||
enum metakey key = getc(file);
|
||||
size_t ret = 0;
|
||||
while (key != '\n') {
|
||||
enum metakey key = 0;
|
||||
fread(&key, sizeof(uint16_t), 1, file);
|
||||
size_t ret;
|
||||
while (key != META_NEXT) {
|
||||
switch (key) {
|
||||
case MetaPages:
|
||||
case MetaWidth:
|
||||
case MetaHeight: {
|
||||
int value;
|
||||
@@ -298,6 +325,12 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
case MetaAuthor:
|
||||
case MetaModifiedBy:
|
||||
case MetaThumbnail:
|
||||
case MetaExifGpsLongitudeDMS:
|
||||
case MetaExifGpsLongitudeDec:
|
||||
case MetaExifGpsLongitudeRef:
|
||||
case MetaExifGpsLatitudeDMS:
|
||||
case MetaExifGpsLatitudeDec:
|
||||
case MetaExifGpsLatitudeRef:
|
||||
case MetaTitle: {
|
||||
buf.cur = 0;
|
||||
while ((c = getc(file)) != 0) {
|
||||
@@ -313,11 +346,39 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
|
||||
LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
|
||||
}
|
||||
|
||||
key = getc(file);
|
||||
fread(&key, sizeof(uint16_t), 1, file);
|
||||
}
|
||||
|
||||
func(document, uuid_str);
|
||||
cJSON *meta_obj = NULL;
|
||||
if (IndexCtx.meta != NULL) {
|
||||
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
|
||||
if (meta_string != NULL) {
|
||||
meta_obj = cJSON_Parse(meta_string);
|
||||
|
||||
cJSON *child;
|
||||
for (child = meta_obj->child; child != NULL; child = child->next) {
|
||||
char meta_key[4096];
|
||||
strcpy(meta_key, child->string);
|
||||
cJSON_DeleteItemFromObject(document, meta_key);
|
||||
cJSON_AddItemReferenceToObject(document, meta_key, child);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (IndexCtx.tags != NULL) {
|
||||
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
|
||||
if (tags_string != NULL) {
|
||||
cJSON *tags_arr = cJSON_Parse(tags_string);
|
||||
cJSON_DeleteItemFromObject(document, "tag");
|
||||
cJSON_AddItemToObject(document, "tag", tags_arr);
|
||||
}
|
||||
}
|
||||
|
||||
func(document, path_md5_str);
|
||||
cJSON_Delete(document);
|
||||
if (meta_obj) {
|
||||
cJSON_Delete(meta_obj);
|
||||
}
|
||||
}
|
||||
dyn_buffer_destroy(&buf);
|
||||
fclose(file);
|
||||
@@ -341,7 +402,7 @@ const char *json_type_array_fields[] = {
|
||||
void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {
|
||||
|
||||
FILE *file = fopen(path, "r");
|
||||
while (1) {
|
||||
while (TRUE) {
|
||||
char *line = NULL;
|
||||
size_t len;
|
||||
size_t read = getline(&line, &len, file);
|
||||
@@ -361,7 +422,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
|
||||
}
|
||||
|
||||
cJSON *document = cJSON_CreateObject();
|
||||
const char *uuid_str = cJSON_GetObjectItem(input, "_id")->valuestring;
|
||||
const char *id_str = cJSON_GetObjectItem(input, "_id")->valuestring;
|
||||
|
||||
for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
|
||||
cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
|
||||
@@ -389,7 +450,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
|
||||
}
|
||||
}
|
||||
|
||||
func(document, uuid_str);
|
||||
func(document, id_str);
|
||||
cJSON_Delete(document);
|
||||
cJSON_Delete(input);
|
||||
|
||||
@@ -397,7 +458,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {
|
||||
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
|
||||
|
||||
if (strcmp(type, INDEX_TYPE_BIN) == 0) {
|
||||
read_index_bin(path, index_id, func);
|
||||
@@ -410,15 +471,17 @@ void incremental_read(GHashTable *table, const char *filepath) {
|
||||
FILE *file = fopen(filepath, "rb");
|
||||
line_t line;
|
||||
|
||||
LOG_DEBUGF("serialize.c", "Incremental read %s", filepath)
|
||||
|
||||
while (1) {
|
||||
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
|
||||
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
|
||||
if (ret != 1 || feof(file)) {
|
||||
break;
|
||||
}
|
||||
|
||||
incremental_put(table, line.ino, line.mtime);
|
||||
incremental_put(table, line.path_md5, line.mtime);
|
||||
|
||||
while ((getc(file))) {}
|
||||
while ((getc(file)) != 0) {}
|
||||
skip_meta(file);
|
||||
}
|
||||
fclose(file);
|
||||
@@ -434,33 +497,47 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
FILE *dst_file = fopen(dst_filepath, "ab");
|
||||
line_t line;
|
||||
|
||||
while (1) {
|
||||
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
|
||||
LOG_DEBUGF("serialize.c", "Incremental copy %s", filepath)
|
||||
|
||||
while (TRUE) {
|
||||
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
|
||||
if (ret != 1 || feof(file)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (incremental_get(copy_table, line.ino)) {
|
||||
// Assume that files with parents still exist.
|
||||
// One way to "fix" this would be to check if the parent is marked for copy but it would consistently
|
||||
// delete files with grandparents, which is a side-effect worse than having orphaned files
|
||||
if (line.has_parent || incremental_get(copy_table, line.path_md5)) {
|
||||
fwrite(&line, sizeof(line), 1, dst_file);
|
||||
|
||||
size_t buf_len;
|
||||
char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
|
||||
store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
|
||||
free(buf);
|
||||
|
||||
// Copy filepath
|
||||
char filepath_buf[PATH_MAX];
|
||||
char c;
|
||||
char *ptr = filepath_buf;
|
||||
while ((c = (char) getc(file))) {
|
||||
fwrite(&c, sizeof(c), 1, dst_file);
|
||||
*ptr++ = c;
|
||||
}
|
||||
fwrite("\0", sizeof(c), 1, dst_file);
|
||||
*ptr = '\0';
|
||||
fwrite(filepath_buf, (ptr - filepath_buf) + 1, 1, dst_file);
|
||||
|
||||
enum metakey key;
|
||||
// Copy tn store contents
|
||||
size_t buf_len;
|
||||
char path_md5[MD5_DIGEST_LENGTH];
|
||||
MD5((unsigned char *) filepath_buf, (ptr - filepath_buf), (unsigned char *) path_md5);
|
||||
char *buf = store_read(store, path_md5, sizeof(path_md5), &buf_len);
|
||||
if (buf_len != 0) {
|
||||
store_write(dst_store, path_md5, sizeof(path_md5), buf, buf_len);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
enum metakey key = 0;
|
||||
while (1) {
|
||||
key = getc(file);
|
||||
if (key == '\n') {
|
||||
fread(&key, sizeof(uint16_t), 1, file);
|
||||
fwrite(&key, sizeof(uint16_t), 1, dst_file);
|
||||
if (key == META_NEXT) {
|
||||
break;
|
||||
}
|
||||
fwrite(&key, sizeof(char), 1, dst_file);
|
||||
|
||||
if (IS_META_INT(key)) {
|
||||
int val;
|
||||
@@ -476,14 +553,12 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
}
|
||||
fwrite("\0", sizeof(c), 1, dst_file);
|
||||
}
|
||||
|
||||
if (ret != 1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while ((getc(file))) {}
|
||||
skip_meta(file);
|
||||
}
|
||||
}
|
||||
fclose(file);
|
||||
fclose(dst_file);
|
||||
}
|
||||
|
||||
@@ -7,14 +7,14 @@
|
||||
#include <sys/syscall.h>
|
||||
#include <glib.h>
|
||||
|
||||
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
|
||||
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
|
||||
|
||||
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
const char *dst_filepath, GHashTable *copy_table);
|
||||
|
||||
void write_document(document_t *doc);
|
||||
|
||||
void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);
|
||||
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
|
||||
|
||||
void incremental_read(GHashTable *table, const char *filepath);
|
||||
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
#include "store.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
store_t *store_create(char *path) {
|
||||
store_t *store_create(char *path, size_t chunk_size) {
|
||||
|
||||
store_t *store = malloc(sizeof(struct store_t));
|
||||
#if (SIST_FAKE_STORE != 1)
|
||||
store->chunk_size = chunk_size;
|
||||
pthread_rwlock_init(&store->lock, NULL);
|
||||
|
||||
mdb_env_create(&store->env);
|
||||
@@ -18,7 +20,7 @@ store_t *store_create(char *path) {
|
||||
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
|
||||
}
|
||||
|
||||
store->size = (size_t) 1024 * 1024 * 5;
|
||||
store->size = (size_t) store->chunk_size;
|
||||
ScanCtx.stat_tn_size = 0;
|
||||
mdb_env_set_mapsize(store->env, store->size);
|
||||
|
||||
@@ -27,26 +29,39 @@ store_t *store_create(char *path) {
|
||||
mdb_txn_begin(store->env, NULL, 0, &txn);
|
||||
mdb_dbi_open(txn, NULL, 0, &store->dbi);
|
||||
mdb_txn_commit(txn);
|
||||
#endif
|
||||
|
||||
return store;
|
||||
}
|
||||
|
||||
/**
 * Release a store created by store_create().
 *
 * Closes the LMDB database handle and environment, destroys the resize lock
 * and frees the store_t itself. When built with SIST_FAKE_STORE == 1 only the
 * struct is freed.
 *
 * @param store Store to destroy; must not be used afterwards.
 */
void store_destroy(store_t *store) {
#if (SIST_FAKE_STORE != 1)
    // Tear down the LMDB side first, then the lock that guarded it
    mdb_close(store->env, store->dbi);
    mdb_env_close(store->env);
    pthread_rwlock_destroy(&store->lock);
#endif

    free(store);
}
|
||||
|
||||
/**
 * Force the store's LMDB environment to flush its buffers to disk.
 *
 * Does nothing when built with SIST_FAKE_STORE == 1, because store_create()
 * does not create an LMDB environment in that configuration.
 *
 * @param store Store to flush.
 */
void store_flush(store_t *store) {
#if (SIST_FAKE_STORE != 1)
    int sync_ret = mdb_env_sync(store->env, TRUE);
    if (sync_ret != 0) {
        // Same reporting style as the mdb_put failure path in store_write()
        LOG_ERROR("store.c", mdb_strerror(sync_ret));
    }
#else
    (void) store;
#endif
}
|
||||
|
||||
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
|
||||
|
||||
if (LogCtx.very_verbose) {
|
||||
char uuid_str[UUID_STR_LEN];
|
||||
uuid_unparse((unsigned char *) key, uuid_str);
|
||||
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
|
||||
if (key_len == MD5_DIGEST_LENGTH) {
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
|
||||
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
|
||||
} else {
|
||||
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
|
||||
}
|
||||
}
|
||||
|
||||
#if (SIST_FAKE_STORE != 1)
|
||||
|
||||
MDB_val mdb_key;
|
||||
mdb_key.mv_data = key;
|
||||
mdb_key.mv_size = key_len;
|
||||
@@ -69,7 +84,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
||||
// Cannot resize when there is a opened transaction.
|
||||
// Resize take effect on the next commit.
|
||||
pthread_rwlock_wrlock(&store->lock);
|
||||
store->size += 1024 * 1024 * 50;
|
||||
store->size += store->chunk_size;
|
||||
mdb_env_set_mapsize(store->env, store->size);
|
||||
mdb_txn_begin(store->env, NULL, 0, &txn);
|
||||
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
|
||||
@@ -83,10 +98,13 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
||||
if (put_ret != 0) {
|
||||
LOG_ERROR("store.c", mdb_strerror(put_ret))
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
|
||||
char *buf = NULL;
|
||||
|
||||
#if (SIST_FAKE_STORE != 1)
|
||||
MDB_val mdb_key;
|
||||
mdb_key.mv_data = key;
|
||||
mdb_key.mv_size = key_len;
|
||||
@@ -107,6 +125,46 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
|
||||
}
|
||||
|
||||
mdb_txn_abort(txn);
|
||||
#endif
|
||||
return buf;
|
||||
}
|
||||
|
||||
/**
 * Load every key/value pair of a store into a newly allocated hash table.
 *
 * Keys and values are copied out of LMDB into malloc'ed buffers that the
 * table owns (both are released with free() when the table is destroyed).
 * Each copy is given an extra terminating NUL byte: the table hashes and
 * compares keys with g_str_hash / g_str_equal, which operate on C strings,
 * so the copies must be terminated even if the stored bytes are not.
 *
 * @param store Store to read.
 * @return A table that the caller must destroy. It is empty when the store
 *         holds no entries, when the read transaction or cursor cannot be
 *         opened, or when built with SIST_FAKE_STORE == 1.
 */
GHashTable *store_read_all(store_t *store) {

    GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);

#if (SIST_FAKE_STORE != 1)
    int count = 0;

    MDB_txn *txn = NULL;
    int ret = mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
    if (ret != 0) {
        LOG_ERROR("store.c", mdb_strerror(ret));
        return table;
    }

    MDB_cursor *cur = NULL;
    ret = mdb_cursor_open(txn, store->dbi, &cur);
    if (ret != 0) {
        LOG_ERROR("store.c", mdb_strerror(ret));
        mdb_txn_abort(txn);
        return table;
    }

    MDB_val key;
    MDB_val value;

    while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
        // +1 for the terminator; harmless if the stored bytes already end with NUL
        char *key_str = malloc(key.mv_size + 1);
        memcpy(key_str, key.mv_data, key.mv_size);
        key_str[key.mv_size] = '\0';

        char *val_str = malloc(value.mv_size + 1);
        memcpy(val_str, value.mv_data, value.mv_size);
        val_str[value.mv_size] = '\0';

        g_hash_table_insert(table, key_str, val_str);
        count += 1;
    }

    const char *path;
    mdb_env_get_path(store->env, &path);
    LOG_DEBUGF("store.c", "Read %d entries from %s", count, path);

    mdb_cursor_close(cur);
    // Read-only transaction: nothing to commit
    mdb_txn_abort(txn);
#else
    (void) store;
#endif

    return table;
}
|
||||
|
||||
|
||||
/**
 * Copy the store's LMDB environment into the directory `destination`.
 *
 * The directory is created first (owner read/write/execute). A failure of
 * mkdir() is deliberately ignored, e.g. when the directory already exists.
 * The copy itself is skipped when built with SIST_FAKE_STORE == 1.
 *
 * @param store       Store to copy from.
 * @param destination Path of the directory receiving the copy.
 */
void store_copy(store_t *store, const char *destination) {
    mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);

#if (SIST_FAKE_STORE != 1)
    int copy_ret = mdb_env_copy(store->env, destination);
    if (copy_ret != 0) {
        // Same reporting style as the mdb_put failure path in store_write()
        LOG_ERROR("store.c", mdb_strerror(copy_ret));
    }
#else
    (void) store;
#endif
}
|
||||
|
||||
@@ -4,19 +4,32 @@
|
||||
#include <pthread.h>
|
||||
#include <lmdb.h>
|
||||
|
||||
#include <glib.h>
|
||||
|
||||
// Chunk sizes, in bytes, passed to store_create(): the value is used both as
// the initial LMDB map size and as the amount the map grows by on resize.
// Parenthesised so the macros expand safely inside larger expressions.
#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 16)
#define STORE_SIZE_META (STORE_SIZE_TAG)
|
||||
|
||||
// State of one LMDB-backed key/value store (see store_create / store_destroy).
typedef struct store_t {
|
||||
// LMDB database handle, opened in store_create()
MDB_dbi dbi;
|
||||
// LMDB environment owning the database
MDB_env *env;
|
||||
// Current map size in bytes, as last passed to mdb_env_set_mapsize()
size_t size;
|
||||
// Initial map size and growth increment, in bytes
size_t chunk_size;
|
||||
// Taken for writing in store_write() while the map is being resized
pthread_rwlock_t lock;
|
||||
} store_t;
|
||||
|
||||
store_t *store_create(char *path);
|
||||
store_t *store_create(char *path, size_t chunk_size);
|
||||
|
||||
void store_destroy(store_t *store);
|
||||
|
||||
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);
|
||||
|
||||
void store_flush(store_t *store);
|
||||
|
||||
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
|
||||
|
||||
GHashTable *store_read_all(store_t *store);
|
||||
|
||||
void store_copy(store_t *store, const char *destination);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -20,7 +20,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
|
||||
|
||||
job->vfile.info = *info;
|
||||
|
||||
memset(job->parent, 0, 16);
|
||||
memset(job->parent, 0, MD5_DIGEST_LENGTH);
|
||||
|
||||
job->vfile.filepath = job->filepath;
|
||||
job->vfile.read = fs_read;
|
||||
|
||||
15
src/log.c
15
src/log.c
@@ -4,15 +4,14 @@
|
||||
#include <stdarg.h>
|
||||
|
||||
const char *log_colors[] = {
|
||||
"\033[34m", "\033[01;34m", "\033[0m",
|
||||
"\033[01;33m", "\033[31m", "\033[01;31m"
|
||||
"\033[34m", "\033[01;34m", "\033[01;33m", "\033[0m", "\033[31m", "\033[01;31m"
|
||||
};
|
||||
|
||||
const char *log_levels[] = {
|
||||
"DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
|
||||
};
|
||||
|
||||
void sist_logf(const char *filepath, int level, char *format, ...) {
|
||||
void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
|
||||
|
||||
static int is_tty = -1;
|
||||
if (is_tty == -1) {
|
||||
@@ -46,11 +45,8 @@ void sist_logf(const char *filepath, int level, char *format, ...) {
|
||||
);
|
||||
}
|
||||
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
size_t maxsize = sizeof(log_str) - log_len;
|
||||
log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (is_tty) {
|
||||
log_len += sprintf(log_str + log_len, "\033[0m\n");
|
||||
@@ -65,6 +61,13 @@ void sist_logf(const char *filepath, int level, char *format, ...) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * printf-style logging entry point.
 *
 * Collects the variadic arguments into a va_list and forwards them to
 * vsist_logf(), which does the actual formatting and output.
 *
 * @param filepath Source tag of the message (e.g. "store.c").
 * @param level    Log level passed through to vsist_logf().
 * @param format   printf-style format string.
 */
void sist_logf(const char *filepath, int level, char *format, ...) {
    va_list args;

    va_start(args, format);
    vsist_logf(filepath, level, format, args);
    va_end(args);
}
|
||||
|
||||
void sist_log(const char *filepath, int level, char *str) {
|
||||
|
||||
static int is_tty = -1;
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
#include "sist.h"
|
||||
|
||||
void sist_logf(const char *filepath, int level, char *format, ...);
|
||||
void vsist_logf(const char *filepath, int level, char *format, va_list ap);
|
||||
|
||||
void sist_log(const char *filepath, int level, char *str);
|
||||
|
||||
|
||||
268
src/main.c
268
src/main.c
@@ -2,7 +2,7 @@
|
||||
#include "ctx.h"
|
||||
|
||||
#include <third-party/argparse/argparse.h>
|
||||
#include <glib.h>
|
||||
#include <locale.h>
|
||||
|
||||
#include "cli.h"
|
||||
#include "io/serialize.h"
|
||||
@@ -14,26 +14,93 @@
|
||||
#include "parsing/mime.h"
|
||||
#include "parsing/parse.h"
|
||||
|
||||
#include "stats.h"
|
||||
|
||||
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
|
||||
|
||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
|
||||
|
||||
static const char *const Version = "2.1.0";
|
||||
static const char *const Version = "2.10.1";
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
"sist2 index [OPTION]... INDEX",
|
||||
"sist2 web [OPTION]... INDEX...",
|
||||
"sist2 exec-script [OPTION]... INDEX",
|
||||
NULL,
|
||||
};
|
||||
|
||||
#include<signal.h>
|
||||
#include<unistd.h>
|
||||
|
||||
static __sighandler_t sigsegv_handler = NULL;
|
||||
static __sighandler_t sigabrt_handler = NULL;
|
||||
|
||||
void sig_handler(int signum) {
|
||||
|
||||
LogCtx.verbose = 1;
|
||||
LogCtx.very_verbose = 1;
|
||||
|
||||
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
|
||||
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
|
||||
|
||||
GHashTableIter iter;
|
||||
g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
|
||||
|
||||
void *key;
|
||||
void *value;
|
||||
while (g_hash_table_iter_next(&iter, &key, &value)) {
|
||||
parse_job_t *job = value;
|
||||
|
||||
if (isatty(STDERR_FILENO)) {
|
||||
LOG_DEBUGF(
|
||||
"*SIGNAL HANDLER*",
|
||||
"Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
|
||||
31 + ((unsigned int) key) % 7, key, job->filepath
|
||||
);
|
||||
} else {
|
||||
LOG_DEBUGF(
|
||||
"*SIGNAL HANDLER*",
|
||||
"THREAD [%04llX] was working on job %s",
|
||||
key, job->filepath
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
tpool_dump_debug_info(ScanCtx.pool);
|
||||
|
||||
LOG_INFO(
|
||||
"*SIGNAL HANDLER*",
|
||||
"Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
|
||||
)
|
||||
LOG_INFO(
|
||||
"*SIGNAL HANDLER*",
|
||||
"sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
|
||||
)
|
||||
|
||||
#ifndef SIST_DEBUG
|
||||
LOG_WARNING(
|
||||
"*SIGNAL HANDLER*",
|
||||
"You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
|
||||
"releases page to provide additionnal information when submitting a bug report."
|
||||
)
|
||||
#endif
|
||||
|
||||
if (signum == SIGSEGV && sigsegv_handler != NULL) {
|
||||
sigsegv_handler(signum);
|
||||
} else if (signum == SIGABRT && sigabrt_handler != NULL) {
|
||||
sigabrt_handler(signum);
|
||||
}
|
||||
}
|
||||
|
||||
void init_dir(const char *dirpath) {
|
||||
char path[PATH_MAX];
|
||||
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
|
||||
|
||||
uuid_t uuid;
|
||||
uuid_generate(uuid);
|
||||
uuid_unparse(uuid, ScanCtx.index.desc.uuid);
|
||||
unsigned char index_md5[MD5_DIGEST_LENGTH];
|
||||
MD5((unsigned char *) ScanCtx.index.desc.name, strlen(ScanCtx.index.desc.name), index_md5);
|
||||
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
|
||||
|
||||
time(&ScanCtx.index.desc.timestamp);
|
||||
strcpy(ScanCtx.index.desc.version, Version);
|
||||
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);
|
||||
@@ -55,9 +122,9 @@ void _log(const char *filepath, int level, char *str) {
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if (ScanCtx.verbose) {
|
||||
if (LogCtx.verbose) {
|
||||
if (level == LEVEL_DEBUG) {
|
||||
if (ScanCtx.very_verbose) {
|
||||
if (LogCtx.very_verbose) {
|
||||
sist_log(filepath, level, str);
|
||||
}
|
||||
} else {
|
||||
@@ -72,17 +139,17 @@ void _logf(const char *filepath, int level, char *format, ...) {
|
||||
|
||||
va_start(args, format);
|
||||
if (level == LEVEL_FATAL) {
|
||||
sist_logf(filepath, level, format, args);
|
||||
vsist_logf(filepath, level, format, args);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if (ScanCtx.verbose) {
|
||||
if (LogCtx.verbose) {
|
||||
if (level == LEVEL_DEBUG) {
|
||||
if (ScanCtx.very_verbose) {
|
||||
sist_logf(filepath, level, format, args);
|
||||
if (LogCtx.very_verbose) {
|
||||
vsist_logf(filepath, level, format, args);
|
||||
}
|
||||
} else {
|
||||
sist_logf(filepath, level, format, args);
|
||||
vsist_logf(filepath, level, format, args);
|
||||
}
|
||||
}
|
||||
va_end(args);
|
||||
@@ -95,12 +162,22 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.arc_ctx.log = _log;
|
||||
ScanCtx.arc_ctx.logf = _logf;
|
||||
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
|
||||
if (args->archive_passphrase != NULL) {
|
||||
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
|
||||
} else {
|
||||
ScanCtx.arc_ctx.passphrase[0] = 0;
|
||||
}
|
||||
|
||||
// Cbr
|
||||
ScanCtx.cbr_ctx.log = _log;
|
||||
ScanCtx.cbr_ctx.logf = _logf;
|
||||
ScanCtx.cbr_ctx.store = _store;
|
||||
ScanCtx.cbr_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
|
||||
ScanCtx.dbg_current_files = g_hash_table_new(g_int64_hash, g_int64_equal);
|
||||
|
||||
// Comic
|
||||
ScanCtx.comic_ctx.log = _log;
|
||||
ScanCtx.comic_ctx.logf = _logf;
|
||||
ScanCtx.comic_ctx.store = _store;
|
||||
ScanCtx.comic_ctx.tn_size = args->size;
|
||||
ScanCtx.comic_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
|
||||
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
|
||||
|
||||
// Ebook
|
||||
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
|
||||
@@ -124,12 +201,15 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.media_ctx.log = _log;
|
||||
ScanCtx.media_ctx.logf = _logf;
|
||||
ScanCtx.media_ctx.store = _store;
|
||||
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
|
||||
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
|
||||
init_media();
|
||||
|
||||
// OOXML
|
||||
ScanCtx.ooxml_ctx.content_size = args->content_size;
|
||||
ScanCtx.ooxml_ctx.log = _log;
|
||||
ScanCtx.ooxml_ctx.logf = _logf;
|
||||
ScanCtx.ooxml_ctx.store = _store;
|
||||
|
||||
// MOBI
|
||||
ScanCtx.mobi_ctx.content_size = args->content_size;
|
||||
@@ -141,6 +221,14 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.text_ctx.log = _log;
|
||||
ScanCtx.text_ctx.logf = _logf;
|
||||
|
||||
// MSDOC
|
||||
ScanCtx.msdoc_ctx.tn_size = args->size;
|
||||
ScanCtx.msdoc_ctx.content_size = args->content_size;
|
||||
ScanCtx.msdoc_ctx.log = _log;
|
||||
ScanCtx.msdoc_ctx.logf = _logf;
|
||||
ScanCtx.msdoc_ctx.store = _store;
|
||||
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");
|
||||
|
||||
ScanCtx.threads = args->threads;
|
||||
ScanCtx.depth = args->depth;
|
||||
|
||||
@@ -150,6 +238,13 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
|
||||
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
|
||||
ScanCtx.fast = args->fast;
|
||||
|
||||
// Raw
|
||||
ScanCtx.raw_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.raw_ctx.tn_size = args->size;
|
||||
ScanCtx.raw_ctx.log = _log;
|
||||
ScanCtx.raw_ctx.logf = _logf;
|
||||
ScanCtx.raw_ctx.store = _store;
|
||||
}
|
||||
|
||||
|
||||
@@ -165,7 +260,11 @@ void sist2_scan(scan_args_t *args) {
|
||||
char store_path[PATH_MAX];
|
||||
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
|
||||
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
ScanCtx.index.store = store_create(store_path);
|
||||
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
|
||||
|
||||
snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
|
||||
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);
|
||||
|
||||
scan_print_header();
|
||||
|
||||
@@ -191,7 +290,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
while ((de = readdir(dir)) != NULL) {
|
||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||
char file_path[PATH_MAX];
|
||||
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
|
||||
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
|
||||
incremental_read(ScanCtx.original_table, file_path);
|
||||
}
|
||||
}
|
||||
@@ -200,7 +299,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
|
||||
}
|
||||
|
||||
ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
|
||||
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
|
||||
tpool_start(ScanCtx.pool);
|
||||
walk_directory_tree(ScanCtx.index.desc.root);
|
||||
tpool_wait(ScanCtx.pool);
|
||||
@@ -210,7 +309,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
char dst_path[PATH_MAX];
|
||||
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
|
||||
snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
|
||||
store_t *source = store_create(store_path);
|
||||
store_t *source = store_create(store_path, STORE_SIZE_TN);
|
||||
|
||||
DIR *dir = opendir(args->incremental);
|
||||
if (dir == NULL) {
|
||||
@@ -221,24 +320,34 @@ void sist2_scan(scan_args_t *args) {
|
||||
while ((de = readdir(dir)) != NULL) {
|
||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||
char file_path[PATH_MAX];
|
||||
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
|
||||
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
|
||||
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
store_destroy(source);
|
||||
|
||||
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
|
||||
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
|
||||
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
|
||||
store_copy(source_tags, dst_path);
|
||||
store_destroy(source_tags);
|
||||
}
|
||||
|
||||
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
|
||||
|
||||
store_destroy(ScanCtx.index.store);
|
||||
}
|
||||
|
||||
void sist2_index(index_args_t *args) {
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
IndexCtx.es_index = args->es_index;
|
||||
IndexCtx.batch_size = args->batch_size;
|
||||
|
||||
if (!args->print) {
|
||||
elastic_init(args->force_reset);
|
||||
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
|
||||
}
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
@@ -258,6 +367,16 @@ void sist2_index(index_args_t *args) {
|
||||
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
|
||||
}
|
||||
|
||||
char path_tmp[PATH_MAX];
|
||||
snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
|
||||
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
|
||||
|
||||
snprintf(path_tmp, sizeof(path_tmp), "%s/meta", args->index_path);
|
||||
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
|
||||
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
|
||||
|
||||
index_func f;
|
||||
if (args->print) {
|
||||
f = print_json;
|
||||
@@ -265,29 +384,64 @@ void sist2_index(index_args_t *args) {
|
||||
f = index_json;
|
||||
}
|
||||
|
||||
void (*cleanup)();
|
||||
if (args->print) {
|
||||
cleanup = NULL;
|
||||
} else {
|
||||
cleanup = elastic_cleanup;
|
||||
}
|
||||
|
||||
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE);
|
||||
tpool_start(IndexCtx.pool);
|
||||
|
||||
struct dirent *de;
|
||||
while ((de = readdir(dir)) != NULL) {
|
||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||
char file_path[PATH_MAX];
|
||||
snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
|
||||
read_index(file_path, desc.uuid, desc.type, f);
|
||||
read_index(file_path, desc.id, desc.type, f);
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
|
||||
tpool_wait(IndexCtx.pool);
|
||||
|
||||
tpool_destroy(IndexCtx.pool);
|
||||
|
||||
if (!args->print) {
|
||||
elastic_flush();
|
||||
destroy_indexer(args->script, desc.uuid);
|
||||
finish_indexer(args->script, args->async_script, desc.id);
|
||||
}
|
||||
|
||||
store_destroy(IndexCtx.tag_store);
|
||||
g_hash_table_remove_all(IndexCtx.tags);
|
||||
g_hash_table_destroy(IndexCtx.tags);
|
||||
}
|
||||
|
||||
void sist2_exec_script(exec_args_t *args) {
|
||||
|
||||
LogCtx.verbose = TRUE;
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
|
||||
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
|
||||
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
||||
|
||||
execute_update_script(args->script, args->async_script, desc.id);
|
||||
free(args->script);
|
||||
}
|
||||
|
||||
void sist2_web(web_args_t *args) {
|
||||
|
||||
WebCtx.es_url = args->es_url;
|
||||
WebCtx.es_index = args->es_index;
|
||||
WebCtx.index_count = args->index_count;
|
||||
WebCtx.auth_user = args->auth_user;
|
||||
WebCtx.auth_pass = args->auth_pass;
|
||||
WebCtx.auth_enabled = args->auth_enabled;
|
||||
WebCtx.tag_auth_enabled = args->tag_auth_enabled;
|
||||
|
||||
for (int i = 0; i < args->index_count; i++) {
|
||||
char *abs_path = abspath(args->indices[i]);
|
||||
@@ -297,7 +451,11 @@ void sist2_web(web_args_t *args) {
|
||||
char path_tmp[PATH_MAX];
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
|
||||
WebCtx.indices[i].store = store_create(path_tmp);
|
||||
WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
|
||||
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
|
||||
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
|
||||
@@ -312,14 +470,23 @@ void sist2_web(web_args_t *args) {
|
||||
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
sigsegv_handler = signal(SIGSEGV, sig_handler);
|
||||
sigabrt_handler = signal(SIGABRT, sig_handler);
|
||||
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
scan_args_t *scan_args = scan_args_create();
|
||||
index_args_t *index_args = index_args_create();
|
||||
web_args_t *web_args = web_args_create();
|
||||
exec_args_t *exec_args = exec_args_create();
|
||||
|
||||
int arg_version = 0;
|
||||
|
||||
char *common_es_url = NULL;
|
||||
char *common_es_index = NULL;
|
||||
char *common_script_path = NULL;
|
||||
int common_async_script = 0;
|
||||
int common_threads = 0;
|
||||
|
||||
struct argparse_option options[] = {
|
||||
OPT_HELP(),
|
||||
@@ -329,7 +496,7 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
|
||||
|
||||
OPT_GROUP("Scan options"),
|
||||
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_FLOAT('q', "quality", &scan_args->quality,
|
||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
|
||||
OPT_INTEGER(0, "size", &scan_args->size,
|
||||
@@ -346,23 +513,45 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
|
||||
"skip: Don't parse, list: only get file names as text, "
|
||||
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
|
||||
OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
|
||||
"Passphrase for encrypted archive files"),
|
||||
|
||||
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
|
||||
"which are installed on your machine)"),
|
||||
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
|
||||
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
|
||||
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
|
||||
"(see USAGE.md). DEFAULT: 0.0005"),
|
||||
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
|
||||
"Maximum memory buffer size per thread in MB for files inside archives "
|
||||
"(see USAGE.md). DEFAULT: 2000"),
|
||||
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
|
||||
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
|
||||
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
||||
"(You must use this option the first time you use the index command)"),
|
||||
|
||||
OPT_GROUP("Web options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
|
||||
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
|
||||
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
|
||||
|
||||
OPT_GROUP("Exec-script options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
|
||||
OPT_END(),
|
||||
};
|
||||
@@ -383,6 +572,18 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
web_args->es_url = common_es_url;
|
||||
index_args->es_url = common_es_url;
|
||||
exec_args->es_url = common_es_url;
|
||||
|
||||
web_args->es_index = common_es_index;
|
||||
index_args->es_index = common_es_index;
|
||||
exec_args->es_index = common_es_index;
|
||||
|
||||
index_args->script_path = common_script_path;
|
||||
exec_args->script_path = common_script_path;
|
||||
index_args->threads = common_threads;
|
||||
scan_args->threads = common_threads;
|
||||
exec_args->async_script = common_async_script;
|
||||
index_args->async_script = common_async_script;
|
||||
|
||||
if (argc == 0) {
|
||||
argparse_usage(&argparse);
|
||||
@@ -411,6 +612,14 @@ int main(int argc, const char *argv[]) {
|
||||
}
|
||||
sist2_web(web_args);
|
||||
|
||||
} else if (strcmp(argv[0], "exec-script") == 0) {
|
||||
|
||||
int err = exec_args_validate(exec_args, argc, argv);
|
||||
if (err != 0) {
|
||||
goto end;
|
||||
}
|
||||
sist2_exec_script(exec_args);
|
||||
|
||||
} else {
|
||||
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
|
||||
argparse_usage(&argparse);
|
||||
@@ -422,6 +631,7 @@ int main(int argc, const char *argv[]) {
|
||||
scan_args_destroy(scan_args);
|
||||
index_args_destroy(index_args);
|
||||
web_args_destroy(web_args);
|
||||
exec_args_destroy(exec_args);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3,9 +3,10 @@
|
||||
|
||||
#include "../sist.h"
|
||||
|
||||
#define MAJOR_MIME(mime_id) (mime_id & 0x00FF0000) >> 16
|
||||
#define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
|
||||
|
||||
#define MIME_EMPTY 1
|
||||
#define MIME_SIST2_SIDECAR 2
|
||||
|
||||
#define DONT_PARSE 0x80000000
|
||||
#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0)
|
||||
@@ -31,6 +32,9 @@
|
||||
#define MARKUP_MASK 0x01000000
|
||||
#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
|
||||
|
||||
#define RAW_MASK 0x00800000
|
||||
#define IS_RAW(mime_id) (mime_id & RAW_MASK) == RAW_MASK
|
||||
|
||||
enum major_mime {
|
||||
MimeInvalid = 0,
|
||||
MimeModel = 1,
|
||||
|
||||
@@ -54,387 +54,408 @@ enum mime {
|
||||
application_streamingmedia=655406,
|
||||
application_vda=655407,
|
||||
application_vnd_amazon_mobi8_ebook=655408 | 0x02000000,
|
||||
application_vnd_fdf=655409,
|
||||
application_vnd_font_fontforge_sfd=655410,
|
||||
application_vnd_hp_hpgl=655411,
|
||||
application_vnd_iccprofile=655412,
|
||||
application_vnd_lotus_1_2_3=655413,
|
||||
application_vnd_ms_cab_compressed=655414,
|
||||
application_vnd_ms_excel=655415,
|
||||
application_vnd_ms_fontobject=655416,
|
||||
application_vnd_ms_opentype=655417 | 0x20000000,
|
||||
application_vnd_ms_pki_certstore=655418,
|
||||
application_vnd_ms_pki_pko=655419,
|
||||
application_vnd_ms_pki_seccat=655420,
|
||||
application_vnd_ms_powerpoint=655421,
|
||||
application_vnd_ms_project=655422,
|
||||
application_vnd_oasis_opendocument_base=655423,
|
||||
application_vnd_oasis_opendocument_formula=655424,
|
||||
application_vnd_oasis_opendocument_graphics=655425,
|
||||
application_vnd_oasis_opendocument_presentation=655426,
|
||||
application_vnd_oasis_opendocument_spreadsheet=655427,
|
||||
application_vnd_oasis_opendocument_text=655428,
|
||||
application_vnd_openxmlformats_officedocument_presentationml_presentation=655429 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655430 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655431 | 0x04000000,
|
||||
application_vnd_symbian_install=655432,
|
||||
application_vnd_tcpdump_pcap=655433,
|
||||
application_vnd_wap_wmlc=655434,
|
||||
application_vnd_wap_wmlscriptc=655435,
|
||||
application_vnd_xara=655436,
|
||||
application_vocaltec_media_desc=655437,
|
||||
application_vocaltec_media_file=655438,
|
||||
application_warc=655439,
|
||||
application_winhelp=655440,
|
||||
application_wordperfect=655441,
|
||||
application_wordperfect6_0=655442,
|
||||
application_wordperfect6_1=655443,
|
||||
application_x_123=655444,
|
||||
application_x_7z_compressed=655445 | 0x10000000,
|
||||
application_x_aim=655446,
|
||||
application_x_apple_diskimage=655447,
|
||||
application_x_arc=655448 | 0x10000000,
|
||||
application_x_archive=655449,
|
||||
application_x_atari_7800_rom=655450,
|
||||
application_x_authorware_bin=655451,
|
||||
application_x_authorware_map=655452,
|
||||
application_x_authorware_seg=655453,
|
||||
application_x_avira_qua=655454,
|
||||
application_x_bcpio=655455,
|
||||
application_x_bittorrent=655456,
|
||||
application_x_bsh=655457,
|
||||
application_x_bytecode_python=655458,
|
||||
application_x_bzip=655459,
|
||||
application_x_bzip2=655460 | 0x08000000,
|
||||
application_x_cbr=655461,
|
||||
application_x_cbz=655462 | 0x40000000,
|
||||
application_x_cdlink=655463,
|
||||
application_x_chat=655464,
|
||||
application_x_chrome_extension=655465,
|
||||
application_x_cocoa=655466,
|
||||
application_x_conference=655467,
|
||||
application_x_coredump=655468,
|
||||
application_x_cpio=655469,
|
||||
application_x_dbf=655470,
|
||||
application_x_dbt=655471,
|
||||
application_x_debian_package=655472,
|
||||
application_x_deepv=655473,
|
||||
application_x_director=655474,
|
||||
application_x_dmp=655475,
|
||||
application_x_dosdriver=655476,
|
||||
application_x_dosexec=655477,
|
||||
application_x_dvi=655478,
|
||||
application_x_elc=655479,
|
||||
application_vnd_coffeescript=655409,
|
||||
application_vnd_fdf=655410,
|
||||
application_vnd_font_fontforge_sfd=655411,
|
||||
application_vnd_hp_hpgl=655412,
|
||||
application_vnd_iccprofile=655413,
|
||||
application_vnd_lotus_1_2_3=655414,
|
||||
application_vnd_ms_cab_compressed=655415,
|
||||
application_vnd_ms_excel=655416,
|
||||
application_vnd_ms_fontobject=655417,
|
||||
application_vnd_ms_opentype=655418 | 0x20000000,
|
||||
application_vnd_ms_outlook=655419,
|
||||
application_vnd_ms_pki_certstore=655420,
|
||||
application_vnd_ms_pki_pko=655421,
|
||||
application_vnd_ms_pki_seccat=655422,
|
||||
application_vnd_ms_powerpoint=655423,
|
||||
application_vnd_ms_project=655424,
|
||||
application_vnd_oasis_opendocument_base=655425,
|
||||
application_vnd_oasis_opendocument_formula=655426,
|
||||
application_vnd_oasis_opendocument_graphics=655427,
|
||||
application_vnd_oasis_opendocument_presentation=655428,
|
||||
application_vnd_oasis_opendocument_spreadsheet=655429,
|
||||
application_vnd_oasis_opendocument_text=655430,
|
||||
application_vnd_openxmlformats_officedocument_presentationml_presentation=655431 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655432 | 0x04000000,
|
||||
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655433 | 0x04000000,
|
||||
application_vnd_symbian_install=655434,
|
||||
application_vnd_tcpdump_pcap=655435,
|
||||
application_vnd_wap_wmlc=655436,
|
||||
application_vnd_wap_wmlscriptc=655437,
|
||||
application_vnd_xara=655438,
|
||||
application_vocaltec_media_desc=655439,
|
||||
application_vocaltec_media_file=655440,
|
||||
application_warc=655441,
|
||||
application_winhelp=655442,
|
||||
application_wordperfect=655443,
|
||||
application_wordperfect6_0=655444,
|
||||
application_wordperfect6_1=655445,
|
||||
application_x_123=655446,
|
||||
application_x_7z_compressed=655447 | 0x10000000,
|
||||
application_x_aim=655448,
|
||||
application_x_apple_diskimage=655449,
|
||||
application_x_arc=655450 | 0x10000000,
|
||||
application_x_archive=655451,
|
||||
application_x_atari_7800_rom=655452,
|
||||
application_x_authorware_bin=655453,
|
||||
application_x_authorware_map=655454,
|
||||
application_x_authorware_seg=655455,
|
||||
application_x_avira_qua=655456,
|
||||
application_x_bcpio=655457,
|
||||
application_x_bittorrent=655458,
|
||||
application_x_bsh=655459,
|
||||
application_x_bytecode_python=655460,
|
||||
application_x_bzip=655461,
|
||||
application_x_bzip2=655462 | 0x08000000,
|
||||
application_x_cbr=655463,
|
||||
application_x_cbz=655464,
|
||||
application_x_cdlink=655465,
|
||||
application_x_chat=655466,
|
||||
application_x_chrome_extension=655467,
|
||||
application_x_cocoa=655468,
|
||||
application_x_conference=655469,
|
||||
application_x_coredump=655470,
|
||||
application_x_cpio=655471,
|
||||
application_x_dbf=655472,
|
||||
application_x_dbt=655473,
|
||||
application_x_debian_package=655474,
|
||||
application_x_deepv=655475,
|
||||
application_x_director=655476,
|
||||
application_x_dmp=655477,
|
||||
application_x_dosdriver=655478,
|
||||
application_x_dosexec=655479,
|
||||
application_x_dvi=655480,
|
||||
application_x_elc=655481,
|
||||
application_x_empty=1,
|
||||
application_x_envoy=655481,
|
||||
application_x_esrehber=655482,
|
||||
application_x_excel=655483,
|
||||
application_x_executable=655484,
|
||||
application_x_font_gdos=655485,
|
||||
application_x_font_pf2=655486,
|
||||
application_x_font_pfm=655487,
|
||||
application_x_font_sfn=655488,
|
||||
application_x_font_ttf=655489 | 0x20000000,
|
||||
application_x_fptapplication_x_dbt=655490,
|
||||
application_x_freelance=655491,
|
||||
application_x_gamecube_rom=655492,
|
||||
application_x_gdbm=655493,
|
||||
application_x_gettext_translation=655494,
|
||||
application_x_git=655495,
|
||||
application_x_gsp=655496,
|
||||
application_x_gss=655497,
|
||||
application_x_gtar=655498,
|
||||
application_x_gzip=655499,
|
||||
application_x_hdf=655500,
|
||||
application_x_helpfile=655501,
|
||||
application_x_httpd_imap=655502,
|
||||
application_x_ima=655503,
|
||||
application_x_innosetup=655504,
|
||||
application_x_internett_signup=655505,
|
||||
application_x_inventor=655506,
|
||||
application_x_ip2=655507,
|
||||
application_x_java_applet=655508,
|
||||
application_x_java_commerce=655509,
|
||||
application_x_java_image=655510,
|
||||
application_x_java_jmod=655511,
|
||||
application_x_java_keystore=655512,
|
||||
application_x_kdelnk=655513,
|
||||
application_x_koan=655514,
|
||||
application_x_latex=655515,
|
||||
application_x_livescreen=655516,
|
||||
application_x_lotus=655517,
|
||||
application_x_lz4=655518 | 0x08000000,
|
||||
application_x_lz4_json=655519,
|
||||
application_x_lzh=655520,
|
||||
application_x_lzh_compressed=655521,
|
||||
application_x_lzip=655522 | 0x08000000,
|
||||
application_x_lzma=655523 | 0x08000000,
|
||||
application_x_lzop=655524 | 0x08000000,
|
||||
application_x_lzx=655525,
|
||||
application_x_mach_binary=655526,
|
||||
application_x_mach_executable=655527,
|
||||
application_x_magic_cap_package_1_0=655528,
|
||||
application_x_mathcad=655529,
|
||||
application_x_maxis_dbpf=655530,
|
||||
application_x_meme=655531,
|
||||
application_x_midi=655532,
|
||||
application_x_mif=655533,
|
||||
application_x_mix_transfer=655534,
|
||||
application_x_mobipocket_ebook=655535 | 0x02000000,
|
||||
application_x_ms_compress_szdd=655536,
|
||||
application_x_ms_pdb=655537,
|
||||
application_x_ms_reader=655538,
|
||||
application_x_msaccess=655539,
|
||||
application_x_n64_rom=655540,
|
||||
application_x_navi_animation=655541,
|
||||
application_x_navidoc=655542,
|
||||
application_x_navimap=655543,
|
||||
application_x_navistyle=655544,
|
||||
application_x_nes_rom=655545,
|
||||
application_x_netcdf=655546,
|
||||
application_x_newton_compatible_pkg=655547,
|
||||
application_x_nintendo_ds_rom=655548,
|
||||
application_x_object=655549,
|
||||
application_x_omc=655550,
|
||||
application_x_omcdatamaker=655551,
|
||||
application_x_omcregerator=655552,
|
||||
application_x_pagemaker=655553,
|
||||
application_x_pcl=655554,
|
||||
application_x_pgp_keyring=655555,
|
||||
application_x_pixclscript=655556,
|
||||
application_x_pkcs7_certreqresp=655557,
|
||||
application_x_pkcs7_signature=655558,
|
||||
application_x_project=655559,
|
||||
application_x_qpro=655560,
|
||||
application_x_rar=655561 | 0x10000000,
|
||||
application_x_rpm=655562,
|
||||
application_x_sdp=655563,
|
||||
application_x_sea=655564,
|
||||
application_x_seelogo=655565,
|
||||
application_x_setupscript=655566,
|
||||
application_x_shar=655567,
|
||||
application_x_sharedlib=655568,
|
||||
application_x_shockwave_flash=655569,
|
||||
application_x_snappy_framed=655570,
|
||||
application_x_sprite=655571,
|
||||
application_x_sqlite3=655572,
|
||||
application_x_stargallery_thm=655573,
|
||||
application_x_stuffit=655574,
|
||||
application_x_sv4cpio=655575,
|
||||
application_x_sv4crc=655576,
|
||||
application_x_tar=655577 | 0x10000000,
|
||||
application_x_tbook=655578,
|
||||
application_x_terminfo=655579,
|
||||
application_x_terminfo2=655580,
|
||||
application_x_tex_tfm=655581,
|
||||
application_x_texinfo=655582,
|
||||
application_x_ustar=655583,
|
||||
application_x_visio=655584,
|
||||
application_x_vnd_audioexplosion_mzz=655585,
|
||||
application_x_vnd_ls_xpix=655586,
|
||||
application_x_vrml=655587,
|
||||
application_x_wais_source=655588,
|
||||
application_x_wine_extension_ini=655589,
|
||||
application_x_wintalk=655590,
|
||||
application_x_world=655591,
|
||||
application_x_wri=655592,
|
||||
application_x_x509_ca_cert=655593,
|
||||
application_x_xz=655594 | 0x08000000,
|
||||
application_x_zip=655595,
|
||||
application_x_zstd=655596 | 0x08000000,
|
||||
application_x_zstd_dictionary=655597,
|
||||
application_xml=655598,
|
||||
application_zip=655599 | 0x10000000,
|
||||
application_zlib=655600,
|
||||
audio_basic=458993 | 0x80000000,
|
||||
audio_it=458994,
|
||||
audio_make=458995,
|
||||
audio_mid=458996,
|
||||
audio_midi=458997,
|
||||
audio_mp4=458998,
|
||||
audio_mpeg=458999,
|
||||
audio_ogg=459000,
|
||||
audio_s3m=459001,
|
||||
audio_tsp_audio=459002,
|
||||
audio_tsplayer=459003,
|
||||
audio_vnd_qcelp=459004,
|
||||
audio_voxware=459005,
|
||||
audio_x_aiff=459006,
|
||||
audio_x_flac=459007,
|
||||
audio_x_gsm=459008,
|
||||
audio_x_hx_aac_adts=459009,
|
||||
audio_x_jam=459010,
|
||||
audio_x_liveaudio=459011,
|
||||
audio_x_m4a=459012,
|
||||
audio_x_midi=459013,
|
||||
audio_x_mod=459014,
|
||||
audio_x_mp4a_latm=459015,
|
||||
audio_x_mpeg_3=459016,
|
||||
audio_x_mpequrl=459017,
|
||||
audio_x_nspaudio=459018,
|
||||
audio_x_pn_realaudio=459019,
|
||||
audio_x_psid=459020,
|
||||
audio_x_realaudio=459021,
|
||||
audio_x_s3m=459022,
|
||||
audio_x_twinvq=459023,
|
||||
audio_x_twinvq_plugin=459024,
|
||||
audio_x_voc=459025,
|
||||
audio_x_wav=459026,
|
||||
audio_x_xbox_executable=459027 | 0x80000000,
|
||||
audio_x_xbox360_executable=459028 | 0x80000000,
|
||||
audio_xm=459029,
|
||||
font_otf=327958 | 0x20000000,
|
||||
font_sfnt=327959 | 0x20000000,
|
||||
font_woff=327960 | 0x20000000,
|
||||
font_woff2=327961 | 0x20000000,
|
||||
image_bmp=524570,
|
||||
image_cmu_raster=524571,
|
||||
image_fif=524572,
|
||||
image_florian=524573,
|
||||
image_g3fax=524574,
|
||||
image_gif=524575,
|
||||
image_heic=524576,
|
||||
image_ief=524577,
|
||||
image_jpeg=524578,
|
||||
image_jutvision=524579,
|
||||
image_naplps=524580,
|
||||
image_pict=524581,
|
||||
image_png=524582,
|
||||
image_svg=524583 | 0x80000000,
|
||||
image_svg_xml=524584 | 0x80000000,
|
||||
image_tiff=524585,
|
||||
image_vnd_adobe_photoshop=524586 | 0x80000000,
|
||||
image_vnd_djvu=524587 | 0x80000000,
|
||||
image_vnd_fpx=524588,
|
||||
image_vnd_microsoft_icon=524589,
|
||||
image_vnd_rn_realflash=524590,
|
||||
image_vnd_rn_realpix=524591,
|
||||
image_vnd_wap_wbmp=524592,
|
||||
image_vnd_xiff=524593,
|
||||
image_webp=524594,
|
||||
image_wmf=524595,
|
||||
image_x_3ds=524596,
|
||||
image_x_award_bioslogo=524597,
|
||||
image_x_cmu_raster=524598,
|
||||
image_x_cur=524599,
|
||||
image_x_dwg=524600,
|
||||
image_x_eps=524601,
|
||||
image_x_exr=524602,
|
||||
image_x_gem=524603,
|
||||
image_x_icns=524604,
|
||||
image_x_icon=524605 | 0x80000000,
|
||||
image_x_jg=524606,
|
||||
image_x_jps=524607,
|
||||
image_x_ms_bmp=524608,
|
||||
image_x_niff=524609,
|
||||
image_x_pcx=524610,
|
||||
image_x_pict=524611,
|
||||
image_x_portable_bitmap=524612,
|
||||
image_x_portable_graymap=524613,
|
||||
image_x_portable_pixmap=524614,
|
||||
image_x_quicktime=524615,
|
||||
image_x_rgb=524616,
|
||||
image_x_tga=524617,
|
||||
image_x_tiff=524618,
|
||||
image_x_win_bitmap=524619,
|
||||
image_x_xcf=524620 | 0x80000000,
|
||||
image_x_xpixmap=524621 | 0x80000000,
|
||||
image_x_xwindowdump=524622,
|
||||
message_news=196943,
|
||||
message_rfc822=196944,
|
||||
model_vnd_dwf=65873,
|
||||
model_vnd_gdl=65874,
|
||||
model_vnd_gs_gdl=65875,
|
||||
model_vrml=65876,
|
||||
model_x_pov=65877,
|
||||
text_PGP=590166,
|
||||
text_asp=590167,
|
||||
text_css=590168,
|
||||
text_html=590169 | 0x01000000,
|
||||
text_javascript=590170,
|
||||
text_mcf=590171,
|
||||
text_pascal=590172,
|
||||
text_plain=590173,
|
||||
text_richtext=590174,
|
||||
text_rtf=590175,
|
||||
text_scriplet=590176,
|
||||
text_tab_separated_values=590177,
|
||||
text_troff=590178,
|
||||
text_uri_list=590179,
|
||||
text_vnd_abc=590180,
|
||||
text_vnd_fmi_flexstor=590181,
|
||||
text_vnd_wap_wml=590182,
|
||||
text_vnd_wap_wmlscript=590183,
|
||||
text_webviewhtml=590184,
|
||||
text_x_Algol68=590185,
|
||||
text_x_asm=590186,
|
||||
text_x_audiosoft_intra=590187,
|
||||
text_x_awk=590188,
|
||||
text_x_bcpl=590189,
|
||||
text_x_c=590190,
|
||||
text_x_c__=590191,
|
||||
text_x_component=590192,
|
||||
text_x_diff=590193,
|
||||
text_x_fortran=590194,
|
||||
text_x_java=590195,
|
||||
text_x_la_asf=590196,
|
||||
text_x_lisp=590197,
|
||||
text_x_m=590198,
|
||||
text_x_m4=590199,
|
||||
text_x_makefile=590200,
|
||||
text_x_ms_regedit=590201,
|
||||
text_x_msdos_batch=590202,
|
||||
text_x_objective_c=590203,
|
||||
text_x_pascal=590204,
|
||||
text_x_perl=590205,
|
||||
text_x_php=590206,
|
||||
text_x_po=590207,
|
||||
text_x_python=590208,
|
||||
text_x_ruby=590209,
|
||||
text_x_sass=590210,
|
||||
text_x_scss=590211,
|
||||
text_x_server_parsed_html=590212,
|
||||
text_x_setext=590213,
|
||||
text_x_sgml=590214 | 0x01000000,
|
||||
text_x_shellscript=590215,
|
||||
text_x_speech=590216,
|
||||
text_x_tcl=590217,
|
||||
text_x_tex=590218,
|
||||
text_x_uil=590219,
|
||||
text_x_uuencode=590220,
|
||||
text_x_vcalendar=590221,
|
||||
text_x_vcard=590222,
|
||||
text_xml=590223 | 0x01000000,
|
||||
video_MP2T=393616,
|
||||
video_animaflex=393617,
|
||||
video_avi=393618,
|
||||
video_avs_video=393619,
|
||||
video_mp4=393620,
|
||||
video_mpeg=393621,
|
||||
video_quicktime=393622,
|
||||
video_vdo=393623,
|
||||
video_vivo=393624,
|
||||
video_vnd_rn_realvideo=393625,
|
||||
video_vosaic=393626,
|
||||
video_webm=393627,
|
||||
video_x_amt_demorun=393628,
|
||||
video_x_amt_showrun=393629,
|
||||
video_x_atomic3d_feature=393630,
|
||||
video_x_dl=393631,
|
||||
video_x_dv=393632,
|
||||
video_x_fli=393633,
|
||||
video_x_flv=393634,
|
||||
video_x_isvideo=393635,
|
||||
video_x_jng=393636 | 0x80000000,
|
||||
video_x_m4v=393637,
|
||||
video_x_matroska=393638,
|
||||
video_x_mng=393639,
|
||||
video_x_motion_jpeg=393640,
|
||||
video_x_ms_asf=393641,
|
||||
video_x_msvideo=393642,
|
||||
video_x_qtc=393643,
|
||||
video_x_sgi_movie=393644,
|
||||
x_epoc_x_sisx_app=721325,
|
||||
application_x_envoy=655482,
|
||||
application_x_esrehber=655483,
|
||||
application_x_excel=655484,
|
||||
application_x_executable=655485,
|
||||
application_x_font_gdos=655486,
|
||||
application_x_font_pf2=655487,
|
||||
application_x_font_pfm=655488,
|
||||
application_x_font_sfn=655489,
|
||||
application_x_font_ttf=655490 | 0x20000000,
|
||||
application_x_fptapplication_x_dbt=655491,
|
||||
application_x_freelance=655492,
|
||||
application_x_gamecube_rom=655493,
|
||||
application_x_gdbm=655494,
|
||||
application_x_gettext_translation=655495,
|
||||
application_x_git=655496,
|
||||
application_x_gsp=655497,
|
||||
application_x_gss=655498,
|
||||
application_x_gtar=655499,
|
||||
application_x_gzip=655500,
|
||||
application_x_hdf=655501,
|
||||
application_x_helpfile=655502,
|
||||
application_x_httpd_imap=655503,
|
||||
application_x_ima=655504,
|
||||
application_x_innosetup=655505,
|
||||
application_x_internett_signup=655506,
|
||||
application_x_inventor=655507,
|
||||
application_x_ip2=655508,
|
||||
application_x_java_applet=655509,
|
||||
application_x_java_commerce=655510,
|
||||
application_x_java_image=655511,
|
||||
application_x_java_jmod=655512,
|
||||
application_x_java_keystore=655513,
|
||||
application_x_kdelnk=655514,
|
||||
application_x_koan=655515,
|
||||
application_x_latex=655516,
|
||||
application_x_livescreen=655517,
|
||||
application_x_lotus=655518,
|
||||
application_x_lz4=655519 | 0x08000000,
|
||||
application_x_lz4_json=655520,
|
||||
application_x_lzh=655521,
|
||||
application_x_lzh_compressed=655522,
|
||||
application_x_lzip=655523 | 0x08000000,
|
||||
application_x_lzma=655524 | 0x08000000,
|
||||
application_x_lzop=655525 | 0x08000000,
|
||||
application_x_lzx=655526,
|
||||
application_x_mach_binary=655527,
|
||||
application_x_mach_executable=655528,
|
||||
application_x_magic_cap_package_1_0=655529,
|
||||
application_x_mathcad=655530,
|
||||
application_x_maxis_dbpf=655531,
|
||||
application_x_meme=655532,
|
||||
application_x_midi=655533,
|
||||
application_x_mif=655534,
|
||||
application_x_mix_transfer=655535,
|
||||
application_x_mobipocket_ebook=655536 | 0x02000000,
|
||||
application_x_ms_compress_szdd=655537,
|
||||
application_x_ms_pdb=655538,
|
||||
application_x_ms_reader=655539,
|
||||
application_x_msaccess=655540,
|
||||
application_x_n64_rom=655541,
|
||||
application_x_navi_animation=655542,
|
||||
application_x_navidoc=655543,
|
||||
application_x_navimap=655544,
|
||||
application_x_navistyle=655545,
|
||||
application_x_nes_rom=655546,
|
||||
application_x_netcdf=655547,
|
||||
application_x_newton_compatible_pkg=655548,
|
||||
application_x_nintendo_ds_rom=655549,
|
||||
application_x_object=655550,
|
||||
application_x_omc=655551,
|
||||
application_x_omcdatamaker=655552,
|
||||
application_x_omcregerator=655553,
|
||||
application_x_pagemaker=655554,
|
||||
application_x_pcl=655555,
|
||||
application_x_pgp_keyring=655556,
|
||||
application_x_pixclscript=655557,
|
||||
application_x_pkcs7_certreqresp=655558,
|
||||
application_x_pkcs7_signature=655559,
|
||||
application_x_project=655560,
|
||||
application_x_qpro=655561,
|
||||
application_x_rar=655562 | 0x10000000,
|
||||
application_x_rpm=655563,
|
||||
application_x_sdp=655564,
|
||||
application_x_sea=655565,
|
||||
application_x_seelogo=655566,
|
||||
application_x_setupscript=655567,
|
||||
application_x_shar=655568,
|
||||
application_x_sharedlib=655569,
|
||||
application_x_shockwave_flash=655570,
|
||||
application_x_snappy_framed=655571,
|
||||
application_x_sprite=655572,
|
||||
application_x_sqlite3=655573,
|
||||
application_x_stargallery_thm=655574,
|
||||
application_x_stuffit=655575,
|
||||
application_x_sv4cpio=655576,
|
||||
application_x_sv4crc=655577,
|
||||
application_x_tar=655578 | 0x10000000,
|
||||
application_x_tbook=655579,
|
||||
application_x_terminfo=655580,
|
||||
application_x_terminfo2=655581,
|
||||
application_x_tex_tfm=655582,
|
||||
application_x_texinfo=655583,
|
||||
application_x_ustar=655584,
|
||||
application_x_visio=655585,
|
||||
application_x_vnd_audioexplosion_mzz=655586,
|
||||
application_x_vnd_ls_xpix=655587,
|
||||
application_x_vrml=655588,
|
||||
application_x_wais_source=655589,
|
||||
application_x_wine_extension_ini=655590,
|
||||
application_x_wintalk=655591,
|
||||
application_x_world=655592,
|
||||
application_x_wri=655593,
|
||||
application_x_x509_ca_cert=655594,
|
||||
application_x_xz=655595 | 0x08000000,
|
||||
application_x_zip=655596,
|
||||
application_x_zstd=655597 | 0x08000000,
|
||||
application_x_zstd_dictionary=655598,
|
||||
application_xml=655599,
|
||||
application_zip=655600 | 0x10000000,
|
||||
application_zlib=655601,
|
||||
audio_basic=458994 | 0x80000000,
|
||||
audio_it=458995,
|
||||
audio_make=458996,
|
||||
audio_mid=458997,
|
||||
audio_midi=458998,
|
||||
audio_mp4=458999,
|
||||
audio_mpeg=459000,
|
||||
audio_ogg=459001,
|
||||
audio_s3m=459002,
|
||||
audio_tsp_audio=459003,
|
||||
audio_tsplayer=459004,
|
||||
audio_vnd_qcelp=459005,
|
||||
audio_voxware=459006,
|
||||
audio_x_aiff=459007,
|
||||
audio_x_flac=459008,
|
||||
audio_x_gsm=459009,
|
||||
audio_x_hx_aac_adts=459010,
|
||||
audio_x_jam=459011,
|
||||
audio_x_liveaudio=459012,
|
||||
audio_x_m4a=459013,
|
||||
audio_x_midi=459014,
|
||||
audio_x_mod=459015,
|
||||
audio_x_mp4a_latm=459016,
|
||||
audio_x_mpeg_3=459017,
|
||||
audio_x_mpequrl=459018,
|
||||
audio_x_nspaudio=459019,
|
||||
audio_x_pn_realaudio=459020,
|
||||
audio_x_psid=459021,
|
||||
audio_x_realaudio=459022,
|
||||
audio_x_s3m=459023,
|
||||
audio_x_twinvq=459024,
|
||||
audio_x_twinvq_plugin=459025,
|
||||
audio_x_voc=459026,
|
||||
audio_x_wav=459027,
|
||||
audio_x_xbox_executable=459028 | 0x80000000,
|
||||
audio_x_xbox360_executable=459029 | 0x80000000,
|
||||
audio_xm=459030,
|
||||
font_otf=327959 | 0x20000000,
|
||||
font_sfnt=327960 | 0x20000000,
|
||||
font_woff=327961 | 0x20000000,
|
||||
font_woff2=327962 | 0x20000000,
|
||||
image_bmp=524571,
|
||||
image_cmu_raster=524572,
|
||||
image_fif=524573,
|
||||
image_florian=524574,
|
||||
image_g3fax=524575,
|
||||
image_gif=524576,
|
||||
image_heic=524577,
|
||||
image_ief=524578,
|
||||
image_jpeg=524579,
|
||||
image_jutvision=524580,
|
||||
image_naplps=524581,
|
||||
image_pict=524582,
|
||||
image_png=524583,
|
||||
image_svg=524584 | 0x80000000,
|
||||
image_svg_xml=524585 | 0x80000000,
|
||||
image_tiff=524586,
|
||||
image_vnd_adobe_photoshop=524587 | 0x80000000,
|
||||
image_vnd_djvu=524588 | 0x80000000,
|
||||
image_vnd_fpx=524589,
|
||||
image_vnd_microsoft_icon=524590,
|
||||
image_vnd_rn_realflash=524591,
|
||||
image_vnd_rn_realpix=524592,
|
||||
image_vnd_wap_wbmp=524593,
|
||||
image_vnd_xiff=524594,
|
||||
image_webp=524595,
|
||||
image_wmf=524596,
|
||||
image_x_3ds=524597,
|
||||
image_x_adobe_dng=524598 | 0x00800000,
|
||||
image_x_award_bioslogo=524599,
|
||||
image_x_canon_cr2=524600 | 0x00800000,
|
||||
image_x_canon_crw=524601 | 0x00800000,
|
||||
image_x_cmu_raster=524602,
|
||||
image_x_cur=524603,
|
||||
image_x_dcraw=524604 | 0x00800000,
|
||||
image_x_dwg=524605,
|
||||
image_x_eps=524606,
|
||||
image_x_epson_erf=524607 | 0x00800000,
|
||||
image_x_exr=524608,
|
||||
image_x_fuji_raf=524609 | 0x00800000,
|
||||
image_x_gem=524610,
|
||||
image_x_icns=524611,
|
||||
image_x_icon=524612 | 0x80000000,
|
||||
image_x_jg=524613,
|
||||
image_x_jps=524614,
|
||||
image_x_kodak_dcr=524615 | 0x00800000,
|
||||
image_x_kodak_k25=524616 | 0x00800000,
|
||||
image_x_kodak_kdc=524617 | 0x00800000,
|
||||
image_x_minolta_mrw=524618 | 0x00800000,
|
||||
image_x_ms_bmp=524619,
|
||||
image_x_niff=524620,
|
||||
image_x_nikon_nef=524621 | 0x00800000,
|
||||
image_x_olympus_orf=524622 | 0x00800000,
|
||||
image_x_panasonic_raw=524623 | 0x00800000,
|
||||
image_x_pcx=524624,
|
||||
image_x_pentax_pef=524625 | 0x00800000,
|
||||
image_x_pict=524626,
|
||||
image_x_portable_bitmap=524627,
|
||||
image_x_portable_graymap=524628,
|
||||
image_x_portable_pixmap=524629,
|
||||
image_x_quicktime=524630,
|
||||
image_x_rgb=524631,
|
||||
image_x_sigma_x3f=524632 | 0x00800000,
|
||||
image_x_sony_arw=524633 | 0x00800000,
|
||||
image_x_sony_sr2=524634 | 0x00800000,
|
||||
image_x_sony_srf=524635 | 0x00800000,
|
||||
image_x_tga=524636,
|
||||
image_x_tiff=524637,
|
||||
image_x_win_bitmap=524638,
|
||||
image_x_xcf=524639 | 0x80000000,
|
||||
image_x_xpixmap=524640 | 0x80000000,
|
||||
image_x_xwindowdump=524641,
|
||||
message_news=196962,
|
||||
message_rfc822=196963,
|
||||
model_vnd_dwf=65892,
|
||||
model_vnd_gdl=65893,
|
||||
model_vnd_gs_gdl=65894,
|
||||
model_vrml=65895,
|
||||
model_x_pov=65896,
|
||||
sist2_sidecar=2,
|
||||
text_PGP=590185,
|
||||
text_asp=590186,
|
||||
text_css=590187,
|
||||
text_html=590188 | 0x01000000,
|
||||
text_javascript=590189,
|
||||
text_mcf=590190,
|
||||
text_pascal=590191,
|
||||
text_plain=590192,
|
||||
text_richtext=590193,
|
||||
text_rtf=590194,
|
||||
text_scriplet=590195,
|
||||
text_tab_separated_values=590196,
|
||||
text_troff=590197,
|
||||
text_uri_list=590198,
|
||||
text_vnd_abc=590199,
|
||||
text_vnd_fmi_flexstor=590200,
|
||||
text_vnd_wap_wml=590201,
|
||||
text_vnd_wap_wmlscript=590202,
|
||||
text_webviewhtml=590203,
|
||||
text_x_Algol68=590204,
|
||||
text_x_asm=590205,
|
||||
text_x_audiosoft_intra=590206,
|
||||
text_x_awk=590207,
|
||||
text_x_bcpl=590208,
|
||||
text_x_c=590209,
|
||||
text_x_c__=590210,
|
||||
text_x_component=590211,
|
||||
text_x_diff=590212,
|
||||
text_x_fortran=590213,
|
||||
text_x_java=590214,
|
||||
text_x_la_asf=590215,
|
||||
text_x_lisp=590216,
|
||||
text_x_m=590217,
|
||||
text_x_m4=590218,
|
||||
text_x_makefile=590219,
|
||||
text_x_ms_regedit=590220,
|
||||
text_x_msdos_batch=590221,
|
||||
text_x_objective_c=590222,
|
||||
text_x_pascal=590223,
|
||||
text_x_perl=590224,
|
||||
text_x_php=590225,
|
||||
text_x_po=590226,
|
||||
text_x_python=590227,
|
||||
text_x_ruby=590228,
|
||||
text_x_sass=590229,
|
||||
text_x_scss=590230,
|
||||
text_x_server_parsed_html=590231,
|
||||
text_x_setext=590232,
|
||||
text_x_sgml=590233 | 0x01000000,
|
||||
text_x_shellscript=590234,
|
||||
text_x_speech=590235,
|
||||
text_x_tcl=590236,
|
||||
text_x_tex=590237,
|
||||
text_x_uil=590238,
|
||||
text_x_uuencode=590239,
|
||||
text_x_vcalendar=590240,
|
||||
text_x_vcard=590241,
|
||||
text_xml=590242 | 0x01000000,
|
||||
video_MP2T=393635,
|
||||
video_animaflex=393636,
|
||||
video_avi=393637,
|
||||
video_avs_video=393638,
|
||||
video_mp4=393639,
|
||||
video_mpeg=393640,
|
||||
video_quicktime=393641,
|
||||
video_vdo=393642,
|
||||
video_vivo=393643,
|
||||
video_vnd_rn_realvideo=393644,
|
||||
video_vosaic=393645,
|
||||
video_webm=393646,
|
||||
video_x_amt_demorun=393647,
|
||||
video_x_amt_showrun=393648,
|
||||
video_x_atomic3d_feature=393649,
|
||||
video_x_dl=393650,
|
||||
video_x_dv=393651,
|
||||
video_x_fli=393652,
|
||||
video_x_flv=393653,
|
||||
video_x_isvideo=393654,
|
||||
video_x_jng=393655 | 0x80000000,
|
||||
video_x_m4v=393656,
|
||||
video_x_matroska=393657,
|
||||
video_x_mng=393658,
|
||||
video_x_motion_jpeg=393659,
|
||||
video_x_ms_asf=393660,
|
||||
video_x_msvideo=393661,
|
||||
video_x_qtc=393662,
|
||||
video_x_sgi_movie=393663,
|
||||
x_epoc_x_sisx_app=721344,
|
||||
};
|
||||
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
|
||||
case application_arj: return "application/arj";
|
||||
@@ -785,6 +806,7 @@ case text_mcf: return "text/mcf";
|
||||
case text_pascal: return "text/pascal";
|
||||
case text_PGP: return "text/PGP";
|
||||
case text_plain: return "text/plain";
|
||||
case application_vnd_coffeescript: return "application/vnd.coffeescript";
|
||||
case text_richtext: return "text/richtext";
|
||||
case text_rtf: return "text/rtf";
|
||||
case text_scriplet: return "text/scriplet";
|
||||
@@ -866,6 +888,26 @@ case video_x_qtc: return "video/x-qtc";
|
||||
case video_x_sgi_movie: return "video/x-sgi-movie";
|
||||
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
|
||||
case application_x_zstd_dictionary: return "application/x-zstd-dictionary";
|
||||
case application_vnd_ms_outlook: return "application/vnd.ms-outlook";
|
||||
case image_x_olympus_orf: return "image/x-olympus-orf";
|
||||
case image_x_nikon_nef: return "image/x-nikon-nef";
|
||||
case image_x_fuji_raf: return "image/x-fuji-raf";
|
||||
case image_x_panasonic_raw: return "image/x-panasonic-raw";
|
||||
case image_x_adobe_dng: return "image/x-adobe-dng";
|
||||
case image_x_canon_cr2: return "image/x-canon-cr2";
|
||||
case image_x_canon_crw: return "image/x-canon-crw";
|
||||
case image_x_dcraw: return "image/x-dcraw";
|
||||
case image_x_kodak_dcr: return "image/x-kodak-dcr";
|
||||
case image_x_kodak_k25: return "image/x-kodak-k25";
|
||||
case image_x_kodak_kdc: return "image/x-kodak-kdc";
|
||||
case image_x_minolta_mrw: return "image/x-minolta-mrw";
|
||||
case image_x_pentax_pef: return "image/x-pentax-pef";
|
||||
case image_x_sigma_x3f: return "image/x-sigma-x3f";
|
||||
case image_x_sony_arw: return "image/x-sony-arw";
|
||||
case image_x_sony_sr2: return "image/x-sony-sr2";
|
||||
case image_x_sony_srf: return "image/x-sony-srf";
|
||||
case image_x_epson_erf: return "image/x-epson-erf";
|
||||
case sist2_sidecar: return "sist2/sidecar";
|
||||
default: return NULL;}}
|
||||
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
|
||||
@@ -883,6 +925,7 @@ g_hash_table_insert(ext_table, "frl", (gpointer)application_freeloader);
|
||||
g_hash_table_insert(ext_table, "spl", (gpointer)application_futuresplash);
|
||||
g_hash_table_insert(ext_table, "vew", (gpointer)application_groupwise);
|
||||
g_hash_table_insert(ext_table, "gz", (gpointer)application_gzip);
|
||||
g_hash_table_insert(ext_table, "tgz", (gpointer)application_gzip);
|
||||
g_hash_table_insert(ext_table, "hta", (gpointer)application_hta);
|
||||
g_hash_table_insert(ext_table, "unv", (gpointer)application_i_deas);
|
||||
g_hash_table_insert(ext_table, "iges", (gpointer)application_iges);
|
||||
@@ -997,7 +1040,6 @@ g_hash_table_insert(ext_table, "cpio", (gpointer)application_x_cpio);
|
||||
g_hash_table_insert(ext_table, "dbf", (gpointer)application_x_dbf);
|
||||
g_hash_table_insert(ext_table, "deb", (gpointer)application_x_debian_package);
|
||||
g_hash_table_insert(ext_table, "deepv", (gpointer)application_x_deepv);
|
||||
g_hash_table_insert(ext_table, "dcr", (gpointer)application_x_director);
|
||||
g_hash_table_insert(ext_table, "dir", (gpointer)application_x_director);
|
||||
g_hash_table_insert(ext_table, "dxr", (gpointer)application_x_director);
|
||||
g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp);
|
||||
@@ -1277,6 +1319,11 @@ g_hash_table_insert(ext_table, "sfv", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "m3u", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "csv", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "eml", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "make", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "log", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "markdown", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "yaml", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "coffee", (gpointer)application_vnd_coffeescript);
|
||||
g_hash_table_insert(ext_table, "rt", (gpointer)text_richtext);
|
||||
g_hash_table_insert(ext_table, "rtf", (gpointer)text_richtext);
|
||||
g_hash_table_insert(ext_table, "rtx", (gpointer)text_richtext);
|
||||
@@ -1385,6 +1432,26 @@ g_hash_table_insert(ext_table, "divx", (gpointer)video_x_msvideo);
|
||||
g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc);
|
||||
g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie);
|
||||
g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie);
|
||||
g_hash_table_insert(ext_table, "msg", (gpointer)application_vnd_ms_outlook);
|
||||
g_hash_table_insert(ext_table, "orf", (gpointer)image_x_olympus_orf);
|
||||
g_hash_table_insert(ext_table, "nef", (gpointer)image_x_nikon_nef);
|
||||
g_hash_table_insert(ext_table, "raf", (gpointer)image_x_fuji_raf);
|
||||
g_hash_table_insert(ext_table, "rw2", (gpointer)image_x_panasonic_raw);
|
||||
g_hash_table_insert(ext_table, "raw", (gpointer)image_x_panasonic_raw);
|
||||
g_hash_table_insert(ext_table, "dng", (gpointer)image_x_adobe_dng);
|
||||
g_hash_table_insert(ext_table, "cr2", (gpointer)image_x_canon_cr2);
|
||||
g_hash_table_insert(ext_table, "crw", (gpointer)image_x_canon_crw);
|
||||
g_hash_table_insert(ext_table, "dcr", (gpointer)image_x_kodak_dcr);
|
||||
g_hash_table_insert(ext_table, "k25", (gpointer)image_x_kodak_k25);
|
||||
g_hash_table_insert(ext_table, "kdc", (gpointer)image_x_kodak_kdc);
|
||||
g_hash_table_insert(ext_table, "mrw", (gpointer)image_x_minolta_mrw);
|
||||
g_hash_table_insert(ext_table, "pef", (gpointer)image_x_pentax_pef);
|
||||
g_hash_table_insert(ext_table, "xf3", (gpointer)image_x_sigma_x3f);
|
||||
g_hash_table_insert(ext_table, "arw", (gpointer)image_x_sony_arw);
|
||||
g_hash_table_insert(ext_table, "sr2", (gpointer)image_x_sony_sr2);
|
||||
g_hash_table_insert(ext_table, "srf", (gpointer)image_x_sony_srf);
|
||||
g_hash_table_insert(ext_table, "erf", (gpointer)image_x_epson_erf);
|
||||
g_hash_table_insert(ext_table, "s2meta", (gpointer)sist2_sidecar);
|
||||
return ext_table;}
|
||||
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
|
||||
@@ -1735,6 +1802,7 @@ g_hash_table_insert(mime_table, "text/mcf", (gpointer)text_mcf);
|
||||
g_hash_table_insert(mime_table, "text/pascal", (gpointer)text_pascal);
|
||||
g_hash_table_insert(mime_table, "text/PGP", (gpointer)text_PGP);
|
||||
g_hash_table_insert(mime_table, "text/plain", (gpointer)text_plain);
|
||||
g_hash_table_insert(mime_table, "application/vnd.coffeescript", (gpointer)application_vnd_coffeescript);
|
||||
g_hash_table_insert(mime_table, "text/richtext", (gpointer)text_richtext);
|
||||
g_hash_table_insert(mime_table, "text/rtf", (gpointer)text_rtf);
|
||||
g_hash_table_insert(mime_table, "text/scriplet", (gpointer)text_scriplet);
|
||||
@@ -1816,5 +1884,25 @@ g_hash_table_insert(mime_table, "video/x-qtc", (gpointer)video_x_qtc);
|
||||
g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie);
|
||||
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
|
||||
g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary);
|
||||
g_hash_table_insert(mime_table, "application/vnd.ms-outlook", (gpointer)application_vnd_ms_outlook);
|
||||
g_hash_table_insert(mime_table, "image/x-olympus-orf", (gpointer)image_x_olympus_orf);
|
||||
g_hash_table_insert(mime_table, "image/x-nikon-nef", (gpointer)image_x_nikon_nef);
|
||||
g_hash_table_insert(mime_table, "image/x-fuji-raf", (gpointer)image_x_fuji_raf);
|
||||
g_hash_table_insert(mime_table, "image/x-panasonic-raw", (gpointer)image_x_panasonic_raw);
|
||||
g_hash_table_insert(mime_table, "image/x-adobe-dng", (gpointer)image_x_adobe_dng);
|
||||
g_hash_table_insert(mime_table, "image/x-canon-cr2", (gpointer)image_x_canon_cr2);
|
||||
g_hash_table_insert(mime_table, "image/x-canon-crw", (gpointer)image_x_canon_crw);
|
||||
g_hash_table_insert(mime_table, "image/x-dcraw", (gpointer)image_x_dcraw);
|
||||
g_hash_table_insert(mime_table, "image/x-kodak-dcr", (gpointer)image_x_kodak_dcr);
|
||||
g_hash_table_insert(mime_table, "image/x-kodak-k25", (gpointer)image_x_kodak_k25);
|
||||
g_hash_table_insert(mime_table, "image/x-kodak-kdc", (gpointer)image_x_kodak_kdc);
|
||||
g_hash_table_insert(mime_table, "image/x-minolta-mrw", (gpointer)image_x_minolta_mrw);
|
||||
g_hash_table_insert(mime_table, "image/x-pentax-pef", (gpointer)image_x_pentax_pef);
|
||||
g_hash_table_insert(mime_table, "image/x-sigma-x3f", (gpointer)image_x_sigma_x3f);
|
||||
g_hash_table_insert(mime_table, "image/x-sony-arw", (gpointer)image_x_sony_arw);
|
||||
g_hash_table_insert(mime_table, "image/x-sony-sr2", (gpointer)image_x_sony_sr2);
|
||||
g_hash_table_insert(mime_table, "image/x-sony-srf", (gpointer)image_x_sony_srf);
|
||||
g_hash_table_insert(mime_table, "image/x-epson-erf", (gpointer)image_x_epson_erf);
|
||||
g_hash_table_insert(mime_table, "sist2/sidecar", (gpointer)sist2_sidecar);
|
||||
return mime_table;}
|
||||
#endif
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include "src/ctx.h"
|
||||
#include "mime.h"
|
||||
#include "src/io/serialize.h"
|
||||
#include "src/parsing/sidecar.h"
|
||||
|
||||
#include <magic.h>
|
||||
|
||||
@@ -38,35 +39,45 @@ void fs_reset(struct vfile *f) {
|
||||
}
|
||||
}
|
||||
|
||||
#define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL))
|
||||
|
||||
/**
 * Record `job` as the parse job currently being handled by the calling thread.
 *
 * The job pointer is stored in ScanCtx.dbg_current_files under a key derived
 * from pthread_self(); g_hash_table_replace() overwrites any entry already
 * stored under that key.
 *
 * @param job Parse job to associate with the calling thread. The table keeps
 *            the pointer itself, not a copy.
 */
void set_dbg_current_file(parse_job_t *job) {
    // NOTE(review): presumably readers of dbg_current_files derive the key the
    // same way (pthread_self() -> GINT_TO_POINTER) - confirm before changing it.
    unsigned long long thread_key = (unsigned long long) pthread_self();

    g_hash_table_replace(
            ScanCtx.dbg_current_files,
            GINT_TO_POINTER(thread_key),
            job
    );
}
|
||||
|
||||
void parse(void *arg) {
|
||||
|
||||
parse_job_t *job = arg;
|
||||
document_t doc;
|
||||
|
||||
int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
|
||||
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
||||
incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
|
||||
return;
|
||||
}
|
||||
|
||||
set_dbg_current_file(job);
|
||||
|
||||
doc.filepath = job->filepath;
|
||||
doc.ext = (short) job->ext;
|
||||
doc.base = (short) job->base;
|
||||
|
||||
char *rel_path = doc.filepath + ScanCtx.index.desc.root_len;
|
||||
MD5((unsigned char *) rel_path, strlen(rel_path), doc.path_md5);
|
||||
|
||||
doc.meta_head = NULL;
|
||||
doc.meta_tail = NULL;
|
||||
doc.mime = 0;
|
||||
doc.size = job->vfile.info.st_size;
|
||||
doc.ino = job->vfile.info.st_ino;
|
||||
doc.mtime = job->vfile.info.st_mtim.tv_sec;
|
||||
|
||||
uuid_generate(doc.uuid);
|
||||
char *buf[PARSE_BUF_SIZE];
|
||||
int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5);
|
||||
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
||||
incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5);
|
||||
return;
|
||||
}
|
||||
|
||||
char *buf[MAGIC_BUF_SIZE];
|
||||
|
||||
if (LogCtx.very_verbose) {
|
||||
char uuid_str[UUID_STR_LEN];
|
||||
uuid_unparse(doc.uuid, uuid_str);
|
||||
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(doc.path_md5, MD5_DIGEST_LENGTH, path_md5_str);
|
||||
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
|
||||
}
|
||||
|
||||
if (job->vfile.info.st_size == 0) {
|
||||
@@ -78,8 +89,18 @@ void parse(void *arg) {
|
||||
int bytes_read = 0;
|
||||
|
||||
if (doc.mime == 0 && !ScanCtx.fast) {
|
||||
if (IS_GIT_OBJ) {
|
||||
goto abort;
|
||||
}
|
||||
|
||||
// Get mime type with libmagic
|
||||
bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE);
|
||||
if (!job->vfile.is_fs_file) {
|
||||
LOG_WARNING(job->filepath,
|
||||
"Guessing mime type with libmagic inside archive files is not currently supported");
|
||||
goto abort;
|
||||
}
|
||||
|
||||
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
|
||||
if (bytes_read < 0) {
|
||||
|
||||
if (job->vfile.is_fs_file) {
|
||||
@@ -115,6 +136,8 @@ void parse(void *arg) {
|
||||
|
||||
if (!(SHOULD_PARSE(doc.mime))) {
|
||||
|
||||
} else if (IS_RAW(doc.mime)) {
|
||||
parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
|
||||
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
|
||||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||
|
||||
@@ -139,24 +162,32 @@ void parse(void *arg) {
|
||||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
|
||||
)) {
|
||||
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
|
||||
} else if (ScanCtx.ooxml_ctx.content_size > 0 && IS_DOC(doc.mime)) {
|
||||
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc.mime)) {
|
||||
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
|
||||
|
||||
} else if (is_cbr(&ScanCtx.cbr_ctx, doc.mime)) {
|
||||
parse_cbr(&ScanCtx.cbr_ctx, &job->vfile, &doc);
|
||||
} else if (is_cbr(&ScanCtx.comic_ctx, doc.mime) || is_cbz(&ScanCtx.comic_ctx, doc.mime)) {
|
||||
parse_comic(&ScanCtx.comic_ctx, &job->vfile, &doc);
|
||||
} else if (IS_MOBI(doc.mime)) {
|
||||
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
|
||||
} else if (doc.mime == MIME_SIST2_SIDECAR) {
|
||||
parse_sidecar(&job->vfile, &doc);
|
||||
CLOSE_FILE(job->vfile)
|
||||
return;
|
||||
} else if (is_msdoc(&ScanCtx.msdoc_ctx, doc.mime)) {
|
||||
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, &doc);
|
||||
}
|
||||
|
||||
//Parent meta
|
||||
if (!uuid_is_null(job->parent)) {
|
||||
char tmp[UUID_STR_LEN];
|
||||
uuid_unparse(job->parent, tmp);
|
||||
abort:
|
||||
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
|
||||
//Parent meta
|
||||
if (!md5_digest_is_null(job->parent)) {
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
|
||||
meta_parent->key = MetaParent;
|
||||
strcpy(meta_parent->str_val, tmp);
|
||||
buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
|
||||
APPEND_META((&doc), meta_parent)
|
||||
|
||||
doc.has_parent = TRUE;
|
||||
} else {
|
||||
doc.has_parent = FALSE;
|
||||
}
|
||||
|
||||
write_document(&doc);
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include "../sist.h"
|
||||
|
||||
#define PARSE_BUF_SIZE 4096
|
||||
#define MAGIC_BUF_SIZE 4096 * 6
|
||||
|
||||
int fs_read(struct vfile *f, void *buf, size_t size);
|
||||
void fs_close(struct vfile *f);
|
||||
|
||||
35
src/parsing/sidecar.c
Normal file
35
src/parsing/sidecar.c
Normal file
@@ -0,0 +1,35 @@
|
||||
#include "sidecar.h"
|
||||
|
||||
#include "src/ctx.h"
|
||||
|
||||
/**
 * Parse a JSON sidecar file and write its contents to the index meta store.
 *
 * The whole file is read into memory, parsed with cJSON to validate it, then
 * re-serialized without formatting. The serialized JSON (including its
 * terminating NUL) is stored under the MD5 of a prefix of the sidecar's
 * path relative to the index root.
 *
 * On any failure (read error, out of memory, invalid JSON, serialization
 * failure) an error is logged, every intermediate allocation is released and
 * nothing is written to the store.
 *
 * @param vfile Sidecar file to read; vfile->filepath is used for the key and
 *              for log messages.
 * @param doc   Document describing the sidecar; only doc->ext is read here.
 */
void parse_sidecar(vfile_t *vfile, document_t *doc) {

    LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath)

    size_t size;
    char *buf = read_all(vfile, &size);
    if (buf == NULL) {
        LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath)
        return;
    }

    // cJSON_Parse() needs a NUL-terminated string. Keep the old pointer until
    // realloc() is known to have succeeded so it can still be freed on failure.
    char *terminated = realloc(buf, size + 1);
    if (terminated == NULL) {
        LOG_ERRORF("sidecar.c", "Out of memory while reading %s", vfile->filepath)
        free(buf);
        return;
    }
    buf = terminated;
    buf[size] = '\0';

    cJSON *json = cJSON_Parse(buf);
    if (json == NULL) {
        LOG_ERRORF("sidecar.c", "Could not parse JSON sidecar %s", vfile->filepath)
        free(buf);
        return;
    }

    // cJSON_PrintUnformatted() returns NULL when it cannot allocate its output
    char *json_str = cJSON_PrintUnformatted(json);
    if (json_str == NULL) {
        LOG_ERRORF("sidecar.c", "Could not serialize JSON sidecar %s", vfile->filepath)
        cJSON_Delete(json);
        free(buf);
        return;
    }

    // Hash the relative path up to (doc->ext - 1).
    // NOTE(review): presumably doc->ext is the offset of the extension within
    // filepath, so this drops the trailing ".s2meta" and yields the key of the
    // file the sidecar describes - confirm against the caller.
    unsigned char path_md5[MD5_DIGEST_LENGTH];
    MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
        path_md5);

    store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1);

    cJSON_Delete(json);
    free(json_str);
    free(buf);
}
|
||||
8
src/parsing/sidecar.h
Normal file
8
src/parsing/sidecar.h
Normal file
@@ -0,0 +1,8 @@
|
||||
#ifndef SIST2_SIDECAR_H
|
||||
#define SIST2_SIDECAR_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
void parse_sidecar(vfile_t *vfile, document_t *doc);
|
||||
|
||||
#endif
|
||||
@@ -23,9 +23,10 @@
|
||||
#undef ABS
|
||||
#define ABS(a) (((a) < 0) ? -(a) : (a))
|
||||
|
||||
#define UUID_STR_LEN 37
|
||||
#define UNUSED(x) __attribute__((__unused__)) x
|
||||
|
||||
#define MD5_STR_LENGTH 33
|
||||
|
||||
#include "util.h"
|
||||
#include "log.h"
|
||||
#include "types.h"
|
||||
@@ -47,5 +48,4 @@
|
||||
#include <errno.h>
|
||||
#include <ctype.h>
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
4
src/static/css/autocomplete.min.css
vendored
Normal file
4
src/static/css/autocomplete.min.css
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
.autocomplete-suggestions { text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1); position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box; }
|
||||
.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
|
||||
.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
|
||||
.autocomplete-suggestion.selected { background: #f0f0f0; }
|
||||
9
src/static/css/bootstrap-colorpicker.min.css
vendored
Normal file
9
src/static/css/bootstrap-colorpicker.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
@@ -121,7 +121,7 @@ body {
|
||||
background: #546b7a;
|
||||
}
|
||||
|
||||
.navbar a:hover {
|
||||
a:hover, .btn:hover {
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
@@ -130,7 +130,11 @@ body {
|
||||
}
|
||||
|
||||
.document {
|
||||
padding: 0.5rem;
|
||||
padding: 0.3rem;
|
||||
}
|
||||
|
||||
.card-text:last-child {
|
||||
margin-top: -1px;
|
||||
}
|
||||
|
||||
.document p {
|
||||
@@ -166,6 +170,12 @@ body {
|
||||
background-color: #FAAB3C;
|
||||
}
|
||||
|
||||
.add-tag-button {
|
||||
cursor: pointer;
|
||||
color: #212529;
|
||||
background-color: #e0e0e0;
|
||||
}
|
||||
|
||||
.card-img-overlay {
|
||||
pointer-events: none;
|
||||
padding: 0.75rem;
|
||||
@@ -191,22 +201,42 @@ body {
|
||||
margin-right: 3px;
|
||||
}
|
||||
|
||||
.badge-delete {
|
||||
margin-right: -2px;
|
||||
margin-left: 2px;
|
||||
margin-top: -1px;
|
||||
font-family: monospace;
|
||||
font-size: 90%;
|
||||
background: rgba(0, 0, 0, 0.2);
|
||||
padding: 0.1em 0.4em;
|
||||
color: white;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.badge-user {
|
||||
color: #212529;
|
||||
background-color: #e0e0e0;
|
||||
}
|
||||
|
||||
.card-img-top {
|
||||
border-top-left-radius: 0;
|
||||
border-top-right-radius: 0;
|
||||
}
|
||||
|
||||
.fit {
|
||||
display: block;
|
||||
min-width: 64px;
|
||||
max-width: 100%;
|
||||
max-height: 175px;
|
||||
max-height: 400px;
|
||||
margin: 0 auto 0;
|
||||
padding: 3px 3px 0;
|
||||
width: auto;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.img-padding {
|
||||
padding: 4px 4px 0 4px;
|
||||
}
|
||||
|
||||
.fit-sm {
|
||||
display: block;
|
||||
max-width: 64px;
|
||||
@@ -223,20 +253,6 @@ body {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
@media screen and (min-width: 1500px) {
|
||||
.container {
|
||||
max-width: 1440px;
|
||||
}
|
||||
|
||||
.bricklayer-column-sizer {
|
||||
width: 20% !important;
|
||||
}
|
||||
|
||||
.bricklayer-column {
|
||||
max-width: 20%;
|
||||
}
|
||||
}
|
||||
|
||||
@media screen and (min-width: 1800px) {
|
||||
.container {
|
||||
max-width: 1550px;
|
||||
@@ -266,6 +282,7 @@ mark {
|
||||
margin: 3px;
|
||||
white-space: normal;
|
||||
color: rgb(224, 224, 224);
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.irs-single, .irs-from, .irs-to {
|
||||
@@ -432,6 +449,7 @@ option {
|
||||
.small-btn {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.large-btn {
|
||||
display: inherit;
|
||||
}
|
||||
@@ -441,6 +459,7 @@ option {
|
||||
.small-btn {
|
||||
display: inherit;
|
||||
}
|
||||
|
||||
.large-btn {
|
||||
display: none;
|
||||
}
|
||||
@@ -480,3 +499,46 @@ svg {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.stats-card {
|
||||
text-align: center;
|
||||
margin-top: 1em;
|
||||
padding: 1em;
|
||||
|
||||
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
|
||||
border-radius: 0;
|
||||
border: none;
|
||||
|
||||
background: #212121;
|
||||
}
|
||||
|
||||
.graph {
|
||||
display: inline-block;
|
||||
width: 40%;
|
||||
}
|
||||
|
||||
.full-screen {
|
||||
position: absolute;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.stats-btn {
|
||||
float: right;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
#graphs-card svg text {
|
||||
fill: #eee;
|
||||
}
|
||||
|
||||
.wholerow {
|
||||
outline: none !important;
|
||||
}
|
||||
|
||||
.stat > .card-body {
|
||||
padding: 0.7em 1.25em;
|
||||
}
|
||||
|
||||
#modal-body > .img-wrapper {
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
|
||||
@@ -1 +1 @@
|
||||
.irs-bar,.irs-bar-edge,.irs-line-left,.irs-line-mid,.irs-line-right,.irs-slider{background:url("../img/sprite-skin-flat.png") repeat-x}.irs{height:40px}.irs-with-grid{height:60px}.irs-line{height:12px;top:25px}.irs-line-left{height:12px;background-position:0 -30px}.irs-line-mid{height:12px;background-position:0 0}.irs-line-right{height:12px;background-position:100% -30px}.irs-bar{height:12px;top:25px;background-position:0 -60px}.irs-bar-edge{top:25px;height:12px;width:9px;background-position:0 -90px}.irs-shadow{height:3px;top:34px;background:#000;opacity:0.25}.lt-ie9 .irs-shadow{filter: alpha(opacity=25)}.irs-slider{width:16px;height:18px;top:22px;background-position:0 -120px}.irs-slider.state_hover,.irs-slider:hover{background-position:0 -150px}.irs-max,.irs-min{color:#999;font-size:10px;line-height:1.333;text-shadow:none;top:0;padding:1px 3px;background:#e1e4e9;-moz-border-radius:4px;border-radius:4px}.irs-from,.irs-single,.irs-to{color:#fff;font-size:10px;line-height:1.333;text-shadow:none;padding:1px 5px;background:#2196F3;-moz-border-radius:4px;border-radius:4px}.irs-from:after,.irs-single:after,.irs-to:after{position:absolute;display:block;content:"";bottom:-6px;left:50%;width:0;height:0;margin-left:-3px;overflow:hidden;border:3px solid transparent;border-top-color:#2196F3}.irs-grid-pol{background:#e1e4e9}.irs-grid-text{color:#999}.irs-disabled{}
|
||||
.irs-bar,.irs-bar-edge,.irs-line-left,.irs-line-mid,.irs-line-right,.irs-slider{background:url("./img/sprite-skin-flat.png") repeat-x}.irs{height:40px}.irs-with-grid{height:60px}.irs-line{height:12px;top:25px}.irs-line-left{height:12px;background-position:0 -30px}.irs-line-mid{height:12px;background-position:0 0}.irs-line-right{height:12px;background-position:100% -30px}.irs-bar{height:12px;top:25px;background-position:0 -60px}.irs-bar-edge{top:25px;height:12px;width:9px;background-position:0 -90px}.irs-shadow{height:3px;top:34px;background:#000;opacity:0.25}.lt-ie9 .irs-shadow{filter: alpha(opacity=25)}.irs-slider{width:16px;height:18px;top:22px;background-position:0 -120px}.irs-slider.state_hover,.irs-slider:hover{background-position:0 -150px}.irs-max,.irs-min{color:#999;font-size:10px;line-height:1.333;text-shadow:none;top:0;padding:1px 3px;background:#e1e4e9;-moz-border-radius:4px;border-radius:4px}.irs-from,.irs-single,.irs-to{color:#fff;font-size:10px;line-height:1.333;text-shadow:none;padding:1px 5px;background:#2196F3;-moz-border-radius:4px;border-radius:4px}.irs-from:after,.irs-single:after,.irs-to:after{position:absolute;display:block;content:"";bottom:-6px;left:50%;width:0;height:0;margin-left:-3px;overflow:hidden;border:3px solid transparent;border-top-color:#2196F3}.irs-grid-pol{background:#e1e4e9}.irs-grid-text{color:#999}.irs-disabled{}
|
||||
|
||||
@@ -70,7 +70,11 @@ body {
|
||||
}
|
||||
|
||||
.document {
|
||||
padding: 0.5rem;
|
||||
padding: 0.3rem;
|
||||
}
|
||||
|
||||
.card-text:last-child {
|
||||
margin-top: -1px;
|
||||
}
|
||||
|
||||
.document p {
|
||||
@@ -106,11 +110,33 @@ body {
|
||||
background-color: #e0e0e0;
|
||||
}
|
||||
|
||||
.badge {
|
||||
margin-right: 3px;
|
||||
}
|
||||
|
||||
.badge-delete {
|
||||
margin-right: -2px;
|
||||
margin-left: 2px;
|
||||
margin-top: -1px;
|
||||
font-family: monospace;
|
||||
font-size: 90%;
|
||||
background: rgba(0,0,0,0.2);
|
||||
padding: 0.1em 0.4em;
|
||||
color: white;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.badge-text {
|
||||
color: #FFFFFF;
|
||||
background-color: #FAAB3C;
|
||||
}
|
||||
|
||||
.add-tag-button {
|
||||
cursor: pointer;
|
||||
color: #212529;
|
||||
background-color: #e0e0e0;
|
||||
}
|
||||
|
||||
.card-img-overlay {
|
||||
pointer-events: none;
|
||||
padding: 0.75rem;
|
||||
@@ -131,21 +157,25 @@ body {
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.badge {
|
||||
margin-right: 3px;
|
||||
.card-img-top {
|
||||
border-top-left-radius: 0;
|
||||
border-top-right-radius: 0;
|
||||
}
|
||||
|
||||
.fit {
|
||||
display: block;
|
||||
min-width: 64px;
|
||||
max-width: 100%;
|
||||
max-height: 175px;
|
||||
max-height: 400px;
|
||||
margin: 0 auto 0;
|
||||
padding: 3px 3px 0 3px;
|
||||
width: auto;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.img-padding {
|
||||
padding: 4px 4px 0 4px;
|
||||
}
|
||||
|
||||
.fit-sm {
|
||||
display: block;
|
||||
max-width: 64px;
|
||||
@@ -162,6 +192,10 @@ body {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.bricklayer {
|
||||
/*max-width: 100%;*/
|
||||
}
|
||||
|
||||
@media screen and (max-width: 1200px) {
|
||||
.bricklayer-column {
|
||||
max-width: 100%;
|
||||
@@ -205,6 +239,7 @@ mark {
|
||||
margin: 3px;
|
||||
white-space: normal;
|
||||
color: #000;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.irs-single, .irs-from, .irs-to {
|
||||
@@ -347,3 +382,42 @@ mark {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.stats-card {
|
||||
text-align: center;
|
||||
margin-top: 1em;
|
||||
padding: 1em;
|
||||
|
||||
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
|
||||
border-radius: 0;
|
||||
border: none;
|
||||
|
||||
background: #fff;
|
||||
}
|
||||
|
||||
.graph {
|
||||
display: inline-block;
|
||||
width: 40%;
|
||||
}
|
||||
|
||||
.full-screen {
|
||||
position: absolute;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.stats-btn {
|
||||
float: right;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
.wholerow {
|
||||
outline: none !important;
|
||||
}
|
||||
|
||||
.stat > .card-body {
|
||||
padding: 0.7em 1.25em;
|
||||
}
|
||||
|
||||
#modal-body > .img-wrapper {
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
2
src/static/js/5_inspire-tree.min.js
vendored
2
src/static/js/5_inspire-tree.min.js
vendored
File diff suppressed because one or more lines are too long
1
src/static/js/8_md5.min.js
vendored
Normal file
1
src/static/js/8_md5.min.js
vendored
Normal file
@@ -0,0 +1 @@
|
||||
!function(n){"use strict";function d(n,t){var r=(65535&n)+(65535&t);return(n>>16)+(t>>16)+(r>>16)<<16|65535&r}function f(n,t,r,e,o,u){return d((c=d(d(t,n),d(e,u)))<<(f=o)|c>>>32-f,r);var c,f}function l(n,t,r,e,o,u,c){return f(t&r|~t&e,n,t,o,u,c)}function v(n,t,r,e,o,u,c){return f(t&e|r&~e,n,t,o,u,c)}function g(n,t,r,e,o,u,c){return f(t^r^e,n,t,o,u,c)}function m(n,t,r,e,o,u,c){return f(r^(t|~e),n,t,o,u,c)}function i(n,t){var r,e,o,u;n[t>>5]|=128<<t%32,n[14+(t+64>>>9<<4)]=t;for(var c=1732584193,f=-271733879,i=-1732584194,a=271733878,h=0;h<n.length;h+=16)c=l(r=c,e=f,o=i,u=a,n[h],7,-680876936),a=l(a,c,f,i,n[h+1],12,-389564586),i=l(i,a,c,f,n[h+2],17,606105819),f=l(f,i,a,c,n[h+3],22,-1044525330),c=l(c,f,i,a,n[h+4],7,-176418897),a=l(a,c,f,i,n[h+5],12,1200080426),i=l(i,a,c,f,n[h+6],17,-1473231341),f=l(f,i,a,c,n[h+7],22,-45705983),c=l(c,f,i,a,n[h+8],7,1770035416),a=l(a,c,f,i,n[h+9],12,-1958414417),i=l(i,a,c,f,n[h+10],17,-42063),f=l(f,i,a,c,n[h+11],22,-1990404162),c=l(c,f,i,a,n[h+12],7,1804603682),a=l(a,c,f,i,n[h+13],12,-40341101),i=l(i,a,c,f,n[h+14],17,-1502002290),c=v(c,f=l(f,i,a,c,n[h+15],22,1236535329),i,a,n[h+1],5,-165796510),a=v(a,c,f,i,n[h+6],9,-1069501632),i=v(i,a,c,f,n[h+11],14,643717713),f=v(f,i,a,c,n[h],20,-373897302),c=v(c,f,i,a,n[h+5],5,-701558691),a=v(a,c,f,i,n[h+10],9,38016083),i=v(i,a,c,f,n[h+15],14,-660478335),f=v(f,i,a,c,n[h+4],20,-405537848),c=v(c,f,i,a,n[h+9],5,568446438),a=v(a,c,f,i,n[h+14],9,-1019803690),i=v(i,a,c,f,n[h+3],14,-187363961),f=v(f,i,a,c,n[h+8],20,1163531501),c=v(c,f,i,a,n[h+13],5,-1444681467),a=v(a,c,f,i,n[h+2],9,-51403784),i=v(i,a,c,f,n[h+7],14,1735328473),c=g(c,f=v(f,i,a,c,n[h+12],20,-1926607734),i,a,n[h+5],4,-378558),a=g(a,c,f,i,n[h+8],11,-2022574463),i=g(i,a,c,f,n[h+11],16,1839030562),f=g(f,i,a,c,n[h+14],23,-35309556),c=g(c,f,i,a,n[h+1],4,-1530992060),a=g(a,c,f,i,n[h+4],11,1272893353),i=g(i,a,c,f,n[h+7],16,-155497632),f=g(f,i,a,c,n[h+10],23,-1094730640),c=g(c,f,i,a,n[h+13],4,681279174),a=g(a,c,f,i,n[h],11,-358537222),i=g(i,a,c,f,n[h+3],1
6,-722521979),f=g(f,i,a,c,n[h+6],23,76029189),c=g(c,f,i,a,n[h+9],4,-640364487),a=g(a,c,f,i,n[h+12],11,-421815835),i=g(i,a,c,f,n[h+15],16,530742520),c=m(c,f=g(f,i,a,c,n[h+2],23,-995338651),i,a,n[h],6,-198630844),a=m(a,c,f,i,n[h+7],10,1126891415),i=m(i,a,c,f,n[h+14],15,-1416354905),f=m(f,i,a,c,n[h+5],21,-57434055),c=m(c,f,i,a,n[h+12],6,1700485571),a=m(a,c,f,i,n[h+3],10,-1894986606),i=m(i,a,c,f,n[h+10],15,-1051523),f=m(f,i,a,c,n[h+1],21,-2054922799),c=m(c,f,i,a,n[h+8],6,1873313359),a=m(a,c,f,i,n[h+15],10,-30611744),i=m(i,a,c,f,n[h+6],15,-1560198380),f=m(f,i,a,c,n[h+13],21,1309151649),c=m(c,f,i,a,n[h+4],6,-145523070),a=m(a,c,f,i,n[h+11],10,-1120210379),i=m(i,a,c,f,n[h+2],15,718787259),f=m(f,i,a,c,n[h+9],21,-343485551),c=d(c,r),f=d(f,e),i=d(i,o),a=d(a,u);return[c,f,i,a]}function a(n){for(var t="",r=32*n.length,e=0;e<r;e+=8)t+=String.fromCharCode(n[e>>5]>>>e%32&255);return t}function h(n){var t=[];for(t[(n.length>>2)-1]=void 0,e=0;e<t.length;e+=1)t[e]=0;for(var r=8*n.length,e=0;e<r;e+=8)t[e>>5]|=(255&n.charCodeAt(e/8))<<e%32;return t}function e(n){for(var t,r="0123456789abcdef",e="",o=0;o<n.length;o+=1)t=n.charCodeAt(o),e+=r.charAt(t>>>4&15)+r.charAt(15&t);return e}function r(n){return unescape(encodeURIComponent(n))}function o(n){return a(i(h(t=r(n)),8*t.length));var t}function u(n,t){return function(n,t){var r,e,o=h(n),u=[],c=[];for(u[15]=c[15]=void 0,16<o.length&&(o=i(o,8*n.length)),r=0;r<16;r+=1)u[r]=909522486^o[r],c[r]=1549556828^o[r];return e=i(u.concat(h(t)),512+8*t.length),a(i(c.concat(e),640))}(r(n),r(t))}function t(n,t,r){return t?r?u(t,n):e(u(t,n)):r?o(n):e(o(n))}"function"==typeof define&&define.amd?define(function(){return t}):"object"==typeof module&&module.exports?module.exports=t:n.md5=t}(this);
|
||||
3
src/static/js/auto-complete.min.js
vendored
Normal file
3
src/static/js/auto-complete.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
9
src/static/js/bootstrap-colorpicker.min.js
vendored
Normal file
9
src/static/js/bootstrap-colorpicker.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
2
src/static/js/d3.v5.min.js
vendored
Normal file
2
src/static/js/d3.v5.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
2
src/static/js/dom-to-image.min.js
vendored
Normal file
2
src/static/js/dom-to-image.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
@@ -22,23 +22,29 @@ function gifOver(thumbnail, hit) {
|
||||
thumbnail.addEventListener("mouseout", function () {
|
||||
//Reset timer
|
||||
thumbnail.mouseStayedOver = false;
|
||||
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
|
||||
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);
|
||||
})
|
||||
}
|
||||
|
||||
function getContentHighlight(hit) {
|
||||
const re = RegExp(/<mark>/g);
|
||||
|
||||
const sortByMathCount = (a, b) => {
|
||||
return b.match(re).length - a.match(re).length;
|
||||
};
|
||||
|
||||
if (hit.hasOwnProperty("highlight")) {
|
||||
if (hit["highlight"].hasOwnProperty("content")) {
|
||||
return hit["highlight"]["content"].sort(sortByMathCount)[0];
|
||||
return hit["highlight"]["content"][0];
|
||||
|
||||
} else if (hit["highlight"].hasOwnProperty("content.nGram")) {
|
||||
return hit["highlight"]["content.nGram"].sort(sortByMathCount)[0];
|
||||
return hit["highlight"]["content.nGram"][0];
|
||||
}
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function getPathHighlight(hit) {
|
||||
if (hit.hasOwnProperty("highlight")) {
|
||||
if (hit["highlight"].hasOwnProperty("path.text")) {
|
||||
return hit["highlight"]["path.text"][0];
|
||||
} else if (hit["highlight"].hasOwnProperty("path.nGram")) {
|
||||
return hit["highlight"]["path.nGram"][0];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,9 +71,11 @@ function shouldPlayVideo(hit) {
|
||||
|
||||
return mime &&
|
||||
mime.startsWith("video/") &&
|
||||
!("parent" in hit["_source"]) &&
|
||||
hit["_source"]["extension"] !== "mkv" &&
|
||||
hit["_source"]["extension"] !== "avi" &&
|
||||
videoc !== "hevc" &&
|
||||
videoc !== "mpeg1video" &&
|
||||
videoc !== "mpeg2video" &&
|
||||
videoc !== "wmv3";
|
||||
}
|
||||
@@ -80,6 +88,7 @@ function shouldDisplayRawImage(hit) {
|
||||
hit["_source"]["mime"] &&
|
||||
!hit["_source"]["parent"] &&
|
||||
hit["_source"]["videoc"] !== "tiff" &&
|
||||
hit["_source"]["videoc"] !== "raw" &&
|
||||
hit["_source"]["videoc"] !== "ppm";
|
||||
}
|
||||
|
||||
@@ -145,33 +154,75 @@ function getTags(hit, mimeCategory) {
|
||||
// User tags
|
||||
if (hit["_source"].hasOwnProperty("tag")) {
|
||||
hit["_source"]["tag"].forEach(tag => {
|
||||
const userTag = document.createElement("span");
|
||||
userTag.setAttribute("class", "badge badge-pill badge-user");
|
||||
|
||||
const tokens = tag.split("#");
|
||||
|
||||
if (tokens.length > 1) {
|
||||
const bg = "#" + tokens[1];
|
||||
const fg = lum(tokens[1]) > 40 ? "#000" : "#fff";
|
||||
userTag.setAttribute("style", `background-color: ${bg}; color: ${fg}`);
|
||||
}
|
||||
|
||||
const name = tokens[0].split(".")[tokens[0].split(".").length - 1];
|
||||
userTag.appendChild(document.createTextNode(name));
|
||||
tags.push(userTag);
|
||||
tags.push(makeUserTag(tag, hit));
|
||||
})
|
||||
}
|
||||
|
||||
return tags
|
||||
}
|
||||
|
||||
/**
 * Build the badge element for a user tag.
 *
 * A tag has the form "name" or "name#rrggbb"; the name may be dotted
 * ("a.b.c"), in which case only the last segment is displayed while the
 * full dotted name is exposed through the tooltip.
 *
 * @param {string} tag Raw tag string, optionally suffixed with "#<color>".
 * @param {Object} hit Search hit the tag belongs to (forwarded to deleteTag).
 * @returns {HTMLSpanElement} The badge, with a hover-only delete button.
 */
function makeUserTag(tag, hit) {
    const [fullName, color] = tag.split("#");

    const badge = document.createElement("span");
    badge.setAttribute("class", "badge badge-pill badge-user");
    badge.setAttribute("title", fullName);

    if (color !== undefined) {
        // Text color is chosen from lum(color) so the label stays readable
        // (presumably lum returns a luminance-like value -- confirm).
        const textColor = lum(color) > 50 ? "#000" : "#fff";
        badge.setAttribute("style", `background-color: ${"#" + color}; color: ${textColor}`);
    }

    const removeButton = document.createElement("span");
    removeButton.setAttribute("class", "badge badge-pill badge-delete");
    removeButton.setAttribute("title", "Delete tag");
    removeButton.appendChild(document.createTextNode("X"));

    // The badge is only removed from the page once the deletion request succeeded.
    function onDeleteClick() {
        deleteTag(tag, hit).then(() => {
            badge.remove();
        });
    }

    // The delete button only exists in the DOM while the badge is hovered.
    function showRemoveButton() {
        badge.appendChild(removeButton);
    }

    function hideRemoveButton() {
        removeButton.remove();
    }

    removeButton.addEventListener("click", onDeleteClick);
    badge.addEventListener("mouseenter", showRemoveButton);
    badge.addEventListener("mouseleave", hideRemoveButton);

    // Display only the last segment of a dotted tag name.
    const shortName = fullName.split(".").pop();
    badge.appendChild(document.createTextNode(shortName));

    return badge;
}
|
||||
|
||||
/**
 * Append an "Exif GPS" row to a metadata table body.
 *
 * The value cell contains a link showing "<latitude>, <longitude>" that
 * opens the coordinates on Google Maps in a new tab.
 *
 * @param {jQuery} tbody Table body (jQuery object) receiving the row.
 * @param {number|string} latitude Latitude, interpolated as-is.
 * @param {number|string} longitude Longitude, interpolated as-is.
 */
function makeGpsMetaRow(tbody, latitude, longitude) {
    const label = `${latitude}, ${longitude}`;
    const mapUrl = `https://maps.google.com/?q=${latitude},${longitude}&ll=${latitude},${longitude}&t=k&z=17`;

    const link = $("<a>");
    link.text(label);
    link.attr("href", mapUrl);
    link.attr("target", "_blank");

    const keyCell = $("<td>").text("Exif GPS");
    const valueCell = $("<td>").append(link);

    const row = $("<tr>");
    row.append(keyCell);
    row.append(valueCell);

    tbody.append(row);
}
|
||||
|
||||
function infoButtonCb(hit) {
|
||||
return () => {
|
||||
getDocumentInfo(hit["_id"]).then(doc => {
|
||||
$("#modal-body").empty()
|
||||
|
||||
$("#modal-title").text(doc["name"] + ext(hit));
|
||||
|
||||
if (doc["mime"]) {
|
||||
const mimeCategory = doc["mime"].split("/")[0];
|
||||
const imgWrapper = document.createElement("div");
|
||||
imgWrapper.setAttribute("style", "position: relative");
|
||||
imgWrapper.setAttribute("class", "img-wrapper");
|
||||
makeThumbnail(mimeCategory, hit, imgWrapper, false);
|
||||
$("#modal-body").append(imgWrapper);
|
||||
}
|
||||
|
||||
const tbody = $("<tbody>");
|
||||
$("#modal-body").empty()
|
||||
$("#modal-body")
|
||||
.append($("<table class='table table-sm'>")
|
||||
.append($("<thead>")
|
||||
.append($("<tr>")
|
||||
@@ -182,12 +233,34 @@ function infoButtonCb(hit) {
|
||||
.append(tbody)
|
||||
);
|
||||
|
||||
tbody.append($("<tr>")
|
||||
.append($("<td>").text("index"))
|
||||
.append($("<td>").text(`[${indexMap[doc["index"]]}]`))
|
||||
).append($("<tr>")
|
||||
.append($("<td>").text("mtime"))
|
||||
.append($("<td>")
|
||||
.text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " "))
|
||||
.attr("title", doc["mtime"]))
|
||||
);
|
||||
|
||||
// Exif GPS
|
||||
if ("exif_gps_longitude_dec" in doc) {
|
||||
makeGpsMetaRow(tbody, doc["exif_gps_latitude_dec"], doc["exif_gps_longitude_dec"])
|
||||
} else if ("exif_gps_longitude_dms" in doc) {
|
||||
makeGpsMetaRow(
|
||||
tbody,
|
||||
dmsToDecimal(doc["exif_gps_latitude_dms"], doc["exif_gps_latitude_ref"]),
|
||||
dmsToDecimal(doc["exif_gps_longitude_dms"], doc["exif_gps_longitude_ref"]),
|
||||
)
|
||||
}
|
||||
|
||||
const displayFields = new Set([
|
||||
"mime", "size", "mtime", "path", "title", "width", "height", "duration", "audioc", "videoc",
|
||||
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag"
|
||||
"mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc",
|
||||
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
|
||||
"modified_by", "pages"
|
||||
]);
|
||||
Object.keys(doc)
|
||||
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))
|
||||
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || (key.startsWith("exif_") && !key.includes("gps")))
|
||||
.forEach(key => {
|
||||
tbody.append($("<tr>")
|
||||
.append($("<td>").text(key))
|
||||
@@ -302,6 +375,14 @@ function createDocCard(hit) {
|
||||
audio.setAttribute("controls", "");
|
||||
audio.setAttribute("type", hit["_source"]["mime"]);
|
||||
audio.setAttribute("src", "f/" + hit["_id"]);
|
||||
audio.addEventListener("play", () => {
|
||||
// Pause all currently playing audio tags
|
||||
$("audio").each(function () {
|
||||
if (this !== audio) {
|
||||
this.pause();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
docCard.appendChild(audio)
|
||||
}
|
||||
@@ -330,9 +411,31 @@ function createDocCard(hit) {
|
||||
|
||||
docCardBody.appendChild(tagContainer);
|
||||
|
||||
attachTagContainerEventListener(tagContainer, hit);
|
||||
return docCard;
|
||||
}
|
||||
|
||||
/**
 * Wire the hover-only "+Add" tag button onto a tag container.
 *
 * The button is inserted in front of the container's first <small> child
 * while the pointer is over the container and removed again when it
 * leaves. Clicking it resets the tag input, records `hit` as the document
 * being tagged, installs the callback that renders the new tag, and opens
 * the "#tagModal" dialog.
 *
 * @param {HTMLElement} tagContainer Element holding the tag badges.
 * @param {Object} hit Search hit the container belongs to.
 */
function attachTagContainerEventListener(tagContainer, hit) {
    // Anchor element: new badges and the "+Add" button go right before it.
    const anchor = [...tagContainer.children].find(el => el.tagName === "SMALL");

    const addButton = document.createElement("span");
    addButton.setAttribute("class", "badge badge-pill add-tag-button");
    addButton.appendChild(document.createTextNode("+Add"));

    function showAddButton() {
        tagContainer.insertBefore(addButton, anchor);
    }

    function hideAddButton() {
        addButton.remove();
    }

    function openTagModal() {
        tagBar.value = "";
        currentDocToTag = hit;
        // Invoked with the saved tag string to render its badge in this container.
        currentTagCallback = function (tag) {
            tagContainer.insertBefore(makeUserTag(tag, hit), anchor);
        };
        $("#tagModal").modal("show");
        tagBar.focus();
    }

    tagContainer.addEventListener("mouseenter", showAddButton);
    tagContainer.addEventListener("mouseleave", hideAddButton);
    addButton.addEventListener("click", openTagModal);
}
|
||||
|
||||
function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
|
||||
|
||||
if (!hit["_source"].hasOwnProperty("thumbnail")) {
|
||||
@@ -343,9 +446,13 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
|
||||
if (small) {
|
||||
thumbnail.setAttribute("class", "fit-sm");
|
||||
} else {
|
||||
thumbnail.setAttribute("class", "card-img-top fit");
|
||||
if (hit["_source"].hasOwnProperty("parent")) {
|
||||
thumbnail.setAttribute("class", "card-img-top fit img-padding");
|
||||
} else {
|
||||
thumbnail.setAttribute("class", "card-img-top fit");
|
||||
}
|
||||
}
|
||||
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
|
||||
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);
|
||||
|
||||
if (shouldDisplayRawImage(hit)) {
|
||||
thumbnail.addEventListener("click", () => {
|
||||
@@ -405,7 +512,6 @@ function createDocLine(hit) {
|
||||
|
||||
if (hit["_source"].hasOwnProperty("parent")) {
|
||||
line.classList.add("sub-document");
|
||||
isSubDocument = true;
|
||||
}
|
||||
|
||||
const infoButton = makeInfoButton(hit);
|
||||
@@ -443,7 +549,7 @@ function createDocLine(hit) {
|
||||
if (contentHl !== undefined) {
|
||||
const contentDiv = document.createElement("div");
|
||||
contentDiv.setAttribute("class", "content-div");
|
||||
contentDiv.insertAdjacentHTML('afterbegin', contentHl);
|
||||
contentDiv.insertAdjacentHTML("afterbegin", contentHl);
|
||||
titleDiv.appendChild(contentDiv);
|
||||
}
|
||||
|
||||
@@ -453,7 +559,13 @@ function createDocLine(hit) {
|
||||
let path = document.createElement("div");
|
||||
path.setAttribute("class", "path-line");
|
||||
path.setAttribute("title", hit["_source"]["path"] + "/");
|
||||
path.appendChild(document.createTextNode(hit["_source"]["path"] + "/"));
|
||||
|
||||
const pathHighlight = getPathHighlight(hit);
|
||||
if (pathHighlight) {
|
||||
path.insertAdjacentHTML("afterbegin", pathHighlight + "/");
|
||||
} else {
|
||||
path.appendChild(document.createTextNode(hit["_source"]["path"] + "/"));
|
||||
}
|
||||
|
||||
let tagContainer = document.createElement("div");
|
||||
tagContainer.setAttribute("class", "tag-container");
|
||||
@@ -472,6 +584,8 @@ function createDocLine(hit) {
|
||||
pathLine.appendChild(path);
|
||||
pathLine.appendChild(tagContainer);
|
||||
|
||||
attachTagContainerEventListener(tagContainer, hit);
|
||||
|
||||
return line;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
const SIZE = 40;
|
||||
const SIZE = 60;
|
||||
let mimeMap = [];
|
||||
let tagMap = [];
|
||||
let mimeTree;
|
||||
@@ -6,6 +6,9 @@ let tagTree;
|
||||
|
||||
let searchBar = document.getElementById("searchBar");
|
||||
let pathBar = document.getElementById("pathBar");
|
||||
let tagBar = document.getElementById("tagBar");
|
||||
let currentDocToTag = null;
|
||||
let currentTagCallback = null;
|
||||
let lastDoc = null;
|
||||
let reachedEnd = false;
|
||||
let docCount = 0;
|
||||
@@ -20,15 +23,6 @@ let size_max = 10000000000000;
|
||||
let date_min = null;
|
||||
let date_max = null;
|
||||
|
||||
const CONF = new Settings();
|
||||
|
||||
const _defaults = {
|
||||
display: "grid",
|
||||
fuzzy: true,
|
||||
highlight: true,
|
||||
sort: "score"
|
||||
};
|
||||
|
||||
SORT_MODES = {
|
||||
score: {
|
||||
text: "Relevance",
|
||||
@@ -66,32 +60,7 @@ SORT_MODES = {
|
||||
],
|
||||
key: hit => hit["_source"]["size"]
|
||||
},
|
||||
}
|
||||
|
||||
function Settings() {
|
||||
this.options = {};
|
||||
|
||||
this._onUpdate = function () {
|
||||
$("#fuzzyToggle").prop("checked", this.options.fuzzy);
|
||||
}
|
||||
|
||||
this.load = function () {
|
||||
const raw = window.localStorage.getItem("options");
|
||||
if (raw === null) {
|
||||
this.options = _defaults;
|
||||
} else {
|
||||
this.options = JSON.parse(raw);
|
||||
}
|
||||
|
||||
this._onUpdate();
|
||||
}
|
||||
|
||||
this.save = function () {
|
||||
window.localStorage.setItem("options", JSON.stringify(this.options));
|
||||
this._onUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
function showEsError() {
|
||||
$.toast({
|
||||
@@ -106,35 +75,156 @@ function showEsError() {
|
||||
});
|
||||
}
|
||||
|
||||
jQuery["jsonPost"] = function (url, data) {
|
||||
return jQuery.ajax({
|
||||
url: url,
|
||||
type: "post",
|
||||
data: JSON.stringify(data),
|
||||
contentType: "application/json"
|
||||
}).fail(err => {
|
||||
showEsError();
|
||||
console.log(err);
|
||||
/**
 * Page initialisation: loads the saved settings, wires the search/path/tag
 * inputs (including their auto-complete widgets and the tag colour picker)
 * and builds the tag tree.
 */
window.onload = () => {
    // Suggestions are file paths and user-entered tag names, so they must be
    // escaped before being concatenated into markup.
    // NOTE(review): assumes the autoComplete widget reads `data-val` back
    // through the DOM attribute API (entity-escaped values then decode to the
    // original string) -- confirm against the vendored auto-complete.min.js.
    const escapeHtml = text => String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");

    // Case-insensitive substring match, sorted; `term` must already be lower-cased.
    const matchChoices = (choices, term) =>
        choices.filter(choice => choice.toLowerCase().includes(term)).sort();

    CONF.load();

    // Path auto-complete
    new autoComplete({
        selector: '#pathBar',
        minChars: 1,
        delay: 400,
        renderItem: function (item) {
            const safeItem = escapeHtml(item);
            return '<div class="autocomplete-suggestion" data-val="' + safeItem + '">' + safeItem + '</div>';
        },
        source: async function (term, suggest) {
            // Path suggestions can be switched off in the settings.
            if (!CONF.options.suggestPath) {
                return []
            }

            const choices = await getPathChoices();
            suggest(matchChoices(choices, term.toLowerCase()));
        },
        onSelect: function () {
            searchDebounced();
        }
    });

    searchBar.addEventListener("keyup", searchDebounced);
    pathBar.addEventListener("keyup", e => {
        if (e.key === "Enter") {
            searchDebounced();
        }
    });

    // Tag auto-complete: suggestions are full "name#color" tags, but only the
    // name part is shown to the user.
    new autoComplete({
        selector: '#tagBar',
        minChars: 1,
        delay: 200,
        renderItem: function (item) {
            return '<div class="autocomplete-suggestion" data-val="' + escapeHtml(item) + '">'
                + escapeHtml(item.split("#")[0]) + '</div>';
        },
        source: async function (term, suggest) {
            const choices = await getTagChoices();
            suggest(matchChoices(choices, term.toLowerCase()));
        },
        onSelect: function (e, item) {
            const tokens = item.split("#");
            // Only sync the colour input when the tag actually carries a
            // colour; otherwise it would be set to "#undefined".
            if (tokens.length > 1) {
                const color = "#" + tokens[1];
                $("#tag-color").val(color);
                $("#tag-color").trigger("keyup", color);
            }
            tagBar.value = tokens[0];
            e.preventDefault();
        }
    });

    // Pressing Enter in either the tag name or the colour field saves the tag
    // as "<name><color>" on the document selected via the "+Add" button.
    [tagBar, document.getElementById("tag-color")].forEach(elem => {
        elem.addEventListener("keyup", e => {
            if (e.key === "Enter" && tagBar.value.length > 0) {
                const tag = tagBar.value + document.getElementById("tag-color").value;
                saveTag(tag, currentDocToTag).then(() => currentTagCallback(tag));
            }
        });
    });

    $("#tag-color").colorpicker({
        format: "hex",
        sliders: {
            saturation: {
                selector: '.colorpicker-saturation',
                callLeft: 'setSaturationRatio',
                callTop: 'setValueRatio'
            },
            hue: {
                selector: '.colorpicker-hue',
                maxLeft: 0,
                callLeft: false,
                callTop: 'setHueRatio'
            }
        }
    });

    initTagTree();
    updateTagTree();
};
|
||||
|
||||
window.onload = () => {
|
||||
$("#theme").on("click", () => {
|
||||
if (!document.cookie.includes("sist")) {
|
||||
document.cookie = "sist=dark";
|
||||
} else {
|
||||
document.cookie = "sist=; Max-Age=-99999999;";
|
||||
}
|
||||
window.location.reload();
|
||||
function saveTag(tag, hit) {
|
||||
const relPath = hit["_source"]["path"] + (hit["_source"]["path"] ? "/" : "") + hit["_source"]["name"] + ext(hit);
|
||||
|
||||
return $.jsonPost("/tag/" + hit["_source"]["index"], {
|
||||
delete: false,
|
||||
name: tag,
|
||||
doc_id: hit["_id"],
|
||||
path_md5: md5(relPath)
|
||||
}).then(() => {
|
||||
tagBar.blur();
|
||||
$("#tagModal").modal("hide");
|
||||
$.toast({
|
||||
heading: "Tag added",
|
||||
text: "Tag saved to index storage and updated in ElasticSearch",
|
||||
stack: 3,
|
||||
bgColor: "#00a4bc",
|
||||
textColor: "#fff",
|
||||
position: 'bottom-right',
|
||||
hideAfter: 3000,
|
||||
loaderBg: "#08c7e8",
|
||||
});
|
||||
|
||||
window.setTimeout(updateTagTree, 2000);
|
||||
})
|
||||
CONF.load();
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Remove a user tag from a document.
 *
 * Posts a delete request to "/tag/<index>", then shows a confirmation toast
 * and schedules a refresh of the tag tree two seconds later.
 *
 * @param {string} tag Raw tag string to remove (including any "#color" suffix).
 * @param {Object} hit Search hit of the tagged document.
 * @returns {Promise} Promise chained on the request; resolves after the
 *     toast was shown and the tree refresh was scheduled.
 */
function deleteTag(tag, hit) {
    const source = hit["_source"];

    // The path hash must be built exactly like in saveTag() and the
    // `_path_md5` computation: no leading "/" when the document sits at the
    // index root (empty path), otherwise the hash refers to a different path.
    const relPath = source["path"] + (source["path"] ? "/" : "") + source["name"] + ext(hit);

    return $.jsonPost("/tag/" + source["index"], {
        delete: true,
        name: tag,
        doc_id: hit["_id"],
        path_md5: md5(relPath)
    }).then(() => {
        $.toast({
            heading: "Tag deleted",
            text: "Tag deleted from index storage and updated in ElasticSearch",
            stack: 3,
            bgColor: "#00a4bc",
            textColor: "#fff",
            position: 'bottom-right',
            hideAfter: 3000,
            loaderBg: "#08c7e8",
        });

        // Refresh the tag tree after a short delay
        // (presumably to let the index catch up -- confirm).
        window.setTimeout(updateTagTree, 2000);
    })
}
|
||||
|
||||
/**
 * Handler for the fuzzy-search toggle: re-runs the (debounced) search so the
 * results reflect the new toggle state.
 */
function toggleFuzzy() {
    searchDebounced();
}
|
||||
|
||||
$.jsonPost("i").then(resp => {
|
||||
$.get("i").then(resp => {
|
||||
|
||||
const urlIndices = (new URLSearchParams(location.search)).get("i");
|
||||
resp["indices"].forEach(idx => {
|
||||
@@ -159,10 +249,7 @@ $.jsonPost("i").then(resp => {
|
||||
});
|
||||
|
||||
function getDocumentInfo(id) {
|
||||
return $.getJSON("d/" + id).fail(e => {
|
||||
console.log(e);
|
||||
showEsError();
|
||||
})
|
||||
return $.getJSON("d/" + id).fail(showEsError)
|
||||
}
|
||||
|
||||
function handleTreeClick(tree) {
|
||||
@@ -173,7 +260,7 @@ function handleTreeClick(tree) {
|
||||
|
||||
if (node.id === "any") {
|
||||
if (!node.itree.state.checked) {
|
||||
tree.deselect();
|
||||
tree.deselectDeep();
|
||||
}
|
||||
} else {
|
||||
tree.node("any").deselect();
|
||||
@@ -233,29 +320,15 @@ $.jsonPost("es", {
|
||||
mimeTree.node("any").select();
|
||||
});
|
||||
|
||||
// Tags tree
|
||||
$.jsonPost("es", {
|
||||
aggs: {
|
||||
tags: {
|
||||
terms: {
|
||||
field: "tag",
|
||||
size: 10000
|
||||
}
|
||||
}
|
||||
},
|
||||
size: 0,
|
||||
}).then(resp => {
|
||||
resp["aggregations"]["tags"]["buckets"]
|
||||
.sort((a, b) => a["key"].localeCompare(b["key"]))
|
||||
.forEach(bucket => {
|
||||
addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
|
||||
});
|
||||
|
||||
tagMap.push({"text": "All", "id": "any"});
|
||||
function initTagTree() {
|
||||
tagMap = [{text: "All", id: "any"}];
|
||||
tagTree = new InspireTree({
|
||||
selection: {
|
||||
mode: 'checkbox'
|
||||
},
|
||||
checkbox: {
|
||||
autoCheckChildren: false
|
||||
},
|
||||
data: tagMap
|
||||
});
|
||||
new InspireTreeDOM(tagTree, {
|
||||
@@ -263,24 +336,100 @@ $.jsonPost("es", {
|
||||
});
|
||||
tagTree.on("node.state.changed", handleTreeClick(tagTree));
|
||||
tagTree.node("any").select();
|
||||
searchBusy = false;
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Reload the tag tree from the search backend.
 *
 * Runs a terms aggregation on the "tag" field (up to 10000 buckets, no
 * hits), rebuilds the global `tagMap` from the buckets in key order,
 * appends the "All" node, replaces the nodes of `tagTree` and finally
 * clears the `searchBusy` flag.
 */
function updateTagTree() {
    const tagAggregationQuery = {
        aggs: {
            tags: {
                terms: {
                    field: "tag",
                    size: 10000
                }
            }
        },
        size: 0,
    };

    $.jsonPost("es", tagAggregationQuery).then(resp => {
        const buckets = resp["aggregations"]["tags"]["buckets"];
        // Sorted in place so that tags are inserted in key order.
        buckets.sort((left, right) => left["key"].localeCompare(right["key"]));

        tagMap = [];
        for (const bucket of buckets) {
            // The tag string doubles as the node id.
            addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"]);
        }

        tagTree.removeAll();
        tagMap.push({text: "All", id: "any"});
        tagTree.addNodes(tagMap);
        searchBusy = false;
    });
}
|
||||
|
||||
|
||||
function addTag(map, tag, id, count) {
|
||||
let tags = tag.split("#")[0].split(".");
|
||||
// let tags = tag.split("#")[0].split(".");
|
||||
let tags = tag.split(".");
|
||||
|
||||
let child = {
|
||||
id: id,
|
||||
text: tags.length !== 1 ? tags[0] : `${tags[0]} (${count})`,
|
||||
children: []
|
||||
values: [id],
|
||||
count: count,
|
||||
text: tags.length !== 1 ? tags[0] : `${tags[0].split("#")[0]} (${count})`,
|
||||
name: tags[0],
|
||||
children: [],
|
||||
isLeaf: tags.length === 1,
|
||||
//Overwrite base functions
|
||||
blur: function () {
|
||||
},
|
||||
select: function () {
|
||||
this.state("selected", true);
|
||||
return this.check()
|
||||
},
|
||||
deselect: function () {
|
||||
this.state("selected", false);
|
||||
return this.uncheck()
|
||||
},
|
||||
uncheck: function () {
|
||||
if (!this.isLeaf) {
|
||||
return;
|
||||
}
|
||||
|
||||
baseStateChange('checked', false, 'unchecked', this, false);
|
||||
this.state('indeterminate', false);
|
||||
|
||||
if (this.hasParent()) {
|
||||
this.getParent().refreshIndeterminateState();
|
||||
}
|
||||
|
||||
this._tree.end();
|
||||
return this;
|
||||
},
|
||||
check: function () {
|
||||
if (!this.isLeaf) {
|
||||
return;
|
||||
}
|
||||
|
||||
baseStateChange('checked', true, 'checked', this, false);
|
||||
|
||||
if (this.hasParent()) {
|
||||
this.getParent().refreshIndeterminateState();
|
||||
}
|
||||
|
||||
this._tree.end();
|
||||
return this;
|
||||
}
|
||||
};
|
||||
|
||||
let found = false;
|
||||
map.forEach(node => {
|
||||
if (node.text === child.text) {
|
||||
if (node.name.split("#")[0] === child.name.split("#")[0]) {
|
||||
found = true;
|
||||
if (tags.length !== 1) {
|
||||
addTag(node.children, tags.slice(1).join("."), id, count);
|
||||
} else {
|
||||
// Same name, different color
|
||||
node.count += count;
|
||||
node.text = `${tags[0].split("#")[0]} (${node.count})`;
|
||||
node.values.push(id);
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -332,7 +481,11 @@ function getSelectedNodes(tree) {
|
||||
|
||||
//Only get children
|
||||
if (selected[i].text.indexOf("(") !== -1) {
|
||||
selectedNodes.push(selected[i].id);
|
||||
if (selected[i].values) {
|
||||
selectedNodes.push(selected[i].values);
|
||||
} else {
|
||||
selectedNodes.push(selected[i].id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -358,8 +511,8 @@ function search(after = null) {
|
||||
searchResults.appendChild(preload);
|
||||
}
|
||||
|
||||
let query = searchBar.value;
|
||||
let empty = query === "";
|
||||
let searchBarValue = searchBar.value;
|
||||
let empty = searchBarValue === "";
|
||||
let condition = empty ? "should" : "must";
|
||||
let filters = [
|
||||
{range: {size: {gte: size_min, lte: size_max}}},
|
||||
@@ -372,31 +525,59 @@ function search(after = null) {
|
||||
"font_name^6"
|
||||
];
|
||||
|
||||
if (CONF.options.searchInPath) {
|
||||
fields.push("path.text^5");
|
||||
}
|
||||
|
||||
if ($("#fuzzyToggle").prop("checked")) {
|
||||
fields.push("content.nGram");
|
||||
if (CONF.options.searchInPath) {
|
||||
fields.push("path.nGram");
|
||||
}
|
||||
fields.push("name.nGram^3");
|
||||
}
|
||||
|
||||
let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
|
||||
if (path !== "") {
|
||||
filters.push([{term: {path: path}}])
|
||||
filters.push({term: {path: path}})
|
||||
}
|
||||
let mimeTypes = getSelectedNodes(mimeTree);
|
||||
if (!mimeTypes.includes("any")) {
|
||||
filters.push([{terms: {"mime": mimeTypes}}]);
|
||||
filters.push({terms: {"mime": mimeTypes}});
|
||||
}
|
||||
|
||||
let tags = getSelectedNodes(tagTree);
|
||||
if (!tags.includes("any")) {
|
||||
filters.push([{terms: {"tag": tags}}]);
|
||||
tags.forEach(tagGroup => {
|
||||
filters.push({terms: {"tag": tagGroup}})
|
||||
})
|
||||
}
|
||||
|
||||
if (date_min && date_max) {
|
||||
filters.push([{range: {mtime: {gte: date_min, lte: date_max}}}])
|
||||
filters.push({range: {mtime: {gte: date_min, lte: date_max}}})
|
||||
} else if (date_min) {
|
||||
filters.push([{range: {mtime: {gte: date_min}}}])
|
||||
filters.push({range: {mtime: {gte: date_min}}})
|
||||
} else if (date_max) {
|
||||
filters.push([{range: {mtime: {lte: date_max}}}])
|
||||
filters.push({range: {mtime: {lte: date_max}}})
|
||||
}
|
||||
|
||||
let query;
|
||||
if (CONF.options.queryMode === "simple") {
|
||||
query = {
|
||||
simple_query_string: {
|
||||
query: searchBarValue,
|
||||
fields: fields,
|
||||
default_operator: "and"
|
||||
}
|
||||
}
|
||||
} else {
|
||||
query = {
|
||||
query_string: {
|
||||
query: searchBarValue,
|
||||
default_field: "name",
|
||||
default_operator: "and"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let q = {
|
||||
@@ -405,13 +586,7 @@ function search(after = null) {
|
||||
},
|
||||
query: {
|
||||
bool: {
|
||||
[condition]: {
|
||||
simple_query_string: {
|
||||
query: query,
|
||||
fields: fields,
|
||||
default_operator: "and"
|
||||
}
|
||||
},
|
||||
[condition]: query,
|
||||
filter: filters
|
||||
}
|
||||
},
|
||||
@@ -432,6 +607,9 @@ function search(after = null) {
|
||||
q.highlight = {
|
||||
pre_tags: ["<mark>"],
|
||||
post_tags: ["</mark>"],
|
||||
fragment_size: CONF.options.fragmentSize,
|
||||
number_of_fragments: 1,
|
||||
order: "score",
|
||||
fields: {
|
||||
content: {},
|
||||
// "content.nGram": {},
|
||||
@@ -440,14 +618,26 @@ function search(after = null) {
|
||||
font_name: {},
|
||||
}
|
||||
};
|
||||
if (CONF.options.searchInPath) {
|
||||
q.highlight.fields["path.text"] = {};
|
||||
q.highlight.fields["path.nGram"] = {};
|
||||
}
|
||||
}
|
||||
|
||||
$.jsonPost("es", q).then(searchResult => {
|
||||
const showError = CONF.options.queryMode === "advanced";
|
||||
|
||||
$.jsonPost("es", q, showError).then(searchResult => {
|
||||
let hits = searchResult["hits"]["hits"];
|
||||
if (hits) {
|
||||
lastDoc = hits[hits.length - 1];
|
||||
}
|
||||
|
||||
hits.forEach(hit => {
|
||||
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
|
||||
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
|
||||
hit["_path_md5"] = md5(hit["_source"]["path"] + (hit["_source"]["path"] ? "/" : "") + hit["_source"]["name"] + ext(hit));
|
||||
});
|
||||
|
||||
if (!after) {
|
||||
preload.remove();
|
||||
searchResults.appendChild(makeStatsCard(searchResult));
|
||||
@@ -470,7 +660,25 @@ function search(after = null) {
|
||||
reachedEnd = hits.length !== SIZE;
|
||||
insertHits(resultContainer, hits);
|
||||
searchBusy = false;
|
||||
});
|
||||
}).fail(() => {
|
||||
searchBusy = false;
|
||||
if (!after) {
|
||||
preload.remove();
|
||||
}
|
||||
|
||||
console.log("QUERY:")
|
||||
console.log(q)
|
||||
$.toast({
|
||||
heading: "Query error",
|
||||
text: "Could not parse or execute query, please check the Advanced search documentation. " +
|
||||
"See server logs for more information.",
|
||||
stack: false,
|
||||
bgColor: "#FF8F00",
|
||||
textColor: "#FFF3E0",
|
||||
position: 'bottom-right',
|
||||
hideAfter: false
|
||||
});
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -479,8 +687,6 @@ let searchDebounced = _.debounce(function () {
|
||||
search()
|
||||
}, 500);
|
||||
|
||||
searchBar.addEventListener("keyup", searchDebounced);
|
||||
pathBar.addEventListener("keyup", searchDebounced);
|
||||
|
||||
//Size slider
|
||||
$("#sizeSlider").ionRangeSlider({
|
||||
@@ -586,7 +792,7 @@ function getNextDepth(node) {
|
||||
}
|
||||
},
|
||||
size: 0
|
||||
}
|
||||
};
|
||||
|
||||
if (node.depth > 0) {
|
||||
q.query.bool.must = {
|
||||
@@ -622,6 +828,7 @@ function getNextDepth(node) {
|
||||
text: `${name}/ (${bucket.doc_count})`,
|
||||
depth: node.depth + 1,
|
||||
index: node.index,
|
||||
values: [bucket.key],
|
||||
children: true,
|
||||
}
|
||||
}).filter(x => x !== null)
|
||||
@@ -645,18 +852,20 @@ function createPathTree(target) {
|
||||
let pathTree = new InspireTree({
|
||||
data: function (node, resolve, reject) {
|
||||
return getNextDepth(node);
|
||||
}
|
||||
},
|
||||
sort: "text"
|
||||
});
|
||||
|
||||
selectedIndices.forEach(index => {
|
||||
pathTree.addNode({
|
||||
id: "/" + index,
|
||||
values: ["/" + index],
|
||||
text: `/[${indexMap[index]}]`,
|
||||
index: index,
|
||||
depth: 0,
|
||||
children: true
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
new InspireTreeDOM(pathTree, {
|
||||
target: target
|
||||
@@ -665,30 +874,48 @@ function createPathTree(target) {
|
||||
pathTree.on("node.click", handlePathTreeClick(pathTree));
|
||||
}
|
||||
|
||||
function updateSettings() {
|
||||
CONF.options.display = $("#settingDisplay").val();
|
||||
CONF.options.fuzzy = $("#settingFuzzy").prop("checked");
|
||||
CONF.options.highlight = $("#settingHighlight").prop("checked");
|
||||
CONF.save();
|
||||
|
||||
searchDebounced();
|
||||
|
||||
$.toast({
|
||||
heading: "Settings updated",
|
||||
text: "Settings saved to browser storage",
|
||||
stack: 3,
|
||||
bgColor: "#00a4bc",
|
||||
textColor: "#fff",
|
||||
position: 'bottom-right',
|
||||
hideAfter: 3000,
|
||||
loaderBg: "#08c7e8",
|
||||
/**
 * Query the "es" endpoint for completion suggestions that match the current
 * content of the path filter bar.
 *
 * The returned promise only ever resolves; a failed request leaves it pending.
 *
 * @returns {Promise<string[]>} the `path` field of every suggested document
 */
function getPathChoices() {
    const request = {
        suggest: {
            path: {
                prefix: pathBar.value,
                completion: {
                    field: "suggest-path",
                    skip_duplicates: true,
                    size: 10000
                }
            }
        }
    };

    return new Promise(resolve => {
        $.jsonPost("es", request).then(resp => {
            const options = resp["suggest"]["path"][0]["options"];
            resolve(options.map(option => option["_source"]["path"]));
        });
    });
}
|
||||
|
||||
function loadSettings() {
|
||||
CONF.load();
|
||||
|
||||
$("#settingDisplay").val(CONF.options.display);
|
||||
$("#settingFuzzy").prop("checked", CONF.options.fuzzy);
|
||||
$("#settingHighlight").prop("checked", CONF.options.highlight);
|
||||
/**
 * Query the "es" endpoint for completion suggestions that match the current
 * content of the tag bar.
 *
 * Each suggested document carries a list of tags. Only the part of a tag
 * before the first "#" is used to decide whether two tags are the same; the
 * first full tag seen for each such prefix is kept, in suggestion order.
 *
 * The returned promise only ever resolves; a failed request leaves it pending.
 *
 * @returns {Promise<string[]>} de-duplicated list of full tag strings
 */
function getTagChoices() {
    return new Promise(resolve => {
        $.jsonPost("es", {
            suggest: {
                tag: {
                    prefix: tagBar.value,
                    completion: {
                        field: "suggest-tag",
                        skip_duplicates: true,
                        size: 10000
                    }
                }
            }
        }).then(resp => {
            const result = [];
            // Prefixes already emitted; a Set keeps the de-duplication linear
            // instead of re-scanning (and re-splitting) `result` for every tag.
            const seen = new Set();

            resp["suggest"]["tag"][0]["options"]
                .map(opt => opt["_source"]["tag"])
                .forEach(tags => {
                    tags.forEach(tag => {
                        const prefix = tag.split("#")[0];
                        if (!seen.has(prefix)) {
                            seen.add(prefix);
                            result.push(tag);
                        }
                    });
                });

            resolve(result);
        });
    });
}
|
||||
|
||||
@@ -64,3 +64,185 @@ function lum(c) {
|
||||
|
||||
return 0.2126 * r + 0.7152 * g + 0.0722 * b;
|
||||
}
|
||||
|
||||
/**
 * Decode a string in which "]" acts as an escape marker.
 *
 *  - "]]"  decodes to a literal "]"
 *  - "]XX" (two hexadecimal digits) decodes to the character with code 0xXX
 *
 * Every other character is copied unchanged.
 *
 * @param {string} str escaped string
 * @returns {string} decoded string
 */
function strUnescape(str) {
    let result = "";

    for (let i = 0; i < str.length; i++) {
        const c = str[i];

        if (c !== "]") {
            result += c;
        } else if (str[i + 1] === "]") {
            result += "]";
            // Skip the second bracket of the pair.
            i += 1;
        } else {
            // The hex digits start AFTER the marker. Slicing from `i` would
            // include the "]" itself, which makes parseInt() return NaN and
            // turns every escape sequence into U+0000.
            result += String.fromCharCode(parseInt(str.slice(i + 1, i + 3), 16));
            // Skip the two hex digits; the loop increment skips the marker.
            i += 2;
        }
    }

    return result;
}
|
||||
|
||||
// Settings instance used by the page scripts. `Settings` is a function
// declaration further down, so it is already hoisted at this point.
const CONF = new Settings();

// Option values used by Settings.load() when browser storage holds nothing
// usable. Settings.load() also requires a stored object to contain every key
// listed here before accepting it.
const _defaults = {
    // Search
    display: "grid",
    fuzzy: true,
    highlight: true,
    sort: "score",
    searchInPath: false,

    // Stats page treemap
    treemapType: "cascaded",
    treemapTiling: "squarify",
    treemapGroupingDepth: 3,
    treemapColor: "PuBuGn",
    treemapSize: "large",

    // Misc
    suggestPath: true,
    fragmentSize: 100,
    columns: 5,
    queryMode: "simple"
};
|
||||
|
||||
/**
 * Reload the options through CONF.load() and copy them into the controls of
 * the settings dialog.
 */
function loadSettings() {
    CONF.load();

    // [selector, option key] pairs for checkbox controls.
    const checkboxes = [
        ["#settingFuzzy", "fuzzy"],
        ["#settingHighlight", "highlight"],
        ["#settingSearchInPath", "searchInPath"],
        ["#settingSuggestPath", "suggestPath"]
    ];

    // [selector, option key] pairs for value controls (select / input).
    const values = [
        ["#settingDisplay", "display"],
        ["#settingTreemapTiling", "treemapTiling"],
        ["#settingTreemapGroupingDepth", "treemapGroupingDepth"],
        ["#settingTreemapColor", "treemapColor"],
        ["#settingTreemapSize", "treemapSize"],
        ["#settingTreemapType", "treemapType"],
        ["#settingFragmentSize", "fragmentSize"],
        ["#settingColumns", "columns"],
        ["#settingQueryMode", "queryMode"]
    ];

    checkboxes.forEach(([selector, key]) => {
        $(selector).prop("checked", CONF.options[key]);
    });

    values.forEach(([selector, key]) => {
        $(selector).val(CONF.options[key]);
    });
}
|
||||
|
||||
/**
 * Holder for the user options, persisted under the "options" key of
 * window.localStorage as JSON.
 *
 * Members:
 *  - options: the current option object
 *  - load():  read the options from storage, falling back to `_defaults`
 *  - save():  write the options to storage
 *
 * Both load() and save() finish by calling _onUpdate(), which pushes the
 * relevant options to the page (fuzzy toggle, search bar placeholder,
 * column style).
 */
function Settings() {
    this.options = {};

    this._onUpdate = function () {
        $("#fuzzyToggle").prop("checked", this.options.fuzzy);
        $("#searchBar").attr("placeholder", this.options.queryMode === "simple" ? "Search" : "Advanced search");
        updateColumnStyle();
    };

    this.load = function () {
        let stored = null;
        try {
            // getItem() returns null when nothing is stored, and
            // JSON.parse(null) yields null, so that case needs no branch.
            stored = JSON.parse(window.localStorage.getItem("options"));
        } catch (e) {
            // Unreadable storage or corrupted JSON: fall back to the defaults
            // instead of aborting page initialisation.
            console.log(e);
            stored = null;
        }

        // A stored object is only accepted when it defines every known option.
        const isComplete = stored !== null
            && typeof stored === "object"
            && Object.keys(_defaults).every(k => Object.prototype.hasOwnProperty.call(stored, k));

        // Copy the defaults so that later edits of `options` never modify
        // the `_defaults` object itself.
        this.options = isComplete ? stored : Object.assign({}, _defaults);

        this._onUpdate();
    };

    this.save = function () {
        window.localStorage.setItem("options", JSON.stringify(this.options));
        this._onUpdate();
    }
}
|
||||
|
||||
/**
 * Copy the state of the settings dialog controls into CONF.options, save
 * them, refresh whichever page is active and show a confirmation toast.
 */
function updateSettings() {
    // [option key, selector] pairs for value controls (select / input).
    const values = [
        ["display", "#settingDisplay"],
        ["treemapTiling", "#settingTreemapTiling"],
        ["treemapGroupingDepth", "#settingTreemapGroupingDepth"],
        ["treemapColor", "#settingTreemapColor"],
        ["treemapSize", "#settingTreemapSize"],
        ["treemapType", "#settingTreemapType"],
        ["fragmentSize", "#settingFragmentSize"],
        ["columns", "#settingColumns"],
        ["queryMode", "#settingQueryMode"]
    ];

    // [option key, selector] pairs for checkbox controls.
    const checkboxes = [
        ["fuzzy", "#settingFuzzy"],
        ["highlight", "#settingHighlight"],
        ["searchInPath", "#settingSearchInPath"],
        ["suggestPath", "#settingSuggestPath"]
    ];

    values.forEach(([key, selector]) => {
        CONF.options[key] = $(selector).val();
    });

    checkboxes.forEach(([key, selector]) => {
        CONF.options[key] = $(selector).prop("checked");
    });

    CONF.save();

    // These two functions are only called when they exist in the current
    // page's scope.
    if (typeof searchDebounced !== "undefined") {
        searchDebounced();
    }
    if (typeof updateStats !== "undefined") {
        updateStats();
    }

    $.toast({
        heading: "Settings updated",
        text: "Settings saved to browser storage",
        stack: 3,
        bgColor: "#00a4bc",
        textColor: "#fff",
        position: 'bottom-right',
        hideAfter: 3000,
        loaderBg: "#08c7e8",
    });
}
|
||||
|
||||
/**
 * POST `data` as a JSON document to `url`.
 *
 * On failure the error is logged to the console and, unless `showError` is
 * false, showEsError() is called first.
 *
 * @param {string} url target of the request
 * @param {*} data value serialised with JSON.stringify as the request body
 * @param {boolean} [showError=true] whether a failure calls showEsError()
 * @returns the object returned by jQuery.ajax(...).fail(...)
 */
jQuery.jsonPost = function (url, data, showError = true) {
    const request = jQuery.ajax({
        url: url,
        type: "post",
        data: JSON.stringify(data),
        contentType: "application/json"
    });

    const onFailure = err => {
        if (showError) {
            showEsError();
        }
        console.log(err);
    };

    return request.fail(onFailure);
};
|
||||
|
||||
/**
 * Flip the theme cookie and reload the page.
 *
 * When document.cookie does not contain "sist", the cookie "sist=dark" is
 * set; otherwise the cookie is expired through a negative Max-Age.
 */
function toggleTheme() {
    const cookiePresent = document.cookie.includes("sist");

    document.cookie = cookiePresent
        ? "sist=; Max-Age=-99999999;"
        : "sist=dark;SameSite=Strict";

    window.location.reload();
}
|
||||
|
||||
/**
 * Rewrite the content of the element with id "style" so that, on screens at
 * least 1500px wide, each bricklayer column takes 1/CONF.options.columns of
 * the available width.
 *
 * Does nothing when the page has no element with id "style".
 */
function updateColumnStyle() {
    const style = document.getElementById("style");
    if (!style) {
        return;
    }

    // Width of one column, in percent.
    const columnWidth = 100 / CONF.options.columns;

    // The previous template ended with one more "}" than it opened; the
    // braces are balanced here.
    style.innerHTML =
        `
@media screen and (min-width: 1500px) {
    .container {
        max-width: 1440px;
    }

    .bricklayer-column-sizer {
        width: ${columnWidth}% !important;
    }

    .bricklayer-column {
        max-width: ${columnWidth}%;
    }
}
`
}
|
||||
|
||||
/**
 * Convert a degrees/minutes/seconds coordinate to a signed decimal value.
 *
 * `dms` holds three comma-separated tokens (degrees, minutes, seconds), each
 * written as "numerator:denominator", e.g. "45:1, 30:1, 0:1".
 *
 * @param {string} dms coordinate as three "a:b" rationals separated by commas
 * @param {string} ref "S" or "W" make the result negative; any other value
 *                     leaves it positive
 * @returns {number} degrees + minutes / 60 + seconds / 3600, with sign applied
 */
function dmsToDecimal(dms, ref) {
    // Evaluate one "numerator:denominator" token.
    const rational = token => {
        const parts = token.trim().split(":");
        return Number(parts[0]) / Number(parts[1]);
    };

    const tokens = dms.split(",");
    const degrees = rational(tokens[0]);
    const minutes = rational(tokens[1]);
    const seconds = rational(tokens[2]);

    const sign = (ref === "S" || ref === "W") ? -1 : 1;

    return (degrees + (minutes / 60) + (seconds / 3600)) * sign;
}
|
||||
@@ -6,15 +6,18 @@
|
||||
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
|
||||
|
||||
<link href="css" rel="stylesheet" type="text/css">
|
||||
<style id="style"></style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<nav class="navbar navbar-expand-lg">
|
||||
<a class="navbar-brand" href="/">sist2</a>
|
||||
<span class="badge badge-pill version">2.0.1</span>
|
||||
<span class="badge badge-pill version">2.10.1</span>
|
||||
<span class="tagline">Lightning-fast file system indexer and search tool </span>
|
||||
<button style="margin-left: auto" class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
|
||||
<a id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
|
||||
<a class="btn ml-auto" href="stats">Stats</a>
|
||||
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings
|
||||
</button>
|
||||
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
|
||||
</nav>
|
||||
|
||||
<div class="container">
|
||||
@@ -47,8 +50,11 @@
|
||||
<div class="col">
|
||||
<div class="input-group" style="margin-bottom: 0.5em; margin-top: 1em">
|
||||
<div class="input-group-prepend">
|
||||
<button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal" data-target="#pathTreeModal">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px"><path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/></svg>
|
||||
<button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal"
|
||||
data-target="#pathTreeModal">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px">
|
||||
<path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/>
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
<input id="pathBar" type="search" class="form-control" placeholder="Filter path">
|
||||
@@ -114,6 +120,8 @@
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
|
||||
<h2>Simple search</h2>
|
||||
|
||||
<table class="table">
|
||||
<tbody>
|
||||
<tr>
|
||||
@@ -155,12 +163,19 @@
|
||||
<i>fried eggs</i> and either <i>eggplant</i> or <i>potato</i>, but will ignore results
|
||||
containing <i>frittata</i>.</p>
|
||||
|
||||
<p>When neither <code>+</code> or <code>|</code> is specified, the default operator is <code>+</code> (and).</p>
|
||||
<p>When neither <code>+</code> or <code>|</code> is specified, the default operator is
|
||||
<code>+</code> (and).</p>
|
||||
<p>When the <b>Fuzzy</b> option is checked, partial matches are also returned.</p>
|
||||
<br>
|
||||
<p>For more information, see <a target="_blank"
|
||||
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html">Elasticsearch
|
||||
documentation</a></p>
|
||||
|
||||
<h2>Advanced search</h2>
|
||||
<p>For documentation about the advanced search mode, see <a target="_blank"
|
||||
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax">Elasticsearch
|
||||
documentation</a></p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -186,14 +201,107 @@
|
||||
<label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
|
||||
</div>
|
||||
|
||||
<div class="custom-control custom-checkbox">
|
||||
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
|
||||
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against
|
||||
document path</label>
|
||||
</div>
|
||||
|
||||
<div class="custom-control custom-checkbox">
|
||||
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
|
||||
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter
|
||||
bar</label>
|
||||
</div>
|
||||
|
||||
<br/>
|
||||
<div class="form-group">
|
||||
<label for="settingFragmentSize">Highlight context size in characters</label>
|
||||
<input type="number" class="form-control" id="settingFragmentSize">
|
||||
</div>
|
||||
|
||||
<label for="settingQueryMode">Search mode</label>
|
||||
<select id="settingQueryMode" class="form-control form-control-sm">
|
||||
<option value="simple">Simple</option>
|
||||
<option value="advanced">Advanced</option>
|
||||
</select>
|
||||
|
||||
<label for="settingDisplay">Display</label>
|
||||
<select id="settingDisplay" class="form-control form-control-sm">
|
||||
<option value="grid">Grid</option>
|
||||
<option value="list">List</option>
|
||||
</select>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingColumns">Maximum column count</label>
|
||||
<select id="settingColumns" class="form-control form-control-sm">
|
||||
<option value="3">3</option>
|
||||
<option value="4">4</option>
|
||||
<option value="5">5</option>
|
||||
<option value="6">6</option>
|
||||
<option value="7">7</option>
|
||||
<option value="8">8</option>
|
||||
<option value="9">9</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
<h4>Stats</h4>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingTreemapType">Treemap type</label>
|
||||
<select id="settingTreemapType" class="form-control form-control-sm">
|
||||
<option value="cascaded">Cascaded</option>
|
||||
<option value="flat">Flat (compact)</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingTreemapTiling">Treemap tiling</label>
|
||||
<select id="settingTreemapTiling" class="form-control form-control-sm">
|
||||
<option value="binary">Binary</option>
|
||||
<option value="squarify">Squarify</option>
|
||||
<option value="slice">Slice</option>
|
||||
<option value="dice">Dice</option>
|
||||
<option value="sliceDice">Slice &amp; Dice</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
|
||||
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingTreemapColor">Treemap color (cascaded)</label>
|
||||
<select id="settingTreemapColor" class="form-control form-control-sm">
|
||||
<option value="PuBuGn">Purple-Blue-Green</option>
|
||||
<option value="PuRd">Purple-Red</option>
|
||||
<option value="PuBu">Purple-Blue</option>
|
||||
<option value="YlOrBr">Yellow-Orange-Brown</option>
|
||||
<option value="YlOrRd">Yellow-Orange-Red</option>
|
||||
<option value="YlGn">Yellow-Green</option>
|
||||
<option value="YlGnBu">Yellow-Green-Blue</option>
|
||||
<option value="Plasma">Plasma</option>
|
||||
<option value="Magma">Magma</option>
|
||||
<option value="Inferno">Inferno</option>
|
||||
<option value="Viridis">Viridis</option>
|
||||
<option value="Turbo">Turbo</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingTreemapSize">Treemap size</label>
|
||||
<select id="settingTreemapSize" class="form-control form-control-sm">
|
||||
<option value="small">Small</option>
|
||||
<option value="medium">Medium</option>
|
||||
<option value="large">Large</option>
|
||||
<option value="x-large">X-Large</option>
|
||||
<option value="xx-large">XX-Large</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<br>
|
||||
<button style="float: right" class="btn btn-primary" onclick="updateSettings()">Update settings</button>
|
||||
<button class="btn btn-primary ml-auto" onclick="updateSettings()">Update settings</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -215,9 +323,36 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="modal" id="tagModal" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
|
||||
<div class="modal-dialog modal-dialog-centered" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h5 class="modal-title">Add tag</h5>
|
||||
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
|
||||
<span aria-hidden="true">×</span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="modal-body">
|
||||
<div class="form-group">
|
||||
<div class="row">
|
||||
<div class="col col-8">
|
||||
<input type="text" id="tagBar" class="form-control">
|
||||
</div>
|
||||
<div class="col col-4">
|
||||
<input type="text" id="tag-color" value="" class="form-control"/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="searchResults"></div>
|
||||
</div>
|
||||
|
||||
<script src="js" type="text/javascript"></script>
|
||||
<script src="jslib" type="text/javascript"></script>
|
||||
<script src="jssearch" type="text/javascript"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
827
src/static/stats.html
Normal file
827
src/static/stats.html
Normal file
@@ -0,0 +1,827 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>sist2 - Stats</title>
|
||||
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
|
||||
<link href="css" rel="stylesheet" type="text/css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<nav class="navbar navbar-expand-lg">
|
||||
<a class="navbar-brand" href="/">sist2</a>
|
||||
<span class="badge badge-pill version">2.10.1</span>
|
||||
<span class="tagline">Lightning-fast file system indexer and search tool </span>
|
||||
<a style="margin-left: auto" class="btn" href="/">Back</a>
|
||||
<button class="btn" type="button" data-toggle="modal" data-target="#settings"
|
||||
onclick="loadSettings()">Settings
|
||||
</button>
|
||||
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
|
||||
</nav>
|
||||
|
||||
<div class="container pb-3">
|
||||
<div class="card">
|
||||
<div class="card-body">
|
||||
|
||||
<label for="indices">Index</label>
|
||||
<select id="indices" onchange="updateStats()"></select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="treemap-card" class="stats-card">
|
||||
<button class="btn stats-btn" onclick="fullScreen('treemap-card')" id="treemap-card-enlarge">Enlarge</button>
|
||||
<button class="btn stats-btn" onclick="exportTreemap()">Export</button>
|
||||
<svg id="treemap"></svg>
|
||||
</div>
|
||||
|
||||
<div id="graphs-card" class="stats-card">
|
||||
<button class="btn stats-btn" onclick="fullScreen('graphs-card')" id="graphs-card-enlarge">Enlarge</button>
|
||||
<div class="graph">
|
||||
<svg id="agg_mime_size"></svg>
|
||||
</div>
|
||||
<div class="graph">
|
||||
<svg id="agg_mime_count"></svg>
|
||||
</div>
|
||||
<div class="graph">
|
||||
<svg id="date_histogram"></svg>
|
||||
</div>
|
||||
<div class="graph">
|
||||
<svg id="size_histogram"></svg>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="modal" id="settings" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
|
||||
<div class="modal-dialog modal-dialog-centered" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h5 class="modal-title">Settings</h5>
|
||||
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
|
||||
<span aria-hidden="true">×</span>
|
||||
</button>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
<div class="custom-control custom-checkbox">
|
||||
<input type="checkbox" class="custom-control-input" id="settingHighlight">
|
||||
<label class="custom-control-label" for="settingHighlight">Enable highlighting</label>
|
||||
</div>
|
||||
|
||||
<div class="custom-control custom-checkbox">
|
||||
<input type="checkbox" class="custom-control-input" id="settingFuzzy">
|
||||
<label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
|
||||
</div>
|
||||
|
||||
<div class="custom-control custom-checkbox">
|
||||
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
|
||||
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document
|
||||
path</label>
|
||||
</div>
|
||||
|
||||
<div class="custom-control custom-checkbox">
|
||||
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
|
||||
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
|
||||
</div>
|
||||
|
||||
<br/>
|
||||
<div class="form-group">
|
||||
<label for="settingFragmentSize">Highlight context size in characters</label>
|
||||
<input type="number" class="form-control" id="settingFragmentSize">
|
||||
</div>
|
||||
|
||||
<label for="settingQueryMode">Search mode</label>
|
||||
<select id="settingQueryMode" class="form-control form-control-sm">
|
||||
<option value="simple">Simple</option>
|
||||
<option value="advanced">Advanced</option>
|
||||
</select>
|
||||
|
||||
<label for="settingDisplay">Display</label>
|
||||
<select id="settingDisplay" class="form-control form-control-sm">
|
||||
<option value="grid">Grid</option>
|
||||
<option value="list">List</option>
|
||||
</select>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingColumns">Maximum column count</label>
|
||||
<select id="settingColumns" class="form-control form-control-sm">
|
||||
<option value="3">3</option>
|
||||
<option value="4">4</option>
|
||||
<option value="5">5</option>
|
||||
<option value="6">6</option>
|
||||
<option value="7">7</option>
|
||||
<option value="8">8</option>
|
||||
<option value="9">9</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
<h4>Stats</h4>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingTreemapType">Treemap type</label>
|
||||
<select id="settingTreemapType" class="form-control form-control-sm">
|
||||
<option value="cascaded">Cascaded</option>
|
||||
<option value="flat">Flat (compact)</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingTreemapTiling">Treemap tiling</label>
|
||||
<select id="settingTreemapTiling" class="form-control form-control-sm">
|
||||
<option value="binary">Binary</option>
|
||||
<option value="squarify">Squarify</option>
|
||||
<option value="slice">Slice</option>
|
||||
<option value="dice">Dice</option>
|
||||
<option value="sliceDice">Slice &amp; Dice</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
|
||||
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingTreemapColor">Treemap color (cascaded)</label>
|
||||
<select id="settingTreemapColor" class="form-control form-control-sm">
|
||||
<option value="PuBuGn">Purple-Blue-Green</option>
|
||||
<option value="PuRd">Purple-Red</option>
|
||||
<option value="PuBu">Purple-Blue</option>
|
||||
<option value="YlOrBr">Yellow-Orange-Brown</option>
|
||||
<option value="YlOrRd">Yellow-Orange-Red</option>
|
||||
<option value="YlGn">Yellow-Green</option>
|
||||
<option value="YlGnBu">Yellow-Green-Blue</option>
|
||||
<option value="Plasma">Plasma</option>
|
||||
<option value="Magma">Magma</option>
|
||||
<option value="Inferno">Inferno</option>
|
||||
<option value="Viridis">Viridis</option>
|
||||
<option value="Turbo">Turbo</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="settingTreemapSize">Treemap size</label>
|
||||
<select id="settingTreemapSize" class="form-control form-control-sm">
|
||||
<option value="small">Small</option>
|
||||
<option value="medium">Medium</option>
|
||||
<option value="large">Large</option>
|
||||
<option value="x-large">X-Large</option>
|
||||
<option value="xx-large">XX-Large</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<br>
|
||||
<button class="btn btn-primary float-right" onclick="updateSettings()">Update settings</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="jslib" type="text/javascript"></script>
|
||||
<script>
|
||||
// Dimensions handed to d3.treemap().size() by the treemap builders below.
let width;
let height;

// Looked up with the value of the "#indices" select to label the tree root.
let indexMap = {};

const barHeight = 20;
const ordinalColor = d3.scaleOrdinal(d3.schemeCategory10);
const formatSI = d3.format("~s");

// Value of the "Treemap tiling" setting -> d3 tiling function.
const TILING_MODES = {
    "squarify": d3.treemapSquarify,
    "binary": d3.treemapBinary,
    "sliceDice": d3.treemapSliceDice,
    "slice": d3.treemapSlice,
    "dice": d3.treemapDice,
};

// Value of the "Treemap color" setting -> d3 colour interpolator.
const COLORS = {
    "PuBuGn": d3.interpolatePuBuGn,
    "PuRd": d3.interpolatePuRd,
    "PuBu": d3.interpolatePuBu,
    "YlOrBr": d3.interpolateYlOrBr,
    "YlOrRd": d3.interpolateYlOrRd,
    "YlGn": d3.interpolateYlGn,
    "YlGnBu": d3.interpolateYlGnBu,
    "Plasma": d3.interpolatePlasma,
    "Magma": d3.interpolateMagma,
    "Inferno": d3.interpolateInferno,
    "Viridis": d3.interpolateViridis,
    "Turbo": d3.interpolateTurbo,
};

// Value of the "Treemap size" setting -> pair of numbers
// (presumably [width, height] in pixels; confirm against the code using it).
const SIZES = {
    "small": [800, 600],
    "medium": [1300, 750],
    "large": [1900, 900],
    "x-large": [2800, 1700],
    "xx-large": [3600, 2000],
};

// Higher fill opacity when the theme cookie ("sist") is present.
const fillOpacity = document.cookie.includes("sist") ? 0.9 : 0.6;
|
||||
|
||||
// Next free counter value for each name passed to uid().
const uids = {};

/**
 * Return `name` followed by a counter that starts at 0 and grows by one on
 * every call made with that same name ("leaf0", "leaf1", ...).
 *
 * @param {string} name prefix of the generated identifier
 * @returns {string} identifier unique among those generated for `name`
 */
function uid(name) {
    const counter = uids[name] || 0;
    uids[name] = counter + 1;
    return name + counter;
}
|
||||
|
||||
/**
 * Turn a flat table of rows into a nested {name, depth, value, children}
 * tree.
 *
 * Each row provides `taxonomy` (the list of keys leading to its node) and
 * `size`. A row whose node has no entries yet stores its size on the node
 * itself; otherwise, when `addSelfDir` is truthy, the size is stored on a
 * child named ".".
 *
 * @param {Array} table rows with `taxonomy` and `size` properties
 * @param {boolean} addSelfDir whether to add a "." child for rows whose node
 *                             already has entries
 * @returns {Object} root node (depth 0, value 0) named after the index
 *                   currently selected in "#indices"
 */
const burrow = function (table, addSelfDir) {
    const SIZE_KEY = "$size$";
    const root = {};

    // Pass 1: build nested plain objects keyed by taxonomy entry.
    table.forEach(row => {
        let layer = root;

        row.taxonomy.forEach(key => {
            layer[key] = key in layer ? layer[key] : {};
            layer = layer[key];
        });

        const isEmpty = Object.keys(layer).length === 0;
        if (isEmpty) {
            layer[SIZE_KEY] = row.size;
        } else if (addSelfDir) {
            layer["."] = {
                "$size$": row.size,
            };
        }
    });

    // Pass 2: convert the nested objects into node records.
    function descend(obj, depth) {
        const names = Object.keys(obj).filter(k => k !== SIZE_KEY);

        return names.map(k => {
            const entry = obj[k];
            const child = {
                name: k,
                depth: depth,
                value: 0,
                children: descend(entry, depth + 1)
            };
            if (SIZE_KEY in entry) {
                child.value = entry[SIZE_KEY];
            }
            return child;
        });
    }

    return {
        name: `[${indexMap[$("#indices").val()]}]`,
        children: descend(root, 1),
        value: 0,
        depth: 0,
    }
};
|
||||
|
||||
/**
 * Draw a treemap in which only the leaves of `data` are rendered, one <g>
 * per leaf holding a tooltip, a rectangle, a clip path and a two-line label.
 *
 * @param {Object} data tree of {name, value, children} nodes
 * @param svg d3 selection that receives the <g> elements
 */
function flatTreemap(data, svg) {
    const layout = d3.treemap()
        .tile(TILING_MODES[CONF.options.treemapTiling])
        .size([width, height])
        .padding(1)
        .round(true);

    const hierarchy = d3.hierarchy(data)
        .sum(node => node.value)
        .sort((a, b) => b.value - a.value);

    const root = layout(hierarchy);

    const leaf = svg.selectAll("g")
        .data(root.leaves())
        .join("g")
        .attr("transform", node => `translate(${node.x0},${node.y0})`);

    // Tooltip: names from the root down to the leaf, then the size.
    leaf.append("title")
        .text(node => {
            const fullPath = node.ancestors().reverse().map(a => a.data.name).join("/");
            return `${fullPath}\n${humanFileSize(node.value)}`;
        });

    // Rectangle, coloured by the name of the ancestor found by walking up
    // until the depth no longer exceeds the configured grouping depth.
    leaf.append("rect")
        .attr("id", node => (node.leafUid = uid("leaf")))
        .attr("fill", node => {
            let group = node;
            while (group.depth > CONF.options.treemapGroupingDepth) {
                group = group.parent;
            }
            return ordinalColor(group.data.name);
        })
        .attr("fill-opacity", fillOpacity)
        .attr("width", node => node.x1 - node.x0)
        .attr("height", node => node.y1 - node.y0);

    // Clip path referencing the leaf's own rectangle.
    leaf.append("clipPath")
        .attr("id", node => (node.clipUid = uid("clip")))
        .append("use")
        .attr("href", node => `#${node.leafUid}`);

    // Label: name on the first line, size on the second. A leaf named "."
    // is labelled with its parent's name and value.
    leaf.append("text")
        .attr("clip-path", node => `url(#${node.clipUid})`)
        .selectAll("tspan")
        .data(node => {
            const labelled = node.data.name === "." ? node.parent : node;
            return [labelled.data.name, humanFileSize(labelled.value)];
        })
        .join("tspan")
        .attr("x", 2)
        .attr("y", (line, i) => `${i === 0 ? 1.1 : 2.3}em`)
        .text(line => line);
}
|
||||
|
||||
/**
 * Shrink the right/bottom edges of nested treemap nodes.
 *
 * Bottom-up pass: each node gets an x and a y counter. Leaves get 0; a node
 * with children gets 1 + d3.max over the counters of those children whose
 * x1 (resp. y1) equals the node's x1 (resp. y1) minus `offset`.
 * Top-down pass: x1 / y1 are reduced by 2 * offset * counter.
 *
 * @param {Object} root   Laid-out d3 hierarchy node.
 * @param {number} offset Offset used for the edge comparison and the shrink step.
 * @returns {Object} The value returned by the chained eachAfter/eachBefore calls.
 */
function cascade(root, offset) {
    const xLevels = new Map;
    const yLevels = new Map;

    const collect = node => {
        const kids = node.children;
        if (!(kids && kids.length !== 0)) {
            xLevels.set(node, 0);
            yLevels.set(node, 0);
            return;
        }
        xLevels.set(node, 1 + d3.max(kids, c => c.x1 === node.x1 - offset ? xLevels.get(c) : NaN));
        yLevels.set(node, 1 + d3.max(kids, c => c.y1 === node.y1 - offset ? yLevels.get(c) : NaN));
    };

    const shrink = node => {
        node.x1 -= 2 * offset * xLevels.get(node);
        node.y1 -= 2 * offset * yLevels.get(node);
    };

    return root.eachAfter(collect).eachBefore(shrink);
}
|
||||
|
||||
/**
 * Draw a cascaded (nested) treemap: every node of the hierarchy is rendered,
 * grouped per depth level and coloured by depth.
 *
 * Uses the outer-scope `width` / `height` for the layout size.
 *
 * @param {Object} data Root node as produced by burrow().
 * @param {Object} svg  d3 selection to draw into.
 */
function cascadeTreemap(data, svg) {

    const layout = d3.treemap()
        .size([width, height])
        .tile(TILING_MODES[CONF.options.treemapTiling])
        .paddingOuter(3)
        .paddingTop(16)
        .paddingInner(1)
        .round(true);

    const hierarchy = d3.hierarchy(data)
        .sum(d => d.value)
        .sort((a, b) => b.value - a.value);

    const root = cascade(
        layout(hierarchy),
        3 // treemap.paddingOuter
    );

    // Colour scale over depth; the domain runs from the deepest level to -1.
    const maxDepth = Math.max(...root.descendants().map(d => d.depth));
    const color = d3.scaleSequential([maxDepth, -1], COLORS[CONF.options.treemapColor]);

    svg.append("filter")
        .attr("id", "shadow")
        .append("feDropShadow")
        .attr("flood-opacity", 0.3)
        .attr("dx", 0)
        .attr("stdDeviation", 3);

    // One outer <g> per depth level (ascending), one inner <g> per node.
    const levels = d3.nest()
        .key(d => d.depth).sortKeys(d3.ascending)
        .entries(root.descendants());

    const node = svg.selectAll("g")
        .data(levels)
        .join("g")
        .attr("filter", "url(#shadow)")
        .selectAll("g")
        .data(d => d.values)
        .join("g")
        .attr("transform", d => `translate(${d.x0},${d.y0})`);

    // Tooltip: path without the root element plus the formatted size.
    node.append("title")
        .text(d => `${d.ancestors().reverse().splice(1).map(d => d.data.name).join("/")}\n${humanFileSize(d.value)}`);

    node.append("rect")
        .attr("id", d => (d.nodeUid = uid("node")))
        .attr("fill", d => color(d.depth))
        .attr("width", d => d.x1 - d.x0)
        .attr("height", d => d.y1 - d.y0);

    node.append("clipPath")
        .attr("id", d => (d.clipUid = uid("clip")))
        .append("use")
        .attr("href", d => `#${d.nodeUid}`);

    node.append("text")
        // Dark text on light tiles, light text on dark tiles.
        .attr("fill", d => d3.hsl(color(d.depth)).l > .5 ? "#333" : "#eee")
        .attr("clip-path", d => `url(#${d.clipUid})`)
        .selectAll("tspan")
        .data(d => [d.data.name, humanFileSize(d.value)])
        .join("tspan")
        .text(d => d);

    const hasChildren = d => d.children;

    // Nodes with children: name and size share one line.
    node.filter(hasChildren).selectAll("tspan")
        .attr("dx", 3)
        .attr("y", 13);

    // Nodes without children: name and size are stacked on two lines.
    node.filter(d => !hasChildren(d)).selectAll("tspan")
        .attr("x", 3)
        .attr("y", (_, i) => `${i === 0 ? 1.1 : 2.3}em`);
}
|
||||
|
||||
|
||||
/**
 * Horizontal bar chart of the 15 MIME types with the largest total size.
 *
 * Note: `data` is sorted in place and its rows gain `name` / `value` fields.
 *
 * @param {Array} data Rows with `mime`, `size` and `count` fields.
 * @param {Object} svg d3 selection to draw into (cleared first).
 */
function mimeBarSize(data, svg) {

    // Left margin: 6 units per character of the longest label among the
    // top 15 rows by count and the top 15 rows by size.
    const topByCount = data.sort((a, b) => b.count - a.count).slice(0, 15);
    const labelWidthByCount = d3.max(topByCount, d => d.mime.length) * 6;
    const topBySize = data.sort((a, b) => b.size - a.size).slice(0, 15);
    const labelWidthBySize = d3.max(topBySize, d => d.mime.length) * 6;

    const margin = {
        top: 50,
        right: 0,
        bottom: 10,
        left: Math.max(labelWidthByCount, labelWidthBySize)
    };

    for (const row of data) {
        row.name = row.mime;
        row.value = Number(row.size);
    }
    data = data.sort((a, b) => b.value - a.value).slice(0, 15);

    const width = 550;
    const height = Math.ceil((data.length + 0.1) * barHeight) + margin.top + margin.bottom;

    svg.selectAll("*").remove();
    svg.attr("viewBox", [0, 0, width, height]);

    const y = d3.scaleBand()
        .domain(d3.range(data.length))
        .rangeRound([margin.top, height - margin.bottom]);

    const x = d3.scaleLinear()
        .domain([0, d3.max(data, d => d.value)])
        .range([margin.left, width - margin.right]);

    // Bars, each with a tooltip showing the formatted value.
    const bars = svg.append("g")
        .attr("fill-opacity", fillOpacity)
        .selectAll("rect")
        .data(data)
        .join("rect")
        .attr("fill", d => ordinalColor(d.name))
        .attr("x", x(0))
        .attr("y", (d, i) => y(i))
        .attr("width", d => x(d.value) - x(0))
        .attr("height", y.bandwidth());

    bars.append("title")
        .text(d => formatSI(d.value));

    // Top axis (values), without its domain line.
    svg.append("g")
        .attr("transform", `translate(0,${margin.top})`)
        .call(d3.axisTop(x).ticks(width / 80, data.format).tickFormat(formatSI))
        .call(g => g.select(".domain").remove());

    // Left axis (MIME type labels).
    svg.append("g")
        .attr("transform", `translate(${margin.left},0)`)
        .call(d3.axisLeft(y).tickFormat(i => data[i].name).tickSizeOuter(0));

    // Chart title.
    svg.append("text")
        .attr("x", (width / 2))
        .attr("y", (margin.top / 2))
        .attr("text-anchor", "middle")
        .style("font-size", "16px")
        .text("Size distribution by MIME type");
}
|
||||
|
||||
/**
 * Horizontal bar chart of the 15 MIME types with the highest file count.
 *
 * Note: `data` is sorted in place and its rows gain `name` / `value` fields.
 *
 * @param {Array} data Rows with `mime`, `size` and `count` fields.
 * @param {Object} svg d3 selection to draw into (cleared first).
 */
function mimeBarCount(data, svg) {

    // Left margin: 6 units per character of the longest label among the
    // top 15 rows by count and the top 15 rows by size.
    const topByCount = data.sort((a, b) => b.count - a.count).slice(0, 15);
    const labelWidthByCount = d3.max(topByCount, d => d.mime.length) * 6;
    const topBySize = data.sort((a, b) => b.size - a.size).slice(0, 15);
    const labelWidthBySize = d3.max(topBySize, d => d.mime.length) * 6;

    const margin = {
        top: 50,
        right: 0,
        bottom: 10,
        left: Math.max(labelWidthByCount, labelWidthBySize)
    };

    for (const row of data) {
        row.name = row.mime;
        row.value = Number(row.count);
    }

    data = data.sort((a, b) => b.value - a.value).slice(0, 15);

    const width = 550;
    const height = Math.ceil((data.length + 0.1) * barHeight) + margin.top + margin.bottom;

    svg.selectAll("*").remove();
    svg.attr("viewBox", [0, 0, width, height]);

    const y = d3.scaleBand()
        .domain(d3.range(data.length))
        .rangeRound([margin.top, height - margin.bottom]);

    const x = d3.scaleLinear()
        .domain([0, d3.max(data, d => d.value)])
        .range([margin.left, width - margin.right]);

    // Bars, each with a tooltip showing the comma-grouped count.
    const bars = svg.append("g")
        .attr("fill-opacity", fillOpacity)
        .selectAll("rect")
        .data(data)
        .join("rect")
        .attr("fill", d => ordinalColor(d.name))
        .attr("x", x(0))
        .attr("y", (d, i) => y(i))
        .attr("width", d => x(d.value) - x(0))
        .attr("height", y.bandwidth());

    bars.append("title")
        .text(d => d3.format(",")(d.value));

    // Top axis (values), without its domain line.
    svg.append("g")
        .attr("transform", `translate(0,${margin.top})`)
        .call(d3.axisTop(x).ticks(width / 80, data.format).tickFormat(formatSI))
        .call(g => g.select(".domain").remove());

    // Left axis (MIME type labels).
    svg.append("g")
        .attr("transform", `translate(${margin.left},0)`)
        .call(d3.axisLeft(y).tickFormat(i => data[i].name).tickSizeOuter(0));

    // Chart title.
    svg.append("text")
        .attr("x", (width / 2))
        .attr("y", (margin.top / 2))
        .attr("text-anchor", "middle")
        .style("font-size", "16px")
        .text("File count distribution by MIME type");
}
|
||||
|
||||
/**
 * Histogram of file counts per modification-time bucket.
 *
 * Only buckets whose count is strictly above the 0.9 quantile of all
 * bucket counts are drawn.
 *
 * @param {Array} data Rows with `bucket` (bucket start; formatted on the axis
 *        as a UNIX timestamp in seconds) and `count` fields.
 * @param {Object} svg d3 selection to draw into (cleared first).
 */
function dateHistogram(data, svg) {

    let bins = data.map(d => ({
        length: Number(d.count),
        x0: Number(d.bucket),
        x1: Number(d.bucket) + 2629800
    }));
    // d3.quantile below is fed the bins in ascending count order.
    bins.sort((a, b) => a.length - b.length);

    const margin = {
        top: 50,
        right: 20,
        bottom: 70,
        left: 40
    };

    const thresh = d3.quantile(bins, 0.9, d => d.length);
    bins = bins.filter(d => d.length > thresh);

    const width = 550;
    const height = 450;

    svg.selectAll("*").remove();
    svg.attr("viewBox", [0, 0, width, height]);

    const y = d3.scaleLinear()
        .domain([0, d3.max(bins, d => d.length)]).nice()
        .range([height - margin.bottom, margin.top]);

    const x = d3.scaleLinear()
        .domain(d3.extent(bins, d => d.x0)).nice()
        .range([margin.left, width - margin.right]);

    // Bars (at least 1 unit wide), each with a tooltip showing the raw count.
    const bars = svg.append("g")
        .attr("fill", "steelblue")
        .selectAll("rect")
        .data(bins)
        .join("rect")
        .attr("x", d => x(d.x0) + 1)
        .attr("width", d => Math.max(1, x(d.x1) - x(d.x0) - 1))
        .attr("y", d => y(d.length))
        .attr("height", d => y(0) - y(d.length));

    bars.call(g => g
        .append("title")
        .text(d => d.length)
    );

    // Bottom axis: tick values rendered as YYYY-MM-DD, labels rotated.
    const formatDay = d3.timeFormat("%Y-%m-%d");
    const parseEpoch = d3.utcParse("%s");
    const bottomAxis = d3.axisBottom(x)
        .ticks(width / 30)
        .tickSizeOuter(0)
        .tickFormat(t => formatDay(parseEpoch(t)));

    svg.append("g")
        .attr("transform", `translate(0,${height - margin.bottom})`)
        .call(bottomAxis)
        .call(g => g
            .selectAll("text")
            .style("text-anchor", "end")
            .attr("dx", "-.8em")
            .attr("dy", ".15em")
            .attr("transform", "rotate(-65)")
        )
        .call(g => g.append("text")
            .attr("x", width - margin.right)
            .attr("y", -4)
            .attr("fill", "currentColor")
            .attr("font-weight", "bold")
            .attr("text-anchor", "end")
            .text("mtime")
        );

    // Left axis: counts; the last tick label is cloned to act as the axis title.
    const leftAxis = d3.axisLeft(y)
        .ticks(height / 40)
        .tickFormat(t => formatSI(t));

    svg.append("g")
        .attr("transform", `translate(${margin.left},0)`)
        .call(leftAxis)
        .call(g => g.select(".domain").remove())
        .call(g => g.select(".tick:last-of-type text").clone()
            .attr("x", 4)
            .attr("text-anchor", "start")
            .attr("font-weight", "bold")
            .text("File count"));

    // Chart title.
    svg.append("text")
        .attr("x", (width / 2))
        .attr("y", (margin.top / 2))
        .attr("text-anchor", "middle")
        .style("font-size", "16px")
        .text("File modification time distribution");
}
|
||||
|
||||
/**
 * Histogram of file counts per file-size bucket.
 *
 * Only the 25 buckets with the highest counts are drawn; each bucket spans
 * 5 * 1024 * 1024 units starting at `bucket`.
 *
 * @param {Array} data Rows with `bucket` (bucket start) and `count` fields.
 * @param {Object} svg d3 selection to draw into (cleared first).
 */
function sizeHistogram(data, svg) {

    const allBins = data.map(d => ({
        length: Number(d.count),
        x0: Number(d.bucket),
        x1: Number(d.bucket) + (5 * 1024 * 1024)
    }));
    const bins = allBins.sort((a, b) => b.length - a.length).slice(0, 25);

    const margin = {
        top: 50,
        right: 20,
        bottom: 70,
        left: 40
    };

    const width = 550;
    const height = 450;

    svg.selectAll("*").remove();
    svg.attr("viewBox", [0, 0, width, height]);

    const y = d3.scaleLinear()
        .domain([0, d3.max(bins, d => d.length)])
        .range([height - margin.bottom, margin.top]);

    const x = d3.scaleLinear()
        .domain(d3.extent(bins, d => d.x0)).nice()
        .range([margin.left, width - margin.right]);

    // Bars (at least 1 unit wide), each with a tooltip showing the raw count.
    const bars = svg.append("g")
        .attr("fill", "steelblue")
        .selectAll("rect")
        .data(bins)
        .join("rect")
        .attr("x", d => x(d.x0) + 1)
        .attr("width", d => Math.max(1, x(d.x1) - x(d.x0) - 1))
        .attr("y", d => y(d.length))
        .attr("height", d => y(0) - y(d.length));

    bars.call(g => g
        .append("title")
        .text(d => d.length)
    );

    // Bottom axis: sizes, labels rotated.
    const bottomAxis = d3.axisBottom(x)
        .ticks(width / 30)
        .tickSizeOuter(0)
        .tickFormat(formatSI);

    svg.append("g")
        .attr("transform", `translate(0,${height - margin.bottom})`)
        .call(bottomAxis)
        .call(g => g
            .selectAll("text")
            .style("text-anchor", "end")
            .attr("dx", "-.8em")
            .attr("dy", ".15em")
            .attr("transform", "rotate(-65)")
        )
        .call(g => g.append("text")
            .attr("x", width - margin.right)
            .attr("y", -4)
            .attr("fill", "currentColor")
            .attr("font-weight", "bold")
            .attr("text-anchor", "end")
            .text("size (bytes)")
        );

    // Left axis: counts; the last tick label is cloned to act as the axis title.
    const leftAxis = d3.axisLeft(y)
        .ticks(height / 40)
        .tickFormat(t => formatSI(t));

    svg.append("g")
        .attr("transform", `translate(${margin.left},0)`)
        .call(leftAxis)
        .call(g => g.select(".domain").remove())
        .call(g => g.select(".tick:last-of-type text").clone()
            .attr("x", 4)
            .attr("text-anchor", "start")
            .attr("font-weight", "bold")
            .text("File count"));

    // Chart title.
    svg.append("text")
        .attr("x", (width / 2))
        .attr("y", (margin.top / 2))
        .attr("text-anchor", "middle")
        .style("font-size", "16px")
        .text("File size distribution");
}
|
||||
|
||||
|
||||
/**
 * Reload every chart for the index currently selected in "#indices".
 *
 * Sets the outer-scope `width` / `height` from the configured treemap size,
 * requests the four CSV resources (./s/<index>/1..4) and, without waiting
 * for them, clears and re-initialises the treemap <svg>.
 */
function updateStats() {
    const dimensions = SIZES[CONF.options.treemapSize];
    width = dimensions[0];
    height = dimensions[1];

    const treemapSvg = d3.select("#treemap");
    const mimeSvgSize = d3.select("#agg_mime_size");
    const mimeSvgCount = d3.select("#agg_mime_count");
    const dateHistogramSvg = d3.select("#date_histogram");
    const sizeHistogramSvg = d3.select("#size_histogram");

    const indexId = $("#indices").val();

    // 1: treemap rows (path,size)
    d3.csv(`./s/${indexId}/1`).then(rows => {
        for (const row of rows) {
            row.taxonomy = row.path.split("/");
            row.size = Number(row.size);
        }

        if (CONF.options.treemapType === "cascaded") {
            cascadeTreemap(burrow(rows, false), treemapSvg);
            return;
        }

        // Flat mode: deepest paths first, and nodes keep their own size under ".".
        const deepestFirst = rows.sort((a, b) => b.taxonomy.length - a.taxonomy.length);
        flatTreemap(burrow(deepestFirst, true), treemapSvg);
    });

    // 2: MIME aggregation; each chart gets its own copy because both sort in place.
    d3.csv(`./s/${indexId}/2`).then(rows => {
        mimeBarSize(rows.slice(), mimeSvgSize);
        mimeBarCount(rows.slice(), mimeSvgCount);
    });

    // 3: size aggregation
    d3.csv(`./s/${indexId}/3`).then(rows => {
        sizeHistogram(rows, sizeHistogramSvg);
    });

    // 4: date aggregation
    d3.csv(`./s/${indexId}/4`).then(rows => {
        dateHistogram(rows, dateHistogramSvg);
    });

    // Runs synchronously, i.e. before any of the CSV callbacks above.
    treemapSvg.selectAll("*").remove();
    treemapSvg
        .attr("viewBox", [0, 0, width, height])
        .attr("xmlns", "http://www.w3.org/2000/svg")
        .attr("xmlns:xlink", "http://www.w3.org/1999/xlink")
        .attr("version", "1.1")
        .style("overflow", "visible")
        .style("font", "10px sans-serif");
}
|
||||
|
||||
/**
 * Page entry point: load the configuration, fetch the index list, fill the
 * "#indices" <select> and render the charts.
 *
 * An index whose name appears in the comma-separated "i" URL parameter is
 * pre-selected.
 */
window.onload = function () {
    CONF.load();

    $.jsonPost("i").then(resp => {
        const select = $("#indices");

        const urlIndices = (new URLSearchParams(location.search)).get("i");
        const requestedNames = urlIndices ? urlIndices.split(",") : [];

        resp["indices"].forEach(idx => {
            indexMap[idx.id] = idx.name;

            // .text() instead of .append(): the index name must not be parsed as HTML.
            select.append($("<option>")
                .attr("value", idx.id)
                .text(idx.name));

            if (requestedNames.indexOf(idx.name) !== -1) {
                // .val() selects the option; jQuery's .select() is only the
                // "select" event shortcut and does not change the selection.
                select.val(idx.id);
            }
        });

        updateStats();
    });
};
|
||||
|
||||
/**
 * Toggle the "full-screen" class on a card and update its button label.
 *
 * @param {string} selector Element id of the card; the button is looked up
 *        as `<selector>-enlarge`.
 */
function fullScreen(selector) {
    const card = document.getElementById(selector);
    const btn = document.getElementById(selector + "-enlarge");

    // toggle() returns true when the class is present after the call.
    const enlarged = card.classList.toggle("full-screen");

    btn.innerText = enlarged ? "Shrink" : "Enlarge";
}
|
||||
|
||||
/**
 * Render the "#treemap" element to a PNG blob and download it as
 * "<index name>_treemap.png" through a temporary <a> element.
 */
function exportTreemap() {
    const target = document.getElementById("treemap");

    domtoimage.toBlob(target, {width: width, height: height})
        .then(blob => {
            const link = document.createElement("a");
            const objectUrl = URL.createObjectURL(blob);

            link.href = objectUrl;
            link.download = `${indexMap[$("#indices").val()]}_treemap.png`;
            document.body.appendChild(link);
            link.click();

            // Clean up on the next tick, after the click has been dispatched.
            setTimeout(() => {
                document.body.removeChild(link);
                window.URL.revokeObjectURL(objectUrl);
            }, 0);
        });
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
351
src/stats.c
Normal file
351
src/stats.c
Normal file
@@ -0,0 +1,351 @@
|
||||
#include "sist.h"
|
||||
#include "io/serialize.h"
|
||||
#include "ctx.h"
|
||||
|
||||
static GHashTable *FlatTree;
|
||||
static GHashTable *BufferTable;
|
||||
|
||||
static GHashTable *AggMime;
|
||||
static GHashTable *AggSize;
|
||||
static GHashTable *AggDate;
|
||||
|
||||
#define SIZE_BUCKET (long)(5 * 1024 * 1024)
|
||||
#define DATE_BUCKET (long)(2629800)
|
||||
|
||||
static long TotalSize = 0;
|
||||
static long DocumentCount = 0;
|
||||
|
||||
// Running totals of one aggregation bucket (used as the value type of the
// AggMime, AggSize and AggDate tables).
typedef struct {
|
||||
long size; // sum of the sizes of the documents added to the bucket
|
||||
long count; // number of documents added to the bucket
|
||||
} agg_t;
|
||||
|
||||
/**
 * Index-reader callback: folds one document into the stats tables.
 *
 * Updates FlatTree (size per "path" value), AggMime (per MIME type),
 * AggSize (per SIZE_BUCKET-wide size bucket), AggDate (per DATE_BUCKET-wide
 * mtime bucket) and the TotalSize / DocumentCount totals.
 *
 * Documents that have a "parent" field are ignored, as are documents that
 * lack a "path", "size" or "mtime" field. A missing or non-string "mime"
 * only skips the MIME aggregation.
 *
 * @param document Parsed JSON document
 * @param index_id Unused
 */
void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {

    if (cJSON_GetObjectItem(document, "parent") != NULL) {
        return;
    }

    const cJSON *path_item = cJSON_GetObjectItem(document, "path");
    const cJSON *mime_item = cJSON_GetObjectItem(document, "mime");
    const cJSON *size_item = cJSON_GetObjectItem(document, "size");
    const cJSON *mtime_item = cJSON_GetObjectItem(document, "mtime");

    // cJSON_GetObjectItem returns NULL for absent keys: never dereference blindly
    if (path_item == NULL || path_item->valuestring == NULL || size_item == NULL || mtime_item == NULL) {
        return;
    }

    // The tables take ownership of their keys, so work on private copies
    char *path = malloc(strlen(path_item->valuestring) + 1);
    strcpy(path, path_item->valuestring);

    char *mime = NULL;
    if (mime_item != NULL && mime_item->valuestring != NULL) {
        mime = malloc(strlen(mime_item->valuestring) + 1);
        strcpy(mime, mime_item->valuestring);
    }

    long size = (long) size_item->valuedouble;
    int mtime = mtime_item->valueint;

    // treemap: the size is stored directly in the value pointer
    void *existing_path = g_hash_table_lookup(FlatTree, path);
    if (existing_path == NULL) {
        g_hash_table_insert(FlatTree, path, (gpointer) size);
    } else {
        g_hash_table_replace(FlatTree, path, (gpointer) ((long) existing_path + size));
    }

    // mime agg
    if (mime != NULL) {
        agg_t *mime_agg = g_hash_table_lookup(AggMime, mime);
        if (mime_agg == NULL) {
            mime_agg = malloc(sizeof(agg_t));
            mime_agg->size = size;
            mime_agg->count = 1;
            g_hash_table_insert(AggMime, mime, mime_agg);
        } else {
            mime_agg->size += size;
            mime_agg->count += 1;
            // Key already present: our copy is not needed
            free(mime);
        }
    }

    // size agg: buckets are keyed by their lower bound
    long size_bucket = size - (size % SIZE_BUCKET);
    agg_t *size_agg = g_hash_table_lookup(AggSize, (gpointer) size_bucket);
    if (size_agg == NULL) {
        size_agg = malloc(sizeof(agg_t));
        size_agg->size = size;
        size_agg->count = 1;
        g_hash_table_insert(AggSize, (gpointer) size_bucket, size_agg);
    } else {
        size_agg->count += 1;
        size_agg->size += size;
    }

    // date agg: buckets are keyed by their lower bound
    long date_bucket = mtime - (mtime % DATE_BUCKET);
    agg_t *date_agg = g_hash_table_lookup(AggDate, (gpointer) date_bucket);
    if (date_agg == NULL) {
        date_agg = malloc(sizeof(agg_t));
        date_agg->size = size;
        date_agg->count = 1;
        g_hash_table_insert(AggDate, (gpointer) date_bucket, date_agg);
    } else {
        date_agg->count += 1;
        date_agg->size += size;
    }

    TotalSize += size;
    DocumentCount += 1;
}
|
||||
|
||||
/**
 * Feed every "_index_*" file found in index->path to read_index(), with
 * fill_tables() as the per-document callback.
 *
 * index->path is concatenated with the file name as-is, so it is expected
 * to end with a path separator.
 *
 * @param index Index whose directory is scanned
 */
void read_index_into_tables(index_t *index) {
    DIR *dir = opendir(index->path);
    if (dir == NULL) {
        // readdir(NULL) is undefined behaviour: report the failure instead
        LOG_FATALF("stats.c", "Could not open index directory %s: %s [%d]", index->path, strerror(errno), errno)
        return;
    }

    struct dirent *de;
    while ((de = readdir(dir)) != NULL) {
        if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
            char file_path[PATH_MAX];
            snprintf(file_path, PATH_MAX, "%s%s", index->path, de->d_name);
            read_index(file_path, index->desc.id, index->desc.type, fill_tables);
        }
    }
    closedir(dir);
}
|
||||
|
||||
/**
 * Index of the last occurrence of c in str, or -1 when c does not occur.
 *
 * The return type is signed so that the -1 sentinel is representable
 * (callers store the result in an int or compare it against -1).
 * As with strrchr(), the terminating NUL counts as part of the string.
 */
static int rfind(const char *str, int c) {
    const char *last = strrchr(str, c);
    if (last == NULL) {
        return -1;
    }
    return (int) (last - str);
}
|
||||
|
||||
int merge_up(double thresh) {
|
||||
long min_size = (long) (thresh * (double) TotalSize);
|
||||
|
||||
int count = 0;
|
||||
GHashTableIter iter;
|
||||
g_hash_table_iter_init(&iter, FlatTree);
|
||||
|
||||
void *key;
|
||||
void *value;
|
||||
|
||||
while (g_hash_table_iter_next(&iter, &key, &value)) {
|
||||
long size = (long) value;
|
||||
|
||||
if (size < min_size) {
|
||||
int stop = rfind(key, '/');
|
||||
if (stop == -1) {
|
||||
stop = 0;
|
||||
}
|
||||
char *parent = malloc(stop + 1);
|
||||
strncpy(parent, key, stop);
|
||||
*(parent + stop) = '\0';
|
||||
|
||||
void *existing_parent = g_hash_table_lookup(FlatTree, parent);
|
||||
if (existing_parent == NULL) {
|
||||
void *existing_parent2_key;
|
||||
void *existing_parent2_val;
|
||||
int found = g_hash_table_lookup_extended(BufferTable, parent, &existing_parent2_key,
|
||||
&existing_parent2_val);
|
||||
if (!found) {
|
||||
g_hash_table_insert(BufferTable, parent, value);
|
||||
} else {
|
||||
g_hash_table_replace(BufferTable, parent, (gpointer) ((long) existing_parent2_val + size));
|
||||
free(existing_parent2_key);
|
||||
}
|
||||
} else {
|
||||
g_hash_table_replace(FlatTree, parent, (gpointer) ((long) existing_parent + size));
|
||||
}
|
||||
|
||||
g_hash_table_iter_remove(&iter);
|
||||
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
g_hash_table_iter_init(&iter, BufferTable);
|
||||
while (g_hash_table_iter_next(&iter, &key, &value)) {
|
||||
g_hash_table_insert(FlatTree, key, value);
|
||||
g_hash_table_iter_remove(&iter);
|
||||
}
|
||||
|
||||
int size = g_hash_table_size(FlatTree);
|
||||
|
||||
LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size)
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
 * Write str to dst as a single CSV field.
 *
 * Fields containing a comma, a double quote or a line break are wrapped in
 * double quotes, with embedded double quotes doubled; any other string is
 * copied verbatim.
 *
 * dst must have room for the worst case of 2 * strlen(str) + 3 bytes
 * (callers in this file pass a PATH_MAX * 4 buffer).
 *
 * @param dst Output buffer
 * @param str NUL-terminated input
 */
void csv_escape(char *dst, const char *str) {

    // Fast path: nothing that needs quoting
    if (strpbrk(str, ",\"\r\n") == NULL) {
        strcpy(dst, str);
        return;
    }

    char *out = dst;

    *out++ = '"';
    for (const char *ptr = str; *ptr != '\0'; ptr++) {
        if (*ptr == '"') {
            // A literal quote is written twice
            *out++ = '"';
        }
        *out++ = *ptr;
    }
    *out++ = '"';
    *out = '\0';
}
|
||||
|
||||
int open_or_exit(const char *path) {
|
||||
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
|
||||
if (fd < 0) {
|
||||
LOG_FATALF("stats.c", "Error while creating file: %s [%d]\n", strerror(errno), errno)
|
||||
}
|
||||
return fd;
|
||||
}
|
||||
|
||||
#define TREEMAP_CSV_HEADER "path,size"
|
||||
#define MIME_AGG_CSV_HEADER "mime,size,count"
|
||||
#define SIZE_AGG_CSV_HEADER "bucket,size,count"
|
||||
#define DATE_AGG_CSV_HEADER "bucket,size,count"
|
||||
|
||||
void write_treemap_csv(double thresh, const char *out_path) {
|
||||
|
||||
void *key;
|
||||
void *value;
|
||||
|
||||
long min_size = (long) (thresh * (double) TotalSize);
|
||||
|
||||
int fd = open_or_exit(out_path);
|
||||
int ret = write(fd, TREEMAP_CSV_HEADER, sizeof(TREEMAP_CSV_HEADER) - 1);
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
|
||||
}
|
||||
|
||||
GHashTableIter iter;
|
||||
g_hash_table_iter_init(&iter, FlatTree);
|
||||
while (g_hash_table_iter_next(&iter, &key, &value)) {
|
||||
long size = (long) value;
|
||||
|
||||
if (size >= min_size) {
|
||||
char path_buf[PATH_MAX * 4];
|
||||
char buf[PATH_MAX * 4 + 16];
|
||||
|
||||
csv_escape(path_buf, key);
|
||||
size_t written = sprintf(buf, "\n%s,%ld", path_buf, (long) value);
|
||||
ret = write(fd, buf, written);
|
||||
if (ret == -1) {
|
||||
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
|
||||
}
|
||||
}
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
|
||||
/**
 * Dump an aggregation table with string keys to out_path as CSV.
 *
 * Rows are written as "\n<key>,<size>,<count>" after the header. A row that
 * does not fit in the formatting buffer is skipped rather than overflowing
 * the buffer or being written truncated.
 *
 * @param out_path Destination file
 * @param header   CSV header line (written without a trailing newline)
 * @param table    Table mapping NUL-terminated strings to agg_t values
 */
void write_agg_csv_str(const char *out_path, const char *header, GHashTable *table) {
    void *key;
    void *value;
    char buf[4096];

    int fd = open_or_exit(out_path);
    int ret = write(fd, header, strlen(header));
    if (ret == -1) {
        LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
    }

    GHashTableIter iter;
    g_hash_table_iter_init(&iter, table);
    while (g_hash_table_iter_next(&iter, &key, &value)) {
        const agg_t *agg = value;

        // snprintf: the key length is not bounded, so never format it unchecked
        int len = snprintf(buf, sizeof(buf), "\n%s,%ld,%ld", (const char *) key, agg->size, agg->count);
        if (len < 0 || (size_t) len >= sizeof(buf)) {
            LOG_DEBUGF("stats.c", "Skipping oversized CSV row (key length %d)", (int) strlen((const char *) key))
            continue;
        }

        ret = write(fd, buf, (size_t) len);
        if (ret == -1) {
            LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
        }
    }

    close(fd);
}
|
||||
|
||||
/**
 * Dump an aggregation table with integer keys to out_path as CSV.
 *
 * The key pointer itself is printed as a long; rows are written as
 * "\n<key>,<size>,<count>" after the header.
 *
 * @param out_path Destination file
 * @param header   CSV header line (written without a trailing newline)
 * @param table    Table mapping pointer-encoded longs to agg_t values
 */
void write_agg_csv_long(const char *out_path, const char *header, GHashTable *table) {
    char row[4096];

    int fd = open_or_exit(out_path);
    if (write(fd, header, strlen(header)) == -1) {
        LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
    }

    GHashTableIter iter;
    void *bucket;
    void *entry;

    g_hash_table_iter_init(&iter, table);
    while (g_hash_table_iter_next(&iter, &bucket, &entry)) {
        const agg_t *agg = entry;
        size_t row_len = sprintf(row, "\n%ld,%ld,%ld", (long) bucket, agg->size, agg->count);

        if (write(fd, row, row_len) == -1) {
            LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
        }
    }

    close(fd);
}
|
||||
|
||||
/**
 * Compute the statistics of an index and write them as four CSV files:
 * <out_prefix>treemap.csv, <out_prefix>mime_agg.csv,
 * <out_prefix>size_agg.csv and <out_prefix>date_agg.csv.
 *
 * @param index      Index to read
 * @param threshold  Fraction of the total size below which treemap entries
 *                   are merged into their parent directory
 * @param out_prefix Prefix prepended verbatim to each output file name
 * @return Always 0
 */
int generate_stats(index_t *index, const double threshold, const char *out_prefix) {

    // The totals are file-level statics: start from zero on every run
    TotalSize = 0;
    DocumentCount = 0;

    FlatTree = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
    BufferTable = g_hash_table_new(g_str_hash, g_str_equal);

    AggMime = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
    AggSize = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
    AggDate = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);

    LOG_INFO("stats.c", "Generating stats...")

    read_index_into_tables(index);

    LOG_DEBUG("stats.c", "Read index into tables")
    LOG_DEBUGF("stats.c", "Total size is %ld", TotalSize)
    LOG_DEBUGF("stats.c", "Document count is %ld", DocumentCount)
    LOG_DEBUGF("stats.c", "Merging small directories upwards with a threshold of %f%%", threshold * 100)

    // Keep merging until a pass merges at most 100 entries
    while (merge_up(threshold) > 100) {}

    // snprintf always NUL-terminates and never writes past tmp
    char tmp[PATH_MAX];

    snprintf(tmp, sizeof(tmp), "%streemap.csv", out_prefix);
    write_treemap_csv(threshold, tmp);

    snprintf(tmp, sizeof(tmp), "%smime_agg.csv", out_prefix);
    write_agg_csv_str(tmp, MIME_AGG_CSV_HEADER, AggMime);

    snprintf(tmp, sizeof(tmp), "%ssize_agg.csv", out_prefix);
    write_agg_csv_long(tmp, SIZE_AGG_CSV_HEADER, AggSize);

    snprintf(tmp, sizeof(tmp), "%sdate_agg.csv", out_prefix);
    write_agg_csv_long(tmp, DATE_AGG_CSV_HEADER, AggDate);

    // g_hash_table_destroy() releases the remaining keys and values through
    // the destroy functions registered above
    g_hash_table_destroy(FlatTree);
    g_hash_table_destroy(BufferTable);
    g_hash_table_destroy(AggMime);
    g_hash_table_destroy(AggSize);
    g_hash_table_destroy(AggDate);

    return 0;
}
|
||||
|
||||
6
src/stats.h
Normal file
6
src/stats.h
Normal file
@@ -0,0 +1,6 @@
|
||||
#ifndef SIST2_STATS_H
|
||||
#define SIST2_STATS_H
|
||||
|
||||
int generate_stats(index_t *index, double threshold, const char* out_prefix);
|
||||
|
||||
#endif
|
||||
28
src/tpool.c
28
src/tpool.c
@@ -3,6 +3,8 @@
|
||||
#include "sist.h"
|
||||
#include <pthread.h>
|
||||
|
||||
#define MAX_QUEUE_SIZE 1000000
|
||||
|
||||
typedef void (*thread_func_t)(void *arg);
|
||||
|
||||
typedef struct tpool_work {
|
||||
@@ -26,6 +28,7 @@ typedef struct tpool {
|
||||
int work_cnt;
|
||||
int done_cnt;
|
||||
|
||||
int free_arg;
|
||||
int stop;
|
||||
|
||||
void (*cleanup_func)();
|
||||
@@ -49,6 +52,13 @@ static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) {
|
||||
return work;
|
||||
}
|
||||
|
||||
// Log the pool's thread_cnt, work_cnt, done_cnt and stop fields at debug level.
void tpool_dump_debug_info(tpool_t *pool) {
|
||||
LOG_DEBUGF("tpool.c", "pool->thread_cnt = %d", pool->thread_cnt)
|
||||
LOG_DEBUGF("tpool.c", "pool->work_cnt = %d", pool->work_cnt)
|
||||
LOG_DEBUGF("tpool.c", "pool->done_cnt = %d", pool->done_cnt)
|
||||
LOG_DEBUGF("tpool.c", "pool->stop = %d", pool->stop)
|
||||
}
|
||||
|
||||
/**
|
||||
* Pop work object from thread pool
|
||||
*/
|
||||
@@ -79,6 +89,10 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
|
||||
usleep(10000);
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&(pool->work_mutex));
|
||||
if (pool->work_head == NULL) {
|
||||
pool->work_head = work;
|
||||
@@ -121,7 +135,9 @@ static void *tpool_worker(void *arg) {
|
||||
}
|
||||
|
||||
work->func(work->arg);
|
||||
free(work->arg);
|
||||
if (pool->free_arg) {
|
||||
free(work->arg);
|
||||
}
|
||||
free(work);
|
||||
}
|
||||
|
||||
@@ -138,8 +154,11 @@ static void *tpool_worker(void *arg) {
|
||||
pthread_mutex_unlock(&(pool->work_mutex));
|
||||
}
|
||||
|
||||
LOG_INFO("tpool.c", "Executing cleaup function")
|
||||
pool->cleanup_func();
|
||||
if (pool->cleanup_func != NULL) {
|
||||
LOG_INFO("tpool.c", "Executing cleanup function")
|
||||
pool->cleanup_func();
|
||||
LOG_DEBUG("tpool.c", "Done executing cleanup function")
|
||||
}
|
||||
|
||||
pthread_cond_signal(&(pool->working_cond));
|
||||
pthread_mutex_unlock(&(pool->work_mutex));
|
||||
@@ -207,13 +226,14 @@ void tpool_destroy(tpool_t *pool) {
|
||||
* Create a thread pool
|
||||
* @param thread_cnt Worker threads count
|
||||
*/
|
||||
tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
|
||||
tpool_t *tpool_create(size_t thread_cnt, void cleanup_func(), int free_arg) {
|
||||
|
||||
tpool_t *pool = malloc(sizeof(tpool_t));
|
||||
pool->thread_cnt = thread_cnt;
|
||||
pool->work_cnt = 0;
|
||||
pool->done_cnt = 0;
|
||||
pool->stop = 0;
|
||||
pool->free_arg = free_arg;
|
||||
pool->cleanup_func = cleanup_func;
|
||||
pool->threads = calloc(sizeof(pthread_t), thread_cnt);
|
||||
|
||||
|
||||
@@ -8,12 +8,14 @@ typedef struct tpool tpool_t;
|
||||
|
||||
typedef void (*thread_func_t)(void *arg);
|
||||
|
||||
tpool_t *tpool_create(size_t num, void (*cleanup_func)());
|
||||
tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg);
|
||||
void tpool_start(tpool_t *pool);
|
||||
void tpool_destroy(tpool_t *tm);
|
||||
void tpool_destroy(tpool_t *pool);
|
||||
|
||||
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
|
||||
void tpool_wait(tpool_t *tm);
|
||||
void tpool_wait(tpool_t *pool);
|
||||
|
||||
void tpool_dump_debug_info(tpool_t *pool);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -6,11 +6,11 @@
|
||||
#define INDEX_VERSION_EXTERNAL "_external_v1"
|
||||
|
||||
typedef struct index_descriptor {
|
||||
char uuid[UUID_STR_LEN];
|
||||
char id[MD5_STR_LENGTH];
|
||||
char version[64];
|
||||
long timestamp;
|
||||
char root[PATH_MAX];
|
||||
char rewrite_url[8196];
|
||||
char rewrite_url[8192];
|
||||
short root_len;
|
||||
char name[1024];
|
||||
char type[64];
|
||||
@@ -19,6 +19,8 @@ typedef struct index_descriptor {
|
||||
typedef struct index_t {
|
||||
struct index_descriptor desc;
|
||||
struct store_t *store;
|
||||
struct store_t *tag_store;
|
||||
struct store_t *meta_store;
|
||||
char path[PATH_MAX];
|
||||
} index_t;
|
||||
|
||||
|
||||
140
src/util.c
140
src/util.c
@@ -2,7 +2,6 @@
|
||||
#include "src/ctx.h"
|
||||
|
||||
#include <wordexp.h>
|
||||
#include <glib.h>
|
||||
|
||||
#define PBSTR "========================================"
|
||||
#define PBWIDTH 40
|
||||
@@ -26,10 +25,11 @@ dyn_buffer_t url_escape(char *str) {
|
||||
}
|
||||
|
||||
char *abspath(const char *path) {
|
||||
wordexp_t w;
|
||||
wordexp(path, &w, 0);
|
||||
|
||||
char *abs = realpath(w.we_wordv[0], NULL);
|
||||
char *expanded = expandpath(path);
|
||||
|
||||
char *abs = realpath(expanded, NULL);
|
||||
free(expanded);
|
||||
if (abs == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
@@ -38,16 +38,46 @@ char *abspath(const char *path) {
|
||||
strcat(abs, "/");
|
||||
}
|
||||
|
||||
wordfree(&w);
|
||||
return abs;
|
||||
}
|
||||
|
||||
char *expandpath(const char *path) {
|
||||
wordexp_t w;
|
||||
wordexp(path, &w, 0);
|
||||
void shell_escape(char *dst, const char *src) {
|
||||
const char *ptr = src;
|
||||
char *out = dst;
|
||||
while ((*ptr)) {
|
||||
char c = *ptr++;
|
||||
|
||||
char *expanded = malloc(strlen(w.we_wordv[0]) + 2);
|
||||
strcpy(expanded, w.we_wordv[0]);
|
||||
if (c == '&' || c == '\n' || c == '|' || c == ';' || c == '<' ||
|
||||
c == '>' || c == '(' || c == ')' || c == '{' || c == '}') {
|
||||
*out++ = '\\';
|
||||
}
|
||||
*out++ = c;
|
||||
}
|
||||
*out = 0;
|
||||
}
|
||||
|
||||
char *expandpath(const char *path) {
|
||||
char tmp[PATH_MAX * 2];
|
||||
|
||||
shell_escape(tmp, path);
|
||||
|
||||
wordexp_t w;
|
||||
wordexp(tmp, &w, 0);
|
||||
|
||||
if (w.we_wordv == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*tmp = '\0';
|
||||
for (int i = 0; i < w.we_wordc; i++) {
|
||||
strcat(tmp, w.we_wordv[i]);
|
||||
if (i != w.we_wordc - 1) {
|
||||
strcat(tmp, " ");
|
||||
}
|
||||
}
|
||||
|
||||
char *expanded = malloc(strlen(tmp) + 2);
|
||||
strcpy(expanded, tmp);
|
||||
strcat(expanded, "/");
|
||||
|
||||
wordfree(&w);
|
||||
@@ -94,7 +124,7 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
|
||||
}
|
||||
|
||||
GHashTable *incremental_get_table() {
|
||||
GHashTable *file_table = g_hash_table_new(g_direct_hash, g_direct_equal);
|
||||
GHashTable *file_table = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
|
||||
return file_table;
|
||||
}
|
||||
|
||||
@@ -123,4 +153,92 @@ const char *find_file_in_paths(const char *paths[], const char *filename) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#define ESCAPE_CHAR ']'
|
||||
|
||||
void str_escape(char *dst, const char *str) {
|
||||
const size_t len = strlen(str);
|
||||
|
||||
char buf[16384];
|
||||
memset(buf + len, 0, 8);
|
||||
strcpy(buf, str);
|
||||
|
||||
char *cur = dst;
|
||||
const char *ptr = buf;
|
||||
const char *oldPtr = ptr;
|
||||
|
||||
utf8_int32_t c;
|
||||
char tmp[16];
|
||||
|
||||
do {
|
||||
ptr = (char *) utf8codepoint(ptr, &c);
|
||||
*(int *) tmp = 0x00000000;
|
||||
size_t code_len = (ptr - oldPtr);
|
||||
memcpy(tmp, oldPtr, code_len);
|
||||
oldPtr = ptr;
|
||||
|
||||
if (!utf8_validchr2(tmp)) {
|
||||
for (int i = 0; i < code_len; i++) {
|
||||
if (tmp[i] == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char) tmp[i]);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == ESCAPE_CHAR) {
|
||||
*cur++ = ESCAPE_CHAR;
|
||||
*cur++ = ESCAPE_CHAR;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (((utf8_int32_t) 0xffffff80 & c) == 0) {
|
||||
*(cur++) = (char) c;
|
||||
} else if (((utf8_int32_t) 0xfffff800 & c) == 0) {
|
||||
*(cur++) = 0xc0 | (char) (c >> 6);
|
||||
*(cur++) = 0x80 | (char) (c & 0x3f);
|
||||
} else if (((utf8_int32_t) 0xffff0000 & c) == 0) {
|
||||
*(cur++) = 0xe0 | (char) (c >> 12);
|
||||
*(cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
|
||||
*(cur++) = 0x80 | (char) (c & 0x3f);
|
||||
} else {
|
||||
*(cur++) = 0xf0 | (char) (c >> 18);
|
||||
*(cur++) = 0x80 | (char) ((c >> 12) & 0x3f);
|
||||
*(cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
|
||||
*(cur++) = 0x80 | (char) (c & 0x3f);
|
||||
}
|
||||
|
||||
} while (*ptr != '\0');
|
||||
|
||||
*cur = '\0';
|
||||
}
|
||||
|
||||
void str_unescape(char *dst, const char *str) {
|
||||
char *cur = dst;
|
||||
const char *ptr = str;
|
||||
|
||||
char tmp[3];
|
||||
tmp[2] = '\0';
|
||||
|
||||
while (*ptr != 0) {
|
||||
char c = *ptr++;
|
||||
|
||||
if (c == ESCAPE_CHAR) {
|
||||
char next = *ptr;
|
||||
|
||||
if (next == ESCAPE_CHAR) {
|
||||
*cur++ = (char) c;
|
||||
ptr += 1;
|
||||
} else {
|
||||
tmp[0] = *(ptr);
|
||||
tmp[1] = *(ptr + 1);
|
||||
*cur++ = (char) strtol(tmp, NULL, 16);
|
||||
ptr += 2;
|
||||
}
|
||||
} else {
|
||||
*cur++ = c;
|
||||
}
|
||||
}
|
||||
*cur = '\0';
|
||||
}
|
||||
|
||||
99
src/util.h
99
src/util.h
@@ -10,6 +10,8 @@
|
||||
#include "third-party/utf8.h/utf8.h"
|
||||
#include "libscan/scan.h"
|
||||
|
||||
#define MD5_STR_LENGTH 33
|
||||
|
||||
|
||||
char *abspath(const char *path);
|
||||
|
||||
@@ -21,26 +23,103 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
|
||||
|
||||
GHashTable *incremental_get_table();
|
||||
|
||||
__always_inline
|
||||
static void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
|
||||
g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(mtime));
|
||||
|
||||
const char *find_file_in_paths(const char **paths, const char *filename);
|
||||
|
||||
|
||||
void str_escape(char *dst, const char *str);
|
||||
|
||||
void str_unescape(char *dst, const char *str);
|
||||
|
||||
static int hex2buf(const char *str, int len, unsigned char *bytes) {
|
||||
static const uint8_t hashmap[] = {
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||
};
|
||||
|
||||
for (int pos = 0; pos < len; pos += 2) {
|
||||
int idx0 = (uint8_t) str[pos + 0];
|
||||
int idx1 = (uint8_t) str[pos + 1];
|
||||
bytes[pos / 2] = (uint8_t) (hashmap[idx0] << 4) | hashmap[idx1];
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int incremental_get(GHashTable *table, unsigned long inode_no) {
|
||||
/**
 * Encode raw bytes as a lower-case hexadecimal string.
 *
 * @param buf        Bytes to encode
 * @param buflen     Number of bytes in buf
 * @param hex_string Output buffer; receives 2 * buflen characters followed by
 *                   a NUL terminator
 */
static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
    static const char digits[] = "0123456789abcdef";

    size_t out = 0;
    for (size_t in = 0; in < buflen; in++) {
        const unsigned char byte = buf[in];

        hex_string[out++] = digits[(byte >> 4) & 0x0f];
        hex_string[out++] = digits[byte & 0x0f];
    }
    hex_string[out] = '\0';
}
|
||||
|
||||
|
||||
__always_inline
|
||||
static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
|
||||
return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
|
||||
}
|
||||
|
||||
|
||||
__always_inline
|
||||
static void incremental_put(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
|
||||
char *ptr = malloc(MD5_STR_LENGTH);
|
||||
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
|
||||
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int incremental_get(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
|
||||
if (table != NULL) {
|
||||
return GPOINTER_TO_INT(g_hash_table_lookup(table, (gpointer) inode_no));
|
||||
char md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(path_md5, MD5_DIGEST_LENGTH, md5_str);
|
||||
return GPOINTER_TO_INT(g_hash_table_lookup(table, md5_str));
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
|
||||
return g_hash_table_insert(table, GINT_TO_POINTER(inode_no), GINT_TO_POINTER(1));
|
||||
static int incremental_mark_file_for_copy(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
|
||||
char *ptr = malloc(MD5_STR_LENGTH);
|
||||
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
|
||||
return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
|
||||
}
|
||||
|
||||
|
||||
const char *find_file_in_paths(const char **paths, const char *filename);
|
||||
|
||||
#endif
|
||||
|
||||
503
src/web/serve.c
503
src/web/serve.c
@@ -8,20 +8,8 @@
|
||||
|
||||
#include <src/ctx.h>
|
||||
|
||||
#include <mongoose.h>
|
||||
|
||||
#define CHUNK_SIZE 1024 * 1024 * 10
|
||||
|
||||
|
||||
static int has_prefix(const struct mg_str *str, const struct mg_str *prefix) {
|
||||
return str->len > prefix->len && memcmp(str->p, prefix->p, prefix->len) == 0;
|
||||
}
|
||||
|
||||
static int is_equal(const struct mg_str *s1, const struct mg_str *s2) {
|
||||
return s1->len == s2->len && memcmp(s1->p, s2->p, s2->len) == 0;
|
||||
}
|
||||
|
||||
static void send_response_line(struct mg_connection *nc, int status_code, int length, char *extra_headers) {
|
||||
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
|
||||
mg_printf(
|
||||
nc,
|
||||
"HTTP/1.1 %d %s\r\n"
|
||||
@@ -38,7 +26,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, int le
|
||||
|
||||
index_t *get_index_by_id(const char *index_id) {
|
||||
for (int i = WebCtx.index_count; i >= 0; i--) {
|
||||
if (strcmp(index_id, WebCtx.indices[i].desc.uuid) == 0) {
|
||||
if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
|
||||
return &WebCtx.indices[i];
|
||||
}
|
||||
}
|
||||
@@ -53,39 +41,91 @@ store_t *get_store(const char *index_id) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
store_t *get_tag_store(const char *index_id) {
|
||||
index_t *idx = get_index_by_id(index_id);
|
||||
if (idx != NULL) {
|
||||
return idx->tag_store;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void search_index(struct mg_connection *nc) {
|
||||
send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html");
|
||||
mg_send(nc, search_html, sizeof(search_html));
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
}
|
||||
|
||||
void javascript(struct mg_connection *nc) {
|
||||
void stats(struct mg_connection *nc) {
|
||||
send_response_line(nc, 200, sizeof(stats_html), "Content-Type: text/html");
|
||||
mg_send(nc, stats_html, sizeof(stats_html));
|
||||
}
|
||||
|
||||
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 4) {
|
||||
mg_http_reply(nc, 404, "", "");
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_md5[MD5_STR_LENGTH];
|
||||
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
|
||||
index_t *index = get_index_by_id(arg_md5);
|
||||
if (index == NULL) {
|
||||
mg_http_reply(nc, 404, "", "");
|
||||
return;
|
||||
}
|
||||
|
||||
const char *file;
|
||||
switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
|
||||
case 1:
|
||||
file = "treemap.csv";
|
||||
break;
|
||||
case 2:
|
||||
file = "mime_agg.csv";
|
||||
break;
|
||||
case 3:
|
||||
file = "size_agg.csv";
|
||||
break;
|
||||
case 4:
|
||||
file = "date_agg.csv";
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
char disposition[8192];
|
||||
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"\r\n", file);
|
||||
|
||||
char full_path[PATH_MAX];
|
||||
strcpy(full_path, index->path);
|
||||
strcat(full_path, file);
|
||||
|
||||
mg_http_serve_file(nc, hm, full_path, "text/csv", disposition);
|
||||
}
|
||||
|
||||
void javascript_lib(struct mg_connection *nc) {
|
||||
send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript");
|
||||
mg_send(nc, bundle_js, sizeof(bundle_js));
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
}
|
||||
|
||||
int client_requested_dark_theme(struct http_message *hm) {
|
||||
struct mg_str *cookie_header = mg_get_http_header(hm, "cookie");
|
||||
void javascript_search(struct mg_connection *nc) {
|
||||
send_response_line(nc, 200, sizeof(search_js), "Content-Type: application/javascript");
|
||||
mg_send(nc, search_js, sizeof(search_js));
|
||||
}
|
||||
|
||||
int client_requested_dark_theme(struct mg_http_message *hm) {
|
||||
struct mg_str *cookie_header = mg_http_get_header(hm, "cookie");
|
||||
if (cookie_header == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
char buf[4096];
|
||||
char *sist_cookie = buf;
|
||||
if (mg_http_parse_header2(cookie_header, "sist", &sist_cookie, sizeof(buf)) == 0) {
|
||||
return FALSE;
|
||||
}
|
||||
struct mg_str sist_cookie = mg_http_get_header_var(*cookie_header, mg_str_n("sist", 4));
|
||||
|
||||
int ret = strcmp(sist_cookie, "dark") == 0;
|
||||
if (sist_cookie != buf) {
|
||||
free(sist_cookie);
|
||||
}
|
||||
|
||||
return ret;
|
||||
return mg_strcmp(sist_cookie, mg_str_n("dark", 4)) == 0;
|
||||
}
|
||||
|
||||
void style(struct mg_connection *nc, struct http_message *hm) {
|
||||
void style(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (client_requested_dark_theme(hm)) {
|
||||
send_response_line(nc, 200, sizeof(bundle_dark_css), "Content-Type: text/css");
|
||||
@@ -94,11 +134,9 @@ void style(struct mg_connection *nc, struct http_message *hm) {
|
||||
send_response_line(nc, 200, sizeof(bundle_css), "Content-Type: text/css");
|
||||
mg_send(nc, bundle_css, sizeof(bundle_css));
|
||||
}
|
||||
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
}
|
||||
|
||||
void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) {
|
||||
void img_sprite_skin_flat(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
if (client_requested_dark_theme(hm)) {
|
||||
send_response_line(nc, 200, sizeof(sprite_skin_flat_dark_png), "Content-Type: image/png");
|
||||
mg_send(nc, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png));
|
||||
@@ -106,110 +144,111 @@ void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) {
|
||||
send_response_line(nc, 200, sizeof(sprite_skin_flat_png), "Content-Type: image/png");
|
||||
mg_send(nc, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
|
||||
}
|
||||
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
}
|
||||
|
||||
void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
|
||||
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (path->len != UUID_STR_LEN * 2 + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
if (hm->uri.len != 68) {
|
||||
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_uuid[UUID_STR_LEN];
|
||||
char arg_index[UUID_STR_LEN];
|
||||
char arg_file_md5[MD5_STR_LENGTH];
|
||||
char arg_index[MD5_STR_LENGTH];
|
||||
|
||||
memcpy(arg_index, hm->uri.p + 3, UUID_STR_LEN);
|
||||
*(arg_index + UUID_STR_LEN - 1) = '\0';
|
||||
memcpy(arg_uuid, hm->uri.p + 3 + UUID_STR_LEN, UUID_STR_LEN);
|
||||
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
|
||||
memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
|
||||
memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
|
||||
*(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
|
||||
uuid_t uuid;
|
||||
int ret = uuid_parse(arg_uuid, uuid);
|
||||
if (ret != 0) {
|
||||
LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
unsigned char md5_buf[MD5_DIGEST_LENGTH];
|
||||
hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
|
||||
|
||||
store_t *store = get_store(arg_index);
|
||||
if (store == NULL) {
|
||||
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
size_t data_len = 0;
|
||||
char *data = store_read(store, (char *) uuid, sizeof(uuid_t), &data_len);
|
||||
char *data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
|
||||
if (data_len != 0) {
|
||||
send_response_line(nc, 200, data_len, "Content-Type: image/jpeg");
|
||||
mg_send(nc, data, data_len);
|
||||
free(data);
|
||||
}
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
}
|
||||
|
||||
void search(struct mg_connection *nc, struct http_message *hm) {
|
||||
void search(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->body.len == 0) {
|
||||
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
mg_http_reply(nc, 500, "", "Invalid request");
|
||||
return;
|
||||
}
|
||||
|
||||
char *body = malloc(hm->body.len + 1);
|
||||
memcpy(body, hm->body.p, hm->body.len);
|
||||
memcpy(body, hm->body.ptr, hm->body.len);
|
||||
*(body + hm->body.len) = '\0';
|
||||
|
||||
char url[4096];
|
||||
snprintf(url, 4096, "%s/sist2/_search", WebCtx.es_url);
|
||||
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
|
||||
|
||||
nc->user_data = web_post_async(url, body);
|
||||
free(body);
|
||||
nc->fn_data = web_post_async(url, body);
|
||||
}
|
||||
|
||||
int serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
|
||||
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
|
||||
|
||||
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
|
||||
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
|
||||
|
||||
char name_unescaped[PATH_MAX * 3];
|
||||
str_unescape(name_unescaped, name);
|
||||
|
||||
char path_unescaped[PATH_MAX * 3];
|
||||
str_unescape(path_unescaped, path);
|
||||
|
||||
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
|
||||
|
||||
char url[8196];
|
||||
char url[8192];
|
||||
snprintf(url, sizeof(url),
|
||||
"%s%s/%s%s%s",
|
||||
idx->desc.rewrite_url, path, name, strlen(ext) == 0 ? "" : ".", ext);
|
||||
idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
|
||||
|
||||
dyn_buffer_t encoded = url_escape(url);
|
||||
mg_http_send_redirect(
|
||||
nc, 308,
|
||||
(struct mg_str) MG_MK_STR_N(encoded.buf, encoded.cur),
|
||||
(struct mg_str) MG_NULL_STR
|
||||
);
|
||||
dyn_buffer_write_char(&encoded, '\0');
|
||||
|
||||
mg_http_reply(nc, 308, "Location: %s", encoded.buf);
|
||||
dyn_buffer_destroy(&encoded);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
}
|
||||
|
||||
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct http_message *hm) {
|
||||
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
|
||||
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
|
||||
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
|
||||
const char *mime = cJSON_GetObjectItem(json, "mime")->valuestring;
|
||||
|
||||
char name_unescaped[PATH_MAX * 3];
|
||||
str_unescape(name_unescaped, name);
|
||||
|
||||
char path_unescaped[PATH_MAX * 3];
|
||||
str_unescape(path_unescaped, path);
|
||||
|
||||
char full_path[PATH_MAX];
|
||||
snprintf(full_path, PATH_MAX, "%s%s%s%s%s%s",
|
||||
idx->desc.root, path, strlen(path) == 0 ? "" : "/",
|
||||
name, strlen(ext) == 0 ? "" : ".", ext);
|
||||
idx->desc.root, path_unescaped, strlen(path_unescaped) == 0 ? "" : "/",
|
||||
name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
|
||||
|
||||
LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)
|
||||
|
||||
char disposition[8196];
|
||||
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"",
|
||||
char disposition[8192];
|
||||
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"\r\n",
|
||||
name, strlen(ext) == 0 ? "" : ".", ext);
|
||||
|
||||
mg_http_serve_file(nc, hm, full_path, mg_mk_str(mime), mg_mk_str(disposition));
|
||||
mg_http_serve_file(nc, hm, full_path, mime, disposition);
|
||||
}
|
||||
|
||||
void index_info(struct mg_connection *nc) {
|
||||
@@ -222,7 +261,7 @@ void index_info(struct mg_connection *nc) {
|
||||
cJSON *idx_json = cJSON_CreateObject();
|
||||
cJSON_AddStringToObject(idx_json, "name", idx->desc.name);
|
||||
cJSON_AddStringToObject(idx_json, "version", idx->desc.version);
|
||||
cJSON_AddStringToObject(idx_json, "id", idx->desc.uuid);
|
||||
cJSON_AddStringToObject(idx_json, "id", idx->desc.id);
|
||||
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
|
||||
cJSON_AddItemToArray(arr, idx_json);
|
||||
}
|
||||
@@ -233,37 +272,35 @@ void index_info(struct mg_connection *nc) {
|
||||
mg_send(nc, json_str, strlen(json_str));
|
||||
free(json_str);
|
||||
cJSON_Delete(json);
|
||||
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
}
|
||||
|
||||
|
||||
void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
|
||||
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (path->len != UUID_STR_LEN + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_uuid[UUID_STR_LEN];
|
||||
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
|
||||
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
|
||||
char arg_md5[MD5_STR_LENGTH];
|
||||
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
|
||||
cJSON *doc = elastic_get_document(arg_uuid);
|
||||
cJSON *doc = elastic_get_document(arg_md5);
|
||||
cJSON *source = cJSON_GetObjectItem(doc, "_source");
|
||||
|
||||
cJSON *index_id = cJSON_GetObjectItem(source, "index");
|
||||
if (index_id == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
index_t *idx = get_index_by_id(index_id->valuestring);
|
||||
if (idx == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -272,23 +309,21 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
|
||||
mg_send(nc, json_str, (int) strlen(json_str));
|
||||
free(json_str);
|
||||
cJSON_Delete(doc);
|
||||
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
}
|
||||
|
||||
void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
|
||||
void file(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (path->len != UUID_STR_LEN + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_uuid[UUID_STR_LEN];
|
||||
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
|
||||
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
|
||||
char arg_md5[MD5_STR_LENGTH];
|
||||
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
|
||||
const char *next = arg_uuid;
|
||||
const char *next = arg_md5;
|
||||
cJSON *doc = NULL;
|
||||
cJSON *index_id = NULL;
|
||||
cJSON *source = NULL;
|
||||
@@ -299,7 +334,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
|
||||
index_id = cJSON_GetObjectItem(source, "index");
|
||||
if (index_id == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
cJSON *parent = cJSON_GetObjectItem(source, "parent");
|
||||
@@ -313,7 +348,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
|
||||
|
||||
if (idx == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -334,75 +369,230 @@ void status(struct mg_connection *nc) {
|
||||
}
|
||||
|
||||
free(status);
|
||||
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
}
|
||||
|
||||
static void ev_router(struct mg_connection *nc, int ev, void *p) {
|
||||
struct mg_str scheme;
|
||||
struct mg_str user_info;
|
||||
struct mg_str host;
|
||||
unsigned int port;
|
||||
struct mg_str path;
|
||||
struct mg_str query;
|
||||
struct mg_str fragment;
|
||||
typedef struct {
|
||||
char *name;
|
||||
int delete;
|
||||
char *path_md5_str;
|
||||
char *doc_id;
|
||||
} tag_req_t;
|
||||
|
||||
if (ev == MG_EV_HTTP_REQUEST) {
|
||||
struct http_message *hm = (struct http_message *) p;
|
||||
tag_req_t *parse_tag_request(cJSON *json) {
|
||||
|
||||
if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
if (!cJSON_IsObject(json)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cJSON *arg_name = cJSON_GetObjectItem(json, "name");
|
||||
if (arg_name == NULL || !cJSON_IsString(arg_name)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cJSON *arg_delete = cJSON_GetObjectItem(json, "delete");
|
||||
if (arg_delete == NULL || !cJSON_IsBool(arg_delete)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
|
||||
if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
|
||||
strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
|
||||
if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tag_req_t *req = malloc(sizeof(tag_req_t));
|
||||
req->delete = arg_delete->valueint;
|
||||
req->name = arg_name->valuestring;
|
||||
req->path_md5_str = arg_path_md5->valuestring;
|
||||
req->doc_id = arg_doc_id->valuestring;
|
||||
|
||||
return req;
|
||||
}
|
||||
|
||||
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 4) {
|
||||
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_index[MD5_STR_LENGTH];
|
||||
memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
|
||||
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
|
||||
|
||||
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
|
||||
LOG_DEBUG("serve.c", "Invalid tag request")
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
store_t *store = get_tag_store(arg_index);
|
||||
if (store == NULL) {
|
||||
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
char *body = malloc(hm->body.len + 1);
|
||||
memcpy(body, hm->body.ptr, hm->body.len);
|
||||
*(body + hm->body.len) = '\0';
|
||||
cJSON *json = cJSON_Parse(body);
|
||||
|
||||
tag_req_t *arg_req = parse_tag_request(json);
|
||||
if (arg_req == NULL) {
|
||||
LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
|
||||
cJSON_Delete(json);
|
||||
free(body);
|
||||
mg_http_reply(nc, 400, "", "Invalid request");
|
||||
return;
|
||||
}
|
||||
|
||||
cJSON *arr = NULL;
|
||||
|
||||
size_t data_len = 0;
|
||||
const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
|
||||
if (data_len == 0) {
|
||||
arr = cJSON_CreateArray();
|
||||
} else {
|
||||
arr = cJSON_Parse(data);
|
||||
}
|
||||
|
||||
if (arg_req->delete) {
|
||||
|
||||
if (data_len > 0) {
|
||||
cJSON *element = NULL;
|
||||
int i = 0;
|
||||
cJSON_ArrayForEach(element, arr) {
|
||||
if (strcmp(element->valuestring, arg_req->name) == 0) {
|
||||
cJSON_DeleteItemFromArray(arr, i);
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
char *buf = malloc(sizeof(char) * 8192);
|
||||
snprintf(buf, 8192,
|
||||
"{"
|
||||
" \"script\" : {"
|
||||
" \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\","
|
||||
" \"lang\": \"painless\","
|
||||
" \"params\" : {"
|
||||
" \"tag\" : \"%s\""
|
||||
" }"
|
||||
" }"
|
||||
"}", arg_req->name
|
||||
);
|
||||
|
||||
char url[4096];
|
||||
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
|
||||
nc->fn_data = web_post_async(url, buf);
|
||||
|
||||
} else {
|
||||
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
|
||||
|
||||
char *buf = malloc(sizeof(char) * 8192);
|
||||
snprintf(buf, 8192,
|
||||
"{"
|
||||
" \"script\" : {"
|
||||
" \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\","
|
||||
" \"lang\": \"painless\","
|
||||
" \"params\" : {"
|
||||
" \"tag\" : \"%s\""
|
||||
" }"
|
||||
" }"
|
||||
"}", arg_req->name
|
||||
);
|
||||
|
||||
char url[4096];
|
||||
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
|
||||
nc->fn_data = web_post_async(url, buf);
|
||||
}
|
||||
|
||||
char *json_str = cJSON_PrintUnformatted(arr);
|
||||
store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
|
||||
store_flush(store);
|
||||
|
||||
free(arg_req);
|
||||
free(json_str);
|
||||
cJSON_Delete(json);
|
||||
cJSON_Delete(arr);
|
||||
free(body);
|
||||
}
|
||||
|
||||
int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
char user[256] = {0,};
|
||||
char pass[256] = {0,};
|
||||
|
||||
mg_http_creds(hm, user, sizeof(user), pass, sizeof(pass));
|
||||
if (strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
|
||||
mg_http_reply(nc, 401, "WWW-Authenticate: Basic realm=\"sist2\"", "");
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(void *fn_data)) {
|
||||
|
||||
if (ev == MG_EV_HTTP_MSG) {
|
||||
struct mg_http_message *hm = (struct mg_http_message *) ev_data;
|
||||
|
||||
if (WebCtx.auth_enabled == TRUE) {
|
||||
char user[256] = {0,};
|
||||
char pass[256] = {0,};
|
||||
|
||||
int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass));
|
||||
if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
|
||||
mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n"
|
||||
"WWW-Authenticate: Basic realm=\"sist2\"\r\n"
|
||||
"Content-Length: 0\r\n\r\n");
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
if (!validate_auth(nc, hm)) {
|
||||
nc->is_closing = 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_equal(&path, &((struct mg_str) MG_MK_STR("/")))) {
|
||||
if (mg_http_match_uri(hm, "/")) {
|
||||
search_index(nc);
|
||||
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/css")))) {
|
||||
} else if (mg_http_match_uri(hm, "/css")) {
|
||||
style(nc, hm);
|
||||
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/js")))) {
|
||||
javascript(nc);
|
||||
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/img/sprite-skin-flat.png")))) {
|
||||
} else if (mg_http_match_uri(hm, "/stats")) {
|
||||
stats(nc);
|
||||
} else if (mg_http_match_uri(hm, "/jslib")) {
|
||||
javascript_lib(nc);
|
||||
} else if (mg_http_match_uri(hm, "/jssearch")) {
|
||||
javascript_search(nc);
|
||||
} else if (mg_http_match_uri(hm, "/img/sprite-skin-flat.png")) {
|
||||
img_sprite_skin_flat(nc, hm);
|
||||
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/es")))) {
|
||||
} else if (mg_http_match_uri(hm, "/es")) {
|
||||
search(nc, hm);
|
||||
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/i")))) {
|
||||
} else if (mg_http_match_uri(hm, "/i")) {
|
||||
index_info(nc);
|
||||
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/status")))) {
|
||||
} else if (mg_http_match_uri(hm, "/status")) {
|
||||
status(nc);
|
||||
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/f/")))) {
|
||||
file(nc, hm, &path);
|
||||
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/t/")))) {
|
||||
thumbnail(nc, hm, &path);
|
||||
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
|
||||
document_info(nc, hm, &path);
|
||||
} else if (mg_http_match_uri(hm, "/f/*")) {
|
||||
file(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/t/*/*")) {
|
||||
thumbnail(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/s/*/*")) {
|
||||
stats_files(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/tag/*")) {
|
||||
if (WebCtx.tag_auth_enabled == TRUE && !validate_auth(nc, hm)) {
|
||||
nc->is_closing = 1;
|
||||
return;
|
||||
}
|
||||
tag(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/d/*")) {
|
||||
document_info(nc, hm);
|
||||
} else {
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
mg_http_reply(nc, 404, "", "Page not found");
|
||||
}
|
||||
|
||||
} else if (ev == MG_EV_POLL) {
|
||||
if (nc->user_data != NULL) {
|
||||
if (nc->fn_data != NULL) {
|
||||
//Waiting for ES reply
|
||||
subreq_ctx_t *ctx = (subreq_ctx_t *) nc->user_data;
|
||||
mg_mgr_poll(&ctx->mgr, 0);
|
||||
subreq_ctx_t *ctx = (subreq_ctx_t *) nc->fn_data;
|
||||
web_post_async_poll(ctx);
|
||||
|
||||
if (ctx->ev_data.done == TRUE) {
|
||||
|
||||
response_t *r = ctx->ev_data.resp;
|
||||
if (ctx->done == TRUE) {
|
||||
response_t *r = ctx->response;
|
||||
|
||||
if (r->status_code == 200) {
|
||||
send_response_line(nc, 200, r->size, "Content-Type: application/json");
|
||||
@@ -421,12 +611,14 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
|
||||
free(json_str);
|
||||
free(tmp);
|
||||
}
|
||||
//todo return error code
|
||||
|
||||
mg_http_reply(nc, 500, "", "");
|
||||
}
|
||||
|
||||
free_response(r);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
nc->user_data = NULL;
|
||||
free(ctx->data);
|
||||
free(ctx);
|
||||
nc->fn_data = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -437,15 +629,18 @@ void serve(const char *listen_address) {
|
||||
printf("Starting web server @ http://%s\n", listen_address);
|
||||
|
||||
struct mg_mgr mgr;
|
||||
mg_mgr_init(&mgr, NULL);
|
||||
mg_mgr_init(&mgr);
|
||||
|
||||
struct mg_connection *nc = mg_bind(&mgr, listen_address, ev_router);
|
||||
int ok = 1;
|
||||
|
||||
struct mg_connection *nc = mg_http_listen(&mgr, listen_address, ev_router, NULL);
|
||||
if (nc == NULL) {
|
||||
LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address)
|
||||
}
|
||||
mg_set_protocol_http_websocket(nc);
|
||||
|
||||
for (;;) {
|
||||
while (ok) {
|
||||
mg_mgr_poll(&mgr, 10);
|
||||
}
|
||||
mg_mgr_free(&mgr);
|
||||
LOG_INFO("serve.c", "Finished web event loop")
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
77
tests/test_scan.py
Normal file
77
tests/test_scan.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import unittest
|
||||
import subprocess
|
||||
import shutil
|
||||
import json
|
||||
import os
|
||||
|
||||
# Relative path of the sample file tree used as scan input by the tests below.
TEST_FILES = "third-party/libscan/libscan-test-files/test_files"
|
||||
|
||||
|
||||
def copy_files(files):
    """Copy the directory *files* into the scratch area and return the copy's path.

    The copy is created at ``/tmp/sist2_test/<name>`` where ``<name>`` is the
    last component of *files*. Anything already present at that location is
    removed first, so every call starts from a pristine copy.

    A trailing path separator on *files* is tolerated: the path is normalised
    before its last component is taken. Without that, ``basename`` would be
    empty and the whole ``/tmp/sist2_test/`` root would be wiped and reused
    as the destination.
    """
    base = os.path.basename(os.path.normpath(files))
    new_path = os.path.join("/tmp/sist2_test/", base)

    shutil.rmtree(new_path, ignore_errors=True)
    shutil.copytree(files, new_path)
    return new_path
|
||||
|
||||
|
||||
def sist2(*args):
    """Run the ``./sist2`` binary with *args* and return its captured stdout.

    The command line is echoed to stdout before the process is started.
    ``subprocess.CalledProcessError`` propagates when the binary exits with a
    non-zero status.
    """
    command_line = "./sist2 " + " ".join(args)
    print(command_line)

    command = ["./sist2"]
    command.extend(args)
    return subprocess.check_output(args=command)
|
||||
|
||||
|
||||
def sist2_index(files, *args):
    """Scan a fresh copy of *files* into the ``test_i`` index.

    Any previous ``test_i`` directory is deleted before scanning. Extra
    *args* are appended to the ``scan`` command line. Returns an iterator
    over the documents of the resulting index.
    """
    scan_root = copy_files(files)

    shutil.rmtree("test_i", ignore_errors=True)
    scan_command = ("scan", scan_root, "-o", "test_i") + args
    sist2(*scan_command)

    documents = sist2_index_to_dict("test_i")
    return iter(documents)
|
||||
|
||||
|
||||
def sist2_incremental_index(files, func=None, *args):
    """Run an incremental scan of *files* against the existing ``test_i`` index.

    A fresh copy of *files* is made first; when *func* is truthy it is called
    with the path of that copy so the caller can alter the tree before the
    scan. The result is written to ``test_i_inc`` (deleted beforehand) and
    an iterator over its documents is returned. Extra *args* are appended to
    the ``scan`` command line.
    """
    scan_root = copy_files(files)

    if func:
        func(scan_root)

    shutil.rmtree("test_i_inc", ignore_errors=True)
    scan_command = (
        "scan", scan_root, "-o", "test_i_inc", "--incremental", "test_i",
    ) + args
    sist2(*scan_command)

    documents = sist2_index_to_dict("test_i_inc")
    return iter(documents)
|
||||
|
||||
|
||||
def sist2_index_to_dict(index):
    """Yield each document of *index* as a decoded JSON object.

    Runs ``./sist2 index --print <index>`` and parses every non-empty output
    line as JSON. This is a generator: the subprocess is only started once
    iteration begins.
    """
    output = subprocess.check_output(args=["./sist2", "index", "--print", index])
    yield from (json.loads(entry) for entry in output.splitlines() if entry)
|
||||
|
||||
|
||||
class ScanTest(unittest.TestCase):
    """End-to-end checks of ``sist2 scan`` run against the sample file tree."""

    def test_incremental1(self):
        """Incremental scans reflect files removed from and added to the tree."""

        def remove_files(path):
            # Drop two documents that exist in the sample tree.
            for relative in ("msdoc/test1.doc", "msdoc/test2.doc"):
                os.remove(os.path.join(path, relative))

        def add_files(path):
            # Create three empty files that the baseline index has never seen.
            for name in ("newfile1", "newfile2", "newfile3"):
                with open(os.path.join(path, name), "w"):
                    pass

        file_count = len(list(sist2_index(TEST_FILES)))

        after_removal = len(list(sist2_incremental_index(TEST_FILES, remove_files)))
        self.assertEqual(after_removal, file_count - 2)

        after_addition = len(list(sist2_incremental_index(TEST_FILES, add_files)))
        self.assertEqual(after_addition, file_count + 3)
|
||||
|
||||
|
||||
# Allow running this file directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
2
third-party/argparse
vendored
2
third-party/argparse
vendored
Submodule third-party/argparse updated: 4ed6099cb3...ffd9c23427
2
third-party/libscan
vendored
2
third-party/libscan
vendored
Submodule third-party/libscan updated: 621ee64084...a12ec1cb06
Reference in New Issue
Block a user