Compare commits

..

137 Commits

Author SHA1 Message Date
5fe9c9efa3 Tweak CI settings 2021-01-16 11:14:18 -05:00
75e4e93ddd Enable docker image builds 2021-01-16 10:57:55 -05:00
013c54daa0 Fix tag delete 2020-12-31 12:55:37 -05:00
54308ef5e2 Update tags tab automatically 2020-12-31 12:45:23 -05:00
638c2a5c1a Update binary names (again) 2020-12-31 11:03:25 -05:00
9587caddd9 Don't build tests by default, fix enlarge button 2020-12-31 10:55:34 -05:00
f5bbe0dc97 Update binary names 2020-12-31 10:54:30 -05:00
f87eac1f90 Update submodules 2020-12-31 10:26:05 -05:00
ddafbab6a6 Update readme 2020-12-31 10:26:05 -05:00
b91d574756 Add md5 client-side lib 2020-12-31 10:26:05 -05:00
576140e542 fix submodules 2020-12-31 10:26:05 -05:00
050c1283a3 Remove UUID dep, fix incremental scan, use MD5(path) as unique id, version bump 2020-12-31 10:26:05 -05:00
c6e1ba03bc Better support for .doc files 2020-12-31 10:26:05 -05:00
10e32f707f Update README.md 2020-12-31 10:26:05 -05:00
86e83bafaf Update README.md 2020-12-31 10:26:05 -05:00
51a40c8819 Add .doc support 2020-12-31 10:26:05 -05:00
acc557
36281a5108 Use relative path for loading csv in stats 2020-12-31 10:26:05 -05:00
acc557
76a0bda48b Update search.html
Fix relative stats URL
2020-12-31 10:26:05 -05:00
0cf29a660c Fix relative image URL #122 2020-12-31 10:26:05 -05:00
6cd0741848 update build instructions 2020-12-31 10:26:05 -05:00
bc120f349d Setup ARM CI builds 2020-12-23 10:26:26 -05:00
8cac8c98d7 Update dev builds template 2020-12-22 14:45:16 -05:00
30921ac52e Setup drone ci 2020-12-22 14:09:45 -05:00
95bbe39afc Update libmupdf 2020-10-25 09:44:30 -04:00
72ce217f9c Optionally ES schema from file #117 2020-10-25 09:44:30 -04:00
641a8ec90c sidecar files #114, version bump 2020-10-25 09:44:30 -04:00
7a505c2287 Fix typo 2020-10-25 09:44:30 -04:00
12f162d760 Fix #110 2020-10-25 09:44:30 -04:00
4b4ab12fac Version bump 2020-09-22 21:08:24 -04:00
ae283f77ad Fix #112 2020-09-22 21:08:24 -04:00
d3bd53a5ea Fix arm Dockerfile 2020-09-13 16:16:26 -04:00
f7887f24d1 sync libscan 2020-09-13 16:16:26 -04:00
5c8de19188 Update build instructions 2020-09-13 16:16:26 -04:00
d861d278a4 version bump 2020-09-13 16:16:26 -04:00
b6ddeee0e0 Use async curl for ES requests #108 2020-09-13 16:16:26 -04:00
0cd2523b05 arm64 build 2020-09-13 16:16:26 -04:00
5e798f9367 Update issue-template.md 2020-09-13 10:29:42 -04:00
5da6c1488b Handle null mime in document info dialog 2020-08-29 10:34:58 -04:00
9568e25f84 Fix #99 2020-08-29 10:17:28 -04:00
6a8027789a Limited support for UTF16 2020-08-29 10:17:28 -04:00
b1d16d8abf Fix #100 2020-08-29 10:17:28 -04:00
b2a157e24d Update docs 2020-08-25 10:38:38 -04:00
9aead9389a Fix typo in elastic.c 2020-08-25 10:38:38 -04:00
a32c68cba8 Build fixes 2020-08-25 10:38:38 -04:00
d116cf9d91 Default index for web & exec 2020-08-25 10:38:38 -04:00
Andrew
a020a8b32c Update USAGE.md
Fix link to scripting.
2020-08-25 10:38:38 -04:00
5d5d9c3092 Fix heap buffer overflow warning 2020-08-25 10:38:38 -04:00
3379d5ce71 Fix #97 2020-08-25 10:38:38 -04:00
a0ff4a1f01 Fix heap buffer overflow warning 2020-08-25 10:38:38 -04:00
4589f3bde7 Fix #94 2020-08-25 10:38:38 -04:00
1c898640cf Fix #88 2020-08-25 10:38:38 -04:00
a0739d5177 Fix #92 2020-08-25 10:38:38 -04:00
8f9d29dbc6 Fix #91 2020-08-25 10:38:38 -04:00
3ff4b70223 Update README.md 2020-08-25 10:38:38 -04:00
02ad035b09 Workaround when first ebook page is blank 2020-08-25 10:38:38 -04:00
c11feb213d Gracefully handle archive errors in comic.c 2020-08-25 10:38:38 -04:00
72902947cd Fix for #90 2020-08-25 10:38:38 -04:00
a18bb81222 remove warning 2020-08-25 10:38:38 -04:00
1520288f19 Fix #89 2020-08-25 10:38:38 -04:00
e507de194b Fix log colors 2020-08-25 10:38:38 -04:00
0e517d5e2b Fix #81 2020-08-03 20:09:07 -04:00
8223ef3860 Update USAGE.md 2020-08-03 19:48:49 -04:00
995a196690 Log user script task, add async arg 2020-08-03 19:44:43 -04:00
465d017e18 CSS tweaks, fix #87 2020-08-03 19:15:12 -04:00
ca994d3914 Fix bug with media files, don't encode thumbnail when not necessary 2020-07-26 11:52:48 -04:00
db2285973f Configurable column count 2020-07-26 11:50:21 -04:00
61de9e9f14 Set timeout for HTTP get request 2020-07-25 19:55:27 -04:00
3015ef0ff4 Increase file preview file 2020-07-25 17:26:17 -04:00
b55d432841 Fix #65 2020-07-25 09:37:37 -04:00
ed90a140ce Update README.md 2020-07-19 14:53:03 -04:00
052df82373 Fix #83 2020-07-19 13:10:30 -04:00
5676136777 Remove println that was left accidentally 2020-07-18 20:55:12 -04:00
c061613302 Fix #76 2020-07-18 19:23:43 -04:00
d0325fd9b9 Fix for simon987/sist2#85 2020-07-18 18:48:54 -04:00
e05a6f3863 Fix for #75 2020-07-18 18:46:52 -04:00
f1690a9cca Mobi build fix 2020-07-18 13:10:45 -04:00
100a264413 Don't show MuPDF warnings unless --very-verbose is specified 2020-07-18 10:28:05 -04:00
29390bb454 Update README 2020-07-18 09:54:36 -04:00
4d43036ded Fix simon987/sist2#78 2020-07-18 09:41:39 -04:00
0b5cdbd130 Fix #79 2020-07-18 09:36:10 -04:00
53d7695f66 Read .raw thumbnails #80, fix media probing for some formats 2020-07-18 09:31:42 -04:00
8d53456404 fix libscan submodule 2020-07-17 20:33:50 -04:00
cbc08a7cc9 Save ebook renders as jpeg 2020-07-17 20:18:21 -04:00
e629b4d7d3 Faster comic book parsing, probably fixes #77 2020-07-17 19:10:18 -04:00
22f7073b39 mobi reading bugfix 2020-07-16 20:30:28 -04:00
1781a74960 Oops I didn't mean to push this 2020-07-16 19:23:52 -04:00
db96c95ac7 log fix #73 2020-07-16 19:19:23 -04:00
7b9fa4cc0a Fix bad merge... 2020-07-15 21:00:51 -04:00
5cc1fa86a9 Read embedded thumbnail simon987/sist2#74 2020-07-15 20:56:25 -04:00
649689ce30 Remove warning when generating stats 2020-07-15 20:41:38 -04:00
c8536f65a8 Fix memory leak in index 2020-07-15 20:41:09 -04:00
75b5e249c1 Merge pull request #72 from dpieski/patch-1
Update USAGE.md
2020-07-15 14:37:28 -04:00
Andrew
f49e03ac79 Update USAGE.md
added example for Windows to display number of logical processors. 
Does this same limitation apply to the new `index` threads option?
2020-07-15 13:21:02 -05:00
a6d2afc8dc Merge pull request #71 from simon987/web-tag
Web tag
2020-07-14 20:23:22 -04:00
8f8f66ba05 Update README.md 2020-07-14 20:22:03 -04:00
1d9fcf7105 Manual tagging 2020-07-13 19:18:07 -04:00
8127745f2b wip 2020-07-13 19:16:51 -04:00
230988d6d1 frontend tags 2020-07-13 19:15:59 -04:00
13f4dbed2d Handle 429, multi-threaded index module 2020-07-11 17:42:46 -04:00
ed15e89f45 Fix exec-script --es-url not being passed 2020-06-28 12:41:09 -04:00
c636d3d921 Set number_of_replicas to 0 by default in elasticsearch 2020-06-26 18:10:51 -04:00
7e92d4b7d1 refresh index only if user script is ran 2020-06-25 20:48:47 -04:00
8ffe780ab2 Tag tree fix for #64, validate required argument in exec-script 2020-06-25 20:11:30 -04:00
d3c8928fe8 Update readme 2020-06-24 21:06:27 -04:00
d9f628fca4 Build fix 2020-06-21 16:53:22 -04:00
68289268c1 Add exif tag 2020-06-21 16:51:14 -04:00
649c50c465 Update README.md 2020-06-21 14:35:18 -04:00
7b49a0dc49 Build fix 2020-06-21 12:56:13 -04:00
eb559b53aa RAW picture file support 2020-06-21 10:46:11 -04:00
6d01f9c0df whoops 2020-06-19 22:12:19 -04:00
e724fec668 Fix web return codes 2020-06-19 21:41:17 -04:00
fe5e93b300 Update USAGE.md 2020-06-19 21:29:09 -04:00
ecad85fd7d version bump 2020-06-19 21:10:03 -04:00
74cc898259 Fix tag display issue 2020-06-19 21:07:19 -04:00
dc2e4443c4 Add exec-script command 2020-06-19 21:07:19 -04:00
1a64431b52 Merge pull request #63 from dpieski/patch-3
Correct typos in example
2020-06-19 18:26:10 -04:00
Andrew
9bad515e06 Correct typos in example
Correct typos in examples.
2020-06-19 17:22:02 -05:00
648559cedb Update README.md 2020-06-17 13:25:20 -04:00
3e6cd9cd5c Merge pull request #60 from dpieski/patch-2
update Usage.md
2020-06-17 13:04:46 -04:00
f249992798 Update scripting.md 2020-06-17 13:00:07 -04:00
Andrew
e9645ecdaa update Usage.md
Fixing a link.
2020-06-17 10:58:25 -05:00
046edea0e2 Handle special characters in file paths 2020-06-10 19:45:36 -04:00
a011b7e97b Fragment size setting 2020-06-09 21:40:53 -04:00
8c1c1697e0 Fix file wordexp in some paths #59 2020-06-05 19:41:02 -04:00
018b49fa4c Fix csv_escape #58 2020-06-05 19:13:03 -04:00
27b4e6403e Re-enable path autocomplete #54 2020-06-02 19:46:58 -04:00
13fdbd9e69 Fix for ES 7.7 #54 2020-06-01 18:14:34 -04:00
5e7fdaf8dd Update issue-template.md 2020-06-01 10:45:43 -04:00
19d5c8ac9f Update issue-template.md 2020-05-29 18:19:21 -04:00
99497049a8 Merge pull request #53 from dpieski/patch-1
Update README
2020-05-29 18:16:13 -04:00
Andrew
1a3181d78b Update README
changed case of path in a link to the usage guide to fix 404 error.
2020-05-29 15:37:20 -05:00
449aa77c8f Fix for unknown mime inside archives 2020-05-25 17:36:04 -04:00
3058c55510 Memory leak fix #37 2020-05-24 15:42:42 -04:00
dedf9287b2 Fix name separation in --archive list mode 2020-05-24 14:36:59 -04:00
ab199b0c0c Remove arc_reset() function because seek() inside archive doesn't work 2020-05-24 14:18:31 -04:00
c4fbae123e Better support for media files inside archives 2020-05-24 14:10:23 -04:00
dd2397ef5c handle .tgz #44, ignore files inside archives for stats page 2020-05-24 10:10:28 -04:00
66 changed files with 2803 additions and 912 deletions

72
.drone.yml Normal file
View File

@@ -0,0 +1,72 @@
kind: pipeline
type: docker
name: amd64
platform:
os: linux
arch: amd64
steps:
- name: build
image: simon987/ubuntu_ci
commands:
- ./ci/build.sh
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./Docker/
dockerfile: ./Docker/Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
- name: scp files
image: appleboy/drone-scp
settings:
host:
from_secret: SSH_HOST
port:
from_secret: SSH_PORT
user:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./VERSION
- ./sist2-x64-linux
- ./sist2-x64-linux-debug.tar.gz
---
kind: pipeline
type: docker
name: arm64
platform:
arch: arm64
steps:
- name: build
image: simon987/ubuntu_ci_arm
commands:
- ./ci/build_arm64.sh
- name: scp files
image: appleboy/drone-scp
settings:
host:
from_secret: SSH_HOST
port:
from_secret: SSH_PORT
user:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./sist2-arm64-linux

View File

@@ -9,7 +9,9 @@ assignees: ''
sist2 version:
Platform (please indicate if you're using Docker):
Platform (Linux or Docker, x86-64 or arm64):
Elasticsearch version:
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0`

1
.gitignore vendored
View File

@@ -1,6 +1,5 @@
.idea
thumbs
test
*.cbp
CMakeCache.txt
CMakeFiles

View File

@@ -1,69 +0,0 @@
import jetbrains.buildServer.configs.kotlin.v2019_2.*
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.ExecBuildStep
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.exec
import jetbrains.buildServer.configs.kotlin.v2019_2.triggers.vcs
import jetbrains.buildServer.configs.kotlin.v2019_2.vcs.GitVcsRoot
/*
The settings script is an entry point for defining a TeamCity
project hierarchy. The script should contain a single call to the
project() function with a Project instance or an init function as
an argument.
VcsRoots, BuildTypes, Templates, and subprojects can be
registered inside the project using the vcsRoot(), buildType(),
template(), and subProject() methods respectively.
To debug settings scripts in command-line, run the
mvnDebug org.jetbrains.teamcity:teamcity-configs-maven-plugin:generate
command and attach your debugger to the port 8000.
To debug in IntelliJ Idea, open the 'Maven Projects' tool window (View
-> Tool Windows -> Maven Projects), find the generate task node
(Plugins -> teamcity-configs -> teamcity-configs:generate), the
'Debug' option is available in the context menu for the task.
*/
version = "2019.2"
project {
vcsRoot(HttpsGithubComSimon987sist2refsHeadsMaster)
buildType(Build)
}
object Build : BuildType({
name = "Build"
artifactRules = """
sist2
sist2_scan
""".trimIndent()
vcs {
root(HttpsGithubComSimon987sist2refsHeadsMaster)
}
steps {
exec {
name = "Build"
path = "./ci/build.sh"
dockerImage = "simon987/general_ci"
dockerImagePlatform = ExecBuildStep.ImagePlatform.Linux
dockerPull = true
}
}
triggers {
vcs {
}
}
})
object HttpsGithubComSimon987sist2refsHeadsMaster : GitVcsRoot({
name = "https://github.com/simon987/sist2#refs/heads/master"
url = "https://github.com/simon987/sist2"
})

View File

@@ -5,12 +5,16 @@ project(sist2 C)
option(SIST_DEBUG "Build a debug executable" on)
set(BUILD_TESTS off)
add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
add_executable(
sist2
add_executable(sist2
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
@@ -25,12 +29,9 @@ add_executable(
src/util.c src/util.h
src/ctx.h src/types.h
src/log.c src/log.h
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/cli.c src/cli.h
src/stats.c src/stats.h)
src/stats.c src/stats.h src/ctx.c
src/parsing/sidecar.c src/parsing/sidecar.h)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
@@ -39,7 +40,7 @@ find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-glib CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_library(UUID_LIB NAMES uuid)
find_package(CURL CONFIG REQUIRED)
#find_package(OpenSSL REQUIRED)
@@ -56,7 +57,6 @@ target_compile_options(
sist2
PRIVATE
-fPIC
-Werror
)
if (SIST_DEBUG)
@@ -67,12 +67,13 @@ if (SIST_DEBUG)
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
# -O2
)
target_link_options(
sist2
PRIVATE
-fsanitize=address
# -static
)
set_target_properties(
sist2
@@ -104,12 +105,13 @@ target_link_libraries(
argparse
unofficial::glib::glib
unofficial::mongoose::mongoose
# OpenSSL::SSL OpenSSL::Crypto
CURL::libcurl
${UUID_LIB}
pthread
magic
c
scan
)

View File

@@ -1,4 +1,4 @@
FROM ubuntu:19.10
FROM ubuntu:20.04
MAINTAINER simon987 <me@simon987.net>
RUN apt update

View File

@@ -1,14 +0,0 @@
rm ./sist2 sist2_debug
cp ../sist2.gz .
gzip -d sist2.gz
strip sist2
version=$(./sist2 --version)
echo "Version ${version}"
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
docker push simon987/sist2:${version}
docker push simon987/sist2:latest
docker run --rm simon987/sist2 -v

22
DockerArm64/Dockerfile Normal file
View File

@@ -0,0 +1,22 @@
FROM ubuntu:19.10
MAINTAINER simon987 <me@simon987.net>
RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
curl libtiff5 libpng16-16 libpcre3
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
ADD sist2_arm64 /root/sist2
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]

13
DockerArm64/build.sh Executable file
View File

@@ -0,0 +1,13 @@
rm ./sist2_arm64
cp ../sist2_arm64.gz .
gzip -d sist2_arm64.gz
version=$(./sist2_arm64 --version)
echo "Version ${version}"
docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest
docker push simon987/sist2-arm64:"${version}"
docker push simon987/sist2-arm64:latest
docker run --rm simon987/sist2-arm64 -v

View File

@@ -1,6 +1,8 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/app/rest/builds/buildType(Sist2_Build)/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)
# sist2
@@ -15,10 +17,10 @@ sist2 (Simple incremental search tool)
* Fast, low memory usage, multi-threaded
* Mobile-friendly Web interface
* Portable (all its features are packaged in a single executable)
* Extracts text from common file types \*
* Extracts text and metadata from common file types \*
* Generates thumbnails \*
* Incremental scanning
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
* Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
* Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
@@ -50,10 +52,10 @@ sist2 (Simple incremental search tool)
```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* Download a [development snapshot](https://files.simon987.net/sist2/simon987_sist2/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest`
1. See [Usage guide](DOCS/USAGE.md)
1. See [Usage guide](docs/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
@@ -61,7 +63,7 @@ sist2 (Simple incremental search tool)
## Example usage
See [Usage guide](DOCS/USAGE.md) for more details
See [Usage guide](docs/USAGE.md) for more details
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
@@ -72,15 +74,18 @@ See [Usage guide](DOCS/USAGE.md) for more details
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,cbz,cbr,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
`image/*` | ffmpeg | - | yes, `jpeg` | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
cbz,cbr | *(none)* | - | yes | - |
`audio/*` | ffmpeg | - | yes | ID3 tags |
`video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
html, xml | *(none)* | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | *(none)* | yes | no | creator, modified_by, title |
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
doc (MS Word 97-2003) | antiword | yes | yes | author, title |
mobi, azw, azw3 | libmobi | yes | no | author, title |
\* *See [Archive files](#archive-files)*
@@ -91,18 +96,16 @@ they were directly in the file system. Recursive (archives inside archives)
scan is also supported.
**Limitations**:
* Parsing media files with formats that require
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
is limitted (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split
into smaller parts.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### OCR
You can enable OCR support for pdf,xps,cbz,cbr,fb2,epub file types with the
You can enable OCR support for pdf,xps,fb2,epub file types with the
`--ocr <lang>` option. Download the language data files with your
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
@@ -124,12 +127,12 @@ binaries (GCC 7+ required).
1. Install compile-time dependencies
```bash
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf
```
2. Build
```bash
git clone --recursive https://github.com/simon987/sist2/
cmake -D <VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make
```

View File

@@ -1,16 +1,22 @@
#!/usr/bin/env bash
VCPKG_ROOT="/vcpkg"
rm *.gz
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
make
strip sist2
gzip -9 sist2
git submodule update --init --recursive
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
make
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 33
strip sist2
./sist2 -v > VERSION
cp sist2 Docker/
mv sist2 sist2-x64-linux
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 33
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2
mv sist2_debug sist2-x64-linux-debug
tar -czf sist2-x64-linux-debug.tar.gz sist2-x64-linux-debug libasan.so.2

13
ci/build_arm64.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
VCPKG_ROOT="/vcpkg"
rm *.gz
git submodule update --init --recursive
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 4
strip sist2
mv sist2 sist2-arm64-linux

View File

@@ -14,47 +14,64 @@
* [examples](#web-examples)
* [rewrite_url](#rewrite_url)
* [link to specific indices](#link-to-specific-indices)
* [exec-script](#exec-script)
* [tagging](#tagging)
* [sidecar files](#sidecar-files)
```
Usage: sist2 scan [OPTION]... PATH
or: sist2 index [OPTION]... INDEX
or: sist2 web [OPTION]... INDEX...
or: sist2 exec-script [OPTION]... INDEX
Lightning-fast file system indexer and search tool.
-h, --help show this help message and exit
-v, --version Show version and exit
--verbose Turn on logging
--very-verbose Turn on debug messages
-h, --help show this help message and exit
-v, --version Show version and exit
--verbose Turn on logging
--very-verbose Turn on debug messages
Scan options
-t, --threads=<int> Number of threads. DEFAULT=1
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
--incremental=<str> Reuse an existing index and only scan modified files.
-o, --output=<str> Output directory. DEFAULT=index.sist2/
--rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: (name of the directory)
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type
-t, --threads=<int> Number of threads. DEFAULT=1
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
--incremental=<str> Reuse an existing index and only scan modified files.
-o, --output=<str> Output directory. DEFAULT=index.sist2/
--rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: (name of the directory)
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
Index options
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
-p, --print Just print JSON documents to stdout.
--script-file=<str> Path to user script.
--batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
-t, --threads=<int> Number of threads. DEFAULT=1
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
-p, --print Just print JSON documents to stdout.
--script-file=<str> Path to user script.
--mappings-file=<str> Path to Elasticsearch mappings.
--settings-file=<str> Path to Elasticsearch settings.
--async-script Execute user script asynchronously.
--batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
Web options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--bind=<str> Listen on this address. DEFAULT=localhost:4090
--auth=<str> Basic auth in user:password format
Made by simon987 <me@simon987.net>. Released under GPL-3.0
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
--bind=<str> Listen on this address. DEFAULT=localhost:4090
--auth=<str> Basic auth in user:password format
--tag-auth=<str> Basic auth in user:password format for tagging
Exec-script options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
--script-file=<str> Path to user script.
--async-script Execute user script asynchronously.
Made by simon987 <me@simon987.net>. Released under GPL-3.0
```
## Scan
@@ -62,7 +79,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
### Scan options
* `-t, --threads`
Number of threads for file parsing. **Do not set a number higher than `$(nproc)`!**.
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-WmiObject Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
* `-q, --quality`
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
* `--size`
@@ -101,6 +118,11 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
In effect, smaller `treemap-threshold` values will yield a more detailed
(but also a more cluttered and harder to read) visualization.
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### Scan examples
@@ -134,7 +156,13 @@ documents.idx/
├── agg_mime.csv
├── agg_date.csv
├── add_size.csv
── thumbs
── thumbs/
| ├── data.mdb
| └── lock.mdb
├── tags/
| ├── data.mdb
| └── lock.mdb
└── meta/
├── data.mdb
└── lock.mdb
```
@@ -161,9 +189,11 @@ by a third party application. The 'external' index must have the following forma
my_index/
├── descriptor.json
├── _index_0
└── thumbs
├── data.mdb
└── lock.mdb
└── thumbs/
| ├── data.mdb
| └── lock.mdb
└── meta/
└── <empty>
```
*descriptor.json*:
@@ -211,9 +241,11 @@ The `_text.*` items will be indexed and searchable as **text** fields (fuzzy sea
*thumbs/*:
LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
LMDB key-value store. Keys are **binary** 16-byte md5 hash* (`_id` field)
and values are raw image bytes.
*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
Importing an external `binary` type index is technically possible but
it is currently unsupported and has no guaranties of back/forward compatibility.
@@ -223,17 +255,25 @@ it is currently unsupported and has no guaranties of back/forward compatibility.
* `--es-url`
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
same network.
* `--es-index`
Elasticsearch index name. DEFAULT=sist2
* `-p, --print`
Print index in JSON format to stdout.
* `--script-file`
Path to user script. See [Scripting](scripting/README.md).
Path to user script. See [Scripting](scripting.md).
* `--mappings-file`
Path to custom Elasticsearch mappings. If none is specified, [the bundled mappings](https://github.com/simon987/sist2/tree/master/schema) will be used.
* `--settings-file`
Path to custom Elasticsearch settings. *(See above)*
* `--async-script`
Use `wait_for_completion=false` elasticsearch option while executing user script.
(See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
* `--batch-size=<int>`
Index batch size. Indexing is generally faster with larger batches, but payloads that
are too large will fail and additional overhead for retrying with smaller sizes may slow
down the process.
* `-f, --force-reset`
Reset Elasticsearch mappings and settings.
**(You must use this option the first time you use the index command)**.
### Index examples
@@ -257,8 +297,12 @@ sist2 index --print ./my_index/ | jq | less
### Web options
* `--es-url=<str>` Elasticsearch url.
* `--es-index`
Elasticsearch index name. DEFAULT=sist2
* `--bind=<str>` Listen on this address.
* `--auth=<str>` Basic auth in user:password format
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
`--auth` argument, but authentication is only applied the `/tag/` endpoint.
### Web examples
@@ -286,3 +330,74 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
To link to specific indices, you can add a list of comma-separated index name to
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
not displayed.
## exec-script
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
# Tagging
### Manual tagging
You can modify tags of individual documents directly from the
`web` interface. Note that you can setup authentication for this feature
with the `--tag-auth` option (See [web options](#web-options))
![manual_tag](manual_tag.png)
Tags that are manually added are saved both in the
index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing,
they are read from the index and automatically applied.
You can safely copy the `/tags/` database to another index.
See [Automatic tagging](#automatic-tagging) for information about tag
hierarchies and tag colors.
\* *It can take a few seconds to take effect in new search queries, and the page needs
to be reloaded for the tags tab to update*
### Automatic tagging
See [scripting](scripting.md) documentation.
# Sidecar files
When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
original document's metadata. Sidecar metadata files will also work inside archives.
Sidecar files themselves are not saved in the index.
This feature is useful to leverage third-party applications such as speech-to-text or
OCR to add additional metadata to a file.
**Example**
```
~/Documents/
├── Video.mp4
└── Video.mp4.s2meta
```
The sidecar file must have exactly the same file path and the `.s2meta` suffix.
`Video.mp4.s2meta`:
```json
{
"content": "This sidecar file will overwrite some metadata fields of Video.mp4",
"author": "Some author",
"duration": 12345,
"bitrate": 67890,
"some_arbitrary_field": [1,2,3]
}
```
```
sist2 scan ~/Documents -o ./docs.idx
sist2 index ./docs.idx
```
*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,
it is not currently possible to restore both manual tags and sidecar tags without user scripts
while reindexing.

BIN
docs/manual_tag.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.9 KiB

View File

@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase())
tags.add("genre." + ctx._source.genre.toLowerCase());
}
```
@@ -67,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) {
tags.add("year." + m.group(1))
tags.add("year." + m.group(1));
}
```
@@ -111,16 +111,6 @@ if (ctx._source.path != "") {
}
```
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```
Parse `EXIF:F Number` tag
```Java
if (ctx._source?.exif_fnumber != null) {

View File

@@ -10,6 +10,7 @@
"path": {
"type": "text",
"analyzer": "path_analyzer",
"copy_to": "suggest-path",
"fielddata": true,
"fields": {
"nGram": {
@@ -22,9 +23,17 @@
}
}
},
"suggest-path": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
},
"mime": {
"type": "keyword"
},
"parent": {
"type": "keyword",
"index": false
},
"thumbnail": {
"type": "keyword",
"index": false
@@ -49,6 +58,10 @@
"type": "integer",
"index": false
},
"pages": {
"type": "integer",
"index": false
},
"mtime": {
"type": "integer"
},
@@ -121,7 +134,12 @@
}
},
"tag": {
"type": "keyword"
"type": "keyword",
"copy_to": "suggest-tag"
},
"suggest-tag": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
},
"exif_make": {
"type": "text"

View File

@@ -1,7 +1,8 @@
{
"index": {
"refresh_interval": "30s",
"codec": "best_compression"
"codec": "best_compression",
"number_of_replicas": 0
},
"analysis": {
"tokenizer": {

View File

@@ -13,7 +13,7 @@ application/epub+zip, epub
application/freeloader, frl
application/futuresplash, spl
application/groupwise, vew
application/gzip, gz
application/gzip, gz|tgz
application/hta, hta
application/i-deas, unv
application/iges, iges|igs
@@ -111,7 +111,7 @@ application/x-dbf, dbf
application/x-dbt,
application/x-debian-package, deb
application/x-deepv, deepv
application/x-director, dcr|dir|dxr
application/x-director, dir|dxr
application/x-dmp, dmp
application/x-dosdriver,
application/x-dosexec, dll
@@ -347,7 +347,8 @@ text/javascript, js
text/mcf, mcf
text/pascal, pas
text/PGP,
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
application/vnd.coffeescript, coffee
text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc
@@ -429,4 +430,23 @@ video/x-qtc, qtc
video/x-sgi-movie, movie|mv
x-epoc/x-sisx-app,
application/x-zstd-dictionary,
application/vnd.ms-outlook,
application/vnd.ms-outlook, msg
image/x-olympus-orf, orf
image/x-nikon-nef, nef
image/x-fuji-raf, raf
image/x-panasonic-raw, rw2|raw
image/x-adobe-dng, dng
image/x-canon-cr2, cr2
image/x-canon-crw, crw
image/x-dcraw,
image/x-kodak-dcr, dcr
image/x-kodak-k25, k25
image/x-kodak-kdc, kdc
image/x-minolta-mrw, mrw
image/x-pentax-pef, pef
image/x-sigma-x3f, xf3
image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
sist2/sidecar, s2meta
1 application/arj arj
13 application/freeloader frl
14 application/futuresplash spl
15 application/groupwise vew
16 application/gzip gz gz|tgz
17 application/hta hta
18 application/i-deas unv
19 application/iges iges|igs
111 application/x-dbt
112 application/x-debian-package deb
113 application/x-deepv deepv
114 application/x-director dcr|dir|dxr dir|dxr
115 application/x-dmp dmp
116 application/x-dosdriver
117 application/x-dosexec dll
347 text/mcf mcf
348 text/pascal pas
349 text/PGP
350 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
351 application/vnd.coffeescript coffee
352 text/richtext rt|rtf|rtx
353 text/rtf
354 text/scriplet wsc
430 video/x-sgi-movie movie|mv
431 x-epoc/x-sisx-app
432 application/x-zstd-dictionary
433 application/vnd.ms-outlook msg
434 image/x-olympus-orf orf
435 image/x-nikon-nef nef
436 image/x-fuji-raf raf
437 image/x-panasonic-raw rw2|raw
438 image/x-adobe-dng dng
439 image/x-canon-cr2 cr2
440 image/x-canon-crw crw
441 image/x-dcraw
442 image/x-kodak-dcr dcr
443 image/x-kodak-k25 k25
444 image/x-kodak-kdc kdc
445 image/x-minolta-mrw mrw
446 image/x-pentax-pef pef
447 image/x-sigma-x3f xf3
448 image/x-sony-arw arw
449 image/x-sony-sr2 sr2
450 image/x-sony-srf srf
451 image/x-epson-erf erf
452 sist2/sidecar s2meta

View File

@@ -3,6 +3,7 @@ noparse = set()
ext_in_hash = set()
major_mime = {
"sist2": 0,
"model": 1,
"example": 2,
"message": 3,
@@ -18,7 +19,6 @@ major_mime = {
pdf = (
"application/pdf",
"application/x-cbz",
"application/epub+zip",
"application/vnd.ms-xpsdocument",
)
@@ -73,6 +73,29 @@ markup = (
"text/x-sgml"
)
raw = (
"image/x-olympus-orf",
"image/x-nikon-nef",
"image/x-fuji-raf",
"image/x-panasonic-raw",
"image/x-adobe-dng",
"image/x-canon-cr2",
"image/x-canon-crw",
"image/x-dcraw",
"image/x-kodak-dcr",
"image/x-kodak-k25",
"image/x-kodak-kdc",
"image/x-minolta-mrw",
"image/x-pentax-pef",
"image/x-sigma-x3f",
"image/x-sony-arw",
"image/x-sony-sr2",
"image/x-sony-srf",
"image/x-minolta-mrw",
"image/x-pentax-pef",
"image/x-epson-erf",
)
cnt = 1
@@ -97,8 +120,14 @@ def mime_id(mime):
mime_id += " | 0x02000000"
elif mime in markup:
mime_id += " | 0x01000000"
elif mime in raw:
mime_id += " | 0x00800000"
elif mime == "application/x-empty":
cnt -= 1
return "1"
elif mime == "sist2/sidecar":
cnt -= 1
return "2"
return mime_id

176
src/cli.c
View File

@@ -9,12 +9,15 @@
#define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200"
#define DEFAULT_ES_INDEX "sist2"
#define DEFAULT_BATCH_SIZE 100
#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
#define DEFAULT_TREEMAP_THRESHOLD 0.0005
const char* TESS_DATAPATHS[] = {
#define DEFAULT_MAX_MEM_BUFFER 2000
const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
"./",
@@ -30,10 +33,18 @@ scan_args_t *scan_args_create() {
return args;
}
exec_args_t *exec_args_create() {
exec_args_t *args = calloc(sizeof(exec_args_t), 1);
return args;
}
void scan_args_destroy(scan_args_t *args) {
if (args->name != NULL) {
free(args->name);
}
if (args->incremental != NULL) {
free(args->incremental);
}
if (args->path != NULL) {
free(args->path);
}
@@ -45,6 +56,12 @@ void scan_args_destroy(scan_args_t *args) {
void index_args_destroy(index_args_t *args) {
//todo
if (args->es_mappings_path) {
free(args->es_mappings);
}
if (args->es_settings_path) {
free(args->es_settings);
}
free(args);
}
@@ -53,6 +70,10 @@ void web_args_destroy(web_args_t *args) {
free(args);
}
void exec_args_destroy(exec_args_t *args) {
free(args);
}
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
@@ -68,7 +89,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
}
if (args->incremental != NULL) {
abs_path = abspath(args->incremental);
args->incremental = abspath(args->incremental);
if (abs_path == NULL) {
sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
args->incremental = NULL;
@@ -113,7 +134,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->depth < 0) {
if (args->depth <= 0) {
args->depth = G_MAXINT32;
} else {
args->depth += 1;
@@ -121,6 +142,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->name == NULL) {
args->name = g_path_get_basename(args->output);
} else {
char* tmp = malloc(strlen(args->name) + 1);
strcpy(tmp, args->name);
args->name = tmp;
}
if (args->rewrite_url == NULL) {
@@ -145,7 +170,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char filename[128];
sprintf(filename, "%s.traineddata", args->tesseract_lang);
const char * path = find_file_in_paths(TESS_DATAPATHS, filename);
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
if (path == NULL) {
LOG_FATAL("cli.c", "Could not find tesseract language file!");
}
@@ -187,6 +212,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->treemap_threshold = atof(args->treemap_threshold_str);
}
if (args->max_memory_buffer == 0) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
@@ -203,6 +232,35 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
return 0;
}
int load_external_file(const char *file_path, char **dst) {
struct stat info;
int res = stat(file_path, &info);
if (res == -1) {
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
return 1;
}
int fd = open(file_path, O_RDONLY);
if (fd == -1) {
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
return 1;
}
*dst = malloc(info.st_size + 1);
res = read(fd, *dst, info.st_size);
if (res < 0) {
LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno))
return 1;
}
*(*dst + info.st_size) = '\0';
close(fd);
return 0;
}
@@ -216,6 +274,13 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->threads == 0) {
args->threads = 1;
} else if (args->threads < 0) {
fprintf(stderr, "Invalid threads: %d\n", args->threads);
return 1;
}
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
fprintf(stderr, "File not found: %s\n", argv[1]);
@@ -229,30 +294,26 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
args->es_url = DEFAULT_ES_URL;
}
if (args->es_index == NULL) {
args->es_index = DEFAULT_ES_INDEX;
}
if (args->script_path != NULL) {
struct stat info;
int res = stat(args->script_path, &info);
if (res == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
if (load_external_file(args->script_path, &args->script) != 0) {
return 1;
}
}
int fd = open(args->script_path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
if (args->es_settings_path != NULL) {
if (load_external_file(args->es_settings_path, &args->es_settings) != 0) {
return 1;
}
}
args->script = malloc(info.st_size + 1);
res = read(fd, args->script, info.st_size);
if (res < 0) {
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
if (args->es_mappings_path != NULL) {
if (load_external_file(args->es_mappings_path, &args->es_mappings) != 0) {
return 1;
}
*(args->script + info.st_size) = '\0';
close(fd);
}
if (args->batch_size == 0) {
@@ -260,10 +321,16 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg async_script=%s", args->async_script)
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path)
LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings)
LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path)
LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings)
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset)
@@ -287,15 +354,19 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
args->listen_address = DEFAULT_LISTEN_ADDRESS;
}
if (args->es_index == NULL) {
args->es_index = DEFAULT_ES_INDEX;
}
if (args->credentials != NULL) {
char * ptr = strstr(args->credentials, ":");
char *ptr = strstr(args->credentials, ":");
if (ptr == NULL) {
fprintf(stderr, "Invalid --auth format, see usage\n");
return 1;
}
strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
strncpy(args->auth_pass, ptr + 1, strlen(ptr + 1));
strcpy(args->auth_pass, ptr + 1);
if (strlen(args->auth_user) == 0) {
fprintf(stderr, "--auth username must be at least one character long");
@@ -307,6 +378,31 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
args->auth_enabled = FALSE;
}
if (args->tag_credentials != NULL && args->credentials != NULL) {
fprintf(stderr, "--auth and --tag-auth are mutually exclusive");
return 1;
}
if (args->tag_credentials != NULL) {
char *ptr = strstr(args->tag_credentials, ":");
if (ptr == NULL) {
fprintf(stderr, "Invalid --tag-auth format, see usage\n");
return 1;
}
strncpy(args->auth_user, args->tag_credentials, (ptr - args->tag_credentials));
strcpy(args->auth_pass, ptr + 1);
if (strlen(args->auth_user) == 0) {
fprintf(stderr, "--tag-auth username must be at least one character long");
return 1;
}
args->tag_auth_enabled = TRUE;
} else {
args->tag_auth_enabled = FALSE;
}
args->index_count = argc - 1;
args->indices = argv + 1;
@@ -319,8 +415,10 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
@@ -341,3 +439,39 @@ web_args_t *web_args_create() {
return args;
}
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
return 1;
}
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
} else {
args->index_path = argv[1];
free(index_path);
}
if (args->es_url == NULL) {
args->es_url = DEFAULT_ES_URL;
}
if (args->es_index == NULL) {
args->es_index = DEFAULT_ES_INDEX;
}
if (args->script_path == NULL) {
LOG_FATAL("cli.c", "--script-file argument is required");
}
if (load_external_file(args->script_path, &args->script) != 0) {
return 1;
}
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
return 0;
}

View File

@@ -24,6 +24,7 @@ typedef struct scan_args {
int fast;
const char* treemap_threshold_str;
double treemap_threshold;
int max_memory_buffer;
} scan_args_t;
scan_args_t *scan_args_create();
@@ -34,25 +35,44 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
typedef struct index_args {
char *es_url;
char *es_index;
const char *index_path;
const char *script_path;
char *script;
const char *es_settings_path;
char *es_settings;
const char *es_mappings_path;
char *es_mappings;
int print;
int batch_size;
int async_script;
int force_reset;
int threads;
} index_args_t;
typedef struct web_args {
char *es_url;
char *es_index;
char *listen_address;
char *credentials;
char *tag_credentials;
char auth_user[256];
char auth_pass[256];
int auth_enabled;
int tag_auth_enabled;
int index_count;
const char **indices;
} web_args_t;
typedef struct exec_args {
char *es_url;
char *es_index;
const char *index_path;
const char *script_path;
int async_script;
char *script;
} exec_args_t;
index_args_t *index_args_create();
void index_args_destroy(index_args_t *args);
@@ -65,4 +85,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv);
int web_args_validate(web_args_t *args, int argc, const char **argv);
exec_args_t *exec_args_create();
void exec_args_destroy(exec_args_t *args);
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
#endif

6
src/ctx.c Normal file
View File

@@ -0,0 +1,6 @@
#include "ctx.h"
ScanCtx_t ScanCtx;
WebCtx_t WebCtx;
IndexCtx_t IndexCtx;
LogCtx_t LogCtx;

View File

@@ -5,19 +5,21 @@
#include "tpool.h"
#include "libscan/scan.h"
#include "libscan/arc/arc.h"
#include "libscan/cbr/cbr.h"
#include "libscan/comic/comic.h"
#include "libscan/ebook/ebook.h"
#include "libscan/font/font.h"
#include "libscan/media/media.h"
#include "libscan/ooxml/ooxml.h"
#include "libscan/text/text.h"
#include "libscan/mobi/scan_mobi.h"
#include "libscan/raw/raw.h"
#include "libscan/msdoc/msdoc.h"
#include "src/io/store.h"
#include <glib.h>
#include <pcre.h>
//TODO Move to individual scan ctx
struct {
typedef struct {
struct index_t index;
GHashTable *mime_table;
@@ -39,34 +41,49 @@ struct {
int fast;
scan_arc_ctx_t arc_ctx;
scan_cbr_ctx_t cbr_ctx;
scan_comic_ctx_t comic_ctx;
scan_ebook_ctx_t ebook_ctx;
scan_font_ctx_t font_ctx;
scan_media_ctx_t media_ctx;
scan_ooxml_ctx_t ooxml_ctx;
scan_text_ctx_t text_ctx;
scan_mobi_ctx_t mobi_ctx;
} ScanCtx;
scan_raw_ctx_t raw_ctx;
scan_msdoc_ctx_t msdoc_ctx;
} ScanCtx_t;
struct {
typedef struct {
int verbose;
int very_verbose;
int no_color;
} LogCtx;
} LogCtx_t;
struct {
typedef struct {
char *es_url;
char *es_index;
int batch_size;
} IndexCtx;
tpool_t *pool;
store_t *tag_store;
GHashTable *tags;
store_t *meta_store;
GHashTable *meta;
} IndexCtx_t;
struct {
typedef struct {
char *es_url;
char *es_index;
int index_count;
char *auth_user;
char *auth_pass;
int auth_enabled;
struct index_t indices[16];
} WebCtx;
int tag_auth_enabled;
struct index_t indices[64];
} WebCtx_t;
extern ScanCtx_t ScanCtx;
extern WebCtx_t WebCtx;
extern IndexCtx_t IndexCtx;
extern LogCtx_t LogCtx;
#endif

View File

@@ -9,21 +9,33 @@
typedef struct es_indexer {
int queued;
char *es_url;
char *es_index;
es_bulk_line_t *line_head;
es_bulk_line_t *line_tail;
} es_indexer_t;
static es_indexer_t *Indexer;
static __thread es_indexer_t *Indexer;
void delete_queue(int max);
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
void elastic_flush();
void elastic_cleanup() {
elastic_flush();
if (Indexer != NULL) {
free(Indexer->es_index);
free(Indexer->es_url);
free(Indexer);
}
}
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
cJSON *line = cJSON_CreateObject();
cJSON_AddStringToObject(line, "_id", uuid_str);
cJSON_AddStringToObject(line, "_index", "sist2");
cJSON_AddStringToObject(line, "_id", id_str);
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemReferenceToObject(line, "_source", document);
@@ -35,23 +47,31 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
cJSON_Delete(line);
}
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
void index_json_func(void *arg) {
es_bulk_line_t *line = arg;
elastic_index_line(line);
}
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
char *json = cJSON_PrintUnformatted(document);
size_t json_len = strlen(json);
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
memcpy(bulk_line->line, json, json_len);
memcpy(bulk_line->uuid_str, uuid_str, UUID_STR_LEN);
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
*(bulk_line->line + json_len) = '\n';
*(bulk_line->line + json_len + 1) = '\0';
bulk_line->next = NULL;
cJSON_free(json);
elastic_index_line(bulk_line);
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
}
cJSON *body = cJSON_CreateObject();
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
@@ -65,9 +85,16 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
char *str = cJSON_Print(body);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
if (async) {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
Indexer->es_index);
} else {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
}
response_t *r = web_post(bulk_url, str);
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
if (!async) {
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
}
cJSON *resp = cJSON_Parse(r->body);
cJSON_free(str);
@@ -82,6 +109,11 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
cJSON_free(error_str);
}
if (async) {
cJSON *task = cJSON_GetObjectItem(resp, "task");
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
}
cJSON_Delete(resp);
}
@@ -91,16 +123,25 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
size_t buf_size = 0;
size_t buf_cur = 0;
char *buf = malloc(1);
char *buf = malloc(8192);
size_t buf_capacity = 8192;
while (line != NULL && *count < max) {
char action_str[512];
snprintf(action_str, 512,
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
size_t action_str_len = strlen(action_str);
char action_str[256];
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->path_md5_str, Indexer->es_index
);
size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line);
buf = realloc(buf, buf_size + line_len + action_str_len);
while (buf_size + line_len + action_str_len > buf_capacity) {
buf_capacity *= 2;
buf = realloc(buf, buf_capacity);
}
buf_size += line_len + action_str_len;
memcpy(buf + buf_cur, action_str, action_str_len);
@@ -111,7 +152,11 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
line = line->next;
(*count)++;
}
buf = realloc(buf, buf_size + 1);
if (buf_size + 1 > buf_capacity) {
buf = realloc(buf, buf_capacity + 1);
}
*(buf + buf_cur) = '\0';
*buf_len = buf_cur;
@@ -119,7 +164,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
}
void print_errors(response_t *r) {
char * tmp = malloc(r->size + 1);
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
@@ -138,6 +183,21 @@ void print_errors(response_t *r) {
free(tmp);
}
void print_error(response_t *r) {
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
cJSON *ret_json = cJSON_Parse(tmp);
if (cJSON_GetObjectItem(ret_json, "error") != NULL) {
char *str = cJSON_Print(cJSON_GetObjectItem(ret_json, "error"));
LOG_ERRORF("elastic.c", "%s\n", str);
cJSON_free(str);
}
cJSON_Delete(ret_json);
free(tmp);
}
void _elastic_flush(int max) {
if (max == 0) {
@@ -150,7 +210,7 @@ void _elastic_flush(int max) {
void *buf = create_bulk_buffer(max, &count, &buf_len);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk?pipeline=tie", Indexer->es_url);
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
response_t *r = web_post(bulk_url, buf);
if (r->status_code == 0) {
@@ -160,7 +220,7 @@ void _elastic_flush(int max) {
if (r->status_code == 413) {
if (max <= 1) {
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
free_response(r);
free(buf);
delete_queue(1);
@@ -177,6 +237,15 @@ void _elastic_flush(int max) {
_elastic_flush(max / 2);
return;
} else if (r->status_code == 429) {
free_response(r);
free(buf);
LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
usleep(1000000 * 20);
_elastic_flush(max);
return;
} else if (r->status_code != 200) {
print_errors(r);
delete_queue(Indexer->queued);
@@ -202,9 +271,8 @@ void delete_queue(int max) {
Indexer->line_head = tmp->next;
if (Indexer->line_head == NULL) {
Indexer->line_tail = NULL;
} else {
free(tmp);
}
free(tmp);
Indexer->queued -= 1;
}
}
@@ -212,7 +280,7 @@ void delete_queue(int max) {
void elastic_flush() {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
}
_elastic_flush(Indexer->queued);
@@ -221,7 +289,7 @@ void elastic_flush() {
void elastic_index_line(es_bulk_line_t *line) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
}
if (Indexer->line_head == NULL) {
@@ -239,14 +307,18 @@ void elastic_index_line(es_bulk_line_t *line) {
}
}
es_indexer_t *create_indexer(const char *url) {
es_indexer_t *create_indexer(const char *url, const char *index) {
char *es_url = malloc(strlen(url) + 1);
strcpy(es_url, url);
char *es_index = malloc(strlen(index) + 1);
strcpy(es_index, index);
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
indexer->es_url = es_url;
indexer->es_index = es_index;
indexer->queued = 0;
indexer->line_head = NULL;
indexer->line_tail = NULL;
@@ -254,41 +326,42 @@ es_indexer_t *create_indexer(const char *url) {
return indexer;
}
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
void finish_indexer(char *script, int async_script, char *index_id) {
char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
if (script != NULL) {
execute_update_script(script, index_id);
execute_update_script(script, async_script, index_id);
free(script);
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
}
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r);
if (Indexer != NULL) {
free(Indexer->es_url);
free(Indexer);
}
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
free_response(r);
}
void elastic_init(int force_reset) {
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings) {
// Check if index exists
char url[4096];
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
response_t *r = web_get(url);
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_get(url, 30);
int index_exists = r->status_code == 200;
free_response(r);
@@ -297,46 +370,56 @@ void elastic_init(int force_reset) {
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "");
if (r->status_code != 200) {
print_error(r);
LOG_FATAL("elastic.c", "Could not create index")
}
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/_ingest/pipeline/tie", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
r = web_put(url, settings_json);
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_settings ? user_settings : settings_json);
LOG_INFOF("elastic.c", "Update user_settings <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
r = web_put(url, mappings_json);
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, user_mappings ? user_mappings : mappings_json);
LOG_INFOF("elastic.c", "Update user_mappings <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r);
}
}
cJSON *elastic_get_document(const char *uuid_str) {
cJSON *elastic_get_document(const char *id_str) {
char url[4096];
snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
response_t *r = web_get(url);
response_t *r = web_get(url, 3);
cJSON *json = NULL;
if (r->status_code == 200) {
json = cJSON_Parse(r->body);
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
json = cJSON_Parse(tmp);
free(tmp);
}
free_response(r);
return json;
@@ -344,21 +427,25 @@ cJSON *elastic_get_document(const char *uuid_str) {
char *elastic_get_status() {
char url[4096];
snprintf(url, 4096,
"%s/_cluster/state/metadata/sist2?filter_path=metadata.indices.*.state", WebCtx.es_url);
snprintf(url, sizeof(url),
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
response_t *r = web_get(url);
response_t *r = web_get(url, 30);
cJSON *json = NULL;
char *status = malloc(128 * sizeof(char));
status[0] = '\0';
if (r->status_code == 200) {
json = cJSON_Parse(r->body);
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
json = cJSON_Parse(tmp);
free(tmp);
const cJSON *metadata = cJSON_GetObjectItem(json, "metadata");
if (metadata != NULL) {
const cJSON *indices = cJSON_GetObjectItem(metadata, "indices");
const cJSON *sist2 = cJSON_GetObjectItem(indices, "sist2");
const cJSON *state = cJSON_GetObjectItem(sist2, "state");
const cJSON *index = cJSON_GetObjectItem(indices, WebCtx.es_index);
const cJSON *state = cJSON_GetObjectItem(index, "state");
strcpy(status, state->valuestring);
}
}

View File

@@ -5,7 +5,7 @@
typedef struct es_bulk_line {
struct es_bulk_line *next;
char uuid_str[UUID_STR_LEN];
char path_md5_str[MD5_STR_LENGTH];
char line[0];
} es_bulk_line_t;
@@ -16,20 +16,21 @@ typedef struct es_indexer es_indexer_t;
void elastic_index_line(es_bulk_line_t *line);
void elastic_flush();
void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char *url, const char *index);
es_indexer_t *create_indexer(const char* es_url);
void elastic_cleanup();
void finish_indexer(char *script, int async_script, char *index_id);
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);
void elastic_init(int force_reset);
cJSON *elastic_get_document(const char *uuid_str);
cJSON *elastic_get_document(const char *id_str);
char *elastic_get_status();
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
#endif

File diff suppressed because one or more lines are too long

View File

@@ -4,8 +4,17 @@
#include <mongoose.h>
#include <pthread.h>
#include <curl/curl.h>
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
size_t real_size = size * nmemb;
dyn_buffer_t *buf = user_data;
dyn_buffer_write(buf, ptr, real_size);
return real_size;
}
void free_response(response_t *resp) {
if (resp->body != NULL) {
free(resp->body);
@@ -13,142 +22,204 @@ void free_response(response_t *resp) {
free(resp);
}
#define SIST2_HEADERS "User-Agent: sist2\r\nContent-Type: application/json\r\n"
void web_post_async_poll(subreq_ctx_t* req) {
fd_set fdread;
fd_set fdwrite;
fd_set fdexcep;
int maxfd = -1;
FD_ZERO(&fdread);
FD_ZERO(&fdwrite);
FD_ZERO(&fdexcep);
void http_req_ev(struct mg_connection *nc, int ev, void *ptr) {
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
http_ev_data_t *ev_data = (http_ev_data_t *) nc->user_data;
if(mc != CURLM_OK) {
req->done = TRUE;
return;
}
switch (ev) {
case MG_EV_CONNECT: {
int connect_status = *(int *) ptr;
if (connect_status != 0) {
ev_data->done = TRUE;
ev_data->resp->status_code = 0;
}
if (maxfd == -1) {
// no fds ready yet
return;
}
struct timeval timeout = {1, 0};
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
switch(rc) {
case -1:
req->done = TRUE;
break;
}
case MG_EV_HTTP_REPLY: {
struct http_message *hm = (struct http_message *) ptr;
//TODO: Check errors?
ev_data->resp->size = hm->body.len;
ev_data->resp->status_code = hm->resp_code;
ev_data->resp->body = malloc(hm->body.len + 1);
memcpy(ev_data->resp->body, hm->body.p, hm->body.len);
*(ev_data->resp->body + hm->body.len) = '\0';
ev_data->done = TRUE;
case 0:
break;
}
case MG_EV_CLOSE: {
ev_data->done = TRUE;
break;
}
default:
curl_multi_perform(req->multi, &req->running_handles);
break;
}
}
subreq_ctx_t *http_req(const char *url, const char *extra_headers, const char *post_data, const char *method) {
if (req->running_handles == 0) {
req->done = TRUE;
req->response->body = req->response_buf.buf;
req->response->size = req->response_buf.cur;
curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);
struct mg_str scheme;
struct mg_str user_info;
struct mg_str host;
unsigned int port;
struct mg_str path;
struct mg_str query;
struct mg_str fragment;
if (post_data == NULL) post_data = "";
if (extra_headers == NULL) extra_headers = "";
if (path.len == 0) path = mg_mk_str("/");
if (host.len == 0) host = mg_mk_str("");
// [scheme://[user_info@]]host[:port][/path][?query][#fragment]
mg_parse_uri(mg_mk_str(url), &scheme, &user_info, &host, &port, &path, &query, &fragment);
if (query.len > 0) path.len += query.len + 1;
subreq_ctx_t *ctx = malloc(sizeof(subreq_ctx_t));
mg_mgr_init(&ctx->mgr, NULL);
char address[8196];
snprintf(address, sizeof(address), "tcp://%.*s:%u", (int) host.len, host.p, port);
struct mg_connection *nc = mg_connect(&ctx->mgr, address, http_req_ev);
nc->user_data = &ctx->ev_data;
mg_set_protocol_http_websocket(nc);
ctx->ev_data.resp = calloc(1, sizeof(response_t));
ctx->ev_data.done = FALSE;
mg_printf(
nc, "%s %.*s HTTP/1.1\r\n"
"Host: %.*s\r\n"
"Content-Length: %zu\r\n"
"%s\r\n"
"%s",
method, (int) path.len, path.p,
(int) (path.p - host.p), host.p,
strlen(post_data),
extra_headers,
post_data
);
return ctx;
}
response_t *web_get(const char *url) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "GET");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
curl_multi_cleanup(req->multi);
curl_easy_cleanup(req->handle);
curl_slist_free_all(req->headers);
return;
}
mg_mgr_free(&ctx->mgr);
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}
subreq_ctx_t *web_post_async(const char *url, const char *data) {
return http_req(url, SIST2_HEADERS, data, "POST");
subreq_ctx_t *web_post_async(const char *url, char *data) {
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
req->response = calloc(1, sizeof(response_t));
req->data = data;
req->response_buf = dyn_buffer_create();
req->handle = curl_easy_init();
CURL *curl = req->handle;
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&req->response_buf));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
req->multi = curl_multi_init();
curl_multi_add_handle(req->multi, curl);
curl_multi_perform(req->multi, &req->running_handles);
LOG_DEBUGF("web.c", "async request POST %s", url)
return req;
}
response_t *web_get(const char *url, int timeout) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}
response_t *web_post(const char *url, const char *data) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "POST");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
response_t *resp = malloc(sizeof(response_t));
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}
response_t *web_put(const char *url, const char *data) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "PUT");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
response_t *web_put(const char *url, const char *data) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}
response_t *web_delete(const char *url) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "DELETE");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}

View File

@@ -3,6 +3,7 @@
#include "src/sist.h"
#include <mongoose.h>
#include <curl/curl.h>
typedef struct response {
char *body;
@@ -16,13 +17,20 @@ typedef struct {
} http_ev_data_t;
typedef struct {
http_ev_data_t ev_data;
struct mg_mgr mgr;
char* data;
dyn_buffer_t response_buf;
struct curl_slist *headers;
CURL *handle;
CURLM *multi;
response_t *response;
int running_handles;
int done;
} subreq_ctx_t;
response_t *web_get(const char *url);
response_t *web_get(const char *url, int timeout);
response_t *web_post(const char * url, const char * data);
subreq_ctx_t *web_post_async(const char *url, const char *data);
void web_post_async_poll(subreq_ctx_t* req);
subreq_ctx_t *web_post_async(const char *url, char *data);
response_t *web_put(const char *url, const char *data);
response_t *web_delete(const char *url);

View File

@@ -6,13 +6,13 @@
static __thread int index_fd = -1;
typedef struct {
unsigned char uuid[16];
unsigned long ino;
unsigned char path_md5[MD5_DIGEST_LENGTH];
unsigned long size;
unsigned int mime;
int mtime;
short base;
short ext;
char has_parent;
} line_t;
void skip_meta(FILE *file) {
@@ -32,7 +32,7 @@ void skip_meta(FILE *file) {
void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "uuid", desc->uuid);
cJSON_AddStringToObject(json, "id", desc->id);
cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name);
@@ -62,7 +62,7 @@ index_descriptor_t read_index_descriptor(char *path) {
int fd = open(path, O_RDONLY);
if (fd == -1) {
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path ,strerror(errno))
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
}
char *buf = malloc(info.st_size + 1);
@@ -82,7 +82,7 @@ index_descriptor_t read_index_descriptor(char *path) {
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short) strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
if (cJSON_GetObjectItem(json, "type") == NULL) {
strcpy(descriptor.type, INDEX_TYPE_BIN);
} else {
@@ -150,6 +150,8 @@ char *get_meta_key_text(enum metakey meta_key) {
return "modified_by";
case MetaThumbnail:
return "thumbnail";
case MetaPages:
return "pages";
default:
return NULL;
}
@@ -172,8 +174,8 @@ void write_document(document_t *doc) {
dyn_buffer_t buf = dyn_buffer_create();
// Ignore root directory in the file path
doc->ext = doc->ext - ScanCtx.index.desc.root_len;
doc->base = doc->base - ScanCtx.index.desc.root_len;
doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
doc->filepath += ScanCtx.index.desc.root_len;
dyn_buffer_write(&buf, doc, sizeof(line_t));
@@ -217,7 +219,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
dyn_buffer_t buf = dyn_buffer_create();
FILE *file = fopen(path, "rb");
while (1) {
while (TRUE) {
buf.cur = 0;
size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
@@ -227,10 +229,10 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
cJSON *document = cJSON_CreateObject();
cJSON_AddStringToObject(document, "index", index_id);
char uuid_str[UUID_STR_LEN];
uuid_unparse(line.uuid, uuid_str);
char path_md5_str[MD5_STR_LENGTH];
buf2hex(line.path_md5, sizeof(line.path_md5), path_md5_str);
const char* mime_text = mime_get_mime_text(line.mime);
const char *mime_text = mime_get_mime_text(line.mime);
if (mime_text == NULL) {
cJSON_AddNullToObject(document, "mime");
} else {
@@ -239,7 +241,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
cJSON_AddNumberToObject(document, "size", (double) line.size);
cJSON_AddNumberToObject(document, "mtime", line.mtime);
int c;
int c = 0;
while ((c = getc(file)) != 0) {
dyn_buffer_write_char(&buf, (char) c);
}
@@ -270,6 +272,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
size_t ret = 0;
while (key != '\n') {
switch (key) {
case MetaPages:
case MetaWidth:
case MetaHeight: {
int value;
@@ -323,8 +326,36 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
key = getc(file);
}
func(document, uuid_str);
cJSON *meta_obj = NULL;
if (IndexCtx.meta != NULL) {
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
if (meta_string != NULL) {
meta_obj = cJSON_Parse(meta_string);
cJSON *child;
for (child = meta_obj->child; child != NULL; child = child->next) {
char meta_key[4096];
strcpy(meta_key, child->string);
cJSON_DeleteItemFromObject(document, meta_key);
cJSON_AddItemReferenceToObject(document, meta_key, child);
}
}
}
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_DeleteItemFromObject(document, "tag");
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}
func(document, path_md5_str);
cJSON_Delete(document);
if (meta_obj) {
cJSON_Delete(meta_obj);
}
}
dyn_buffer_destroy(&buf);
fclose(file);
@@ -348,7 +379,7 @@ const char *json_type_array_fields[] = {
void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {
FILE *file = fopen(path, "r");
while (1) {
while (TRUE) {
char *line = NULL;
size_t len;
size_t read = getline(&line, &len, file);
@@ -368,7 +399,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
}
cJSON *document = cJSON_CreateObject();
const char *uuid_str = cJSON_GetObjectItem(input, "_id")->valuestring;
const char *id_str = cJSON_GetObjectItem(input, "_id")->valuestring;
for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
@@ -396,7 +427,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
}
}
func(document, uuid_str);
func(document, id_str);
cJSON_Delete(document);
cJSON_Delete(input);
@@ -404,7 +435,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
fclose(file);
}
void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_BIN) == 0) {
read_index_bin(path, index_id, func);
@@ -417,13 +448,15 @@ void incremental_read(GHashTable *table, const char *filepath) {
FILE *file = fopen(filepath, "rb");
line_t line;
LOG_DEBUGF("serialize.c", "Incremental read %s", filepath)
while (1) {
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
if (ret != 1 || feof(file)) {
break;
}
incremental_put(table, line.ino, line.mtime);
incremental_put(table, line.path_md5, line.mtime);
while ((getc(file))) {}
skip_meta(file);
@@ -441,33 +474,47 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
FILE *dst_file = fopen(dst_filepath, "ab");
line_t line;
while (1) {
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
LOG_DEBUGF("serialize.c", "Incremental copy %s", filepath)
while (TRUE) {
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
if (ret != 1 || feof(file)) {
break;
}
if (incremental_get(copy_table, line.ino)) {
// Assume that files with parents still exist.
// One way to "fix" this would be to check if the parent is marked for copy but it would consistently
// delete files with grandparents, which is a side-effect worse than having orphaned files
if (line.has_parent || incremental_get(copy_table, line.path_md5)) {
fwrite(&line, sizeof(line), 1, dst_file);
size_t buf_len;
char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
free(buf);
// Copy filepath
char filepath_buf[PATH_MAX];
char c;
char *ptr = filepath_buf;
while ((c = (char) getc(file))) {
fwrite(&c, sizeof(c), 1, dst_file);
*ptr++ = c;
}
*ptr = '\0';
fwrite(filepath_buf, (ptr - filepath_buf) + 1, 1, dst_file);
// Copy tn store contents
size_t buf_len;
char path_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) filepath_buf, (ptr - filepath_buf), (unsigned char *) path_md5);
char *buf = store_read(store, path_md5, sizeof(path_md5), &buf_len);
if (buf_len != 0) {
store_write(dst_store, path_md5, sizeof(path_md5), buf, buf_len);
free(buf);
}
fwrite("\0", sizeof(c), 1, dst_file);
enum metakey key;
while (1) {
key = getc(file);
fwrite(&key, sizeof(char), 1, dst_file);
if (key == '\n') {
break;
}
fwrite(&key, sizeof(char), 1, dst_file);
if (IS_META_INT(key)) {
int val;
@@ -483,14 +530,12 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
}
fwrite("\0", sizeof(c), 1, dst_file);
}
if (ret != 1) {
break;
}
}
} else {
while ((getc(file))) {}
skip_meta(file);
}
}
fclose(file);
fclose(dst_file);
}

View File

@@ -7,14 +7,14 @@
#include <sys/syscall.h>
#include <glib.h>
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
void write_document(document_t *doc);
void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath);

View File

@@ -1,9 +1,10 @@
#include "store.h"
#include "src/ctx.h"
store_t *store_create(char *path) {
store_t *store_create(char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t));
store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL);
mdb_env_create(&store->env);
@@ -18,7 +19,7 @@ store_t *store_create(char *path) {
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
}
store->size = (size_t) 1024 * 1024 * 5;
store->size = (size_t) store->chunk_size;
ScanCtx.stat_tn_size = 0;
mdb_env_set_mapsize(store->env, store->size);
@@ -39,12 +40,20 @@ void store_destroy(store_t *store) {
free(store);
}
void store_flush(store_t *store) {
mdb_env_sync(store->env, TRUE);
}
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
uuid_unparse((unsigned char *) key, uuid_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
if (key_len == MD5_DIGEST_LENGTH) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}
}
MDB_val mdb_key;
@@ -69,7 +78,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
// Cannot resize when there is a opened transaction.
// Resize take effect on the next commit.
pthread_rwlock_wrlock(&store->lock);
store->size += 1024 * 1024 * 50;
store->size += store->chunk_size;
mdb_env_set_mapsize(store->env, store->size);
mdb_txn_begin(store->env, NULL, 0, &txn);
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
@@ -110,3 +119,42 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
return buf;
}
GHashTable *store_read_all(store_t *store) {
int count = 0;
GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
MDB_txn *txn = NULL;
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
MDB_cursor *cur = NULL;
mdb_cursor_open(txn, store->dbi, &cur);
MDB_val key;
MDB_val value;
while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
char *key_str = malloc(key.mv_size);
memcpy(key_str, key.mv_data, key.mv_size);
char *val_str = malloc(value.mv_size);
memcpy(val_str, value.mv_data, value.mv_size);
g_hash_table_insert(table, key_str, val_str);
count += 1;
}
const char *path;
mdb_env_get_path(store->env, &path);
LOG_DEBUGF("store.c", "Read %d entries from %s", count, path);
mdb_cursor_close(cur);
mdb_txn_abort(txn);
return table;
}
void store_copy(store_t *store, const char *destination) {
mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);
mdb_env_copy(store->env, destination);
}

View File

@@ -4,19 +4,32 @@
#include <pthread.h>
#include <lmdb.h>
#include <glib.h>
#define STORE_SIZE_TN 1024 * 1024 * 5
#define STORE_SIZE_TAG 1024 * 16
#define STORE_SIZE_META STORE_SIZE_TAG
typedef struct store_t {
MDB_dbi dbi;
MDB_env *env;
size_t size;
size_t chunk_size;
pthread_rwlock_t lock;
} store_t;
store_t *store_create(char *path);
store_t *store_create(char *path, size_t chunk_size);
void store_destroy(store_t *store);
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);
void store_flush(store_t *store);
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
GHashTable *store_read_all(store_t *store);
void store_copy(store_t *store, const char *destination);
#endif

View File

@@ -20,7 +20,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->vfile.info = *info;
memset(job->parent, 0, 16);
memset(job->parent, 0, MD5_DIGEST_LENGTH);
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;

View File

@@ -4,8 +4,7 @@
#include <stdarg.h>
const char *log_colors[] = {
"\033[34m", "\033[01;34m", "\033[0m",
"\033[01;33m", "\033[31m", "\033[01;31m"
"\033[34m", "\033[01;34m", "\033[01;33m", "\033[0m", "\033[31m", "\033[01;31m"
};
const char *log_levels[] = {

View File

@@ -2,7 +2,6 @@
#include "ctx.h"
#include <third-party/argparse/argparse.h>
#include <glib.h>
#include <locale.h>
#include "cli.h"
@@ -22,11 +21,12 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.3.0";
static const char *const Version = "2.9.0";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
"sist2 web [OPTION]... INDEX...",
"sist2 exec-script [OPTION]... INDEX",
NULL,
};
@@ -34,9 +34,10 @@ void init_dir(const char *dirpath) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
uuid_t uuid;
uuid_generate(uuid);
uuid_unparse(uuid, ScanCtx.index.desc.uuid);
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) ScanCtx.index.desc.name, strlen(ScanCtx.index.desc.name), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
time(&ScanCtx.index.desc.timestamp);
strcpy(ScanCtx.index.desc.version, Version);
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);
@@ -75,7 +76,7 @@ void _logf(const char *filepath, int level, char *format, ...) {
va_start(args, format);
if (level == LEVEL_FATAL) {
sist_logf(filepath, level, format, args);
vsist_logf(filepath, level, format, args);
exit(-1);
}
@@ -85,7 +86,7 @@ void _logf(const char *filepath, int level, char *format, ...) {
vsist_logf(filepath, level, format, args);
}
} else {
sist_logf(filepath, level, format, args);
vsist_logf(filepath, level, format, args);
}
}
va_end(args);
@@ -99,11 +100,14 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
// Cbr
ScanCtx.cbr_ctx.log = _log;
ScanCtx.cbr_ctx.logf = _logf;
ScanCtx.cbr_ctx.store = _store;
ScanCtx.cbr_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
// Comic
ScanCtx.comic_ctx.log = _log;
ScanCtx.comic_ctx.logf = _logf;
ScanCtx.comic_ctx.store = _store;
ScanCtx.comic_ctx.tn_size = args->size;
ScanCtx.comic_ctx.tn_qscale = args->quality;
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
// Ebook
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
@@ -127,12 +131,14 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
init_media();
// OOXML
ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf;
ScanCtx.ooxml_ctx.store = _store;
// MOBI
ScanCtx.mobi_ctx.content_size = args->content_size;
@@ -144,6 +150,14 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.text_ctx.log = _log;
ScanCtx.text_ctx.logf = _logf;
// MSDOC
ScanCtx.msdoc_ctx.tn_size = args->size;
ScanCtx.msdoc_ctx.content_size = args->content_size;
ScanCtx.msdoc_ctx.log = _log;
ScanCtx.msdoc_ctx.logf = _logf;
ScanCtx.msdoc_ctx.store = _store;
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
@@ -153,6 +167,13 @@ void initialize_scan_context(scan_args_t *args) {
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
ScanCtx.fast = args->fast;
// Raw
ScanCtx.raw_ctx.tn_qscale = args->quality;
ScanCtx.raw_ctx.tn_size = args->size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
}
@@ -168,7 +189,11 @@ void sist2_scan(scan_args_t *args) {
char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
ScanCtx.index.store = store_create(store_path);
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);
scan_print_header();
@@ -194,7 +219,7 @@ void sist2_scan(scan_args_t *args) {
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
incremental_read(ScanCtx.original_table, file_path);
}
}
@@ -203,19 +228,17 @@ void sist2_scan(scan_args_t *args) {
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
if (args->incremental != NULL) {
char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
store_t *source = store_create(store_path);
store_t *source = store_create(store_path, STORE_SIZE_TN);
DIR *dir = opendir(args->incremental);
if (dir == NULL) {
@@ -226,24 +249,34 @@ void sist2_scan(scan_args_t *args) {
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
}
}
closedir(dir);
store_destroy(source);
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
store_copy(source_tags, dst_path);
store_destroy(source_tags);
}
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
store_destroy(ScanCtx.index.store);
}
void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.batch_size = args->batch_size;
if (!args->print) {
elastic_init(args->force_reset);
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
}
char descriptor_path[PATH_MAX];
@@ -263,6 +296,16 @@ void sist2_index(index_args_t *args) {
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
}
char path_tmp[PATH_MAX];
snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
snprintf(path_tmp, sizeof(path_tmp), "%s/meta", args->index_path);
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
index_func f;
if (args->print) {
f = print_json;
@@ -270,29 +313,64 @@ void sist2_index(index_args_t *args) {
f = index_json;
}
void (*cleanup)();
if (args->print) {
cleanup = NULL;
} else {
cleanup = elastic_cleanup;
}
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE);
tpool_start(IndexCtx.pool);
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
read_index(file_path, desc.uuid, desc.type, f);
read_index(file_path, desc.id, desc.type, f);
}
}
closedir(dir);
tpool_wait(IndexCtx.pool);
tpool_destroy(IndexCtx.pool);
if (!args->print) {
elastic_flush();
destroy_indexer(args->script, desc.uuid);
finish_indexer(args->script, args->async_script, desc.id);
}
store_destroy(IndexCtx.tag_store);
g_hash_table_remove_all(IndexCtx.tags);
g_hash_table_destroy(IndexCtx.tags);
}
void sist2_exec_script(exec_args_t *args) {
LogCtx.verbose = TRUE;
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
index_descriptor_t desc = read_index_descriptor(descriptor_path);
IndexCtx.es_url = args->es_url;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
execute_update_script(args->script, args->async_script, desc.id);
free(args->script);
}
void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
WebCtx.es_index = args->es_index;
WebCtx.index_count = args->index_count;
WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass;
WebCtx.auth_enabled = args->auth_enabled;
WebCtx.tag_auth_enabled = args->tag_auth_enabled;
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
@@ -302,7 +380,11 @@ void sist2_web(web_args_t *args) {
char path_tmp[PATH_MAX];
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
WebCtx.indices[i].store = store_create(path_tmp);
WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
@@ -322,10 +404,15 @@ int main(int argc, const char *argv[]) {
scan_args_t *scan_args = scan_args_create();
index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create();
exec_args_t *exec_args = exec_args_create();
int arg_version = 0;
char *common_es_url = NULL;
char *common_es_index = NULL;
char *common_script_path = NULL;
int common_async_script = 0;
int common_threads = 0;
struct argparse_option options[] = {
OPT_HELP(),
@@ -335,7 +422,7 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
OPT_INTEGER(0, "size", &scan_args->size,
@@ -357,20 +444,36 @@ int main(int argc, const char *argv[]) {
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"),
"(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"),
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
OPT_END(),
};
@@ -391,6 +494,18 @@ int main(int argc, const char *argv[]) {
web_args->es_url = common_es_url;
index_args->es_url = common_es_url;
exec_args->es_url = common_es_url;
web_args->es_index = common_es_index;
index_args->es_index = common_es_index;
exec_args->es_index = common_es_index;
index_args->script_path = common_script_path;
exec_args->script_path = common_script_path;
index_args->threads = common_threads;
scan_args->threads = common_threads;
exec_args->async_script = common_async_script;
index_args->async_script = common_async_script;
if (argc == 0) {
argparse_usage(&argparse);
@@ -419,6 +534,14 @@ int main(int argc, const char *argv[]) {
}
sist2_web(web_args);
} else if (strcmp(argv[0], "exec-script") == 0) {
int err = exec_args_validate(exec_args, argc, argv);
if (err != 0) {
goto end;
}
sist2_exec_script(exec_args);
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
@@ -430,6 +553,7 @@ int main(int argc, const char *argv[]) {
scan_args_destroy(scan_args);
index_args_destroy(index_args);
web_args_destroy(web_args);
exec_args_destroy(exec_args);
return 0;
}

View File

@@ -3,9 +3,10 @@
#include "../sist.h"
#define MAJOR_MIME(mime_id) (mime_id & 0x00FF0000) >> 16
#define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
#define MIME_EMPTY 1
#define MIME_SIST2_SIDECAR 2
#define DONT_PARSE 0x80000000
#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0)
@@ -31,6 +32,9 @@
#define MARKUP_MASK 0x01000000
#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
#define RAW_MASK 0x00800000
#define IS_RAW(mime_id) (mime_id & RAW_MASK) == RAW_MASK
enum major_mime {
MimeInvalid = 0,
MimeModel = 1,

View File

@@ -54,78 +54,79 @@ enum mime {
application_streamingmedia=655406,
application_vda=655407,
application_vnd_amazon_mobi8_ebook=655408 | 0x02000000,
application_vnd_fdf=655409,
application_vnd_font_fontforge_sfd=655410,
application_vnd_hp_hpgl=655411,
application_vnd_iccprofile=655412,
application_vnd_lotus_1_2_3=655413,
application_vnd_ms_cab_compressed=655414,
application_vnd_ms_excel=655415,
application_vnd_ms_fontobject=655416,
application_vnd_ms_opentype=655417 | 0x20000000,
application_vnd_ms_outlook=655418,
application_vnd_ms_pki_certstore=655419,
application_vnd_ms_pki_pko=655420,
application_vnd_ms_pki_seccat=655421,
application_vnd_ms_powerpoint=655422,
application_vnd_ms_project=655423,
application_vnd_oasis_opendocument_base=655424,
application_vnd_oasis_opendocument_formula=655425,
application_vnd_oasis_opendocument_graphics=655426,
application_vnd_oasis_opendocument_presentation=655427,
application_vnd_oasis_opendocument_spreadsheet=655428,
application_vnd_oasis_opendocument_text=655429,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655430 | 0x04000000,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655431 | 0x04000000,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655432 | 0x04000000,
application_vnd_symbian_install=655433,
application_vnd_tcpdump_pcap=655434,
application_vnd_wap_wmlc=655435,
application_vnd_wap_wmlscriptc=655436,
application_vnd_xara=655437,
application_vocaltec_media_desc=655438,
application_vocaltec_media_file=655439,
application_warc=655440,
application_winhelp=655441,
application_wordperfect=655442,
application_wordperfect6_0=655443,
application_wordperfect6_1=655444,
application_x_123=655445,
application_x_7z_compressed=655446 | 0x10000000,
application_x_aim=655447,
application_x_apple_diskimage=655448,
application_x_arc=655449 | 0x10000000,
application_x_archive=655450,
application_x_atari_7800_rom=655451,
application_x_authorware_bin=655452,
application_x_authorware_map=655453,
application_x_authorware_seg=655454,
application_x_avira_qua=655455,
application_x_bcpio=655456,
application_x_bittorrent=655457,
application_x_bsh=655458,
application_x_bytecode_python=655459,
application_x_bzip=655460,
application_x_bzip2=655461 | 0x08000000,
application_x_cbr=655462,
application_x_cbz=655463 | 0x40000000,
application_x_cdlink=655464,
application_x_chat=655465,
application_x_chrome_extension=655466,
application_x_cocoa=655467,
application_x_conference=655468,
application_x_coredump=655469,
application_x_cpio=655470,
application_x_dbf=655471,
application_x_dbt=655472,
application_x_debian_package=655473,
application_x_deepv=655474,
application_x_director=655475,
application_x_dmp=655476,
application_x_dosdriver=655477,
application_x_dosexec=655478,
application_x_dvi=655479,
application_x_elc=655480,
application_vnd_coffeescript=655409,
application_vnd_fdf=655410,
application_vnd_font_fontforge_sfd=655411,
application_vnd_hp_hpgl=655412,
application_vnd_iccprofile=655413,
application_vnd_lotus_1_2_3=655414,
application_vnd_ms_cab_compressed=655415,
application_vnd_ms_excel=655416,
application_vnd_ms_fontobject=655417,
application_vnd_ms_opentype=655418 | 0x20000000,
application_vnd_ms_outlook=655419,
application_vnd_ms_pki_certstore=655420,
application_vnd_ms_pki_pko=655421,
application_vnd_ms_pki_seccat=655422,
application_vnd_ms_powerpoint=655423,
application_vnd_ms_project=655424,
application_vnd_oasis_opendocument_base=655425,
application_vnd_oasis_opendocument_formula=655426,
application_vnd_oasis_opendocument_graphics=655427,
application_vnd_oasis_opendocument_presentation=655428,
application_vnd_oasis_opendocument_spreadsheet=655429,
application_vnd_oasis_opendocument_text=655430,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655431 | 0x04000000,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655432 | 0x04000000,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655433 | 0x04000000,
application_vnd_symbian_install=655434,
application_vnd_tcpdump_pcap=655435,
application_vnd_wap_wmlc=655436,
application_vnd_wap_wmlscriptc=655437,
application_vnd_xara=655438,
application_vocaltec_media_desc=655439,
application_vocaltec_media_file=655440,
application_warc=655441,
application_winhelp=655442,
application_wordperfect=655443,
application_wordperfect6_0=655444,
application_wordperfect6_1=655445,
application_x_123=655446,
application_x_7z_compressed=655447 | 0x10000000,
application_x_aim=655448,
application_x_apple_diskimage=655449,
application_x_arc=655450 | 0x10000000,
application_x_archive=655451,
application_x_atari_7800_rom=655452,
application_x_authorware_bin=655453,
application_x_authorware_map=655454,
application_x_authorware_seg=655455,
application_x_avira_qua=655456,
application_x_bcpio=655457,
application_x_bittorrent=655458,
application_x_bsh=655459,
application_x_bytecode_python=655460,
application_x_bzip=655461,
application_x_bzip2=655462 | 0x08000000,
application_x_cbr=655463,
application_x_cbz=655464,
application_x_cdlink=655465,
application_x_chat=655466,
application_x_chrome_extension=655467,
application_x_cocoa=655468,
application_x_conference=655469,
application_x_coredump=655470,
application_x_cpio=655471,
application_x_dbf=655472,
application_x_dbt=655473,
application_x_debian_package=655474,
application_x_deepv=655475,
application_x_director=655476,
application_x_dmp=655477,
application_x_dosdriver=655478,
application_x_dosexec=655479,
application_x_dvi=655480,
application_x_elc=655481,
application_x_empty=1,
application_x_envoy=655482,
application_x_esrehber=655483,
@@ -315,127 +316,146 @@ enum mime {
image_webp=524595,
image_wmf=524596,
image_x_3ds=524597,
image_x_award_bioslogo=524598,
image_x_cmu_raster=524599,
image_x_cur=524600,
image_x_dwg=524601,
image_x_eps=524602,
image_x_exr=524603,
image_x_gem=524604,
image_x_icns=524605,
image_x_icon=524606 | 0x80000000,
image_x_jg=524607,
image_x_jps=524608,
image_x_ms_bmp=524609,
image_x_niff=524610,
image_x_pcx=524611,
image_x_pict=524612,
image_x_portable_bitmap=524613,
image_x_portable_graymap=524614,
image_x_portable_pixmap=524615,
image_x_quicktime=524616,
image_x_rgb=524617,
image_x_tga=524618,
image_x_tiff=524619,
image_x_win_bitmap=524620,
image_x_xcf=524621 | 0x80000000,
image_x_xpixmap=524622 | 0x80000000,
image_x_xwindowdump=524623,
message_news=196944,
message_rfc822=196945,
model_vnd_dwf=65874,
model_vnd_gdl=65875,
model_vnd_gs_gdl=65876,
model_vrml=65877,
model_x_pov=65878,
text_PGP=590167,
text_asp=590168,
text_css=590169,
text_html=590170 | 0x01000000,
text_javascript=590171,
text_mcf=590172,
text_pascal=590173,
text_plain=590174,
text_richtext=590175,
text_rtf=590176,
text_scriplet=590177,
text_tab_separated_values=590178,
text_troff=590179,
text_uri_list=590180,
text_vnd_abc=590181,
text_vnd_fmi_flexstor=590182,
text_vnd_wap_wml=590183,
text_vnd_wap_wmlscript=590184,
text_webviewhtml=590185,
text_x_Algol68=590186,
text_x_asm=590187,
text_x_audiosoft_intra=590188,
text_x_awk=590189,
text_x_bcpl=590190,
text_x_c=590191,
text_x_c__=590192,
text_x_component=590193,
text_x_diff=590194,
text_x_fortran=590195,
text_x_java=590196,
text_x_la_asf=590197,
text_x_lisp=590198,
text_x_m=590199,
text_x_m4=590200,
text_x_makefile=590201,
text_x_ms_regedit=590202,
text_x_msdos_batch=590203,
text_x_objective_c=590204,
text_x_pascal=590205,
text_x_perl=590206,
text_x_php=590207,
text_x_po=590208,
text_x_python=590209,
text_x_ruby=590210,
text_x_sass=590211,
text_x_scss=590212,
text_x_server_parsed_html=590213,
text_x_setext=590214,
text_x_sgml=590215 | 0x01000000,
text_x_shellscript=590216,
text_x_speech=590217,
text_x_tcl=590218,
text_x_tex=590219,
text_x_uil=590220,
text_x_uuencode=590221,
text_x_vcalendar=590222,
text_x_vcard=590223,
text_xml=590224 | 0x01000000,
video_MP2T=393617,
video_animaflex=393618,
video_avi=393619,
video_avs_video=393620,
video_mp4=393621,
video_mpeg=393622,
video_quicktime=393623,
video_vdo=393624,
video_vivo=393625,
video_vnd_rn_realvideo=393626,
video_vosaic=393627,
video_webm=393628,
video_x_amt_demorun=393629,
video_x_amt_showrun=393630,
video_x_atomic3d_feature=393631,
video_x_dl=393632,
video_x_dv=393633,
video_x_fli=393634,
video_x_flv=393635,
video_x_isvideo=393636,
video_x_jng=393637 | 0x80000000,
video_x_m4v=393638,
video_x_matroska=393639,
video_x_mng=393640,
video_x_motion_jpeg=393641,
video_x_ms_asf=393642,
video_x_msvideo=393643,
video_x_qtc=393644,
video_x_sgi_movie=393645,
x_epoc_x_sisx_app=721326,
image_x_adobe_dng=524598 | 0x00800000,
image_x_award_bioslogo=524599,
image_x_canon_cr2=524600 | 0x00800000,
image_x_canon_crw=524601 | 0x00800000,
image_x_cmu_raster=524602,
image_x_cur=524603,
image_x_dcraw=524604 | 0x00800000,
image_x_dwg=524605,
image_x_eps=524606,
image_x_epson_erf=524607 | 0x00800000,
image_x_exr=524608,
image_x_fuji_raf=524609 | 0x00800000,
image_x_gem=524610,
image_x_icns=524611,
image_x_icon=524612 | 0x80000000,
image_x_jg=524613,
image_x_jps=524614,
image_x_kodak_dcr=524615 | 0x00800000,
image_x_kodak_k25=524616 | 0x00800000,
image_x_kodak_kdc=524617 | 0x00800000,
image_x_minolta_mrw=524618 | 0x00800000,
image_x_ms_bmp=524619,
image_x_niff=524620,
image_x_nikon_nef=524621 | 0x00800000,
image_x_olympus_orf=524622 | 0x00800000,
image_x_panasonic_raw=524623 | 0x00800000,
image_x_pcx=524624,
image_x_pentax_pef=524625 | 0x00800000,
image_x_pict=524626,
image_x_portable_bitmap=524627,
image_x_portable_graymap=524628,
image_x_portable_pixmap=524629,
image_x_quicktime=524630,
image_x_rgb=524631,
image_x_sigma_x3f=524632 | 0x00800000,
image_x_sony_arw=524633 | 0x00800000,
image_x_sony_sr2=524634 | 0x00800000,
image_x_sony_srf=524635 | 0x00800000,
image_x_tga=524636,
image_x_tiff=524637,
image_x_win_bitmap=524638,
image_x_xcf=524639 | 0x80000000,
image_x_xpixmap=524640 | 0x80000000,
image_x_xwindowdump=524641,
message_news=196962,
message_rfc822=196963,
model_vnd_dwf=65892,
model_vnd_gdl=65893,
model_vnd_gs_gdl=65894,
model_vrml=65895,
model_x_pov=65896,
sist2_sidecar=2,
text_PGP=590185,
text_asp=590186,
text_css=590187,
text_html=590188 | 0x01000000,
text_javascript=590189,
text_mcf=590190,
text_pascal=590191,
text_plain=590192,
text_richtext=590193,
text_rtf=590194,
text_scriplet=590195,
text_tab_separated_values=590196,
text_troff=590197,
text_uri_list=590198,
text_vnd_abc=590199,
text_vnd_fmi_flexstor=590200,
text_vnd_wap_wml=590201,
text_vnd_wap_wmlscript=590202,
text_webviewhtml=590203,
text_x_Algol68=590204,
text_x_asm=590205,
text_x_audiosoft_intra=590206,
text_x_awk=590207,
text_x_bcpl=590208,
text_x_c=590209,
text_x_c__=590210,
text_x_component=590211,
text_x_diff=590212,
text_x_fortran=590213,
text_x_java=590214,
text_x_la_asf=590215,
text_x_lisp=590216,
text_x_m=590217,
text_x_m4=590218,
text_x_makefile=590219,
text_x_ms_regedit=590220,
text_x_msdos_batch=590221,
text_x_objective_c=590222,
text_x_pascal=590223,
text_x_perl=590224,
text_x_php=590225,
text_x_po=590226,
text_x_python=590227,
text_x_ruby=590228,
text_x_sass=590229,
text_x_scss=590230,
text_x_server_parsed_html=590231,
text_x_setext=590232,
text_x_sgml=590233 | 0x01000000,
text_x_shellscript=590234,
text_x_speech=590235,
text_x_tcl=590236,
text_x_tex=590237,
text_x_uil=590238,
text_x_uuencode=590239,
text_x_vcalendar=590240,
text_x_vcard=590241,
text_xml=590242 | 0x01000000,
video_MP2T=393635,
video_animaflex=393636,
video_avi=393637,
video_avs_video=393638,
video_mp4=393639,
video_mpeg=393640,
video_quicktime=393641,
video_vdo=393642,
video_vivo=393643,
video_vnd_rn_realvideo=393644,
video_vosaic=393645,
video_webm=393646,
video_x_amt_demorun=393647,
video_x_amt_showrun=393648,
video_x_atomic3d_feature=393649,
video_x_dl=393650,
video_x_dv=393651,
video_x_fli=393652,
video_x_flv=393653,
video_x_isvideo=393654,
video_x_jng=393655 | 0x80000000,
video_x_m4v=393656,
video_x_matroska=393657,
video_x_mng=393658,
video_x_motion_jpeg=393659,
video_x_ms_asf=393660,
video_x_msvideo=393661,
video_x_qtc=393662,
video_x_sgi_movie=393663,
x_epoc_x_sisx_app=721344,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj";
@@ -786,6 +806,7 @@ case text_mcf: return "text/mcf";
case text_pascal: return "text/pascal";
case text_PGP: return "text/PGP";
case text_plain: return "text/plain";
case application_vnd_coffeescript: return "application/vnd.coffeescript";
case text_richtext: return "text/richtext";
case text_rtf: return "text/rtf";
case text_scriplet: return "text/scriplet";
@@ -868,6 +889,25 @@ case video_x_sgi_movie: return "video/x-sgi-movie";
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
case application_x_zstd_dictionary: return "application/x-zstd-dictionary";
case application_vnd_ms_outlook: return "application/vnd.ms-outlook";
case image_x_olympus_orf: return "image/x-olympus-orf";
case image_x_nikon_nef: return "image/x-nikon-nef";
case image_x_fuji_raf: return "image/x-fuji-raf";
case image_x_panasonic_raw: return "image/x-panasonic-raw";
case image_x_adobe_dng: return "image/x-adobe-dng";
case image_x_canon_cr2: return "image/x-canon-cr2";
case image_x_canon_crw: return "image/x-canon-crw";
case image_x_dcraw: return "image/x-dcraw";
case image_x_kodak_dcr: return "image/x-kodak-dcr";
case image_x_kodak_k25: return "image/x-kodak-k25";
case image_x_kodak_kdc: return "image/x-kodak-kdc";
case image_x_minolta_mrw: return "image/x-minolta-mrw";
case image_x_pentax_pef: return "image/x-pentax-pef";
case image_x_sigma_x3f: return "image/x-sigma-x3f";
case image_x_sony_arw: return "image/x-sony-arw";
case image_x_sony_sr2: return "image/x-sony-sr2";
case image_x_sony_srf: return "image/x-sony-srf";
case image_x_epson_erf: return "image/x-epson-erf";
case sist2_sidecar: return "sist2/sidecar";
default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@@ -885,6 +925,7 @@ g_hash_table_insert(ext_table, "frl", (gpointer)application_freeloader);
g_hash_table_insert(ext_table, "spl", (gpointer)application_futuresplash);
g_hash_table_insert(ext_table, "vew", (gpointer)application_groupwise);
g_hash_table_insert(ext_table, "gz", (gpointer)application_gzip);
g_hash_table_insert(ext_table, "tgz", (gpointer)application_gzip);
g_hash_table_insert(ext_table, "hta", (gpointer)application_hta);
g_hash_table_insert(ext_table, "unv", (gpointer)application_i_deas);
g_hash_table_insert(ext_table, "iges", (gpointer)application_iges);
@@ -999,7 +1040,6 @@ g_hash_table_insert(ext_table, "cpio", (gpointer)application_x_cpio);
g_hash_table_insert(ext_table, "dbf", (gpointer)application_x_dbf);
g_hash_table_insert(ext_table, "deb", (gpointer)application_x_debian_package);
g_hash_table_insert(ext_table, "deepv", (gpointer)application_x_deepv);
g_hash_table_insert(ext_table, "dcr", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dir", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dxr", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp);
@@ -1279,6 +1319,11 @@ g_hash_table_insert(ext_table, "sfv", (gpointer)text_plain);
g_hash_table_insert(ext_table, "m3u", (gpointer)text_plain);
g_hash_table_insert(ext_table, "csv", (gpointer)text_plain);
g_hash_table_insert(ext_table, "eml", (gpointer)text_plain);
g_hash_table_insert(ext_table, "make", (gpointer)text_plain);
g_hash_table_insert(ext_table, "log", (gpointer)text_plain);
g_hash_table_insert(ext_table, "markdown", (gpointer)text_plain);
g_hash_table_insert(ext_table, "yaml", (gpointer)text_plain);
g_hash_table_insert(ext_table, "coffee", (gpointer)application_vnd_coffeescript);
g_hash_table_insert(ext_table, "rt", (gpointer)text_richtext);
g_hash_table_insert(ext_table, "rtf", (gpointer)text_richtext);
g_hash_table_insert(ext_table, "rtx", (gpointer)text_richtext);
@@ -1387,6 +1432,26 @@ g_hash_table_insert(ext_table, "divx", (gpointer)video_x_msvideo);
g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc);
g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie);
g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie);
g_hash_table_insert(ext_table, "msg", (gpointer)application_vnd_ms_outlook);
g_hash_table_insert(ext_table, "orf", (gpointer)image_x_olympus_orf);
g_hash_table_insert(ext_table, "nef", (gpointer)image_x_nikon_nef);
g_hash_table_insert(ext_table, "raf", (gpointer)image_x_fuji_raf);
g_hash_table_insert(ext_table, "rw2", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(ext_table, "raw", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(ext_table, "dng", (gpointer)image_x_adobe_dng);
g_hash_table_insert(ext_table, "cr2", (gpointer)image_x_canon_cr2);
g_hash_table_insert(ext_table, "crw", (gpointer)image_x_canon_crw);
g_hash_table_insert(ext_table, "dcr", (gpointer)image_x_kodak_dcr);
g_hash_table_insert(ext_table, "k25", (gpointer)image_x_kodak_k25);
g_hash_table_insert(ext_table, "kdc", (gpointer)image_x_kodak_kdc);
g_hash_table_insert(ext_table, "mrw", (gpointer)image_x_minolta_mrw);
g_hash_table_insert(ext_table, "pef", (gpointer)image_x_pentax_pef);
g_hash_table_insert(ext_table, "xf3", (gpointer)image_x_sigma_x3f);
g_hash_table_insert(ext_table, "arw", (gpointer)image_x_sony_arw);
g_hash_table_insert(ext_table, "sr2", (gpointer)image_x_sony_sr2);
g_hash_table_insert(ext_table, "srf", (gpointer)image_x_sony_srf);
g_hash_table_insert(ext_table, "erf", (gpointer)image_x_epson_erf);
g_hash_table_insert(ext_table, "s2meta", (gpointer)sist2_sidecar);
return ext_table;}
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@@ -1737,6 +1802,7 @@ g_hash_table_insert(mime_table, "text/mcf", (gpointer)text_mcf);
g_hash_table_insert(mime_table, "text/pascal", (gpointer)text_pascal);
g_hash_table_insert(mime_table, "text/PGP", (gpointer)text_PGP);
g_hash_table_insert(mime_table, "text/plain", (gpointer)text_plain);
g_hash_table_insert(mime_table, "application/vnd.coffeescript", (gpointer)application_vnd_coffeescript);
g_hash_table_insert(mime_table, "text/richtext", (gpointer)text_richtext);
g_hash_table_insert(mime_table, "text/rtf", (gpointer)text_rtf);
g_hash_table_insert(mime_table, "text/scriplet", (gpointer)text_scriplet);
@@ -1819,5 +1885,24 @@ g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary);
g_hash_table_insert(mime_table, "application/vnd.ms-outlook", (gpointer)application_vnd_ms_outlook);
g_hash_table_insert(mime_table, "image/x-olympus-orf", (gpointer)image_x_olympus_orf);
g_hash_table_insert(mime_table, "image/x-nikon-nef", (gpointer)image_x_nikon_nef);
g_hash_table_insert(mime_table, "image/x-fuji-raf", (gpointer)image_x_fuji_raf);
g_hash_table_insert(mime_table, "image/x-panasonic-raw", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(mime_table, "image/x-adobe-dng", (gpointer)image_x_adobe_dng);
g_hash_table_insert(mime_table, "image/x-canon-cr2", (gpointer)image_x_canon_cr2);
g_hash_table_insert(mime_table, "image/x-canon-crw", (gpointer)image_x_canon_crw);
g_hash_table_insert(mime_table, "image/x-dcraw", (gpointer)image_x_dcraw);
g_hash_table_insert(mime_table, "image/x-kodak-dcr", (gpointer)image_x_kodak_dcr);
g_hash_table_insert(mime_table, "image/x-kodak-k25", (gpointer)image_x_kodak_k25);
g_hash_table_insert(mime_table, "image/x-kodak-kdc", (gpointer)image_x_kodak_kdc);
g_hash_table_insert(mime_table, "image/x-minolta-mrw", (gpointer)image_x_minolta_mrw);
g_hash_table_insert(mime_table, "image/x-pentax-pef", (gpointer)image_x_pentax_pef);
g_hash_table_insert(mime_table, "image/x-sigma-x3f", (gpointer)image_x_sigma_x3f);
g_hash_table_insert(mime_table, "image/x-sony-arw", (gpointer)image_x_sony_arw);
g_hash_table_insert(mime_table, "image/x-sony-sr2", (gpointer)image_x_sony_sr2);
g_hash_table_insert(mime_table, "image/x-sony-srf", (gpointer)image_x_sony_srf);
g_hash_table_insert(mime_table, "image/x-epson-erf", (gpointer)image_x_epson_erf);
g_hash_table_insert(mime_table, "sist2/sidecar", (gpointer)sist2_sidecar);
return mime_table;}
#endif

View File

@@ -4,6 +4,7 @@
#include "src/ctx.h"
#include "mime.h"
#include "src/io/serialize.h"
#include "src/parsing/sidecar.h"
#include <magic.h>
@@ -38,34 +39,38 @@ void fs_reset(struct vfile *f) {
}
}
#define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL))
void parse(void *arg) {
parse_job_t *job = arg;
document_t doc;
int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
return;
}
doc.filepath = job->filepath;
doc.ext = (short) job->ext;
doc.base = (short) job->base;
char *rel_path = doc.filepath + ScanCtx.index.desc.root_len;
MD5((unsigned char *) rel_path, strlen(rel_path), doc.path_md5);
doc.meta_head = NULL;
doc.meta_tail = NULL;
doc.mime = 0;
doc.size = job->vfile.info.st_size;
doc.ino = job->vfile.info.st_ino;
doc.mtime = job->vfile.info.st_mtim.tv_sec;
uuid_generate(doc.uuid);
int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5);
return;
}
char *buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
uuid_unparse(doc.uuid, uuid_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
char path_md5_str[MD5_STR_LENGTH];
buf2hex(doc.path_md5, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
}
if (job->vfile.info.st_size == 0) {
@@ -77,7 +82,17 @@ void parse(void *arg) {
int bytes_read = 0;
if (doc.mime == 0 && !ScanCtx.fast) {
if (IS_GIT_OBJ) {
goto abort;
}
// Get mime type with libmagic
if (!job->vfile.is_fs_file) {
LOG_WARNING(job->filepath,
"Guessing mime type with libmagic inside archive files is not currently supported");
goto abort;
}
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) {
@@ -114,6 +129,8 @@ void parse(void *arg) {
if (!(SHOULD_PARSE(doc.mime))) {
} else if (IS_RAW(doc.mime)) {
parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
@@ -138,24 +155,32 @@ void parse(void *arg) {
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
)) {
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
} else if (ScanCtx.ooxml_ctx.content_size > 0 && IS_DOC(doc.mime)) {
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc.mime)) {
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
} else if (is_cbr(&ScanCtx.cbr_ctx, doc.mime)) {
parse_cbr(&ScanCtx.cbr_ctx, &job->vfile, &doc);
} else if (is_cbr(&ScanCtx.comic_ctx, doc.mime) || is_cbz(&ScanCtx.comic_ctx, doc.mime)) {
parse_comic(&ScanCtx.comic_ctx, &job->vfile, &doc);
} else if (IS_MOBI(doc.mime)) {
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
} else if (doc.mime == MIME_SIST2_SIDECAR) {
parse_sidecar(&job->vfile, &doc);
CLOSE_FILE(job->vfile)
return;
} else if (is_msdoc(&ScanCtx.msdoc_ctx, doc.mime)) {
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, &doc);
}
//Parent meta
if (!uuid_is_null(job->parent)) {
char tmp[UUID_STR_LEN];
uuid_unparse(job->parent, tmp);
abort:
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
//Parent meta
if (!md5_digest_is_null(job->parent)) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
meta_parent->key = MetaParent;
strcpy(meta_parent->str_val, tmp);
buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
APPEND_META((&doc), meta_parent)
doc.has_parent = TRUE;
} else {
doc.has_parent = FALSE;
}
write_document(&doc);

35
src/parsing/sidecar.c Normal file
View File

@@ -0,0 +1,35 @@
#include "sidecar.h"
#include "src/ctx.h"
void parse_sidecar(vfile_t *vfile, document_t *doc) {
LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath)
size_t size;
char *buf = read_all(vfile, &size);
if (buf == NULL) {
LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath)
return;
}
buf = realloc(buf, size + 1);
*(buf + size) = '\0';
cJSON *json = cJSON_Parse(buf);
if (json == NULL) {
LOG_ERRORF("sidecar.c", "Could not parse JSON sidecar %s", vfile->filepath)
return;
}
char *json_str = cJSON_PrintUnformatted(json);
unsigned char path_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
path_md5);
store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1);
cJSON_Delete(json);
free(json_str);
free(buf);
}

8
src/parsing/sidecar.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef SIST2_SIDECAR_H
#define SIST2_SIDECAR_H
#include "src/sist.h"
void parse_sidecar(vfile_t *vfile, document_t *doc);
#endif

View File

@@ -23,9 +23,10 @@
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define UUID_STR_LEN 37
#define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33
#include "util.h"
#include "log.h"
#include "types.h"
@@ -47,5 +48,4 @@
#include <errno.h>
#include <ctype.h>
#endif

4
src/static/css/autocomplete.min.css vendored Normal file
View File

@@ -0,0 +1,4 @@
.autocomplete-suggestions { text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1); position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box; }
.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
.autocomplete-suggestion.selected { background: #f0f0f0; }

File diff suppressed because one or more lines are too long

View File

@@ -121,7 +121,7 @@ body {
background: #546b7a;
}
a:hover,.btn:hover {
a:hover, .btn:hover {
color: #fff;
}
@@ -130,7 +130,11 @@ a:hover,.btn:hover {
}
.document {
padding: 0.5rem;
padding: 0.3rem;
}
.card-text:last-child {
margin-top: -1px;
}
.document p {
@@ -166,6 +170,12 @@ a:hover,.btn:hover {
background-color: #FAAB3C;
}
.add-tag-button {
cursor: pointer;
color: #212529;
background-color: #e0e0e0;
}
.card-img-overlay {
pointer-events: none;
padding: 0.75rem;
@@ -191,22 +201,42 @@ a:hover,.btn:hover {
margin-right: 3px;
}
.badge-delete {
margin-right: -2px;
margin-left: 2px;
margin-top: -1px;
font-family: monospace;
font-size: 90%;
background: rgba(0, 0, 0, 0.2);
padding: 0.1em 0.4em;
color: white;
cursor: pointer;
}
.badge-user {
color: #212529;
background-color: #e0e0e0;
}
.card-img-top {
border-top-left-radius: 0;
border-top-right-radius: 0;
}
.fit {
display: block;
min-width: 64px;
max-width: 100%;
max-height: 175px;
max-height: 400px;
margin: 0 auto 0;
padding: 3px 3px 0;
width: auto;
height: auto;
}
.img-padding {
padding: 4px 4px 0 4px;
}
.fit-sm {
display: block;
max-width: 64px;
@@ -223,20 +253,6 @@ a:hover,.btn:hover {
width: 100%;
}
@media screen and (min-width: 1500px) {
.container {
max-width: 1440px;
}
.bricklayer-column-sizer {
width: 20% !important;
}
.bricklayer-column {
max-width: 20%;
}
}
@media screen and (min-width: 1800px) {
.container {
max-width: 1550px;
@@ -266,6 +282,7 @@ mark {
margin: 3px;
white-space: normal;
color: rgb(224, 224, 224);
overflow: hidden;
}
.irs-single, .irs-from, .irs-to {
@@ -432,6 +449,7 @@ option {
.small-btn {
display: none;
}
.large-btn {
display: inherit;
}
@@ -441,6 +459,7 @@ option {
.small-btn {
display: inherit;
}
.large-btn {
display: none;
}
@@ -511,3 +530,15 @@ svg {
#graphs-card svg text {
fill: #eee;
}
.wholerow {
outline: none !important;
}
.stat > .card-body {
padding: 0.7em 1.25em;
}
#modal-body > .img-wrapper {
margin-bottom: 1em;
}

View File

@@ -1 +1 @@
.irs-bar,.irs-bar-edge,.irs-line-left,.irs-line-mid,.irs-line-right,.irs-slider{background:url("../img/sprite-skin-flat.png") repeat-x}.irs{height:40px}.irs-with-grid{height:60px}.irs-line{height:12px;top:25px}.irs-line-left{height:12px;background-position:0 -30px}.irs-line-mid{height:12px;background-position:0 0}.irs-line-right{height:12px;background-position:100% -30px}.irs-bar{height:12px;top:25px;background-position:0 -60px}.irs-bar-edge{top:25px;height:12px;width:9px;background-position:0 -90px}.irs-shadow{height:3px;top:34px;background:#000;opacity:0.25}.lt-ie9 .irs-shadow{filter: alpha(opacity=25)}.irs-slider{width:16px;height:18px;top:22px;background-position:0 -120px}.irs-slider.state_hover,.irs-slider:hover{background-position:0 -150px}.irs-max,.irs-min{color:#999;font-size:10px;line-height:1.333;text-shadow:none;top:0;padding:1px 3px;background:#e1e4e9;-moz-border-radius:4px;border-radius:4px}.irs-from,.irs-single,.irs-to{color:#fff;font-size:10px;line-height:1.333;text-shadow:none;padding:1px 5px;background:#2196F3;-moz-border-radius:4px;border-radius:4px}.irs-from:after,.irs-single:after,.irs-to:after{position:absolute;display:block;content:"";bottom:-6px;left:50%;width:0;height:0;margin-left:-3px;overflow:hidden;border:3px solid transparent;border-top-color:#2196F3}.irs-grid-pol{background:#e1e4e9}.irs-grid-text{color:#999}.irs-disabled{}
.irs-bar,.irs-bar-edge,.irs-line-left,.irs-line-mid,.irs-line-right,.irs-slider{background:url("./img/sprite-skin-flat.png") repeat-x}.irs{height:40px}.irs-with-grid{height:60px}.irs-line{height:12px;top:25px}.irs-line-left{height:12px;background-position:0 -30px}.irs-line-mid{height:12px;background-position:0 0}.irs-line-right{height:12px;background-position:100% -30px}.irs-bar{height:12px;top:25px;background-position:0 -60px}.irs-bar-edge{top:25px;height:12px;width:9px;background-position:0 -90px}.irs-shadow{height:3px;top:34px;background:#000;opacity:0.25}.lt-ie9 .irs-shadow{filter: alpha(opacity=25)}.irs-slider{width:16px;height:18px;top:22px;background-position:0 -120px}.irs-slider.state_hover,.irs-slider:hover{background-position:0 -150px}.irs-max,.irs-min{color:#999;font-size:10px;line-height:1.333;text-shadow:none;top:0;padding:1px 3px;background:#e1e4e9;-moz-border-radius:4px;border-radius:4px}.irs-from,.irs-single,.irs-to{color:#fff;font-size:10px;line-height:1.333;text-shadow:none;padding:1px 5px;background:#2196F3;-moz-border-radius:4px;border-radius:4px}.irs-from:after,.irs-single:after,.irs-to:after{position:absolute;display:block;content:"";bottom:-6px;left:50%;width:0;height:0;margin-left:-3px;overflow:hidden;border:3px solid transparent;border-top-color:#2196F3}.irs-grid-pol{background:#e1e4e9}.irs-grid-text{color:#999}.irs-disabled{}

View File

@@ -70,7 +70,11 @@ body {
}
.document {
padding: 0.5rem;
padding: 0.3rem;
}
.card-text:last-child {
margin-top: -1px;
}
.document p {
@@ -106,11 +110,33 @@ body {
background-color: #e0e0e0;
}
.badge {
margin-right: 3px;
}
.badge-delete {
margin-right: -2px;
margin-left: 2px;
margin-top: -1px;
font-family: monospace;
font-size: 90%;
background: rgba(0,0,0,0.2);
padding: 0.1em 0.4em;
color: white;
cursor: pointer;
}
.badge-text {
color: #FFFFFF;
background-color: #FAAB3C;
}
.add-tag-button {
cursor: pointer;
color: #212529;
background-color: #e0e0e0;
}
.card-img-overlay {
pointer-events: none;
padding: 0.75rem;
@@ -131,21 +157,25 @@ body {
overflow: hidden;
}
.badge {
margin-right: 3px;
.card-img-top {
border-top-left-radius: 0;
border-top-right-radius: 0;
}
.fit {
display: block;
min-width: 64px;
max-width: 100%;
max-height: 175px;
max-height: 400px;
margin: 0 auto 0;
padding: 3px 3px 0 3px;
width: auto;
height: auto;
}
.img-padding {
padding: 4px 4px 0 4px;
}
.fit-sm {
display: block;
max-width: 64px;
@@ -162,6 +192,10 @@ body {
width: 100%;
}
.bricklayer {
/*max-width: 100%;*/
}
@media screen and (max-width: 1200px) {
.bricklayer-column {
max-width: 100%;
@@ -205,6 +239,7 @@ mark {
margin: 3px;
white-space: normal;
color: #000;
overflow: hidden;
}
.irs-single, .irs-from, .irs-to {
@@ -374,3 +409,15 @@ mark {
float: right;
margin-bottom: 10px;
}
.wholerow {
outline: none !important;
}
.stat > .card-body {
padding: 0.7em 1.25em;
}
#modal-body > .img-wrapper {
margin-bottom: 1em;
}

File diff suppressed because one or more lines are too long

1
src/static/js/8_md5.min.js vendored Normal file
View File

@@ -0,0 +1 @@
!function(n){"use strict";function d(n,t){var r=(65535&n)+(65535&t);return(n>>16)+(t>>16)+(r>>16)<<16|65535&r}function f(n,t,r,e,o,u){return d((c=d(d(t,n),d(e,u)))<<(f=o)|c>>>32-f,r);var c,f}function l(n,t,r,e,o,u,c){return f(t&r|~t&e,n,t,o,u,c)}function v(n,t,r,e,o,u,c){return f(t&e|r&~e,n,t,o,u,c)}function g(n,t,r,e,o,u,c){return f(t^r^e,n,t,o,u,c)}function m(n,t,r,e,o,u,c){return f(r^(t|~e),n,t,o,u,c)}function i(n,t){var r,e,o,u;n[t>>5]|=128<<t%32,n[14+(t+64>>>9<<4)]=t;for(var c=1732584193,f=-271733879,i=-1732584194,a=271733878,h=0;h<n.length;h+=16)c=l(r=c,e=f,o=i,u=a,n[h],7,-680876936),a=l(a,c,f,i,n[h+1],12,-389564586),i=l(i,a,c,f,n[h+2],17,606105819),f=l(f,i,a,c,n[h+3],22,-1044525330),c=l(c,f,i,a,n[h+4],7,-176418897),a=l(a,c,f,i,n[h+5],12,1200080426),i=l(i,a,c,f,n[h+6],17,-1473231341),f=l(f,i,a,c,n[h+7],22,-45705983),c=l(c,f,i,a,n[h+8],7,1770035416),a=l(a,c,f,i,n[h+9],12,-1958414417),i=l(i,a,c,f,n[h+10],17,-42063),f=l(f,i,a,c,n[h+11],22,-1990404162),c=l(c,f,i,a,n[h+12],7,1804603682),a=l(a,c,f,i,n[h+13],12,-40341101),i=l(i,a,c,f,n[h+14],17,-1502002290),c=v(c,f=l(f,i,a,c,n[h+15],22,1236535329),i,a,n[h+1],5,-165796510),a=v(a,c,f,i,n[h+6],9,-1069501632),i=v(i,a,c,f,n[h+11],14,643717713),f=v(f,i,a,c,n[h],20,-373897302),c=v(c,f,i,a,n[h+5],5,-701558691),a=v(a,c,f,i,n[h+10],9,38016083),i=v(i,a,c,f,n[h+15],14,-660478335),f=v(f,i,a,c,n[h+4],20,-405537848),c=v(c,f,i,a,n[h+9],5,568446438),a=v(a,c,f,i,n[h+14],9,-1019803690),i=v(i,a,c,f,n[h+3],14,-187363961),f=v(f,i,a,c,n[h+8],20,1163531501),c=v(c,f,i,a,n[h+13],5,-1444681467),a=v(a,c,f,i,n[h+2],9,-51403784),i=v(i,a,c,f,n[h+7],14,1735328473),c=g(c,f=v(f,i,a,c,n[h+12],20,-1926607734),i,a,n[h+5],4,-378558),a=g(a,c,f,i,n[h+8],11,-2022574463),i=g(i,a,c,f,n[h+11],16,1839030562),f=g(f,i,a,c,n[h+14],23,-35309556),c=g(c,f,i,a,n[h+1],4,-1530992060),a=g(a,c,f,i,n[h+4],11,1272893353),i=g(i,a,c,f,n[h+7],16,-155497632),f=g(f,i,a,c,n[h+10],23,-1094730640),c=g(c,f,i,a,n[h+13],4,681279174),a=g(a,c,f,i,n[h],11,-358537222),i=g(i,a,c,f,n[h+3],16,-722521979),f=g(f,i,a,c,n[h+6],23,76029189),c=g(c,f,i,a,n[h+9],4,-640364487),a=g(a,c,f,i,n[h+12],11,-421815835),i=g(i,a,c,f,n[h+15],16,530742520),c=m(c,f=g(f,i,a,c,n[h+2],23,-995338651),i,a,n[h],6,-198630844),a=m(a,c,f,i,n[h+7],10,1126891415),i=m(i,a,c,f,n[h+14],15,-1416354905),f=m(f,i,a,c,n[h+5],21,-57434055),c=m(c,f,i,a,n[h+12],6,1700485571),a=m(a,c,f,i,n[h+3],10,-1894986606),i=m(i,a,c,f,n[h+10],15,-1051523),f=m(f,i,a,c,n[h+1],21,-2054922799),c=m(c,f,i,a,n[h+8],6,1873313359),a=m(a,c,f,i,n[h+15],10,-30611744),i=m(i,a,c,f,n[h+6],15,-1560198380),f=m(f,i,a,c,n[h+13],21,1309151649),c=m(c,f,i,a,n[h+4],6,-145523070),a=m(a,c,f,i,n[h+11],10,-1120210379),i=m(i,a,c,f,n[h+2],15,718787259),f=m(f,i,a,c,n[h+9],21,-343485551),c=d(c,r),f=d(f,e),i=d(i,o),a=d(a,u);return[c,f,i,a]}function a(n){for(var t="",r=32*n.length,e=0;e<r;e+=8)t+=String.fromCharCode(n[e>>5]>>>e%32&255);return t}function h(n){var t=[];for(t[(n.length>>2)-1]=void 0,e=0;e<t.length;e+=1)t[e]=0;for(var r=8*n.length,e=0;e<r;e+=8)t[e>>5]|=(255&n.charCodeAt(e/8))<<e%32;return t}function e(n){for(var t,r="0123456789abcdef",e="",o=0;o<n.length;o+=1)t=n.charCodeAt(o),e+=r.charAt(t>>>4&15)+r.charAt(15&t);return e}function r(n){return unescape(encodeURIComponent(n))}function o(n){return a(i(h(t=r(n)),8*t.length));var t}function u(n,t){return function(n,t){var r,e,o=h(n),u=[],c=[];for(u[15]=c[15]=void 0,16<o.length&&(o=i(o,8*n.length)),r=0;r<16;r+=1)u[r]=909522486^o[r],c[r]=1549556828^o[r];return e=i(u.concat(h(t)),512+8*t.length),a(i(c.concat(e),640))}(r(n),r(t))}function t(n,t,r){return t?r?u(t,n):e(u(t,n)):r?o(n):e(o(n))}"function"==typeof define&&define.amd?define(function(){return t}):"object"==typeof module&&module.exports?module.exports=t:n.md5=t}(this);

3
src/static/js/auto-complete.min.js vendored Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -22,23 +22,17 @@ function gifOver(thumbnail, hit) {
thumbnail.addEventListener("mouseout", function () {
//Reset timer
thumbnail.mouseStayedOver = false;
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);
})
}
function getContentHighlight(hit) {
const re = RegExp(/<mark>/g);
const sortByMathCount = (a, b) => {
return b.match(re).length - a.match(re).length;
};
if (hit.hasOwnProperty("highlight")) {
if (hit["highlight"].hasOwnProperty("content")) {
return hit["highlight"]["content"].sort(sortByMathCount)[0];
return hit["highlight"]["content"][0];
} else if (hit["highlight"].hasOwnProperty("content.nGram")) {
return hit["highlight"]["content.nGram"].sort(sortByMathCount)[0];
return hit["highlight"]["content.nGram"][0];
}
}
@@ -77,9 +71,11 @@ function shouldPlayVideo(hit) {
return mime &&
mime.startsWith("video/") &&
!("parent" in hit["_source"]) &&
hit["_source"]["extension"] !== "mkv" &&
hit["_source"]["extension"] !== "avi" &&
videoc !== "hevc" &&
videoc !== "mpeg1video" &&
videoc !== "mpeg2video" &&
videoc !== "wmv3";
}
@@ -92,6 +88,7 @@ function shouldDisplayRawImage(hit) {
hit["_source"]["mime"] &&
!hit["_source"]["parent"] &&
hit["_source"]["videoc"] !== "tiff" &&
hit["_source"]["videoc"] !== "raw" &&
hit["_source"]["videoc"] !== "ppm";
}
@@ -157,33 +154,62 @@ function getTags(hit, mimeCategory) {
// User tags
if (hit["_source"].hasOwnProperty("tag")) {
hit["_source"]["tag"].forEach(tag => {
const userTag = document.createElement("span");
userTag.setAttribute("class", "badge badge-pill badge-user");
const tokens = tag.split("#");
if (tokens.length > 1) {
const bg = "#" + tokens[1];
const fg = lum(tokens[1]) > 40 ? "#000" : "#fff";
userTag.setAttribute("style", `background-color: ${bg}; color: ${fg}`);
}
const name = tokens[0].split(".")[tokens[0].split(".").length - 1];
userTag.appendChild(document.createTextNode(name));
tags.push(userTag);
tags.push(makeUserTag(tag, hit));
})
}
return tags
}
function makeUserTag(tag, hit) {
const userTag = document.createElement("span");
userTag.setAttribute("class", "badge badge-pill badge-user");
userTag.setAttribute("title", tag.split("#")[0])
const tokens = tag.split("#");
if (tokens.length > 1) {
const bg = "#" + tokens[1];
const fg = lum(tokens[1]) > 50 ? "#000" : "#fff";
userTag.setAttribute("style", `background-color: ${bg}; color: ${fg}`);
}
const deleteButton = document.createElement("span");
deleteButton.setAttribute("class", "badge badge-pill badge-delete")
deleteButton.setAttribute("title", "Delete tag")
deleteButton.appendChild(document.createTextNode("X"));
deleteButton.addEventListener("click", () => {
deleteTag(tag, hit).then(() => {
userTag.remove();
});
});
userTag.addEventListener("mouseenter", () => userTag.appendChild(deleteButton));
userTag.addEventListener("mouseleave", () => deleteButton.remove());
const name = tokens[0].split(".")[tokens[0].split(".").length - 1];
userTag.appendChild(document.createTextNode(name));
return userTag;
}
function infoButtonCb(hit) {
return () => {
getDocumentInfo(hit["_id"]).then(doc => {
$("#modal-body").empty()
$("#modal-title").text(doc["name"] + ext(hit));
if (doc["mime"]) {
const mimeCategory = doc["mime"].split("/")[0];
const imgWrapper = document.createElement("div");
imgWrapper.setAttribute("style", "position: relative");
imgWrapper.setAttribute("class", "img-wrapper");
makeThumbnail(mimeCategory, hit, imgWrapper, false);
$("#modal-body").append(imgWrapper);
}
const tbody = $("<tbody>");
$("#modal-body").empty()
$("#modal-body")
.append($("<table class='table table-sm'>")
.append($("<thead>")
.append($("<tr>")
@@ -194,9 +220,19 @@ function infoButtonCb(hit) {
.append(tbody)
);
tbody.append($("<tr>")
.append($("<td>").text("index"))
.append($("<td>").text(`[${indexMap[doc["index"]]}]`))
).append($("<tr>")
.append($("<td>").text("mtime"))
.append($("<td>")
.text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " "))
.attr("title", doc["mtime"]))
);
const displayFields = new Set([
"mime", "size", "mtime", "path", "title", "width", "height", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag"
"mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
"modified_by", "pages"
]);
Object.keys(doc)
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))
@@ -342,9 +378,31 @@ function createDocCard(hit) {
docCardBody.appendChild(tagContainer);
attachTagContainerEventListener(tagContainer, hit);
return docCard;
}
function attachTagContainerEventListener(tagContainer, hit) {
const sizeTag = Array.from(tagContainer.children).find(child => child.tagName === "SMALL");
const addTagButton = document.createElement("span");
addTagButton.setAttribute("class", "badge badge-pill add-tag-button");
addTagButton.appendChild(document.createTextNode("+Add"));
tagContainer.addEventListener("mouseenter", () => tagContainer.insertBefore(addTagButton, sizeTag));
tagContainer.addEventListener("mouseleave", () => addTagButton.remove());
addTagButton.addEventListener("click", () => {
tagBar.value = "";
currentDocToTag = hit;
currentTagCallback = tag => {
tagContainer.insertBefore(makeUserTag(tag, hit), sizeTag);
}
$("#tagModal").modal("show");
tagBar.focus();
});
}
function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
if (!hit["_source"].hasOwnProperty("thumbnail")) {
@@ -355,9 +413,13 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
if (small) {
thumbnail.setAttribute("class", "fit-sm");
} else {
thumbnail.setAttribute("class", "card-img-top fit");
if (hit["_source"].hasOwnProperty("parent")) {
thumbnail.setAttribute("class", "card-img-top fit img-padding");
} else {
thumbnail.setAttribute("class", "card-img-top fit");
}
}
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);
if (shouldDisplayRawImage(hit)) {
thumbnail.addEventListener("click", () => {
@@ -417,7 +479,6 @@ function createDocLine(hit) {
if (hit["_source"].hasOwnProperty("parent")) {
line.classList.add("sub-document");
isSubDocument = true;
}
const infoButton = makeInfoButton(hit);
@@ -490,6 +551,8 @@ function createDocLine(hit) {
pathLine.appendChild(path);
pathLine.appendChild(tagContainer);
attachTagContainerEventListener(tagContainer, hit);
return line;
}

View File

@@ -1,4 +1,4 @@
const SIZE = 40;
const SIZE = 60;
let mimeMap = [];
let tagMap = [];
let mimeTree;
@@ -6,6 +6,9 @@ let tagTree;
let searchBar = document.getElementById("searchBar");
let pathBar = document.getElementById("pathBar");
let tagBar = document.getElementById("tagBar");
let currentDocToTag = null;
let currentTagCallback = null;
let lastDoc = null;
let reachedEnd = false;
let docCount = 0;
@@ -74,13 +77,154 @@ function showEsError() {
window.onload = () => {
CONF.load();
new autoComplete({
selector: '#pathBar',
minChars: 1,
delay: 400,
renderItem: function (item) {
return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>';
},
source: async function (term, suggest) {
if (!CONF.options.suggestPath) {
return []
}
term = term.toLowerCase();
const choices = await getPathChoices();
let matches = [];
for (let i = 0; i < choices.length; i++) {
if (~choices[i].toLowerCase().indexOf(term)) {
matches.push(choices[i]);
}
}
suggest(matches.sort());
},
onSelect: function () {
searchDebounced();
}
});
searchBar.addEventListener("keyup", searchDebounced);
pathBar.addEventListener("keyup", e => {
if (e.key === "Enter") {
searchDebounced();
}
});
new autoComplete({
selector: '#tagBar',
minChars: 1,
delay: 200,
renderItem: function (item) {
return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item.split("#")[0] + '</div>';
},
source: async function (term, suggest) {
term = term.toLowerCase();
const choices = await getTagChoices();
let matches = [];
for (let i = 0; i < choices.length; i++) {
if (~choices[i].toLowerCase().indexOf(term)) {
matches.push(choices[i]);
}
}
suggest(matches.sort());
},
onSelect: function (e, item) {
const name = item.split("#")[0];
const color = "#" + item.split("#")[1];
$("#tag-color").val(color);
$("#tag-color").trigger("keyup", color);
tagBar.value = name;
e.preventDefault();
}
});
[tagBar, document.getElementById("tag-color")].forEach(elem => {
elem.addEventListener("keyup", e => {
if (e.key === "Enter" && tagBar.value.length > 0) {
const tag = tagBar.value + document.getElementById("tag-color").value;
saveTag(tag, currentDocToTag).then(() => currentTagCallback(tag));
}
});
})
$("#tag-color").colorpicker({
format: "hex",
sliders: {
saturation: {
selector: '.colorpicker-saturation',
callLeft: 'setSaturationRatio',
callTop: 'setValueRatio'
},
hue: {
selector: '.colorpicker-hue',
maxLeft: 0,
callLeft: false,
callTop: 'setHueRatio'
}
}
});
initTagTree();
updateTagTree();
};
function saveTag(tag, hit) {
const relPath = hit["_source"]["path"] + (hit["_source"]["path"] ? "/" : "") + hit["_source"]["name"] + ext(hit);
return $.jsonPost("/tag/" + hit["_source"]["index"], {
delete: false,
name: tag,
doc_id: hit["_id"],
path_md5: md5(relPath)
}).then(() => {
tagBar.blur();
$("#tagModal").modal("hide");
$.toast({
heading: "Tag added",
text: "Tag saved to index storage and updated in ElasticSearch",
stack: 3,
bgColor: "#00a4bc",
textColor: "#fff",
position: 'bottom-right',
hideAfter: 3000,
loaderBg: "#08c7e8",
});
window.setTimeout(updateTagTree, 2000);
})
}
function deleteTag(tag, hit) {
const relPath = hit["_source"]["path"] + "/" + hit["_source"]["name"] + ext(hit);
return $.jsonPost("/tag/" + hit["_source"]["index"], {
delete: true,
name: tag,
doc_id: hit["_id"],
path_md5: md5(relPath)
}).then(() => {
$.toast({
heading: "Tag deleted",
text: "Tag deleted index storage and updated in ElasticSearch",
stack: 3,
bgColor: "#00a4bc",
textColor: "#fff",
position: 'bottom-right',
hideAfter: 3000,
loaderBg: "#08c7e8",
});
window.setTimeout(updateTagTree, 2000);
})
}
function toggleFuzzy() {
searchDebounced();
}
$.jsonPost("i").then(resp => {
$.get("i").then(resp => {
const urlIndices = (new URLSearchParams(location.search)).get("i");
resp["indices"].forEach(idx => {
@@ -105,10 +249,7 @@ $.jsonPost("i").then(resp => {
});
function getDocumentInfo(id) {
return $.getJSON("d/" + id).fail(e => {
console.log(e);
showEsError();
})
return $.getJSON("d/" + id).fail(showEsError)
}
function handleTreeClick(tree) {
@@ -119,7 +260,7 @@ function handleTreeClick(tree) {
if (node.id === "any") {
if (!node.itree.state.checked) {
tree.deselect();
tree.deselectDeep();
}
} else {
tree.node("any").deselect();
@@ -179,29 +320,15 @@ $.jsonPost("es", {
mimeTree.node("any").select();
});
// Tags tree
$.jsonPost("es", {
aggs: {
tags: {
terms: {
field: "tag",
size: 10000
}
}
},
size: 0,
}).then(resp => {
resp["aggregations"]["tags"]["buckets"]
.sort((a, b) => a["key"].localeCompare(b["key"]))
.forEach(bucket => {
addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
});
tagMap.push({"text": "All", "id": "any"});
function initTagTree() {
tagMap = [{text: "All", id: "any"}];
tagTree = new InspireTree({
selection: {
mode: 'checkbox'
},
checkbox: {
autoCheckChildren: false
},
data: tagMap
});
new InspireTreeDOM(tagTree, {
@@ -209,24 +336,100 @@ $.jsonPost("es", {
});
tagTree.on("node.state.changed", handleTreeClick(tagTree));
tagTree.node("any").select();
searchBusy = false;
});
}
function updateTagTree() {
$.jsonPost("es", {
aggs: {
tags: {
terms: {
field: "tag",
size: 10000
}
}
},
size: 0,
}).then(resp => {
tagMap = [];
resp["aggregations"]["tags"]["buckets"]
.sort((a, b) => a["key"].localeCompare(b["key"]))
.forEach(bucket => {
addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
});
tagTree.removeAll();
tagMap.push({text: "All", id: "any"})
tagTree.addNodes(tagMap);
searchBusy = false;
});
}
function addTag(map, tag, id, count) {
let tags = tag.split("#")[0].split(".");
// let tags = tag.split("#")[0].split(".");
let tags = tag.split(".");
let child = {
id: id,
text: tags.length !== 1 ? tags[0] : `${tags[0]} (${count})`,
children: []
values: [id],
count: count,
text: tags.length !== 1 ? tags[0] : `${tags[0].split("#")[0]} (${count})`,
name: tags[0],
children: [],
isLeaf: tags.length === 1,
//Overwrite base functions
blur: function () {
},
select: function () {
this.state("selected", true);
return this.check()
},
deselect: function () {
this.state("selected", false);
return this.uncheck()
},
uncheck: function () {
if (!this.isLeaf) {
return;
}
baseStateChange('checked', false, 'unchecked', this, false);
this.state('indeterminate', false);
if (this.hasParent()) {
this.getParent().refreshIndeterminateState();
}
this._tree.end();
return this;
},
check: function () {
if (!this.isLeaf) {
return;
}
baseStateChange('checked', true, 'checked', this, false);
if (this.hasParent()) {
this.getParent().refreshIndeterminateState();
}
this._tree.end();
return this;
}
};
let found = false;
map.forEach(node => {
if (node.text === child.text) {
if (node.name.split("#")[0] === child.name.split("#")[0]) {
found = true;
if (tags.length !== 1) {
addTag(node.children, tags.slice(1).join("."), id, count);
} else {
// Same name, different color
node.count += count;
node.text = `${tags[0].split("#")[0]} (${node.count})`;
node.values.push(id);
}
}
});
@@ -278,7 +481,11 @@ function getSelectedNodes(tree) {
//Only get children
if (selected[i].text.indexOf("(") !== -1) {
selectedNodes.push(selected[i].id);
if (selected[i].values) {
selectedNodes.push(selected[i].values);
} else {
selectedNodes.push(selected[i].id);
}
}
}
@@ -332,24 +539,26 @@ function search(after = null) {
let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push([{term: {path: path}}])
filters.push({term: {path: path}})
}
let mimeTypes = getSelectedNodes(mimeTree);
if (!mimeTypes.includes("any")) {
filters.push([{terms: {"mime": mimeTypes}}]);
filters.push({terms: {"mime": mimeTypes}});
}
let tags = getSelectedNodes(tagTree);
if (!tags.includes("any")) {
filters.push([{terms: {"tag": tags}}]);
tags.forEach(tagGroup => {
filters.push({terms: {"tag": tagGroup}})
})
}
if (date_min && date_max) {
filters.push([{range: {mtime: {gte: date_min, lte: date_max}}}])
filters.push({range: {mtime: {gte: date_min, lte: date_max}}})
} else if (date_min) {
filters.push([{range: {mtime: {gte: date_min}}}])
filters.push({range: {mtime: {gte: date_min}}})
} else if (date_max) {
filters.push([{range: {mtime: {lte: date_max}}}])
filters.push({range: {mtime: {lte: date_max}}})
}
let q = {
@@ -385,6 +594,9 @@ function search(after = null) {
q.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
fragment_size: CONF.options.fragmentSize,
number_of_fragments: 1,
order: "score",
fields: {
content: {},
// "content.nGram": {},
@@ -408,6 +620,7 @@ function search(after = null) {
hits.forEach(hit => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
hit["_path_md5"] = md5(hit["_source"]["path"] + (hit["_source"]["path"] ? "/" : "") + hit["_source"]["name"] + ext(hit));
});
if (!after) {
@@ -441,8 +654,6 @@ let searchDebounced = _.debounce(function () {
search()
}, 500);
searchBar.addEventListener("keyup", searchDebounced);
pathBar.addEventListener("keyup", searchDebounced);
//Size slider
$("#sizeSlider").ionRangeSlider({
@@ -584,6 +795,7 @@ function getNextDepth(node) {
text: `${name}/ (${bucket.doc_count})`,
depth: node.depth + 1,
index: node.index,
values: [bucket.key],
children: true,
}
}).filter(x => x !== null)
@@ -607,12 +819,14 @@ function createPathTree(target) {
let pathTree = new InspireTree({
data: function (node, resolve, reject) {
return getNextDepth(node);
}
},
sort: "text"
});
selectedIndices.forEach(index => {
pathTree.addNode({
id: "/" + index,
values: ["/" + index],
text: `/[${indexMap[index]}]`,
index: index,
depth: 0,
@@ -627,3 +841,48 @@ function createPathTree(target) {
pathTree.on("node.click", handlePathTreeClick(pathTree));
}
function getPathChoices() {
return new Promise(getPaths => {
$.jsonPost("es", {
suggest: {
path: {
prefix: pathBar.value,
completion: {
field: "suggest-path",
skip_duplicates: true,
size: 10000
}
}
}
}).then(resp => getPaths(resp["suggest"]["path"][0]["options"].map(opt => opt["_source"]["path"])));
});
}
function getTagChoices() {
return new Promise(getPaths => {
$.jsonPost("es", {
suggest: {
tag: {
prefix: tagBar.value,
completion: {
field: "suggest-tag",
skip_duplicates: true,
size: 10000
}
}
}
}).then(resp => {
const result = [];
resp["suggest"]["tag"][0]["options"].map(opt => opt["_source"]["tag"]).forEach(tags => {
tags.forEach(tag => {
const t = tag.split("#")[0];
if (!result.find(x => x.split("#")[0] === t)) {
result.push(tag);
}
});
});
getPaths(result);
});
});
}

View File

@@ -100,6 +100,9 @@ const _defaults = {
treemapGroupingDepth: 3,
treemapColor: "PuBuGn",
treemapSize: "large",
suggestPath: true,
fragmentSize: 100,
columns: 5
};
function loadSettings() {
@@ -114,6 +117,9 @@ function loadSettings() {
$("#settingTreemapColor").val(CONF.options.treemapColor);
$("#settingTreemapSize").val(CONF.options.treemapSize);
$("#settingTreemapType").val(CONF.options.treemapType);
$("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
$("#settingFragmentSize").val(CONF.options.fragmentSize);
$("#settingColumns").val(CONF.options.columns);
}
function Settings() {
@@ -121,6 +127,7 @@ function Settings() {
this._onUpdate = function () {
$("#fuzzyToggle").prop("checked", this.options.fuzzy);
updateColumnStyle();
};
this.load = function () {
@@ -155,6 +162,9 @@ function updateSettings() {
CONF.options.treemapColor = $("#settingTreemapColor").val();
CONF.options.treemapSize = $("#settingTreemapSize").val();
CONF.options.treemapType = $("#settingTreemapType").val();
CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
CONF.options.fragmentSize = $("#settingFragmentSize").val();
CONF.options.columns = $("#settingColumns").val();
CONF.save();
if (typeof searchDebounced !== "undefined") {
@@ -197,3 +207,26 @@ function toggleTheme() {
}
window.location.reload();
}
function updateColumnStyle() {
const style = document.getElementById("style");
if (style) {
style.innerHTML =
`
@media screen and (min-width: 1500px) {
.container {
max-width: 1440px;
}
.bricklayer-column-sizer {
width: ${100 / CONF.options.columns}% !important;
}
.bricklayer-column {
max-width: ${100 / CONF.options.columns}%;
}
}
}
`
}
}

View File

@@ -6,15 +6,17 @@
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
<link href="css" rel="stylesheet" type="text/css">
<style id="style"></style>
</head>
<body>
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.3.0</span>
<span class="badge badge-pill version">2.9.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="/stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
<a class="btn ml-auto" href="stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings
</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
</nav>
@@ -48,8 +50,11 @@
<div class="col">
<div class="input-group" style="margin-bottom: 0.5em; margin-top: 1em">
<div class="input-group-prepend">
<button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal" data-target="#pathTreeModal">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px"><path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/></svg>
<button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal"
data-target="#pathTreeModal">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px">
<path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/>
</svg>
</button>
</div>
<input id="pathBar" type="search" class="form-control" placeholder="Filter path">
@@ -156,7 +161,8 @@
<i>fried eggs</i> and either <i>eggplant</i> or <i>potato</i>, but will ignore results
containing <i>frittata</i>.</p>
<p>When neither <code>+</code> or <code>|</code> is specified, the default operator is <code>+</code> (and).</p>
<p>When neither <code>+</code> or <code>|</code> is specified, the default operator is
<code>+</code> (and).</p>
<p>When the <b>Fuzzy</b> option is checked, partial matches are also returned.</p>
<br>
<p>For more information, see <a target="_blank"
@@ -189,7 +195,20 @@
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document path</label>
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against
document path</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter
bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label>
@@ -198,6 +217,19 @@
<option value="list">List</option>
</select>
<div class="form-group">
<label for="settingColumns">Maximum column count</label>
<select id="settingColumns" class="form-control form-control-sm">
<option value="3">3</option>
<option value="4">4</option>
<option value="5">5</option>
<option value="6">6</option>
<option value="7">7</option>
<option value="8">8</option>
<option value="9">9</option>
</select>
</div>
<hr/>
<h4>Stats</h4>
@@ -277,6 +309,32 @@
</div>
</div>
<div class="modal" id="tagModal" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Add tag</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div class="form-group">
<div class="row">
<div class="col col-8">
<input type="text" id="tagBar" class="form-control">
</div>
<div class="col col-4">
<input type="text" id="tag-color" value="" class="form-control"/>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div id="searchResults"></div>
</div>

View File

@@ -10,7 +10,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.3.0</span>
<span class="badge badge-pill version">2.9.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"
@@ -29,13 +29,13 @@
</div>
<div id="treemap-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('treemap-card')">Enlarge</button>
<button class="btn stats-btn" onclick="fullScreen('treemap-card')" id="treemap-card-enlarge">Enlarge</button>
<button class="btn stats-btn" onclick="exportTreemap()">Export</button>
<svg id="treemap"></svg>
</div>
<div id="graphs-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('graphs-card')">Enlarge</button>
<button class="btn stats-btn" onclick="fullScreen('graphs-card')" id="graphs-card-enlarge">Enlarge</button>
<div class="graph">
<svg id="agg_mime_size"></svg>
</div>
@@ -77,12 +77,36 @@
path</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
<option value="list">List</option>
</select>
<div class="form-group">
<label for="settingColumns">Maximum column count</label>
<select id="settingColumns" class="form-control form-control-sm">
<option value="3">3</option>
<option value="4">4</option>
<option value="5">5</option>
<option value="6">6</option>
<option value="7">7</option>
<option value="8">8</option>
<option value="9">9</option>
</select>
</div>
<hr/>
<h4>Stats</h4>
@@ -703,7 +727,7 @@ function updateStats() {
const indexId = $("#indices").val();
d3.csv(`/s/${indexId}/1`).then(tabularData => {
d3.csv(`./s/${indexId}/1`).then(tabularData => {
tabularData.forEach(row => {
row.taxonomy = row.path.split("/");
row.size = Number(row.size);
@@ -718,16 +742,16 @@ function updateStats() {
}
});
d3.csv(`/s/${indexId}/2`).then(tabularData => {
d3.csv(`./s/${indexId}/2`).then(tabularData => {
mimeBarSize(tabularData.slice(), mimeSvgSize);
mimeBarCount(tabularData.slice(), mimeSvgCount);
});
d3.csv(`/s/${indexId}/3`).then(tabularData => {
d3.csv(`./s/${indexId}/3`).then(tabularData => {
sizeHistogram(tabularData, sizeHistogramSvg);
});
d3.csv(`/s/${indexId}/4`).then(tabularData => {
d3.csv(`./s/${indexId}/4`).then(tabularData => {
dateHistogram(tabularData, dateHistogramSvg);
});
@@ -765,7 +789,15 @@ window.onload = function () {
function fullScreen(selector) {
const card = document.getElementById(selector);
const btn = document.getElementById(selector + "-enlarge");
card.classList.toggle("full-screen");
if (card.classList.contains("full-screen")) {
btn.innerText = "Shrink";
} else {
btn.innerText = "Enlarge";
}
}
function exportTreemap() {

View File

@@ -2,8 +2,6 @@
#include "io/serialize.h"
#include "ctx.h"
#include <glib.h>
static GHashTable *FlatTree;
static GHashTable *BufferTable;
@@ -22,7 +20,11 @@ typedef struct {
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {
void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;
}
const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring;
char *path = malloc(strlen(json_path) + 1);
@@ -99,8 +101,8 @@ void read_index_into_tables(index_t *index) {
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", index->path, de->d_name);
read_index(file_path, index->desc.uuid, index->desc.type, fill_tables);
snprintf(file_path, PATH_MAX, "%s%s", index->path, de->d_name);
read_index(file_path, index->desc.id, index->desc.type, fill_tables);
}
}
closedir(dir);
@@ -167,7 +169,7 @@ int merge_up(double thresh) {
int size = g_hash_table_size(FlatTree);
LOG_DEBUGF("stats.h", "Merge up iteration (%d merged, %d in tree)", count, size)
LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size)
return count;
}
@@ -184,9 +186,9 @@ void csv_escape(char *dst, const char *str) {
return;
}
while (*ptr++ != 0) {
char c = *ptr;
*out++ = '"';
char c;
while ((c = *ptr++) != 0) {
if (c == '"') {
*out++ = '"';
*out++ = '"';
@@ -194,6 +196,8 @@ void csv_escape(char *dst, const char *str) {
*out++ = c;
}
}
*out++ = '"';
*out = '\0';
}
int open_or_exit(const char *path) {

View File

@@ -3,6 +3,8 @@
#include "sist.h"
#include <pthread.h>
#define MAX_QUEUE_SIZE 10000
typedef void (*thread_func_t)(void *arg);
typedef struct tpool_work {
@@ -26,6 +28,7 @@ typedef struct tpool {
int work_cnt;
int done_cnt;
int free_arg;
int stop;
void (*cleanup_func)();
@@ -79,6 +82,10 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
return 0;
}
while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
usleep(100000);
}
pthread_mutex_lock(&(pool->work_mutex));
if (pool->work_head == NULL) {
pool->work_head = work;
@@ -121,7 +128,9 @@ static void *tpool_worker(void *arg) {
}
work->func(work->arg);
free(work->arg);
if (pool->free_arg) {
free(work->arg);
}
free(work);
}
@@ -138,8 +147,10 @@ static void *tpool_worker(void *arg) {
pthread_mutex_unlock(&(pool->work_mutex));
}
LOG_INFO("tpool.c", "Executing cleaup function")
pool->cleanup_func();
if (pool->cleanup_func != NULL) {
LOG_INFO("tpool.c", "Executing cleanup function")
pool->cleanup_func();
}
pthread_cond_signal(&(pool->working_cond));
pthread_mutex_unlock(&(pool->work_mutex));
@@ -207,13 +218,14 @@ void tpool_destroy(tpool_t *pool) {
* Create a thread pool
* @param thread_cnt Worker threads count
*/
tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
tpool_t *tpool_create(size_t thread_cnt, void cleanup_func(), int free_arg) {
tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt;
pool->work_cnt = 0;
pool->done_cnt = 0;
pool->stop = 0;
pool->free_arg = free_arg;
pool->cleanup_func = cleanup_func;
pool->threads = calloc(sizeof(pthread_t), thread_cnt);

View File

@@ -8,7 +8,7 @@ typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)());
tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg);
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *tm);

View File

@@ -6,11 +6,11 @@
#define INDEX_VERSION_EXTERNAL "_external_v1"
typedef struct index_descriptor {
char uuid[UUID_STR_LEN];
char id[MD5_STR_LENGTH];
char version[64];
long timestamp;
char root[PATH_MAX];
char rewrite_url[8196];
char rewrite_url[8192];
short root_len;
char name[1024];
char type[64];
@@ -19,6 +19,8 @@ typedef struct index_descriptor {
typedef struct index_t {
struct index_descriptor desc;
struct store_t *store;
struct store_t *tag_store;
struct store_t *meta_store;
char path[PATH_MAX];
} index_t;

View File

@@ -2,7 +2,6 @@
#include "src/ctx.h"
#include <wordexp.h>
#include <glib.h>
#define PBSTR "========================================"
#define PBWIDTH 40
@@ -26,10 +25,11 @@ dyn_buffer_t url_escape(char *str) {
}
char *abspath(const char *path) {
wordexp_t w;
wordexp(path, &w, 0);
char *abs = realpath(w.we_wordv[0], NULL);
char *expanded = expandpath(path);
char *abs = realpath(expanded, NULL);
free(expanded);
if (abs == NULL) {
return NULL;
}
@@ -38,16 +38,46 @@ char *abspath(const char *path) {
strcat(abs, "/");
}
wordfree(&w);
return abs;
}
char *expandpath(const char *path) {
wordexp_t w;
wordexp(path, &w, 0);
void shell_escape(char *dst, const char *src) {
const char *ptr = src;
char *out = dst;
while ((*ptr)) {
char c = *ptr++;
char *expanded = malloc(strlen(w.we_wordv[0]) + 2);
strcpy(expanded, w.we_wordv[0]);
if (c == '&' || c == '\n' || c == '|' || c == ';' || c == '<' ||
c == '>' || c == '(' || c == ')' || c == '{' || c == '}') {
*out++ = '\\';
}
*out++ = c;
}
*out = 0;
}
char *expandpath(const char *path) {
char tmp[PATH_MAX * 2];
shell_escape(tmp, path);
wordexp_t w;
wordexp(tmp, &w, 0);
if (w.we_wordv == NULL) {
return NULL;
}
*tmp = '\0';
for (int i = 0; i < w.we_wordc; i++) {
strcat(tmp, w.we_wordv[i]);
if (i != w.we_wordc - 1) {
strcat(tmp, " ");
}
}
char *expanded = malloc(strlen(tmp) + 2);
strcpy(expanded, tmp);
strcat(expanded, "/");
wordfree(&w);
@@ -94,7 +124,7 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
}
GHashTable *incremental_get_table() {
GHashTable *file_table = g_hash_table_new(g_direct_hash, g_direct_equal);
GHashTable *file_table = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
return file_table;
}
@@ -152,7 +182,7 @@ void str_escape(char *dst, const char *str) {
break;
}
cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char)tmp[i]);
cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char) tmp[i]);
}
continue;
}
@@ -198,12 +228,12 @@ void str_unescape(char *dst, const char *str) {
char next = *ptr;
if (next == ESCAPE_CHAR) {
*cur++ = (char)c;
*cur++ = (char) c;
ptr += 1;
} else {
tmp[0] = *(ptr);
tmp[1] = *(ptr + 1);
*cur++ = (char)strtol(tmp, NULL, 16);
*cur++ = (char) strtol(tmp, NULL, 16);
ptr += 2;
}
} else {

View File

@@ -10,6 +10,8 @@
#include "third-party/utf8.h/utf8.h"
#include "libscan/scan.h"
#define MD5_STR_LENGTH 33
char *abspath(const char *path);
@@ -21,25 +23,6 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
GHashTable *incremental_get_table();
__always_inline
static void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(mtime));
}
__always_inline
static int incremental_get(GHashTable *table, unsigned long inode_no) {
if (table != NULL) {
return GPOINTER_TO_INT(g_hash_table_lookup(table, (gpointer) inode_no));
} else {
return 0;
}
}
__always_inline
static int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
return g_hash_table_insert(table, GINT_TO_POINTER(inode_no), GINT_TO_POINTER(1));
}
const char *find_file_in_paths(const char **paths, const char *filename);
@@ -48,4 +31,95 @@ void str_escape(char *dst, const char *str);
void str_unescape(char *dst, const char *str);
static int hex2buf(const char *str, int len, unsigned char *bytes) {
static const uint8_t hashmap[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
for (int pos = 0; pos < len; pos += 2) {
int idx0 = (uint8_t) str[pos + 0];
int idx1 = (uint8_t) str[pos + 1];
bytes[pos / 2] = (uint8_t) (hashmap[idx0] << 4) | hashmap[idx1];
}
return TRUE;
}
__always_inline
static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
static const char hexdig[] = "0123456789abcdef";
const unsigned char *p;
size_t i;
char *s = hex_string;
for (i = 0, p = buf; i < buflen; i++, p++) {
*s++ = hexdig[(*p >> 4) & 0x0f];
*s++ = hexdig[*p & 0x0f];
}
*s = '\0';
}
__always_inline
static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
}
__always_inline
static void incremental_put(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
char *ptr = malloc(MD5_STR_LENGTH);
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}
__always_inline
static int incremental_get(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
if (table != NULL) {
char md5_str[MD5_STR_LENGTH];
buf2hex(path_md5, MD5_DIGEST_LENGTH, md5_str);
return GPOINTER_TO_INT(g_hash_table_lookup(table, md5_str));
} else {
return 0;
}
}
__always_inline
static int incremental_mark_file_for_copy(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
char *ptr = malloc(MD5_STR_LENGTH);
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
}
#endif

View File

@@ -10,8 +10,6 @@
#include <mongoose.h>
#define CHUNK_SIZE 1024 * 1024 * 10
static int has_prefix(const struct mg_str *str, const struct mg_str *prefix) {
return str->len > prefix->len && memcmp(str->p, prefix->p, prefix->len) == 0;
@@ -38,7 +36,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, int le
index_t *get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count; i >= 0; i--) {
if (strcmp(index_id, WebCtx.indices[i].desc.uuid) == 0) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
return &WebCtx.indices[i];
}
}
@@ -53,6 +51,14 @@ store_t *get_store(const char *index_id) {
return NULL;
}
store_t *get_tag_store(const char *index_id) {
index_t *idx = get_index_by_id(index_id);
if (idx != NULL) {
return idx->tag_store;
}
return NULL;
}
void search_index(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html");
mg_send(nc, search_html, sizeof(search_html));
@@ -67,23 +73,25 @@ void stats(struct mg_connection *nc) {
void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (path->len != UUID_STR_LEN + 4) {
if (path->len != MD5_STR_LENGTH + 4) {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
index_t *index = get_index_by_id(arg_uuid);
index_t *index = get_index_by_id(arg_md5);
if (index == NULL) {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
const char *file;
switch (atoi(hm->uri.p + 3 + UUID_STR_LEN)) {
switch (atoi(hm->uri.p + 3 + MD5_STR_LENGTH)) {
case 1:
file = "treemap.csv";
break;
@@ -101,7 +109,7 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st
return;
}
char disposition[8196];
char disposition[8192];
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"", file);
char full_path[PATH_MAX];
@@ -171,37 +179,34 @@ void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) {
void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (path->len != UUID_STR_LEN * 2 + 2) {
if (path->len != 68) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_uuid[UUID_STR_LEN];
char arg_index[UUID_STR_LEN];
char arg_file_md5[MD5_STR_LENGTH];
char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.p + 3, UUID_STR_LEN);
*(arg_index + UUID_STR_LEN - 1) = '\0';
memcpy(arg_uuid, hm->uri.p + 3 + UUID_STR_LEN, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
memcpy(arg_index, hm->uri.p + 3, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
memcpy(arg_file_md5, hm->uri.p + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
*(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
uuid_t uuid;
int ret = uuid_parse(arg_uuid, uuid);
if (ret != 0) {
LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
unsigned char md5_buf[MD5_DIGEST_LENGTH];
hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
store_t *store = get_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
size_t data_len = 0;
char *data = store_read(store, (char *) uuid, sizeof(uuid_t), &data_len);
char *data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
if (data_len != 0) {
send_response_line(nc, 200, data_len, "Content-Type: image/jpeg");
mg_send(nc, data, data_len);
@@ -214,6 +219,7 @@ void search(struct mg_connection *nc, struct http_message *hm) {
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_send_error(nc, 500, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -223,13 +229,12 @@ void search(struct mg_connection *nc, struct http_message *hm) {
*(body + hm->body.len) = '\0';
char url[4096];
snprintf(url, 4096, "%s/sist2/_search", WebCtx.es_url);
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->user_data = web_post_async(url, body);
free(body);
}
int serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
@@ -242,7 +247,7 @@ int serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
char url[8196];
char url[8192];
snprintf(url, sizeof(url),
"%s%s/%s%s%s",
idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
@@ -277,7 +282,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)
char disposition[8196];
char disposition[8192];
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"",
name, strlen(ext) == 0 ? "" : ".", ext);
@@ -294,7 +299,7 @@ void index_info(struct mg_connection *nc) {
cJSON *idx_json = cJSON_CreateObject();
cJSON_AddStringToObject(idx_json, "name", idx->desc.name);
cJSON_AddStringToObject(idx_json, "version", idx->desc.version);
cJSON_AddStringToObject(idx_json, "id", idx->desc.uuid);
cJSON_AddStringToObject(idx_json, "id", idx->desc.id);
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
cJSON_AddItemToArray(arr, idx_json);
}
@@ -312,22 +317,24 @@ void index_info(struct mg_connection *nc) {
void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (path->len != UUID_STR_LEN + 2) {
if (path->len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
cJSON *doc = elastic_get_document(arg_uuid);
cJSON *doc = elastic_get_document(arg_md5);
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -335,6 +342,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -350,17 +358,18 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (path->len != UUID_STR_LEN + 2) {
if (path->len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
const char *next = arg_uuid;
const char *next = arg_md5;
cJSON *doc = NULL;
cJSON *index_id = NULL;
cJSON *source = NULL;
@@ -371,6 +380,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -386,6 +396,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
if (idx == NULL) {
cJSON_Delete(doc);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_send_error(nc, 404, NULL);
return;
}
@@ -410,6 +421,179 @@ void status(struct mg_connection *nc) {
nc->flags |= MG_F_SEND_AND_CLOSE;
}
typedef struct {
char *name;
int delete;
char *path_md5_str;
char *doc_id;
} tag_req_t;
tag_req_t *parse_tag_request(cJSON *json) {
if (!cJSON_IsObject(json)) {
return NULL;
}
cJSON *arg_name = cJSON_GetObjectItem(json, "name");
if (arg_name == NULL || !cJSON_IsString(arg_name)) {
return NULL;
}
cJSON *arg_delete = cJSON_GetObjectItem(json, "delete");
if (arg_delete == NULL || !cJSON_IsBool(arg_delete)) {
return NULL;
}
cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
return NULL;
}
cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
return NULL;
}
tag_req_t *req = malloc(sizeof(tag_req_t));
req->delete = arg_delete->valueint;
req->name = arg_name->valuestring;
req->path_md5_str = arg_path_md5->valuestring;
req->doc_id = arg_doc_id->valuestring;
return req;
}
void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (path->len != MD5_STR_LENGTH + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.p + 5, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request")
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
store_t *store = get_tag_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.p, hm->body.len);
*(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body);
tag_req_t *arg_req = parse_tag_request(json);
if (arg_req == NULL) {
LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
cJSON_Delete(json);
free(body);
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
cJSON *arr = NULL;
size_t data_len = 0;
const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
if (data_len == 0) {
arr = cJSON_CreateArray();
} else {
arr = cJSON_Parse(data);
}
if (arg_req->delete) {
if (data_len > 0) {
cJSON *element = NULL;
int i = 0;
cJSON_ArrayForEach(element, arr) {
if (strcmp(element->valuestring, arg_req->name) == 0) {
cJSON_DeleteItemFromArray(arr, i);
break;
}
i++;
}
}
char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192,
"{"
" \"script\" : {"
" \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\","
" \"lang\": \"painless\","
" \"params\" : {"
" \"tag\" : \"%s\""
" }"
" }"
"}", arg_req->name
);
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->user_data = web_post_async(url, buf);
} else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192,
"{"
" \"script\" : {"
" \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\","
" \"lang\": \"painless\","
" \"params\" : {"
" \"tag\" : \"%s\""
" }"
" }"
"}", arg_req->name
);
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->user_data = web_post_async(url, buf);
}
char *json_str = cJSON_PrintUnformatted(arr);
store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
store_flush(store);
free(arg_req);
free(json_str);
cJSON_Delete(json);
cJSON_Delete(arr);
free(body);
}
int validate_auth(struct mg_connection *nc, struct http_message *hm) {
char user[256] = {0,};
char pass[256] = {0,};
int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass));
if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n"
"WWW-Authenticate: Basic realm=\"sist2\"\r\n"
"Content-Length: 0\r\n\r\n");
nc->flags |= MG_F_SEND_AND_CLOSE;
return FALSE;
}
return TRUE;
}
static void ev_router(struct mg_connection *nc, int ev, void *p) {
struct mg_str scheme;
struct mg_str user_info;
@@ -423,21 +607,14 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
struct http_message *hm = (struct http_message *) p;
if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
if (WebCtx.auth_enabled == TRUE) {
char user[256] = {0,};
char pass[256] = {0,};
int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass));
if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n"
"WWW-Authenticate: Basic realm=\"sist2\"\r\n"
"Content-Length: 0\r\n\r\n");
nc->flags |= MG_F_SEND_AND_CLOSE;
if (!validate_auth(nc, hm)) {
return;
}
}
@@ -466,9 +643,17 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
thumbnail(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/s/")))) {
stats_files(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/tag/")))) {
if (WebCtx.tag_auth_enabled == TRUE) {
if (!validate_auth(nc, hm)) {
return;
}
}
tag(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
document_info(nc, hm, &path);
} else {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
}
@@ -476,11 +661,11 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
if (nc->user_data != NULL) {
//Waiting for ES reply
subreq_ctx_t *ctx = (subreq_ctx_t *) nc->user_data;
mg_mgr_poll(&ctx->mgr, 0);
web_post_async_poll(ctx);
if (ctx->ev_data.done == TRUE) {
if (ctx->done == TRUE) {
response_t *r = ctx->ev_data.resp;
response_t *r = ctx->response;
if (r->status_code == 200) {
send_response_line(nc, 200, r->size, "Content-Type: application/json");
@@ -499,10 +684,12 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
free(json_str);
free(tmp);
}
//todo return error code
mg_http_send_error(nc, 500, NULL);
}
free_response(r);
free(ctx->data);
free(ctx);
nc->flags |= MG_F_SEND_AND_CLOSE;
nc->user_data = NULL;
}

File diff suppressed because one or more lines are too long

75
tests/test_scan.py Normal file
View File

@@ -0,0 +1,75 @@
import unittest
import subprocess
import shutil
import json
import os
TEST_FILES = "third-party/libscan/libscan-test-files/test_files"
def copy_files(files):
base = os.path.basename(files)
new_path = os.path.join("/tmp/sist2_test/", base)
shutil.rmtree(new_path, ignore_errors=True)
shutil.copytree(files, new_path)
return new_path
def sist2(*args):
return subprocess.check_output(
args=["./sist2_debug", *args],
)
def sist2_index(files, *args):
path = copy_files(files)
shutil.rmtree("i", ignore_errors=True)
sist2("scan", path, "-o", "i", *args)
return iter(sist2_index_to_dict("i"))
def sist2_incremental_index(files, func=None, *args):
path = copy_files(files)
if func:
func(path)
shutil.rmtree("i_inc", ignore_errors=True)
sist2("scan", path, "-o", "i_inc", "--incremental", "i", *args)
return iter(sist2_index_to_dict("i_inc"))
def sist2_index_to_dict(index):
res = subprocess.check_output(
args=["./sist2_debug", "index", "--print", index],
)
for line in res.splitlines():
if line:
yield json.loads(line)
class ScanTest(unittest.TestCase):
def test_incremental1(self):
def remove_files(path):
os.remove(os.path.join(path, "msdoc/test1.doc"))
os.remove(os.path.join(path, "msdoc/test2.doc"))
def add_files(path):
with open(os.path.join(path, "newfile1"), "w"):
pass
with open(os.path.join(path, "newfile2"), "w"):
pass
with open(os.path.join(path, "newfile3"), "w"):
pass
file_count = sum(1 for _ in sist2_index(TEST_FILES))
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
if __name__ == "__main__":
unittest.main()