Compare commits

..

13 Commits

Author SHA1 Message Date
2766f2419f Better support for .doc files 2020-12-16 20:06:06 -05:00
7a88a2c956 Update README.md 2020-12-08 18:16:19 -05:00
c229a0bd05 Update README.md 2020-11-17 12:34:54 -05:00
7fce56da03 Add .doc support 2020-11-15 21:18:02 -05:00
acc557
50a1ed43fe Use relative path for loading csv in stats 2020-11-13 08:30:43 -05:00
acc557
4117b88cbb Update search.html
Fix relative stats URL
2020-11-13 08:30:22 -05:00
c6bfe6ec83 Fix relative image URL #122 2020-11-12 08:06:57 -05:00
3ae6e9b604 update build instructions 2020-11-10 21:52:45 -05:00
c82b1658fe Update libmupdf 2020-10-24 15:38:31 -04:00
9386f75d78 Optionally ES schema from file #117 2020-10-24 15:38:31 -04:00
31cc2c0c39 sidecar files #114, version bump 2020-10-24 15:38:31 -04:00
a468cab46d Fix typo 2020-10-24 15:38:31 -04:00
2aea57b985 Fix #110 2020-10-24 15:38:31 -04:00
50 changed files with 560 additions and 1058 deletions

View File

@@ -1,25 +0,0 @@
.idea
*/thumbs
*.cbp
CMakeCache.txt
CMakeFiles
cmake-build-debug
cmake_install.cmake
Makefile
*.out
LOG
sist2*
index.sist2/
bundle*.css
bundle.js
**/*.a
**/vgcore.*
build/
.git/
third-party/libscan/libscan-test-files/
**/ext_ffmpeg
**/ext_libmobi
**/scan_a_test
Dockerfile
*.idx/
VERSION

View File

@@ -1,72 +0,0 @@
kind: pipeline
type: docker
name: amd64
platform:
os: linux
arch: amd64
steps:
- name: build
image: simon987/sist2-build
commands:
- ./ci/build.sh
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
- name: scp files
image: appleboy/drone-scp
settings:
host:
from_secret: SSH_HOST
port:
from_secret: SSH_PORT
user:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./VERSION
- ./sist2-x64-linux
- ./sist2-x64-linux-debug
---
kind: pipeline
type: docker
name: arm64
platform:
arch: arm64
steps:
- name: build
image: simon987/sist2-build-arm64
commands:
- ./ci/build_arm64.sh
- name: scp files
image: appleboy/drone-scp
settings:
host:
from_secret: SSH_HOST
port:
from_secret: SSH_PORT
user:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./sist2-arm64-linux

View File

@@ -1,40 +0,0 @@
---
name: "🐞 Bug Report"
about: Submit a bug report
title: ''
labels: bug
assignees: ''
---
**Device Information (please complete the following information):**
- OS: `[e.g., Ubuntu 20.04, WSL2]`
- Deployment: `[Linux, Linux ARM64 or Docker]`
- Browser *(if relevant)*: `[e.g., chrome, safari]`
- SIST2 Version: `[e.g., v2.9.0]`
- Elasticsearch Version *(if relevant)* : ``
**Command with arguments**
<!-- `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0` -->
**Describe the bug**
<!-- A clear and concise description of what the bug is. -->
**Steps To Reproduce**
Please be specific!
1. Go to '...'
2. Click on '....'
3. etc.
**Expected behavior**
<!-- A clear and concise description of what you expected to happen. -->
**Actual Behavior**
<!-- A clear and concise description of what actually happens. -->
**Screenshots**
<!-- If applicable, add screenshots to help explain your problem. -->
**Additional context**
<!-- Add any other context about the problem here. If applicable, please include why you think the bug is occurring and/or troubleshooting you have already performed. -->
<!-- If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net. -->

View File

@@ -1,5 +0,0 @@
blank_issues_enabled: false
contact_links:
- name: SIST2 Documentation
url: https://github.com/simon987/sist2/blob/master/docs/USAGE.md
about: Check out the SIST2 documentation for answers to common questions

View File

@@ -1,18 +0,0 @@
---
name: "🚀 Feature Request"
about: Suggest an idea for SIST2
title: ''
assignees: ''
---
**Which SIST2 component is your Feature Request related to?**
<!-- e.g., Scan, Index, or Web? -->
**Is your feature request related to a problem? Please describe.**
<!-- A clear and concise description of what the problem is. e.g., "I'm always frustrated when [...]" -->
**What would you like to see happen?**
<!-- A clear and concise description of what you want to happen. -->
**Additional context**
<!-- Add any other context or screenshots about the feature request here. -->

3
.gitignore vendored
View File

@@ -1,5 +1,6 @@
.idea
thumbs
test
*.cbp
CMakeCache.txt
CMakeFiles
@@ -16,5 +17,3 @@ bundle.js
vgcore.*
build/
third-party/
*.idx/
VERSION

69
.teamcity/settings.kts vendored Normal file
View File

@@ -0,0 +1,69 @@
import jetbrains.buildServer.configs.kotlin.v2019_2.*
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.ExecBuildStep
import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.exec
import jetbrains.buildServer.configs.kotlin.v2019_2.triggers.vcs
import jetbrains.buildServer.configs.kotlin.v2019_2.vcs.GitVcsRoot
/*
The settings script is an entry point for defining a TeamCity
project hierarchy. The script should contain a single call to the
project() function with a Project instance or an init function as
an argument.
VcsRoots, BuildTypes, Templates, and subprojects can be
registered inside the project using the vcsRoot(), buildType(),
template(), and subProject() methods respectively.
To debug settings scripts in command-line, run the
mvnDebug org.jetbrains.teamcity:teamcity-configs-maven-plugin:generate
command and attach your debugger to the port 8000.
To debug in IntelliJ Idea, open the 'Maven Projects' tool window (View
-> Tool Windows -> Maven Projects), find the generate task node
(Plugins -> teamcity-configs -> teamcity-configs:generate), the
'Debug' option is available in the context menu for the task.
*/
version = "2019.2"
project {
vcsRoot(HttpsGithubComSimon987sist2refsHeadsMaster)
buildType(Build)
}
object Build : BuildType({
name = "Build"
artifactRules = """
sist2
sist2_scan
""".trimIndent()
vcs {
root(HttpsGithubComSimon987sist2refsHeadsMaster)
}
steps {
exec {
name = "Build"
path = "./ci/build.sh"
dockerImage = "simon987/general_ci"
dockerImagePlatform = ExecBuildStep.ImagePlatform.Linux
dockerPull = true
}
}
triggers {
vcs {
}
}
})
object HttpsGithubComSimon987sist2refsHeadsMaster : GitVcsRoot({
name = "https://github.com/simon987/sist2#refs/heads/master"
url = "https://github.com/simon987/sist2"
})

View File

@@ -36,15 +36,15 @@ add_executable(sist2
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(PkgConfig REQUIRED)
pkg_search_module(GLIB REQUIRED glib-2.0)
find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-glib CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_library(UUID_LIB NAMES uuid)
find_package(CURL CONFIG REQUIRED)
#find_package(OpenSSL REQUIRED)
target_include_directories(
sist2 PUBLIC
@@ -52,7 +52,6 @@ target_include_directories(
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/
${GLIB_INCLUDE_DIRS}
)
target_compile_options(
@@ -69,8 +68,7 @@ if (SIST_DEBUG)
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
# -O2
-O2
)
target_link_options(
sist2
@@ -83,6 +81,7 @@ if (SIST_DEBUG)
OUTPUT_NAME sist2_debug
)
else ()
# set(VCPKG_BUILD_TYPE release)
target_compile_options(
sist2
PRIVATE
@@ -105,15 +104,14 @@ target_link_libraries(
lmdb
cjson
argparse
${GLIB_LDFLAGS}
unofficial::glib::glib
unofficial::mongoose::mongoose
CURL::libcurl
${UUID_LIB}
pthread
magic
c
scan
)

View File

@@ -1,15 +1,9 @@
FROM simon987/sist2-build as build
FROM ubuntu:19.10
MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/
ADD . /build/
RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
FROM ubuntu:20.10
RUN apt update && apt install -y curl
RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
curl libtiff5 libpng16-16 libpcre3
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
@@ -18,9 +12,9 @@ RUN mkdir -p /usr/share/tessdata && \
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
COPY --from=build /build/sist2 /root/sist2
ADD sist2 /root/sist2
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

14
Docker/build.sh Executable file
View File

@@ -0,0 +1,14 @@
rm ./sist2 sist2_debug
cp ../sist2.gz .
gzip -d sist2.gz
strip sist2
version=$(./sist2 --version)
echo "Version ${version}"
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
docker push simon987/sist2:${version}
docker push simon987/sist2:latest
docker run --rm simon987/sist2 -v

View File

@@ -1,15 +1,9 @@
FROM simon987/sist2-build-arm64 as build
FROM ubuntu:19.10
MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/
ADD . /build/
RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
FROM ubuntu:20.10
RUN apt update && apt install -y curl
RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
curl libtiff5 libpng16-16 libpcre3
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
@@ -18,9 +12,9 @@ RUN mkdir -p /usr/share/tessdata && \
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
COPY --from=build /build/sist2 /root/sist2
ADD sist2_arm64 /root/sist2
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

13
DockerArm64/build.sh Executable file
View File

@@ -0,0 +1,13 @@
rm ./sist2_arm64
cp ../sist2_arm64.gz .
gzip -d sist2_arm64.gz
version=$(./sist2_arm64 --version)
echo "Version ${version}"
docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest
docker push simon987/sist2-arm64:"${version}"
docker push simon987/sist2-arm64:latest
docker run --rm simon987/sist2-arm64 -v

View File

@@ -1,6 +1,6 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
[![Development snapshots](https://ci.simon987.net/app/rest/builds/buildType(Sist2_Build)/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)
@@ -25,12 +25,14 @@ sist2 (Simple incremental search tool)
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
\* See [format support](#format-support)
\*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr)
![stats](docs/stats.png)
## Getting Started
1. Have an Elasticsearch (>= 6.X.X) instance running
@@ -50,13 +52,15 @@ sist2 (Simple incremental search tool)
```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:2.10.1-x64-linux`
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest`
1. See [Usage guide](docs/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
## Example usage
See [Usage guide](docs/USAGE.md) for more details
@@ -65,16 +69,17 @@ See [Usage guide](docs/USAGE.md) for more details
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
1. Start web interface: `sist2 web ./docs_idx`
## Format support
File type | Library | Content | Thumbnail | Metadata
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
cbz,cbr | *(none)* | - | yes | - |
`audio/*` | ffmpeg | - | yes | ID3 tags |
`video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
html, xml | *(none)* | yes | no | - |
@@ -86,65 +91,46 @@ mobi, azw, azw3 | libmobi | yes | no | author, title |
\* *See [Archive files](#archive-files)*
### Archive files
**sist2** will scan files stored into archive files (zip, tar, 7z...) as if they were directly in the file system.
Recursive (archives inside archives)
**sist2** will scan files stored into archive files (zip, tar, 7z...) as if
they were directly in the file system. Recursive (archives inside archives)
scan is also supported.
**Limitations**:
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
is limitted (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split into smaller parts.
**sist2** is not I/O bound, scans might be faster when larger archives are split
into smaller parts.
### OCR
You can enable OCR support for pdf,xps,fb2,epub file types with the
`--ocr <lang>` option. Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
`--ocr <lang>` option. Download the language data files with your
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa) pre-installed.
Examples
```bash
sist2 scan --ocr jpn ~/Books/Manga/
sist2 scan --ocr eng ~/Books/Textbooks/
```
## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries
### With docker (recommended)
```bash
git clone --recursive https://github.com/simon987/sist2/
cd sist2
docker build . -f ./Dockerfile -t my-sist2-image
docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
```
### On a linux computer
You can compile **sist2** by yourself if you don't want to use the pre-compiled
binaries (GCC 7+ required).
1. Install compile-time dependencies
```bash
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf
```
1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
1. Install vcpkg dependencies
```bash
vcpkg install curl[core,openssl]
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libuuid libmagic libraw jasper lcms gumbo
```
1. Build
2. Build
```bash
git clone --recursive https://github.com/simon987/sist2/
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .

View File

@@ -2,18 +2,16 @@
VCPKG_ROOT="/vcpkg"
rm *.gz &>/dev/null
git submodule update --init --recursive
rm *.gz
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j $(nproc)
cmake -DSIST_DEBUG=off -DVCPKG_BUILD_TYPE=release -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 12
strip sist2
./sist2 -v > VERSION
mv sist2 sist2-x64-linux
gzip -9 sist2
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j $(nproc)
mv sist2_debug sist2-x64-linux-debug
cmake -DSIST_DEBUG=on -DVCPKG_BUILD_TYPE=debug -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 12
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2

View File

@@ -2,12 +2,11 @@
VCPKG_ROOT="/vcpkg"
rm *.gz &>/dev/null
git submodule update --init --recursive
rm *.gz
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j $(nproc)
make -j 4
strip sist2
mv sist2 sist2-arm64-linux
mv sist2 sist2_arm64
gzip -9 sist2_arm64

View File

@@ -46,7 +46,6 @@ Scan options
--fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
--read-subtitles Read subtitles from media files
Index options
-t, --threads=<int> Number of threads. DEFAULT=1
@@ -92,7 +91,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
will be copied to the new index and will not be parsed again.
* `-o, --output` Output directory.
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--name` Set the `name` option for the web module
* `--depth` Maximum scan dept. Set to 0 only scan files directly in the root directory, set to -1 for infinite depth
* `--archive` Archive file mode.
@@ -124,7 +123,6 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
### Scan examples
@@ -243,11 +241,9 @@ The `_text.*` items will be indexed and searchable as **text** fields (fuzzy sea
*thumbs/*:
LMDB key-value store. Keys are **binary** 16-byte md5 hash* (`_id` field)
LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
and values are raw image bytes.
*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
Importing an external `binary` type index is technically possible but
it is currently unsupported and has no guaranties of back/forward compatibility.
@@ -357,7 +353,8 @@ You can safely copy the `/tags/` database to another index.
See [Automatic tagging](#automatic-tagging) for information about tag
hierarchies and tag colors.
\* *It can take a few seconds to take effect in new search queries.*
\* *It can take a few seconds to take effect in new search queries, and the page needs
to be reloaded for the tags tab to update*
### Automatic tagging

View File

@@ -30,10 +30,6 @@
"mime": {
"type": "keyword"
},
"parent": {
"type": "keyword",
"index": false
},
"thumbnail": {
"type": "keyword",
"index": false
@@ -165,30 +161,6 @@
"exif_user_comment": {
"type": "text"
},
"exif_gps_longitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dec": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dec": {
"type": "keyword",
"index": false
},
"author": {
"type": "text"
},

View File

@@ -1,6 +0,0 @@
#!/usr/bin/env bash
make clean
rm -rf CMakeFiles/ CMakeCache.txt Makefile \
third-party/libscan/CMakeFiles third-party/libscan/CMakeCache.txt third-party/libscan/third-party/ext_ffmpeg \
third-party/libscan/third-party/ext_libmobi third-party/libscan/Makefile

View File

@@ -227,7 +227,6 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase)
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)

View File

@@ -18,7 +18,6 @@ typedef struct scan_args {
char *path;
char *archive;
archive_mode_t archive_mode;
char *archive_passphrase;
char *tesseract_lang;
const char *tesseract_path;
char *exclude_regex;
@@ -26,7 +25,6 @@ typedef struct scan_args {
const char* treemap_threshold_str;
double treemap_threshold;
int max_memory_buffer;
int read_subtitles;
} scan_args_t;
scan_args_t *scan_args_create();

View File

@@ -40,9 +40,6 @@ typedef struct {
pcre_extra *exclude_extra;
int fast;
GHashTable *dbg_current_files;
pthread_mutex_t dbg_current_files_mu;
scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx;
scan_ebook_ctx_t ebook_ctx;

View File

@@ -30,11 +30,11 @@ void elastic_cleanup() {
}
}
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
cJSON *line = cJSON_CreateObject();
cJSON_AddStringToObject(line, "_id", id_str);
cJSON_AddStringToObject(line, "_id", uuid_str);
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemReferenceToObject(line, "_source", document);
@@ -52,13 +52,13 @@ void index_json_func(void *arg) {
elastic_index_line(line);
}
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
char *json = cJSON_PrintUnformatted(document);
size_t json_len = strlen(json);
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
memcpy(bulk_line->line, json, json_len);
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
memcpy(bulk_line->uuid_str, uuid_str, UUID_STR_LEN);
*(bulk_line->line + json_len) = '\n';
*(bulk_line->line + json_len + 1) = '\0';
bulk_line->next = NULL;
@@ -67,7 +67,7 @@ void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
@@ -129,9 +129,9 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
while (line != NULL && *count < max) {
char action_str[256];
snprintf(
action_str, sizeof(action_str),
action_str, 256,
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->path_md5_str, Indexer->es_index
line->uuid_str, Indexer->es_index
);
size_t action_str_len = strlen(action_str);
@@ -220,7 +220,7 @@ void _elastic_flush(int max) {
if (r->status_code == 413) {
if (max <= 1) {
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
free_response(r);
free(buf);
delete_queue(1);
@@ -408,9 +408,9 @@ void elastic_init(int force_reset, const char* user_mappings, const char* user_s
}
}
cJSON *elastic_get_document(const char *id_str) {
cJSON *elastic_get_document(const char *uuid_str) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, uuid_str);
response_t *r = web_get(url, 3);
cJSON *json = NULL;

View File

@@ -5,7 +5,7 @@
typedef struct es_bulk_line {
struct es_bulk_line *next;
char path_md5_str[MD5_STR_LENGTH];
char uuid_str[UUID_STR_LEN];
char line[0];
} es_bulk_line_t;
@@ -16,9 +16,9 @@ typedef struct es_indexer es_indexer_t;
void elastic_index_line(es_bulk_line_t *line);
void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char *url, const char *index);
@@ -27,10 +27,10 @@ void finish_indexer(char *script, int async_script, char *index_id);
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);
cJSON *elastic_get_document(const char *id_str);
cJSON *elastic_get_document(const char *uuid_str);
char *elastic_get_status();
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]);
#endif

File diff suppressed because one or more lines are too long

View File

@@ -6,22 +6,18 @@
static __thread int index_fd = -1;
typedef struct {
unsigned char path_md5[MD5_DIGEST_LENGTH];
unsigned char uuid[16];
unsigned long ino;
unsigned long size;
unsigned int mime;
int mtime;
short base;
short ext;
char has_parent;
} line_t;
#define META_NEXT 0xFFFF
void skip_meta(FILE *file) {
enum metakey key = 0;
fread(&key, sizeof(uint16_t), 1, file);
while (key != META_NEXT) {
enum metakey key = getc(file);
while (key != '\n') {
if (IS_META_INT(key)) {
fseek(file, sizeof(int), SEEK_CUR);
} else if (IS_META_LONG(key)) {
@@ -30,13 +26,13 @@ void skip_meta(FILE *file) {
while ((getc(file))) {}
}
fread(&key, sizeof(uint16_t), 1, file);
key = getc(file);
}
}
void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "id", desc->id);
cJSON_AddStringToObject(json, "uuid", desc->uuid);
cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name);
@@ -70,7 +66,7 @@ index_descriptor_t read_index_descriptor(char *path) {
}
char *buf = malloc(info.st_size + 1);
size_t ret = read(fd, buf, info.st_size);
int ret = read(fd, buf, info.st_size);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
}
@@ -86,7 +82,7 @@ index_descriptor_t read_index_descriptor(char *path) {
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short) strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
if (cJSON_GetObjectItem(json, "type") == NULL) {
strcpy(descriptor.type, INDEX_TYPE_BIN);
} else {
@@ -156,20 +152,8 @@ char *get_meta_key_text(enum metakey meta_key) {
return "thumbnail";
case MetaPages:
return "pages";
case MetaExifGpsLongitudeRef:
return "exif_gps_longitude_ref";
case MetaExifGpsLongitudeDMS:
return "exif_gps_longitude_dms";
case MetaExifGpsLongitudeDec:
return "exif_gps_longitude_dec";
case MetaExifGpsLatitudeRef:
return "exif_gps_latitude_ref";
case MetaExifGpsLatitudeDMS:
return "exif_gps_latitude_dms";
case MetaExifGpsLatitudeDec:
return "exif_gps_latitude_dec";
default:
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
return NULL;
}
}
@@ -199,7 +183,7 @@ void write_document(document_t *doc) {
meta_line_t *meta = doc->meta_head;
while (meta != NULL) {
dyn_buffer_write_short(&buf, (uint16_t) meta->key);
dyn_buffer_write_char(&buf, meta->key);
if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->int_val);
@@ -213,7 +197,7 @@ void write_document(document_t *doc) {
meta = meta->next;
free(tmp);
}
dyn_buffer_write_short(&buf, META_NEXT);
dyn_buffer_write_char(&buf, '\n');
int res = write(index_fd, buf.buf, buf.cur);
if (res == -1) {
@@ -235,9 +219,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
dyn_buffer_t buf = dyn_buffer_create();
FILE *file = fopen(path, "rb");
while (TRUE) {
while (1) {
buf.cur = 0;
size_t _ = fread((void *) &line, sizeof(line_t), 1, file);
size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
if (feof(file)) {
break;
}
@@ -245,8 +229,8 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
cJSON *document = cJSON_CreateObject();
cJSON_AddStringToObject(document, "index", index_id);
char path_md5_str[MD5_STR_LENGTH];
buf2hex(line.path_md5, sizeof(line.path_md5), path_md5_str);
char uuid_str[UUID_STR_LEN];
uuid_unparse(line.uuid, uuid_str);
const char *mime_text = mime_get_mime_text(line.mime);
if (mime_text == NULL) {
@@ -263,6 +247,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
}
dyn_buffer_write_char(&buf, '\0');
char full_filename[PATH_MAX];
strcpy(full_filename, buf.buf);
cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
if (*(buf.buf + line.ext - 1) == '.') {
*(buf.buf + line.ext - 1) = '\0';
@@ -284,10 +271,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
cJSON_AddStringToObject(document, "path", "");
}
enum metakey key = 0;
fread(&key, sizeof(uint16_t), 1, file);
size_t ret;
while (key != META_NEXT) {
enum metakey key = getc(file);
size_t ret = 0;
while (key != '\n') {
switch (key) {
case MetaPages:
case MetaWidth:
@@ -325,12 +311,6 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaExifGpsLongitudeDMS:
case MetaExifGpsLongitudeDec:
case MetaExifGpsLongitudeRef:
case MetaExifGpsLatitudeDMS:
case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef:
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
@@ -346,12 +326,12 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
}
fread(&key, sizeof(uint16_t), 1, file);
key = getc(file);
}
cJSON *meta_obj = NULL;
if (IndexCtx.meta != NULL) {
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, full_filename);
if (meta_string != NULL) {
meta_obj = cJSON_Parse(meta_string);
@@ -366,7 +346,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
}
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, full_filename);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_DeleteItemFromObject(document, "tag");
@@ -374,7 +354,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
}
}
func(document, path_md5_str);
func(document, uuid_str);
cJSON_Delete(document);
if (meta_obj) {
cJSON_Delete(meta_obj);
@@ -402,7 +382,7 @@ const char *json_type_array_fields[] = {
void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {
FILE *file = fopen(path, "r");
while (TRUE) {
while (1) {
char *line = NULL;
size_t len;
size_t read = getline(&line, &len, file);
@@ -422,7 +402,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
}
cJSON *document = cJSON_CreateObject();
const char *id_str = cJSON_GetObjectItem(input, "_id")->valuestring;
const char *uuid_str = cJSON_GetObjectItem(input, "_id")->valuestring;
for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
@@ -450,7 +430,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
}
}
func(document, id_str);
func(document, uuid_str);
cJSON_Delete(document);
cJSON_Delete(input);
@@ -458,7 +438,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
fclose(file);
}
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_BIN) == 0) {
read_index_bin(path, index_id, func);
@@ -471,17 +451,15 @@ void incremental_read(GHashTable *table, const char *filepath) {
FILE *file = fopen(filepath, "rb");
line_t line;
LOG_DEBUGF("serialize.c", "Incremental read %s", filepath)
while (1) {
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
if (ret != 1 || feof(file)) {
break;
}
incremental_put(table, line.path_md5, line.mtime);
incremental_put(table, line.ino, line.mtime);
while ((getc(file)) != 0) {}
while ((getc(file))) {}
skip_meta(file);
}
fclose(file);
@@ -497,47 +475,33 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
FILE *dst_file = fopen(dst_filepath, "ab");
line_t line;
LOG_DEBUGF("serialize.c", "Incremental copy %s", filepath)
while (TRUE) {
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
while (1) {
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
if (ret != 1 || feof(file)) {
break;
}
// Assume that files with parents still exist.
// One way to "fix" this would be to check if the parent is marked for copy but it would consistently
// delete files with grandparents, which is a side-effect worse than having orphaned files
if (line.has_parent || incremental_get(copy_table, line.path_md5)) {
if (incremental_get(copy_table, line.ino)) {
fwrite(&line, sizeof(line), 1, dst_file);
// Copy filepath
char filepath_buf[PATH_MAX];
char c;
char *ptr = filepath_buf;
while ((c = (char) getc(file))) {
*ptr++ = c;
}
*ptr = '\0';
fwrite(filepath_buf, (ptr - filepath_buf) + 1, 1, dst_file);
// Copy tn store contents
size_t buf_len;
char path_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) filepath_buf, (ptr - filepath_buf), (unsigned char *) path_md5);
char *buf = store_read(store, path_md5, sizeof(path_md5), &buf_len);
if (buf_len != 0) {
store_write(dst_store, path_md5, sizeof(path_md5), buf, buf_len);
char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
free(buf);
}
enum metakey key = 0;
char c;
while ((c = (char) getc(file))) {
fwrite(&c, sizeof(c), 1, dst_file);
}
fwrite("\0", sizeof(c), 1, dst_file);
enum metakey key;
while (1) {
fread(&key, sizeof(uint16_t), 1, file);
fwrite(&key, sizeof(uint16_t), 1, dst_file);
if (key == META_NEXT) {
key = getc(file);
if (key == '\n') {
break;
}
fwrite(&key, sizeof(char), 1, dst_file);
if (IS_META_INT(key)) {
int val;
@@ -553,12 +517,14 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
}
fwrite("\0", sizeof(c), 1, dst_file);
}
if (ret != 1) {
break;
}
}
} else {
while ((getc(file))) {}
skip_meta(file);
}
}
fclose(file);
fclose(dst_file);
}

View File

@@ -7,14 +7,14 @@
#include <sys/syscall.h>
#include <glib.h>
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
void write_document(document_t *doc);
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath);

View File

@@ -4,7 +4,6 @@
store_t *store_create(char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t));
#if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL);
@@ -29,39 +28,30 @@ store_t *store_create(char *path, size_t chunk_size) {
mdb_txn_begin(store->env, NULL, 0, &txn);
mdb_dbi_open(txn, NULL, 0, &store->dbi);
mdb_txn_commit(txn);
#endif
return store;
}
void store_destroy(store_t *store) {
#if (SIST_FAKE_STORE != 1)
pthread_rwlock_destroy(&store->lock);
mdb_close(store->env, store->dbi);
mdb_env_close(store->env);
#endif
free(store);
}
void store_flush(store_t *store) {
mdb_env_sync(store->env, TRUE);
}
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
if (key_len == MD5_DIGEST_LENGTH) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
if (key_len == 16) {
char uuid_str[UUID_STR_LEN] = {0, };
uuid_unparse((unsigned char *) key, uuid_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}
}
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
@@ -98,13 +88,10 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
#endif
}
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
char *buf = NULL;
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
@@ -125,7 +112,6 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
}
mdb_txn_abort(txn);
#endif
return buf;
}

View File

@@ -24,8 +24,6 @@ void store_destroy(store_t *store);
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);
void store_flush(store_t *store);
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
GHashTable *store_read_all(store_t *store);

View File

@@ -20,7 +20,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->vfile.info = *info;
memset(job->parent, 0, MD5_DIGEST_LENGTH);
memset(job->parent, 0, 16);
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;

View File

@@ -21,7 +21,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.10.2";
static const char *const Version = "2.8.5";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -30,77 +30,13 @@ static const char *const usage[] = {
NULL,
};
#include<signal.h>
#include<unistd.h>
static __sighandler_t sigsegv_handler = NULL;
static __sighandler_t sigabrt_handler = NULL;
void sig_handler(int signum) {
LogCtx.verbose = 1;
LogCtx.very_verbose = 1;
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
GHashTableIter iter;
g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
parse_job_t *job = value;
if (isatty(STDERR_FILENO)) {
LOG_DEBUGF(
"*SIGNAL HANDLER*",
"Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
31 + ((unsigned int) key) % 7, key, job->filepath
);
} else {
LOG_DEBUGF(
"*SIGNAL HANDLER*",
"THREAD [%04llX] was working on job %s",
key, job->filepath
);
}
}
tpool_dump_debug_info(ScanCtx.pool);
LOG_INFO(
"*SIGNAL HANDLER*",
"Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
)
LOG_INFO(
"*SIGNAL HANDLER*",
"sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
)
#ifndef SIST_DEBUG
LOG_WARNING(
"*SIGNAL HANDLER*",
"You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
"releases page to provide additionnal information when submitting a bug report."
)
#endif
if (signum == SIGSEGV && sigsegv_handler != NULL) {
sigsegv_handler(signum);
} else if (signum == SIGABRT && sigabrt_handler != NULL) {
sigabrt_handler(signum);
}
}
void init_dir(const char *dirpath) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) ScanCtx.index.desc.name, strlen(ScanCtx.index.desc.name), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
uuid_t uuid;
uuid_generate(uuid);
uuid_unparse(uuid, ScanCtx.index.desc.uuid);
time(&ScanCtx.index.desc.timestamp);
strcpy(ScanCtx.index.desc.version, Version);
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);
@@ -162,14 +98,6 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.arc_ctx.log = _log;
ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
if (args->archive_passphrase != NULL) {
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
} else {
ScanCtx.arc_ctx.passphrase[0] = 0;
}
ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
// Comic
ScanCtx.comic_ctx.log = _log;
@@ -203,7 +131,6 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
init_media();
// OOXML
@@ -291,7 +218,7 @@ void sist2_scan(scan_args_t *args) {
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
incremental_read(ScanCtx.original_table, file_path);
}
}
@@ -306,6 +233,8 @@ void sist2_scan(scan_args_t *args) {
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
if (args->incremental != NULL) {
char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
@@ -321,7 +250,7 @@ void sist2_scan(scan_args_t *args) {
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
}
}
@@ -336,8 +265,6 @@ void sist2_scan(scan_args_t *args) {
store_destroy(source_tags);
}
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
store_destroy(ScanCtx.index.store);
}
@@ -400,7 +327,7 @@ void sist2_index(index_args_t *args) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
read_index(file_path, desc.id, desc.type, f);
read_index(file_path, desc.uuid, desc.type, f);
}
}
closedir(dir);
@@ -410,7 +337,7 @@ void sist2_index(index_args_t *args) {
tpool_destroy(IndexCtx.pool);
if (!args->print) {
finish_indexer(args->script, args->async_script, desc.id);
finish_indexer(args->script, args->async_script, desc.uuid);
}
store_destroy(IndexCtx.tag_store);
@@ -430,7 +357,7 @@ void sist2_exec_script(exec_args_t *args) {
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
execute_update_script(args->script, args->async_script, desc.id);
execute_update_script(args->script, args->async_script, desc.uuid);
free(args->script);
}
@@ -471,9 +398,6 @@ void sist2_web(web_args_t *args) {
int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler);
sigabrt_handler = signal(SIGABRT, sig_handler);
setlocale(LC_ALL, "");
scan_args_t *scan_args = scan_args_create();
@@ -514,9 +438,6 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
"skip: Don't parse, list: only get file names as text, "
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
"Passphrase for encrypted archive files"),
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
"which are installed on your machine)"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
@@ -526,7 +447,6 @@ int main(int argc, const char *argv[]) {
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),

View File

@@ -41,45 +41,34 @@ void fs_reset(struct vfile *f) {
#define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL))
void set_dbg_current_file(parse_job_t *job) {
unsigned long long pid = (unsigned long long) pthread_self();
pthread_mutex_lock(&ScanCtx.dbg_current_files_mu);
g_hash_table_replace(ScanCtx.dbg_current_files, GINT_TO_POINTER(pid), job);
pthread_mutex_unlock(&ScanCtx.dbg_current_files_mu);
}
void parse(void *arg) {
parse_job_t *job = arg;
document_t doc;
set_dbg_current_file(job);
int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
return;
}
doc.filepath = job->filepath;
doc.ext = (short) job->ext;
doc.base = (short) job->base;
char *rel_path = doc.filepath + ScanCtx.index.desc.root_len;
MD5((unsigned char *) rel_path, strlen(rel_path), doc.path_md5);
doc.meta_head = NULL;
doc.meta_tail = NULL;
doc.mime = 0;
doc.size = job->vfile.info.st_size;
doc.ino = job->vfile.info.st_ino;
doc.mtime = job->vfile.info.st_mtim.tv_sec;
int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5);
return;
}
uuid_generate(doc.uuid);
char *buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex(doc.path_md5, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
char uuid_str[UUID_STR_LEN];
uuid_unparse(doc.uuid, uuid_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
}
if (job->vfile.info.st_size == 0) {
@@ -97,8 +86,7 @@ void parse(void *arg) {
// Get mime type with libmagic
if (!job->vfile.is_fs_file) {
LOG_WARNING(job->filepath,
"Guessing mime type with libmagic inside archive files is not currently supported");
LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
goto abort;
}
@@ -181,15 +169,11 @@ void parse(void *arg) {
abort:
//Parent meta
if (!md5_digest_is_null(job->parent)) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
if (!uuid_is_null(job->parent)) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
meta_parent->key = MetaParent;
buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
uuid_unparse(job->parent, meta_parent->str_val);
APPEND_META((&doc), meta_parent)
doc.has_parent = TRUE;
} else {
doc.has_parent = FALSE;
}
write_document(&doc);

View File

@@ -7,7 +7,7 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath)
size_t size;
char *buf = read_all(vfile, &size);
char* buf = read_all(vfile, &size);
if (buf == NULL) {
LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath)
return;
@@ -23,11 +23,11 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
}
char *json_str = cJSON_PrintUnformatted(json);
unsigned char path_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
path_md5);
char filepath[PATH_MAX];
memcpy(filepath, vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len);
*(filepath + doc->ext - 1) = '\0';
store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1);
store_write(ScanCtx.index.meta_store, filepath, doc->ext, json_str, strlen(json_str) + 1);
cJSON_Delete(json);
free(json_str);

View File

@@ -23,10 +23,9 @@
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define UUID_STR_LEN 37
#define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33
#include "util.h"
#include "log.h"
#include "types.h"
@@ -48,4 +47,5 @@
#include <errno.h>
#include <ctype.h>
#endif

View File

@@ -1 +0,0 @@
!function(n){"use strict";function d(n,t){var r=(65535&n)+(65535&t);return(n>>16)+(t>>16)+(r>>16)<<16|65535&r}function f(n,t,r,e,o,u){return d((c=d(d(t,n),d(e,u)))<<(f=o)|c>>>32-f,r);var c,f}function l(n,t,r,e,o,u,c){return f(t&r|~t&e,n,t,o,u,c)}function v(n,t,r,e,o,u,c){return f(t&e|r&~e,n,t,o,u,c)}function g(n,t,r,e,o,u,c){return f(t^r^e,n,t,o,u,c)}function m(n,t,r,e,o,u,c){return f(r^(t|~e),n,t,o,u,c)}function i(n,t){var r,e,o,u;n[t>>5]|=128<<t%32,n[14+(t+64>>>9<<4)]=t;for(var c=1732584193,f=-271733879,i=-1732584194,a=271733878,h=0;h<n.length;h+=16)c=l(r=c,e=f,o=i,u=a,n[h],7,-680876936),a=l(a,c,f,i,n[h+1],12,-389564586),i=l(i,a,c,f,n[h+2],17,606105819),f=l(f,i,a,c,n[h+3],22,-1044525330),c=l(c,f,i,a,n[h+4],7,-176418897),a=l(a,c,f,i,n[h+5],12,1200080426),i=l(i,a,c,f,n[h+6],17,-1473231341),f=l(f,i,a,c,n[h+7],22,-45705983),c=l(c,f,i,a,n[h+8],7,1770035416),a=l(a,c,f,i,n[h+9],12,-1958414417),i=l(i,a,c,f,n[h+10],17,-42063),f=l(f,i,a,c,n[h+11],22,-1990404162),c=l(c,f,i,a,n[h+12],7,1804603682),a=l(a,c,f,i,n[h+13],12,-40341101),i=l(i,a,c,f,n[h+14],17,-1502002290),c=v(c,f=l(f,i,a,c,n[h+15],22,1236535329),i,a,n[h+1],5,-165796510),a=v(a,c,f,i,n[h+6],9,-1069501632),i=v(i,a,c,f,n[h+11],14,643717713),f=v(f,i,a,c,n[h],20,-373897302),c=v(c,f,i,a,n[h+5],5,-701558691),a=v(a,c,f,i,n[h+10],9,38016083),i=v(i,a,c,f,n[h+15],14,-660478335),f=v(f,i,a,c,n[h+4],20,-405537848),c=v(c,f,i,a,n[h+9],5,568446438),a=v(a,c,f,i,n[h+14],9,-1019803690),i=v(i,a,c,f,n[h+3],14,-187363961),f=v(f,i,a,c,n[h+8],20,1163531501),c=v(c,f,i,a,n[h+13],5,-1444681467),a=v(a,c,f,i,n[h+2],9,-51403784),i=v(i,a,c,f,n[h+7],14,1735328473),c=g(c,f=v(f,i,a,c,n[h+12],20,-1926607734),i,a,n[h+5],4,-378558),a=g(a,c,f,i,n[h+8],11,-2022574463),i=g(i,a,c,f,n[h+11],16,1839030562),f=g(f,i,a,c,n[h+14],23,-35309556),c=g(c,f,i,a,n[h+1],4,-1530992060),a=g(a,c,f,i,n[h+4],11,1272893353),i=g(i,a,c,f,n[h+7],16,-155497632),f=g(f,i,a,c,n[h+10],23,-1094730640),c=g(c,f,i,a,n[h+13],4,681279174),a=g(a,c,f,i,n[h],11,-358537222),i=g(i,a,c,f,n[h+3],16,-722521979),f=g(f,i,a,c,n[h+6],23,76029189),c=g(c,f,i,a,n[h+9],4,-640364487),a=g(a,c,f,i,n[h+12],11,-421815835),i=g(i,a,c,f,n[h+15],16,530742520),c=m(c,f=g(f,i,a,c,n[h+2],23,-995338651),i,a,n[h],6,-198630844),a=m(a,c,f,i,n[h+7],10,1126891415),i=m(i,a,c,f,n[h+14],15,-1416354905),f=m(f,i,a,c,n[h+5],21,-57434055),c=m(c,f,i,a,n[h+12],6,1700485571),a=m(a,c,f,i,n[h+3],10,-1894986606),i=m(i,a,c,f,n[h+10],15,-1051523),f=m(f,i,a,c,n[h+1],21,-2054922799),c=m(c,f,i,a,n[h+8],6,1873313359),a=m(a,c,f,i,n[h+15],10,-30611744),i=m(i,a,c,f,n[h+6],15,-1560198380),f=m(f,i,a,c,n[h+13],21,1309151649),c=m(c,f,i,a,n[h+4],6,-145523070),a=m(a,c,f,i,n[h+11],10,-1120210379),i=m(i,a,c,f,n[h+2],15,718787259),f=m(f,i,a,c,n[h+9],21,-343485551),c=d(c,r),f=d(f,e),i=d(i,o),a=d(a,u);return[c,f,i,a]}function a(n){for(var t="",r=32*n.length,e=0;e<r;e+=8)t+=String.fromCharCode(n[e>>5]>>>e%32&255);return t}function h(n){var t=[];for(t[(n.length>>2)-1]=void 0,e=0;e<t.length;e+=1)t[e]=0;for(var r=8*n.length,e=0;e<r;e+=8)t[e>>5]|=(255&n.charCodeAt(e/8))<<e%32;return t}function e(n){for(var t,r="0123456789abcdef",e="",o=0;o<n.length;o+=1)t=n.charCodeAt(o),e+=r.charAt(t>>>4&15)+r.charAt(15&t);return e}function r(n){return unescape(encodeURIComponent(n))}function o(n){return a(i(h(t=r(n)),8*t.length));var t}function u(n,t){return function(n,t){var r,e,o=h(n),u=[],c=[];for(u[15]=c[15]=void 0,16<o.length&&(o=i(o,8*n.length)),r=0;r<16;r+=1)u[r]=909522486^o[r],c[r]=1549556828^o[r];return e=i(u.concat(h(t)),512+8*t.length),a(i(c.concat(e),640))}(r(n),r(t))}function t(n,t,r){return t?r?u(t,n):e(u(t,n)):r?o(n):e(o(n))}"function"==typeof define&&define.amd?define(function(){return t}):"object"==typeof module&&module.exports?module.exports=t:n.md5=t}(this);

View File

@@ -22,7 +22,7 @@ function gifOver(thumbnail, hit) {
thumbnail.addEventListener("mouseout", function () {
//Reset timer
thumbnail.mouseStayedOver = false;
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
})
}
@@ -192,19 +192,6 @@ function makeUserTag(tag, hit) {
return userTag;
}
function makeGpsMetaRow(tbody, latitude, longitude) {
tbody.append($("<tr>")
.append($("<td>").text("Exif GPS"))
.append($("<td>")
.append($("<a>")
.text(`${latitude}, ${longitude}`)
.attr("href", `https://maps.google.com/?q=${latitude},${longitude}&ll=${latitude},${longitude}&t=k&z=17`)
.attr("target", "_blank")
)
)
);
}
function infoButtonCb(hit) {
return () => {
getDocumentInfo(hit["_id"]).then(doc => {
@@ -242,25 +229,13 @@ function infoButtonCb(hit) {
.text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " "))
.attr("title", doc["mtime"]))
);
// Exif GPS
if ("exif_gps_longitude_dec" in doc) {
makeGpsMetaRow(tbody, doc["exif_gps_latitude_dec"], doc["exif_gps_longitude_dec"])
} else if ("exif_gps_longitude_dms" in doc) {
makeGpsMetaRow(
tbody,
dmsToDecimal(doc["exif_gps_latitude_dms"], doc["exif_gps_latitude_ref"]),
dmsToDecimal(doc["exif_gps_longitude_dms"], doc["exif_gps_longitude_ref"]),
)
}
const displayFields = new Set([
"mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
"modified_by", "pages"
]);
Object.keys(doc)
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || (key.startsWith("exif_") && !key.includes("gps")))
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))
.forEach(key => {
tbody.append($("<tr>")
.append($("<td>").text(key))
@@ -375,14 +350,6 @@ function createDocCard(hit) {
audio.setAttribute("controls", "");
audio.setAttribute("type", hit["_source"]["mime"]);
audio.setAttribute("src", "f/" + hit["_id"]);
audio.addEventListener("play", () => {
// Pause all currently playing audio tags
$("audio").each(function () {
if (this !== audio) {
this.pause();
}
});
});
docCard.appendChild(audio)
}
@@ -452,7 +419,7 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
thumbnail.setAttribute("class", "card-img-top fit");
}
}
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
if (shouldDisplayRawImage(hit)) {
thumbnail.addEventListener("click", () => {

View File

@@ -165,9 +165,6 @@ window.onload = () => {
}
}
});
initTagTree();
updateTagTree();
};
function saveTag(tag, hit) {
@@ -177,7 +174,7 @@ function saveTag(tag, hit) {
delete: false,
name: tag,
doc_id: hit["_id"],
path_md5: md5(relPath)
relpath: relPath
}).then(() => {
tagBar.blur();
$("#tagModal").modal("hide");
@@ -191,8 +188,6 @@ function saveTag(tag, hit) {
hideAfter: 3000,
loaderBg: "#08c7e8",
});
window.setTimeout(updateTagTree, 2000);
})
}
@@ -203,7 +198,7 @@ function deleteTag(tag, hit) {
delete: true,
name: tag,
doc_id: hit["_id"],
path_md5: md5(relPath)
relpath: relPath
}).then(() => {
$.toast({
heading: "Tag deleted",
@@ -215,8 +210,6 @@ function deleteTag(tag, hit) {
hideAfter: 3000,
loaderBg: "#08c7e8",
});
window.setTimeout(updateTagTree, 2000);
})
}
@@ -320,8 +313,25 @@ $.jsonPost("es", {
mimeTree.node("any").select();
});
function initTagTree() {
tagMap = [{text: "All", id: "any"}];
// Tags tree
$.jsonPost("es", {
aggs: {
tags: {
terms: {
field: "tag",
size: 10000
}
}
},
size: 0,
}).then(resp => {
resp["aggregations"]["tags"]["buckets"]
.sort((a, b) => a["key"].localeCompare(b["key"]))
.forEach(bucket => {
addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
});
tagMap.push({"text": "All", "id": "any"});
tagTree = new InspireTree({
selection: {
mode: 'checkbox'
@@ -336,34 +346,8 @@ function initTagTree() {
});
tagTree.on("node.state.changed", handleTreeClick(tagTree));
tagTree.node("any").select();
}
function updateTagTree() {
$.jsonPost("es", {
aggs: {
tags: {
terms: {
field: "tag",
size: 10000
}
}
},
size: 0,
}).then(resp => {
tagMap = [];
resp["aggregations"]["tags"]["buckets"]
.sort((a, b) => a["key"].localeCompare(b["key"]))
.forEach(bucket => {
addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
});
tagTree.removeAll();
tagMap.push({text: "All", id: "any"})
tagTree.addNodes(tagMap);
searchBusy = false;
});
}
});
function addTag(map, tag, id, count) {
// let tags = tag.split("#")[0].split(".");
@@ -511,8 +495,8 @@ function search(after = null) {
searchResults.appendChild(preload);
}
let searchBarValue = searchBar.value;
let empty = searchBarValue === "";
let query = searchBar.value;
let empty = query === "";
let condition = empty ? "should" : "must";
let filters = [
{range: {size: {gte: size_min, lte: size_max}}},
@@ -561,32 +545,19 @@ function search(after = null) {
filters.push({range: {mtime: {lte: date_max}}})
}
let query;
if (CONF.options.queryMode === "simple") {
query = {
simple_query_string: {
query: searchBarValue,
fields: fields,
default_operator: "and"
}
}
} else {
query = {
query_string: {
query: searchBarValue,
default_field: "name",
default_operator: "and"
}
}
}
let q = {
"_source": {
excludes: ["content", "_tie"]
},
query: {
bool: {
[condition]: query,
[condition]: {
simple_query_string: {
query: query,
fields: fields,
default_operator: "and"
}
},
filter: filters
}
},
@@ -624,9 +595,7 @@ function search(after = null) {
}
}
const showError = CONF.options.queryMode === "advanced";
$.jsonPost("es", q, showError).then(searchResult => {
$.jsonPost("es", q).then(searchResult => {
let hits = searchResult["hits"]["hits"];
if (hits) {
lastDoc = hits[hits.length - 1];
@@ -635,7 +604,6 @@ function search(after = null) {
hits.forEach(hit => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
hit["_path_md5"] = md5(hit["_source"]["path"] + (hit["_source"]["path"] ? "/" : "") + hit["_source"]["name"] + ext(hit));
});
if (!after) {
@@ -660,25 +628,7 @@ function search(after = null) {
reachedEnd = hits.length !== SIZE;
insertHits(resultContainer, hits);
searchBusy = false;
}).fail(() => {
searchBusy = false;
if (!after) {
preload.remove();
}
console.log("QUERY:")
console.log(q)
$.toast({
heading: "Query error",
text: "Could not parse or execute query, please check the Advanced search documentation. " +
"See server logs for more information.",
stack: false,
bgColor: "#FF8F00",
textColor: "#FFF3E0",
position: 'bottom-right',
hideAfter: false
});
})
}

View File

@@ -70,7 +70,7 @@ function strUnescape(str) {
for (let i = 0; i < str.length; i++) {
const c = str[i];
const next = str[i + 1];
const next = str[i+1];
if (c === ']') {
if (next === ']') {
@@ -102,8 +102,7 @@ const _defaults = {
treemapSize: "large",
suggestPath: true,
fragmentSize: 100,
columns: 5,
queryMode: "simple"
columns: 5
};
function loadSettings() {
@@ -121,7 +120,6 @@ function loadSettings() {
$("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
$("#settingFragmentSize").val(CONF.options.fragmentSize);
$("#settingColumns").val(CONF.options.columns);
$("#settingQueryMode").val(CONF.options.queryMode);
}
function Settings() {
@@ -129,7 +127,6 @@ function Settings() {
this._onUpdate = function () {
$("#fuzzyToggle").prop("checked", this.options.fuzzy);
$("#searchBar").attr("placeholder", this.options.queryMode === "simple" ? "Search" : "Advanced search");
updateColumnStyle();
};
@@ -168,7 +165,6 @@ function updateSettings() {
CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
CONF.options.fragmentSize = $("#settingFragmentSize").val();
CONF.options.columns = $("#settingColumns").val();
CONF.options.queryMode = $("#settingQueryMode").val();
CONF.save();
if (typeof searchDebounced !== "undefined") {
@@ -191,16 +187,14 @@ function updateSettings() {
});
}
jQuery["jsonPost"] = function (url, data, showError = true) {
jQuery["jsonPost"] = function (url, data) {
return jQuery.ajax({
url: url,
type: "post",
data: JSON.stringify(data),
contentType: "application/json"
}).fail(err => {
if (showError) {
showEsError();
}
console.log(err);
});
};
@@ -236,13 +230,3 @@ function updateColumnStyle() {
`
}
}
function dmsToDecimal(dms, ref) {
const tokens = dms.split(",")
const d = Number(tokens[0].trim().split(":")[0]) / Number(tokens[0].trim().split(":")[1])
const m = Number(tokens[1].trim().split(":")[0]) / Number(tokens[1].trim().split(":")[1])
const s = Number(tokens[2].trim().split(":")[0]) / Number(tokens[2].trim().split(":")[1])
return (d + (m / 60) + (s / 3600)) * (ref === "S" || ref === "W" ? -1 : 1)
}

View File

@@ -12,7 +12,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.10.2</span>
<span class="badge badge-pill version">2.8.5</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings
@@ -120,8 +120,6 @@
</div>
<div class="modal-body">
<h2>Simple search</h2>
<table class="table">
<tbody>
<tr>
@@ -170,12 +168,6 @@
<p>For more information, see <a target="_blank"
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html">Elasticsearch
documentation</a></p>
<h2>Advanced search</h2>
<p>For documentation about the advanced search mode, see <a target="_blank"
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax">Elasticsearch
documentation</a></p>
</div>
</div>
</div>
@@ -215,16 +207,10 @@
<br/>
<div class="form-group">
<label for="settingFragmentSize">Highlight context size in characters</label>
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingQueryMode">Search mode</label>
<select id="settingQueryMode" class="form-control form-control-sm">
<option value="simple">Simple</option>
<option value="advanced">Advanced</option>
</select>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>

View File

@@ -10,7 +10,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.10.2</span>
<span class="badge badge-pill version">2.8.5</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"
@@ -29,13 +29,13 @@
</div>
<div id="treemap-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('treemap-card')" id="treemap-card-enlarge">Enlarge</button>
<button class="btn stats-btn" onclick="fullScreen('treemap-card')">Enlarge</button>
<button class="btn stats-btn" onclick="exportTreemap()">Export</button>
<svg id="treemap"></svg>
</div>
<div id="graphs-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('graphs-card')" id="graphs-card-enlarge">Enlarge</button>
<button class="btn stats-btn" onclick="fullScreen('graphs-card')">Enlarge</button>
<div class="graph">
<svg id="agg_mime_size"></svg>
</div>
@@ -84,16 +84,10 @@
<br/>
<div class="form-group">
<label for="settingFragmentSize">Highlight context size in characters</label>
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingQueryMode">Search mode</label>
<select id="settingQueryMode" class="form-control form-control-sm">
<option value="simple">Simple</option>
<option value="advanced">Advanced</option>
</select>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
@@ -795,15 +789,7 @@ window.onload = function () {
function fullScreen(selector) {
const card = document.getElementById(selector);
const btn = document.getElementById(selector + "-enlarge");
card.classList.toggle("full-screen");
if (card.classList.contains("full-screen")) {
btn.innerText = "Shrink";
} else {
btn.innerText = "Enlarge";
}
}
function exportTreemap() {

View File

@@ -2,6 +2,8 @@
#include "io/serialize.h"
#include "ctx.h"
#include <glib.h>
static GHashTable *FlatTree;
static GHashTable *BufferTable;
@@ -20,7 +22,7 @@ typedef struct {
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;
@@ -101,8 +103,8 @@ void read_index_into_tables(index_t *index) {
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s%s", index->path, de->d_name);
read_index(file_path, index->desc.id, index->desc.type, fill_tables);
snprintf(file_path, PATH_MAX, "%s/%s", index->path, de->d_name);
read_index(file_path, index->desc.uuid, index->desc.type, fill_tables);
}
}
closedir(dir);

View File

@@ -3,7 +3,7 @@
#include "sist.h"
#include <pthread.h>
#define MAX_QUEUE_SIZE 1000000
#define MAX_QUEUE_SIZE 10000
typedef void (*thread_func_t)(void *arg);
@@ -52,13 +52,6 @@ static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) {
return work;
}
void tpool_dump_debug_info(tpool_t *pool) {
LOG_DEBUGF("tpool.c", "pool->thread_cnt = %d", pool->thread_cnt)
LOG_DEBUGF("tpool.c", "pool->work_cnt = %d", pool->work_cnt)
LOG_DEBUGF("tpool.c", "pool->done_cnt = %d", pool->done_cnt)
LOG_DEBUGF("tpool.c", "pool->stop = %d", pool->stop)
}
/**
* Pop work object from thread pool
*/
@@ -90,7 +83,7 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
}
while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
usleep(10000);
usleep(100000);
}
pthread_mutex_lock(&(pool->work_mutex));
@@ -157,7 +150,6 @@ static void *tpool_worker(void *arg) {
if (pool->cleanup_func != NULL) {
LOG_INFO("tpool.c", "Executing cleanup function")
pool->cleanup_func();
LOG_DEBUG("tpool.c", "Done executing cleanup function")
}
pthread_cond_signal(&(pool->working_cond));

View File

@@ -10,12 +10,10 @@ typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg);
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *pool);
void tpool_destroy(tpool_t *tm);
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
void tpool_wait(tpool_t *pool);
void tpool_dump_debug_info(tpool_t *pool);
void tpool_wait(tpool_t *tm);
#endif

View File

@@ -6,7 +6,7 @@
#define INDEX_VERSION_EXTERNAL "_external_v1"
typedef struct index_descriptor {
char id[MD5_STR_LENGTH];
char uuid[UUID_STR_LEN];
char version[64];
long timestamp;
char root[PATH_MAX];

View File

@@ -2,6 +2,7 @@
#include "src/ctx.h"
#include <wordexp.h>
#include <glib.h>
#define PBSTR "========================================"
#define PBWIDTH 40
@@ -124,7 +125,7 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
}
GHashTable *incremental_get_table() {
GHashTable *file_table = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
GHashTable *file_table = g_hash_table_new(g_direct_hash, g_direct_equal);
return file_table;
}

View File

@@ -10,8 +10,6 @@
#include "third-party/utf8.h/utf8.h"
#include "libscan/scan.h"
#define MD5_STR_LENGTH 33
char *abspath(const char *path);
@@ -23,6 +21,25 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
GHashTable *incremental_get_table();
__always_inline
static void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(mtime));
}
__always_inline
static int incremental_get(GHashTable *table, unsigned long inode_no) {
if (table != NULL) {
return GPOINTER_TO_INT(g_hash_table_lookup(table, (gpointer) inode_no));
} else {
return 0;
}
}
__always_inline
static int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
return g_hash_table_insert(table, GINT_TO_POINTER(inode_no), GINT_TO_POINTER(1));
}
const char *find_file_in_paths(const char **paths, const char *filename);
@@ -31,95 +48,4 @@ void str_escape(char *dst, const char *str);
void str_unescape(char *dst, const char *str);
static int hex2buf(const char *str, int len, unsigned char *bytes) {
static const uint8_t hashmap[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
for (int pos = 0; pos < len; pos += 2) {
int idx0 = (uint8_t) str[pos + 0];
int idx1 = (uint8_t) str[pos + 1];
bytes[pos / 2] = (uint8_t) (hashmap[idx0] << 4) | hashmap[idx1];
}
return TRUE;
}
__always_inline
static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
static const char hexdig[] = "0123456789abcdef";
const unsigned char *p;
size_t i;
char *s = hex_string;
for (i = 0, p = buf; i < buflen; i++, p++) {
*s++ = hexdig[(*p >> 4) & 0x0f];
*s++ = hexdig[*p & 0x0f];
}
*s = '\0';
}
__always_inline
static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
}
__always_inline
static void incremental_put(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
char *ptr = malloc(MD5_STR_LENGTH);
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}
__always_inline
static int incremental_get(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
if (table != NULL) {
char md5_str[MD5_STR_LENGTH];
buf2hex(path_md5, MD5_DIGEST_LENGTH, md5_str);
return GPOINTER_TO_INT(g_hash_table_lookup(table, md5_str));
} else {
return 0;
}
}
__always_inline
static int incremental_mark_file_for_copy(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
char *ptr = malloc(MD5_STR_LENGTH);
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
}
#endif

View File

@@ -8,8 +8,18 @@
#include <src/ctx.h>
#include <mongoose.h>
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
static int has_prefix(const struct mg_str *str, const struct mg_str *prefix) {
return str->len > prefix->len && memcmp(str->p, prefix->p, prefix->len) == 0;
}
static int is_equal(const struct mg_str *s1, const struct mg_str *s2) {
return s1->len == s2->len && memcmp(s1->p, s2->p, s2->len) == 0;
}
static void send_response_line(struct mg_connection *nc, int status_code, int length, char *extra_headers) {
mg_printf(
nc,
"HTTP/1.1 %d %s\r\n"
@@ -26,7 +36,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, size_t
index_t *get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count; i >= 0; i--) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
if (strcmp(index_id, WebCtx.indices[i].desc.uuid) == 0) {
return &WebCtx.indices[i];
}
}
@@ -52,32 +62,36 @@ store_t *get_tag_store(const char *index_id) {
void search_index(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html");
mg_send(nc, search_html, sizeof(search_html));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void stats(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(stats_html), "Content-Type: text/html");
mg_send(nc, stats_html, sizeof(stats_html));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
mg_http_reply(nc, 404, "", "");
if (path->len != UUID_STR_LEN + 4) {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
index_t *index = get_index_by_id(arg_md5);
index_t *index = get_index_by_id(arg_uuid);
if (index == NULL) {
mg_http_reply(nc, 404, "", "");
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
const char *file;
switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
switch (atoi(hm->uri.p + 3 + UUID_STR_LEN)) {
case 1:
file = "treemap.csv";
break;
@@ -91,41 +105,54 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
file = "date_agg.csv";
break;
default:
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char disposition[8192];
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"\r\n", file);
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"", file);
char full_path[PATH_MAX];
strcpy(full_path, index->path);
strcat(full_path, file);
mg_http_serve_file(nc, hm, full_path, "text/csv", disposition);
mg_http_serve_file(nc, hm, full_path, mg_mk_str("text/csv"), mg_mk_str(disposition));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void javascript_lib(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript");
mg_send(nc, bundle_js, sizeof(bundle_js));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void javascript_search(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_js), "Content-Type: application/javascript");
mg_send(nc, search_js, sizeof(search_js));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
int client_requested_dark_theme(struct mg_http_message *hm) {
struct mg_str *cookie_header = mg_http_get_header(hm, "cookie");
int client_requested_dark_theme(struct http_message *hm) {
struct mg_str *cookie_header = mg_get_http_header(hm, "cookie");
if (cookie_header == NULL) {
return FALSE;
}
struct mg_str sist_cookie = mg_http_get_header_var(*cookie_header, mg_str_n("sist", 4));
char buf[4096];
char *sist_cookie = buf;
if (mg_http_parse_header2(cookie_header, "sist", &sist_cookie, sizeof(buf)) == 0) {
return FALSE;
}
return mg_strcmp(sist_cookie, mg_str_n("dark", 4)) == 0;
int ret = strcmp(sist_cookie, "dark") == 0;
if (sist_cookie != buf) {
free(sist_cookie);
}
return ret;
}
void style(struct mg_connection *nc, struct mg_http_message *hm) {
void style(struct mg_connection *nc, struct http_message *hm) {
if (client_requested_dark_theme(hm)) {
send_response_line(nc, 200, sizeof(bundle_dark_css), "Content-Type: text/css");
@@ -134,9 +161,11 @@ void style(struct mg_connection *nc, struct mg_http_message *hm) {
send_response_line(nc, 200, sizeof(bundle_css), "Content-Type: text/css");
mg_send(nc, bundle_css, sizeof(bundle_css));
}
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void img_sprite_skin_flat(struct mg_connection *nc, struct mg_http_message *hm) {
void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) {
if (client_requested_dark_theme(hm)) {
send_response_line(nc, 200, sizeof(sprite_skin_flat_dark_png), "Content-Type: image/png");
mg_send(nc, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png));
@@ -144,59 +173,71 @@ void img_sprite_skin_flat(struct mg_connection *nc, struct mg_http_message *hm)
send_response_line(nc, 200, sizeof(sprite_skin_flat_png), "Content-Type: image/png");
mg_send(nc, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
}
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (hm->uri.len != 68) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
if (path->len != UUID_STR_LEN * 2 + 2) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_file_md5[MD5_STR_LENGTH];
char arg_index[MD5_STR_LENGTH];
char arg_uuid[UUID_STR_LEN];
char arg_index[UUID_STR_LEN];
memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
*(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
memcpy(arg_index, hm->uri.p + 3, UUID_STR_LEN);
*(arg_index + UUID_STR_LEN - 1) = '\0';
memcpy(arg_uuid, hm->uri.p + 3 + UUID_STR_LEN, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
unsigned char md5_buf[MD5_DIGEST_LENGTH];
hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
uuid_t uuid;
int ret = uuid_parse(arg_uuid, uuid);
if (ret != 0) {
LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
store_t *store = get_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
mg_http_reply(nc, 404, "", "Not found");
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
size_t data_len = 0;
char *data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
char *data = store_read(store, (char *) uuid, sizeof(uuid_t), &data_len);
if (data_len != 0) {
send_response_line(nc, 200, data_len, "Content-Type: image/jpeg");
mg_send(nc, data, data_len);
free(data);
}
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void search(struct mg_connection *nc, struct mg_http_message *hm) {
void search(struct mg_connection *nc, struct http_message *hm) {
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_reply(nc, 500, "", "Invalid request");
mg_http_send_error(nc, 500, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len);
memcpy(body, hm->body.p, hm->body.len);
*(body + hm->body.len) = '\0';
char url[4096];
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->fn_data = web_post_async(url, body);
nc->user_data = web_post_async(url, body);
}
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
@@ -218,16 +259,16 @@ void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
dyn_buffer_t encoded = url_escape(url);
dyn_buffer_write_char(&encoded, '\0');
char location_header[8192];
snprintf(location_header, sizeof(location_header), "Location: %s\r\n", encoded.buf);
mg_http_reply(nc, 308, location_header, "");
mg_http_send_redirect(
nc, 308,
(struct mg_str) MG_MK_STR_N(encoded.buf, encoded.cur),
(struct mg_str) MG_NULL_STR
);
dyn_buffer_destroy(&encoded);
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct mg_http_message *hm) {
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct http_message *hm) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
@@ -248,10 +289,10 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)
char disposition[8192];
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"\r\n",
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"",
name, strlen(ext) == 0 ? "" : ".", ext);
mg_http_serve_file(nc, hm, full_path, mime, disposition);
mg_http_serve_file(nc, hm, full_path, mg_mk_str(mime), mg_mk_str(disposition));
}
void index_info(struct mg_connection *nc) {
@@ -264,7 +305,7 @@ void index_info(struct mg_connection *nc) {
cJSON *idx_json = cJSON_CreateObject();
cJSON_AddStringToObject(idx_json, "name", idx->desc.name);
cJSON_AddStringToObject(idx_json, "version", idx->desc.version);
cJSON_AddStringToObject(idx_json, "id", idx->desc.id);
cJSON_AddStringToObject(idx_json, "id", idx->desc.uuid);
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
cJSON_AddItemToArray(arr, idx_json);
}
@@ -275,35 +316,40 @@ void index_info(struct mg_connection *nc) {
mg_send(nc, json_str, strlen(json_str));
free(json_str);
cJSON_Delete(json);
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
cJSON *doc = elastic_get_document(arg_md5);
cJSON *doc = elastic_get_document(arg_uuid);
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -312,21 +358,24 @@ void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
mg_send(nc, json_str, (int) strlen(json_str));
free(json_str);
cJSON_Delete(doc);
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void file(struct mg_connection *nc, struct mg_http_message *hm) {
void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
const char *next = arg_md5;
const char *next = arg_uuid;
cJSON *doc = NULL;
cJSON *index_id = NULL;
cJSON *source = NULL;
@@ -337,7 +386,8 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
cJSON *parent = cJSON_GetObjectItem(source, "parent");
@@ -351,7 +401,8 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (idx == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_send_error(nc, 404, NULL);
return;
}
@@ -372,12 +423,14 @@ void status(struct mg_connection *nc) {
}
free(status);
nc->flags |= MG_F_SEND_AND_CLOSE;
}
typedef struct {
char *name;
int delete;
char *path_md5_str;
char *relpath;
char *doc_id;
} tag_req_t;
@@ -397,9 +450,8 @@ tag_req_t *parse_tag_request(cJSON *json) {
return NULL;
}
cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
cJSON *arg_relpath = cJSON_GetObjectItem(json, "relpath");
if (arg_relpath == NULL || !cJSON_IsString(arg_relpath)) {
return NULL;
}
@@ -411,38 +463,41 @@ tag_req_t *parse_tag_request(cJSON *json) {
tag_req_t *req = malloc(sizeof(tag_req_t));
req->delete = arg_delete->valueint;
req->name = arg_name->valuestring;
req->path_md5_str = arg_path_md5->valuestring;
req->relpath = arg_relpath->valuestring;
req->doc_id = arg_doc_id->valuestring;
return req;
}
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (path->len != UUID_STR_LEN + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
char arg_index[UUID_STR_LEN];
memcpy(arg_index, hm->uri.p + 5, UUID_STR_LEN);
*(arg_index + UUID_STR_LEN - 1) = '\0';
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request")
mg_http_reply(nc, 404, "", "Not found");
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
store_t *store = get_tag_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
mg_http_reply(nc, 404, "", "Not found");
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len);
memcpy(body, hm->body.p, hm->body.len);
*(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body);
@@ -451,14 +506,15 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
cJSON_Delete(json);
free(body);
mg_http_reply(nc, 400, "", "Invalid request");
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
cJSON *arr = NULL;
size_t data_len = 0;
const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
const char *data = store_read(store, arg_req->relpath, strlen(arg_req->relpath), &data_len);
if (data_len == 0) {
arr = cJSON_CreateArray();
} else {
@@ -494,7 +550,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf);
nc->user_data = web_post_async(url, buf);
} else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
@@ -514,12 +570,11 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf);
nc->user_data = web_post_async(url, buf);
}
char *json_str = cJSON_PrintUnformatted(arr);
store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
store_flush(store);
store_write(store, arg_req->relpath, strlen(arg_req->relpath) + 1, json_str, strlen(json_str) + 1);
free(arg_req);
free(json_str);
@@ -528,22 +583,39 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
free(body);
}
int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) {
int validate_auth(struct mg_connection *nc, struct http_message *hm) {
char user[256] = {0,};
char pass[256] = {0,};
mg_http_creds(hm, user, sizeof(user), pass, sizeof(pass));
if (strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
mg_http_reply(nc, 401, "WWW-Authenticate: Basic realm=\"sist2\"\r\n", "");
int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass));
if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n"
"WWW-Authenticate: Basic realm=\"sist2\"\r\n"
"Content-Length: 0\r\n\r\n");
nc->flags |= MG_F_SEND_AND_CLOSE;
return FALSE;
}
return TRUE;
}
static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(void *fn_data)) {
static void ev_router(struct mg_connection *nc, int ev, void *p) {
struct mg_str scheme;
struct mg_str user_info;
struct mg_str host;
unsigned int port;
struct mg_str path;
struct mg_str query;
struct mg_str fragment;
if (ev == MG_EV_HTTP_REQUEST) {
struct http_message *hm = (struct http_message *) p;
if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
if (ev == MG_EV_HTTP_MSG) {
struct mg_http_message *hm = (struct mg_http_message *) ev_data;
if (WebCtx.auth_enabled == TRUE) {
if (!validate_auth(nc, hm)) {
@@ -551,48 +623,52 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
}
}
if (mg_http_match_uri(hm, "/")) {
if (is_equal(&path, &((struct mg_str) MG_MK_STR("/")))) {
search_index(nc);
} else if (mg_http_match_uri(hm, "/css")) {
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/css")))) {
style(nc, hm);
} else if (mg_http_match_uri(hm, "/stats")) {
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/stats")))) {
stats(nc);
} else if (mg_http_match_uri(hm, "/jslib")) {
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jslib")))) {
javascript_lib(nc);
} else if (mg_http_match_uri(hm, "/jssearch")) {
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jssearch")))) {
javascript_search(nc);
} else if (mg_http_match_uri(hm, "/img/sprite-skin-flat.png")) {
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/img/sprite-skin-flat.png")))) {
img_sprite_skin_flat(nc, hm);
} else if (mg_http_match_uri(hm, "/es")) {
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/es")))) {
search(nc, hm);
} else if (mg_http_match_uri(hm, "/i")) {
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/i")))) {
index_info(nc);
} else if (mg_http_match_uri(hm, "/status")) {
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/status")))) {
status(nc);
} else if (mg_http_match_uri(hm, "/f/*")) {
file(nc, hm);
} else if (mg_http_match_uri(hm, "/t/*/*")) {
thumbnail(nc, hm);
} else if (mg_http_match_uri(hm, "/s/*/*")) {
stats_files(nc, hm);
} else if (mg_http_match_uri(hm, "/tag/*")) {
if (WebCtx.tag_auth_enabled == TRUE && !validate_auth(nc, hm)) {
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/f/")))) {
file(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/t/")))) {
thumbnail(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/s/")))) {
stats_files(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/tag/")))) {
if (WebCtx.tag_auth_enabled == TRUE) {
if (!validate_auth(nc, hm)) {
return;
}
tag(nc, hm);
} else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm);
}
tag(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
document_info(nc, hm, &path);
} else {
mg_http_reply(nc, 404, "", "Page not found");
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
}
} else if (ev == MG_EV_POLL) {
if (nc->fn_data != NULL) {
if (nc->user_data != NULL) {
//Waiting for ES reply
subreq_ctx_t *ctx = (subreq_ctx_t *) nc->fn_data;
subreq_ctx_t *ctx = (subreq_ctx_t *) nc->user_data;
web_post_async_poll(ctx);
if (ctx->done == TRUE) {
response_t *r = ctx->response;
if (r->status_code == 200) {
@@ -612,14 +688,14 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
free(json_str);
free(tmp);
}
mg_http_reply(nc, 500, "", "");
mg_http_send_error(nc, 500, NULL);
}
free_response(r);
free(ctx->data);
free(ctx);
nc->fn_data = NULL;
nc->flags |= MG_F_SEND_AND_CLOSE;
nc->user_data = NULL;
}
}
}
@@ -630,18 +706,15 @@ void serve(const char *listen_address) {
printf("Starting web server @ http://%s\n", listen_address);
struct mg_mgr mgr;
mg_mgr_init(&mgr);
mg_mgr_init(&mgr, NULL);
int ok = 1;
struct mg_connection *nc = mg_http_listen(&mgr, listen_address, ev_router, NULL);
struct mg_connection *nc = mg_bind(&mgr, listen_address, ev_router);
if (nc == NULL) {
LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address)
}
mg_set_protocol_http_websocket(nc);
while (ok) {
for (;;) {
mg_mgr_poll(&mgr, 10);
}
mg_mgr_free(&mgr);
LOG_INFO("serve.c", "Finished web event loop")
}

File diff suppressed because one or more lines are too long

View File

@@ -1,77 +0,0 @@
import unittest
import subprocess
import shutil
import json
import os
TEST_FILES = "third-party/libscan/libscan-test-files/test_files"
def copy_files(files):
base = os.path.basename(files)
new_path = os.path.join("/tmp/sist2_test/", base)
shutil.rmtree(new_path, ignore_errors=True)
shutil.copytree(files, new_path)
return new_path
def sist2(*args):
print("./sist2 " + " ".join(args))
return subprocess.check_output(
args=["./sist2", *args],
)
def sist2_index(files, *args):
path = copy_files(files)
shutil.rmtree("test_i", ignore_errors=True)
sist2("scan", path, "-o", "test_i", *args)
return iter(sist2_index_to_dict("test_i"))
def sist2_incremental_index(files, func=None, *args):
path = copy_files(files)
if func:
func(path)
shutil.rmtree("test_i_inc", ignore_errors=True)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
return iter(sist2_index_to_dict("test_i_inc"))
def sist2_index_to_dict(index):
res = subprocess.check_output(
args=["./sist2", "index", "--print", index],
)
for line in res.splitlines():
if line:
yield json.loads(line)
class ScanTest(unittest.TestCase):
def test_incremental1(self):
def remove_files(path):
os.remove(os.path.join(path, "msdoc/test1.doc"))
os.remove(os.path.join(path, "msdoc/test2.doc"))
def add_files(path):
with open(os.path.join(path, "newfile1"), "w"):
pass
with open(os.path.join(path, "newfile2"), "w"):
pass
with open(os.path.join(path, "newfile3"), "w"):
pass
file_count = sum(1 for _ in sist2_index(TEST_FILES))
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
if __name__ == "__main__":
unittest.main()