Compare commits

..

63 Commits

Author SHA1 Message Date
142a4869e6 Bump version 2021-06-23 08:19:05 -04:00
ddb7f8d5d7 Fix some bugs in serve.c caused by mongoose upgrade 2021-06-23 08:18:11 -04:00
dfb8c67490 thread safety for debug info table 2021-06-14 15:04:08 -04:00
3da2c8cae3 Update CI scripts, Dockerfiles, enable arm64 build again 2021-06-14 14:02:16 -04:00
2f0e999b06 Update README.md 2021-06-13 09:50:56 -04:00
bf28dc8993 Merge pull request #164 from simon987/dev
v2.10.1
2021-06-13 09:50:23 -04:00
c6fee7f6e2 update argparse 2021-06-13 09:41:18 -04:00
201c2a1a47 Update CI things 2021-06-13 09:26:27 -04:00
7c46ad632a Update readme 2021-06-11 20:44:47 -04:00
5b8c13fd13 Handle GPS metadata in the UI 2021-06-11 20:41:05 -04:00
efa4a06e56 Fix meta_key UB problem 2021-06-11 20:19:36 -04:00
81670ee107 Fix subtitle problems 2021-06-11 10:05:33 -04:00
f9dac80905 Fix file download in mongoose 7.x 2021-06-09 13:34:38 -04:00
f8d9b718c0 Fix memory leak in RAW parsing 2021-06-09 08:22:31 -04:00
6f5fdc2935 Fix for segfault in some comic files 2021-06-07 09:01:46 -04:00
a01f6dff1f Use 16-bit ints for meta keys (wip) 2021-06-07 08:40:12 -04:00
22dd58e140 add signal handler w/ debug info 2021-05-08 16:23:24 -04:00
f3e07fb7f7 Merge pull request #155 from dpieski/patch-4
Create feature_request.md
2021-05-06 20:17:29 -04:00
7990e5cd2e Update feature_request.md 2021-05-06 20:16:59 -04:00
e3ca660983 Merge pull request #153 from dpieski/patch-1
Create bug_report.md
2021-05-06 20:15:52 -04:00
b87fb25458 Update bug_report.md 2021-05-06 20:15:41 -04:00
c7a77869ad Merge pull request #154 from dpieski/patch-2
Create config.yml
2021-05-06 20:09:36 -04:00
523c123e2e Enable advanced search with query_string 2021-05-06 20:07:20 -04:00
fc7f30d670 Add tests for subtitle 2021-05-05 16:10:55 -04:00
152fe11669 Set passphrase arg in arc_ctx 2021-05-05 15:52:46 -04:00
33f97f6bfb Increase scan queue size 2021-05-05 14:25:35 -04:00
71f9dfcfe0 sync libscan 2021-05-05 14:21:01 -04:00
5f657d61b3 Merge pull request #157 from simon987/mongoose-7
Update to mongoose 7.x, change Docker build
2021-05-05 14:18:36 -04:00
908def1016 Fix build, update dockerfile 2021-05-05 14:13:46 -04:00
db3d312835 wip 2021-05-05 13:55:57 -04:00
32c9cb28a3 Read subtitles from media files, fix bug in text_buffer 2021-05-05 13:55:57 -04:00
f839127129 Change encoding for antiword PDF 2021-05-05 13:55:57 -04:00
8111a6c143 Workaround for UTF8 .doc files 2021-05-05 13:55:57 -04:00
707a570828 Pause all other audio tags on play #148 2021-04-17 13:24:21 -04:00
Andrew
5073b00225 Create feature_request.md
Created basic feature request template.
2021-04-13 11:25:48 -05:00
Andrew
4923d1b51f Update bug_report.md
Forgot an exclamation mark
2021-04-13 11:20:46 -05:00
Andrew
097e332015 Create config.yml
Does two things:
1. Creates a link to the USAGE page. 
2. Removes "Open Blank Issue" option on the New Issue page.
2021-04-13 11:10:08 -05:00
Andrew
d4babe216b Update bug_report.md
Added a bug to the title just because. Not necessary, just wanted to see how it looks.
2021-04-13 10:58:32 -05:00
Andrew
44511a2202 Create bug_report.md
Beginnings of a Bug Report template for #151
2021-04-13 10:49:43 -05:00
50771bd1dc Read subtitles from media files, fix bug in text_buffer 2021-03-26 19:48:16 -04:00
bc884e137c Change encoding for antiword PDF 2021-01-16 12:17:43 -05:00
ce1e241dea Workaround for UTF8 .doc files 2021-01-16 12:13:56 -05:00
5fe9c9efa3 Tweak CI settings 2021-01-16 11:14:18 -05:00
75e4e93ddd Enable docker image builds 2021-01-16 10:57:55 -05:00
013c54daa0 Fix tag delete 2020-12-31 12:55:37 -05:00
54308ef5e2 Update tags tab automatically 2020-12-31 12:45:23 -05:00
638c2a5c1a Update binary names (again) 2020-12-31 11:03:25 -05:00
9587caddd9 Don't build tests by default, fix enlarge button 2020-12-31 10:55:34 -05:00
f5bbe0dc97 Update binary names 2020-12-31 10:54:30 -05:00
f87eac1f90 Update submodules 2020-12-31 10:26:05 -05:00
ddafbab6a6 Update readme 2020-12-31 10:26:05 -05:00
b91d574756 Add md5 client-side lib 2020-12-31 10:26:05 -05:00
576140e542 fix submodules 2020-12-31 10:26:05 -05:00
050c1283a3 Remove UUID dep, fix incremental scan, use MD5(path) as unique id, version bump 2020-12-31 10:26:05 -05:00
c6e1ba03bc Better support for .doc files 2020-12-31 10:26:05 -05:00
10e32f707f Update README.md 2020-12-31 10:26:05 -05:00
86e83bafaf Update README.md 2020-12-31 10:26:05 -05:00
51a40c8819 Add .doc support 2020-12-31 10:26:05 -05:00
acc557
36281a5108 Use relative path for loading csv in stats 2020-12-31 10:26:05 -05:00
acc557
76a0bda48b Update search.html
Fix relative stats URL
2020-12-31 10:26:05 -05:00
0cf29a660c Fix relative image URL #122 2020-12-31 10:26:05 -05:00
6cd0741848 update build instructions 2020-12-31 10:26:05 -05:00
bc120f349d Setup ARM CI builds 2020-12-23 10:26:26 -05:00
50 changed files with 1057 additions and 502 deletions

25
.dockerignore Normal file
View File

@@ -0,0 +1,25 @@
.idea
*/thumbs
*.cbp
CMakeCache.txt
CMakeFiles
cmake-build-debug
cmake_install.cmake
Makefile
*.out
LOG
sist2*
index.sist2/
bundle*.css
bundle.js
**/*.a
**/vgcore.*
build/
.git/
third-party/libscan/libscan-test-files/
**/ext_ffmpeg
**/ext_libmobi
**/scan_a_test
Dockerfile
*.idx/
VERSION

View File

@@ -1,12 +1,31 @@
kind: pipeline
type: docker
name: default
name: amd64
platform:
os: linux
arch: amd64
steps:
- name: build
image: simon987/ubuntu_ci
image: simon987/sist2-build
commands:
- ./ci/build.sh
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
- name: scp files
image: appleboy/drone-scp
settings:
@@ -20,5 +39,34 @@ steps:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./sist2.gz
- ./sist2_debug.tar.gz
- ./VERSION
- ./sist2-x64-linux
- ./sist2-x64-linux-debug
---
kind: pipeline
type: docker
name: arm64
platform:
arch: arm64
steps:
- name: build
image: simon987/sist2-build-arm64
commands:
- ./ci/build_arm64.sh
- name: scp files
image: appleboy/drone-scp
settings:
host:
from_secret: SSH_HOST
port:
from_secret: SSH_PORT
user:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./sist2-arm64-linux

40
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@@ -0,0 +1,40 @@
---
name: "🐞 Bug Report"
about: Submit a bug report
title: ''
labels: bug
assignees: ''
---
**Device Information (please complete the following information):**
- OS: `[e.g., Ubuntu 20.04, WSL2]`
- Deployment: `[Linux, Linux ARM64 or Docker]`
- Browser *(if relevant)*: `[e.g., chrome, safari]`
- SIST2 Version: `[e.g., v2.9.0]`
- Elasticsearch Version *(if relevant)* : ``
**Command with arguments**
<!-- `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0` -->
**Describe the bug**
<!-- A clear and concise description of what the bug is. -->
**Steps To Reproduce**
Please be specific!
1. Go to '...'
2. Click on '....'
3. etc.
**Expected behavior**
<!-- A clear and concise description of what you expected to happen. -->
**Actual Behavior**
<!-- A clear and concise description of what actually happens. -->
**Screenshots**
<!-- If applicable, add screenshots to help explain your problem. -->
**Additional context**
<!-- Add any other context about the problem here. If applicable, please include why you think the bug is occurring and/or troubleshooting you have already performed. -->
<!-- If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net. -->

5
.github/ISSUE_TEMPLATE/config.yml vendored Normal file
View File

@@ -0,0 +1,5 @@
blank_issues_enabled: false
contact_links:
- name: SIST2 Documentation
url: https://github.com/simon987/sist2/blob/master/docs/USAGE.md
about: Check out the SIST2 documentation for answers to common questions

View File

@@ -0,0 +1,18 @@
---
name: "🚀 Feature Request"
about: Suggest an idea for SIST2
title: ''
assignees: ''
---
**Which SIST2 component is your Feature Request related to?**
<!-- e.g., Scan, Index, or Web? -->
**Is your feature request related to a problem? Please describe.**
<!-- A clear and concise description of what the problem is. e.g., "I'm always frustrated when [...]" -->
**What would you like to see happen?**
<!-- A clear and concise description of what you want to happen. -->
**Additional context**
<!-- Add any other context or screenshots about the feature request here. -->

3
.gitignore vendored
View File

@@ -1,6 +1,5 @@
.idea
thumbs
test
*.cbp
CMakeCache.txt
CMakeFiles
@@ -17,3 +16,5 @@ bundle.js
vgcore.*
build/
third-party/
*.idx/
VERSION

View File

@@ -5,6 +5,7 @@ project(sist2 C)
option(SIST_DEBUG "Build a debug executable" on)
set(BUILD_TESTS on)
add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
@@ -35,15 +36,15 @@ add_executable(sist2
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(PkgConfig REQUIRED)
pkg_search_module(GLIB REQUIRED glib-2.0)
find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-glib CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_library(UUID_LIB NAMES uuid)
find_package(CURL CONFIG REQUIRED)
#find_package(OpenSSL REQUIRED)
target_include_directories(
sist2 PUBLIC
@@ -51,6 +52,7 @@ target_include_directories(
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/
${GLIB_INCLUDE_DIRS}
)
target_compile_options(
@@ -67,7 +69,8 @@ if (SIST_DEBUG)
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-O2
-fno-inline
# -O2
)
target_link_options(
sist2
@@ -80,7 +83,6 @@ if (SIST_DEBUG)
OUTPUT_NAME sist2_debug
)
else ()
# set(VCPKG_BUILD_TYPE release)
target_compile_options(
sist2
PRIVATE
@@ -103,14 +105,15 @@ target_link_libraries(
lmdb
cjson
argparse
unofficial::glib::glib
${GLIB_LDFLAGS}
unofficial::mongoose::mongoose
CURL::libcurl
${UUID_LIB}
pthread
magic
c
scan
)

View File

@@ -1,14 +0,0 @@
rm ./sist2 sist2_debug
cp ../sist2.gz .
gzip -d sist2.gz
strip sist2
version=$(./sist2 --version)
echo "Version ${version}"
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
docker push simon987/sist2:${version}
docker push simon987/sist2:latest
docker run --rm simon987/sist2 -v

View File

@@ -1,13 +0,0 @@
rm ./sist2_arm64
cp ../sist2_arm64.gz .
gzip -d sist2_arm64.gz
version=$(./sist2_arm64 --version)
echo "Version ${version}"
docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest
docker push simon987/sist2-arm64:"${version}"
docker push simon987/sist2-arm64:latest
docker run --rm simon987/sist2-arm64 -v

View File

@@ -1,9 +1,15 @@
FROM ubuntu:19.10
FROM simon987/sist2-build as build
MAINTAINER simon987 <me@simon987.net>
RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
curl libtiff5 libpng16-16 libpcre3
WORKDIR /build/
ADD . /build/
RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
FROM ubuntu:20.10
RUN apt update && apt install -y curl
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
@@ -12,9 +18,9 @@ RUN mkdir -p /usr/share/tessdata && \
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
ADD sist2 /root/sist2
COPY --from=build /build/sist2 /root/sist2
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

View File

@@ -1,9 +1,15 @@
FROM ubuntu:19.10
FROM simon987/sist2-build-arm64 as build
MAINTAINER simon987 <me@simon987.net>
RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
curl libtiff5 libpng16-16 libpcre3
WORKDIR /build/
ADD . /build/
RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
FROM ubuntu:20.10
RUN apt update && apt install -y curl
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
@@ -12,9 +18,9 @@ RUN mkdir -p /usr/share/tessdata && \
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
ADD sist2_arm64 /root/sist2
COPY --from=build /build/sist2 /root/sist2
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

View File

@@ -1,8 +1,8 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/app/rest/builds/buildType(Sist2_Build)/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)
# sist2
@@ -25,14 +25,12 @@ sist2 (Simple incremental search tool)
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
\* See [format support](#format-support)
\*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr)
![stats](docs/stats.png)
## Getting Started
1. Have an Elasticsearch (>= 6.X.X) instance running
@@ -52,15 +50,13 @@ sist2 (Simple incremental search tool)
```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest`
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:2.10.1-x64-linux`
1. See [Usage guide](docs/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
## Example usage
See [Usage guide](docs/USAGE.md) for more details
@@ -69,69 +65,88 @@ See [Usage guide](docs/USAGE.md) for more details
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
1. Start web interface: `sist2 web ./docs_idx`
## Format support
File type | Library | Content | Thumbnail | Metadata
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | title |
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
cbz,cbr | *(none)* | - | yes | - |
`audio/*` | ffmpeg | - | yes | ID3 tags |
`video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
html, xml | *(none)* | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
doc (MS Word 97-2003) | antiword | yes | yes | author, title |
mobi, azw, azw3 | libmobi | yes | no | author, title |
\* *See [Archive files](#archive-files)*
### Archive files
**sist2** will scan files stored into archive files (zip, tar, 7z...) as if
they were directly in the file system. Recursive (archives inside archives)
**sist2** will scan files stored into archive files (zip, tar, 7z...) as if they were directly in the file system.
Recursive (archives inside archives)
scan is also supported.
**Limitations**:
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
is limited (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split
into smaller parts.
**sist2** is not I/O bound, scans might be faster when larger archives are split into smaller parts.
### OCR
You can enable OCR support for pdf,xps,fb2,epub file types with the
`--ocr <lang>` option. Download the language data files with your
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
`--ocr <lang>` option. Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa) pre-installed.
Examples
```bash
sist2 scan --ocr jpn ~/Books/Manga/
sist2 scan --ocr eng ~/Books/Textbooks/
```
## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled
binaries (GCC 7+ required).
You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries
### With docker (recommended)
```bash
git clone --recursive https://github.com/simon987/sist2/
cd sist2
docker build . -f ./Dockerfile -t my-sist2-image
docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
```
### On a linux computer
1. Install compile-time dependencies
```bash
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
```
2. Build
1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
1. Install vcpkg dependencies
```bash
vcpkg install curl[core,openssl]
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libuuid libmagic libraw jasper lcms gumbo
```
1. Build
```bash
git clone --recursive https://github.com/simon987/sist2/
cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make
```

View File

@@ -2,18 +2,18 @@
VCPKG_ROOT="/vcpkg"
rm *.gz
rm *.gz &>/dev/null
git submodule update --init --recursive
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 33
make -j $(nproc)
strip sist2
gzip -9 sist2
./sist2 -v > VERSION
mv sist2 sist2-x64-linux
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 33
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2
make -j $(nproc)
mv sist2_debug sist2-x64-linux-debug

View File

@@ -2,11 +2,12 @@
VCPKG_ROOT="/vcpkg"
rm *.gz
rm *.gz &>/dev/null
git submodule update --init --recursive
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 4
make -j $(nproc)
strip sist2
mv sist2 sist2_arm64
gzip -9 sist2_arm64
mv sist2 sist2-arm64-linux

View File

@@ -46,6 +46,7 @@ Scan options
--fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
--read-subtitles Read subtitles from media files
Index options
-t, --threads=<int> Number of threads. DEFAULT=1
@@ -91,7 +92,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
will be copied to the new index and will not be parsed again.
* `-o, --output` Output directory.
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--name` Set the `name` option for the web module
* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
* `--archive` Archive file mode.
@@ -123,6 +124,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
### Scan examples
@@ -241,9 +243,11 @@ The `_text.*` items will be indexed and searchable as **text** fields (fuzzy sea
*thumbs/*:
LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
LMDB key-value store. Keys are **binary** 16-byte md5 hash* (`_id` field)
and values are raw image bytes.
*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
Importing an external `binary` type index is technically possible but
it is currently unsupported and has no guarantees of back/forward compatibility.
@@ -353,8 +357,7 @@ You can safely copy the `/tags/` database to another index.
See [Automatic tagging](#automatic-tagging) for information about tag
hierarchies and tag colors.
\* *It can take a few seconds to take effect in new search queries, and the page needs
to be reloaded for the tags tab to update*
\* *It can take a few seconds to take effect in new search queries.*
### Automatic tagging

View File

@@ -30,6 +30,10 @@
"mime": {
"type": "keyword"
},
"parent": {
"type": "keyword",
"index": false
},
"thumbnail": {
"type": "keyword",
"index": false
@@ -161,6 +165,30 @@
"exif_user_comment": {
"type": "text"
},
"exif_gps_longitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dec": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dec": {
"type": "keyword",
"index": false
},
"author": {
"type": "text"
},

6
scripts/reset.sh Executable file
View File

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
make clean
rm -rf CMakeFiles/ CMakeCache.txt Makefile \
third-party/libscan/CMakeFiles third-party/libscan/CMakeCache.txt third-party/libscan/third-party/ext_ffmpeg \
third-party/libscan/third-party/ext_libmobi third-party/libscan/Makefile

View File

@@ -227,6 +227,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase)
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)

View File

@@ -18,6 +18,7 @@ typedef struct scan_args {
char *path;
char *archive;
archive_mode_t archive_mode;
char *archive_passphrase;
char *tesseract_lang;
const char *tesseract_path;
char *exclude_regex;
@@ -25,6 +26,7 @@ typedef struct scan_args {
const char* treemap_threshold_str;
double treemap_threshold;
int max_memory_buffer;
int read_subtitles;
} scan_args_t;
scan_args_t *scan_args_create();

View File

@@ -13,6 +13,7 @@
#include "libscan/text/text.h"
#include "libscan/mobi/scan_mobi.h"
#include "libscan/raw/raw.h"
#include "libscan/msdoc/msdoc.h"
#include "src/io/store.h"
#include <glib.h>
@@ -39,6 +40,9 @@ typedef struct {
pcre_extra *exclude_extra;
int fast;
GHashTable *dbg_current_files;
pthread_mutex_t dbg_current_files_mu;
scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx;
scan_ebook_ctx_t ebook_ctx;
@@ -48,6 +52,7 @@ typedef struct {
scan_text_ctx_t text_ctx;
scan_mobi_ctx_t mobi_ctx;
scan_raw_ctx_t raw_ctx;
scan_msdoc_ctx_t msdoc_ctx;
} ScanCtx_t;
typedef struct {

View File

@@ -30,11 +30,11 @@ void elastic_cleanup() {
}
}
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
cJSON *line = cJSON_CreateObject();
cJSON_AddStringToObject(line, "_id", uuid_str);
cJSON_AddStringToObject(line, "_id", id_str);
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemReferenceToObject(line, "_source", document);
@@ -52,13 +52,13 @@ void index_json_func(void *arg) {
elastic_index_line(line);
}
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
char *json = cJSON_PrintUnformatted(document);
size_t json_len = strlen(json);
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
memcpy(bulk_line->line, json, json_len);
memcpy(bulk_line->uuid_str, uuid_str, UUID_STR_LEN);
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
*(bulk_line->line + json_len) = '\n';
*(bulk_line->line + json_len + 1) = '\0';
bulk_line->next = NULL;
@@ -67,7 +67,7 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]) {
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
@@ -129,9 +129,9 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
while (line != NULL && *count < max) {
char action_str[256];
snprintf(
action_str, 256,
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->uuid_str, Indexer->es_index
line->path_md5_str, Indexer->es_index
);
size_t action_str_len = strlen(action_str);
@@ -220,7 +220,7 @@ void _elastic_flush(int max) {
if (r->status_code == 413) {
if (max <= 1) {
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
free_response(r);
free(buf);
delete_queue(1);
@@ -408,9 +408,9 @@ void elastic_init(int force_reset, const char* user_mappings, const char* user_s
}
}
cJSON *elastic_get_document(const char *uuid_str) {
cJSON *elastic_get_document(const char *id_str) {
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, uuid_str);
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
response_t *r = web_get(url, 3);
cJSON *json = NULL;

View File

@@ -5,7 +5,7 @@
typedef struct es_bulk_line {
struct es_bulk_line *next;
char uuid_str[UUID_STR_LEN];
char path_md5_str[MD5_STR_LENGTH];
char line[0];
} es_bulk_line_t;
@@ -16,9 +16,9 @@ typedef struct es_indexer es_indexer_t;
void elastic_index_line(es_bulk_line_t *line);
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
es_indexer_t *create_indexer(const char *url, const char *index);
@@ -27,10 +27,10 @@ void finish_indexer(char *script, int async_script, char *index_id);
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);
cJSON *elastic_get_document(const char *uuid_str);
cJSON *elastic_get_document(const char *id_str);
char *elastic_get_status();
void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]);
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
#endif

File diff suppressed because one or more lines are too long

View File

@@ -6,18 +6,22 @@
static __thread int index_fd = -1;
typedef struct {
unsigned char uuid[16];
unsigned long ino;
unsigned char path_md5[MD5_DIGEST_LENGTH];
unsigned long size;
unsigned int mime;
int mtime;
short base;
short ext;
char has_parent;
} line_t;
#define META_NEXT 0xFFFF
void skip_meta(FILE *file) {
enum metakey key = getc(file);
while (key != '\n') {
enum metakey key = 0;
fread(&key, sizeof(uint16_t), 1, file);
while (key != META_NEXT) {
if (IS_META_INT(key)) {
fseek(file, sizeof(int), SEEK_CUR);
} else if (IS_META_LONG(key)) {
@@ -26,13 +30,13 @@ void skip_meta(FILE *file) {
while ((getc(file))) {}
}
key = getc(file);
fread(&key, sizeof(uint16_t), 1, file);
}
}
void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "uuid", desc->uuid);
cJSON_AddStringToObject(json, "id", desc->id);
cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name);
@@ -66,7 +70,7 @@ index_descriptor_t read_index_descriptor(char *path) {
}
char *buf = malloc(info.st_size + 1);
int ret = read(fd, buf, info.st_size);
size_t ret = read(fd, buf, info.st_size);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
}
@@ -82,7 +86,7 @@ index_descriptor_t read_index_descriptor(char *path) {
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short) strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
if (cJSON_GetObjectItem(json, "type") == NULL) {
strcpy(descriptor.type, INDEX_TYPE_BIN);
} else {
@@ -152,8 +156,20 @@ char *get_meta_key_text(enum metakey meta_key) {
return "thumbnail";
case MetaPages:
return "pages";
case MetaExifGpsLongitudeRef:
return "exif_gps_longitude_ref";
case MetaExifGpsLongitudeDMS:
return "exif_gps_longitude_dms";
case MetaExifGpsLongitudeDec:
return "exif_gps_longitude_dec";
case MetaExifGpsLatitudeRef:
return "exif_gps_latitude_ref";
case MetaExifGpsLatitudeDMS:
return "exif_gps_latitude_dms";
case MetaExifGpsLatitudeDec:
return "exif_gps_latitude_dec";
default:
return NULL;
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
}
}
@@ -183,7 +199,7 @@ void write_document(document_t *doc) {
meta_line_t *meta = doc->meta_head;
while (meta != NULL) {
dyn_buffer_write_char(&buf, meta->key);
dyn_buffer_write_short(&buf, (uint16_t) meta->key);
if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->int_val);
@@ -197,7 +213,7 @@ void write_document(document_t *doc) {
meta = meta->next;
free(tmp);
}
dyn_buffer_write_char(&buf, '\n');
dyn_buffer_write_short(&buf, META_NEXT);
int res = write(index_fd, buf.buf, buf.cur);
if (res == -1) {
@@ -219,9 +235,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
dyn_buffer_t buf = dyn_buffer_create();
FILE *file = fopen(path, "rb");
while (1) {
while (TRUE) {
buf.cur = 0;
size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
size_t _ = fread((void *) &line, sizeof(line_t), 1, file);
if (feof(file)) {
break;
}
@@ -229,8 +245,8 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
cJSON *document = cJSON_CreateObject();
cJSON_AddStringToObject(document, "index", index_id);
char uuid_str[UUID_STR_LEN];
uuid_unparse(line.uuid, uuid_str);
char path_md5_str[MD5_STR_LENGTH];
buf2hex(line.path_md5, sizeof(line.path_md5), path_md5_str);
const char *mime_text = mime_get_mime_text(line.mime);
if (mime_text == NULL) {
@@ -247,9 +263,6 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
}
dyn_buffer_write_char(&buf, '\0');
char full_filename[PATH_MAX];
strcpy(full_filename, buf.buf);
cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
if (*(buf.buf + line.ext - 1) == '.') {
*(buf.buf + line.ext - 1) = '\0';
@@ -271,9 +284,10 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
cJSON_AddStringToObject(document, "path", "");
}
enum metakey key = getc(file);
size_t ret = 0;
while (key != '\n') {
enum metakey key = 0;
fread(&key, sizeof(uint16_t), 1, file);
size_t ret;
while (key != META_NEXT) {
switch (key) {
case MetaPages:
case MetaWidth:
@@ -311,6 +325,12 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaExifGpsLongitudeDMS:
case MetaExifGpsLongitudeDec:
case MetaExifGpsLongitudeRef:
case MetaExifGpsLatitudeDMS:
case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef:
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
@@ -326,12 +346,12 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
}
key = getc(file);
fread(&key, sizeof(uint16_t), 1, file);
}
cJSON *meta_obj = NULL;
if (IndexCtx.meta != NULL) {
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, full_filename);
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
if (meta_string != NULL) {
meta_obj = cJSON_Parse(meta_string);
@@ -346,7 +366,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
}
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, full_filename);
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_DeleteItemFromObject(document, "tag");
@@ -354,7 +374,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
}
}
func(document, uuid_str);
func(document, path_md5_str);
cJSON_Delete(document);
if (meta_obj) {
cJSON_Delete(meta_obj);
@@ -382,7 +402,7 @@ const char *json_type_array_fields[] = {
void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {
FILE *file = fopen(path, "r");
while (1) {
while (TRUE) {
char *line = NULL;
size_t len;
size_t read = getline(&line, &len, file);
@@ -402,7 +422,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
}
cJSON *document = cJSON_CreateObject();
const char *uuid_str = cJSON_GetObjectItem(input, "_id")->valuestring;
const char *id_str = cJSON_GetObjectItem(input, "_id")->valuestring;
for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
@@ -430,7 +450,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
}
}
func(document, uuid_str);
func(document, id_str);
cJSON_Delete(document);
cJSON_Delete(input);
@@ -438,7 +458,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
fclose(file);
}
void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_BIN) == 0) {
read_index_bin(path, index_id, func);
@@ -451,15 +471,17 @@ void incremental_read(GHashTable *table, const char *filepath) {
FILE *file = fopen(filepath, "rb");
line_t line;
LOG_DEBUGF("serialize.c", "Incremental read %s", filepath)
while (1) {
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
if (ret != 1 || feof(file)) {
break;
}
incremental_put(table, line.ino, line.mtime);
incremental_put(table, line.path_md5, line.mtime);
while ((getc(file))) {}
while ((getc(file)) != 0) {}
skip_meta(file);
}
fclose(file);
@@ -475,33 +497,47 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
FILE *dst_file = fopen(dst_filepath, "ab");
line_t line;
while (1) {
size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
LOG_DEBUGF("serialize.c", "Incremental copy %s", filepath)
while (TRUE) {
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
if (ret != 1 || feof(file)) {
break;
}
if (incremental_get(copy_table, line.ino)) {
// Assume that files with parents still exist.
// One way to "fix" this would be to check if the parent is marked for copy but it would consistently
// delete files with grandparents, which is a side-effect worse than having orphaned files
if (line.has_parent || incremental_get(copy_table, line.path_md5)) {
fwrite(&line, sizeof(line), 1, dst_file);
size_t buf_len;
char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
free(buf);
// Copy filepath
char filepath_buf[PATH_MAX];
char c;
char *ptr = filepath_buf;
while ((c = (char) getc(file))) {
fwrite(&c, sizeof(c), 1, dst_file);
*ptr++ = c;
}
fwrite("\0", sizeof(c), 1, dst_file);
*ptr = '\0';
fwrite(filepath_buf, (ptr - filepath_buf) + 1, 1, dst_file);
enum metakey key;
// Copy tn store contents
size_t buf_len;
char path_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) filepath_buf, (ptr - filepath_buf), (unsigned char *) path_md5);
char *buf = store_read(store, path_md5, sizeof(path_md5), &buf_len);
if (buf_len != 0) {
store_write(dst_store, path_md5, sizeof(path_md5), buf, buf_len);
free(buf);
}
enum metakey key = 0;
while (1) {
key = getc(file);
if (key == '\n') {
fread(&key, sizeof(uint16_t), 1, file);
fwrite(&key, sizeof(uint16_t), 1, dst_file);
if (key == META_NEXT) {
break;
}
fwrite(&key, sizeof(char), 1, dst_file);
if (IS_META_INT(key)) {
int val;
@@ -517,14 +553,12 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
}
fwrite("\0", sizeof(c), 1, dst_file);
}
if (ret != 1) {
break;
}
}
} else {
while ((getc(file))) {}
skip_meta(file);
}
}
fclose(file);
fclose(dst_file);
}

View File

@@ -7,14 +7,14 @@
#include <sys/syscall.h>
#include <glib.h>
typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
void write_document(document_t *doc);
void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath);

View File

@@ -4,6 +4,7 @@
store_t *store_create(char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t));
#if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL);
@@ -28,30 +29,39 @@ store_t *store_create(char *path, size_t chunk_size) {
mdb_txn_begin(store->env, NULL, 0, &txn);
mdb_dbi_open(txn, NULL, 0, &store->dbi);
mdb_txn_commit(txn);
#endif
return store;
}
/**
 * Tear down a store and free its memory.
 *
 * When the real (LMDB-backed) store is compiled in, the rwlock is destroyed,
 * the database handle is closed and then the environment is closed, in that
 * order. The store struct itself is freed in every build configuration.
 *
 * @param store store to destroy; must not be used after this call.
 */
void store_destroy(store_t *store) {
#if (SIST_FAKE_STORE != 1)
// Only the real store owns a lock and LMDB handles; close the dbi before its environment.
pthread_rwlock_destroy(&store->lock);
mdb_close(store->env, store->dbi);
mdb_env_close(store->env);
#endif
// Freed unconditionally, including when SIST_FAKE_STORE is 1.
free(store);
}
/**
 * Force the store's LMDB environment to flush its buffers to disk.
 *
 * Like the other store functions, this is a no-op when SIST_FAKE_STORE is 1:
 * in that configuration the LMDB environment is never opened, so store->env
 * must not be touched.
 *
 * @param store store to flush.
 */
void store_flush(store_t *store) {
#if (SIST_FAKE_STORE != 1)
    // force=TRUE asks LMDB for a synchronous flush
    mdb_env_sync(store->env, TRUE);
#else
    (void) store;
#endif
}
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
if (key_len == 16) {
char uuid_str[UUID_STR_LEN] = {0, };
uuid_unparse((unsigned char *) key, uuid_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
if (key_len == MD5_DIGEST_LENGTH) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}
}
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
@@ -88,10 +98,13 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
#endif
}
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
char *buf = NULL;
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
@@ -112,6 +125,7 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
}
mdb_txn_abort(txn);
#endif
return buf;
}

View File

@@ -24,6 +24,8 @@ void store_destroy(store_t *store);
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);
void store_flush(store_t *store);
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
GHashTable *store_read_all(store_t *store);

View File

@@ -20,7 +20,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->vfile.info = *info;
memset(job->parent, 0, 16);
memset(job->parent, 0, MD5_DIGEST_LENGTH);
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;

View File

@@ -21,7 +21,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.8.5";
static const char *const Version = "2.10.2";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -30,13 +30,77 @@ static const char *const usage[] = {
NULL,
};
#include<signal.h>
#include<unistd.h>
static __sighandler_t sigsegv_handler = NULL;
static __sighandler_t sigabrt_handler = NULL;
void sig_handler(int signum) {
LogCtx.verbose = 1;
LogCtx.very_verbose = 1;
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
GHashTableIter iter;
g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
parse_job_t *job = value;
if (isatty(STDERR_FILENO)) {
LOG_DEBUGF(
"*SIGNAL HANDLER*",
"Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
31 + ((unsigned int) key) % 7, key, job->filepath
);
} else {
LOG_DEBUGF(
"*SIGNAL HANDLER*",
"THREAD [%04llX] was working on job %s",
key, job->filepath
);
}
}
tpool_dump_debug_info(ScanCtx.pool);
LOG_INFO(
"*SIGNAL HANDLER*",
"Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
)
LOG_INFO(
"*SIGNAL HANDLER*",
"sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
)
#ifndef SIST_DEBUG
LOG_WARNING(
"*SIGNAL HANDLER*",
"You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
"releases page to provide additionnal information when submitting a bug report."
)
#endif
if (signum == SIGSEGV && sigsegv_handler != NULL) {
sigsegv_handler(signum);
} else if (signum == SIGABRT && sigabrt_handler != NULL) {
sigabrt_handler(signum);
}
}
void init_dir(const char *dirpath) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
uuid_t uuid;
uuid_generate(uuid);
uuid_unparse(uuid, ScanCtx.index.desc.uuid);
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) ScanCtx.index.desc.name, strlen(ScanCtx.index.desc.name), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
time(&ScanCtx.index.desc.timestamp);
strcpy(ScanCtx.index.desc.version, Version);
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);
@@ -98,6 +162,14 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.arc_ctx.log = _log;
ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
if (args->archive_passphrase != NULL) {
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
} else {
ScanCtx.arc_ctx.passphrase[0] = 0;
}
ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
// Comic
ScanCtx.comic_ctx.log = _log;
@@ -131,6 +203,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
init_media();
// OOXML
@@ -149,6 +222,14 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.text_ctx.log = _log;
ScanCtx.text_ctx.logf = _logf;
// MSDOC
ScanCtx.msdoc_ctx.tn_size = args->size;
ScanCtx.msdoc_ctx.content_size = args->content_size;
ScanCtx.msdoc_ctx.log = _log;
ScanCtx.msdoc_ctx.logf = _logf;
ScanCtx.msdoc_ctx.store = _store;
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
@@ -210,7 +291,7 @@ void sist2_scan(scan_args_t *args) {
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
incremental_read(ScanCtx.original_table, file_path);
}
}
@@ -225,8 +306,6 @@ void sist2_scan(scan_args_t *args) {
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
if (args->incremental != NULL) {
char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
@@ -242,7 +321,7 @@ void sist2_scan(scan_args_t *args) {
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
}
}
@@ -257,6 +336,8 @@ void sist2_scan(scan_args_t *args) {
store_destroy(source_tags);
}
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
store_destroy(ScanCtx.index.store);
}
@@ -319,7 +400,7 @@ void sist2_index(index_args_t *args) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
read_index(file_path, desc.uuid, desc.type, f);
read_index(file_path, desc.id, desc.type, f);
}
}
closedir(dir);
@@ -329,7 +410,7 @@ void sist2_index(index_args_t *args) {
tpool_destroy(IndexCtx.pool);
if (!args->print) {
finish_indexer(args->script, args->async_script, desc.uuid);
finish_indexer(args->script, args->async_script, desc.id);
}
store_destroy(IndexCtx.tag_store);
@@ -349,7 +430,7 @@ void sist2_exec_script(exec_args_t *args) {
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
execute_update_script(args->script, args->async_script, desc.uuid);
execute_update_script(args->script, args->async_script, desc.id);
free(args->script);
}
@@ -390,6 +471,9 @@ void sist2_web(web_args_t *args) {
int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler);
sigabrt_handler = signal(SIGABRT, sig_handler);
setlocale(LC_ALL, "");
scan_args_t *scan_args = scan_args_create();
@@ -430,6 +514,9 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
"skip: Don't parse, list: only get file names as text, "
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
"Passphrase for encrypted archive files"),
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
"which are installed on your machine)"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
@@ -439,6 +526,7 @@ int main(int argc, const char *argv[]) {
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),

View File

@@ -41,34 +41,45 @@ void fs_reset(struct vfile *f) {
#define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL))
/**
 * Record `job` as the job currently being processed by the calling thread.
 *
 * The entry is stored in ScanCtx.dbg_current_files, keyed by the calling
 * thread's id, and replaces any previous entry for that thread. Access to
 * the table is serialized with ScanCtx.dbg_current_files_mu.
 *
 * NOTE(review): the table is created with g_int64_hash/g_int64_equal while the
 * key here is the id value itself packed into a pointer; confirm that those
 * hash/equal functions are the intended ones for this kind of key.
 *
 * @param job job the calling thread is about to work on.
 */
void set_dbg_current_file(parse_job_t *job) {
    // Thread id (not a process id) packed into the pointer-sized key slot
    void *thread_key = GINT_TO_POINTER((unsigned long long) pthread_self());

    pthread_mutex_lock(&ScanCtx.dbg_current_files_mu);
    g_hash_table_replace(ScanCtx.dbg_current_files, thread_key, job);
    pthread_mutex_unlock(&ScanCtx.dbg_current_files_mu);
}
void parse(void *arg) {
parse_job_t *job = arg;
document_t doc;
int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
return;
}
set_dbg_current_file(job);
doc.filepath = job->filepath;
doc.ext = (short) job->ext;
doc.base = (short) job->base;
char *rel_path = doc.filepath + ScanCtx.index.desc.root_len;
MD5((unsigned char *) rel_path, strlen(rel_path), doc.path_md5);
doc.meta_head = NULL;
doc.meta_tail = NULL;
doc.mime = 0;
doc.size = job->vfile.info.st_size;
doc.ino = job->vfile.info.st_ino;
doc.mtime = job->vfile.info.st_mtim.tv_sec;
uuid_generate(doc.uuid);
int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5);
return;
}
char *buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
uuid_unparse(doc.uuid, uuid_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
char path_md5_str[MD5_STR_LENGTH];
buf2hex(doc.path_md5, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
}
if (job->vfile.info.st_size == 0) {
@@ -86,7 +97,8 @@ void parse(void *arg) {
// Get mime type with libmagic
if (!job->vfile.is_fs_file) {
LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
LOG_WARNING(job->filepath,
"Guessing mime type with libmagic inside archive files is not currently supported");
goto abort;
}
@@ -162,16 +174,22 @@ void parse(void *arg) {
parse_sidecar(&job->vfile, &doc);
CLOSE_FILE(job->vfile)
return;
} else if (is_msdoc(&ScanCtx.msdoc_ctx, doc.mime)) {
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, &doc);
}
abort:
//Parent meta
if (!uuid_is_null(job->parent)) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
if (!md5_digest_is_null(job->parent)) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
meta_parent->key = MetaParent;
uuid_unparse(job->parent, meta_parent->str_val);
buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
APPEND_META((&doc), meta_parent)
doc.has_parent = TRUE;
} else {
doc.has_parent = FALSE;
}
write_document(&doc);

View File

@@ -7,7 +7,7 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath)
size_t size;
char* buf = read_all(vfile, &size);
char *buf = read_all(vfile, &size);
if (buf == NULL) {
LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath)
return;
@@ -23,11 +23,11 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
}
char *json_str = cJSON_PrintUnformatted(json);
char filepath[PATH_MAX];
memcpy(filepath, vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len);
*(filepath + doc->ext - 1) = '\0';
unsigned char path_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
path_md5);
store_write(ScanCtx.index.meta_store, filepath, doc->ext, json_str, strlen(json_str) + 1);
store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1);
cJSON_Delete(json);
free(json_str);

View File

@@ -23,9 +23,10 @@
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define UUID_STR_LEN 37
#define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33
#include "util.h"
#include "log.h"
#include "types.h"
@@ -47,5 +48,4 @@
#include <errno.h>
#include <ctype.h>
#endif

View File

@@ -1 +1 @@
.irs-bar,.irs-bar-edge,.irs-line-left,.irs-line-mid,.irs-line-right,.irs-slider{background:url("../img/sprite-skin-flat.png") repeat-x}.irs{height:40px}.irs-with-grid{height:60px}.irs-line{height:12px;top:25px}.irs-line-left{height:12px;background-position:0 -30px}.irs-line-mid{height:12px;background-position:0 0}.irs-line-right{height:12px;background-position:100% -30px}.irs-bar{height:12px;top:25px;background-position:0 -60px}.irs-bar-edge{top:25px;height:12px;width:9px;background-position:0 -90px}.irs-shadow{height:3px;top:34px;background:#000;opacity:0.25}.lt-ie9 .irs-shadow{filter: alpha(opacity=25)}.irs-slider{width:16px;height:18px;top:22px;background-position:0 -120px}.irs-slider.state_hover,.irs-slider:hover{background-position:0 -150px}.irs-max,.irs-min{color:#999;font-size:10px;line-height:1.333;text-shadow:none;top:0;padding:1px 3px;background:#e1e4e9;-moz-border-radius:4px;border-radius:4px}.irs-from,.irs-single,.irs-to{color:#fff;font-size:10px;line-height:1.333;text-shadow:none;padding:1px 5px;background:#2196F3;-moz-border-radius:4px;border-radius:4px}.irs-from:after,.irs-single:after,.irs-to:after{position:absolute;display:block;content:"";bottom:-6px;left:50%;width:0;height:0;margin-left:-3px;overflow:hidden;border:3px solid transparent;border-top-color:#2196F3}.irs-grid-pol{background:#e1e4e9}.irs-grid-text{color:#999}.irs-disabled{}
.irs-bar,.irs-bar-edge,.irs-line-left,.irs-line-mid,.irs-line-right,.irs-slider{background:url("./img/sprite-skin-flat.png") repeat-x}.irs{height:40px}.irs-with-grid{height:60px}.irs-line{height:12px;top:25px}.irs-line-left{height:12px;background-position:0 -30px}.irs-line-mid{height:12px;background-position:0 0}.irs-line-right{height:12px;background-position:100% -30px}.irs-bar{height:12px;top:25px;background-position:0 -60px}.irs-bar-edge{top:25px;height:12px;width:9px;background-position:0 -90px}.irs-shadow{height:3px;top:34px;background:#000;opacity:0.25}.lt-ie9 .irs-shadow{filter: alpha(opacity=25)}.irs-slider{width:16px;height:18px;top:22px;background-position:0 -120px}.irs-slider.state_hover,.irs-slider:hover{background-position:0 -150px}.irs-max,.irs-min{color:#999;font-size:10px;line-height:1.333;text-shadow:none;top:0;padding:1px 3px;background:#e1e4e9;-moz-border-radius:4px;border-radius:4px}.irs-from,.irs-single,.irs-to{color:#fff;font-size:10px;line-height:1.333;text-shadow:none;padding:1px 5px;background:#2196F3;-moz-border-radius:4px;border-radius:4px}.irs-from:after,.irs-single:after,.irs-to:after{position:absolute;display:block;content:"";bottom:-6px;left:50%;width:0;height:0;margin-left:-3px;overflow:hidden;border:3px solid transparent;border-top-color:#2196F3}.irs-grid-pol{background:#e1e4e9}.irs-grid-text{color:#999}.irs-disabled{}

1
src/static/js/8_md5.min.js vendored Normal file
View File

@@ -0,0 +1 @@
!function(n){"use strict";function d(n,t){var r=(65535&n)+(65535&t);return(n>>16)+(t>>16)+(r>>16)<<16|65535&r}function f(n,t,r,e,o,u){return d((c=d(d(t,n),d(e,u)))<<(f=o)|c>>>32-f,r);var c,f}function l(n,t,r,e,o,u,c){return f(t&r|~t&e,n,t,o,u,c)}function v(n,t,r,e,o,u,c){return f(t&e|r&~e,n,t,o,u,c)}function g(n,t,r,e,o,u,c){return f(t^r^e,n,t,o,u,c)}function m(n,t,r,e,o,u,c){return f(r^(t|~e),n,t,o,u,c)}function i(n,t){var r,e,o,u;n[t>>5]|=128<<t%32,n[14+(t+64>>>9<<4)]=t;for(var c=1732584193,f=-271733879,i=-1732584194,a=271733878,h=0;h<n.length;h+=16)c=l(r=c,e=f,o=i,u=a,n[h],7,-680876936),a=l(a,c,f,i,n[h+1],12,-389564586),i=l(i,a,c,f,n[h+2],17,606105819),f=l(f,i,a,c,n[h+3],22,-1044525330),c=l(c,f,i,a,n[h+4],7,-176418897),a=l(a,c,f,i,n[h+5],12,1200080426),i=l(i,a,c,f,n[h+6],17,-1473231341),f=l(f,i,a,c,n[h+7],22,-45705983),c=l(c,f,i,a,n[h+8],7,1770035416),a=l(a,c,f,i,n[h+9],12,-1958414417),i=l(i,a,c,f,n[h+10],17,-42063),f=l(f,i,a,c,n[h+11],22,-1990404162),c=l(c,f,i,a,n[h+12],7,1804603682),a=l(a,c,f,i,n[h+13],12,-40341101),i=l(i,a,c,f,n[h+14],17,-1502002290),c=v(c,f=l(f,i,a,c,n[h+15],22,1236535329),i,a,n[h+1],5,-165796510),a=v(a,c,f,i,n[h+6],9,-1069501632),i=v(i,a,c,f,n[h+11],14,643717713),f=v(f,i,a,c,n[h],20,-373897302),c=v(c,f,i,a,n[h+5],5,-701558691),a=v(a,c,f,i,n[h+10],9,38016083),i=v(i,a,c,f,n[h+15],14,-660478335),f=v(f,i,a,c,n[h+4],20,-405537848),c=v(c,f,i,a,n[h+9],5,568446438),a=v(a,c,f,i,n[h+14],9,-1019803690),i=v(i,a,c,f,n[h+3],14,-187363961),f=v(f,i,a,c,n[h+8],20,1163531501),c=v(c,f,i,a,n[h+13],5,-1444681467),a=v(a,c,f,i,n[h+2],9,-51403784),i=v(i,a,c,f,n[h+7],14,1735328473),c=g(c,f=v(f,i,a,c,n[h+12],20,-1926607734),i,a,n[h+5],4,-378558),a=g(a,c,f,i,n[h+8],11,-2022574463),i=g(i,a,c,f,n[h+11],16,1839030562),f=g(f,i,a,c,n[h+14],23,-35309556),c=g(c,f,i,a,n[h+1],4,-1530992060),a=g(a,c,f,i,n[h+4],11,1272893353),i=g(i,a,c,f,n[h+7],16,-155497632),f=g(f,i,a,c,n[h+10],23,-1094730640),c=g(c,f,i,a,n[h+13],4,681279174),a=g(a,c,f,i,n[h],11,-358537222),i=g(i,a,c,f,n[h+3],1
6,-722521979),f=g(f,i,a,c,n[h+6],23,76029189),c=g(c,f,i,a,n[h+9],4,-640364487),a=g(a,c,f,i,n[h+12],11,-421815835),i=g(i,a,c,f,n[h+15],16,530742520),c=m(c,f=g(f,i,a,c,n[h+2],23,-995338651),i,a,n[h],6,-198630844),a=m(a,c,f,i,n[h+7],10,1126891415),i=m(i,a,c,f,n[h+14],15,-1416354905),f=m(f,i,a,c,n[h+5],21,-57434055),c=m(c,f,i,a,n[h+12],6,1700485571),a=m(a,c,f,i,n[h+3],10,-1894986606),i=m(i,a,c,f,n[h+10],15,-1051523),f=m(f,i,a,c,n[h+1],21,-2054922799),c=m(c,f,i,a,n[h+8],6,1873313359),a=m(a,c,f,i,n[h+15],10,-30611744),i=m(i,a,c,f,n[h+6],15,-1560198380),f=m(f,i,a,c,n[h+13],21,1309151649),c=m(c,f,i,a,n[h+4],6,-145523070),a=m(a,c,f,i,n[h+11],10,-1120210379),i=m(i,a,c,f,n[h+2],15,718787259),f=m(f,i,a,c,n[h+9],21,-343485551),c=d(c,r),f=d(f,e),i=d(i,o),a=d(a,u);return[c,f,i,a]}function a(n){for(var t="",r=32*n.length,e=0;e<r;e+=8)t+=String.fromCharCode(n[e>>5]>>>e%32&255);return t}function h(n){var t=[];for(t[(n.length>>2)-1]=void 0,e=0;e<t.length;e+=1)t[e]=0;for(var r=8*n.length,e=0;e<r;e+=8)t[e>>5]|=(255&n.charCodeAt(e/8))<<e%32;return t}function e(n){for(var t,r="0123456789abcdef",e="",o=0;o<n.length;o+=1)t=n.charCodeAt(o),e+=r.charAt(t>>>4&15)+r.charAt(15&t);return e}function r(n){return unescape(encodeURIComponent(n))}function o(n){return a(i(h(t=r(n)),8*t.length));var t}function u(n,t){return function(n,t){var r,e,o=h(n),u=[],c=[];for(u[15]=c[15]=void 0,16<o.length&&(o=i(o,8*n.length)),r=0;r<16;r+=1)u[r]=909522486^o[r],c[r]=1549556828^o[r];return e=i(u.concat(h(t)),512+8*t.length),a(i(c.concat(e),640))}(r(n),r(t))}function t(n,t,r){return t?r?u(t,n):e(u(t,n)):r?o(n):e(o(n))}"function"==typeof define&&define.amd?define(function(){return t}):"object"==typeof module&&module.exports?module.exports=t:n.md5=t}(this);

View File

@@ -22,7 +22,7 @@ function gifOver(thumbnail, hit) {
thumbnail.addEventListener("mouseout", function () {
//Reset timer
thumbnail.mouseStayedOver = false;
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);
})
}
@@ -192,6 +192,19 @@ function makeUserTag(tag, hit) {
return userTag;
}
/**
 * Append an "Exif GPS" row to a document-info table body. The row shows the
 * coordinates as a link that opens the location on Google Maps in a new tab.
 *
 * @param tbody jQuery-wrapped <tbody> the row is appended to
 * @param latitude latitude, displayed as-is and URL-encoded in the link
 * @param longitude longitude, displayed as-is and URL-encoded in the link
 */
function makeGpsMetaRow(tbody, latitude, longitude) {
    // Values come from file metadata: encode them before building the query string
    const lat = encodeURIComponent(latitude);
    const lon = encodeURIComponent(longitude);
    const mapUrl = `https://maps.google.com/?q=${lat},${lon}&ll=${lat},${lon}&t=k&z=17`;

    const link = $("<a>")
        .text(`${latitude}, ${longitude}`)
        .attr("href", mapUrl)
        .attr("target", "_blank")
        // Prevent the opened page from reaching back through window.opener
        .attr("rel", "noopener noreferrer");

    tbody.append($("<tr>")
        .append($("<td>").text("Exif GPS"))
        .append($("<td>").append(link))
    );
}
function infoButtonCb(hit) {
return () => {
getDocumentInfo(hit["_id"]).then(doc => {
@@ -229,13 +242,25 @@ function infoButtonCb(hit) {
.text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " "))
.attr("title", doc["mtime"]))
);
// Exif GPS
if ("exif_gps_longitude_dec" in doc) {
makeGpsMetaRow(tbody, doc["exif_gps_latitude_dec"], doc["exif_gps_longitude_dec"])
} else if ("exif_gps_longitude_dms" in doc) {
makeGpsMetaRow(
tbody,
dmsToDecimal(doc["exif_gps_latitude_dms"], doc["exif_gps_latitude_ref"]),
dmsToDecimal(doc["exif_gps_longitude_dms"], doc["exif_gps_longitude_ref"]),
)
}
const displayFields = new Set([
"mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
"modified_by", "pages"
]);
Object.keys(doc)
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || (key.startsWith("exif_") && !key.includes("gps")))
.forEach(key => {
tbody.append($("<tr>")
.append($("<td>").text(key))
@@ -350,6 +375,14 @@ function createDocCard(hit) {
audio.setAttribute("controls", "");
audio.setAttribute("type", hit["_source"]["mime"]);
audio.setAttribute("src", "f/" + hit["_id"]);
audio.addEventListener("play", () => {
// Pause all currently playing audio tags
$("audio").each(function () {
if (this !== audio) {
this.pause();
}
});
});
docCard.appendChild(audio)
}
@@ -419,7 +452,7 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
thumbnail.setAttribute("class", "card-img-top fit");
}
}
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);
if (shouldDisplayRawImage(hit)) {
thumbnail.addEventListener("click", () => {

View File

@@ -165,6 +165,9 @@ window.onload = () => {
}
}
});
initTagTree();
updateTagTree();
};
function saveTag(tag, hit) {
@@ -174,7 +177,7 @@ function saveTag(tag, hit) {
delete: false,
name: tag,
doc_id: hit["_id"],
relpath: relPath
path_md5: md5(relPath)
}).then(() => {
tagBar.blur();
$("#tagModal").modal("hide");
@@ -188,6 +191,8 @@ function saveTag(tag, hit) {
hideAfter: 3000,
loaderBg: "#08c7e8",
});
window.setTimeout(updateTagTree, 2000);
})
}
@@ -198,7 +203,7 @@ function deleteTag(tag, hit) {
delete: true,
name: tag,
doc_id: hit["_id"],
relpath: relPath
path_md5: md5(relPath)
}).then(() => {
$.toast({
heading: "Tag deleted",
@@ -210,6 +215,8 @@ function deleteTag(tag, hit) {
hideAfter: 3000,
loaderBg: "#08c7e8",
});
window.setTimeout(updateTagTree, 2000);
})
}
@@ -313,25 +320,8 @@ $.jsonPost("es", {
mimeTree.node("any").select();
});
// Tags tree
$.jsonPost("es", {
aggs: {
tags: {
terms: {
field: "tag",
size: 10000
}
}
},
size: 0,
}).then(resp => {
resp["aggregations"]["tags"]["buckets"]
.sort((a, b) => a["key"].localeCompare(b["key"]))
.forEach(bucket => {
addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
});
tagMap.push({"text": "All", "id": "any"});
function initTagTree() {
tagMap = [{text: "All", id: "any"}];
tagTree = new InspireTree({
selection: {
mode: 'checkbox'
@@ -346,8 +336,34 @@ $.jsonPost("es", {
});
tagTree.on("node.state.changed", handleTreeClick(tagTree));
tagTree.node("any").select();
}
function updateTagTree() {
$.jsonPost("es", {
aggs: {
tags: {
terms: {
field: "tag",
size: 10000
}
}
},
size: 0,
}).then(resp => {
tagMap = [];
resp["aggregations"]["tags"]["buckets"]
.sort((a, b) => a["key"].localeCompare(b["key"]))
.forEach(bucket => {
addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
});
tagTree.removeAll();
tagMap.push({text: "All", id: "any"})
tagTree.addNodes(tagMap);
searchBusy = false;
});
});
}
function addTag(map, tag, id, count) {
// let tags = tag.split("#")[0].split(".");
@@ -495,8 +511,8 @@ function search(after = null) {
searchResults.appendChild(preload);
}
let query = searchBar.value;
let empty = query === "";
let searchBarValue = searchBar.value;
let empty = searchBarValue === "";
let condition = empty ? "should" : "must";
let filters = [
{range: {size: {gte: size_min, lte: size_max}}},
@@ -545,19 +561,32 @@ function search(after = null) {
filters.push({range: {mtime: {lte: date_max}}})
}
let query;
if (CONF.options.queryMode === "simple") {
query = {
simple_query_string: {
query: searchBarValue,
fields: fields,
default_operator: "and"
}
}
} else {
query = {
query_string: {
query: searchBarValue,
default_field: "name",
default_operator: "and"
}
}
}
let q = {
"_source": {
excludes: ["content", "_tie"]
},
query: {
bool: {
[condition]: {
simple_query_string: {
query: query,
fields: fields,
default_operator: "and"
}
},
[condition]: query,
filter: filters
}
},
@@ -595,7 +624,9 @@ function search(after = null) {
}
}
$.jsonPost("es", q).then(searchResult => {
const showError = CONF.options.queryMode === "advanced";
$.jsonPost("es", q, showError).then(searchResult => {
let hits = searchResult["hits"]["hits"];
if (hits) {
lastDoc = hits[hits.length - 1];
@@ -604,6 +635,7 @@ function search(after = null) {
hits.forEach(hit => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
hit["_path_md5"] = md5(hit["_source"]["path"] + (hit["_source"]["path"] ? "/" : "") + hit["_source"]["name"] + ext(hit));
});
if (!after) {
@@ -628,7 +660,25 @@ function search(after = null) {
reachedEnd = hits.length !== SIZE;
insertHits(resultContainer, hits);
searchBusy = false;
}).fail(() => {
searchBusy = false;
if (!after) {
preload.remove();
}
console.log("QUERY:")
console.log(q)
$.toast({
heading: "Query error",
text: "Could not parse or execute query, please check the Advanced search documentation. " +
"See server logs for more information.",
stack: false,
bgColor: "#FF8F00",
textColor: "#FFF3E0",
position: 'bottom-right',
hideAfter: false
});
})
}

View File

@@ -70,7 +70,7 @@ function strUnescape(str) {
for (let i = 0; i < str.length; i++) {
const c = str[i];
const next = str[i+1];
const next = str[i + 1];
if (c === ']') {
if (next === ']') {
@@ -102,7 +102,8 @@ const _defaults = {
treemapSize: "large",
suggestPath: true,
fragmentSize: 100,
columns: 5
columns: 5,
queryMode: "simple"
};
function loadSettings() {
@@ -120,6 +121,7 @@ function loadSettings() {
$("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
$("#settingFragmentSize").val(CONF.options.fragmentSize);
$("#settingColumns").val(CONF.options.columns);
$("#settingQueryMode").val(CONF.options.queryMode);
}
function Settings() {
@@ -127,6 +129,7 @@ function Settings() {
this._onUpdate = function () {
$("#fuzzyToggle").prop("checked", this.options.fuzzy);
$("#searchBar").attr("placeholder", this.options.queryMode === "simple" ? "Search" : "Advanced search");
updateColumnStyle();
};
@@ -165,6 +168,7 @@ function updateSettings() {
CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
CONF.options.fragmentSize = $("#settingFragmentSize").val();
CONF.options.columns = $("#settingColumns").val();
CONF.options.queryMode = $("#settingQueryMode").val();
CONF.save();
if (typeof searchDebounced !== "undefined") {
@@ -187,14 +191,16 @@ function updateSettings() {
});
}
jQuery["jsonPost"] = function (url, data) {
jQuery["jsonPost"] = function (url, data, showError = true) {
return jQuery.ajax({
url: url,
type: "post",
data: JSON.stringify(data),
contentType: "application/json"
}).fail(err => {
if (showError) {
showEsError();
}
console.log(err);
});
};
@@ -230,3 +236,13 @@ function updateColumnStyle() {
`
}
}
/**
 * Convert a degrees/minutes/seconds string into signed decimal degrees.
 *
 * @param {string} dms Three comma-separated "numerator:denominator" pairs
 *                     (degrees, minutes, seconds), e.g. "45:1, 30:1, 0:1".
 * @param {string} ref Hemisphere reference; "S" and "W" yield a negative result.
 * @returns {number} The coordinate in decimal degrees.
 */
function dmsToDecimal(dms, ref) {
    // Evaluate one "numerator:denominator" component as a plain number
    const ratio = (component) => {
        const parts = component.trim().split(":");
        return Number(parts[0]) / Number(parts[1]);
    };

    const components = dms.split(",");
    const degrees = ratio(components[0]);
    const minutes = ratio(components[1]);
    const seconds = ratio(components[2]);

    // Southern and western hemispheres are expressed as negative coordinates
    const sign = (ref === "S" || ref === "W") ? -1 : 1;

    return (degrees + (minutes / 60) + (seconds / 3600)) * sign;
}

View File

@@ -12,9 +12,9 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.8.5</span>
<span class="badge badge-pill version">2.10.2</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="/stats">Stats</a>
<a class="btn ml-auto" href="stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings
</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
@@ -120,6 +120,8 @@
</div>
<div class="modal-body">
<h2>Simple search</h2>
<table class="table">
<tbody>
<tr>
@@ -168,6 +170,12 @@
<p>For more information, see <a target="_blank"
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html">Elasticsearch
documentation</a></p>
<h2>Advanced search</h2>
<p>For documentation about the advanced search mode, see <a target="_blank"
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax">Elasticsearch
documentation</a></p>
</div>
</div>
</div>
@@ -207,10 +215,16 @@
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
<input type="number" class="form-control" id="settingFragmentSize">
</div>
<label for="settingQueryMode">Search mode</label>
<select id="settingQueryMode" class="form-control form-control-sm">
<option value="simple">Simple</option>
<option value="advanced">Advanced</option>
</select>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>

View File

@@ -10,7 +10,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.8.5</span>
<span class="badge badge-pill version">2.10.2</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"
@@ -29,13 +29,13 @@
</div>
<div id="treemap-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('treemap-card')">Enlarge</button>
<button class="btn stats-btn" onclick="fullScreen('treemap-card')" id="treemap-card-enlarge">Enlarge</button>
<button class="btn stats-btn" onclick="exportTreemap()">Export</button>
<svg id="treemap"></svg>
</div>
<div id="graphs-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('graphs-card')">Enlarge</button>
<button class="btn stats-btn" onclick="fullScreen('graphs-card')" id="graphs-card-enlarge">Enlarge</button>
<div class="graph">
<svg id="agg_mime_size"></svg>
</div>
@@ -84,10 +84,16 @@
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
<input type="number" class="form-control" id="settingFragmentSize">
</div>
<label for="settingQueryMode">Search mode</label>
<select id="settingQueryMode" class="form-control form-control-sm">
<option value="simple">Simple</option>
<option value="advanced">Advanced</option>
</select>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
@@ -727,7 +733,7 @@ function updateStats() {
const indexId = $("#indices").val();
d3.csv(`/s/${indexId}/1`).then(tabularData => {
d3.csv(`./s/${indexId}/1`).then(tabularData => {
tabularData.forEach(row => {
row.taxonomy = row.path.split("/");
row.size = Number(row.size);
@@ -742,16 +748,16 @@ function updateStats() {
}
});
d3.csv(`/s/${indexId}/2`).then(tabularData => {
d3.csv(`./s/${indexId}/2`).then(tabularData => {
mimeBarSize(tabularData.slice(), mimeSvgSize);
mimeBarCount(tabularData.slice(), mimeSvgCount);
});
d3.csv(`/s/${indexId}/3`).then(tabularData => {
d3.csv(`./s/${indexId}/3`).then(tabularData => {
sizeHistogram(tabularData, sizeHistogramSvg);
});
d3.csv(`/s/${indexId}/4`).then(tabularData => {
d3.csv(`./s/${indexId}/4`).then(tabularData => {
dateHistogram(tabularData, dateHistogramSvg);
});
@@ -789,7 +795,15 @@ window.onload = function () {
/**
 * Toggle the "full-screen" class on a stats card and relabel its button.
 *
 * @param {string} selector Element id of the card; the matching button is
 *                          looked up under the id `selector + "-enlarge"`.
 */
function fullScreen(selector) {
    const card = document.getElementById(selector);
    const btn = document.getElementById(selector + "-enlarge");

    card.classList.toggle("full-screen");

    // The label names the action the next click will perform
    btn.innerText = card.classList.contains("full-screen") ? "Shrink" : "Enlarge";
}
function exportTreemap() {

View File

@@ -2,8 +2,6 @@
#include "io/serialize.h"
#include "ctx.h"
#include <glib.h>
static GHashTable *FlatTree;
static GHashTable *BufferTable;
@@ -22,7 +20,7 @@ typedef struct {
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {
void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;
@@ -103,8 +101,8 @@ void read_index_into_tables(index_t *index) {
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", index->path, de->d_name);
read_index(file_path, index->desc.uuid, index->desc.type, fill_tables);
snprintf(file_path, PATH_MAX, "%s%s", index->path, de->d_name);
read_index(file_path, index->desc.id, index->desc.type, fill_tables);
}
}
closedir(dir);

View File

@@ -3,7 +3,7 @@
#include "sist.h"
#include <pthread.h>
#define MAX_QUEUE_SIZE 10000
#define MAX_QUEUE_SIZE 1000000
typedef void (*thread_func_t)(void *arg);
@@ -52,6 +52,13 @@ static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) {
return work;
}
/**
 * Log the pool's thread_cnt, work_cnt, done_cnt and stop fields through
 * LOG_DEBUGF, one message per field.
 *
 * The fields are read directly, without acquiring pool->work_mutex.
 * NOTE(review): every field is printed with "%d" - confirm they are all
 * int-sized in tpool_t.
 */
void tpool_dump_debug_info(tpool_t *pool) {
LOG_DEBUGF("tpool.c", "pool->thread_cnt = %d", pool->thread_cnt)
LOG_DEBUGF("tpool.c", "pool->work_cnt = %d", pool->work_cnt)
LOG_DEBUGF("tpool.c", "pool->done_cnt = %d", pool->done_cnt)
LOG_DEBUGF("tpool.c", "pool->stop = %d", pool->stop)
}
/**
* Pop work object from thread pool
*/
@@ -83,7 +90,7 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
}
while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
usleep(100000);
usleep(10000);
}
pthread_mutex_lock(&(pool->work_mutex));
@@ -150,6 +157,7 @@ static void *tpool_worker(void *arg) {
if (pool->cleanup_func != NULL) {
LOG_INFO("tpool.c", "Executing cleanup function")
pool->cleanup_func();
LOG_DEBUG("tpool.c", "Done executing cleanup function")
}
pthread_cond_signal(&(pool->working_cond));

View File

@@ -10,10 +10,12 @@ typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg);
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *tm);
void tpool_destroy(tpool_t *pool);
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
void tpool_wait(tpool_t *tm);
void tpool_wait(tpool_t *pool);
void tpool_dump_debug_info(tpool_t *pool);
#endif

View File

@@ -6,7 +6,7 @@
#define INDEX_VERSION_EXTERNAL "_external_v1"
typedef struct index_descriptor {
char uuid[UUID_STR_LEN];
char id[MD5_STR_LENGTH];
char version[64];
long timestamp;
char root[PATH_MAX];

View File

@@ -2,7 +2,6 @@
#include "src/ctx.h"
#include <wordexp.h>
#include <glib.h>
#define PBSTR "========================================"
#define PBWIDTH 40
@@ -125,7 +124,7 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
}
GHashTable *incremental_get_table() {
GHashTable *file_table = g_hash_table_new(g_direct_hash, g_direct_equal);
GHashTable *file_table = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
return file_table;
}

View File

@@ -10,6 +10,8 @@
#include "third-party/utf8.h/utf8.h"
#include "libscan/scan.h"
#define MD5_STR_LENGTH 33
char *abspath(const char *path);
@@ -21,25 +23,6 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
GHashTable *incremental_get_table();
__always_inline
static void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(mtime));
}
__always_inline
static int incremental_get(GHashTable *table, unsigned long inode_no) {
if (table != NULL) {
return GPOINTER_TO_INT(g_hash_table_lookup(table, (gpointer) inode_no));
} else {
return 0;
}
}
__always_inline
static int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
return g_hash_table_insert(table, GINT_TO_POINTER(inode_no), GINT_TO_POINTER(1));
}
const char *find_file_in_paths(const char **paths, const char *filename);
@@ -48,4 +31,95 @@ void str_escape(char *dst, const char *str);
void str_unescape(char *dst, const char *str);
/**
 * Decode a hexadecimal string into raw bytes.
 *
 * Each pair of characters in str produces one byte in bytes, most
 * significant nibble first. Both upper and lower case digits are accepted.
 * Characters that are not hexadecimal digits are NOT rejected: they decode
 * as the value 0.
 *
 * @param str   Hex characters to decode (does not need to be NUL-terminated)
 * @param len   Number of characters of str to decode. Only complete pairs are
 *              decoded; a trailing unpaired character is ignored.
 * @param bytes Output buffer, must hold at least len / 2 bytes
 * @return Always TRUE
 */
static int hex2buf(const char *str, int len, unsigned char *bytes) {
    // Lookup table indexed by character code, yields the value of the hex digit
    static const uint8_t hashmap[] = {
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
    };

    // Stop at the last complete pair: with an odd len, the previous bound
    // (pos < len) read str[len] and wrote bytes[len / 2], one element past
    // the ranges the caller provided.
    for (int pos = 0; pos + 1 < len; pos += 2) {
        int idx0 = (uint8_t) str[pos + 0];
        int idx1 = (uint8_t) str[pos + 1];
        bytes[pos / 2] = (uint8_t) (hashmap[idx0] << 4) | hashmap[idx1];
    }

    return TRUE;
}
/**
 * Encode a byte buffer as a lowercase, NUL-terminated hexadecimal string.
 *
 * @param buf        Bytes to encode
 * @param buflen     Number of bytes in buf
 * @param hex_string Output buffer, must hold at least 2 * buflen + 1 chars
 */
__always_inline
static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
    static const char digits[] = "0123456789abcdef";

    size_t out = 0;
    for (size_t idx = 0; idx < buflen; idx++) {
        const unsigned char byte = buf[idx];

        // High nibble first, then low nibble
        hex_string[out++] = digits[(byte >> 4) & 0x0f];
        hex_string[out++] = digits[byte & 0x0f];
    }

    hex_string[out] = '\0';
}
__always_inline
static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
}
/**
 * Record the mtime of a file in an incremental-scan table.
 *
 * The table is keyed by the hexadecimal form of the path digest; the value
 * is the mtime packed into a pointer with GINT_TO_POINTER.
 *
 * @param table    Table to insert into
 * @param path_md5 MD5 digest identifying the file
 * @param mtime    Modification time to associate with the file
 */
__always_inline
static void incremental_put(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
    // The key is heap-allocated because the pointer itself is handed to the table
    char *key = malloc(MD5_STR_LENGTH);
    buf2hex(path_md5, MD5_DIGEST_LENGTH, key);

    gpointer value = GINT_TO_POINTER(mtime);
    g_hash_table_insert(table, key, value);
}
/**
 * Look up the value stored for a file in an incremental-scan table.
 *
 * @param table    Table to search, may be NULL
 * @param path_md5 MD5 digest identifying the file
 * @return The stored integer, or 0 when table is NULL. A key that is absent
 *         from the table also yields 0 (NULL lookup result converted to int).
 */
__always_inline
static int incremental_get(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
    if (table == NULL) {
        return 0;
    }

    // Lookup only needs a temporary key, so a stack buffer is enough
    char key[MD5_STR_LENGTH];
    buf2hex(path_md5, MD5_DIGEST_LENGTH, key);

    gpointer stored = g_hash_table_lookup(table, key);
    return GPOINTER_TO_INT(stored);
}
/**
 * Flag a file in the given table by storing the value 1 under its path digest.
 *
 * @param table    Table to insert into
 * @param path_md5 MD5 digest identifying the file
 * @return The result of g_hash_table_insert
 */
__always_inline
static int incremental_mark_file_for_copy(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
    // The key is heap-allocated because the pointer itself is handed to the table
    char *key = malloc(MD5_STR_LENGTH);
    buf2hex(path_md5, MD5_DIGEST_LENGTH, key);

    gpointer marker = GINT_TO_POINTER(1);
    return g_hash_table_insert(table, key, marker);
}
#endif

View File

@@ -8,18 +8,8 @@
#include <src/ctx.h>
#include <mongoose.h>
static int has_prefix(const struct mg_str *str, const struct mg_str *prefix) {
return str->len > prefix->len && memcmp(str->p, prefix->p, prefix->len) == 0;
}
static int is_equal(const struct mg_str *s1, const struct mg_str *s2) {
return s1->len == s2->len && memcmp(s1->p, s2->p, s2->len) == 0;
}
static void send_response_line(struct mg_connection *nc, int status_code, int length, char *extra_headers) {
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
mg_printf(
nc,
"HTTP/1.1 %d %s\r\n"
@@ -36,7 +26,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, int le
index_t *get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count; i >= 0; i--) {
if (strcmp(index_id, WebCtx.indices[i].desc.uuid) == 0) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
return &WebCtx.indices[i];
}
}
@@ -62,36 +52,32 @@ store_t *get_tag_store(const char *index_id) {
void search_index(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html");
mg_send(nc, search_html, sizeof(search_html));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void stats(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(stats_html), "Content-Type: text/html");
mg_send(nc, stats_html, sizeof(stats_html));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != UUID_STR_LEN + 4) {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
if (hm->uri.len != MD5_STR_LENGTH + 4) {
mg_http_reply(nc, 404, "", "");
return;
}
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
index_t *index = get_index_by_id(arg_uuid);
index_t *index = get_index_by_id(arg_md5);
if (index == NULL) {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_reply(nc, 404, "", "");
return;
}
const char *file;
switch (atoi(hm->uri.p + 3 + UUID_STR_LEN)) {
switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
case 1:
file = "treemap.csv";
break;
@@ -105,54 +91,41 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st
file = "date_agg.csv";
break;
default:
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char disposition[8192];
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"", file);
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"\r\n", file);
char full_path[PATH_MAX];
strcpy(full_path, index->path);
strcat(full_path, file);
mg_http_serve_file(nc, hm, full_path, mg_mk_str("text/csv"), mg_mk_str(disposition));
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_serve_file(nc, hm, full_path, "text/csv", disposition);
}
void javascript_lib(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript");
mg_send(nc, bundle_js, sizeof(bundle_js));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void javascript_search(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_js), "Content-Type: application/javascript");
mg_send(nc, search_js, sizeof(search_js));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
int client_requested_dark_theme(struct http_message *hm) {
struct mg_str *cookie_header = mg_get_http_header(hm, "cookie");
int client_requested_dark_theme(struct mg_http_message *hm) {
struct mg_str *cookie_header = mg_http_get_header(hm, "cookie");
if (cookie_header == NULL) {
return FALSE;
}
char buf[4096];
char *sist_cookie = buf;
if (mg_http_parse_header2(cookie_header, "sist", &sist_cookie, sizeof(buf)) == 0) {
return FALSE;
}
struct mg_str sist_cookie = mg_http_get_header_var(*cookie_header, mg_str_n("sist", 4));
int ret = strcmp(sist_cookie, "dark") == 0;
if (sist_cookie != buf) {
free(sist_cookie);
}
return ret;
return mg_strcmp(sist_cookie, mg_str_n("dark", 4)) == 0;
}
void style(struct mg_connection *nc, struct http_message *hm) {
void style(struct mg_connection *nc, struct mg_http_message *hm) {
if (client_requested_dark_theme(hm)) {
send_response_line(nc, 200, sizeof(bundle_dark_css), "Content-Type: text/css");
@@ -161,11 +134,9 @@ void style(struct mg_connection *nc, struct http_message *hm) {
send_response_line(nc, 200, sizeof(bundle_css), "Content-Type: text/css");
mg_send(nc, bundle_css, sizeof(bundle_css));
}
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) {
void img_sprite_skin_flat(struct mg_connection *nc, struct mg_http_message *hm) {
if (client_requested_dark_theme(hm)) {
send_response_line(nc, 200, sizeof(sprite_skin_flat_dark_png), "Content-Type: image/png");
mg_send(nc, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png));
@@ -173,71 +144,59 @@ void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) {
send_response_line(nc, 200, sizeof(sprite_skin_flat_png), "Content-Type: image/png");
mg_send(nc, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
}
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != UUID_STR_LEN * 2 + 2) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
if (hm->uri.len != 68) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
return;
}
char arg_uuid[UUID_STR_LEN];
char arg_index[UUID_STR_LEN];
char arg_file_md5[MD5_STR_LENGTH];
char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.p + 3, UUID_STR_LEN);
*(arg_index + UUID_STR_LEN - 1) = '\0';
memcpy(arg_uuid, hm->uri.p + 3 + UUID_STR_LEN, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
*(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
uuid_t uuid;
int ret = uuid_parse(arg_uuid, uuid);
if (ret != 0) {
LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
unsigned char md5_buf[MD5_DIGEST_LENGTH];
hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
store_t *store = get_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_reply(nc, 404, "", "Not found");
return;
}
size_t data_len = 0;
char *data = store_read(store, (char *) uuid, sizeof(uuid_t), &data_len);
char *data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
if (data_len != 0) {
send_response_line(nc, 200, data_len, "Content-Type: image/jpeg");
mg_send(nc, data, data_len);
free(data);
}
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void search(struct mg_connection *nc, struct http_message *hm) {
void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_send_error(nc, 500, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_reply(nc, 500, "", "Invalid request");
return;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.p, hm->body.len);
memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0';
char url[4096];
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->user_data = web_post_async(url, body);
nc->fn_data = web_post_async(url, body);
}
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
@@ -259,16 +218,16 @@ void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
dyn_buffer_t encoded = url_escape(url);
mg_http_send_redirect(
nc, 308,
(struct mg_str) MG_MK_STR_N(encoded.buf, encoded.cur),
(struct mg_str) MG_NULL_STR
);
dyn_buffer_write_char(&encoded, '\0');
char location_header[8192];
snprintf(location_header, sizeof(location_header), "Location: %s\r\n", encoded.buf);
mg_http_reply(nc, 308, location_header, "");
dyn_buffer_destroy(&encoded);
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct http_message *hm) {
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct mg_http_message *hm) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
@@ -289,10 +248,10 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)
char disposition[8192];
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"",
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"\r\n",
name, strlen(ext) == 0 ? "" : ".", ext);
mg_http_serve_file(nc, hm, full_path, mg_mk_str(mime), mg_mk_str(disposition));
mg_http_serve_file(nc, hm, full_path, mime, disposition);
}
void index_info(struct mg_connection *nc) {
@@ -305,7 +264,7 @@ void index_info(struct mg_connection *nc) {
cJSON *idx_json = cJSON_CreateObject();
cJSON_AddStringToObject(idx_json, "name", idx->desc.name);
cJSON_AddStringToObject(idx_json, "version", idx->desc.version);
cJSON_AddStringToObject(idx_json, "id", idx->desc.uuid);
cJSON_AddStringToObject(idx_json, "id", idx->desc.id);
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
cJSON_AddItemToArray(arr, idx_json);
}
@@ -316,40 +275,35 @@ void index_info(struct mg_connection *nc) {
mg_send(nc, json_str, strlen(json_str));
free(json_str);
cJSON_Delete(json);
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
return;
}
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
cJSON *doc = elastic_get_document(arg_uuid);
cJSON *doc = elastic_get_document(arg_md5);
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_reply(nc, 404, "", "Not found");
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_reply(nc, 404, "", "Not found");
return;
}
@@ -358,24 +312,21 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
mg_send(nc, json_str, (int) strlen(json_str));
free(json_str);
cJSON_Delete(doc);
nc->flags |= MG_F_SEND_AND_CLOSE;
}
void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
return;
}
char arg_uuid[UUID_STR_LEN];
memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
*(arg_uuid + UUID_STR_LEN - 1) = '\0';
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
const char *next = arg_uuid;
const char *next = arg_md5;
cJSON *doc = NULL;
cJSON *index_id = NULL;
cJSON *source = NULL;
@@ -386,8 +337,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_reply(nc, 404, "", "Not found");
return;
}
cJSON *parent = cJSON_GetObjectItem(source, "parent");
@@ -401,8 +351,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
if (idx == NULL) {
cJSON_Delete(doc);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_send_error(nc, 404, NULL);
mg_http_reply(nc, 404, "", "Not found");
return;
}
@@ -423,14 +372,12 @@ void status(struct mg_connection *nc) {
}
free(status);
nc->flags |= MG_F_SEND_AND_CLOSE;
}
typedef struct {
char *name;
int delete;
char *relpath;
char *path_md5_str;
char *doc_id;
} tag_req_t;
@@ -450,8 +397,9 @@ tag_req_t *parse_tag_request(cJSON *json) {
return NULL;
}
cJSON *arg_relpath = cJSON_GetObjectItem(json, "relpath");
if (arg_relpath == NULL || !cJSON_IsString(arg_relpath)) {
cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
return NULL;
}
@@ -463,41 +411,38 @@ tag_req_t *parse_tag_request(cJSON *json) {
tag_req_t *req = malloc(sizeof(tag_req_t));
req->delete = arg_delete->valueint;
req->name = arg_name->valuestring;
req->relpath = arg_relpath->valuestring;
req->path_md5_str = arg_path_md5->valuestring;
req->doc_id = arg_doc_id->valuestring;
return req;
}
void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (path->len != UUID_STR_LEN + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
return;
}
char arg_index[UUID_STR_LEN];
memcpy(arg_index, hm->uri.p + 5, UUID_STR_LEN);
*(arg_index + UUID_STR_LEN - 1) = '\0';
char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request")
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_reply(nc, 404, "", "Not found");
return;
}
store_t *store = get_tag_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_reply(nc, 404, "", "Not found");
return;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.p, hm->body.len);
memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body);
@@ -506,15 +451,14 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
cJSON_Delete(json);
free(body);
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_reply(nc, 400, "", "Invalid request");
return;
}
cJSON *arr = NULL;
size_t data_len = 0;
const char *data = store_read(store, arg_req->relpath, strlen(arg_req->relpath), &data_len);
const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
if (data_len == 0) {
arr = cJSON_CreateArray();
} else {
@@ -550,7 +494,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->user_data = web_post_async(url, buf);
nc->fn_data = web_post_async(url, buf);
} else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
@@ -570,11 +514,12 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->user_data = web_post_async(url, buf);
nc->fn_data = web_post_async(url, buf);
}
char *json_str = cJSON_PrintUnformatted(arr);
store_write(store, arg_req->relpath, strlen(arg_req->relpath) + 1, json_str, strlen(json_str) + 1);
store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
store_flush(store);
free(arg_req);
free(json_str);
@@ -583,39 +528,22 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
free(body);
}
int validate_auth(struct mg_connection *nc, struct http_message *hm) {
int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) {
char user[256] = {0,};
char pass[256] = {0,};
int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass));
if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n"
"WWW-Authenticate: Basic realm=\"sist2\"\r\n"
"Content-Length: 0\r\n\r\n");
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_creds(hm, user, sizeof(user), pass, sizeof(pass));
if (strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
mg_http_reply(nc, 401, "WWW-Authenticate: Basic realm=\"sist2\"\r\n", "");
return FALSE;
}
return TRUE;
}
static void ev_router(struct mg_connection *nc, int ev, void *p) {
struct mg_str scheme;
struct mg_str user_info;
struct mg_str host;
unsigned int port;
struct mg_str path;
struct mg_str query;
struct mg_str fragment;
if (ev == MG_EV_HTTP_REQUEST) {
struct http_message *hm = (struct http_message *) p;
if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(void *fn_data)) {
if (ev == MG_EV_HTTP_MSG) {
struct mg_http_message *hm = (struct mg_http_message *) ev_data;
if (WebCtx.auth_enabled == TRUE) {
if (!validate_auth(nc, hm)) {
@@ -623,52 +551,48 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
}
}
if (is_equal(&path, &((struct mg_str) MG_MK_STR("/")))) {
if (mg_http_match_uri(hm, "/")) {
search_index(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/css")))) {
} else if (mg_http_match_uri(hm, "/css")) {
style(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/stats")))) {
} else if (mg_http_match_uri(hm, "/stats")) {
stats(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jslib")))) {
} else if (mg_http_match_uri(hm, "/jslib")) {
javascript_lib(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jssearch")))) {
} else if (mg_http_match_uri(hm, "/jssearch")) {
javascript_search(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/img/sprite-skin-flat.png")))) {
} else if (mg_http_match_uri(hm, "/img/sprite-skin-flat.png")) {
img_sprite_skin_flat(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/es")))) {
} else if (mg_http_match_uri(hm, "/es")) {
search(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/i")))) {
} else if (mg_http_match_uri(hm, "/i")) {
index_info(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/status")))) {
} else if (mg_http_match_uri(hm, "/status")) {
status(nc);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/f/")))) {
file(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/t/")))) {
thumbnail(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/s/")))) {
stats_files(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/tag/")))) {
if (WebCtx.tag_auth_enabled == TRUE) {
if (!validate_auth(nc, hm)) {
} else if (mg_http_match_uri(hm, "/f/*")) {
file(nc, hm);
} else if (mg_http_match_uri(hm, "/t/*/*")) {
thumbnail(nc, hm);
} else if (mg_http_match_uri(hm, "/s/*/*")) {
stats_files(nc, hm);
} else if (mg_http_match_uri(hm, "/tag/*")) {
if (WebCtx.tag_auth_enabled == TRUE && !validate_auth(nc, hm)) {
return;
}
}
tag(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
document_info(nc, hm, &path);
tag(nc, hm);
} else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm);
} else {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_reply(nc, 404, "", "Page not found");
}
} else if (ev == MG_EV_POLL) {
if (nc->user_data != NULL) {
if (nc->fn_data != NULL) {
//Waiting for ES reply
subreq_ctx_t *ctx = (subreq_ctx_t *) nc->user_data;
subreq_ctx_t *ctx = (subreq_ctx_t *) nc->fn_data;
web_post_async_poll(ctx);
if (ctx->done == TRUE) {
response_t *r = ctx->response;
if (r->status_code == 200) {
@@ -688,14 +612,14 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
free(json_str);
free(tmp);
}
mg_http_send_error(nc, 500, NULL);
mg_http_reply(nc, 500, "", "");
}
free_response(r);
free(ctx->data);
free(ctx);
nc->flags |= MG_F_SEND_AND_CLOSE;
nc->user_data = NULL;
nc->fn_data = NULL;
}
}
}
@@ -706,15 +630,18 @@ void serve(const char *listen_address) {
printf("Starting web server @ http://%s\n", listen_address);
struct mg_mgr mgr;
mg_mgr_init(&mgr, NULL);
mg_mgr_init(&mgr);
struct mg_connection *nc = mg_bind(&mgr, listen_address, ev_router);
int ok = 1;
struct mg_connection *nc = mg_http_listen(&mgr, listen_address, ev_router, NULL);
if (nc == NULL) {
LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address)
}
mg_set_protocol_http_websocket(nc);
for (;;) {
while (ok) {
mg_mgr_poll(&mgr, 10);
}
mg_mgr_free(&mgr);
LOG_INFO("serve.c", "Finished web event loop")
}

File diff suppressed because one or more lines are too long

77
tests/test_scan.py Normal file
View File

@@ -0,0 +1,77 @@
import unittest
import subprocess
import shutil
import json
import os
# Fixture tree that the tests below copy and scan; the path is relative, so it
# is resolved against the working directory the tests are started from.
TEST_FILES = "third-party/libscan/libscan-test-files/test_files"
def copy_files(files):
    """Copy the tree at ``files`` into the ``/tmp/sist2_test/`` scratch area.

    The copy is named after the base name of ``files``. Any copy left there
    by an earlier call is deleted first, so the result always mirrors the
    current state of the source tree.

    Returns the path of the fresh copy.
    """
    destination = os.path.join("/tmp/sist2_test/", os.path.basename(files))
    # A previous copy may or may not exist; either way start from scratch.
    shutil.rmtree(destination, ignore_errors=True)
    shutil.copytree(files, destination)
    return destination
def sist2(*args):
    """Echo and run ``./sist2`` with ``args``; return its captured stdout.

    The command line is printed before the binary is started so that it
    shows up in the test output. Errors from ``subprocess.check_output``
    propagate to the caller.
    """
    print(f"./sist2 {' '.join(args)}")
    command = ["./sist2", *args]
    return subprocess.check_output(args=command)
def sist2_index(files, *args):
    """Scan a fresh copy of ``files`` into the ``test_i`` index.

    ``args`` are appended to the ``sist2 scan`` command line. An existing
    ``test_i`` directory is removed before the scan.

    Returns an iterator over the documents of the resulting index.
    """
    scan_root = copy_files(files)

    # Drop the output of any earlier scan before writing a new index.
    shutil.rmtree("test_i", ignore_errors=True)

    scan_args = ["scan", scan_root, "-o", "test_i", *args]
    sist2(*scan_args)
    return iter(sist2_index_to_dict("test_i"))
def sist2_incremental_index(files, func=None, *args):
    """Run an incremental scan of ``files`` against the ``test_i`` index.

    A fresh copy of ``files`` is made first. If ``func`` is given, it is
    called with the path of that copy before the scan starts, which lets the
    caller add or remove files. ``args`` are appended to the ``sist2 scan``
    command line. The new index is written to ``test_i_inc``, which is
    removed beforehand if it exists.

    Returns an iterator over the documents of the ``test_i_inc`` index.
    """
    scan_root = copy_files(files)

    if func:
        # Let the caller modify the copied tree before it is re-scanned.
        func(scan_root)

    shutil.rmtree("test_i_inc", ignore_errors=True)

    scan_args = ["scan", scan_root, "-o", "test_i_inc", "--incremental", "test_i", *args]
    sist2(*scan_args)
    return iter(sist2_index_to_dict("test_i_inc"))
def sist2_index_to_dict(index):
    """Yield each document of ``index`` as a parsed JSON value.

    Runs ``./sist2 index --print`` on ``index`` and decodes every non-empty
    line of its output with ``json.loads``. This is a generator, so the
    command is only run once iteration starts.
    """
    output = subprocess.check_output(
        args=["./sist2", "index", "--print", index],
    )
    # Blank lines carry no document and are skipped.
    yield from (json.loads(entry) for entry in output.splitlines() if entry)
class ScanTest(unittest.TestCase):
    """End-to-end checks of ``sist2 scan`` run against the test file tree."""

    def test_incremental1(self):
        """Incremental scans must reflect files removed from or added to the tree."""

        def count(documents):
            # The index helpers return iterators, so count by consuming them.
            return sum(1 for _ in documents)

        def remove_files(path):
            for relative in ("msdoc/test1.doc", "msdoc/test2.doc"):
                os.remove(os.path.join(path, relative))

        def add_files(path):
            # Create three empty files.
            for name in ("newfile1", "newfile2", "newfile3"):
                with open(os.path.join(path, name), "w"):
                    pass

        file_count = count(sist2_index(TEST_FILES))

        after_removal = count(sist2_incremental_index(TEST_FILES, remove_files))
        self.assertEqual(after_removal, file_count - 2)

        after_addition = count(sist2_incremental_index(TEST_FILES, add_files))
        self.assertEqual(after_addition, file_count + 3)
# Run the test cases defined above when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()