Compare commits

...

50 Commits

Author SHA1 Message Date
bb23651087 update docker image version in readme 2021-07-10 13:05:13 -04:00
78f3c897e2 libscan version sync 2021-07-10 12:52:24 -04:00
a2209e91ca add fast-epub arg, tweak thread pool, error handling for nftw 2021-07-10 12:52:04 -04:00
ba31531d3a Return 404 when thumbnail is not found 2021-07-10 12:45:30 -04:00
d5a47b8dab Fix segfault in exec-script 2021-06-23 19:46:52 -04:00
d8c0b80524 Update README.md 2021-06-23 10:26:20 -04:00
142a4869e6 Bump version 2021-06-23 08:19:05 -04:00
ddb7f8d5d7 Fix some bugs in serve.c caused by mongoose upgrade 2021-06-23 08:18:11 -04:00
dfb8c67490 thread safety for debug info table 2021-06-14 15:04:08 -04:00
3da2c8cae3 Update CI scripts, Dockerfiles, enable arm64 build again 2021-06-14 14:02:16 -04:00
2f0e999b06 Update README.md 2021-06-13 09:50:56 -04:00
bf28dc8993 Merge pull request #164 from simon987/dev
v2.10.1
2021-06-13 09:50:23 -04:00
c6fee7f6e2 update argparse 2021-06-13 09:41:18 -04:00
201c2a1a47 Update CI things 2021-06-13 09:26:27 -04:00
7c46ad632a Update readme 2021-06-11 20:44:47 -04:00
5b8c13fd13 Handle GPS metadata in the UI 2021-06-11 20:41:05 -04:00
efa4a06e56 Fix meta_key UB problem 2021-06-11 20:19:36 -04:00
81670ee107 Fix subtitle problems 2021-06-11 10:05:33 -04:00
f9dac80905 Fix file download in mongoose 7.x 2021-06-09 13:34:38 -04:00
f8d9b718c0 Fix memory leak in RAW parsing 2021-06-09 08:22:31 -04:00
6f5fdc2935 Fix for segfault in some comic files 2021-06-07 09:01:46 -04:00
a01f6dff1f Use 16-bit ints for meta keys (wip) 2021-06-07 08:40:12 -04:00
22dd58e140 add signal handler w/ debug info 2021-05-08 16:23:24 -04:00
f3e07fb7f7 Merge pull request #155 from dpieski/patch-4
Create feature_request.md
2021-05-06 20:17:29 -04:00
7990e5cd2e Update feature_request.md 2021-05-06 20:16:59 -04:00
e3ca660983 Merge pull request #153 from dpieski/patch-1
Create bug_report.md
2021-05-06 20:15:52 -04:00
b87fb25458 Update bug_report.md 2021-05-06 20:15:41 -04:00
c7a77869ad Merge pull request #154 from dpieski/patch-2
Create config.yml
2021-05-06 20:09:36 -04:00
523c123e2e Enable advanced search with query_string 2021-05-06 20:07:20 -04:00
fc7f30d670 Add tests for subtitle 2021-05-05 16:10:55 -04:00
152fe11669 Set passphrase arg in arc_ctx 2021-05-05 15:52:46 -04:00
33f97f6bfb Increase scan queue size 2021-05-05 14:25:35 -04:00
71f9dfcfe0 sync libscan 2021-05-05 14:21:01 -04:00
5f657d61b3 Merge pull request #157 from simon987/mongoose-7
Update to mongoose 7.x, change Docker build
2021-05-05 14:18:36 -04:00
908def1016 Fix build, update dockerfile 2021-05-05 14:13:46 -04:00
db3d312835 wip 2021-05-05 13:55:57 -04:00
32c9cb28a3 Read subtitles from media files, fix bug in text_buffer 2021-05-05 13:55:57 -04:00
f839127129 Change encoding for antiword PDF 2021-05-05 13:55:57 -04:00
8111a6c143 Workaround for UTF8 .doc files 2021-05-05 13:55:57 -04:00
707a570828 Pause all other audio tags on play #148 2021-04-17 13:24:21 -04:00
Andrew
5073b00225 Create feature_request.md
Created basic feature request template.
2021-04-13 11:25:48 -05:00
Andrew
4923d1b51f Update bug_report.md
Forgot an exclamation mark
2021-04-13 11:20:46 -05:00
Andrew
097e332015 Create config.yml
Does two things:
1. Creates a link to the USAGE page. 
2. Removes "Open Blank Issue" option on the New Issue page.
2021-04-13 11:10:08 -05:00
Andrew
d4babe216b Update bug_report.md
Added a bug to the title just because. Not necessary, just wanted to see how it looks.
2021-04-13 10:58:32 -05:00
Andrew
44511a2202 Create bug_report.md
Beginnings of a Bug Report template for #151
2021-04-13 10:49:43 -05:00
50771bd1dc Read subtitles from media files, fix bug in text_buffer 2021-03-26 19:48:16 -04:00
bc884e137c Change encoding for antiword PDF 2021-01-16 12:17:43 -05:00
ce1e241dea Workaround for UTF8 .doc files 2021-01-16 12:13:56 -05:00
5fe9c9efa3 Tweak CI settings 2021-01-16 11:14:18 -05:00
75e4e93ddd Enable docker image builds 2021-01-16 10:57:55 -05:00
38 changed files with 719 additions and 357 deletions

25
.dockerignore Normal file
View File

@@ -0,0 +1,25 @@
.idea
*/thumbs
*.cbp
CMakeCache.txt
CMakeFiles
cmake-build-debug
cmake_install.cmake
Makefile
*.out
LOG
sist2*
index.sist2/
bundle*.css
bundle.js
**/*.a
**/vgcore.*
build/
.git/
third-party/libscan/libscan-test-files/
**/ext_ffmpeg
**/ext_libmobi
**/scan_a_test
Dockerfile
*.idx/
VERSION

View File

@@ -8,9 +8,24 @@ platform:
steps: steps:
- name: build - name: build
image: simon987/ubuntu_ci image: simon987/sist2-build
commands: commands:
- ./ci/build.sh - ./ci/build.sh
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
- name: scp files - name: scp files
image: appleboy/drone-scp image: appleboy/drone-scp
settings: settings:
@@ -24,8 +39,9 @@ steps:
from_secret: SSH_KEY from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source: source:
- ./VERSION
- ./sist2-x64-linux - ./sist2-x64-linux
- ./sist2-x64-linux-debug.tar.gz - ./sist2-x64-linux-debug
--- ---
kind: pipeline kind: pipeline
@@ -37,7 +53,7 @@ platform:
steps: steps:
- name: build - name: build
image: simon987/ubuntu_ci_arm image: simon987/sist2-build-arm64
commands: commands:
- ./ci/build_arm64.sh - ./ci/build_arm64.sh
- name: scp files - name: scp files
@@ -51,6 +67,6 @@ steps:
from_secret: SSH_USER from_secret: SSH_USER
key: key:
from_secret: SSH_KEY from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source: source:
- ./sist2-arm64-linux - ./sist2-arm64-linux

40
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@@ -0,0 +1,40 @@
---
name: "🐞 Bug Report"
about: Submit a bug report
title: ''
labels: bug
assignees: ''
---
**Device Information (please complete the following information):**
- OS: `[e.g., Ubuntu 20.04, WSL2]`
- Deployment: `[Linux, Linux ARM64 or Docker]`
- Browser *(if relevant)*: `[e.g., chrome, safari]`
- SIST2 Version: `[e.g., v2.9.0]`
- Elasticsearch Version *(if relevant)* : ``
**Command with arguments**
<!-- ex: `scan ~/Documents -o ./i2 --threads 3 -q 1.0` -->
**Describe the bug**
<!-- A clear and concise description of what the bug is. -->
**Steps To Reproduce**
Please be specific!
1. Go to '...'
2. Click on '....'
3. etc.
**Expected behavior**
<!-- A clear and concise description of what you expected to happen. -->
**Actual Behavior**
<!-- A clear and concise description of what actually happens. -->
**Screenshots**
<!-- If applicable, add screenshots to help explain your problem. -->
**Additional context**
<!-- Add any other context about the problem here. If applicable, please include why you think the bug is occurring and/or troubleshooting you have already performed. -->
<!-- If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net. -->

5
.github/ISSUE_TEMPLATE/config.yml vendored Normal file
View File

@@ -0,0 +1,5 @@
blank_issues_enabled: false
contact_links:
- name: SIST2 Documentation
url: https://github.com/simon987/sist2/blob/master/docs/USAGE.md
about: Check out the SIST2 documentation for answers to common questions

View File

@@ -0,0 +1,18 @@
---
name: "🚀 Feature Request"
about: Suggest an idea for SIST2
title: ''
assignees: ''
---
**Which SIST2 component is your Feature Request related to?**
<!-- e.g., Scan, Index, or Web? -->
**Is your feature request related to a problem? Please describe.**
<!-- A clear and concise description of what the problem is. e.g., "I'm always frustrated when [...]" -->
**What would you like to see happen?**
<!-- A clear and concise description of what you want to happen. -->
**Additional context**
<!-- Add any other context or screenshots about the feature request here. -->

2
.gitignore vendored
View File

@@ -16,3 +16,5 @@ bundle.js
vgcore.* vgcore.*
build/ build/
third-party/ third-party/
*.idx/
VERSION

View File

@@ -5,7 +5,7 @@ project(sist2 C)
option(SIST_DEBUG "Build a debug executable" on) option(SIST_DEBUG "Build a debug executable" on)
set(BUILD_TESTS off) set(BUILD_TESTS on)
add_subdirectory(third-party/libscan) add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off) set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse) add_subdirectory(third-party/argparse)
@@ -36,14 +36,15 @@ add_executable(sist2
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(PkgConfig REQUIRED)
pkg_search_module(GLIB REQUIRED glib-2.0)
find_package(lmdb CONFIG REQUIRED) find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED) find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-glib CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED) find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED) find_package(CURL CONFIG REQUIRED)
#find_package(OpenSSL REQUIRED)
target_include_directories( target_include_directories(
sist2 PUBLIC sist2 PUBLIC
@@ -51,6 +52,7 @@ target_include_directories(
${CMAKE_SOURCE_DIR}/third-party/utf8.h/ ${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/ ${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/ ${CMAKE_SOURCE_DIR}/
${GLIB_INCLUDE_DIRS}
) )
target_compile_options( target_compile_options(
@@ -103,7 +105,7 @@ target_link_libraries(
lmdb lmdb
cjson cjson
argparse argparse
unofficial::glib::glib ${GLIB_LDFLAGS}
unofficial::mongoose::mongoose unofficial::mongoose::mongoose
CURL::libcurl CURL::libcurl

View File

@@ -1,14 +0,0 @@
rm ./sist2 sist2_debug
cp ../sist2.gz .
gzip -d sist2.gz
strip sist2
version=$(./sist2 --version)
echo "Version ${version}"
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
docker push simon987/sist2:${version}
docker push simon987/sist2:latest
docker run --rm simon987/sist2 -v

View File

@@ -1,13 +0,0 @@
rm ./sist2_arm64
cp ../sist2_arm64.gz .
gzip -d sist2_arm64.gz
version=$(./sist2_arm64 --version)
echo "Version ${version}"
docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest
docker push simon987/sist2-arm64:"${version}"
docker push simon987/sist2-arm64:latest
docker run --rm simon987/sist2-arm64 -v

View File

@@ -1,9 +1,15 @@
FROM ubuntu:19.10 FROM simon987/sist2-build as build
MAINTAINER simon987 <me@simon987.net> MAINTAINER simon987 <me@simon987.net>
RUN apt update WORKDIR /build/
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \ ADD . /build/
curl libtiff5 libpng16-16 libpcre3 RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
FROM ubuntu:20.10
RUN apt update && apt install -y curl
RUN mkdir -p /usr/share/tessdata && \ RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \ cd /usr/share/tessdata/ && \
@@ -12,9 +18,9 @@ RUN mkdir -p /usr/share/tessdata && \
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\ curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\ curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\ curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
ADD sist2 /root/sist2 COPY --from=build /build/sist2 /root/sist2
ENV LANG C.UTF-8 ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8 ENV LC_ALL C.UTF-8

View File

@@ -1,9 +1,15 @@
FROM ubuntu:19.10 FROM simon987/sist2-build-arm64 as build
MAINTAINER simon987 <me@simon987.net> MAINTAINER simon987 <me@simon987.net>
RUN apt update WORKDIR /build/
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \ ADD . /build/
curl libtiff5 libpng16-16 libpcre3 RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
FROM ubuntu:20.10
RUN apt update && apt install -y curl
RUN mkdir -p /usr/share/tessdata && \ RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \ cd /usr/share/tessdata/ && \
@@ -12,11 +18,11 @@ RUN mkdir -p /usr/share/tessdata && \
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\ curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\ curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\ curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
ADD sist2_arm64 /root/sist2 COPY --from=build /build/sist2 /root/sist2
ENV LANG C.UTF-8 ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8 ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"] ENTRYPOINT ["/root/sist2"]

View File

@@ -1,6 +1,6 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg) ![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2) [![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/sist2/simon987_sist2/) [![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files) **Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)
@@ -25,14 +25,12 @@ sist2 (Simple incremental search tool)
* OCR support with tesseract \*\*\* * OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization * Stats page & disk utilisation visualization
\* See [format support](#format-support) \* See [format support](#format-support)
\*\* See [Archive files](#archive-files) \*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr) \*\*\* See [OCR](#ocr)
![stats](docs/stats.png) ![stats](docs/stats.png)
## Getting Started ## Getting Started
1. Have an Elasticsearch (>= 6.X.X) instance running 1. Have an Elasticsearch (>= 6.X.X) instance running
@@ -52,14 +50,12 @@ sist2 (Simple incremental search tool)
``` ```
1. Download sist2 executable 1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) * 1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/sist2/simon987_sist2/) *(Not recommended!)* 1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest` 1. *(or)* `docker pull simon987/sist2:2.10.3-x64-linux`
1. See [Usage guide](docs/USAGE.md) 1. See [Usage guide](docs/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
## Example usage ## Example usage
@@ -69,17 +65,16 @@ See [Usage guide](docs/USAGE.md) for more details
1. Push index to Elasticsearch: `sist2 index ./docs_idx` 1. Push index to Elasticsearch: `sist2 index ./docs_idx`
1. Start web interface: `sist2 web ./docs_idx` 1. Start web interface: `sist2 web ./docs_idx`
## Format support ## Format support
File type | Library | Content | Thumbnail | Metadata File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:--- :---|:---|:---|:---|:---
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title | pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
cbz,cbr | *(none)* | - | yes | - | cbz,cbr | *(none)* | - | yes | - |
`audio/*` | ffmpeg | - | yes | ID3 tags | `audio/*` | ffmpeg | - | yes | ID3 tags |
`video/*` | ffmpeg | - | yes | title, comment, artist | `video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) | `image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags | raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - | `text/plain` | *(none)* | yes | no | - |
html, xml | *(none)* | yes | no | - | html, xml | *(none)* | yes | no | - |
@@ -89,48 +84,67 @@ doc (MS Word 97-2003) | antiword | yes | yes | author, title |
mobi, azw, azw3 | libmobi | yes | no | author, title | mobi, azw, azw3 | libmobi | yes | no | author, title |
\* *See [Archive files](#archive-files)* \* *See [Archive files](#archive-files)*
### Archive files ### Archive files
**sist2** will scan files stored into archive files (zip, tar, 7z...) as if
they were directly in the file system. Recursive (archives inside archives) **sist2** will scan files stored into archive files (zip, tar, 7z...) as if they were directly in the file system.
Recursive (archives inside archives)
scan is also supported. scan is also supported.
**Limitations**: **Limitations**:
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
is limited (see `--mem-buffer` option) is limited (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where * Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split **sist2** is not I/O bound, scans might be faster when larger archives are split into smaller parts.
into smaller parts.
### OCR ### OCR
You can enable OCR support for pdf,xps,fb2,epub file types with the You can enable OCR support for pdf,xps,fb2,epub file types with the
`--ocr <lang>` option. Download the language data files with your `--ocr <lang>` option. Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files). directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa) pre-installed. (hin, jpn, eng, fra, rus, spa) pre-installed.
Examples Examples
```bash ```bash
sist2 scan --ocr jpn ~/Books/Manga/ sist2 scan --ocr jpn ~/Books/Manga/
sist2 scan --ocr eng ~/Books/Textbooks/ sist2 scan --ocr eng ~/Books/Textbooks/
``` ```
## Build from source ## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries
binaries (GCC 7+ required).
### With docker (recommended)
```bash
git clone --recursive https://github.com/simon987/sist2/
cd sist2
docker build . -f ./Dockerfile -t my-sist2-image
docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
```
### On a linux computer
1. Install compile-time dependencies 1. Install compile-time dependencies
```bash ```bash
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
``` ```
1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
2. Build 1. Install vcpkg dependencies
```bash
vcpkg install curl[core,openssl]
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libuuid libmagic libraw jasper lcms gumbo
```
1. Build
```bash ```bash
git clone --recursive https://github.com/simon987/sist2/ git clone --recursive https://github.com/simon987/sist2/
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake . cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .

View File

@@ -2,19 +2,18 @@
VCPKG_ROOT="/vcpkg" VCPKG_ROOT="/vcpkg"
rm *.gz rm *.gz &>/dev/null
git submodule update --init --recursive git submodule update --init --recursive
rm -rf CMakeFiles CMakeCache.txt rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" . cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 33 make -j $(nproc)
strip sist2 strip sist2
./sist2 -v > VERSION
mv sist2 sist2-x64-linux mv sist2 sist2-x64-linux
rm -rf CMakeFiles CMakeCache.txt rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" . cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 33 make -j $(nproc)
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2 mv sist2_debug sist2-x64-linux-debug
mv sist2_debug sist2-x64-linux-debug
tar -czf sist2-x64-linux-debug.tar.gz sist2-x64-linux-debug libasan.so.2

View File

@@ -2,12 +2,12 @@
VCPKG_ROOT="/vcpkg" VCPKG_ROOT="/vcpkg"
rm *.gz rm *.gz &>/dev/null
git submodule update --init --recursive git submodule update --init --recursive
rm -rf CMakeFiles CMakeCache.txt rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" . cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 4 make -j $(nproc)
strip sist2 strip sist2
mv sist2 sist2-arm64-linux mv sist2 sist2-arm64-linux

View File

@@ -46,6 +46,7 @@ Scan options
--fast Only index file names & mime type --fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005 --treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000 --mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
--read-subtitles Read subtitles from media files
Index options Index options
-t, --threads=<int> Number of threads. DEFAULT=1 -t, --threads=<int> Number of threads. DEFAULT=1
@@ -91,7 +92,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute) Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
will be copied to the new index and will not be parsed again. will be copied to the new index and will not be parsed again.
* `-o, --output` Output directory. * `-o, --output` Output directory.
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url)) * `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--name` Set the `name` option for the web module * `--name` Set the `name` option for the web module
* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth * `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
* `--archive` Archive file mode. * `--archive` Archive file mode.
@@ -123,6 +124,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
larger than this number will be read sequentially and no *seek* operations will be supported. larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -` To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
### Scan examples ### Scan examples
@@ -355,8 +357,7 @@ You can safely copy the `/tags/` database to another index.
See [Automatic tagging](#automatic-tagging) for information about tag See [Automatic tagging](#automatic-tagging) for information about tag
hierarchies and tag colors. hierarchies and tag colors.
\* *It can take a few seconds to take effect in new search queries, and the page needs \* *It can take a few seconds to take effect in new search queries.*
to be reloaded for the tags tab to update*
### Automatic tagging ### Automatic tagging

View File

@@ -105,10 +105,10 @@
"analyzer": "my_nGram", "analyzer": "my_nGram",
"type": "text" "type": "text"
}, },
"_keyword.*": { "_keyword.*": {
"type": "keyword" "type": "keyword"
}, },
"_text.*": { "_text.*": {
"analyzer": "content_analyzer", "analyzer": "content_analyzer",
"type": "text", "type": "text",
"fields": { "fields": {
@@ -165,6 +165,30 @@
"exif_user_comment": { "exif_user_comment": {
"type": "text" "type": "text"
}, },
"exif_gps_longitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dec": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dec": {
"type": "keyword",
"index": false
},
"author": { "author": {
"type": "text" "type": "text"
}, },

6
scripts/reset.sh Executable file
View File

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
make clean
rm -rf CMakeFiles/ CMakeCache.txt Makefile \
third-party/libscan/CMakeFiles third-party/libscan/CMakeCache.txt third-party/libscan/third-party/ext_ffmpeg \
third-party/libscan/third-party/ext_libmobi third-party/libscan/Makefile

View File

@@ -227,10 +227,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth) LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
LOG_DEBUGF("cli.c", "arg path=%s", args->path) LOG_DEBUGF("cli.c", "arg path=%s", args->path)
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive) LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase)
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang) LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path) LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex) LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast) LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold) LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer) LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)

View File

@@ -18,6 +18,7 @@ typedef struct scan_args {
char *path; char *path;
char *archive; char *archive;
archive_mode_t archive_mode; archive_mode_t archive_mode;
char *archive_passphrase;
char *tesseract_lang; char *tesseract_lang;
const char *tesseract_path; const char *tesseract_path;
char *exclude_regex; char *exclude_regex;
@@ -25,6 +26,8 @@ typedef struct scan_args {
const char* treemap_threshold_str; const char* treemap_threshold_str;
double treemap_threshold; double treemap_threshold;
int max_memory_buffer; int max_memory_buffer;
int read_subtitles;
int fast_epub;
} scan_args_t; } scan_args_t;
scan_args_t *scan_args_create(); scan_args_t *scan_args_create();

View File

@@ -40,6 +40,14 @@ typedef struct {
pcre_extra *exclude_extra; pcre_extra *exclude_extra;
int fast; int fast;
GHashTable *dbg_current_files;
pthread_mutex_t dbg_current_files_mu;
int dbg_failed_files_count;
int dbg_skipped_files_count;
int dbg_excluded_files_count;
pthread_mutex_t dbg_file_counts_mu;
scan_arc_ctx_t arc_ctx; scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx; scan_comic_ctx_t comic_ctx;
scan_ebook_ctx_t ebook_ctx; scan_ebook_ctx_t ebook_ctx;

File diff suppressed because one or more lines are too long

View File

@@ -15,9 +15,13 @@ typedef struct {
char has_parent; char has_parent;
} line_t; } line_t;
#define META_NEXT 0xFFFF
void skip_meta(FILE *file) { void skip_meta(FILE *file) {
enum metakey key = getc(file); enum metakey key = 0;
while (key != '\n') { fread(&key, sizeof(uint16_t), 1, file);
while (key != META_NEXT) {
if (IS_META_INT(key)) { if (IS_META_INT(key)) {
fseek(file, sizeof(int), SEEK_CUR); fseek(file, sizeof(int), SEEK_CUR);
} else if (IS_META_LONG(key)) { } else if (IS_META_LONG(key)) {
@@ -26,7 +30,7 @@ void skip_meta(FILE *file) {
while ((getc(file))) {} while ((getc(file))) {}
} }
key = getc(file); fread(&key, sizeof(uint16_t), 1, file);
} }
} }
@@ -66,7 +70,7 @@ index_descriptor_t read_index_descriptor(char *path) {
} }
char *buf = malloc(info.st_size + 1); char *buf = malloc(info.st_size + 1);
int ret = read(fd, buf, info.st_size); size_t ret = read(fd, buf, info.st_size);
if (ret == -1) { if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno)); LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
} }
@@ -152,8 +156,20 @@ char *get_meta_key_text(enum metakey meta_key) {
return "thumbnail"; return "thumbnail";
case MetaPages: case MetaPages:
return "pages"; return "pages";
case MetaExifGpsLongitudeRef:
return "exif_gps_longitude_ref";
case MetaExifGpsLongitudeDMS:
return "exif_gps_longitude_dms";
case MetaExifGpsLongitudeDec:
return "exif_gps_longitude_dec";
case MetaExifGpsLatitudeRef:
return "exif_gps_latitude_ref";
case MetaExifGpsLatitudeDMS:
return "exif_gps_latitude_dms";
case MetaExifGpsLatitudeDec:
return "exif_gps_latitude_dec";
default: default:
return NULL; LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
} }
} }
@@ -183,7 +199,7 @@ void write_document(document_t *doc) {
meta_line_t *meta = doc->meta_head; meta_line_t *meta = doc->meta_head;
while (meta != NULL) { while (meta != NULL) {
dyn_buffer_write_char(&buf, meta->key); dyn_buffer_write_short(&buf, (uint16_t) meta->key);
if (IS_META_INT(meta->key)) { if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->int_val); dyn_buffer_write_int(&buf, meta->int_val);
@@ -197,7 +213,7 @@ void write_document(document_t *doc) {
meta = meta->next; meta = meta->next;
free(tmp); free(tmp);
} }
dyn_buffer_write_char(&buf, '\n'); dyn_buffer_write_short(&buf, META_NEXT);
int res = write(index_fd, buf.buf, buf.cur); int res = write(index_fd, buf.buf, buf.cur);
if (res == -1) { if (res == -1) {
@@ -221,7 +237,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
FILE *file = fopen(path, "rb"); FILE *file = fopen(path, "rb");
while (TRUE) { while (TRUE) {
buf.cur = 0; buf.cur = 0;
size_t _ = fread((void *) &line, 1, sizeof(line_t), file); size_t _ = fread((void *) &line, sizeof(line_t), 1, file);
if (feof(file)) { if (feof(file)) {
break; break;
} }
@@ -268,9 +284,10 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
cJSON_AddStringToObject(document, "path", ""); cJSON_AddStringToObject(document, "path", "");
} }
enum metakey key = getc(file); enum metakey key = 0;
size_t ret = 0; fread(&key, sizeof(uint16_t), 1, file);
while (key != '\n') { size_t ret;
while (key != META_NEXT) {
switch (key) { switch (key) {
case MetaPages: case MetaPages:
case MetaWidth: case MetaWidth:
@@ -308,6 +325,12 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
case MetaAuthor: case MetaAuthor:
case MetaModifiedBy: case MetaModifiedBy:
case MetaThumbnail: case MetaThumbnail:
case MetaExifGpsLongitudeDMS:
case MetaExifGpsLongitudeDec:
case MetaExifGpsLongitudeRef:
case MetaExifGpsLatitudeDMS:
case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef:
case MetaTitle: { case MetaTitle: {
buf.cur = 0; buf.cur = 0;
while ((c = getc(file)) != 0) { while ((c = getc(file)) != 0) {
@@ -323,7 +346,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key) LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
} }
key = getc(file); fread(&key, sizeof(uint16_t), 1, file);
} }
cJSON *meta_obj = NULL; cJSON *meta_obj = NULL;
@@ -458,7 +481,7 @@ void incremental_read(GHashTable *table, const char *filepath) {
incremental_put(table, line.path_md5, line.mtime); incremental_put(table, line.path_md5, line.mtime);
while ((getc(file))) {} while ((getc(file)) != 0) {}
skip_meta(file); skip_meta(file);
} }
fclose(file); fclose(file);
@@ -508,11 +531,11 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
free(buf); free(buf);
} }
enum metakey key; enum metakey key = 0;
while (1) { while (1) {
key = getc(file); fread(&key, sizeof(uint16_t), 1, file);
fwrite(&key, sizeof(char), 1, dst_file); fwrite(&key, sizeof(uint16_t), 1, dst_file);
if (key == '\n') { if (key == META_NEXT) {
break; break;
} }

View File

@@ -4,6 +4,7 @@
store_t *store_create(char *path, size_t chunk_size) { store_t *store_create(char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t)); store_t *store = malloc(sizeof(struct store_t));
#if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size; store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL); pthread_rwlock_init(&store->lock, NULL);
@@ -28,15 +29,18 @@ store_t *store_create(char *path, size_t chunk_size) {
mdb_txn_begin(store->env, NULL, 0, &txn); mdb_txn_begin(store->env, NULL, 0, &txn);
mdb_dbi_open(txn, NULL, 0, &store->dbi); mdb_dbi_open(txn, NULL, 0, &store->dbi);
mdb_txn_commit(txn); mdb_txn_commit(txn);
#endif
return store; return store;
} }
void store_destroy(store_t *store) { void store_destroy(store_t *store) {
#if (SIST_FAKE_STORE != 1)
pthread_rwlock_destroy(&store->lock); pthread_rwlock_destroy(&store->lock);
mdb_close(store->env, store->dbi); mdb_close(store->env, store->dbi);
mdb_env_close(store->env); mdb_env_close(store->env);
#endif
free(store); free(store);
} }
@@ -56,6 +60,8 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
} }
} }
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key; MDB_val mdb_key;
mdb_key.mv_data = key; mdb_key.mv_data = key;
mdb_key.mv_size = key_len; mdb_key.mv_size = key_len;
@@ -92,10 +98,13 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
if (put_ret != 0) { if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret)) LOG_ERROR("store.c", mdb_strerror(put_ret))
} }
#endif
} }
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) { char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
char *buf = NULL; char *buf = NULL;
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key; MDB_val mdb_key;
mdb_key.mv_data = key; mdb_key.mv_data = key;
mdb_key.mv_size = key_len; mdb_key.mv_size = key_len;
@@ -116,6 +125,7 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
} }
mdb_txn_abort(txn); mdb_txn_abort(txn);
#endif
return buf; return buf;
} }

View File

@@ -41,6 +41,10 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) { if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
LOG_DEBUGF("walk.c", "Excluded: %s", filepath) LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_excluded_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
return 0; return 0;
} }
@@ -51,6 +55,8 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
return 0; return 0;
} }
#define MAX_FILE_DESCRIPTORS 64
int walk_directory_tree(const char *dirpath) { int walk_directory_tree(const char *dirpath) {
return nftw(dirpath, handle_entry, 15, FTW_PHYS); return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_DEPTH);
} }

View File

@@ -21,7 +21,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0" #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.9.0"; static const char *const Version = "2.10.3";
static const char *const usage[] = { static const char *const usage[] = {
"sist2 scan [OPTION]... PATH", "sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX", "sist2 index [OPTION]... INDEX",
@@ -30,6 +30,69 @@ static const char *const usage[] = {
NULL, NULL,
}; };
#include<signal.h>
#include<unistd.h>
static __sighandler_t sigsegv_handler = NULL;
static __sighandler_t sigabrt_handler = NULL;
/**
 * Fatal-signal handler registered for SIGSEGV and SIGABRT.
 *
 * Forces verbose logging, prints the job registered for every thread in
 * ScanCtx.dbg_current_files, dumps the thread pool counters, asks the user to
 * file a bug report, chains to the handler that was installed before this one
 * and finally lets the signal terminate the process.
 *
 * NOTE(review): the logging macros, the GLib iterator and the tpool helper are
 * presumably not async-signal-safe; this is best-effort diagnostics for a
 * process that is already going down.
 *
 * @param signum Number of the signal being handled.
 */
void sig_handler(int signum) {

    LogCtx.verbose = 1;
    LogCtx.very_verbose = 1;

    LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
    LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));

    // This handler is installed at the very top of main(): the crash may happen
    // before (or without) the scan context being initialised.
    if (ScanCtx.dbg_current_files != NULL) {
        GHashTableIter iter;
        g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);

        void *key;
        void *value;
        while (g_hash_table_iter_next(&iter, &key, &value)) {
            parse_job_t *job = value;

            // The key is a thread id stored in a pointer; widen it explicitly so
            // the argument really has the type that %llX expects.
            unsigned long long thread_id = (unsigned long long) (unsigned long) key;

            if (isatty(STDERR_FILENO)) {
                // Pick one of 7 ANSI colours (31..37) per thread
                LOG_DEBUGF(
                        "*SIGNAL HANDLER*",
                        "Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
                        31 + (int) (((unsigned int) thread_id) % 7), thread_id, job->filepath
                );
            } else {
                LOG_DEBUGF(
                        "*SIGNAL HANDLER*",
                        "THREAD [%04llX] was working on job %s",
                        thread_id, job->filepath
                );
            }
        }
    }

    if (ScanCtx.pool != NULL) {
        tpool_dump_debug_info(ScanCtx.pool);
    }

    LOG_INFO(
            "*SIGNAL HANDLER*",
            "Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
    )
    LOG_INFO(
            "*SIGNAL HANDLER*",
            "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
    )

#ifndef SIST_DEBUG
    LOG_WARNING(
            "*SIGNAL HANDLER*",
            "You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
            "releases page to provide additionnal information when submitting a bug report."
    )
#endif

    // Chain to the previously installed handler, but only if it is a real
    // function: SIG_DFL / SIG_IGN are sentinel values and must not be called.
    if (signum == SIGSEGV && sigsegv_handler != NULL
        && sigsegv_handler != SIG_DFL && sigsegv_handler != SIG_IGN) {
        sigsegv_handler(signum);
    } else if (signum == SIGABRT && sigabrt_handler != NULL
               && sigabrt_handler != SIG_DFL && sigabrt_handler != SIG_IGN) {
        sigabrt_handler(signum);
    }

    // Returning from a SIGSEGV handler re-executes the faulting instruction.
    // Restore the default disposition and re-raise so the process terminates
    // with the original signal (and still produces a core dump if enabled).
    signal(signum, SIG_DFL);
    raise(signum);
}
void init_dir(const char *dirpath) { void init_dir(const char *dirpath) {
char path[PATH_MAX]; char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath); snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@@ -99,6 +162,16 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.arc_ctx.log = _log; ScanCtx.arc_ctx.log = _log;
ScanCtx.arc_ctx.logf = _logf; ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse; ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
if (args->archive_passphrase != NULL) {
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
} else {
ScanCtx.arc_ctx.passphrase[0] = 0;
}
ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
// Comic // Comic
ScanCtx.comic_ctx.log = _log; ScanCtx.comic_ctx.log = _log;
@@ -118,6 +191,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.ebook_ctx.log = _log; ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf; ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store; ScanCtx.ebook_ctx.store = _store;
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
// Font // Font
ScanCtx.font_ctx.enable_tn = args->size > 0; ScanCtx.font_ctx.enable_tn = args->size > 0;
@@ -132,6 +206,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.media_ctx.logf = _logf; ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store; ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024; ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
init_media(); init_media();
// OOXML // OOXML
@@ -177,6 +252,37 @@ void initialize_scan_context(scan_args_t *args) {
} }
/**
 * Prepare the tables used by an incremental scan.
 *
 * Creates ScanCtx.original_table and ScanCtx.copy_table, checks that the
 * descriptor of the index at args->incremental has the same version as this
 * executable, then feeds every directory entry whose name starts with
 * "_index_" to incremental_read() so that original_table gets populated.
 *
 * Failure to open the directory and a version mismatch are both reported
 * through LOG_FATALF.
 *
 * @param args Scan arguments; only args->incremental is read here.
 */
void load_incremental_index(const scan_args_t *args) {
    static const char index_file_prefix[] = "_index_";

    ScanCtx.original_table = incremental_get_table();
    ScanCtx.copy_table = incremental_get_table();

    DIR *index_dir = opendir(args->incremental);
    if (index_dir == NULL) {
        LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
    }

    // Refuse to reuse an index that was written by another version
    char descriptor_path[PATH_MAX];
    snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
    index_descriptor_t original_desc = read_index_descriptor(descriptor_path);

    int same_version = strcmp(original_desc.version, Version) == 0;
    if (!same_version) {
        LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
                   Version, INDEX_VERSION_EXTERNAL)
    }

    // Read every "_index_*" file found in the index directory
    struct dirent *entry = readdir(index_dir);
    while (entry != NULL) {
        if (strncmp(entry->d_name, index_file_prefix, sizeof(index_file_prefix) - 1) == 0) {
            char file_path[PATH_MAX];
            snprintf(file_path, PATH_MAX, "%s%s", args->incremental, entry->d_name);
            incremental_read(ScanCtx.original_table, file_path);
        }
        entry = readdir(index_dir);
    }
    closedir(index_dir);

    LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
void sist2_scan(scan_args_t *args) { void sist2_scan(scan_args_t *args) {
ScanCtx.mime_table = mime_get_mime_table(); ScanCtx.mime_table = mime_get_mime_table();
@@ -198,42 +304,22 @@ void sist2_scan(scan_args_t *args) {
scan_print_header(); scan_print_header();
if (args->incremental != NULL) { if (args->incremental != NULL) {
ScanCtx.original_table = incremental_get_table(); load_incremental_index(args);
ScanCtx.copy_table = incremental_get_table();
DIR *dir = opendir(args->incremental);
if (dir == NULL) {
LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
}
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
if (strcmp(original_desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
Version, INDEX_VERSION_EXTERNAL)
}
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
incremental_read(ScanCtx.original_table, file_path);
}
}
closedir(dir);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
} }
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE); ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
tpool_start(ScanCtx.pool); tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root); int walk_ret = walk_directory_tree(ScanCtx.index.desc.root);
if (walk_ret == -1) {
LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno)
}
tpool_wait(ScanCtx.pool); tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool); tpool_destroy(ScanCtx.pool);
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
if (args->incremental != NULL) { if (args->incremental != NULL) {
char dst_path[PATH_MAX]; char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental); snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
@@ -355,6 +441,7 @@ void sist2_exec_script(exec_args_t *args) {
index_descriptor_t desc = read_index_descriptor(descriptor_path); index_descriptor_t desc = read_index_descriptor(descriptor_path);
IndexCtx.es_url = args->es_url; IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type) LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
@@ -399,6 +486,9 @@ void sist2_web(web_args_t *args) {
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler);
sigabrt_handler = signal(SIGABRT, sig_handler);
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
scan_args_t *scan_args = scan_args_create(); scan_args_t *scan_args = scan_args_create();
@@ -439,6 +529,9 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). " OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
"skip: Don't parse, list: only get file names as text, " "skip: Don't parse, list: only get file names as text, "
"shallow: Don't parse archives inside archives. DEFAULT: recurse"), "shallow: Don't parse archives inside archives. DEFAULT: recurse"),
OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
"Passphrase for encrypted archive files"),
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see " OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
"which are installed on your machine)"), "which are installed on your machine)"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"), OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
@@ -448,6 +541,8 @@ int main(int argc, const char *argv[]) {
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer, OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives " "Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"), "(see USAGE.md). DEFAULT: 2000"),
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub, "Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
OPT_GROUP("Index options"), OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),

View File

@@ -41,11 +41,20 @@ void fs_reset(struct vfile *f) {
#define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL)) #define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL))
/**
 * Register `job` as the job of the calling thread in
 * ScanCtx.dbg_current_files, replacing whatever that thread had registered
 * before. The table is guarded by ScanCtx.dbg_current_files_mu.
 *
 * NOTE(review): the table is created with g_int64_hash/g_int64_equal, which
 * dereference their key, whereas the key stored here is the thread id itself
 * packed into a pointer -- confirm this pairing is intended.
 *
 * @param job Job that the calling thread is about to process.
 */
void set_dbg_current_file(parse_job_t *job) {
    // The thread id, converted to a pointer-sized value, is used as the key
    void *thread_key = GINT_TO_POINTER((unsigned long long) pthread_self());

    pthread_mutex_lock(&ScanCtx.dbg_current_files_mu);
    g_hash_table_replace(ScanCtx.dbg_current_files, thread_key, job);
    pthread_mutex_unlock(&ScanCtx.dbg_current_files_mu);
}
void parse(void *arg) { void parse(void *arg) {
parse_job_t *job = arg; parse_job_t *job = arg;
document_t doc; document_t doc;
set_dbg_current_file(job);
doc.filepath = job->filepath; doc.filepath = job->filepath;
doc.ext = (short) job->ext; doc.ext = (short) job->ext;
doc.base = (short) job->base; doc.base = (short) job->base;
@@ -62,6 +71,11 @@ void parse(void *arg) {
int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5); int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) { if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5); incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5);
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_skipped_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
return; return;
} }
@@ -103,6 +117,10 @@ void parse(void *arg) {
} }
CLOSE_FILE(job->vfile) CLOSE_FILE(job->vfile)
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_failed_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
return; return;
} }

View File

@@ -192,6 +192,19 @@ function makeUserTag(tag, hit) {
return userTag; return userTag;
} }
/**
 * Append an "Exif GPS" row to a metadata table body.
 *
 * The value cell holds a link, opened in a new tab, that points to Google Maps
 * at the given coordinates; the link text is "latitude, longitude".
 *
 * @param tbody jQuery object the row is appended to
 * @param latitude latitude, interpolated as-is into the link text and URL
 * @param longitude longitude, interpolated as-is into the link text and URL
 */
function makeGpsMetaRow(tbody, latitude, longitude) {
    const mapLink = $("<a>")
        .text(`${latitude}, ${longitude}`)
        .attr("href", `https://maps.google.com/?q=${latitude},${longitude}&ll=${latitude},${longitude}&t=k&z=17`)
        .attr("target", "_blank");

    const labelCell = $("<td>").text("Exif GPS");
    const valueCell = $("<td>").append(mapLink);

    const row = $("<tr>");
    row.append(labelCell);
    row.append(valueCell);

    tbody.append(row);
}
function infoButtonCb(hit) { function infoButtonCb(hit) {
return () => { return () => {
getDocumentInfo(hit["_id"]).then(doc => { getDocumentInfo(hit["_id"]).then(doc => {
@@ -229,13 +242,25 @@ function infoButtonCb(hit) {
.text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " ")) .text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " "))
.attr("title", doc["mtime"])) .attr("title", doc["mtime"]))
); );
// Exif GPS
if ("exif_gps_longitude_dec" in doc) {
makeGpsMetaRow(tbody, doc["exif_gps_latitude_dec"], doc["exif_gps_longitude_dec"])
} else if ("exif_gps_longitude_dms" in doc) {
makeGpsMetaRow(
tbody,
dmsToDecimal(doc["exif_gps_latitude_dms"], doc["exif_gps_latitude_ref"]),
dmsToDecimal(doc["exif_gps_longitude_dms"], doc["exif_gps_longitude_ref"]),
)
}
const displayFields = new Set([ const displayFields = new Set([
"mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc", "mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author", "bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
"modified_by", "pages" "modified_by", "pages"
]); ]);
Object.keys(doc) Object.keys(doc)
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_")) .filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || (key.startsWith("exif_") && !key.includes("gps")))
.forEach(key => { .forEach(key => {
tbody.append($("<tr>") tbody.append($("<tr>")
.append($("<td>").text(key)) .append($("<td>").text(key))
@@ -350,6 +375,14 @@ function createDocCard(hit) {
audio.setAttribute("controls", ""); audio.setAttribute("controls", "");
audio.setAttribute("type", hit["_source"]["mime"]); audio.setAttribute("type", hit["_source"]["mime"]);
audio.setAttribute("src", "f/" + hit["_id"]); audio.setAttribute("src", "f/" + hit["_id"]);
audio.addEventListener("play", () => {
// Pause all currently playing audio tags
$("audio").each(function () {
if (this !== audio) {
this.pause();
}
});
});
docCard.appendChild(audio) docCard.appendChild(audio)
} }

View File

@@ -511,8 +511,8 @@ function search(after = null) {
searchResults.appendChild(preload); searchResults.appendChild(preload);
} }
let query = searchBar.value; let searchBarValue = searchBar.value;
let empty = query === ""; let empty = searchBarValue === "";
let condition = empty ? "should" : "must"; let condition = empty ? "should" : "must";
let filters = [ let filters = [
{range: {size: {gte: size_min, lte: size_max}}}, {range: {size: {gte: size_min, lte: size_max}}},
@@ -561,19 +561,32 @@ function search(after = null) {
filters.push({range: {mtime: {lte: date_max}}}) filters.push({range: {mtime: {lte: date_max}}})
} }
let query;
if (CONF.options.queryMode === "simple") {
query = {
simple_query_string: {
query: searchBarValue,
fields: fields,
default_operator: "and"
}
}
} else {
query = {
query_string: {
query: searchBarValue,
default_field: "name",
default_operator: "and"
}
}
}
let q = { let q = {
"_source": { "_source": {
excludes: ["content", "_tie"] excludes: ["content", "_tie"]
}, },
query: { query: {
bool: { bool: {
[condition]: { [condition]: query,
simple_query_string: {
query: query,
fields: fields,
default_operator: "and"
}
},
filter: filters filter: filters
} }
}, },
@@ -611,7 +624,9 @@ function search(after = null) {
} }
} }
$.jsonPost("es", q).then(searchResult => { const showError = CONF.options.queryMode === "advanced";
$.jsonPost("es", q, showError).then(searchResult => {
let hits = searchResult["hits"]["hits"]; let hits = searchResult["hits"]["hits"];
if (hits) { if (hits) {
lastDoc = hits[hits.length - 1]; lastDoc = hits[hits.length - 1];
@@ -645,7 +660,25 @@ function search(after = null) {
reachedEnd = hits.length !== SIZE; reachedEnd = hits.length !== SIZE;
insertHits(resultContainer, hits); insertHits(resultContainer, hits);
searchBusy = false; searchBusy = false;
}); }).fail(() => {
searchBusy = false;
if (!after) {
preload.remove();
}
console.log("QUERY:")
console.log(q)
$.toast({
heading: "Query error",
text: "Could not parse or execute query, please check the Advanced search documentation. " +
"See server logs for more information.",
stack: false,
bgColor: "#FF8F00",
textColor: "#FFF3E0",
position: 'bottom-right',
hideAfter: false
});
})
} }

View File

@@ -70,7 +70,7 @@ function strUnescape(str) {
for (let i = 0; i < str.length; i++) { for (let i = 0; i < str.length; i++) {
const c = str[i]; const c = str[i];
const next = str[i+1]; const next = str[i + 1];
if (c === ']') { if (c === ']') {
if (next === ']') { if (next === ']') {
@@ -102,7 +102,8 @@ const _defaults = {
treemapSize: "large", treemapSize: "large",
suggestPath: true, suggestPath: true,
fragmentSize: 100, fragmentSize: 100,
columns: 5 columns: 5,
queryMode: "simple"
}; };
function loadSettings() { function loadSettings() {
@@ -120,6 +121,7 @@ function loadSettings() {
$("#settingSuggestPath").prop("checked", CONF.options.suggestPath); $("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
$("#settingFragmentSize").val(CONF.options.fragmentSize); $("#settingFragmentSize").val(CONF.options.fragmentSize);
$("#settingColumns").val(CONF.options.columns); $("#settingColumns").val(CONF.options.columns);
$("#settingQueryMode").val(CONF.options.queryMode);
} }
function Settings() { function Settings() {
@@ -127,6 +129,7 @@ function Settings() {
this._onUpdate = function () { this._onUpdate = function () {
$("#fuzzyToggle").prop("checked", this.options.fuzzy); $("#fuzzyToggle").prop("checked", this.options.fuzzy);
$("#searchBar").attr("placeholder", this.options.queryMode === "simple" ? "Search" : "Advanced search");
updateColumnStyle(); updateColumnStyle();
}; };
@@ -165,6 +168,7 @@ function updateSettings() {
CONF.options.suggestPath = $("#settingSuggestPath").prop("checked"); CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
CONF.options.fragmentSize = $("#settingFragmentSize").val(); CONF.options.fragmentSize = $("#settingFragmentSize").val();
CONF.options.columns = $("#settingColumns").val(); CONF.options.columns = $("#settingColumns").val();
CONF.options.queryMode = $("#settingQueryMode").val();
CONF.save(); CONF.save();
if (typeof searchDebounced !== "undefined") { if (typeof searchDebounced !== "undefined") {
@@ -187,14 +191,16 @@ function updateSettings() {
}); });
} }
jQuery["jsonPost"] = function (url, data) { jQuery["jsonPost"] = function (url, data, showError = true) {
return jQuery.ajax({ return jQuery.ajax({
url: url, url: url,
type: "post", type: "post",
data: JSON.stringify(data), data: JSON.stringify(data),
contentType: "application/json" contentType: "application/json"
}).fail(err => { }).fail(err => {
showEsError(); if (showError) {
showEsError();
}
console.log(err); console.log(err);
}); });
}; };
@@ -212,7 +218,7 @@ function updateColumnStyle() {
const style = document.getElementById("style"); const style = document.getElementById("style");
if (style) { if (style) {
style.innerHTML = style.innerHTML =
` `
@media screen and (min-width: 1500px) { @media screen and (min-width: 1500px) {
.container { .container {
max-width: 1440px; max-width: 1440px;
@@ -229,4 +235,14 @@ function updateColumnStyle() {
} }
` `
} }
}
/**
 * Convert a degrees/minutes/seconds string to decimal degrees.
 *
 * `dms` is split on "," into three components; each component is read as
 * "numerator:denominator" and evaluated as a quotient.
 *
 * @param dms string such as "45:1, 15:1, 1800:100"
 * @param ref hemisphere reference; "S" and "W" make the result negative
 * @returns {number} decimal degrees (NaN when a component is not "a:b")
 */
function dmsToDecimal(dms, ref) {
    // "a:b" -> a / b
    const toQuotient = (component) => {
        const fraction = component.trim().split(":")
        return Number(fraction[0]) / Number(fraction[1])
    }

    const components = dms.split(",")
    const degrees = toQuotient(components[0])
    const minutes = toQuotient(components[1])
    const seconds = toQuotient(components[2])

    let sign = 1
    if (ref === "S" || ref === "W") {
        sign = -1
    }

    return (degrees + (minutes / 60) + (seconds / 3600)) * sign
}

View File

@@ -12,7 +12,7 @@
<nav class="navbar navbar-expand-lg"> <nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a> <a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.9.0</span> <span class="badge badge-pill version">2.10.3</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span> <span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="stats">Stats</a> <a class="btn ml-auto" href="stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings <button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings
@@ -120,6 +120,8 @@
</div> </div>
<div class="modal-body"> <div class="modal-body">
<h2>Simple search</h2>
<table class="table"> <table class="table">
<tbody> <tbody>
<tr> <tr>
@@ -168,6 +170,12 @@
<p>For more information, see <a target="_blank" <p>For more information, see <a target="_blank"
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html">Elasticsearch href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html">Elasticsearch
documentation</a></p> documentation</a></p>
<h2>Advanced search</h2>
<p>For documentation about the advanced search mode, see <a target="_blank"
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax">Elasticsearch
documentation</a></p>
</div> </div>
</div> </div>
</div> </div>
@@ -207,10 +215,16 @@
<br/> <br/>
<div class="form-group"> <div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label> <label for="settingFragmentSize">Highlight context size in characters</label>
<input type="number" class="form-control" id="settingFragmentSize">
</div> </div>
<label for="settingQueryMode">Search mode</label>
<select id="settingQueryMode" class="form-control form-control-sm">
<option value="simple">Simple</option>
<option value="advanced">Advanced</option>
</select>
<label for="settingDisplay">Display</label> <label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm"> <select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option> <option value="grid">Grid</option>

View File

@@ -10,7 +10,7 @@
<nav class="navbar navbar-expand-lg"> <nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a> <a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.9.0</span> <span class="badge badge-pill version">2.10.3</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span> <span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a> <a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" <button class="btn" type="button" data-toggle="modal" data-target="#settings"
@@ -84,10 +84,16 @@
<br/> <br/>
<div class="form-group"> <div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label> <label for="settingFragmentSize">Highlight context size in characters</label>
<input type="number" class="form-control" id="settingFragmentSize">
</div> </div>
<label for="settingQueryMode">Search mode</label>
<select id="settingQueryMode" class="form-control form-control-sm">
<option value="simple">Simple</option>
<option value="advanced">Advanced</option>
</select>
<label for="settingDisplay">Display</label> <label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm"> <select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option> <option value="grid">Grid</option>

View File

@@ -3,7 +3,7 @@
#include "sist.h" #include "sist.h"
#include <pthread.h> #include <pthread.h>
#define MAX_QUEUE_SIZE 10000 #define MAX_QUEUE_SIZE 1000000
typedef void (*thread_func_t)(void *arg); typedef void (*thread_func_t)(void *arg);
@@ -27,6 +27,7 @@ typedef struct tpool {
int thread_cnt; int thread_cnt;
int work_cnt; int work_cnt;
int done_cnt; int done_cnt;
int busy_cnt;
int free_arg; int free_arg;
int stop; int stop;
@@ -52,6 +53,14 @@ static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) {
return work; return work;
} }
void tpool_dump_debug_info(tpool_t *pool) {
LOG_DEBUGF("tpool.c", "pool->thread_cnt = %d", pool->thread_cnt)
LOG_DEBUGF("tpool.c", "pool->work_cnt = %d", pool->work_cnt)
LOG_DEBUGF("tpool.c", "pool->done_cnt = %d", pool->done_cnt)
LOG_DEBUGF("tpool.c", "pool->busy_cnt = %d", pool->busy_cnt)
LOG_DEBUGF("tpool.c", "pool->stop = %d", pool->stop)
}
/** /**
* Pop work object from thread pool * Pop work object from thread pool
*/ */
@@ -83,7 +92,7 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
} }
while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) { while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
usleep(100000); usleep(10000);
} }
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&(pool->work_mutex));
@@ -120,6 +129,10 @@ static void *tpool_worker(void *arg) {
} }
tpool_work_t *work = tpool_work_get(pool); tpool_work_t *work = tpool_work_get(pool);
if (work != NULL) {
pool->busy_cnt += 1;
}
pthread_mutex_unlock(&(pool->work_mutex)); pthread_mutex_unlock(&(pool->work_mutex));
if (work != NULL) { if (work != NULL) {
@@ -136,6 +149,7 @@ static void *tpool_worker(void *arg) {
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&(pool->work_mutex));
if (work != NULL) { if (work != NULL) {
pool->busy_cnt -= 1;
pool->done_cnt++; pool->done_cnt++;
} }
@@ -150,6 +164,7 @@ static void *tpool_worker(void *arg) {
if (pool->cleanup_func != NULL) { if (pool->cleanup_func != NULL) {
LOG_INFO("tpool.c", "Executing cleanup function") LOG_INFO("tpool.c", "Executing cleanup function")
pool->cleanup_func(); pool->cleanup_func();
LOG_DEBUG("tpool.c", "Done executing cleanup function")
} }
pthread_cond_signal(&(pool->working_cond)); pthread_cond_signal(&(pool->working_cond));
@@ -160,14 +175,14 @@ static void *tpool_worker(void *arg) {
void tpool_wait(tpool_t *pool) { void tpool_wait(tpool_t *pool) {
LOG_INFO("tpool.c", "Waiting for worker threads to finish") LOG_INFO("tpool.c", "Waiting for worker threads to finish")
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&(pool->work_mutex));
while (1) { while (TRUE) {
if (pool->done_cnt < pool->work_cnt) { if (pool->done_cnt < pool->work_cnt) {
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex)); pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
} else { } else {
usleep(500000); LOG_INFOF("tpool.c", "Received head=NULL signal, busy_cnt=%d", pool->busy_cnt);
if (pool->done_cnt == pool->work_cnt) {
pool->stop = 1; if (pool->done_cnt == pool->work_cnt && pool->busy_cnt == 0) {
usleep(1000000); pool->stop = TRUE;
break; break;
} }
} }
@@ -187,12 +202,16 @@ void tpool_destroy(tpool_t *pool) {
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&(pool->work_mutex));
tpool_work_t *work = pool->work_head; tpool_work_t *work = pool->work_head;
int count = 0;
while (work != NULL) { while (work != NULL) {
tpool_work_t *tmp = work->next; tpool_work_t *tmp = work->next;
free(work); free(work);
work = tmp; work = tmp;
count += 1;
} }
LOG_DEBUGF("tpool.c", "Destroyed %d jobs", count);
pthread_cond_broadcast(&(pool->has_work_cond)); pthread_cond_broadcast(&(pool->has_work_cond));
pthread_mutex_unlock(&(pool->work_mutex)); pthread_mutex_unlock(&(pool->work_mutex));
@@ -218,13 +237,14 @@ void tpool_destroy(tpool_t *pool) {
* Create a thread pool * Create a thread pool
* @param thread_cnt Worker threads count * @param thread_cnt Worker threads count
*/ */
tpool_t *tpool_create(size_t thread_cnt, void cleanup_func(), int free_arg) { tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg) {
tpool_t *pool = malloc(sizeof(tpool_t)); tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt; pool->thread_cnt = thread_cnt;
pool->work_cnt = 0; pool->work_cnt = 0;
pool->done_cnt = 0; pool->done_cnt = 0;
pool->stop = 0; pool->busy_cnt = 0;
pool->stop = FALSE;
pool->free_arg = free_arg; pool->free_arg = free_arg;
pool->cleanup_func = cleanup_func; pool->cleanup_func = cleanup_func;
pool->threads = calloc(sizeof(pthread_t), thread_cnt); pool->threads = calloc(sizeof(pthread_t), thread_cnt);

View File

@@ -8,12 +8,14 @@ typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg); typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg); tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg);
void tpool_start(tpool_t *pool); void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *tm); void tpool_destroy(tpool_t *pool);
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg); int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
void tpool_wait(tpool_t *tm); void tpool_wait(tpool_t *pool);
void tpool_dump_debug_info(tpool_t *pool);
#endif #endif

View File

@@ -8,18 +8,8 @@
#include <src/ctx.h> #include <src/ctx.h>
#include <mongoose.h>
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
static int has_prefix(const struct mg_str *str, const struct mg_str *prefix) {
return str->len > prefix->len && memcmp(str->p, prefix->p, prefix->len) == 0;
}
static int is_equal(const struct mg_str *s1, const struct mg_str *s2) {
return s1->len == s2->len && memcmp(s1->p, s2->p, s2->len) == 0;
}
static void send_response_line(struct mg_connection *nc, int status_code, int length, char *extra_headers) {
mg_printf( mg_printf(
nc, nc,
"HTTP/1.1 %d %s\r\n" "HTTP/1.1 %d %s\r\n"
@@ -62,36 +52,32 @@ store_t *get_tag_store(const char *index_id) {
void search_index(struct mg_connection *nc) { void search_index(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html"); send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html");
mg_send(nc, search_html, sizeof(search_html)); mg_send(nc, search_html, sizeof(search_html));
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void stats(struct mg_connection *nc) { void stats(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(stats_html), "Content-Type: text/html"); send_response_line(nc, 200, sizeof(stats_html), "Content-Type: text/html");
mg_send(nc, stats_html, sizeof(stats_html)); mg_send(nc, stats_html, sizeof(stats_html));
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != MD5_STR_LENGTH + 4) { if (hm->uri.len != MD5_STR_LENGTH + 4) {
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char arg_md5[MD5_STR_LENGTH]; char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH); memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0'; *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
index_t *index = get_index_by_id(arg_md5); index_t *index = get_index_by_id(arg_md5);
if (index == NULL) { if (index == NULL) {
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
const char *file; const char *file;
switch (atoi(hm->uri.p + 3 + MD5_STR_LENGTH)) { switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
case 1: case 1:
file = "treemap.csv"; file = "treemap.csv";
break; break;
@@ -105,54 +91,41 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st
file = "date_agg.csv"; file = "date_agg.csv";
break; break;
default: default:
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char disposition[8192]; char disposition[8192];
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"", file); snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"\r\n", file);
char full_path[PATH_MAX]; char full_path[PATH_MAX];
strcpy(full_path, index->path); strcpy(full_path, index->path);
strcat(full_path, file); strcat(full_path, file);
mg_http_serve_file(nc, hm, full_path, mg_mk_str("text/csv"), mg_mk_str(disposition)); mg_http_serve_file(nc, hm, full_path, "text/csv", disposition);
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void javascript_lib(struct mg_connection *nc) { void javascript_lib(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript"); send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript");
mg_send(nc, bundle_js, sizeof(bundle_js)); mg_send(nc, bundle_js, sizeof(bundle_js));
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void javascript_search(struct mg_connection *nc) { void javascript_search(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_js), "Content-Type: application/javascript"); send_response_line(nc, 200, sizeof(search_js), "Content-Type: application/javascript");
mg_send(nc, search_js, sizeof(search_js)); mg_send(nc, search_js, sizeof(search_js));
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
int client_requested_dark_theme(struct http_message *hm) { int client_requested_dark_theme(struct mg_http_message *hm) {
struct mg_str *cookie_header = mg_get_http_header(hm, "cookie"); struct mg_str *cookie_header = mg_http_get_header(hm, "cookie");
if (cookie_header == NULL) { if (cookie_header == NULL) {
return FALSE; return FALSE;
} }
char buf[4096]; struct mg_str sist_cookie = mg_http_get_header_var(*cookie_header, mg_str_n("sist", 4));
char *sist_cookie = buf;
if (mg_http_parse_header2(cookie_header, "sist", &sist_cookie, sizeof(buf)) == 0) {
return FALSE;
}
int ret = strcmp(sist_cookie, "dark") == 0; return mg_strcmp(sist_cookie, mg_str_n("dark", 4)) == 0;
if (sist_cookie != buf) {
free(sist_cookie);
}
return ret;
} }
void style(struct mg_connection *nc, struct http_message *hm) { void style(struct mg_connection *nc, struct mg_http_message *hm) {
if (client_requested_dark_theme(hm)) { if (client_requested_dark_theme(hm)) {
send_response_line(nc, 200, sizeof(bundle_dark_css), "Content-Type: text/css"); send_response_line(nc, 200, sizeof(bundle_dark_css), "Content-Type: text/css");
@@ -161,11 +134,9 @@ void style(struct mg_connection *nc, struct http_message *hm) {
send_response_line(nc, 200, sizeof(bundle_css), "Content-Type: text/css"); send_response_line(nc, 200, sizeof(bundle_css), "Content-Type: text/css");
mg_send(nc, bundle_css, sizeof(bundle_css)); mg_send(nc, bundle_css, sizeof(bundle_css));
} }
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) { void img_sprite_skin_flat(struct mg_connection *nc, struct mg_http_message *hm) {
if (client_requested_dark_theme(hm)) { if (client_requested_dark_theme(hm)) {
send_response_line(nc, 200, sizeof(sprite_skin_flat_dark_png), "Content-Type: image/png"); send_response_line(nc, 200, sizeof(sprite_skin_flat_dark_png), "Content-Type: image/png");
mg_send(nc, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png)); mg_send(nc, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png));
@@ -173,25 +144,22 @@ void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) {
send_response_line(nc, 200, sizeof(sprite_skin_flat_png), "Content-Type: image/png"); send_response_line(nc, 200, sizeof(sprite_skin_flat_png), "Content-Type: image/png");
mg_send(nc, sprite_skin_flat_png, sizeof(sprite_skin_flat_png)); mg_send(nc, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
} }
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != 68) { if (hm->uri.len != 68) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char arg_file_md5[MD5_STR_LENGTH]; char arg_file_md5[MD5_STR_LENGTH];
char arg_index[MD5_STR_LENGTH]; char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.p + 3, MD5_STR_LENGTH); memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0'; *(arg_index + MD5_STR_LENGTH - 1) = '\0';
memcpy(arg_file_md5, hm->uri.p + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH); memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
*(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0'; *(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
unsigned char md5_buf[MD5_DIGEST_LENGTH]; unsigned char md5_buf[MD5_DIGEST_LENGTH];
@@ -200,8 +168,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
store_t *store = get_store(arg_index); store_t *store = get_store(arg_index);
if (store == NULL) { if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index) LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -211,27 +178,28 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
send_response_line(nc, 200, data_len, "Content-Type: image/jpeg"); send_response_line(nc, 200, data_len, "Content-Type: image/jpeg");
mg_send(nc, data, data_len); mg_send(nc, data, data_len);
free(data); free(data);
} else {
mg_http_reply(nc, 404, "Content-Type: text/plain;charset=utf-8\r\n", "Not found");
return;
} }
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void search(struct mg_connection *nc, struct http_message *hm) { void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) { if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request") LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_send_error(nc, 500, NULL); mg_http_reply(nc, 500, "", "Invalid request");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.p, hm->body.len); memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
char url[4096]; char url[4096];
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index); snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->user_data = web_post_async(url, body); nc->fn_data = web_post_async(url, body);
} }
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) { void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
@@ -253,16 +221,16 @@ void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext); idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
dyn_buffer_t encoded = url_escape(url); dyn_buffer_t encoded = url_escape(url);
mg_http_send_redirect( dyn_buffer_write_char(&encoded, '\0');
nc, 308,
(struct mg_str) MG_MK_STR_N(encoded.buf, encoded.cur), char location_header[8192];
(struct mg_str) MG_NULL_STR snprintf(location_header, sizeof(location_header), "Location: %s\r\n", encoded.buf);
);
mg_http_reply(nc, 308, location_header, "");
dyn_buffer_destroy(&encoded); dyn_buffer_destroy(&encoded);
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct http_message *hm) { void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct mg_http_message *hm) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring; const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring; const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
@@ -283,10 +251,10 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path) LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)
char disposition[8192]; char disposition[8192];
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"", snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"\r\n",
name, strlen(ext) == 0 ? "" : ".", ext); name, strlen(ext) == 0 ? "" : ".", ext);
mg_http_serve_file(nc, hm, full_path, mg_mk_str(mime), mg_mk_str(disposition)); mg_http_serve_file(nc, hm, full_path, mime, disposition);
} }
void index_info(struct mg_connection *nc) { void index_info(struct mg_connection *nc) {
@@ -310,22 +278,19 @@ void index_info(struct mg_connection *nc) {
mg_send(nc, json_str, strlen(json_str)); mg_send(nc, json_str, strlen(json_str));
free(json_str); free(json_str);
cJSON_Delete(json); cJSON_Delete(json);
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != MD5_STR_LENGTH + 2) { if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char arg_md5[MD5_STR_LENGTH]; char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH); memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0'; *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
cJSON *doc = elastic_get_document(arg_md5); cJSON *doc = elastic_get_document(arg_md5);
@@ -334,16 +299,14 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
cJSON *index_id = cJSON_GetObjectItem(source, "index"); cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) { if (index_id == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
index_t *idx = get_index_by_id(index_id->valuestring); index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) { if (idx == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -352,21 +315,18 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
mg_send(nc, json_str, (int) strlen(json_str)); mg_send(nc, json_str, (int) strlen(json_str));
free(json_str); free(json_str);
cJSON_Delete(doc); cJSON_Delete(doc);
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != MD5_STR_LENGTH + 2) { if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char arg_md5[MD5_STR_LENGTH]; char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH); memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0'; *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
const char *next = arg_md5; const char *next = arg_md5;
@@ -380,8 +340,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
index_id = cJSON_GetObjectItem(source, "index"); index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) { if (index_id == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
cJSON *parent = cJSON_GetObjectItem(source, "parent"); cJSON *parent = cJSON_GetObjectItem(source, "parent");
@@ -395,8 +354,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
if (idx == NULL) { if (idx == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
nc->flags |= MG_F_SEND_AND_CLOSE; mg_http_reply(nc, 404, "", "Not found");
mg_http_send_error(nc, 404, NULL);
return; return;
} }
@@ -417,8 +375,6 @@ void status(struct mg_connection *nc) {
} }
free(status); free(status);
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
typedef struct { typedef struct {
@@ -464,35 +420,32 @@ tag_req_t *parse_tag_request(cJSON *json) {
return req; return req;
} }
void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != MD5_STR_LENGTH + 4) { if (hm->uri.len != MD5_STR_LENGTH + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char arg_index[MD5_STR_LENGTH]; char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.p + 5, MD5_STR_LENGTH); memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0'; *(arg_index + MD5_STR_LENGTH - 1) = '\0';
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) { if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request") LOG_DEBUG("serve.c", "Invalid tag request")
mg_http_send_error(nc, 400, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
store_t *store = get_tag_store(arg_index); store_t *store = get_tag_store(arg_index);
if (store == NULL) { if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index) LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.p, hm->body.len); memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body); cJSON *json = cJSON_Parse(body);
@@ -501,8 +454,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index) LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
cJSON_Delete(json); cJSON_Delete(json);
free(body); free(body);
mg_http_send_error(nc, 400, NULL); mg_http_reply(nc, 400, "", "Invalid request");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -545,7 +497,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
char url[4096]; char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id); snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->user_data = web_post_async(url, buf); nc->fn_data = web_post_async(url, buf);
} else { } else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name)); cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
@@ -565,7 +517,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
char url[4096]; char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id); snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->user_data = web_post_async(url, buf); nc->fn_data = web_post_async(url, buf);
} }
char *json_str = cJSON_PrintUnformatted(arr); char *json_str = cJSON_PrintUnformatted(arr);
@@ -579,39 +531,22 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
free(body); free(body);
} }
int validate_auth(struct mg_connection *nc, struct http_message *hm) { int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) {
char user[256] = {0,}; char user[256] = {0,};
char pass[256] = {0,}; char pass[256] = {0,};
int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass)); mg_http_creds(hm, user, sizeof(user), pass, sizeof(pass));
if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) { if (strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n" mg_http_reply(nc, 401, "WWW-Authenticate: Basic realm=\"sist2\"\r\n", "");
"WWW-Authenticate: Basic realm=\"sist2\"\r\n"
"Content-Length: 0\r\n\r\n");
nc->flags |= MG_F_SEND_AND_CLOSE;
return FALSE; return FALSE;
} }
return TRUE; return TRUE;
} }
static void ev_router(struct mg_connection *nc, int ev, void *p) { static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(void *fn_data)) {
struct mg_str scheme;
struct mg_str user_info;
struct mg_str host;
unsigned int port;
struct mg_str path;
struct mg_str query;
struct mg_str fragment;
if (ev == MG_EV_HTTP_REQUEST) {
struct http_message *hm = (struct http_message *) p;
if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
if (ev == MG_EV_HTTP_MSG) {
struct mg_http_message *hm = (struct mg_http_message *) ev_data;
if (WebCtx.auth_enabled == TRUE) { if (WebCtx.auth_enabled == TRUE) {
if (!validate_auth(nc, hm)) { if (!validate_auth(nc, hm)) {
@@ -619,52 +554,48 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
} }
} }
if (is_equal(&path, &((struct mg_str) MG_MK_STR("/")))) { if (mg_http_match_uri(hm, "/")) {
search_index(nc); search_index(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/css")))) { } else if (mg_http_match_uri(hm, "/css")) {
style(nc, hm); style(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/stats")))) { } else if (mg_http_match_uri(hm, "/stats")) {
stats(nc); stats(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jslib")))) { } else if (mg_http_match_uri(hm, "/jslib")) {
javascript_lib(nc); javascript_lib(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jssearch")))) { } else if (mg_http_match_uri(hm, "/jssearch")) {
javascript_search(nc); javascript_search(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/img/sprite-skin-flat.png")))) { } else if (mg_http_match_uri(hm, "/img/sprite-skin-flat.png")) {
img_sprite_skin_flat(nc, hm); img_sprite_skin_flat(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/es")))) { } else if (mg_http_match_uri(hm, "/es")) {
search(nc, hm); search(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/i")))) { } else if (mg_http_match_uri(hm, "/i")) {
index_info(nc); index_info(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/status")))) { } else if (mg_http_match_uri(hm, "/status")) {
status(nc); status(nc);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/f/")))) { } else if (mg_http_match_uri(hm, "/f/*")) {
file(nc, hm, &path); file(nc, hm);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/t/")))) { } else if (mg_http_match_uri(hm, "/t/*/*")) {
thumbnail(nc, hm, &path); thumbnail(nc, hm);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/s/")))) { } else if (mg_http_match_uri(hm, "/s/*/*")) {
stats_files(nc, hm, &path); stats_files(nc, hm);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/tag/")))) { } else if (mg_http_match_uri(hm, "/tag/*")) {
if (WebCtx.tag_auth_enabled == TRUE) { if (WebCtx.tag_auth_enabled == TRUE && !validate_auth(nc, hm)) {
if (!validate_auth(nc, hm)) { return;
return;
}
} }
tag(nc, hm, &path); tag(nc, hm);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) { } else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm, &path); document_info(nc, hm);
} else { } else {
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Page not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
} else if (ev == MG_EV_POLL) { } else if (ev == MG_EV_POLL) {
if (nc->user_data != NULL) { if (nc->fn_data != NULL) {
//Waiting for ES reply //Waiting for ES reply
subreq_ctx_t *ctx = (subreq_ctx_t *) nc->user_data; subreq_ctx_t *ctx = (subreq_ctx_t *) nc->fn_data;
web_post_async_poll(ctx); web_post_async_poll(ctx);
if (ctx->done == TRUE) { if (ctx->done == TRUE) {
response_t *r = ctx->response; response_t *r = ctx->response;
if (r->status_code == 200) { if (r->status_code == 200) {
@@ -684,14 +615,14 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
free(json_str); free(json_str);
free(tmp); free(tmp);
} }
mg_http_send_error(nc, 500, NULL);
mg_http_reply(nc, 500, "", "");
} }
free_response(r); free_response(r);
free(ctx->data); free(ctx->data);
free(ctx); free(ctx);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->fn_data = NULL;
nc->user_data = NULL;
} }
} }
} }
@@ -702,15 +633,18 @@ void serve(const char *listen_address) {
printf("Starting web server @ http://%s\n", listen_address); printf("Starting web server @ http://%s\n", listen_address);
struct mg_mgr mgr; struct mg_mgr mgr;
mg_mgr_init(&mgr, NULL); mg_mgr_init(&mgr);
struct mg_connection *nc = mg_bind(&mgr, listen_address, ev_router); int ok = 1;
struct mg_connection *nc = mg_http_listen(&mgr, listen_address, ev_router, NULL);
if (nc == NULL) { if (nc == NULL) {
LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address) LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address)
} }
mg_set_protocol_http_websocket(nc);
for (;;) { while (ok) {
mg_mgr_poll(&mgr, 10); mg_mgr_poll(&mgr, 10);
} }
mg_mgr_free(&mgr);
LOG_INFO("serve.c", "Finished web event loop")
} }

File diff suppressed because one or more lines are too long

View File

@@ -17,17 +17,19 @@ def copy_files(files):
def sist2(*args): def sist2(*args):
print("./sist2 " + " ".join(args))
return subprocess.check_output( return subprocess.check_output(
args=["./sist2_debug", *args], args=["./sist2", *args],
) )
def sist2_index(files, *args): def sist2_index(files, *args):
path = copy_files(files) path = copy_files(files)
shutil.rmtree("i", ignore_errors=True) shutil.rmtree("test_i", ignore_errors=True)
sist2("scan", path, "-o", "i", *args) sist2("scan", path, "-o", "test_i", *args)
return iter(sist2_index_to_dict("i")) return iter(sist2_index_to_dict("test_i"))
def sist2_incremental_index(files, func=None, *args): def sist2_incremental_index(files, func=None, *args):
@@ -36,14 +38,14 @@ def sist2_incremental_index(files, func=None, *args):
if func: if func:
func(path) func(path)
shutil.rmtree("i_inc", ignore_errors=True) shutil.rmtree("test_i_inc", ignore_errors=True)
sist2("scan", path, "-o", "i_inc", "--incremental", "i", *args) sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
return iter(sist2_index_to_dict("i_inc")) return iter(sist2_index_to_dict("test_i_inc"))
def sist2_index_to_dict(index): def sist2_index_to_dict(index):
res = subprocess.check_output( res = subprocess.check_output(
args=["./sist2_debug", "index", "--print", index], args=["./sist2", "index", "--print", index],
) )
for line in res.splitlines(): for line in res.splitlines():