Compare commits

..

28 Commits

Author SHA1 Message Date
2f0e999b06
Update README.md 2021-06-13 09:50:56 -04:00
bf28dc8993
Merge pull request #164 from simon987/dev
v2.10.1
2021-06-13 09:50:23 -04:00
c6fee7f6e2 update argparse 2021-06-13 09:41:18 -04:00
201c2a1a47 Update CI things 2021-06-13 09:26:27 -04:00
7c46ad632a Update readme 2021-06-11 20:44:47 -04:00
5b8c13fd13 Handle GPS metadata in the UI 2021-06-11 20:41:05 -04:00
efa4a06e56 Fix meta_key UB problem 2021-06-11 20:19:36 -04:00
81670ee107 Fix subtitle problems 2021-06-11 10:05:33 -04:00
f9dac80905 Fix file download in mongoose 7.x 2021-06-09 13:34:38 -04:00
f8d9b718c0 Fix memory leak in RAW parsing 2021-06-09 08:22:31 -04:00
6f5fdc2935 Fix for segfault in some comic files 2021-06-07 09:01:46 -04:00
a01f6dff1f Use 16-bit ints for meta keys (wip) 2021-06-07 08:40:12 -04:00
22dd58e140 add signal handler w/ debug info 2021-05-08 16:23:24 -04:00
523c123e2e Enable advanced search with query_string 2021-05-06 20:07:20 -04:00
fc7f30d670 Add tests for subtitle 2021-05-05 16:10:55 -04:00
152fe11669 Set passphrase arg in arc_ctx 2021-05-05 15:52:46 -04:00
33f97f6bfb Increase scan queue size 2021-05-05 14:25:35 -04:00
71f9dfcfe0 sync libscan 2021-05-05 14:21:01 -04:00
5f657d61b3
Merge pull request #157 from simon987/mongoose-7
Update to mongoose 7.x, change Docker build
2021-05-05 14:18:36 -04:00
908def1016 Fix build, update dockerfile 2021-05-05 14:13:46 -04:00
db3d312835 wip 2021-05-05 13:55:57 -04:00
32c9cb28a3 Read subtitles from media files, fix bug in text_buffer 2021-05-05 13:55:57 -04:00
f839127129 Change encoding for antiword PDF 2021-05-05 13:55:57 -04:00
8111a6c143 Workaround for UTF8 .doc files 2021-05-05 13:55:57 -04:00
707a570828 Pause all other audio tags on play #148 2021-04-17 13:24:21 -04:00
50771bd1dc Read subtitles from media files, fix bug in text_buffer 2021-03-26 19:48:16 -04:00
bc884e137c Change encoding for antiword PDF 2021-01-16 12:17:43 -05:00
ce1e241dea Workaround for UTF8 .doc files 2021-01-16 12:13:56 -05:00
33 changed files with 526 additions and 334 deletions

24
.dockerignore Normal file
View File

@ -0,0 +1,24 @@
# Paths excluded from the Docker build context.
# Each non-comment line is one pattern; '**' matches across directory levels.

# Editor/IDE and project files
.idea
*/thumbs
*.cbp

# Files produced by running cmake/make inside the source tree
CMakeCache.txt
CMakeFiles
cmake-build-debug
cmake_install.cmake
Makefile
*.out
LOG

# NOTE(review): 'sist2*' matches every root-level entry whose name starts with
# "sist2" -- confirm nothing the image build needs falls under it.
sist2*
index.sist2/
bundle*.css
bundle.js

# Static archives and core dumps at any depth
**/*.a
**/vgcore.*
build/

# Repository metadata and the libscan test fixtures
.git/
third-party/libscan/libscan-test-files/

# Per-directory dependency build trees and test binaries, at any depth
**/ext_ffmpeg
**/ext_libmobi
**/scan_a_test

Dockerfile
*.idx/

View File

@ -8,7 +8,7 @@ platform:
steps: steps:
- name: build - name: build
image: simon987/ubuntu_ci image: simon987/sist2-build
commands: commands:
- ./ci/build.sh - ./ci/build.sh
- name: docker - name: docker
@ -19,8 +19,8 @@ steps:
password: password:
from_secret: DOCKER_PASSWORD from_secret: DOCKER_PASSWORD
repo: simon987/sist2 repo: simon987/sist2
context: ./Docker/ context: ./
dockerfile: ./Docker/Dockerfile dockerfile: ./Dockerfile
auto_tag: true auto_tag: true
auto_tag_suffix: x64-linux auto_tag_suffix: x64-linux
when: when:
@ -41,32 +41,32 @@ steps:
source: source:
- ./VERSION - ./VERSION
- ./sist2-x64-linux - ./sist2-x64-linux
- ./sist2-x64-linux-debug.tar.gz - ./sist2-x64-linux-debug
--- #---
kind: pipeline #kind: pipeline
type: docker #type: docker
name: arm64 #name: arm64
#
platform: #platform:
arch: arm64 # arch: arm64
#
steps: #steps:
- name: build # - name: build
image: simon987/ubuntu_ci_arm # image: simon987/ubuntu_ci_arm
commands: # commands:
- ./ci/build_arm64.sh # - ./ci/build_arm64.sh
- name: scp files # - name: scp files
image: appleboy/drone-scp # image: appleboy/drone-scp
settings: # settings:
host: # host:
from_secret: SSH_HOST # from_secret: SSH_HOST
port: # port:
from_secret: SSH_PORT # from_secret: SSH_PORT
user: # user:
from_secret: SSH_USER # from_secret: SSH_USER
key: # key:
from_secret: SSH_KEY # from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ # target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source: # source:
- ./sist2-arm64-linux # - ./sist2-arm64-linux

1
.gitignore vendored
View File

@ -16,3 +16,4 @@ bundle.js
vgcore.* vgcore.*
build/ build/
third-party/ third-party/
*.idx/

View File

@ -5,7 +5,7 @@ project(sist2 C)
option(SIST_DEBUG "Build a debug executable" on) option(SIST_DEBUG "Build a debug executable" on)
set(BUILD_TESTS off) set(BUILD_TESTS on)
add_subdirectory(third-party/libscan) add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off) set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse) add_subdirectory(third-party/argparse)
@ -36,14 +36,15 @@ add_executable(sist2
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(PkgConfig REQUIRED)
pkg_search_module(GLIB REQUIRED glib-2.0)
find_package(lmdb CONFIG REQUIRED) find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED) find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-glib CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED) find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED) find_package(CURL CONFIG REQUIRED)
#find_package(OpenSSL REQUIRED)
target_include_directories( target_include_directories(
sist2 PUBLIC sist2 PUBLIC
@ -51,6 +52,7 @@ target_include_directories(
${CMAKE_SOURCE_DIR}/third-party/utf8.h/ ${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/ ${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/ ${CMAKE_SOURCE_DIR}/
${GLIB_INCLUDE_DIRS}
) )
target_compile_options( target_compile_options(
@ -103,7 +105,7 @@ target_link_libraries(
lmdb lmdb
cjson cjson
argparse argparse
unofficial::glib::glib ${GLIB_LDFLAGS}
unofficial::mongoose::mongoose unofficial::mongoose::mongoose
CURL::libcurl CURL::libcurl

View File

@ -1,22 +0,0 @@
FROM ubuntu:19.10
MAINTAINER simon987 <me@simon987.net>
RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
curl libtiff5 libpng16-16 libpcre3
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
ADD sist2_arm64 /root/sist2
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]

View File

@ -1,13 +0,0 @@
rm ./sist2_arm64
cp ../sist2_arm64.gz .
gzip -d sist2_arm64.gz
version=$(./sist2_arm64 --version)
echo "Version ${version}"
docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest
docker push simon987/sist2-arm64:"${version}"
docker push simon987/sist2-arm64:latest
docker run --rm simon987/sist2-arm64 -v

View File

@ -1,6 +1,14 @@
FROM ubuntu:20.04 FROM simon987/sist2-build as build
MAINTAINER simon987 <me@simon987.net> MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/
ADD . /build/
RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
FROM ubuntu:20.10
RUN apt update RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \ RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
curl libtiff5 libpng16-16 libpcre3 curl libtiff5 libpng16-16 libpcre3
@ -12,9 +20,9 @@ RUN mkdir -p /usr/share/tessdata && \
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\ curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\ curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\ curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
ADD sist2 /root/sist2 COPY --from=build /build/sist2 /root/sist2
ENV LANG C.UTF-8 ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8 ENV LC_ALL C.UTF-8

1
Dockerfile.arm64 Normal file
View File

@ -0,0 +1 @@
# TODO

View File

@ -1,6 +1,6 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg) ![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2) [![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/sist2/simon987_sist2/) [![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files) **Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)
@ -25,14 +25,12 @@ sist2 (Simple incremental search tool)
* OCR support with tesseract \*\*\* * OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization * Stats page & disk utilisation visualization
\* See [format support](#format-support) \* See [format support](#format-support)
\*\* See [Archive files](#archive-files) \*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr) \*\*\* See [OCR](#ocr)
![stats](docs/stats.png) ![stats](docs/stats.png)
## Getting Started ## Getting Started
1. Have an Elasticsearch (>= 6.X.X) instance running 1. Have an Elasticsearch (>= 6.X.X) instance running
@ -57,10 +55,8 @@ sist2 (Simple incremental search tool)
1. See [Usage guide](docs/USAGE.md) 1. See [Usage guide](docs/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux) \* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
## Example usage ## Example usage
See [Usage guide](docs/USAGE.md) for more details See [Usage guide](docs/USAGE.md) for more details
@ -69,7 +65,6 @@ See [Usage guide](docs/USAGE.md) for more details
1. Push index to Elasticsearch: `sist2 index ./docs_idx` 1. Push index to Elasticsearch: `sist2 index ./docs_idx`
1. Start web interface: `sist2 web ./docs_idx` 1. Start web interface: `sist2 web ./docs_idx`
## Format support ## Format support
File type | Library | Content | Thumbnail | Metadata File type | Library | Content | Thumbnail | Metadata
@ -78,8 +73,8 @@ pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
cbz,cbr | *(none)* | - | yes | - | cbz,cbr | *(none)* | - | yes | - |
`audio/*` | ffmpeg | - | yes | ID3 tags | `audio/*` | ffmpeg | - | yes | ID3 tags |
`video/*` | ffmpeg | - | yes | title, comment, artist | `video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) | `image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags | raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - | `text/plain` | *(none)* | yes | no | - |
html, xml | *(none)* | yes | no | - | html, xml | *(none)* | yes | no | - |
@ -91,38 +86,37 @@ mobi, azw, azw3 | libmobi | yes | no | author, title |
\* *See [Archive files](#archive-files)* \* *See [Archive files](#archive-files)*
### Archive files ### Archive files
**sist2** will scan files stored into archive files (zip, tar, 7z...) as if
they were directly in the file system. Recursive (archives inside archives) **sist2** will scan files stored into archive files (zip, tar, 7z...) as if they were directly in the file system.
Recursive (archives inside archives)
scan is also supported. scan is also supported.
**Limitations**: **Limitations**:
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) * Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
is limited (see `--mem-buffer` option) is limited (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where * Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split **sist2** is not I/O bound, scans might be faster when larger archives are split into smaller parts.
into smaller parts.
### OCR ### OCR
You can enable OCR support for pdf,xps,fb2,epub file types with the You can enable OCR support for pdf,xps,fb2,epub file types with the
`--ocr <lang>` option. Download the language data files with your `--ocr <lang>` option. Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files). directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa) pre-installed. (hin, jpn, eng, fra, rus, spa) pre-installed.
Examples Examples
```bash ```bash
sist2 scan --ocr jpn ~/Books/Manga/ sist2 scan --ocr jpn ~/Books/Manga/
sist2 scan --ocr eng ~/Books/Textbooks/ sist2 scan --ocr eng ~/Books/Textbooks/
``` ```
## Build from source ## Build from source
You can compile **sist2** by yourself if you don't want to use the pre-compiled You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries (GCC 7+ required).
binaries (GCC 7+ required).
1. Install compile-time dependencies 1. Install compile-time dependencies

View File

@ -8,7 +8,7 @@ git submodule update --init --recursive
rm -rf CMakeFiles CMakeCache.txt rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" . cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 33 make -j $(nproc)
strip sist2 strip sist2
./sist2 -v > VERSION ./sist2 -v > VERSION
cp sist2 Docker/ cp sist2 Docker/
@ -16,7 +16,5 @@ mv sist2 sist2-x64-linux
rm -rf CMakeFiles CMakeCache.txt rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" . cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 33 make -j $(nproc)
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
mv sist2_debug sist2-x64-linux-debug mv sist2_debug sist2-x64-linux-debug
tar -czf sist2-x64-linux-debug.tar.gz sist2-x64-linux-debug libasan.so.2

View File

@ -46,6 +46,7 @@ Scan options
--fast Only index file names & mime type --fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005 --treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000 --mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
--read-subtitles Read subtitles from media files
Index options Index options
-t, --threads=<int> Number of threads. DEFAULT=1 -t, --threads=<int> Number of threads. DEFAULT=1
@ -91,7 +92,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute) Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
will be copied to the new index and will not be parsed again. will be copied to the new index and will not be parsed again.
* `-o, --output` Output directory. * `-o, --output` Output directory.
* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url)) * `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
* `--name` Set the `name` option for the web module * `--name` Set the `name` option for the web module
* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth * `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
* `--archive` Archive file mode. * `--archive` Archive file mode.
@ -123,6 +124,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
larger than this number will be read sequentially and no *seek* operations will be supported. larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -` To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
### Scan examples ### Scan examples

View File

@ -105,10 +105,10 @@
"analyzer": "my_nGram", "analyzer": "my_nGram",
"type": "text" "type": "text"
}, },
"_keyword.*": { "_keyword.*": {
"type": "keyword" "type": "keyword"
}, },
"_text.*": { "_text.*": {
"analyzer": "content_analyzer", "analyzer": "content_analyzer",
"type": "text", "type": "text",
"fields": { "fields": {
@ -165,6 +165,30 @@
"exif_user_comment": { "exif_user_comment": {
"type": "text" "type": "text"
}, },
"exif_gps_longitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_longitude_dec": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_ref": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dms": {
"type": "keyword",
"index": false
},
"exif_gps_latitude_dec": {
"type": "keyword",
"index": false
},
"author": { "author": {
"type": "text" "type": "text"
}, },

6
scripts/reset.sh Executable file
View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Remove in-tree build outputs for the top-level project and for
# third-party/libscan. All paths are relative, so the result depends on the
# directory the script is invoked from.

make clean

# Generated CMake/make files and dependency build trees to delete.
stale_paths=(
    CMakeFiles/
    CMakeCache.txt
    Makefile
    third-party/libscan/CMakeFiles
    third-party/libscan/CMakeCache.txt
    third-party/libscan/third-party/ext_ffmpeg
    third-party/libscan/third-party/ext_libmobi
    third-party/libscan/Makefile
)

rm -rf "${stale_paths[@]}"

View File

@ -227,6 +227,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth) LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
LOG_DEBUGF("cli.c", "arg path=%s", args->path) LOG_DEBUGF("cli.c", "arg path=%s", args->path)
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive) LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase)
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang) LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path) LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex) LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)

View File

@ -18,6 +18,7 @@ typedef struct scan_args {
char *path; char *path;
char *archive; char *archive;
archive_mode_t archive_mode; archive_mode_t archive_mode;
char *archive_passphrase;
char *tesseract_lang; char *tesseract_lang;
const char *tesseract_path; const char *tesseract_path;
char *exclude_regex; char *exclude_regex;
@ -25,6 +26,7 @@ typedef struct scan_args {
const char* treemap_threshold_str; const char* treemap_threshold_str;
double treemap_threshold; double treemap_threshold;
int max_memory_buffer; int max_memory_buffer;
int read_subtitles;
} scan_args_t; } scan_args_t;
scan_args_t *scan_args_create(); scan_args_t *scan_args_create();

View File

@ -40,6 +40,8 @@ typedef struct {
pcre_extra *exclude_extra; pcre_extra *exclude_extra;
int fast; int fast;
GHashTable *dbg_current_files;
scan_arc_ctx_t arc_ctx; scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx; scan_comic_ctx_t comic_ctx;
scan_ebook_ctx_t ebook_ctx; scan_ebook_ctx_t ebook_ctx;

File diff suppressed because one or more lines are too long

View File

@ -15,9 +15,13 @@ typedef struct {
char has_parent; char has_parent;
} line_t; } line_t;
#define META_NEXT 0xFFFF
void skip_meta(FILE *file) { void skip_meta(FILE *file) {
enum metakey key = getc(file); enum metakey key = 0;
while (key != '\n') { fread(&key, sizeof(uint16_t), 1, file);
while (key != META_NEXT) {
if (IS_META_INT(key)) { if (IS_META_INT(key)) {
fseek(file, sizeof(int), SEEK_CUR); fseek(file, sizeof(int), SEEK_CUR);
} else if (IS_META_LONG(key)) { } else if (IS_META_LONG(key)) {
@ -26,7 +30,7 @@ void skip_meta(FILE *file) {
while ((getc(file))) {} while ((getc(file))) {}
} }
key = getc(file); fread(&key, sizeof(uint16_t), 1, file);
} }
} }
@ -66,7 +70,7 @@ index_descriptor_t read_index_descriptor(char *path) {
} }
char *buf = malloc(info.st_size + 1); char *buf = malloc(info.st_size + 1);
int ret = read(fd, buf, info.st_size); size_t ret = read(fd, buf, info.st_size);
if (ret == -1) { if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno)); LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
} }
@ -152,8 +156,20 @@ char *get_meta_key_text(enum metakey meta_key) {
return "thumbnail"; return "thumbnail";
case MetaPages: case MetaPages:
return "pages"; return "pages";
case MetaExifGpsLongitudeRef:
return "exif_gps_longitude_ref";
case MetaExifGpsLongitudeDMS:
return "exif_gps_longitude_dms";
case MetaExifGpsLongitudeDec:
return "exif_gps_longitude_dec";
case MetaExifGpsLatitudeRef:
return "exif_gps_latitude_ref";
case MetaExifGpsLatitudeDMS:
return "exif_gps_latitude_dms";
case MetaExifGpsLatitudeDec:
return "exif_gps_latitude_dec";
default: default:
return NULL; LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
} }
} }
@ -183,7 +199,7 @@ void write_document(document_t *doc) {
meta_line_t *meta = doc->meta_head; meta_line_t *meta = doc->meta_head;
while (meta != NULL) { while (meta != NULL) {
dyn_buffer_write_char(&buf, meta->key); dyn_buffer_write_short(&buf, (uint16_t) meta->key);
if (IS_META_INT(meta->key)) { if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->int_val); dyn_buffer_write_int(&buf, meta->int_val);
@ -197,7 +213,7 @@ void write_document(document_t *doc) {
meta = meta->next; meta = meta->next;
free(tmp); free(tmp);
} }
dyn_buffer_write_char(&buf, '\n'); dyn_buffer_write_short(&buf, META_NEXT);
int res = write(index_fd, buf.buf, buf.cur); int res = write(index_fd, buf.buf, buf.cur);
if (res == -1) { if (res == -1) {
@ -221,7 +237,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
FILE *file = fopen(path, "rb"); FILE *file = fopen(path, "rb");
while (TRUE) { while (TRUE) {
buf.cur = 0; buf.cur = 0;
size_t _ = fread((void *) &line, 1, sizeof(line_t), file); size_t _ = fread((void *) &line, sizeof(line_t), 1, file);
if (feof(file)) { if (feof(file)) {
break; break;
} }
@ -268,9 +284,10 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
cJSON_AddStringToObject(document, "path", ""); cJSON_AddStringToObject(document, "path", "");
} }
enum metakey key = getc(file); enum metakey key = 0;
size_t ret = 0; fread(&key, sizeof(uint16_t), 1, file);
while (key != '\n') { size_t ret;
while (key != META_NEXT) {
switch (key) { switch (key) {
case MetaPages: case MetaPages:
case MetaWidth: case MetaWidth:
@ -308,6 +325,12 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
case MetaAuthor: case MetaAuthor:
case MetaModifiedBy: case MetaModifiedBy:
case MetaThumbnail: case MetaThumbnail:
case MetaExifGpsLongitudeDMS:
case MetaExifGpsLongitudeDec:
case MetaExifGpsLongitudeRef:
case MetaExifGpsLatitudeDMS:
case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef:
case MetaTitle: { case MetaTitle: {
buf.cur = 0; buf.cur = 0;
while ((c = getc(file)) != 0) { while ((c = getc(file)) != 0) {
@ -323,7 +346,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key) LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
} }
key = getc(file); fread(&key, sizeof(uint16_t), 1, file);
} }
cJSON *meta_obj = NULL; cJSON *meta_obj = NULL;
@ -458,7 +481,7 @@ void incremental_read(GHashTable *table, const char *filepath) {
incremental_put(table, line.path_md5, line.mtime); incremental_put(table, line.path_md5, line.mtime);
while ((getc(file))) {} while ((getc(file)) != 0) {}
skip_meta(file); skip_meta(file);
} }
fclose(file); fclose(file);
@ -508,11 +531,11 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
free(buf); free(buf);
} }
enum metakey key; enum metakey key = 0;
while (1) { while (1) {
key = getc(file); fread(&key, sizeof(uint16_t), 1, file);
fwrite(&key, sizeof(char), 1, dst_file); fwrite(&key, sizeof(uint16_t), 1, dst_file);
if (key == '\n') { if (key == META_NEXT) {
break; break;
} }

View File

@ -4,6 +4,7 @@
store_t *store_create(char *path, size_t chunk_size) { store_t *store_create(char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t)); store_t *store = malloc(sizeof(struct store_t));
#if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size; store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL); pthread_rwlock_init(&store->lock, NULL);
@ -28,15 +29,18 @@ store_t *store_create(char *path, size_t chunk_size) {
mdb_txn_begin(store->env, NULL, 0, &txn); mdb_txn_begin(store->env, NULL, 0, &txn);
mdb_dbi_open(txn, NULL, 0, &store->dbi); mdb_dbi_open(txn, NULL, 0, &store->dbi);
mdb_txn_commit(txn); mdb_txn_commit(txn);
#endif
return store; return store;
} }
void store_destroy(store_t *store) { void store_destroy(store_t *store) {
#if (SIST_FAKE_STORE != 1)
pthread_rwlock_destroy(&store->lock); pthread_rwlock_destroy(&store->lock);
mdb_close(store->env, store->dbi); mdb_close(store->env, store->dbi);
mdb_env_close(store->env); mdb_env_close(store->env);
#endif
free(store); free(store);
} }
@ -56,6 +60,8 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
} }
} }
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key; MDB_val mdb_key;
mdb_key.mv_data = key; mdb_key.mv_data = key;
mdb_key.mv_size = key_len; mdb_key.mv_size = key_len;
@ -92,10 +98,13 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
if (put_ret != 0) { if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret)) LOG_ERROR("store.c", mdb_strerror(put_ret))
} }
#endif
} }
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) { char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
char *buf = NULL; char *buf = NULL;
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key; MDB_val mdb_key;
mdb_key.mv_data = key; mdb_key.mv_data = key;
mdb_key.mv_size = key_len; mdb_key.mv_size = key_len;
@ -116,6 +125,7 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
} }
mdb_txn_abort(txn); mdb_txn_abort(txn);
#endif
return buf; return buf;
} }

View File

@ -21,7 +21,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0" #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.9.0"; static const char *const Version = "2.10.1";
static const char *const usage[] = { static const char *const usage[] = {
"sist2 scan [OPTION]... PATH", "sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX", "sist2 index [OPTION]... INDEX",
@ -30,6 +30,69 @@ static const char *const usage[] = {
NULL, NULL,
}; };
#include<signal.h>
#include<unistd.h>
static __sighandler_t sigsegv_handler = NULL;
static __sighandler_t sigabrt_handler = NULL;
void sig_handler(int signum) {
LogCtx.verbose = 1;
LogCtx.very_verbose = 1;
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
GHashTableIter iter;
g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
parse_job_t *job = value;
if (isatty(STDERR_FILENO)) {
LOG_DEBUGF(
"*SIGNAL HANDLER*",
"Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
31 + ((unsigned int) key) % 7, key, job->filepath
);
} else {
LOG_DEBUGF(
"*SIGNAL HANDLER*",
"THREAD [%04llX] was working on job %s",
key, job->filepath
);
}
}
tpool_dump_debug_info(ScanCtx.pool);
LOG_INFO(
"*SIGNAL HANDLER*",
"Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
)
LOG_INFO(
"*SIGNAL HANDLER*",
"sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
)
#ifndef SIST_DEBUG
LOG_WARNING(
"*SIGNAL HANDLER*",
"You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
"releases page to provide additionnal information when submitting a bug report."
)
#endif
if (signum == SIGSEGV && sigsegv_handler != NULL) {
sigsegv_handler(signum);
} else if (signum == SIGABRT && sigabrt_handler != NULL) {
sigabrt_handler(signum);
}
}
void init_dir(const char *dirpath) { void init_dir(const char *dirpath) {
char path[PATH_MAX]; char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath); snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@ -99,6 +162,13 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.arc_ctx.log = _log; ScanCtx.arc_ctx.log = _log;
ScanCtx.arc_ctx.logf = _logf; ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse; ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
if (args->archive_passphrase != NULL) {
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
} else {
ScanCtx.arc_ctx.passphrase[0] = 0;
}
ScanCtx.dbg_current_files = g_hash_table_new(g_int64_hash, g_int64_equal);
// Comic // Comic
ScanCtx.comic_ctx.log = _log; ScanCtx.comic_ctx.log = _log;
@ -132,6 +202,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.media_ctx.logf = _logf; ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store; ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024; ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
init_media(); init_media();
// OOXML // OOXML
@ -399,6 +470,9 @@ void sist2_web(web_args_t *args) {
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler);
sigabrt_handler = signal(SIGABRT, sig_handler);
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
scan_args_t *scan_args = scan_args_create(); scan_args_t *scan_args = scan_args_create();
@ -439,6 +513,9 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). " OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
"skip: Don't parse, list: only get file names as text, " "skip: Don't parse, list: only get file names as text, "
"shallow: Don't parse archives inside archives. DEFAULT: recurse"), "shallow: Don't parse archives inside archives. DEFAULT: recurse"),
OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
"Passphrase for encrypted archive files"),
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see " OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
"which are installed on your machine)"), "which are installed on your machine)"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"), OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
@ -448,6 +525,7 @@ int main(int argc, const char *argv[]) {
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer, OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives " "Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"), "(see USAGE.md). DEFAULT: 2000"),
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_GROUP("Index options"), OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),

View File

@ -41,11 +41,18 @@ void fs_reset(struct vfile *f) {
#define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL)) #define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL))
/**
 * Record the job that the calling thread is about to parse.
 *
 * The entry is stored in ScanCtx.dbg_current_files, keyed by the calling
 * thread's pthread_self() value, so that a later dump of that table can
 * report which file each thread was working on.
 *
 * @param job Job being started by the calling thread. The pointer itself is
 *            stored (no copy is made), so it has to stay valid for as long
 *            as the entry may be read.
 */
void set_dbg_current_file(parse_job_t *job) {
    /*
     * GHashTable is not safe for concurrent writers, and this function is
     * keyed per-thread, i.e. it is expected to be called from several
     * threads at once. Serialise the insertions with a function-local mutex
     * so the table cannot be corrupted while it is being resized/updated.
     */
    static pthread_mutex_t dbg_current_files_mutex = PTHREAD_MUTEX_INITIALIZER;

    unsigned long long pid = (unsigned long long) pthread_self();

    /*
     * NOTE(review): the table is created with g_int64_hash/g_int64_equal,
     * which treat the key as a pointer to a 64-bit integer, while the key
     * passed here is the thread id packed into a pointer. Confirm that this
     * is intentional (g_direct_hash/g_direct_equal would match this usage).
     */
    pthread_mutex_lock(&dbg_current_files_mutex);
    g_hash_table_replace(ScanCtx.dbg_current_files, GINT_TO_POINTER(pid), job);
    pthread_mutex_unlock(&dbg_current_files_mutex);
}
void parse(void *arg) { void parse(void *arg) {
parse_job_t *job = arg; parse_job_t *job = arg;
document_t doc; document_t doc;
set_dbg_current_file(job);
doc.filepath = job->filepath; doc.filepath = job->filepath;
doc.ext = (short) job->ext; doc.ext = (short) job->ext;
doc.base = (short) job->base; doc.base = (short) job->base;

View File

@ -192,6 +192,19 @@ function makeUserTag(tag, hit) {
return userTag; return userTag;
} }
/**
 * Append an "Exif GPS" row to a metadata table body.
 *
 * The row contains a link that opens the given coordinates in Google Maps
 * (satellite view, zoom level 17) in a new tab.
 *
 * @param tbody jQuery-wrapped <tbody> element the row is appended to
 * @param latitude Latitude in decimal degrees (number or numeric string)
 * @param longitude Longitude in decimal degrees (number or numeric string)
 */
function makeGpsMetaRow(tbody, latitude, longitude) {
    // The values originate from file metadata: encode them so that stray
    // characters ('&', '#', spaces...) cannot alter the query string.
    const coords = `${encodeURIComponent(latitude)},${encodeURIComponent(longitude)}`;

    const mapLink = $("<a>")
        .text(`${latitude}, ${longitude}`)
        .attr("href", `https://maps.google.com/?q=${coords}&ll=${coords}&t=k&z=17`)
        .attr("target", "_blank")
        // Do not expose window.opener / the referrer to the external site
        .attr("rel", "noopener noreferrer");

    tbody.append($("<tr>")
        .append($("<td>").text("Exif GPS"))
        .append($("<td>").append(mapLink))
    );
}
function infoButtonCb(hit) { function infoButtonCb(hit) {
return () => { return () => {
getDocumentInfo(hit["_id"]).then(doc => { getDocumentInfo(hit["_id"]).then(doc => {
@ -229,13 +242,25 @@ function infoButtonCb(hit) {
.text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " ")) .text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " "))
.attr("title", doc["mtime"])) .attr("title", doc["mtime"]))
); );
// Exif GPS
if ("exif_gps_longitude_dec" in doc) {
makeGpsMetaRow(tbody, doc["exif_gps_latitude_dec"], doc["exif_gps_longitude_dec"])
} else if ("exif_gps_longitude_dms" in doc) {
makeGpsMetaRow(
tbody,
dmsToDecimal(doc["exif_gps_latitude_dms"], doc["exif_gps_latitude_ref"]),
dmsToDecimal(doc["exif_gps_longitude_dms"], doc["exif_gps_longitude_ref"]),
)
}
const displayFields = new Set([ const displayFields = new Set([
"mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc", "mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author", "bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
"modified_by", "pages" "modified_by", "pages"
]); ]);
Object.keys(doc) Object.keys(doc)
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_")) .filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || (key.startsWith("exif_") && !key.includes("gps")))
.forEach(key => { .forEach(key => {
tbody.append($("<tr>") tbody.append($("<tr>")
.append($("<td>").text(key)) .append($("<td>").text(key))
@ -350,6 +375,14 @@ function createDocCard(hit) {
audio.setAttribute("controls", ""); audio.setAttribute("controls", "");
audio.setAttribute("type", hit["_source"]["mime"]); audio.setAttribute("type", hit["_source"]["mime"]);
audio.setAttribute("src", "f/" + hit["_id"]); audio.setAttribute("src", "f/" + hit["_id"]);
audio.addEventListener("play", () => {
// Pause all currently playing audio tags
$("audio").each(function () {
if (this !== audio) {
this.pause();
}
});
});
docCard.appendChild(audio) docCard.appendChild(audio)
} }

View File

@ -511,8 +511,8 @@ function search(after = null) {
searchResults.appendChild(preload); searchResults.appendChild(preload);
} }
let query = searchBar.value; let searchBarValue = searchBar.value;
let empty = query === ""; let empty = searchBarValue === "";
let condition = empty ? "should" : "must"; let condition = empty ? "should" : "must";
let filters = [ let filters = [
{range: {size: {gte: size_min, lte: size_max}}}, {range: {size: {gte: size_min, lte: size_max}}},
@ -561,19 +561,32 @@ function search(after = null) {
filters.push({range: {mtime: {lte: date_max}}}) filters.push({range: {mtime: {lte: date_max}}})
} }
let query;
if (CONF.options.queryMode === "simple") {
query = {
simple_query_string: {
query: searchBarValue,
fields: fields,
default_operator: "and"
}
}
} else {
query = {
query_string: {
query: searchBarValue,
default_field: "name",
default_operator: "and"
}
}
}
let q = { let q = {
"_source": { "_source": {
excludes: ["content", "_tie"] excludes: ["content", "_tie"]
}, },
query: { query: {
bool: { bool: {
[condition]: { [condition]: query,
simple_query_string: {
query: query,
fields: fields,
default_operator: "and"
}
},
filter: filters filter: filters
} }
}, },
@ -611,7 +624,9 @@ function search(after = null) {
} }
} }
$.jsonPost("es", q).then(searchResult => { const showError = CONF.options.queryMode === "advanced";
$.jsonPost("es", q, showError).then(searchResult => {
let hits = searchResult["hits"]["hits"]; let hits = searchResult["hits"]["hits"];
if (hits) { if (hits) {
lastDoc = hits[hits.length - 1]; lastDoc = hits[hits.length - 1];
@ -645,7 +660,25 @@ function search(after = null) {
reachedEnd = hits.length !== SIZE; reachedEnd = hits.length !== SIZE;
insertHits(resultContainer, hits); insertHits(resultContainer, hits);
searchBusy = false; searchBusy = false;
}); }).fail(() => {
searchBusy = false;
if (!after) {
preload.remove();
}
console.log("QUERY:")
console.log(q)
$.toast({
heading: "Query error",
text: "Could not parse or execute query, please check the Advanced search documentation. " +
"See server logs for more information.",
stack: false,
bgColor: "#FF8F00",
textColor: "#FFF3E0",
position: 'bottom-right',
hideAfter: false
});
})
} }

View File

@ -70,7 +70,7 @@ function strUnescape(str) {
for (let i = 0; i < str.length; i++) { for (let i = 0; i < str.length; i++) {
const c = str[i]; const c = str[i];
const next = str[i+1]; const next = str[i + 1];
if (c === ']') { if (c === ']') {
if (next === ']') { if (next === ']') {
@ -102,7 +102,8 @@ const _defaults = {
treemapSize: "large", treemapSize: "large",
suggestPath: true, suggestPath: true,
fragmentSize: 100, fragmentSize: 100,
columns: 5 columns: 5,
queryMode: "simple"
}; };
function loadSettings() { function loadSettings() {
@ -120,6 +121,7 @@ function loadSettings() {
$("#settingSuggestPath").prop("checked", CONF.options.suggestPath); $("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
$("#settingFragmentSize").val(CONF.options.fragmentSize); $("#settingFragmentSize").val(CONF.options.fragmentSize);
$("#settingColumns").val(CONF.options.columns); $("#settingColumns").val(CONF.options.columns);
$("#settingQueryMode").val(CONF.options.queryMode);
} }
function Settings() { function Settings() {
@ -127,6 +129,7 @@ function Settings() {
this._onUpdate = function () { this._onUpdate = function () {
$("#fuzzyToggle").prop("checked", this.options.fuzzy); $("#fuzzyToggle").prop("checked", this.options.fuzzy);
$("#searchBar").attr("placeholder", this.options.queryMode === "simple" ? "Search" : "Advanced search");
updateColumnStyle(); updateColumnStyle();
}; };
@ -165,6 +168,7 @@ function updateSettings() {
CONF.options.suggestPath = $("#settingSuggestPath").prop("checked"); CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
CONF.options.fragmentSize = $("#settingFragmentSize").val(); CONF.options.fragmentSize = $("#settingFragmentSize").val();
CONF.options.columns = $("#settingColumns").val(); CONF.options.columns = $("#settingColumns").val();
CONF.options.queryMode = $("#settingQueryMode").val();
CONF.save(); CONF.save();
if (typeof searchDebounced !== "undefined") { if (typeof searchDebounced !== "undefined") {
@ -187,14 +191,16 @@ function updateSettings() {
}); });
} }
jQuery["jsonPost"] = function (url, data) { jQuery["jsonPost"] = function (url, data, showError = true) {
return jQuery.ajax({ return jQuery.ajax({
url: url, url: url,
type: "post", type: "post",
data: JSON.stringify(data), data: JSON.stringify(data),
contentType: "application/json" contentType: "application/json"
}).fail(err => { }).fail(err => {
showEsError(); if (showError) {
showEsError();
}
console.log(err); console.log(err);
}); });
}; };
@ -212,7 +218,7 @@ function updateColumnStyle() {
const style = document.getElementById("style"); const style = document.getElementById("style");
if (style) { if (style) {
style.innerHTML = style.innerHTML =
` `
@media screen and (min-width: 1500px) { @media screen and (min-width: 1500px) {
.container { .container {
max-width: 1440px; max-width: 1440px;
@ -230,3 +236,13 @@ function updateColumnStyle() {
` `
} }
} }
/**
 * Convert a degrees/minutes/seconds string into signed decimal degrees.
 *
 * @param dms Comma-separated list of up to three components (degrees,
 *            minutes, seconds). Each component is either a rational
 *            "numerator:denominator" or a plain number,
 *            e.g. "45:1, 30:1, 1234:100". Missing components count as 0.
 * @param ref Hemisphere reference. "S" and "W" (case-insensitive,
 *            surrounding whitespace ignored) yield a negative result,
 *            anything else a positive one.
 * @returns {number} Decimal degrees (NaN if a component is not numeric)
 */
function dmsToDecimal(dms, ref) {
    // Parse a single "num:den" (or plain "num") component
    const parseRational = (token) => {
        if (token === undefined) {
            // Component absent (e.g. no seconds): contributes nothing
            return 0;
        }
        const [numerator, denominator] = token.trim().split(":");
        return denominator === undefined
            ? Number(numerator)
            : Number(numerator) / Number(denominator);
    };

    const tokens = String(dms).split(",");
    const [d, m, s] = [0, 1, 2].map(i => parseRational(tokens[i]));

    const hemisphere = typeof ref === "string" ? ref.trim().toUpperCase() : ref;
    const sign = hemisphere === "S" || hemisphere === "W" ? -1 : 1;

    return (d + (m / 60) + (s / 3600)) * sign;
}

View File

@ -12,7 +12,7 @@
<nav class="navbar navbar-expand-lg"> <nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a> <a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.9.0</span> <span class="badge badge-pill version">2.10.1</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span> <span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="stats">Stats</a> <a class="btn ml-auto" href="stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings <button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings
@ -120,6 +120,8 @@
</div> </div>
<div class="modal-body"> <div class="modal-body">
<h2>Simple search</h2>
<table class="table"> <table class="table">
<tbody> <tbody>
<tr> <tr>
@ -168,6 +170,12 @@
<p>For more information, see <a target="_blank" <p>For more information, see <a target="_blank"
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html">Elasticsearch href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html">Elasticsearch
documentation</a></p> documentation</a></p>
<h2>Advanced search</h2>
<p>For documentation about the advanced search mode, see <a target="_blank"
href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax">Elasticsearch
documentation</a></p>
</div> </div>
</div> </div>
</div> </div>
@ -207,10 +215,16 @@
<br/> <br/>
<div class="form-group"> <div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label> <label for="settingFragmentSize">Highlight context size in characters</label>
<input type="number" class="form-control" id="settingFragmentSize">
</div> </div>
<label for="settingQueryMode">Search mode</label>
<select id="settingQueryMode" class="form-control form-control-sm">
<option value="simple">Simple</option>
<option value="advanced">Advanced</option>
</select>
<label for="settingDisplay">Display</label> <label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm"> <select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option> <option value="grid">Grid</option>

View File

@ -10,7 +10,7 @@
<nav class="navbar navbar-expand-lg"> <nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a> <a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.9.0</span> <span class="badge badge-pill version">2.10.1</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span> <span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a> <a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" <button class="btn" type="button" data-toggle="modal" data-target="#settings"
@ -84,10 +84,16 @@
<br/> <br/>
<div class="form-group"> <div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label> <label for="settingFragmentSize">Highlight context size in characters</label>
<input type="number" class="form-control" id="settingFragmentSize">
</div> </div>
<label for="settingQueryMode">Search mode</label>
<select id="settingQueryMode" class="form-control form-control-sm">
<option value="simple">Simple</option>
<option value="advanced">Advanced</option>
</select>
<label for="settingDisplay">Display</label> <label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm"> <select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option> <option value="grid">Grid</option>

View File

@ -3,7 +3,7 @@
#include "sist.h" #include "sist.h"
#include <pthread.h> #include <pthread.h>
#define MAX_QUEUE_SIZE 10000 #define MAX_QUEUE_SIZE 1000000
typedef void (*thread_func_t)(void *arg); typedef void (*thread_func_t)(void *arg);
@ -52,6 +52,13 @@ static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) {
return work; return work;
} }
/**
 * Log the thread pool's counters (thread_cnt, work_cnt, done_cnt) and its
 * stop flag at debug level.
 *
 * The fields are read without taking any lock, so the values are only a
 * best-effort snapshot.
 *
 * @param pool Pool to inspect. May be NULL (for instance when no pool has
 *             been created yet), in which case only a notice is logged.
 */
void tpool_dump_debug_info(tpool_t *pool) {
    if (pool == NULL) {
        /* Nothing to dump; avoid faulting again while reporting a crash */
        LOG_DEBUG("tpool.c", "pool is NULL, no thread pool information to dump")
        return;
    }

    LOG_DEBUGF("tpool.c", "pool->thread_cnt = %d", pool->thread_cnt)
    LOG_DEBUGF("tpool.c", "pool->work_cnt = %d", pool->work_cnt)
    LOG_DEBUGF("tpool.c", "pool->done_cnt = %d", pool->done_cnt)
    LOG_DEBUGF("tpool.c", "pool->stop = %d", pool->stop)
}
/** /**
* Pop work object from thread pool * Pop work object from thread pool
*/ */
@ -83,7 +90,7 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
} }
while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) { while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
usleep(100000); usleep(10000);
} }
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&(pool->work_mutex));
@ -150,6 +157,7 @@ static void *tpool_worker(void *arg) {
if (pool->cleanup_func != NULL) { if (pool->cleanup_func != NULL) {
LOG_INFO("tpool.c", "Executing cleanup function") LOG_INFO("tpool.c", "Executing cleanup function")
pool->cleanup_func(); pool->cleanup_func();
LOG_DEBUG("tpool.c", "Done executing cleanup function")
} }
pthread_cond_signal(&(pool->working_cond)); pthread_cond_signal(&(pool->working_cond));

View File

@ -10,10 +10,12 @@ typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg); tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg);
void tpool_start(tpool_t *pool); void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *tm); void tpool_destroy(tpool_t *pool);
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg); int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
void tpool_wait(tpool_t *tm); void tpool_wait(tpool_t *pool);
void tpool_dump_debug_info(tpool_t *pool);
#endif #endif

View File

@ -8,18 +8,8 @@
#include <src/ctx.h> #include <src/ctx.h>
#include <mongoose.h>
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
static int has_prefix(const struct mg_str *str, const struct mg_str *prefix) {
return str->len > prefix->len && memcmp(str->p, prefix->p, prefix->len) == 0;
}
static int is_equal(const struct mg_str *s1, const struct mg_str *s2) {
return s1->len == s2->len && memcmp(s1->p, s2->p, s2->len) == 0;
}
static void send_response_line(struct mg_connection *nc, int status_code, int length, char *extra_headers) {
mg_printf( mg_printf(
nc, nc,
"HTTP/1.1 %d %s\r\n" "HTTP/1.1 %d %s\r\n"
@ -62,36 +52,32 @@ store_t *get_tag_store(const char *index_id) {
void search_index(struct mg_connection *nc) { void search_index(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html"); send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html");
mg_send(nc, search_html, sizeof(search_html)); mg_send(nc, search_html, sizeof(search_html));
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void stats(struct mg_connection *nc) { void stats(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(stats_html), "Content-Type: text/html"); send_response_line(nc, 200, sizeof(stats_html), "Content-Type: text/html");
mg_send(nc, stats_html, sizeof(stats_html)); mg_send(nc, stats_html, sizeof(stats_html));
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != MD5_STR_LENGTH + 4) { if (hm->uri.len != MD5_STR_LENGTH + 4) {
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char arg_md5[MD5_STR_LENGTH]; char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH); memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0'; *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
index_t *index = get_index_by_id(arg_md5); index_t *index = get_index_by_id(arg_md5);
if (index == NULL) { if (index == NULL) {
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
const char *file; const char *file;
switch (atoi(hm->uri.p + 3 + MD5_STR_LENGTH)) { switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
case 1: case 1:
file = "treemap.csv"; file = "treemap.csv";
break; break;
@ -105,54 +91,41 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st
file = "date_agg.csv"; file = "date_agg.csv";
break; break;
default: default:
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char disposition[8192]; char disposition[8192];
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"", file); snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"\r\n", file);
char full_path[PATH_MAX]; char full_path[PATH_MAX];
strcpy(full_path, index->path); strcpy(full_path, index->path);
strcat(full_path, file); strcat(full_path, file);
mg_http_serve_file(nc, hm, full_path, mg_mk_str("text/csv"), mg_mk_str(disposition)); mg_http_serve_file(nc, hm, full_path, "text/csv", disposition);
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void javascript_lib(struct mg_connection *nc) { void javascript_lib(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript"); send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript");
mg_send(nc, bundle_js, sizeof(bundle_js)); mg_send(nc, bundle_js, sizeof(bundle_js));
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void javascript_search(struct mg_connection *nc) { void javascript_search(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_js), "Content-Type: application/javascript"); send_response_line(nc, 200, sizeof(search_js), "Content-Type: application/javascript");
mg_send(nc, search_js, sizeof(search_js)); mg_send(nc, search_js, sizeof(search_js));
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
int client_requested_dark_theme(struct http_message *hm) { int client_requested_dark_theme(struct mg_http_message *hm) {
struct mg_str *cookie_header = mg_get_http_header(hm, "cookie"); struct mg_str *cookie_header = mg_http_get_header(hm, "cookie");
if (cookie_header == NULL) { if (cookie_header == NULL) {
return FALSE; return FALSE;
} }
char buf[4096]; struct mg_str sist_cookie = mg_http_get_header_var(*cookie_header, mg_str_n("sist", 4));
char *sist_cookie = buf;
if (mg_http_parse_header2(cookie_header, "sist", &sist_cookie, sizeof(buf)) == 0) {
return FALSE;
}
int ret = strcmp(sist_cookie, "dark") == 0; return mg_strcmp(sist_cookie, mg_str_n("dark", 4)) == 0;
if (sist_cookie != buf) {
free(sist_cookie);
}
return ret;
} }
void style(struct mg_connection *nc, struct http_message *hm) { void style(struct mg_connection *nc, struct mg_http_message *hm) {
if (client_requested_dark_theme(hm)) { if (client_requested_dark_theme(hm)) {
send_response_line(nc, 200, sizeof(bundle_dark_css), "Content-Type: text/css"); send_response_line(nc, 200, sizeof(bundle_dark_css), "Content-Type: text/css");
@ -161,11 +134,9 @@ void style(struct mg_connection *nc, struct http_message *hm) {
send_response_line(nc, 200, sizeof(bundle_css), "Content-Type: text/css"); send_response_line(nc, 200, sizeof(bundle_css), "Content-Type: text/css");
mg_send(nc, bundle_css, sizeof(bundle_css)); mg_send(nc, bundle_css, sizeof(bundle_css));
} }
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) { void img_sprite_skin_flat(struct mg_connection *nc, struct mg_http_message *hm) {
if (client_requested_dark_theme(hm)) { if (client_requested_dark_theme(hm)) {
send_response_line(nc, 200, sizeof(sprite_skin_flat_dark_png), "Content-Type: image/png"); send_response_line(nc, 200, sizeof(sprite_skin_flat_dark_png), "Content-Type: image/png");
mg_send(nc, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png)); mg_send(nc, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png));
@ -173,25 +144,22 @@ void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) {
send_response_line(nc, 200, sizeof(sprite_skin_flat_png), "Content-Type: image/png"); send_response_line(nc, 200, sizeof(sprite_skin_flat_png), "Content-Type: image/png");
mg_send(nc, sprite_skin_flat_png, sizeof(sprite_skin_flat_png)); mg_send(nc, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
} }
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != 68) { if (hm->uri.len != 68) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char arg_file_md5[MD5_STR_LENGTH]; char arg_file_md5[MD5_STR_LENGTH];
char arg_index[MD5_STR_LENGTH]; char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.p + 3, MD5_STR_LENGTH); memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0'; *(arg_index + MD5_STR_LENGTH - 1) = '\0';
memcpy(arg_file_md5, hm->uri.p + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH); memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
*(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0'; *(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
unsigned char md5_buf[MD5_DIGEST_LENGTH]; unsigned char md5_buf[MD5_DIGEST_LENGTH];
@ -200,8 +168,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
store_t *store = get_store(arg_index); store_t *store = get_store(arg_index);
if (store == NULL) { if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index) LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@ -212,26 +179,24 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
mg_send(nc, data, data_len); mg_send(nc, data, data_len);
free(data); free(data);
} }
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void search(struct mg_connection *nc, struct http_message *hm) { void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) { if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request") LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_send_error(nc, 500, NULL); mg_http_reply(nc, 500, "", "Invalid request");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.p, hm->body.len); memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
char url[4096]; char url[4096];
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index); snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->user_data = web_post_async(url, body); nc->fn_data = web_post_async(url, body);
} }
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) { void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
@ -253,16 +218,13 @@ void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext); idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
dyn_buffer_t encoded = url_escape(url); dyn_buffer_t encoded = url_escape(url);
mg_http_send_redirect( dyn_buffer_write_char(&encoded, '\0');
nc, 308,
(struct mg_str) MG_MK_STR_N(encoded.buf, encoded.cur), mg_http_reply(nc, 308, "Location: %s", encoded.buf);
(struct mg_str) MG_NULL_STR
);
dyn_buffer_destroy(&encoded); dyn_buffer_destroy(&encoded);
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct http_message *hm) { void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct mg_http_message *hm) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring; const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring; const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
@ -283,10 +245,10 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path) LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)
char disposition[8192]; char disposition[8192];
snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"", snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"\r\n",
name, strlen(ext) == 0 ? "" : ".", ext); name, strlen(ext) == 0 ? "" : ".", ext);
mg_http_serve_file(nc, hm, full_path, mg_mk_str(mime), mg_mk_str(disposition)); mg_http_serve_file(nc, hm, full_path, mime, disposition);
} }
void index_info(struct mg_connection *nc) { void index_info(struct mg_connection *nc) {
@ -310,22 +272,19 @@ void index_info(struct mg_connection *nc) {
mg_send(nc, json_str, strlen(json_str)); mg_send(nc, json_str, strlen(json_str));
free(json_str); free(json_str);
cJSON_Delete(json); cJSON_Delete(json);
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != MD5_STR_LENGTH + 2) { if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char arg_md5[MD5_STR_LENGTH]; char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH); memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0'; *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
cJSON *doc = elastic_get_document(arg_md5); cJSON *doc = elastic_get_document(arg_md5);
@ -334,16 +293,14 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
cJSON *index_id = cJSON_GetObjectItem(source, "index"); cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) { if (index_id == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
index_t *idx = get_index_by_id(index_id->valuestring); index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) { if (idx == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@ -352,21 +309,18 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
mg_send(nc, json_str, (int) strlen(json_str)); mg_send(nc, json_str, (int) strlen(json_str));
free(json_str); free(json_str);
cJSON_Delete(doc); cJSON_Delete(doc);
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != MD5_STR_LENGTH + 2) { if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char arg_md5[MD5_STR_LENGTH]; char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH); memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0'; *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
const char *next = arg_md5; const char *next = arg_md5;
@ -380,8 +334,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
index_id = cJSON_GetObjectItem(source, "index"); index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) { if (index_id == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
cJSON *parent = cJSON_GetObjectItem(source, "parent"); cJSON *parent = cJSON_GetObjectItem(source, "parent");
@ -395,8 +348,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
if (idx == NULL) { if (idx == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
nc->flags |= MG_F_SEND_AND_CLOSE; mg_http_reply(nc, 404, "", "Not found");
mg_http_send_error(nc, 404, NULL);
return; return;
} }
@ -417,8 +369,6 @@ void status(struct mg_connection *nc) {
} }
free(status); free(status);
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
typedef struct { typedef struct {
@ -464,35 +414,32 @@ tag_req_t *parse_tag_request(cJSON *json) {
return req; return req;
} }
void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (path->len != MD5_STR_LENGTH + 4) { if (hm->uri.len != MD5_STR_LENGTH + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char arg_index[MD5_STR_LENGTH]; char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.p + 5, MD5_STR_LENGTH); memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0'; *(arg_index + MD5_STR_LENGTH - 1) = '\0';
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) { if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request") LOG_DEBUG("serve.c", "Invalid tag request")
mg_http_send_error(nc, 400, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
store_t *store = get_tag_store(arg_index); store_t *store = get_tag_store(arg_index);
if (store == NULL) { if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index) LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.p, hm->body.len); memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body); cJSON *json = cJSON_Parse(body);
@ -501,8 +448,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index) LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
cJSON_Delete(json); cJSON_Delete(json);
free(body); free(body);
mg_http_send_error(nc, 400, NULL); mg_http_reply(nc, 400, "", "Invalid request");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@ -545,7 +491,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
char url[4096]; char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id); snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->user_data = web_post_async(url, buf); nc->fn_data = web_post_async(url, buf);
} else { } else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name)); cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
@ -565,7 +511,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
char url[4096]; char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id); snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->user_data = web_post_async(url, buf); nc->fn_data = web_post_async(url, buf);
} }
char *json_str = cJSON_PrintUnformatted(arr); char *json_str = cJSON_PrintUnformatted(arr);
@ -579,92 +525,73 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
free(body); free(body);
} }
int validate_auth(struct mg_connection *nc, struct http_message *hm) { int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) {
char user[256] = {0,}; char user[256] = {0,};
char pass[256] = {0,}; char pass[256] = {0,};
int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass)); mg_http_creds(hm, user, sizeof(user), pass, sizeof(pass));
if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) { if (strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n" mg_http_reply(nc, 401, "WWW-Authenticate: Basic realm=\"sist2\"", "");
"WWW-Authenticate: Basic realm=\"sist2\"\r\n"
"Content-Length: 0\r\n\r\n");
nc->flags |= MG_F_SEND_AND_CLOSE;
return FALSE; return FALSE;
} }
return TRUE; return TRUE;
} }
static void ev_router(struct mg_connection *nc, int ev, void *p) { static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(void *fn_data)) {
struct mg_str scheme;
struct mg_str user_info;
struct mg_str host;
unsigned int port;
struct mg_str path;
struct mg_str query;
struct mg_str fragment;
if (ev == MG_EV_HTTP_REQUEST) {
struct http_message *hm = (struct http_message *) p;
if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
if (ev == MG_EV_HTTP_MSG) {
struct mg_http_message *hm = (struct mg_http_message *) ev_data;
if (WebCtx.auth_enabled == TRUE) { if (WebCtx.auth_enabled == TRUE) {
if (!validate_auth(nc, hm)) { if (!validate_auth(nc, hm)) {
nc->is_closing = 1;
return; return;
} }
} }
if (is_equal(&path, &((struct mg_str) MG_MK_STR("/")))) { if (mg_http_match_uri(hm, "/")) {
search_index(nc); search_index(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/css")))) { } else if (mg_http_match_uri(hm, "/css")) {
style(nc, hm); style(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/stats")))) { } else if (mg_http_match_uri(hm, "/stats")) {
stats(nc); stats(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jslib")))) { } else if (mg_http_match_uri(hm, "/jslib")) {
javascript_lib(nc); javascript_lib(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jssearch")))) { } else if (mg_http_match_uri(hm, "/jssearch")) {
javascript_search(nc); javascript_search(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/img/sprite-skin-flat.png")))) { } else if (mg_http_match_uri(hm, "/img/sprite-skin-flat.png")) {
img_sprite_skin_flat(nc, hm); img_sprite_skin_flat(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/es")))) { } else if (mg_http_match_uri(hm, "/es")) {
search(nc, hm); search(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/i")))) { } else if (mg_http_match_uri(hm, "/i")) {
index_info(nc); index_info(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/status")))) { } else if (mg_http_match_uri(hm, "/status")) {
status(nc); status(nc);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/f/")))) { } else if (mg_http_match_uri(hm, "/f/*")) {
file(nc, hm, &path); file(nc, hm);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/t/")))) { } else if (mg_http_match_uri(hm, "/t/*/*")) {
thumbnail(nc, hm, &path); thumbnail(nc, hm);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/s/")))) { } else if (mg_http_match_uri(hm, "/s/*/*")) {
stats_files(nc, hm, &path); stats_files(nc, hm);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/tag/")))) { } else if (mg_http_match_uri(hm, "/tag/*")) {
if (WebCtx.tag_auth_enabled == TRUE) { if (WebCtx.tag_auth_enabled == TRUE && !validate_auth(nc, hm)) {
if (!validate_auth(nc, hm)) { nc->is_closing = 1;
return; return;
}
} }
tag(nc, hm, &path); tag(nc, hm);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) { } else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm, &path); document_info(nc, hm);
} else { } else {
mg_http_send_error(nc, 404, NULL); mg_http_reply(nc, 404, "", "Page not found");
nc->flags |= MG_F_SEND_AND_CLOSE;
} }
} else if (ev == MG_EV_POLL) { } else if (ev == MG_EV_POLL) {
if (nc->user_data != NULL) { if (nc->fn_data != NULL) {
//Waiting for ES reply //Waiting for ES reply
subreq_ctx_t *ctx = (subreq_ctx_t *) nc->user_data; subreq_ctx_t *ctx = (subreq_ctx_t *) nc->fn_data;
web_post_async_poll(ctx); web_post_async_poll(ctx);
if (ctx->done == TRUE) { if (ctx->done == TRUE) {
response_t *r = ctx->response; response_t *r = ctx->response;
if (r->status_code == 200) { if (r->status_code == 200) {
@ -684,14 +611,14 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
free(json_str); free(json_str);
free(tmp); free(tmp);
} }
mg_http_send_error(nc, 500, NULL);
mg_http_reply(nc, 500, "", "");
} }
free_response(r); free_response(r);
free(ctx->data); free(ctx->data);
free(ctx); free(ctx);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->fn_data = NULL;
nc->user_data = NULL;
} }
} }
} }
@ -702,15 +629,18 @@ void serve(const char *listen_address) {
printf("Starting web server @ http://%s\n", listen_address); printf("Starting web server @ http://%s\n", listen_address);
struct mg_mgr mgr; struct mg_mgr mgr;
mg_mgr_init(&mgr, NULL); mg_mgr_init(&mgr);
struct mg_connection *nc = mg_bind(&mgr, listen_address, ev_router); int ok = 1;
struct mg_connection *nc = mg_http_listen(&mgr, listen_address, ev_router, NULL);
if (nc == NULL) { if (nc == NULL) {
LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address) LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address)
} }
mg_set_protocol_http_websocket(nc);
for (;;) { while (ok) {
mg_mgr_poll(&mgr, 10); mg_mgr_poll(&mgr, 10);
} }
mg_mgr_free(&mgr);
LOG_INFO("serve.c", "Finished web event loop")
} }

File diff suppressed because one or more lines are too long

View File

@ -17,17 +17,19 @@ def copy_files(files):
def sist2(*args): def sist2(*args):
print("./sist2 " + " ".join(args))
return subprocess.check_output( return subprocess.check_output(
args=["./sist2_debug", *args], args=["./sist2", *args],
) )
def sist2_index(files, *args): def sist2_index(files, *args):
path = copy_files(files) path = copy_files(files)
shutil.rmtree("i", ignore_errors=True) shutil.rmtree("test_i", ignore_errors=True)
sist2("scan", path, "-o", "i", *args) sist2("scan", path, "-o", "test_i", *args)
return iter(sist2_index_to_dict("i")) return iter(sist2_index_to_dict("test_i"))
def sist2_incremental_index(files, func=None, *args): def sist2_incremental_index(files, func=None, *args):
@ -36,14 +38,14 @@ def sist2_incremental_index(files, func=None, *args):
if func: if func:
func(path) func(path)
shutil.rmtree("i_inc", ignore_errors=True) shutil.rmtree("test_i_inc", ignore_errors=True)
sist2("scan", path, "-o", "i_inc", "--incremental", "i", *args) sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
return iter(sist2_index_to_dict("i_inc")) return iter(sist2_index_to_dict("test_i_inc"))
def sist2_index_to_dict(index): def sist2_index_to_dict(index):
res = subprocess.check_output( res = subprocess.check_output(
args=["./sist2_debug", "index", "--print", index], args=["./sist2", "index", "--print", index],
) )
for line in res.splitlines(): for line in res.splitlines():

@ -1 +1 @@
Subproject commit 3f4e3594a6891b942d5a711781d5425111aa13bf Subproject commit ffd9c23427d0cb105e27f27f0cf97b463b6a8bf8

2
third-party/libscan vendored

@ -1 +1 @@
Subproject commit ae9fadec473e6e4ade05259fe359c5366c3f3af6 Subproject commit 9be4f02851107edac65894a1fdde16a80cad43ac