Compare commits

...

84 Commits

Author SHA1 Message Date
052df82373 Fix #83 2020-07-19 13:10:30 -04:00
5676136777 Remove println that was left accidentally 2020-07-18 20:55:12 -04:00
c061613302 Fix #76 2020-07-18 19:23:43 -04:00
d0325fd9b9 Fix for simon987/sist2#85 2020-07-18 18:48:54 -04:00
e05a6f3863 Fix for #75 2020-07-18 18:46:52 -04:00
f1690a9cca Mobi build fix 2020-07-18 13:10:45 -04:00
100a264413 Don't show MuPDF warnings unless --very-verbose is specified 2020-07-18 10:28:05 -04:00
29390bb454 Update README 2020-07-18 09:54:36 -04:00
4d43036ded Fix simon987/sist2#78 2020-07-18 09:41:39 -04:00
0b5cdbd130 Fix #79 2020-07-18 09:36:10 -04:00
53d7695f66 Read .raw thumbnails #80, fix media probing for some formats 2020-07-18 09:31:42 -04:00
8d53456404 fix libscan submodule 2020-07-17 20:33:50 -04:00
cbc08a7cc9 Save ebook renders as jpeg 2020-07-17 20:18:21 -04:00
e629b4d7d3 Faster comic book parsing, probably fixes #77 2020-07-17 19:10:18 -04:00
22f7073b39 mobi reading bugfix 2020-07-16 20:30:28 -04:00
1781a74960 Oops I didn't mean to push this 2020-07-16 19:23:52 -04:00
db96c95ac7 log fix #73 2020-07-16 19:19:23 -04:00
7b9fa4cc0a Fix bad merge... 2020-07-15 21:00:51 -04:00
5cc1fa86a9 Read embedded thumbnail simon987/sist2#74 2020-07-15 20:56:25 -04:00
649689ce30 Remove warning when generating stats 2020-07-15 20:41:38 -04:00
c8536f65a8 Fix memory leak in index 2020-07-15 20:41:09 -04:00
75b5e249c1 Merge pull request #72 from dpieski/patch-1
Update USAGE.md
2020-07-15 14:37:28 -04:00
Andrew
f49e03ac79 Update USAGE.md
added example for Windows to display number of logical processors. 
Does this same limitation apply to the new `index` threads option?
2020-07-15 13:21:02 -05:00
a6d2afc8dc Merge pull request #71 from simon987/web-tag
Web tag
2020-07-14 20:23:22 -04:00
8f8f66ba05 Update README.md 2020-07-14 20:22:03 -04:00
1d9fcf7105 Manual tagging 2020-07-13 19:18:07 -04:00
8127745f2b wip 2020-07-13 19:16:51 -04:00
230988d6d1 frontend tags 2020-07-13 19:15:59 -04:00
13f4dbed2d Handle 429, multi-threaded index module 2020-07-11 17:42:46 -04:00
ed15e89f45 Fix exec-script --es-url not being passed 2020-06-28 12:41:09 -04:00
c636d3d921 Set number_of_replicas to 0 by default in elasticsearch 2020-06-26 18:10:51 -04:00
7e92d4b7d1 refresh index only if user script is ran 2020-06-25 20:48:47 -04:00
8ffe780ab2 Tag tree fix for #64, validate required argument in exec-script 2020-06-25 20:11:30 -04:00
d3c8928fe8 Update readme 2020-06-24 21:06:27 -04:00
d9f628fca4 Build fix 2020-06-21 16:53:22 -04:00
68289268c1 Add exif tag 2020-06-21 16:51:14 -04:00
649c50c465 Update README.md 2020-06-21 14:35:18 -04:00
7b49a0dc49 Build fix 2020-06-21 12:56:13 -04:00
eb559b53aa RAW picture file support 2020-06-21 10:46:11 -04:00
6d01f9c0df whoops 2020-06-19 22:12:19 -04:00
e724fec668 Fix web return codes 2020-06-19 21:41:17 -04:00
fe5e93b300 Update USAGE.md 2020-06-19 21:29:09 -04:00
ecad85fd7d version bump 2020-06-19 21:10:03 -04:00
74cc898259 Fix tag display issue 2020-06-19 21:07:19 -04:00
dc2e4443c4 Add exec-script command 2020-06-19 21:07:19 -04:00
1a64431b52 Merge pull request #63 from dpieski/patch-3
Correct typos in example
2020-06-19 18:26:10 -04:00
Andrew
9bad515e06 Correct typos in example
Correct typos in examples.
2020-06-19 17:22:02 -05:00
648559cedb Update README.md 2020-06-17 13:25:20 -04:00
3e6cd9cd5c Merge pull request #60 from dpieski/patch-2
update Usage.md
2020-06-17 13:04:46 -04:00
f249992798 Update scripting.md 2020-06-17 13:00:07 -04:00
Andrew
e9645ecdaa update Usage.md
Fixing a link.
2020-06-17 10:58:25 -05:00
046edea0e2 Handle special characters in file paths 2020-06-10 19:45:36 -04:00
a011b7e97b Fragment size setting 2020-06-09 21:40:53 -04:00
8c1c1697e0 Fix file wordexp in some paths #59 2020-06-05 19:41:02 -04:00
018b49fa4c Fix csv_escape #58 2020-06-05 19:13:03 -04:00
27b4e6403e Re-enable path autocomplete #54 2020-06-02 19:46:58 -04:00
13fdbd9e69 Fix for ES 7.7 #54 2020-06-01 18:14:34 -04:00
5e7fdaf8dd Update issue-template.md 2020-06-01 10:45:43 -04:00
19d5c8ac9f Update issue-template.md 2020-05-29 18:19:21 -04:00
99497049a8 Merge pull request #53 from dpieski/patch-1
Update README
2020-05-29 18:16:13 -04:00
Andrew
1a3181d78b Update README
changed case of path in a link to the usage guide to fix 404 error.
2020-05-29 15:37:20 -05:00
449aa77c8f Fix for unknown mime inside archives 2020-05-25 17:36:04 -04:00
3058c55510 Memory leak fix #37 2020-05-24 15:42:42 -04:00
dedf9287b2 Fix name separation in --archive list mode 2020-05-24 14:36:59 -04:00
ab199b0c0c Remove arc_reset() function because seek() inside archive doesn't work 2020-05-24 14:18:31 -04:00
c4fbae123e Better support for media files inside archives 2020-05-24 14:10:23 -04:00
dd2397ef5c handle .tgz #44, ignore files inside archives for stats page 2020-05-24 10:10:28 -04:00
ee0f71f4d3 fix compile warning 2020-05-17 15:00:56 -04:00
0bbb96b149 Merge pull request #51 from simon987/stats
Stats page
2020-05-17 14:49:28 -04:00
78f6e16701 image 2020-05-17 12:47:45 -04:00
4625bca9a9 stats 2020-05-17 12:47:02 -04:00
f2ae653886 Revert "wip"
This reverts commit 5686bc86
2020-05-16 08:16:49 -04:00
5686bc864d wip 2020-05-13 17:37:40 -04:00
cf513b4ad8 Escape invalid UTF8 characters simon987/sist2#44, increase magic buffer size 2020-05-12 19:28:02 -04:00
013423424e UTF-8 fix attempt w/ libarchive (#44) 2020-05-10 19:52:42 -04:00
16514fd6b0 Option to search in path #49 2020-05-09 22:00:22 -04:00
27509f97e1 Update USAGE.md 2020-05-08 19:08:46 -04:00
4c540eae1c Update USAGE.md 2020-05-08 19:07:45 -04:00
d2b53ff6fc Update README.md 2020-05-08 18:32:32 -04:00
0ef4292abf Fix duplicate tag problem (simon987/sist2#48) 2020-05-05 20:20:10 -04:00
e6fde38c24 Load defaults when LocalStorage is outdated 2020-05-03 08:13:25 -04:00
5fa343d40f fix version typo 2020-05-03 08:10:28 -04:00
7ee1374802 oops 2020-04-30 21:21:48 -04:00
bd9e56829c Support for markup files 2020-04-30 20:21:09 -04:00
57 changed files with 3618 additions and 780 deletions

View File

@@ -9,7 +9,9 @@ assignees: ''
sist2 version: sist2 version:
Platform (please indicate if you're using Docker): Platform (Linux or Docker):
Elasticsearch version:
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0` Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0`

View File

@@ -30,7 +30,7 @@ add_executable(
third-party/argparse/argparse.h third-party/argparse/argparse.c third-party/argparse/argparse.h third-party/argparse/argparse.c
src/cli.c src/cli.h src/cli.c src/cli.h
) src/stats.c src/stats.h src/ctx.c)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
@@ -40,6 +40,7 @@ find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-glib CONFIG REQUIRED) find_package(unofficial-glib CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED) find_package(unofficial-mongoose CONFIG REQUIRED)
find_library(UUID_LIB NAMES uuid) find_library(UUID_LIB NAMES uuid)
find_package(CURL CONFIG REQUIRED)
#find_package(OpenSSL REQUIRED) #find_package(OpenSSL REQUIRED)
@@ -56,7 +57,6 @@ target_compile_options(
sist2 sist2
PRIVATE PRIVATE
-fPIC -fPIC
-Werror
) )
if (SIST_DEBUG) if (SIST_DEBUG)
@@ -67,6 +67,7 @@ if (SIST_DEBUG)
-fstack-protector -fstack-protector
-fno-omit-frame-pointer -fno-omit-frame-pointer
-fsanitize=address -fsanitize=address
-O2
) )
target_link_options( target_link_options(
sist2 sist2
@@ -80,6 +81,7 @@ if (SIST_DEBUG)
OUTPUT_NAME sist2_debug OUTPUT_NAME sist2_debug
) )
else () else ()
# set(VCPKG_BUILD_TYPE release)
target_compile_options( target_compile_options(
sist2 sist2
PRIVATE PRIVATE
@@ -105,6 +107,7 @@ target_link_libraries(
unofficial::glib::glib unofficial::glib::glib
unofficial::mongoose::mongoose unofficial::mongoose::mongoose
# OpenSSL::SSL OpenSSL::Crypto # OpenSSL::SSL OpenSSL::Crypto
CURL::libcurl
${UUID_LIB} ${UUID_LIB}
pthread pthread

View File

@@ -1,6 +1,6 @@
rm ./sist2 sist2_debug rm ./sist2 sist2_debug
cp ../sist2.gz ../sist2_debug.gz . cp ../sist2.gz .
gzip -d sist2.gz sist2_debug.gz gzip -d sist2.gz
strip sist2 strip sist2
version=$(./sist2 --version) version=$(./sist2 --version)

View File

@@ -15,18 +15,22 @@ sist2 (Simple incremental search tool)
* Fast, low memory usage, multi-threaded * Fast, low memory usage, multi-threaded
* Mobile-friendly Web interface * Mobile-friendly Web interface
* Portable (all its features are packaged in a single executable) * Portable (all its features are packaged in a single executable)
* Extracts text from common file types \* * Extracts text and metadata from common file types \*
* Generates thumbnails \* * Generates thumbnails \*
* Incremental scanning * Incremental scanning
* Automatic tagging from file attributes via [user scripts](scripting/README.md) * Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
* Recursive scan inside archive files \*\* * Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\* * OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
\* See [format support](#format-support) \* See [format support](#format-support)
\*\* See [Archive files](#archive-files) \*\* See [Archive files](#archive-files)
\*\*\* See [OCR](#ocr) \*\*\* See [OCR](#ocr)
![stats](docs/stats.png)
## Getting Started ## Getting Started
1. Have an Elasticsearch (>= 6.X.X) instance running 1. Have an Elasticsearch (>= 6.X.X) instance running
@@ -49,7 +53,7 @@ sist2 (Simple incremental search tool)
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)* 1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest` 1. *(or)* `docker pull simon987/sist2:latest`
1. See [Usage guide](DOCS/USAGE.md) 1. See [Usage guide](docs/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux) \* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
@@ -57,7 +61,7 @@ sist2 (Simple incremental search tool)
## Example usage ## Example usage
See [Usage guide](DOCS/USAGE.md) for more details See [Usage guide](docs/USAGE.md) for more details
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx` 1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx` 1. Push index to Elasticsearch: `sist2 index ./docs_idx`
@@ -68,14 +72,17 @@ See [Usage guide](DOCS/USAGE.md) for more details
File type | Library | Content | Thumbnail | Metadata File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:--- :---|:---|:---|:---|:---
pdf,xps,cbz,cbr,fb2,epub | MuPDF | text+ocr | yes, `png` | title | pdf,xps,fb2,epub | MuPDF | text+ocr | yes | title |
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags | cbz,cbr | *(none)* | - | yes | - |
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist | `audio/*` | ffmpeg | - | yes | ID3 tags |
`image/*` | ffmpeg | - | yes, `jpeg` | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) | `video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, x3f, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - | `text/plain` | *(none)* | yes | no | - |
html, xml | *(none)* | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no | tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | *(none)* | yes | no | creator, modified_by, title | docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
mobi, azw, azw3 | libmobi | yes | no | author, title | mobi, azw, azw3 | libmobi | yes | no | author, title |
\* *See [Archive files](#archive-files)* \* *See [Archive files](#archive-files)*
@@ -86,13 +93,11 @@ they were directly in the file system. Recursive (archives inside archives)
scan is also supported. scan is also supported.
**Limitations**: **Limitations**:
* Parsing media files with formats that require * Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported. is limited (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where * Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split **sist2** is not I/O bound, scans might be faster when larger archives are split
into smaller parts. into smaller parts.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### OCR ### OCR
@@ -119,12 +124,12 @@ binaries (GCC 7+ required).
1. Install compile-time dependencies 1. Install compile-time dependencies
```bash ```bash
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw
``` ```
2. Build 2. Build
```bash ```bash
git clone --recursive https://github.com/simon987/sist2/ git clone --recursive https://github.com/simon987/sist2/
cmake -D <VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake . cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make make
``` ```

View File

@@ -1,16 +1,17 @@
#!/usr/bin/env bash #!/usr/bin/env bash
VCPKG_ROOT="/vcpkg"
rm *.gz rm *.gz
rm -rf CMakeFiles CMakeCache.txt rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake . cmake -DSIST_DEBUG=off -DVCPKG_BUILD_TYPE=release -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make make -j 12
strip sist2 strip sist2
gzip -9 sist2 gzip -9 sist2
rm -rf CMakeFiles CMakeCache.txt rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake . cmake -DSIST_DEBUG=on -DVCPKG_BUILD_TYPE=debug -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make make -j 12
cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2 cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2 tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2

View File

@@ -14,47 +14,55 @@
* [examples](#web-examples) * [examples](#web-examples)
* [rewrite_url](#rewrite_url) * [rewrite_url](#rewrite_url)
* [link to specific indices](#link-to-specific-indices) * [link to specific indices](#link-to-specific-indices)
* [exec-script](#exec-script)
* [tagging](#tagging)
``` ```
Usage: sist2 scan [OPTION]... PATH Usage: sist2 scan [OPTION]... PATH
or: sist2 index [OPTION]... INDEX or: sist2 index [OPTION]... INDEX
or: sist2 web [OPTION]... INDEX... or: sist2 web [OPTION]... INDEX...
or: sist2 exec-script [OPTION]... INDEX
Lightning-fast file system indexer and search tool. Lightning-fast file system indexer and search tool.
-h, --help show this help message and exit -h, --help show this help message and exit
-v, --version Show version and exit -v, --version Show version and exit
--verbose Turn on logging --verbose Turn on logging
--very-verbose Turn on debug messages --very-verbose Turn on debug messages
Scan options Scan options
-t, --threads=<int> Number of threads. DEFAULT=1 -t, --threads=<int> Number of threads. DEFAULT=1
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5 -q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500 --size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768 --content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
--incremental=<str> Reuse an existing index and only scan modified files. --incremental=<str> Reuse an existing index and only scan modified files.
-o, --output=<str> Output directory. DEFAULT=index.sist2/ -o, --output=<str> Output directory. DEFAULT=index.sist2/
--rewrite-url=<str> Serve files from this url instead of from disk. --rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: (name of the directory) --name=<str> Index display name. DEFAULT: (name of the directory)
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1 --depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse --archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine) --ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-e, --exclude=<str> Files that match this regex will not be scanned -e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type --fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
Index options Index options
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200 -t, --threads=<int> Number of threads. DEFAULT=1
-p, --print Just print JSON documents to stdout. --es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
--script-file=<str> Path to user script. -p, --print Just print JSON documents to stdout.
--batch-size=<int> Index batch size. DEFAULT: 100 --script-file=<str> Path to user script.
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command) --batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
Web options Web options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200 --es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--bind=<str> Listen on this address. DEFAULT=localhost --bind=<str> Listen on this address. DEFAULT=localhost:4090
--port=<str> Listen on this port. DEFAULT=4090 --auth=<str> Basic auth in user:password format
--auth=<str> Basic auth in user:password format --tag-auth=<str> Basic auth in user:password format for tagging
Made by simon987 <me@simon987.net>. Released under GPL-3.0
Exec-script options
--script-file=<str> Path to user script.
Made by simon987 <me@simon987.net>. Released under GPL-3.0
``` ```
## Scan ## Scan
@@ -62,7 +70,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
### Scan options ### Scan options
* `-t, --threads` * `-t, --threads`
Number of threads for file parsing. **Do not set a number higher than `$(nproc)`!**. Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-WmiObject Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
* `-q, --quality` * `-q, --quality`
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality* Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
* `--size` * `--size`
@@ -94,6 +102,18 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
* `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the * `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
`/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories `/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
* `--fast` Only index file names and mime type * `--fast` Only index file names and mime type
* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
will not be considered for the disk utilisation visualization; their size will be added to
the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
and so on.
In effect, smaller `treemap-threshold` values will yield a more detailed
(but also a more cluttered and harder to read) visualization.
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### Scan examples ### Scan examples
@@ -122,7 +142,15 @@ documents.idx/
├── _index_139965425223424 ├── _index_139965425223424
├── _index_139965433616128 ├── _index_139965433616128
├── _index_139965442008832 ├── _index_139965442008832
── thumbs ── _index_139965442008832
├── treemap.csv
├── agg_mime.csv
├── agg_date.csv
├── agg_size.csv
├── thumbs
| ├── data.mdb
| └── lock.mdb
└── tags
├── data.mdb ├── data.mdb
└── lock.mdb └── lock.mdb
``` ```
@@ -137,6 +165,8 @@ database containing the thumbnails.
The `descriptor.json` file contains general information about the index. The The `descriptor.json` file contains general information about the index. The
following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`. following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
The `.csv` files are pre-computed aggregations necessary for the stats page.
*Advanced usage* *Advanced usage*
@@ -212,7 +242,7 @@ it is currently unsupported and has no guaranties of back/forward compatibility.
* `-p, --print` * `-p, --print`
Print index in JSON format to stdout. Print index in JSON format to stdout.
* `--script-file` * `--script-file`
Path to user script. See [Scripting](scripting/README.md). Path to user script. See [Scripting](scripting.md).
* `--batch-size=<int>` * `--batch-size=<int>`
Index batch size. Indexing is generally faster with larger batches, but payloads that Index batch size. Indexing is generally faster with larger batches, but payloads that
are too large will fail and additional overhead for retrying with smaller sizes may slow are too large will fail and additional overhead for retrying with smaller sizes may slow
@@ -244,14 +274,15 @@ sist2 index --print ./my_index/ | jq | less
### Web options ### Web options
* `--es-url=<str>` Elasticsearch url. * `--es-url=<str>` Elasticsearch url.
* `--bind=<str>` Listen on this address. * `--bind=<str>` Listen on this address.
* `--port=<str>` Listen on this port.
* `--auth=<str>` Basic auth in user:password format * `--auth=<str>` Basic auth in user:password format
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
`--auth` argument, but authentication is only applied to the `/tag/` endpoint.
### Web examples ### Web examples
**Single index** **Single index**
```bash ```bash
sist2 web --auth admin:hunter2 --bind 0.0.0.0 --port 8888 my_index sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
``` ```
**Multiple indices** **Multiple indices**
@@ -273,3 +304,35 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
To link to specific indices, you can add a list of comma-separated index names to To link to specific indices, you can add a list of comma-separated index names to
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
not displayed. not displayed.
## exec-script
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
# Tagging
### Manual tagging
You can modify tags of individual documents directly from the
`web` interface. Note that you can set up authentication for this feature
with the `--tag-auth` option (See [web options](#web-options))
![manual_tag](manual_tag.png)
Tags that are manually added are saved both in the
index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing,
they are read from the index and automatically applied.
You can safely copy the `/tags/` database to another index.
See [Automatic tagging](#automatic-tagging) for information about tag
hierarchies and tag colors.
\* *It can take a few seconds to take effect in new search queries, and the page needs
to be reloaded for the tag tab to update*
### Automatic tagging
See [scripting](scripting.md) documentation.

BIN
docs/manual_tag.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.9 KiB

View File

@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
ArrayList tags = ctx._source.tag = new ArrayList(); ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) { if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase()) tags.add("genre." + ctx._source.genre.toLowerCase());
} }
``` ```
@@ -67,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name); Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) { if (m.find()) {
tags.add("year." + m.group(1)) tags.add("year." + m.group(1));
} }
``` ```
@@ -111,16 +111,6 @@ if (ctx._source.path != "") {
} }
``` ```
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```
Parse `EXIF:F Number` tag Parse `EXIF:F Number` tag
```Java ```Java
if (ctx._source?.exif_fnumber != null) { if (ctx._source?.exif_fnumber != null) {

BIN
docs/stats.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 167 KiB

View File

@@ -10,8 +10,22 @@
"path": { "path": {
"type": "text", "type": "text",
"analyzer": "path_analyzer", "analyzer": "path_analyzer",
"copy_to": "suggest-path",
"fielddata": true, "fielddata": true,
"index_prefixes": {} "fields": {
"nGram": {
"type": "text",
"analyzer": "my_nGram"
},
"text": {
"type": "text",
"analyzer": "content_analyzer"
}
}
},
"suggest-path": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
}, },
"mime": { "mime": {
"type": "keyword" "type": "keyword"
@@ -112,7 +126,12 @@
} }
}, },
"tag": { "tag": {
"type": "keyword" "type": "keyword",
"copy_to": "suggest-tag"
},
"suggest-tag": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
}, },
"exif_make": { "exif_make": {
"type": "text" "type": "text"

View File

@@ -1,7 +1,8 @@
{ {
"index": { "index": {
"refresh_interval": "30s", "refresh_interval": "30s",
"codec": "best_compression" "codec": "best_compression",
"number_of_replicas": 0
}, },
"analysis": { "analysis": {
"tokenizer": { "tokenizer": {

View File

@@ -4,7 +4,7 @@ rm -rf index.sist2/
rm src/static/js/bundle.js 2> /dev/null rm src/static/js/bundle.js 2> /dev/null
cat `ls src/static/js/*.min.js` > src/static/js/bundle.js cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
cat src/static/js/{util,dom,search}.js >> src/static/js/bundle.js cat src/static/js/{util,dom}.js >> src/static/js/bundle.js
rm src/static/css/bundle*.css 2> /dev/null rm src/static/css/bundle*.css 2> /dev/null
cat src/static/css/*.min.css > src/static/css/bundle.css cat src/static/css/*.min.css > src/static/css/bundle.css

View File

@@ -13,7 +13,7 @@ application/epub+zip, epub
application/freeloader, frl application/freeloader, frl
application/futuresplash, spl application/futuresplash, spl
application/groupwise, vew application/groupwise, vew
application/gzip, gz application/gzip, gz|tgz
application/hta, hta application/hta, hta
application/i-deas, unv application/i-deas, unv
application/iges, iges|igs application/iges, iges|igs
@@ -111,7 +111,7 @@ application/x-dbf, dbf
application/x-dbt, application/x-dbt,
application/x-debian-package, deb application/x-debian-package, deb
application/x-deepv, deepv application/x-deepv, deepv
application/x-director, dcr|dir|dxr application/x-director, dir|dxr
application/x-dmp, dmp application/x-dmp, dmp
application/x-dosdriver, application/x-dosdriver,
application/x-dosexec, dll application/x-dosexec, dll
@@ -428,3 +428,23 @@ video/x-msvideo, divx
video/x-qtc, qtc video/x-qtc, qtc
video/x-sgi-movie, movie|mv video/x-sgi-movie, movie|mv
x-epoc/x-sisx-app, x-epoc/x-sisx-app,
application/x-zstd-dictionary,
application/vnd.ms-outlook, msg
image/x-olympus-orf, orf
image/x-nikon-nef, nef
image/x-fuji-raf, raf
image/x-panasonic-raw, rw2|raw
image/x-adobe-dng, dng
image/x-canon-cr2, cr2
image/x-canon-crw, crw
image/x-dcraw,
image/x-kodak-dcr, dcr
image/x-kodak-k25, k25
image/x-kodak-kdc, kdc
image/x-minolta-mrw, mrw
image/x-pentax-pef, pef
image/x-sigma-x3f, x3f
image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
1 application/arj arj
13 application/freeloader frl
14 application/futuresplash spl
15 application/groupwise vew
16 application/gzip gz gz|tgz
17 application/hta hta
18 application/i-deas unv
19 application/iges iges|igs
111 application/x-dbt
112 application/x-debian-package deb
113 application/x-deepv deepv
114 application/x-director dcr|dir|dxr dir|dxr
115 application/x-dmp dmp
116 application/x-dosdriver
117 application/x-dosexec dll
428 video/x-qtc qtc
429 video/x-sgi-movie movie|mv
430 x-epoc/x-sisx-app
431 application/x-zstd-dictionary
432 application/vnd.ms-outlook msg
433 image/x-olympus-orf orf
434 image/x-nikon-nef nef
435 image/x-fuji-raf raf
436 image/x-panasonic-raw rw2|raw
437 image/x-adobe-dng dng
438 image/x-canon-cr2 cr2
439 image/x-canon-crw crw
440 image/x-dcraw
441 image/x-kodak-dcr dcr
442 image/x-kodak-k25 k25
443 image/x-kodak-kdc kdc
444 image/x-minolta-mrw mrw
445 image/x-pentax-pef pef
446 image/x-sigma-x3f x3f
447 image/x-sony-arw arw
448 image/x-sony-sr2 sr2
449 image/x-sony-srf srf
450 image/x-epson-erf erf

View File

@@ -18,7 +18,6 @@ major_mime = {
pdf = ( pdf = (
"application/pdf", "application/pdf",
"application/x-cbz",
"application/epub+zip", "application/epub+zip",
"application/vnd.ms-xpsdocument", "application/vnd.ms-xpsdocument",
) )
@@ -67,6 +66,35 @@ mobi = (
"application/vnd.amazon.mobi8-ebook" "application/vnd.amazon.mobi8-ebook"
) )
# MIME types handled as markup documents; mime_id() ORs the 0x01000000 flag
# into the generated id of every type listed here.
markup = (
"text/xml",
"text/html",
"text/x-sgml"
)
# MIME types of camera RAW images; mime_id() ORs the 0x00800000 flag into the
# generated id of every type listed here. Each type is listed exactly once
# (the minolta-mrw and pentax-pef entries used to appear twice).
raw = (
    "image/x-olympus-orf",
    "image/x-nikon-nef",
    "image/x-fuji-raf",
    "image/x-panasonic-raw",
    "image/x-adobe-dng",
    "image/x-canon-cr2",
    "image/x-canon-crw",
    "image/x-dcraw",
    "image/x-kodak-dcr",
    "image/x-kodak-k25",
    "image/x-kodak-kdc",
    "image/x-minolta-mrw",
    "image/x-pentax-pef",
    "image/x-sigma-x3f",
    "image/x-sony-arw",
    "image/x-sony-sr2",
    "image/x-sony-srf",
    "image/x-epson-erf",
)
cnt = 1 cnt = 1
@@ -89,6 +117,10 @@ def mime_id(mime):
mime_id += " | 0x04000000" mime_id += " | 0x04000000"
elif mime in mobi: elif mime in mobi:
mime_id += " | 0x02000000" mime_id += " | 0x02000000"
elif mime in markup:
mime_id += " | 0x01000000"
elif mime in raw:
mime_id += " | 0x00800000"
elif mime == "application/x-empty": elif mime == "application/x-empty":
return "1" return "1"
return mime_id return mime_id

View File

@@ -2,9 +2,11 @@ files = [
"src/static/css/bundle.css", "src/static/css/bundle.css",
"src/static/css/bundle_dark.css", "src/static/css/bundle_dark.css",
"src/static/js/bundle.js", "src/static/js/bundle.js",
"src/static/js/search.js",
"src/static/img/sprite-skin-flat.png", "src/static/img/sprite-skin-flat.png",
"src/static/img/sprite-skin-flat-dark.png", "src/static/img/sprite-skin-flat-dark.png",
"src/static/search.html", "src/static/search.html",
"src/static/stats.html",
] ]

154
src/cli.c
View File

@@ -12,8 +12,11 @@
#define DEFAULT_BATCH_SIZE 100 #define DEFAULT_BATCH_SIZE 100
#define DEFAULT_LISTEN_ADDRESS "localhost:4090" #define DEFAULT_LISTEN_ADDRESS "localhost:4090"
#define DEFAULT_TREEMAP_THRESHOLD 0.0005
const char* TESS_DATAPATHS[] = { #define DEFAULT_MAX_MEM_BUFFER 2000
const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/", "/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/", "/usr/share/tesseract-ocr/tessdata/",
"./", "./",
@@ -29,10 +32,18 @@ scan_args_t *scan_args_create() {
return args; return args;
} }
/**
 * Allocate a zero-filled exec_args_t.
 *
 * @return the new structure, or whatever calloc() returns on failure (NULL).
 *         Release it with exec_args_destroy().
 */
exec_args_t *exec_args_create() {
    return calloc(1, sizeof(exec_args_t));
}
void scan_args_destroy(scan_args_t *args) { void scan_args_destroy(scan_args_t *args) {
if (args->name != NULL) { if (args->name != NULL) {
free(args->name); free(args->name);
} }
if (args->incremental != NULL) {
free(args->incremental);
}
if (args->path != NULL) { if (args->path != NULL) {
free(args->path); free(args->path);
} }
@@ -52,6 +63,10 @@ void web_args_destroy(web_args_t *args) {
free(args); free(args);
} }
/**
 * Release an exec_args_t obtained from exec_args_create().
 *
 * Only the structure itself is freed; the strings its members point to
 * (es_url, index_path, script_path, script) are not released here.
 */
void exec_args_destroy(exec_args_t *args) {
free(args);
}
int scan_args_validate(scan_args_t *args, int argc, const char **argv) { int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (argc < 2) { if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n"); fprintf(stderr, "Required positional argument: PATH.\n");
@@ -67,7 +82,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
} }
if (args->incremental != NULL) { if (args->incremental != NULL) {
abs_path = abspath(args->incremental); args->incremental = abspath(args->incremental);
if (abs_path == NULL) { if (abs_path == NULL) {
sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature."); sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
args->incremental = NULL; args->incremental = NULL;
@@ -112,7 +127,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1; return 1;
} }
if (args->depth < 0) { if (args->depth <= 0) {
args->depth = G_MAXINT32; args->depth = G_MAXINT32;
} else { } else {
args->depth += 1; args->depth += 1;
@@ -144,7 +159,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char filename[128]; char filename[128];
sprintf(filename, "%s.traineddata", args->tesseract_lang); sprintf(filename, "%s.traineddata", args->tesseract_lang);
const char * path = find_file_in_paths(TESS_DATAPATHS, filename); const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
if (path == NULL) { if (path == NULL) {
LOG_FATAL("cli.c", "Could not find tesseract language file!"); LOG_FATAL("cli.c", "Could not find tesseract language file!");
} }
@@ -180,6 +195,16 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
ScanCtx.exclude = NULL; ScanCtx.exclude = NULL;
} }
if (args->treemap_threshold_str == 0) {
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
} else {
args->treemap_threshold = atof(args->treemap_threshold_str);
}
if (args->max_memory_buffer == 0) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality) LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size) LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size) LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
@@ -195,6 +220,36 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path) LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex) LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast) LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
return 0;
}
/**
 * Read the whole file at script_path into a freshly allocated,
 * NUL-terminated buffer.
 *
 * @param script_path path of the script file to load
 * @param dst         receives the buffer on success (caller frees it);
 *                    set to NULL on failure
 * @return 0 on success, 1 on failure (a message is printed to stderr)
 */
int load_script(const char *script_path, char **dst) {
    *dst = NULL;

    int fd = open(script_path, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
        return 1;
    }

    // Query the size through the descriptor so that the size and the data
    // always belong to the same file.
    struct stat info;
    if (fstat(fd, &info) == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
        close(fd);
        return 1;
    }

    size_t size = (size_t) info.st_size;
    char *buf = malloc(size + 1);
    if (buf == NULL) {
        fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
        close(fd);
        return 1;
    }

    // read() may return fewer bytes than requested; keep going until the
    // whole file is in the buffer or end-of-file is reached.
    size_t total = 0;
    while (total < size) {
        ssize_t n = read(fd, buf + total, size - total);
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
            free(buf);
            close(fd);
            return 1;
        }
        if (n == 0) {
            // File is shorter than fstat() reported; keep what was read.
            break;
        }
        total += (size_t) n;
    }

    // Terminate right after the bytes actually read.
    buf[total] = '\0';
    close(fd);

    *dst = buf;
    return 0;
}
@@ -208,6 +263,13 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
return 1; return 1;
} }
if (args->threads == 0) {
args->threads = 1;
} else if (args->threads < 0) {
fprintf(stderr, "Invalid threads: %d\n", args->threads);
return 1;
}
char *index_path = abspath(argv[1]); char *index_path = abspath(argv[1]);
if (index_path == NULL) { if (index_path == NULL) {
fprintf(stderr, "File not found: %s\n", argv[1]); fprintf(stderr, "File not found: %s\n", argv[1]);
@@ -222,29 +284,9 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
} }
if (args->script_path != NULL) { if (args->script_path != NULL) {
struct stat info; if (load_script(args->script_path, &args->script) != 0) {
int res = stat(args->script_path, &info);
if (res == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1; return 1;
} }
int fd = open(args->script_path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
args->script = malloc(info.st_size + 1);
res = read(fd, args->script, info.st_size);
if (res < 0) {
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
*(args->script + info.st_size) = '\0';
close(fd);
} }
if (args->batch_size == 0) { if (args->batch_size == 0) {
@@ -280,14 +322,14 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
} }
if (args->credentials != NULL) { if (args->credentials != NULL) {
char * ptr = strstr(args->credentials, ":"); char *ptr = strstr(args->credentials, ":");
if (ptr == NULL) { if (ptr == NULL) {
fprintf(stderr, "Invalid --auth format, see usage\n"); fprintf(stderr, "Invalid --auth format, see usage\n");
return 1; return 1;
} }
strncpy(args->auth_user, args->credentials, (ptr - args->credentials)); strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
strncpy(args->auth_pass, ptr + 1, strlen(ptr + 1)); strcpy(args->auth_pass, ptr + 1);
if (strlen(args->auth_user) == 0) { if (strlen(args->auth_user) == 0) {
fprintf(stderr, "--auth username must be at least one character long"); fprintf(stderr, "--auth username must be at least one character long");
@@ -299,6 +341,31 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
args->auth_enabled = FALSE; args->auth_enabled = FALSE;
} }
if (args->tag_credentials != NULL && args->credentials != NULL) {
fprintf(stderr, "--auth and --tag-auth are mutually exclusive");
return 1;
}
if (args->tag_credentials != NULL) {
char *ptr = strstr(args->tag_credentials, ":");
if (ptr == NULL) {
fprintf(stderr, "Invalid --tag-auth format, see usage\n");
return 1;
}
strncpy(args->auth_user, args->tag_credentials, (ptr - args->tag_credentials));
strcpy(args->auth_pass, ptr + 1);
if (strlen(args->auth_user) == 0) {
fprintf(stderr, "--tag-auth username must be at least one character long");
return 1;
}
args->tag_auth_enabled = TRUE;
} else {
args->tag_auth_enabled = FALSE;
}
args->index_count = argc - 1; args->index_count = argc - 1;
args->indices = argv + 1; args->indices = argv + 1;
@@ -313,6 +380,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url) LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address) LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials) LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user) LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass) LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count) LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
@@ -333,3 +401,35 @@ web_args_t *web_args_create() {
return args; return args;
} }
/**
 * Validate and complete the arguments of the exec sub-command.
 *
 * Checks that the positional index path (argv[1]) resolves through
 * abspath(), fills in the default Elasticsearch URL when none was given and
 * loads the script file into args->script.
 *
 * @param args arguments to validate; index_path, es_url and script are set
 * @param argc number of entries in argv
 * @param argv positional arguments; argv[1] is the index path
 * @return 0 when the arguments are usable, 1 otherwise
 */
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
    if (argc < 2) {
        fprintf(stderr, "Required positional argument: PATH.\n");
        return 1;
    }

    // abspath() is used only as an existence check: the resolved copy is
    // discarded and the path is stored exactly as the user typed it.
    const char *requested_path = argv[1];
    char *resolved = abspath(requested_path);
    if (resolved == NULL) {
        fprintf(stderr, "File not found: %s\n", requested_path);
        return 1;
    }
    args->index_path = requested_path;
    free(resolved);

    if (args->es_url == NULL) {
        args->es_url = DEFAULT_ES_URL;
    }

    if (args->script_path == NULL) {
        LOG_FATAL("cli.c", "--script-file argument is required");
    }

    int load_failed = load_script(args->script_path, &args->script) != 0;
    if (load_failed) {
        return 1;
    }

    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
    LOG_DEBUGF("cli.c", "arg script=%s", args->script)
    return 0;
}

View File

@@ -22,6 +22,9 @@ typedef struct scan_args {
const char *tesseract_path; const char *tesseract_path;
char *exclude_regex; char *exclude_regex;
int fast; int fast;
const char* treemap_threshold_str;
double treemap_threshold;
int max_memory_buffer;
} scan_args_t; } scan_args_t;
scan_args_t *scan_args_create(); scan_args_t *scan_args_create();
@@ -38,19 +41,29 @@ typedef struct index_args {
int print; int print;
int batch_size; int batch_size;
int force_reset; int force_reset;
int threads;
} index_args_t; } index_args_t;
typedef struct web_args { typedef struct web_args {
char *es_url; char *es_url;
char *listen_address; char *listen_address;
char *credentials; char *credentials;
char *tag_credentials;
char auth_user[256]; char auth_user[256];
char auth_pass[256]; char auth_pass[256];
int auth_enabled; int auth_enabled;
int tag_auth_enabled;
int index_count; int index_count;
const char **indices; const char **indices;
} web_args_t; } web_args_t;
// Arguments of the exec sub-command, completed by exec_args_validate().
typedef struct exec_args {
// Elasticsearch URL; exec_args_validate() substitutes DEFAULT_ES_URL when NULL.
char *es_url;
// Index path exactly as given on the command line (argv[1]).
const char *index_path;
// Path of the script file; exec_args_validate() requires it to be set.
const char *script_path;
// Contents of the script file, filled in by load_script().
char *script;
} exec_args_t;
index_args_t *index_args_create(); index_args_t *index_args_create();
void index_args_destroy(index_args_t *args); void index_args_destroy(index_args_t *args);
@@ -63,4 +76,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv);
int web_args_validate(web_args_t *args, int argc, const char **argv); int web_args_validate(web_args_t *args, int argc, const char **argv);
exec_args_t *exec_args_create();
void exec_args_destroy(exec_args_t *args);
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
#endif #endif

6
src/ctx.c Normal file
View File

@@ -0,0 +1,6 @@
#include "ctx.h"
// Definitions of the global context objects. Their types come from ctx.h;
// presumably that header also carries the matching extern declarations, so
// each object is defined exactly once, here.
ScanCtx_t ScanCtx;
WebCtx_t WebCtx;
IndexCtx_t IndexCtx;
LogCtx_t LogCtx;

View File

@@ -5,19 +5,20 @@
#include "tpool.h" #include "tpool.h"
#include "libscan/scan.h" #include "libscan/scan.h"
#include "libscan/arc/arc.h" #include "libscan/arc/arc.h"
#include "libscan/cbr/cbr.h" #include "libscan/comic/comic.h"
#include "libscan/ebook/ebook.h" #include "libscan/ebook/ebook.h"
#include "libscan/font/font.h" #include "libscan/font/font.h"
#include "libscan/media/media.h" #include "libscan/media/media.h"
#include "libscan/ooxml/ooxml.h" #include "libscan/ooxml/ooxml.h"
#include "libscan/text/text.h" #include "libscan/text/text.h"
#include "libscan/mobi/scan_mobi.h" #include "libscan/mobi/scan_mobi.h"
#include "libscan/raw/raw.h"
#include "src/io/store.h"
#include <glib.h> #include <glib.h>
#include <pcre.h> #include <pcre.h>
//TODO Move to individual scan ctx typedef struct {
struct {
struct index_t index; struct index_t index;
GHashTable *mime_table; GHashTable *mime_table;
@@ -27,8 +28,6 @@ struct {
int threads; int threads;
int depth; int depth;
int verbose;
int very_verbose;
size_t stat_tn_size; size_t stat_tn_size;
size_t stat_index_size; size_t stat_index_size;
@@ -41,34 +40,44 @@ struct {
int fast; int fast;
scan_arc_ctx_t arc_ctx; scan_arc_ctx_t arc_ctx;
scan_cbr_ctx_t cbr_ctx; scan_comic_ctx_t comic_ctx;
scan_ebook_ctx_t ebook_ctx; scan_ebook_ctx_t ebook_ctx;
scan_font_ctx_t font_ctx; scan_font_ctx_t font_ctx;
scan_media_ctx_t media_ctx; scan_media_ctx_t media_ctx;
scan_ooxml_ctx_t ooxml_ctx; scan_ooxml_ctx_t ooxml_ctx;
scan_text_ctx_t text_ctx; scan_text_ctx_t text_ctx;
scan_mobi_ctx_t mobi_ctx; scan_mobi_ctx_t mobi_ctx;
} ScanCtx; scan_raw_ctx_t raw_ctx;
} ScanCtx_t;
struct { typedef struct {
int verbose; int verbose;
int very_verbose; int very_verbose;
int no_color; int no_color;
} LogCtx; } LogCtx_t;
struct { typedef struct {
char *es_url; char *es_url;
int batch_size; int batch_size;
} IndexCtx; tpool_t *pool;
store_t *tag_store;
GHashTable *tags;
} IndexCtx_t;
struct { typedef struct {
char *es_url; char *es_url;
int index_count; int index_count;
char *auth_user; char *auth_user;
char *auth_pass; char *auth_pass;
int auth_enabled; int auth_enabled;
struct index_t indices[16]; int tag_auth_enabled;
} WebCtx; struct index_t indices[64];
} WebCtx_t;
extern ScanCtx_t ScanCtx;
extern WebCtx_t WebCtx;
extern IndexCtx_t IndexCtx;
extern LogCtx_t LogCtx;
#endif #endif

View File

@@ -14,9 +14,18 @@ typedef struct es_indexer {
} es_indexer_t; } es_indexer_t;
static es_indexer_t *Indexer; static __thread es_indexer_t *Indexer;
void delete_queue(int max); void delete_queue(int max);
void elastic_flush();
/**
 * Flush any queued bulk lines, then release the indexer referenced by the
 * file-scope Indexer pointer.
 *
 * The pointer is reset to NULL afterwards: other code in this file tests
 * `Indexer == NULL` to decide whether an indexer must be created, so leaving
 * a dangling pointer behind would lead to a use-after-free or double free on
 * the next call.
 */
void elastic_cleanup() {
    elastic_flush();
    if (Indexer != NULL) {
        free(Indexer->es_url);
        free(Indexer);
        Indexer = NULL;
    }
}
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) { void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
@@ -35,8 +44,12 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
cJSON_Delete(line); cJSON_Delete(line);
} }
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) { void index_json_func(void *arg) {
es_bulk_line_t *line = arg;
elastic_index_line(line);
}
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
char *json = cJSON_PrintUnformatted(document); char *json = cJSON_PrintUnformatted(document);
size_t json_len = strlen(json); size_t json_len = strlen(json);
@@ -48,11 +61,15 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
bulk_line->next = NULL; bulk_line->next = NULL;
cJSON_free(json); cJSON_free(json);
elastic_index_line(bulk_line); tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
} }
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) { void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
}
cJSON *body = cJSON_CreateObject(); cJSON *body = cJSON_CreateObject();
cJSON *script_obj = cJSON_AddObjectToObject(body, "script"); cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
cJSON_AddStringToObject(script_obj, "lang", "painless"); cJSON_AddStringToObject(script_obj, "lang", "painless");
@@ -65,7 +82,7 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
char *str = cJSON_Print(body); char *str = cJSON_Print(body);
char bulk_url[4096]; char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url); snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?wait_for_completion=false", Indexer->es_url);
response_t *r = web_post(bulk_url, str); response_t *r = web_post(bulk_url, str);
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code); LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
cJSON *resp = cJSON_Parse(r->body); cJSON *resp = cJSON_Parse(r->body);
@@ -85,33 +102,44 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
cJSON_Delete(resp); cJSON_Delete(resp);
} }
#define ACTION_STR_LEN 91
void *create_bulk_buffer(int max, int *count, size_t *buf_len) { void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
es_bulk_line_t *line = Indexer->line_head; es_bulk_line_t *line = Indexer->line_head;
*count = 0; *count = 0;
size_t buf_size = 0; size_t buf_size = 0;
size_t buf_cur = 0; size_t buf_cur = 0;
char *buf = malloc(1); char *buf = malloc(8196);
size_t buf_capacity = 8196;
while (line != NULL && *count < max) { while (line != NULL && *count < max) {
char action_str[512]; char action_str[256];
snprintf(action_str, 512, snprintf(action_str, 256,
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str); "{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line); size_t line_len = strlen(line->line);
buf = realloc(buf, buf_size + line_len + action_str_len);
buf_size += line_len + action_str_len;
memcpy(buf + buf_cur, action_str, action_str_len); while (buf_size + line_len + ACTION_STR_LEN > buf_capacity) {
buf_cur += action_str_len; buf_capacity *= 2;
buf = realloc(buf, buf_capacity);
}
buf_size += line_len + ACTION_STR_LEN;
memcpy(buf + buf_cur, action_str, ACTION_STR_LEN);
buf_cur += ACTION_STR_LEN;
memcpy(buf + buf_cur, line->line, line_len); memcpy(buf + buf_cur, line->line, line_len);
buf_cur += line_len; buf_cur += line_len;
line = line->next; line = line->next;
(*count)++; (*count)++;
} }
buf = realloc(buf, buf_size + 1);
if (buf_size + 1 > buf_capacity) {
buf = realloc(buf, buf_capacity + 1);
}
*(buf + buf_cur) = '\0'; *(buf + buf_cur) = '\0';
*buf_len = buf_cur; *buf_len = buf_cur;
@@ -119,7 +147,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
} }
void print_errors(response_t *r) { void print_errors(response_t *r) {
char * tmp = malloc(r->size + 1); char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size); memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0'; *(tmp + r->size) = '\0';
@@ -177,6 +205,15 @@ void _elastic_flush(int max) {
_elastic_flush(max / 2); _elastic_flush(max / 2);
return; return;
} else if (r->status_code == 429) {
free_response(r);
free(buf);
LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
usleep(1000000 * 20);
_elastic_flush(max);
return;
} else if (r->status_code != 200) { } else if (r->status_code != 200) {
print_errors(r); print_errors(r);
delete_queue(Indexer->queued); delete_queue(Indexer->queued);
@@ -202,9 +239,8 @@ void delete_queue(int max) {
Indexer->line_head = tmp->next; Indexer->line_head = tmp->next;
if (Indexer->line_head == NULL) { if (Indexer->line_head == NULL) {
Indexer->line_tail = NULL; Indexer->line_tail = NULL;
} else {
free(tmp);
} }
free(tmp);
Indexer->queued -= 1; Indexer->queued -= 1;
} }
} }
@@ -254,7 +290,7 @@ es_indexer_t *create_indexer(const char *url) {
return indexer; return indexer;
} }
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) { void finish_indexer(char *script, char *index_id) {
char url[4096]; char url[4096];
@@ -265,22 +301,23 @@ void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
if (script != NULL) { if (script != NULL) {
execute_update_script(script, index_id); execute_update_script(script, index_id);
} free(script);
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url); snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
r = web_post(url, ""); r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code); LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r); free_response(r);
}
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url); snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
r = web_post(url, ""); r = web_post(url, "");
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code); LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r); free_response(r);
if (Indexer != NULL) { snprintf(url, sizeof(url), "%s/sist2/_settings", IndexCtx.es_url);
free(Indexer->es_url); r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
free(Indexer); LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
} free_response(r);
} }
void elastic_init(int force_reset) { void elastic_init(int force_reset) {

View File

@@ -16,15 +16,14 @@ typedef struct es_indexer es_indexer_t;
void elastic_index_line(es_bulk_line_t *line); void elastic_index_line(es_bulk_line_t *line);
void elastic_flush();
void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]); void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]); void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char* es_url); es_indexer_t *create_indexer(const char* es_url);
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]); void elastic_cleanup();
void finish_indexer(char *script, char *index_id);
void elastic_init(int force_reset); void elastic_init(int force_reset);
@@ -32,4 +31,6 @@ cJSON *elastic_get_document(const char *uuid_str);
char *elastic_get_status(); char *elastic_get_status();
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]);
#endif #endif

File diff suppressed because one or more lines are too long

View File

@@ -1,11 +1,19 @@
#include "web.h" #include "web.h"
#include "src/sist.h" #include "src/sist.h"
#include "src/ctx.h"
#include <mongoose.h> #include <mongoose.h>
#include <pthread.h> #include <pthread.h>
#include <curl/curl.h>
size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
size_t real_size = size * nmemb;
dyn_buffer_t *buf = user_data;
dyn_buffer_write(buf, ptr, real_size);
return real_size;
}
void free_response(response_t *resp) { void free_response(response_t *resp) {
if (resp->body != NULL) { if (resp->body != NULL) {
free(resp->body); free(resp->body);
@@ -100,55 +108,125 @@ subreq_ctx_t *http_req(const char *url, const char *extra_headers, const char *p
return ctx; return ctx;
} }
response_t *web_get(const char *url) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "GET");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
response_t *ret = ctx->ev_data.resp;
free(ctx);
return ret;
}
subreq_ctx_t *web_post_async(const char *url, const char *data) { subreq_ctx_t *web_post_async(const char *url, const char *data) {
return http_req(url, SIST2_HEADERS, data, "POST"); return http_req(url, SIST2_HEADERS, data, "POST");
} }
response_t *web_post(const char *url, const char *data) { response_t *web_get(const char *url) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "POST"); response_t *resp = malloc(sizeof(response_t));
while (ctx->ev_data.done == FALSE) { CURL *curl;
mg_mgr_poll(&ctx->mgr, 50); dyn_buffer_t buffer = dyn_buffer_create();
}
mg_mgr_free(&ctx->mgr);
response_t *ret = ctx->ev_data.resp; curl = curl_easy_init();
free(ctx); curl_easy_setopt(curl, CURLOPT_URL, url);
return ret; curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
} }
response_t *web_put(const char *url, const char *data) { response_t *web_post(const char *url, const char *data) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "PUT");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
response_t *ret = ctx->ev_data.resp; response_t *resp = malloc(sizeof(response_t));
free(ctx);
return ret; CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}
response_t *web_put(const char *url, const char *data) {
response_t *resp = malloc(sizeof(response_t));
CURL *curl;
dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
} }
response_t *web_delete(const char *url) { response_t *web_delete(const char *url) {
subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "DELETE");
while (ctx->ev_data.done == FALSE) {
mg_mgr_poll(&ctx->mgr, 50);
}
mg_mgr_free(&ctx->mgr);
response_t *ret = ctx->ev_data.resp; response_t *resp = malloc(sizeof(response_t));
free(ctx);
return ret; CURL *curl;
} dyn_buffer_t buffer = dyn_buffer_create();
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
return resp;
}

View File

@@ -62,7 +62,7 @@ index_descriptor_t read_index_descriptor(char *path) {
int fd = open(path, O_RDONLY); int fd = open(path, O_RDONLY);
if (fd == -1) { if (fd == -1) {
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path ,strerror(errno)) LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
} }
char *buf = malloc(info.st_size + 1); char *buf = malloc(info.st_size + 1);
@@ -172,8 +172,8 @@ void write_document(document_t *doc) {
dyn_buffer_t buf = dyn_buffer_create(); dyn_buffer_t buf = dyn_buffer_create();
// Ignore root directory in the file path // Ignore root directory in the file path
doc->ext = doc->ext - ScanCtx.index.desc.root_len; doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
doc->base = doc->base - ScanCtx.index.desc.root_len; doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
doc->filepath += ScanCtx.index.desc.root_len; doc->filepath += ScanCtx.index.desc.root_len;
dyn_buffer_write(&buf, doc, sizeof(line_t)); dyn_buffer_write(&buf, doc, sizeof(line_t));
@@ -230,7 +230,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
char uuid_str[UUID_STR_LEN]; char uuid_str[UUID_STR_LEN];
uuid_unparse(line.uuid, uuid_str); uuid_unparse(line.uuid, uuid_str);
const char* mime_text = mime_get_mime_text(line.mime); const char *mime_text = mime_get_mime_text(line.mime);
if (mime_text == NULL) { if (mime_text == NULL) {
cJSON_AddNullToObject(document, "mime"); cJSON_AddNullToObject(document, "mime");
} else { } else {
@@ -239,22 +239,37 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
cJSON_AddNumberToObject(document, "size", (double) line.size); cJSON_AddNumberToObject(document, "size", (double) line.size);
cJSON_AddNumberToObject(document, "mtime", line.mtime); cJSON_AddNumberToObject(document, "mtime", line.mtime);
int c; int c = 0;
while ((c = getc(file)) != 0) { while ((c = getc(file)) != 0) {
dyn_buffer_write_char(&buf, (char) c); dyn_buffer_write_char(&buf, (char) c);
} }
dyn_buffer_write_char(&buf, '\0'); dyn_buffer_write_char(&buf, '\0');
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, buf.buf);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}
cJSON_AddStringToObject(document, "extension", buf.buf + line.ext); cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
if (*(buf.buf + line.ext - 1) == '.') { if (*(buf.buf + line.ext - 1) == '.') {
*(buf.buf + line.ext - 1) = '\0'; *(buf.buf + line.ext - 1) = '\0';
} else { } else {
*(buf.buf + line.ext) = '\0'; *(buf.buf + line.ext) = '\0';
} }
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
char tmp[PATH_MAX * 3];
str_escape(tmp, buf.buf + line.base);
cJSON_AddStringToObject(document, "name", tmp);
if (line.base > 0) { if (line.base > 0) {
*(buf.buf + line.base - 1) = '\0'; *(buf.buf + line.base - 1) = '\0';
cJSON_AddStringToObject(document, "path", buf.buf);
str_escape(tmp, buf.buf);
cJSON_AddStringToObject(document, "path", tmp);
} else { } else {
cJSON_AddStringToObject(document, "path", ""); cJSON_AddStringToObject(document, "path", "");
} }

View File

@@ -1,9 +1,10 @@
#include "store.h" #include "store.h"
#include "src/ctx.h" #include "src/ctx.h"
store_t *store_create(char *path) { store_t *store_create(char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t)); store_t *store = malloc(sizeof(struct store_t));
store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL); pthread_rwlock_init(&store->lock, NULL);
mdb_env_create(&store->env); mdb_env_create(&store->env);
@@ -18,7 +19,7 @@ store_t *store_create(char *path) {
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path) LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
} }
store->size = (size_t) 1024 * 1024 * 5; store->size = (size_t) store->chunk_size;
ScanCtx.stat_tn_size = 0; ScanCtx.stat_tn_size = 0;
mdb_env_set_mapsize(store->env, store->size); mdb_env_set_mapsize(store->env, store->size);
@@ -69,7 +70,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
// Cannot resize when there is a opened transaction. // Cannot resize when there is a opened transaction.
// Resize take effect on the next commit. // Resize take effect on the next commit.
pthread_rwlock_wrlock(&store->lock); pthread_rwlock_wrlock(&store->lock);
store->size += 1024 * 1024 * 50; store->size += store->chunk_size;
mdb_env_set_mapsize(store->env, store->size); mdb_env_set_mapsize(store->env, store->size);
mdb_txn_begin(store->env, NULL, 0, &txn); mdb_txn_begin(store->env, NULL, 0, &txn);
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0); put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
@@ -110,3 +111,40 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
return buf; return buf;
} }
GHashTable *store_read_all(store_t *store) {
int count = 0;
GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
MDB_txn *txn = NULL;
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
MDB_cursor *cur = NULL;
mdb_cursor_open(txn, store->dbi, &cur);
MDB_val key;
MDB_val value;
while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
char *key_str = malloc(key.mv_size);
memcpy(key_str, key.mv_data, key.mv_size);
char *val_str = malloc(value.mv_size);
memcpy(val_str, value.mv_data, value.mv_size);
g_hash_table_insert(table, key_str, val_str);
count += 1;
}
LOG_DEBUGF("store.c", "Read tags for %d documents", count);
mdb_cursor_close(cur);
mdb_txn_abort(txn);
return table;
}
/**
 * Copy the LMDB environment of a store into another directory.
 *
 * The destination directory is created first with read/write/execute
 * permission for the owner only. The return values of mkdir() and
 * mdb_env_copy() are not checked.
 *
 * @param store       store whose environment is copied
 * @param destination path of the directory receiving the copy
 */
void store_copy(store_t *store, const char *destination) {
    const mode_t owner_rwx = S_IWUSR | S_IRUSR | S_IXUSR;

    mkdir(destination, owner_rwx);
    mdb_env_copy(store->env, destination);
}

View File

@@ -4,14 +4,20 @@
#include <pthread.h> #include <pthread.h>
#include <lmdb.h> #include <lmdb.h>
#include <glib.h>
/*
 * Chunk sizes (in bytes) passed to store_create().
 * Parenthesized so the macros expand to a single value inside larger
 * expressions such as `x / STORE_SIZE_TN`.
 */
#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 16)
typedef struct store_t { typedef struct store_t {
MDB_dbi dbi; MDB_dbi dbi;
MDB_env *env; MDB_env *env;
size_t size; size_t size;
size_t chunk_size;
pthread_rwlock_t lock; pthread_rwlock_t lock;
} store_t; } store_t;
store_t *store_create(char *path); store_t *store_create(char *path, size_t chunk_size);
void store_destroy(store_t *store); void store_destroy(store_t *store);
@@ -19,4 +25,8 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen); char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
GHashTable *store_read_all(store_t *store);
void store_copy(store_t *store, const char *destination);
#endif #endif

View File

@@ -12,7 +12,7 @@ const char *log_levels[] = {
"DEBUG", "INFO", "WARNING", "ERROR", "FATAL" "DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
}; };
void sist_logf(const char *filepath, int level, char *format, ...) { void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
static int is_tty = -1; static int is_tty = -1;
if (is_tty == -1) { if (is_tty == -1) {
@@ -46,11 +46,8 @@ void sist_logf(const char *filepath, int level, char *format, ...) {
); );
} }
va_list ap;
va_start(ap, format);
size_t maxsize = sizeof(log_str) - log_len; size_t maxsize = sizeof(log_str) - log_len;
log_len += vsnprintf(log_str + log_len, maxsize, format, ap); log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
va_end(ap);
if (is_tty) { if (is_tty) {
log_len += sprintf(log_str + log_len, "\033[0m\n"); log_len += sprintf(log_str + log_len, "\033[0m\n");
@@ -65,6 +62,13 @@ void sist_logf(const char *filepath, int level, char *format, ...) {
} }
} }
/**
 * printf-style logging entry point.
 *
 * Packs the variadic arguments into a va_list and forwards them, together
 * with filepath, level and format, to vsist_logf().
 */
void sist_logf(const char *filepath, int level, char *format, ...) {
    va_list args;

    va_start(args, format);
    vsist_logf(filepath, level, format, args);
    va_end(args);
}
void sist_log(const char *filepath, int level, char *str) { void sist_log(const char *filepath, int level, char *str) {
static int is_tty = -1; static int is_tty = -1;

View File

@@ -40,6 +40,7 @@
#include "sist.h" #include "sist.h"
void sist_logf(const char *filepath, int level, char *format, ...); void sist_logf(const char *filepath, int level, char *format, ...);
void vsist_logf(const char *filepath, int level, char *format, va_list ap);
void sist_log(const char *filepath, int level, char *str); void sist_log(const char *filepath, int level, char *str);

View File

@@ -2,7 +2,7 @@
#include "ctx.h" #include "ctx.h"
#include <third-party/argparse/argparse.h> #include <third-party/argparse/argparse.h>
#include <glib.h> #include <locale.h>
#include "cli.h" #include "cli.h"
#include "io/serialize.h" #include "io/serialize.h"
@@ -14,16 +14,19 @@
#include "parsing/mime.h" #include "parsing/mime.h"
#include "parsing/parse.h" #include "parsing/parse.h"
#include "stats.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool." #define DESCRIPTION "Lightning-fast file system indexer and search tool."
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0" #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.0.0"; static const char *const Version = "2.7.3";
static const char *const usage[] = { static const char *const usage[] = {
"sist2 scan [OPTION]... PATH", "sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX", "sist2 index [OPTION]... INDEX",
"sist2 web [OPTION]... INDEX...", "sist2 web [OPTION]... INDEX...",
"sist2 exec-script [OPTION]... INDEX",
NULL, NULL,
}; };
@@ -55,9 +58,9 @@ void _log(const char *filepath, int level, char *str) {
exit(-1); exit(-1);
} }
if (ScanCtx.verbose) { if (LogCtx.verbose) {
if (level == LEVEL_DEBUG) { if (level == LEVEL_DEBUG) {
if (ScanCtx.very_verbose) { if (LogCtx.very_verbose) {
sist_log(filepath, level, str); sist_log(filepath, level, str);
} }
} else { } else {
@@ -72,17 +75,17 @@ void _logf(const char *filepath, int level, char *format, ...) {
va_start(args, format); va_start(args, format);
if (level == LEVEL_FATAL) { if (level == LEVEL_FATAL) {
sist_logf(filepath, level, format, args); vsist_logf(filepath, level, format, args);
exit(-1); exit(-1);
} }
if (ScanCtx.verbose) { if (LogCtx.verbose) {
if (level == LEVEL_DEBUG) { if (level == LEVEL_DEBUG) {
if (ScanCtx.very_verbose) { if (LogCtx.very_verbose) {
sist_logf(filepath, level, format, args); vsist_logf(filepath, level, format, args);
} }
} else { } else {
sist_logf(filepath, level, format, args); vsist_logf(filepath, level, format, args);
} }
} }
va_end(args); va_end(args);
@@ -96,11 +99,14 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.arc_ctx.logf = _logf; ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse; ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
// Cbr // Comic
ScanCtx.cbr_ctx.log = _log; ScanCtx.comic_ctx.log = _log;
ScanCtx.cbr_ctx.logf = _logf; ScanCtx.comic_ctx.logf = _logf;
ScanCtx.cbr_ctx.store = _store; ScanCtx.comic_ctx.store = _store;
ScanCtx.cbr_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr"); ScanCtx.comic_ctx.tn_size = args->size;
ScanCtx.comic_ctx.tn_qscale = args->quality;
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
// Ebook // Ebook
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL); pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
@@ -124,18 +130,25 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.media_ctx.log = _log; ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf; ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store; ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
init_media(); init_media();
// OOXML // OOXML
ScanCtx.ooxml_ctx.content_size = args->content_size; ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log; ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf; ScanCtx.ooxml_ctx.logf = _logf;
ScanCtx.ooxml_ctx.store = _store;
// MOBI // MOBI
ScanCtx.mobi_ctx.content_size = args->content_size; ScanCtx.mobi_ctx.content_size = args->content_size;
ScanCtx.mobi_ctx.log = _log; ScanCtx.mobi_ctx.log = _log;
ScanCtx.mobi_ctx.logf = _logf; ScanCtx.mobi_ctx.logf = _logf;
// TEXT
ScanCtx.text_ctx.content_size = args->content_size;
ScanCtx.text_ctx.log = _log;
ScanCtx.text_ctx.logf = _logf;
ScanCtx.threads = args->threads; ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth; ScanCtx.depth = args->depth;
@@ -145,6 +158,13 @@ void initialize_scan_context(scan_args_t *args) {
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url)); strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root); ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
ScanCtx.fast = args->fast; ScanCtx.fast = args->fast;
// Raw
ScanCtx.raw_ctx.tn_qscale = args->quality;
ScanCtx.raw_ctx.tn_size = args->size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
} }
@@ -160,7 +180,7 @@ void sist2_scan(scan_args_t *args) {
char store_path[PATH_MAX]; char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path); snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR); mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
ScanCtx.index.store = store_create(store_path); ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
scan_print_header(); scan_print_header();
@@ -195,17 +215,19 @@ void sist2_scan(scan_args_t *args) {
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)) LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
} }
ScanCtx.pool = tpool_create(args->threads, thread_cleanup); ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
tpool_start(ScanCtx.pool); tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root); walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool); tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool); tpool_destroy(ScanCtx.pool);
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
if (args->incremental != NULL) { if (args->incremental != NULL) {
char dst_path[PATH_MAX]; char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental); snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path); snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
store_t *source = store_create(store_path); store_t *source = store_create(store_path, STORE_SIZE_TN);
DIR *dir = opendir(args->incremental); DIR *dir = opendir(args->incremental);
if (dir == NULL) { if (dir == NULL) {
@@ -222,6 +244,13 @@ void sist2_scan(scan_args_t *args) {
} }
closedir(dir); closedir(dir);
store_destroy(source); store_destroy(source);
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
store_copy(source_tags, dst_path);
store_destroy(source_tags);
} }
store_destroy(ScanCtx.index.store); store_destroy(ScanCtx.index.store);
@@ -253,6 +282,12 @@ void sist2_index(index_args_t *args) {
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno)) LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
} }
char path_tmp[PATH_MAX];
snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
index_func f; index_func f;
if (args->print) { if (args->print) {
f = print_json; f = print_json;
@@ -260,6 +295,16 @@ void sist2_index(index_args_t *args) {
f = index_json; f = index_json;
} }
void (*cleanup)();
if (args->print) {
cleanup = NULL;
} else {
cleanup = elastic_cleanup;
}
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE);
tpool_start(IndexCtx.pool);
struct dirent *de; struct dirent *de;
while ((de = readdir(dir)) != NULL) { while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) { if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
@@ -270,10 +315,32 @@ void sist2_index(index_args_t *args) {
} }
closedir(dir); closedir(dir);
tpool_wait(IndexCtx.pool);
if (!args->print) { if (!args->print) {
elastic_flush(); finish_indexer(args->script, desc.uuid);
destroy_indexer(args->script, desc.uuid);
} }
tpool_destroy(IndexCtx.pool);
store_destroy(IndexCtx.tag_store);
g_hash_table_remove_all(IndexCtx.tags);
g_hash_table_destroy(IndexCtx.tags);
}
void sist2_exec_script(exec_args_t *args) {
LogCtx.verbose = TRUE;
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
index_descriptor_t desc = read_index_descriptor(descriptor_path);
IndexCtx.es_url = args->es_url;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
execute_update_script(args->script, desc.uuid);
free(args->script);
} }
void sist2_web(web_args_t *args) { void sist2_web(web_args_t *args) {
@@ -283,6 +350,7 @@ void sist2_web(web_args_t *args) {
WebCtx.auth_user = args->auth_user; WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass; WebCtx.auth_pass = args->auth_pass;
WebCtx.auth_enabled = args->auth_enabled; WebCtx.auth_enabled = args->auth_enabled;
WebCtx.tag_auth_enabled = args->tag_auth_enabled;
for (int i = 0; i < args->index_count; i++) { for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]); char *abs_path = abspath(args->indices[i]);
@@ -292,7 +360,11 @@ void sist2_web(web_args_t *args) {
char path_tmp[PATH_MAX]; char path_tmp[PATH_MAX];
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path); snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
WebCtx.indices[i].store = store_create(path_tmp); WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path); snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
WebCtx.indices[i].desc = read_index_descriptor(path_tmp); WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
@@ -307,14 +379,18 @@ void sist2_web(web_args_t *args) {
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
setlocale(LC_ALL, "");
scan_args_t *scan_args = scan_args_create(); scan_args_t *scan_args = scan_args_create();
index_args_t *index_args = index_args_create(); index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create(); web_args_t *web_args = web_args_create();
exec_args_t *exec_args = exec_args_create();
int arg_version = 0; int arg_version = 0;
char *common_es_url = NULL; char *common_es_url = NULL;
char *common_script_path = NULL;
int common_threads = 0;
struct argparse_option options[] = { struct argparse_option options[] = {
OPT_HELP(), OPT_HELP(),
@@ -324,7 +400,7 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"), OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
OPT_GROUP("Scan options"), OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &scan_args->quality, OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"), "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
OPT_INTEGER(0, "size", &scan_args->size, OPT_INTEGER(0, "size", &scan_args->size,
@@ -345,11 +421,17 @@ int main(int argc, const char *argv[]) {
"which are installed on your machine)"), "which are installed on your machine)"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"), OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"), OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
OPT_GROUP("Index options"), OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."), OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."), OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"), OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. " OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"), "(You must use this option the first time you use the index command)"),
@@ -358,6 +440,10 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"), OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"), OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_END(), OPT_END(),
}; };
@@ -378,6 +464,11 @@ int main(int argc, const char *argv[]) {
web_args->es_url = common_es_url; web_args->es_url = common_es_url;
index_args->es_url = common_es_url; index_args->es_url = common_es_url;
exec_args->es_url = common_es_url;
index_args->script_path = common_script_path;
exec_args->script_path = common_script_path;
index_args->threads = common_threads;
scan_args->threads = common_threads;
if (argc == 0) { if (argc == 0) {
argparse_usage(&argparse); argparse_usage(&argparse);
@@ -406,6 +497,14 @@ int main(int argc, const char *argv[]) {
} }
sist2_web(web_args); sist2_web(web_args);
} else if (strcmp(argv[0], "exec-script") == 0) {
int err = exec_args_validate(exec_args, argc, argv);
if (err != 0) {
goto end;
}
sist2_exec_script(exec_args);
} else { } else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]); fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse); argparse_usage(&argparse);
@@ -417,6 +516,7 @@ int main(int argc, const char *argv[]) {
scan_args_destroy(scan_args); scan_args_destroy(scan_args);
index_args_destroy(index_args); index_args_destroy(index_args);
web_args_destroy(web_args); web_args_destroy(web_args);
exec_args_destroy(exec_args);
return 0; return 0;
} }

View File

@@ -3,7 +3,7 @@
#include "../sist.h" #include "../sist.h"
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16 #define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
#define MIME_EMPTY 1 #define MIME_EMPTY 1
@@ -28,6 +28,12 @@
#define MOBI_MASK 0x02000000 #define MOBI_MASK 0x02000000
#define IS_MOBI(mime_id) (mime_id & MOBI_MASK) == MOBI_MASK #define IS_MOBI(mime_id) (mime_id & MOBI_MASK) == MOBI_MASK
/*
 * Flag bits tested on a mime id. The argument and the whole expansion are
 * parenthesized so that uses such as `!IS_RAW(id)` or `IS_RAW(a | b)`
 * evaluate as a single boolean test on the complete argument.
 */
#define MARKUP_MASK 0x01000000
#define IS_MARKUP(mime_id) (((mime_id) & MARKUP_MASK) == MARKUP_MASK)

#define RAW_MASK 0x00800000
#define IS_RAW(mime_id) (((mime_id) & RAW_MASK) == RAW_MASK)
enum major_mime { enum major_mime {
MimeInvalid = 0, MimeInvalid = 0,
MimeModel = 1, MimeModel = 1,

View File

@@ -63,377 +63,397 @@ enum mime {
application_vnd_ms_excel=655415, application_vnd_ms_excel=655415,
application_vnd_ms_fontobject=655416, application_vnd_ms_fontobject=655416,
application_vnd_ms_opentype=655417 | 0x20000000, application_vnd_ms_opentype=655417 | 0x20000000,
application_vnd_ms_pki_certstore=655418, application_vnd_ms_outlook=655418,
application_vnd_ms_pki_pko=655419, application_vnd_ms_pki_certstore=655419,
application_vnd_ms_pki_seccat=655420, application_vnd_ms_pki_pko=655420,
application_vnd_ms_powerpoint=655421, application_vnd_ms_pki_seccat=655421,
application_vnd_ms_project=655422, application_vnd_ms_powerpoint=655422,
application_vnd_oasis_opendocument_base=655423, application_vnd_ms_project=655423,
application_vnd_oasis_opendocument_formula=655424, application_vnd_oasis_opendocument_base=655424,
application_vnd_oasis_opendocument_graphics=655425, application_vnd_oasis_opendocument_formula=655425,
application_vnd_oasis_opendocument_presentation=655426, application_vnd_oasis_opendocument_graphics=655426,
application_vnd_oasis_opendocument_spreadsheet=655427, application_vnd_oasis_opendocument_presentation=655427,
application_vnd_oasis_opendocument_text=655428, application_vnd_oasis_opendocument_spreadsheet=655428,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655429 | 0x04000000, application_vnd_oasis_opendocument_text=655429,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655430 | 0x04000000, application_vnd_openxmlformats_officedocument_presentationml_presentation=655430 | 0x04000000,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655431 | 0x04000000, application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655431 | 0x04000000,
application_vnd_symbian_install=655432, application_vnd_openxmlformats_officedocument_wordprocessingml_document=655432 | 0x04000000,
application_vnd_tcpdump_pcap=655433, application_vnd_symbian_install=655433,
application_vnd_wap_wmlc=655434, application_vnd_tcpdump_pcap=655434,
application_vnd_wap_wmlscriptc=655435, application_vnd_wap_wmlc=655435,
application_vnd_xara=655436, application_vnd_wap_wmlscriptc=655436,
application_vocaltec_media_desc=655437, application_vnd_xara=655437,
application_vocaltec_media_file=655438, application_vocaltec_media_desc=655438,
application_warc=655439, application_vocaltec_media_file=655439,
application_winhelp=655440, application_warc=655440,
application_wordperfect=655441, application_winhelp=655441,
application_wordperfect6_0=655442, application_wordperfect=655442,
application_wordperfect6_1=655443, application_wordperfect6_0=655443,
application_x_123=655444, application_wordperfect6_1=655444,
application_x_7z_compressed=655445 | 0x10000000, application_x_123=655445,
application_x_aim=655446, application_x_7z_compressed=655446 | 0x10000000,
application_x_apple_diskimage=655447, application_x_aim=655447,
application_x_arc=655448 | 0x10000000, application_x_apple_diskimage=655448,
application_x_archive=655449, application_x_arc=655449 | 0x10000000,
application_x_atari_7800_rom=655450, application_x_archive=655450,
application_x_authorware_bin=655451, application_x_atari_7800_rom=655451,
application_x_authorware_map=655452, application_x_authorware_bin=655452,
application_x_authorware_seg=655453, application_x_authorware_map=655453,
application_x_avira_qua=655454, application_x_authorware_seg=655454,
application_x_bcpio=655455, application_x_avira_qua=655455,
application_x_bittorrent=655456, application_x_bcpio=655456,
application_x_bsh=655457, application_x_bittorrent=655457,
application_x_bytecode_python=655458, application_x_bsh=655458,
application_x_bzip=655459, application_x_bytecode_python=655459,
application_x_bzip2=655460 | 0x08000000, application_x_bzip=655460,
application_x_cbr=655461, application_x_bzip2=655461 | 0x08000000,
application_x_cbz=655462 | 0x40000000, application_x_cbr=655462,
application_x_cdlink=655463, application_x_cbz=655463,
application_x_chat=655464, application_x_cdlink=655464,
application_x_chrome_extension=655465, application_x_chat=655465,
application_x_cocoa=655466, application_x_chrome_extension=655466,
application_x_conference=655467, application_x_cocoa=655467,
application_x_coredump=655468, application_x_conference=655468,
application_x_cpio=655469, application_x_coredump=655469,
application_x_dbf=655470, application_x_cpio=655470,
application_x_dbt=655471, application_x_dbf=655471,
application_x_debian_package=655472, application_x_dbt=655472,
application_x_deepv=655473, application_x_debian_package=655473,
application_x_director=655474, application_x_deepv=655474,
application_x_dmp=655475, application_x_director=655475,
application_x_dosdriver=655476, application_x_dmp=655476,
application_x_dosexec=655477, application_x_dosdriver=655477,
application_x_dvi=655478, application_x_dosexec=655478,
application_x_elc=655479, application_x_dvi=655479,
application_x_elc=655480,
application_x_empty=1, application_x_empty=1,
application_x_envoy=655481, application_x_envoy=655482,
application_x_esrehber=655482, application_x_esrehber=655483,
application_x_excel=655483, application_x_excel=655484,
application_x_executable=655484, application_x_executable=655485,
application_x_font_gdos=655485, application_x_font_gdos=655486,
application_x_font_pf2=655486, application_x_font_pf2=655487,
application_x_font_pfm=655487, application_x_font_pfm=655488,
application_x_font_sfn=655488, application_x_font_sfn=655489,
application_x_font_ttf=655489 | 0x20000000, application_x_font_ttf=655490 | 0x20000000,
application_x_fptapplication_x_dbt=655490, application_x_fptapplication_x_dbt=655491,
application_x_freelance=655491, application_x_freelance=655492,
application_x_gamecube_rom=655492, application_x_gamecube_rom=655493,
application_x_gdbm=655493, application_x_gdbm=655494,
application_x_gettext_translation=655494, application_x_gettext_translation=655495,
application_x_git=655495, application_x_git=655496,
application_x_gsp=655496, application_x_gsp=655497,
application_x_gss=655497, application_x_gss=655498,
application_x_gtar=655498, application_x_gtar=655499,
application_x_gzip=655499, application_x_gzip=655500,
application_x_hdf=655500, application_x_hdf=655501,
application_x_helpfile=655501, application_x_helpfile=655502,
application_x_httpd_imap=655502, application_x_httpd_imap=655503,
application_x_ima=655503, application_x_ima=655504,
application_x_innosetup=655504, application_x_innosetup=655505,
application_x_internett_signup=655505, application_x_internett_signup=655506,
application_x_inventor=655506, application_x_inventor=655507,
application_x_ip2=655507, application_x_ip2=655508,
application_x_java_applet=655508, application_x_java_applet=655509,
application_x_java_commerce=655509, application_x_java_commerce=655510,
application_x_java_image=655510, application_x_java_image=655511,
application_x_java_jmod=655511, application_x_java_jmod=655512,
application_x_java_keystore=655512, application_x_java_keystore=655513,
application_x_kdelnk=655513, application_x_kdelnk=655514,
application_x_koan=655514, application_x_koan=655515,
application_x_latex=655515, application_x_latex=655516,
application_x_livescreen=655516, application_x_livescreen=655517,
application_x_lotus=655517, application_x_lotus=655518,
application_x_lz4=655518 | 0x08000000, application_x_lz4=655519 | 0x08000000,
application_x_lz4_json=655519, application_x_lz4_json=655520,
application_x_lzh=655520, application_x_lzh=655521,
application_x_lzh_compressed=655521, application_x_lzh_compressed=655522,
application_x_lzip=655522 | 0x08000000, application_x_lzip=655523 | 0x08000000,
application_x_lzma=655523 | 0x08000000, application_x_lzma=655524 | 0x08000000,
application_x_lzop=655524 | 0x08000000, application_x_lzop=655525 | 0x08000000,
application_x_lzx=655525, application_x_lzx=655526,
application_x_mach_binary=655526, application_x_mach_binary=655527,
application_x_mach_executable=655527, application_x_mach_executable=655528,
application_x_magic_cap_package_1_0=655528, application_x_magic_cap_package_1_0=655529,
application_x_mathcad=655529, application_x_mathcad=655530,
application_x_maxis_dbpf=655530, application_x_maxis_dbpf=655531,
application_x_meme=655531, application_x_meme=655532,
application_x_midi=655532, application_x_midi=655533,
application_x_mif=655533, application_x_mif=655534,
application_x_mix_transfer=655534, application_x_mix_transfer=655535,
application_x_mobipocket_ebook=655535 | 0x02000000, application_x_mobipocket_ebook=655536 | 0x02000000,
application_x_ms_compress_szdd=655536, application_x_ms_compress_szdd=655537,
application_x_ms_pdb=655537, application_x_ms_pdb=655538,
application_x_ms_reader=655538, application_x_ms_reader=655539,
application_x_msaccess=655539, application_x_msaccess=655540,
application_x_n64_rom=655540, application_x_n64_rom=655541,
application_x_navi_animation=655541, application_x_navi_animation=655542,
application_x_navidoc=655542, application_x_navidoc=655543,
application_x_navimap=655543, application_x_navimap=655544,
application_x_navistyle=655544, application_x_navistyle=655545,
application_x_nes_rom=655545, application_x_nes_rom=655546,
application_x_netcdf=655546, application_x_netcdf=655547,
application_x_newton_compatible_pkg=655547, application_x_newton_compatible_pkg=655548,
application_x_nintendo_ds_rom=655548, application_x_nintendo_ds_rom=655549,
application_x_object=655549, application_x_object=655550,
application_x_omc=655550, application_x_omc=655551,
application_x_omcdatamaker=655551, application_x_omcdatamaker=655552,
application_x_omcregerator=655552, application_x_omcregerator=655553,
application_x_pagemaker=655553, application_x_pagemaker=655554,
application_x_pcl=655554, application_x_pcl=655555,
application_x_pgp_keyring=655555, application_x_pgp_keyring=655556,
application_x_pixclscript=655556, application_x_pixclscript=655557,
application_x_pkcs7_certreqresp=655557, application_x_pkcs7_certreqresp=655558,
application_x_pkcs7_signature=655558, application_x_pkcs7_signature=655559,
application_x_project=655559, application_x_project=655560,
application_x_qpro=655560, application_x_qpro=655561,
application_x_rar=655561 | 0x10000000, application_x_rar=655562 | 0x10000000,
application_x_rpm=655562, application_x_rpm=655563,
application_x_sdp=655563, application_x_sdp=655564,
application_x_sea=655564, application_x_sea=655565,
application_x_seelogo=655565, application_x_seelogo=655566,
application_x_setupscript=655566, application_x_setupscript=655567,
application_x_shar=655567, application_x_shar=655568,
application_x_sharedlib=655568, application_x_sharedlib=655569,
application_x_shockwave_flash=655569, application_x_shockwave_flash=655570,
application_x_snappy_framed=655570, application_x_snappy_framed=655571,
application_x_sprite=655571, application_x_sprite=655572,
application_x_sqlite3=655572, application_x_sqlite3=655573,
application_x_stargallery_thm=655573, application_x_stargallery_thm=655574,
application_x_stuffit=655574, application_x_stuffit=655575,
application_x_sv4cpio=655575, application_x_sv4cpio=655576,
application_x_sv4crc=655576, application_x_sv4crc=655577,
application_x_tar=655577 | 0x10000000, application_x_tar=655578 | 0x10000000,
application_x_tbook=655578, application_x_tbook=655579,
application_x_terminfo=655579, application_x_terminfo=655580,
application_x_terminfo2=655580, application_x_terminfo2=655581,
application_x_tex_tfm=655581, application_x_tex_tfm=655582,
application_x_texinfo=655582, application_x_texinfo=655583,
application_x_ustar=655583, application_x_ustar=655584,
application_x_visio=655584, application_x_visio=655585,
application_x_vnd_audioexplosion_mzz=655585, application_x_vnd_audioexplosion_mzz=655586,
application_x_vnd_ls_xpix=655586, application_x_vnd_ls_xpix=655587,
application_x_vrml=655587, application_x_vrml=655588,
application_x_wais_source=655588, application_x_wais_source=655589,
application_x_wine_extension_ini=655589, application_x_wine_extension_ini=655590,
application_x_wintalk=655590, application_x_wintalk=655591,
application_x_world=655591, application_x_world=655592,
application_x_wri=655592, application_x_wri=655593,
application_x_x509_ca_cert=655593, application_x_x509_ca_cert=655594,
application_x_xz=655594 | 0x08000000, application_x_xz=655595 | 0x08000000,
application_x_zip=655595, application_x_zip=655596,
application_x_zstd=655596 | 0x08000000, application_x_zstd=655597 | 0x08000000,
application_xml=655597, application_x_zstd_dictionary=655598,
application_zip=655598 | 0x10000000, application_xml=655599,
application_zlib=655599, application_zip=655600 | 0x10000000,
audio_basic=458992 | 0x80000000, application_zlib=655601,
audio_it=458993, audio_basic=458994 | 0x80000000,
audio_make=458994, audio_it=458995,
audio_mid=458995, audio_make=458996,
audio_midi=458996, audio_mid=458997,
audio_mp4=458997, audio_midi=458998,
audio_mpeg=458998, audio_mp4=458999,
audio_ogg=458999, audio_mpeg=459000,
audio_s3m=459000, audio_ogg=459001,
audio_tsp_audio=459001, audio_s3m=459002,
audio_tsplayer=459002, audio_tsp_audio=459003,
audio_vnd_qcelp=459003, audio_tsplayer=459004,
audio_voxware=459004, audio_vnd_qcelp=459005,
audio_x_aiff=459005, audio_voxware=459006,
audio_x_flac=459006, audio_x_aiff=459007,
audio_x_gsm=459007, audio_x_flac=459008,
audio_x_hx_aac_adts=459008, audio_x_gsm=459009,
audio_x_jam=459009, audio_x_hx_aac_adts=459010,
audio_x_liveaudio=459010, audio_x_jam=459011,
audio_x_m4a=459011, audio_x_liveaudio=459012,
audio_x_midi=459012, audio_x_m4a=459013,
audio_x_mod=459013, audio_x_midi=459014,
audio_x_mp4a_latm=459014, audio_x_mod=459015,
audio_x_mpeg_3=459015, audio_x_mp4a_latm=459016,
audio_x_mpequrl=459016, audio_x_mpeg_3=459017,
audio_x_nspaudio=459017, audio_x_mpequrl=459018,
audio_x_pn_realaudio=459018, audio_x_nspaudio=459019,
audio_x_psid=459019, audio_x_pn_realaudio=459020,
audio_x_realaudio=459020, audio_x_psid=459021,
audio_x_s3m=459021, audio_x_realaudio=459022,
audio_x_twinvq=459022, audio_x_s3m=459023,
audio_x_twinvq_plugin=459023, audio_x_twinvq=459024,
audio_x_voc=459024, audio_x_twinvq_plugin=459025,
audio_x_wav=459025, audio_x_voc=459026,
audio_x_xbox_executable=459026 | 0x80000000, audio_x_wav=459027,
audio_x_xbox360_executable=459027 | 0x80000000, audio_x_xbox_executable=459028 | 0x80000000,
audio_xm=459028, audio_x_xbox360_executable=459029 | 0x80000000,
font_otf=327957 | 0x20000000, audio_xm=459030,
font_sfnt=327958 | 0x20000000, font_otf=327959 | 0x20000000,
font_woff=327959 | 0x20000000, font_sfnt=327960 | 0x20000000,
font_woff2=327960 | 0x20000000, font_woff=327961 | 0x20000000,
image_bmp=524569, font_woff2=327962 | 0x20000000,
image_cmu_raster=524570, image_bmp=524571,
image_fif=524571, image_cmu_raster=524572,
image_florian=524572, image_fif=524573,
image_g3fax=524573, image_florian=524574,
image_gif=524574, image_g3fax=524575,
image_heic=524575, image_gif=524576,
image_ief=524576, image_heic=524577,
image_jpeg=524577, image_ief=524578,
image_jutvision=524578, image_jpeg=524579,
image_naplps=524579, image_jutvision=524580,
image_pict=524580, image_naplps=524581,
image_png=524581, image_pict=524582,
image_svg=524582 | 0x80000000, image_png=524583,
image_svg_xml=524583 | 0x80000000, image_svg=524584 | 0x80000000,
image_tiff=524584, image_svg_xml=524585 | 0x80000000,
image_vnd_adobe_photoshop=524585 | 0x80000000, image_tiff=524586,
image_vnd_djvu=524586 | 0x80000000, image_vnd_adobe_photoshop=524587 | 0x80000000,
image_vnd_fpx=524587, image_vnd_djvu=524588 | 0x80000000,
image_vnd_microsoft_icon=524588, image_vnd_fpx=524589,
image_vnd_rn_realflash=524589, image_vnd_microsoft_icon=524590,
image_vnd_rn_realpix=524590, image_vnd_rn_realflash=524591,
image_vnd_wap_wbmp=524591, image_vnd_rn_realpix=524592,
image_vnd_xiff=524592, image_vnd_wap_wbmp=524593,
image_webp=524593, image_vnd_xiff=524594,
image_wmf=524594, image_webp=524595,
image_x_3ds=524595, image_wmf=524596,
image_x_award_bioslogo=524596, image_x_3ds=524597,
image_x_cmu_raster=524597, image_x_adobe_dng=524598 | 0x00800000,
image_x_cur=524598, image_x_award_bioslogo=524599,
image_x_dwg=524599, image_x_canon_cr2=524600 | 0x00800000,
image_x_eps=524600, image_x_canon_crw=524601 | 0x00800000,
image_x_exr=524601, image_x_cmu_raster=524602,
image_x_gem=524602, image_x_cur=524603,
image_x_icns=524603, image_x_dcraw=524604 | 0x00800000,
image_x_icon=524604 | 0x80000000, image_x_dwg=524605,
image_x_jg=524605, image_x_eps=524606,
image_x_jps=524606, image_x_epson_erf=524607 | 0x00800000,
image_x_ms_bmp=524607, image_x_exr=524608,
image_x_niff=524608, image_x_fuji_raf=524609 | 0x00800000,
image_x_pcx=524609, image_x_gem=524610,
image_x_pict=524610, image_x_icns=524611,
image_x_portable_bitmap=524611, image_x_icon=524612 | 0x80000000,
image_x_portable_graymap=524612, image_x_jg=524613,
image_x_portable_pixmap=524613, image_x_jps=524614,
image_x_quicktime=524614, image_x_kodak_dcr=524615 | 0x00800000,
image_x_rgb=524615, image_x_kodak_k25=524616 | 0x00800000,
image_x_tga=524616, image_x_kodak_kdc=524617 | 0x00800000,
image_x_tiff=524617, image_x_minolta_mrw=524618 | 0x00800000,
image_x_win_bitmap=524618, image_x_ms_bmp=524619,
image_x_xcf=524619 | 0x80000000, image_x_niff=524620,
image_x_xpixmap=524620 | 0x80000000, image_x_nikon_nef=524621 | 0x00800000,
image_x_xwindowdump=524621, image_x_olympus_orf=524622 | 0x00800000,
message_news=196942, image_x_panasonic_raw=524623 | 0x00800000,
message_rfc822=196943, image_x_pcx=524624,
model_vnd_dwf=65872, image_x_pentax_pef=524625 | 0x00800000,
model_vnd_gdl=65873, image_x_pict=524626,
model_vnd_gs_gdl=65874, image_x_portable_bitmap=524627,
model_vrml=65875, image_x_portable_graymap=524628,
model_x_pov=65876, image_x_portable_pixmap=524629,
text_PGP=590165, image_x_quicktime=524630,
text_asp=590166, image_x_rgb=524631,
text_css=590167, image_x_sigma_x3f=524632 | 0x00800000,
text_html=590168, image_x_sony_arw=524633 | 0x00800000,
text_javascript=590169, image_x_sony_sr2=524634 | 0x00800000,
text_mcf=590170, image_x_sony_srf=524635 | 0x00800000,
text_pascal=590171, image_x_tga=524636,
text_plain=590172, image_x_tiff=524637,
text_richtext=590173, image_x_win_bitmap=524638,
text_rtf=590174, image_x_xcf=524639 | 0x80000000,
text_scriplet=590175, image_x_xpixmap=524640 | 0x80000000,
text_tab_separated_values=590176, image_x_xwindowdump=524641,
text_troff=590177, message_news=196962,
text_uri_list=590178, message_rfc822=196963,
text_vnd_abc=590179, model_vnd_dwf=65892,
text_vnd_fmi_flexstor=590180, model_vnd_gdl=65893,
text_vnd_wap_wml=590181, model_vnd_gs_gdl=65894,
text_vnd_wap_wmlscript=590182, model_vrml=65895,
text_webviewhtml=590183, model_x_pov=65896,
text_x_Algol68=590184, text_PGP=590185,
text_x_asm=590185, text_asp=590186,
text_x_audiosoft_intra=590186, text_css=590187,
text_x_awk=590187, text_html=590188 | 0x01000000,
text_x_bcpl=590188, text_javascript=590189,
text_x_c=590189, text_mcf=590190,
text_x_c__=590190, text_pascal=590191,
text_x_component=590191, text_plain=590192,
text_x_diff=590192, text_richtext=590193,
text_x_fortran=590193, text_rtf=590194,
text_x_java=590194, text_scriplet=590195,
text_x_la_asf=590195, text_tab_separated_values=590196,
text_x_lisp=590196, text_troff=590197,
text_x_m=590197, text_uri_list=590198,
text_x_m4=590198, text_vnd_abc=590199,
text_x_makefile=590199, text_vnd_fmi_flexstor=590200,
text_x_ms_regedit=590200, text_vnd_wap_wml=590201,
text_x_msdos_batch=590201, text_vnd_wap_wmlscript=590202,
text_x_objective_c=590202, text_webviewhtml=590203,
text_x_pascal=590203, text_x_Algol68=590204,
text_x_perl=590204, text_x_asm=590205,
text_x_php=590205, text_x_audiosoft_intra=590206,
text_x_po=590206, text_x_awk=590207,
text_x_python=590207, text_x_bcpl=590208,
text_x_ruby=590208, text_x_c=590209,
text_x_sass=590209, text_x_c__=590210,
text_x_scss=590210, text_x_component=590211,
text_x_server_parsed_html=590211, text_x_diff=590212,
text_x_setext=590212, text_x_fortran=590213,
text_x_sgml=590213, text_x_java=590214,
text_x_shellscript=590214, text_x_la_asf=590215,
text_x_speech=590215, text_x_lisp=590216,
text_x_tcl=590216, text_x_m=590217,
text_x_tex=590217, text_x_m4=590218,
text_x_uil=590218, text_x_makefile=590219,
text_x_uuencode=590219, text_x_ms_regedit=590220,
text_x_vcalendar=590220, text_x_msdos_batch=590221,
text_x_vcard=590221, text_x_objective_c=590222,
text_xml=590222, text_x_pascal=590223,
video_MP2T=393615, text_x_perl=590224,
video_animaflex=393616, text_x_php=590225,
video_avi=393617, text_x_po=590226,
video_avs_video=393618, text_x_python=590227,
video_mp4=393619, text_x_ruby=590228,
video_mpeg=393620, text_x_sass=590229,
video_quicktime=393621, text_x_scss=590230,
video_vdo=393622, text_x_server_parsed_html=590231,
video_vivo=393623, text_x_setext=590232,
video_vnd_rn_realvideo=393624, text_x_sgml=590233 | 0x01000000,
video_vosaic=393625, text_x_shellscript=590234,
video_webm=393626, text_x_speech=590235,
video_x_amt_demorun=393627, text_x_tcl=590236,
video_x_amt_showrun=393628, text_x_tex=590237,
video_x_atomic3d_feature=393629, text_x_uil=590238,
video_x_dl=393630, text_x_uuencode=590239,
video_x_dv=393631, text_x_vcalendar=590240,
video_x_fli=393632, text_x_vcard=590241,
video_x_flv=393633, text_xml=590242 | 0x01000000,
video_x_isvideo=393634, video_MP2T=393635,
video_x_jng=393635 | 0x80000000, video_animaflex=393636,
video_x_m4v=393636, video_avi=393637,
video_x_matroska=393637, video_avs_video=393638,
video_x_mng=393638, video_mp4=393639,
video_x_motion_jpeg=393639, video_mpeg=393640,
video_x_ms_asf=393640, video_quicktime=393641,
video_x_msvideo=393641, video_vdo=393642,
video_x_qtc=393642, video_vivo=393643,
video_x_sgi_movie=393643, video_vnd_rn_realvideo=393644,
x_epoc_x_sisx_app=721324, video_vosaic=393645,
video_webm=393646,
video_x_amt_demorun=393647,
video_x_amt_showrun=393648,
video_x_atomic3d_feature=393649,
video_x_dl=393650,
video_x_dv=393651,
video_x_fli=393652,
video_x_flv=393653,
video_x_isvideo=393654,
video_x_jng=393655 | 0x80000000,
video_x_m4v=393656,
video_x_matroska=393657,
video_x_mng=393658,
video_x_motion_jpeg=393659,
video_x_ms_asf=393660,
video_x_msvideo=393661,
video_x_qtc=393662,
video_x_sgi_movie=393663,
x_epoc_x_sisx_app=721344,
}; };
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) { char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj"; case application_arj: return "application/arj";
@@ -864,6 +884,26 @@ case video_x_msvideo: return "video/x-msvideo";
case video_x_qtc: return "video/x-qtc"; case video_x_qtc: return "video/x-qtc";
case video_x_sgi_movie: return "video/x-sgi-movie"; case video_x_sgi_movie: return "video/x-sgi-movie";
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app"; case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
case application_x_zstd_dictionary: return "application/x-zstd-dictionary";
case application_vnd_ms_outlook: return "application/vnd.ms-outlook";
case image_x_olympus_orf: return "image/x-olympus-orf";
case image_x_nikon_nef: return "image/x-nikon-nef";
case image_x_fuji_raf: return "image/x-fuji-raf";
case image_x_panasonic_raw: return "image/x-panasonic-raw";
case image_x_adobe_dng: return "image/x-adobe-dng";
case image_x_canon_cr2: return "image/x-canon-cr2";
case image_x_canon_crw: return "image/x-canon-crw";
case image_x_dcraw: return "image/x-dcraw";
case image_x_kodak_dcr: return "image/x-kodak-dcr";
case image_x_kodak_k25: return "image/x-kodak-k25";
case image_x_kodak_kdc: return "image/x-kodak-kdc";
case image_x_minolta_mrw: return "image/x-minolta-mrw";
case image_x_pentax_pef: return "image/x-pentax-pef";
case image_x_sigma_x3f: return "image/x-sigma-x3f";
case image_x_sony_arw: return "image/x-sony-arw";
case image_x_sony_sr2: return "image/x-sony-sr2";
case image_x_sony_srf: return "image/x-sony-srf";
case image_x_epson_erf: return "image/x-epson-erf";
default: return NULL;}} default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal); GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj); g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@@ -881,6 +921,7 @@ g_hash_table_insert(ext_table, "frl", (gpointer)application_freeloader);
g_hash_table_insert(ext_table, "spl", (gpointer)application_futuresplash); g_hash_table_insert(ext_table, "spl", (gpointer)application_futuresplash);
g_hash_table_insert(ext_table, "vew", (gpointer)application_groupwise); g_hash_table_insert(ext_table, "vew", (gpointer)application_groupwise);
g_hash_table_insert(ext_table, "gz", (gpointer)application_gzip); g_hash_table_insert(ext_table, "gz", (gpointer)application_gzip);
g_hash_table_insert(ext_table, "tgz", (gpointer)application_gzip);
g_hash_table_insert(ext_table, "hta", (gpointer)application_hta); g_hash_table_insert(ext_table, "hta", (gpointer)application_hta);
g_hash_table_insert(ext_table, "unv", (gpointer)application_i_deas); g_hash_table_insert(ext_table, "unv", (gpointer)application_i_deas);
g_hash_table_insert(ext_table, "iges", (gpointer)application_iges); g_hash_table_insert(ext_table, "iges", (gpointer)application_iges);
@@ -995,7 +1036,6 @@ g_hash_table_insert(ext_table, "cpio", (gpointer)application_x_cpio);
g_hash_table_insert(ext_table, "dbf", (gpointer)application_x_dbf); g_hash_table_insert(ext_table, "dbf", (gpointer)application_x_dbf);
g_hash_table_insert(ext_table, "deb", (gpointer)application_x_debian_package); g_hash_table_insert(ext_table, "deb", (gpointer)application_x_debian_package);
g_hash_table_insert(ext_table, "deepv", (gpointer)application_x_deepv); g_hash_table_insert(ext_table, "deepv", (gpointer)application_x_deepv);
g_hash_table_insert(ext_table, "dcr", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dir", (gpointer)application_x_director); g_hash_table_insert(ext_table, "dir", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dxr", (gpointer)application_x_director); g_hash_table_insert(ext_table, "dxr", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp); g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp);
@@ -1383,6 +1423,25 @@ g_hash_table_insert(ext_table, "divx", (gpointer)video_x_msvideo);
g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc); g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc);
g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie); g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie);
g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie); g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie);
g_hash_table_insert(ext_table, "msg", (gpointer)application_vnd_ms_outlook);
g_hash_table_insert(ext_table, "orf", (gpointer)image_x_olympus_orf);
g_hash_table_insert(ext_table, "nef", (gpointer)image_x_nikon_nef);
g_hash_table_insert(ext_table, "raf", (gpointer)image_x_fuji_raf);
g_hash_table_insert(ext_table, "rw2", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(ext_table, "raw", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(ext_table, "dng", (gpointer)image_x_adobe_dng);
g_hash_table_insert(ext_table, "cr2", (gpointer)image_x_canon_cr2);
g_hash_table_insert(ext_table, "crw", (gpointer)image_x_canon_crw);
g_hash_table_insert(ext_table, "dcr", (gpointer)image_x_kodak_dcr);
g_hash_table_insert(ext_table, "k25", (gpointer)image_x_kodak_k25);
g_hash_table_insert(ext_table, "kdc", (gpointer)image_x_kodak_kdc);
g_hash_table_insert(ext_table, "mrw", (gpointer)image_x_minolta_mrw);
g_hash_table_insert(ext_table, "pef", (gpointer)image_x_pentax_pef);
/* Sigma RAW files carry the ".x3f" extension (matching the image/x-sigma-x3f mime type). */
g_hash_table_insert(ext_table, "x3f", (gpointer)image_x_sigma_x3f);
g_hash_table_insert(ext_table, "arw", (gpointer)image_x_sony_arw);
g_hash_table_insert(ext_table, "sr2", (gpointer)image_x_sony_sr2);
g_hash_table_insert(ext_table, "srf", (gpointer)image_x_sony_srf);
g_hash_table_insert(ext_table, "erf", (gpointer)image_x_epson_erf);
return ext_table;} return ext_table;}
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal); GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj); g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@@ -1813,5 +1872,25 @@ g_hash_table_insert(mime_table, "video/x-msvideo", (gpointer)video_x_msvideo);
g_hash_table_insert(mime_table, "video/x-qtc", (gpointer)video_x_qtc); g_hash_table_insert(mime_table, "video/x-qtc", (gpointer)video_x_qtc);
g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie); g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie);
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app); g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary);
g_hash_table_insert(mime_table, "application/vnd.ms-outlook", (gpointer)application_vnd_ms_outlook);
g_hash_table_insert(mime_table, "image/x-olympus-orf", (gpointer)image_x_olympus_orf);
g_hash_table_insert(mime_table, "image/x-nikon-nef", (gpointer)image_x_nikon_nef);
g_hash_table_insert(mime_table, "image/x-fuji-raf", (gpointer)image_x_fuji_raf);
g_hash_table_insert(mime_table, "image/x-panasonic-raw", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(mime_table, "image/x-adobe-dng", (gpointer)image_x_adobe_dng);
g_hash_table_insert(mime_table, "image/x-canon-cr2", (gpointer)image_x_canon_cr2);
g_hash_table_insert(mime_table, "image/x-canon-crw", (gpointer)image_x_canon_crw);
g_hash_table_insert(mime_table, "image/x-dcraw", (gpointer)image_x_dcraw);
g_hash_table_insert(mime_table, "image/x-kodak-dcr", (gpointer)image_x_kodak_dcr);
g_hash_table_insert(mime_table, "image/x-kodak-k25", (gpointer)image_x_kodak_k25);
g_hash_table_insert(mime_table, "image/x-kodak-kdc", (gpointer)image_x_kodak_kdc);
g_hash_table_insert(mime_table, "image/x-minolta-mrw", (gpointer)image_x_minolta_mrw);
g_hash_table_insert(mime_table, "image/x-pentax-pef", (gpointer)image_x_pentax_pef);
g_hash_table_insert(mime_table, "image/x-sigma-x3f", (gpointer)image_x_sigma_x3f);
g_hash_table_insert(mime_table, "image/x-sony-arw", (gpointer)image_x_sony_arw);
g_hash_table_insert(mime_table, "image/x-sony-sr2", (gpointer)image_x_sony_sr2);
g_hash_table_insert(mime_table, "image/x-sony-srf", (gpointer)image_x_sony_srf);
g_hash_table_insert(mime_table, "image/x-epson-erf", (gpointer)image_x_epson_erf);
return mime_table;} return mime_table;}
#endif #endif

View File

@@ -49,7 +49,6 @@ void parse(void *arg) {
return; return;
} }
doc.filepath = job->filepath; doc.filepath = job->filepath;
doc.ext = (short) job->ext; doc.ext = (short) job->ext;
doc.base = (short) job->base; doc.base = (short) job->base;
@@ -61,7 +60,7 @@ void parse(void *arg) {
doc.mtime = job->vfile.info.st_mtim.tv_sec; doc.mtime = job->vfile.info.st_mtim.tv_sec;
uuid_generate(doc.uuid); uuid_generate(doc.uuid);
char *buf[PARSE_BUF_SIZE]; char *buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) { if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN]; char uuid_str[UUID_STR_LEN];
@@ -79,7 +78,12 @@ void parse(void *arg) {
if (doc.mime == 0 && !ScanCtx.fast) { if (doc.mime == 0 && !ScanCtx.fast) {
// Get mime type with libmagic // Get mime type with libmagic
bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE); if (!job->vfile.is_fs_file) {
LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
goto abort;
}
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) { if (bytes_read < 0) {
if (job->vfile.is_fs_file) { if (job->vfile.is_fs_file) {
@@ -115,6 +119,8 @@ void parse(void *arg) {
if (!(SHOULD_PARSE(doc.mime))) { if (!(SHOULD_PARSE(doc.mime))) {
} else if (IS_RAW(doc.mime)) {
parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || } else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) { (mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
@@ -124,7 +130,11 @@ void parse(void *arg) {
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc); parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) { } else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc); if (IS_MARKUP(doc.mime)) {
parse_markup(&ScanCtx.text_ctx, &job->vfile, &doc);
} else {
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
}
} else if (IS_FONT(doc.mime)) { } else if (IS_FONT(doc.mime)) {
parse_font(&ScanCtx.font_ctx, &job->vfile, &doc); parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);
@@ -135,23 +145,21 @@ void parse(void *arg) {
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext)) (IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
)) { )) {
parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc); parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
} else if (ScanCtx.ooxml_ctx.content_size > 0 && IS_DOC(doc.mime)) { } else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc.mime)) {
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc); parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
} else if (is_cbr(&ScanCtx.comic_ctx, doc.mime) || is_cbz(&ScanCtx.comic_ctx, doc.mime)) {
} else if (is_cbr(&ScanCtx.cbr_ctx, doc.mime)) { parse_comic(&ScanCtx.comic_ctx, &job->vfile, &doc);
parse_cbr(&ScanCtx.cbr_ctx, &job->vfile, &doc);
} else if (IS_MOBI(doc.mime)) { } else if (IS_MOBI(doc.mime)) {
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc); parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
} }
abort:
//Parent meta //Parent meta
if (!uuid_is_null(job->parent)) { if (!uuid_is_null(job->parent)) {
char tmp[UUID_STR_LEN];
uuid_unparse(job->parent, tmp);
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1); meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
meta_parent->key = MetaParent; meta_parent->key = MetaParent;
strcpy(meta_parent->str_val, tmp); uuid_unparse(job->parent, meta_parent->str_val);
APPEND_META((&doc), meta_parent) APPEND_META((&doc), meta_parent)
} }

View File

@@ -3,7 +3,7 @@
#include "../sist.h" #include "../sist.h"
#define PARSE_BUF_SIZE 4096 #define MAGIC_BUF_SIZE 4096 * 6
int fs_read(struct vfile *f, void *buf, size_t size); int fs_read(struct vfile *f, void *buf, size_t size);
void fs_close(struct vfile *f); void fs_close(struct vfile *f);

4
src/static/css/autocomplete.min.css vendored Normal file
View File

@@ -0,0 +1,4 @@
.autocomplete-suggestions { text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1); position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box; }
.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
.autocomplete-suggestion.selected { background: #f0f0f0; }

File diff suppressed because one or more lines are too long

View File

@@ -121,7 +121,7 @@ body {
background: #546b7a; background: #546b7a;
} }
.navbar a:hover { a:hover,.btn:hover {
color: #fff; color: #fff;
} }
@@ -166,6 +166,12 @@ body {
background-color: #FAAB3C; background-color: #FAAB3C;
} }
.add-tag-button {
cursor: pointer;
color: #212529;
background-color: #e0e0e0;
}
.card-img-overlay { .card-img-overlay {
pointer-events: none; pointer-events: none;
padding: 0.75rem; padding: 0.75rem;
@@ -191,6 +197,18 @@ body {
margin-right: 3px; margin-right: 3px;
} }
.badge-delete {
margin-right: -2px;
margin-left: 2px;
margin-top: -1px;
font-family: monospace;
font-size: 90%;
background: rgba(0,0,0,0.2);
padding: 0.1em 0.4em;
color: white;
cursor: pointer;
}
.badge-user { .badge-user {
color: #212529; color: #212529;
background-color: #e0e0e0; background-color: #e0e0e0;
@@ -266,6 +284,7 @@ mark {
margin: 3px; margin: 3px;
white-space: normal; white-space: normal;
color: rgb(224, 224, 224); color: rgb(224, 224, 224);
overflow: hidden;
} }
.irs-single, .irs-from, .irs-to { .irs-single, .irs-from, .irs-to {
@@ -480,3 +499,42 @@ svg {
cursor: pointer; cursor: pointer;
} }
.stats-card {
text-align: center;
margin-top: 1em;
padding: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
background: #212121;
}
.graph {
display: inline-block;
width: 40%;
}
.full-screen {
position: absolute;
left: 0;
width: 100%;
}
.stats-btn {
float: right;
margin-bottom: 10px;
}
#graphs-card svg text {
fill: #eee;
}
.wholerow {
outline: none !important;
}
.stat > .card-body {
padding: 0.7em 1.25em;
}

View File

@@ -106,11 +106,33 @@ body {
background-color: #e0e0e0; background-color: #e0e0e0;
} }
.badge {
margin-right: 3px;
}
.badge-delete {
margin-right: -2px;
margin-left: 2px;
margin-top: -1px;
font-family: monospace;
font-size: 90%;
background: rgba(0,0,0,0.2);
padding: 0.1em 0.4em;
color: white;
cursor: pointer;
}
.badge-text { .badge-text {
color: #FFFFFF; color: #FFFFFF;
background-color: #FAAB3C; background-color: #FAAB3C;
} }
.add-tag-button {
cursor: pointer;
color: #212529;
background-color: #e0e0e0;
}
.card-img-overlay { .card-img-overlay {
pointer-events: none; pointer-events: none;
padding: 0.75rem; padding: 0.75rem;
@@ -131,9 +153,6 @@ body {
overflow: hidden; overflow: hidden;
} }
.badge {
margin-right: 3px;
}
.fit { .fit {
display: block; display: block;
@@ -205,6 +224,7 @@ mark {
margin: 3px; margin: 3px;
white-space: normal; white-space: normal;
color: #000; color: #000;
overflow: hidden;
} }
.irs-single, .irs-from, .irs-to { .irs-single, .irs-from, .irs-to {
@@ -347,3 +367,38 @@ mark {
cursor: pointer; cursor: pointer;
} }
.stats-card {
text-align: center;
margin-top: 1em;
padding: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .08) !important;
border-radius: 0;
border: none;
background: #fff;
}
.graph {
display: inline-block;
width: 40%;
}
.full-screen {
position: absolute;
left: 0;
width: 100%;
}
.stats-btn {
float: right;
margin-bottom: 10px;
}
.wholerow {
outline: none !important;
}
.stat > .card-body {
padding: 0.7em 1.25em;
}

File diff suppressed because one or more lines are too long

3
src/static/js/auto-complete.min.js vendored Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

2
src/static/js/d3.v5.min.js vendored Normal file

File diff suppressed because one or more lines are too long

2
src/static/js/dom-to-image.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -27,18 +27,24 @@ function gifOver(thumbnail, hit) {
} }
function getContentHighlight(hit) { function getContentHighlight(hit) {
const re = RegExp(/<mark>/g);
const sortByMathCount = (a, b) => {
return b.match(re).length - a.match(re).length;
};
if (hit.hasOwnProperty("highlight")) { if (hit.hasOwnProperty("highlight")) {
if (hit["highlight"].hasOwnProperty("content")) { if (hit["highlight"].hasOwnProperty("content")) {
return hit["highlight"]["content"].sort(sortByMathCount)[0]; return hit["highlight"]["content"][0];
} else if (hit["highlight"].hasOwnProperty("content.nGram")) { } else if (hit["highlight"].hasOwnProperty("content.nGram")) {
return hit["highlight"]["content.nGram"].sort(sortByMathCount)[0]; return hit["highlight"]["content.nGram"][0];
}
}
return undefined;
}
function getPathHighlight(hit) {
if (hit.hasOwnProperty("highlight")) {
if (hit["highlight"].hasOwnProperty("path.text")) {
return hit["highlight"]["path.text"][0];
} else if (hit["highlight"].hasOwnProperty("path.nGram")) {
return hit["highlight"]["path.nGram"][0];
} }
} }
@@ -65,6 +71,7 @@ function shouldPlayVideo(hit) {
return mime && return mime &&
mime.startsWith("video/") && mime.startsWith("video/") &&
!("parent" in hit["_source"]) &&
hit["_source"]["extension"] !== "mkv" && hit["_source"]["extension"] !== "mkv" &&
hit["_source"]["extension"] !== "avi" && hit["_source"]["extension"] !== "avi" &&
videoc !== "hevc" && videoc !== "hevc" &&
@@ -80,6 +87,7 @@ function shouldDisplayRawImage(hit) {
hit["_source"]["mime"] && hit["_source"]["mime"] &&
!hit["_source"]["parent"] && !hit["_source"]["parent"] &&
hit["_source"]["videoc"] !== "tiff" && hit["_source"]["videoc"] !== "tiff" &&
hit["_source"]["videoc"] !== "raw" &&
hit["_source"]["videoc"] !== "ppm"; hit["_source"]["videoc"] !== "ppm";
} }
@@ -145,26 +153,44 @@ function getTags(hit, mimeCategory) {
// User tags // User tags
if (hit["_source"].hasOwnProperty("tag")) { if (hit["_source"].hasOwnProperty("tag")) {
hit["_source"]["tag"].forEach(tag => { hit["_source"]["tag"].forEach(tag => {
const userTag = document.createElement("span"); tags.push(makeUserTag(tag, hit));
userTag.setAttribute("class", "badge badge-pill badge-user");
const tokens = tag.split("#");
if (tokens.length > 1) {
const bg = "#" + tokens[1];
const fg = lum(tokens[1]) > 40 ? "#000" : "#fff";
userTag.setAttribute("style", `background-color: ${bg}; color: ${fg}`);
}
const name = tokens[0].split(".")[tokens[0].split(".").length - 1];
userTag.appendChild(document.createTextNode(name));
tags.push(userTag);
}) })
} }
return tags return tags
} }
/**
 * Build the badge element for a user tag.
 *
 * @param {string} tag Raw tag in "dotted.name#rrggbb" form; the "#color" part is optional.
 * @param {object} hit Search hit the tag belongs to (forwarded to deleteTag).
 * @returns {HTMLSpanElement} The badge, showing only the last dotted segment of the name.
 */
function makeUserTag(tag, hit) {
    const [fullName, color] = tag.split("#");

    const badge = document.createElement("span");
    badge.setAttribute("class", "badge badge-pill badge-user");
    // The tooltip shows the full dotted name, the badge text only its last segment.
    badge.setAttribute("title", fullName);

    if (color !== undefined) {
        // Pick a readable text color depending on the luminance of the background.
        const textColor = lum(color) > 50 ? "#000" : "#fff";
        badge.setAttribute("style", `background-color: ${"#" + color}; color: ${textColor}`);
    }

    const removeButton = document.createElement("span");
    removeButton.setAttribute("class", "badge badge-pill badge-delete");
    removeButton.setAttribute("title", "Delete tag");
    removeButton.appendChild(document.createTextNode("X"));
    removeButton.addEventListener("click", () => {
        // Only drop the badge once the deletion request has completed.
        deleteTag(tag, hit).then(() => {
            badge.remove();
        });
    });

    // The delete button is only attached while the pointer is over the badge.
    badge.addEventListener("mouseenter", () => badge.appendChild(removeButton));
    badge.addEventListener("mouseleave", () => removeButton.remove());

    const segments = fullName.split(".");
    badge.appendChild(document.createTextNode(segments[segments.length - 1]));

    return badge;
}
function infoButtonCb(hit) { function infoButtonCb(hit) {
return () => { return () => {
getDocumentInfo(hit["_id"]).then(doc => { getDocumentInfo(hit["_id"]).then(doc => {
@@ -184,7 +210,8 @@ function infoButtonCb(hit) {
const displayFields = new Set([ const displayFields = new Set([
"mime", "size", "mtime", "path", "title", "width", "height", "duration", "audioc", "videoc", "mime", "size", "mtime", "path", "title", "width", "height", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag" "bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
"modified_by"
]); ]);
Object.keys(doc) Object.keys(doc)
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_")) .filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))
@@ -330,9 +357,31 @@ function createDocCard(hit) {
docCardBody.appendChild(tagContainer); docCardBody.appendChild(tagContainer);
attachTagContainerEventListener(tagContainer, hit);
return docCard; return docCard;
} }
/**
 * Add a hover-only "+Add" button to a tag container and wire it to the tag modal.
 *
 * @param {HTMLElement} tagContainer Element holding the tag badges of one result.
 * @param {object} hit Search hit the container belongs to.
 */
function attachTagContainerEventListener(tagContainer, hit) {
    // New badges (and the button itself) are inserted before the first <small> child, if any.
    const sizeTag = Array.from(tagContainer.children).find(child => child.tagName === "SMALL");

    const addTagButton = document.createElement("span");
    addTagButton.setAttribute("class", "badge badge-pill add-tag-button");
    addTagButton.appendChild(document.createTextNode("+Add"));

    const showButton = () => tagContainer.insertBefore(addTagButton, sizeTag);
    const hideButton = () => addTagButton.remove();

    // Invoked with the saved tag; adds its badge to this container.
    const insertTagBadge = tag => {
        tagContainer.insertBefore(makeUserTag(tag, hit), sizeTag);
    };

    const openTagModal = () => {
        tagBar.value = "";
        // Remember which document is being tagged and how to display the new tag.
        currentDocToTag = hit;
        currentTagCallback = insertTagBadge;
        $("#tagModal").modal("show");
        tagBar.focus();
    };

    tagContainer.addEventListener("mouseenter", showButton);
    tagContainer.addEventListener("mouseleave", hideButton);
    addTagButton.addEventListener("click", openTagModal);
}
function makeThumbnail(mimeCategory, hit, imgWrapper, small) { function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
if (!hit["_source"].hasOwnProperty("thumbnail")) { if (!hit["_source"].hasOwnProperty("thumbnail")) {
@@ -405,7 +454,6 @@ function createDocLine(hit) {
if (hit["_source"].hasOwnProperty("parent")) { if (hit["_source"].hasOwnProperty("parent")) {
line.classList.add("sub-document"); line.classList.add("sub-document");
isSubDocument = true;
} }
const infoButton = makeInfoButton(hit); const infoButton = makeInfoButton(hit);
@@ -443,7 +491,7 @@ function createDocLine(hit) {
if (contentHl !== undefined) { if (contentHl !== undefined) {
const contentDiv = document.createElement("div"); const contentDiv = document.createElement("div");
contentDiv.setAttribute("class", "content-div"); contentDiv.setAttribute("class", "content-div");
contentDiv.insertAdjacentHTML('afterbegin', contentHl); contentDiv.insertAdjacentHTML("afterbegin", contentHl);
titleDiv.appendChild(contentDiv); titleDiv.appendChild(contentDiv);
} }
@@ -453,7 +501,13 @@ function createDocLine(hit) {
let path = document.createElement("div"); let path = document.createElement("div");
path.setAttribute("class", "path-line"); path.setAttribute("class", "path-line");
path.setAttribute("title", hit["_source"]["path"] + "/"); path.setAttribute("title", hit["_source"]["path"] + "/");
path.appendChild(document.createTextNode(hit["_source"]["path"] + "/"));
const pathHighlight = getPathHighlight(hit);
if (pathHighlight) {
path.insertAdjacentHTML("afterbegin", pathHighlight + "/");
} else {
path.appendChild(document.createTextNode(hit["_source"]["path"] + "/"));
}
let tagContainer = document.createElement("div"); let tagContainer = document.createElement("div");
tagContainer.setAttribute("class", "tag-container"); tagContainer.setAttribute("class", "tag-container");
@@ -472,6 +526,8 @@ function createDocLine(hit) {
pathLine.appendChild(path); pathLine.appendChild(path);
pathLine.appendChild(tagContainer); pathLine.appendChild(tagContainer);
attachTagContainerEventListener(tagContainer, hit);
return line; return line;
} }

View File

@@ -6,6 +6,9 @@ let tagTree;
let searchBar = document.getElementById("searchBar"); let searchBar = document.getElementById("searchBar");
let pathBar = document.getElementById("pathBar"); let pathBar = document.getElementById("pathBar");
let tagBar = document.getElementById("tagBar");
let currentDocToTag = null;
let currentTagCallback = null;
let lastDoc = null; let lastDoc = null;
let reachedEnd = false; let reachedEnd = false;
let docCount = 0; let docCount = 0;
@@ -20,15 +23,6 @@ let size_max = 10000000000000;
let date_min = null; let date_min = null;
let date_max = null; let date_max = null;
const CONF = new Settings();
const _defaults = {
display: "grid",
fuzzy: true,
highlight: true,
sort: "score"
};
SORT_MODES = { SORT_MODES = {
score: { score: {
text: "Relevance", text: "Relevance",
@@ -66,32 +60,7 @@ SORT_MODES = {
], ],
key: hit => hit["_source"]["size"] key: hit => hit["_source"]["size"]
}, },
} };
function Settings() {
this.options = {};
this._onUpdate = function () {
$("#fuzzyToggle").prop("checked", this.options.fuzzy);
}
this.load = function () {
const raw = window.localStorage.getItem("options");
if (raw === null) {
this.options = _defaults;
} else {
this.options = JSON.parse(raw);
}
this._onUpdate();
}
this.save = function () {
window.localStorage.setItem("options", JSON.stringify(this.options));
this._onUpdate();
}
}
function showEsError() { function showEsError() {
$.toast({ $.toast({
@@ -106,35 +75,149 @@ function showEsError() {
}); });
} }
jQuery["jsonPost"] = function (url, data) { window.onload = () => {
return jQuery.ajax({ CONF.load();
url: url, new autoComplete({
type: "post", selector: '#pathBar',
data: JSON.stringify(data), minChars: 1,
contentType: "application/json" delay: 400,
}).fail(err => { renderItem: function (item) {
showEsError(); return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>';
console.log(err); },
source: async function (term, suggest) {
if (!CONF.options.suggestPath) {
return []
}
term = term.toLowerCase();
const choices = await getPathChoices();
let matches = [];
for (let i = 0; i < choices.length; i++) {
if (~choices[i].toLowerCase().indexOf(term)) {
matches.push(choices[i]);
}
}
suggest(matches.sort());
},
onSelect: function () {
searchDebounced();
}
});
searchBar.addEventListener("keyup", searchDebounced);
pathBar.addEventListener("keyup", e => {
if (e.key === "Enter") {
searchDebounced();
}
});
new autoComplete({
selector: '#tagBar',
minChars: 1,
delay: 200,
renderItem: function (item) {
return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item.split("#")[0] + '</div>';
},
source: async function (term, suggest) {
term = term.toLowerCase();
const choices = await getTagChoices();
let matches = [];
for (let i = 0; i < choices.length; i++) {
if (~choices[i].toLowerCase().indexOf(term)) {
matches.push(choices[i]);
}
}
suggest(matches.sort());
},
onSelect: function (e, item) {
const name = item.split("#")[0];
const color = "#" + item.split("#")[1];
$("#tag-color").val(color);
$("#tag-color").trigger("keyup", color);
tagBar.value = name;
e.preventDefault();
}
});
[tagBar, document.getElementById("tag-color")].forEach(elem => {
elem.addEventListener("keyup", e => {
if (e.key === "Enter" && tagBar.value.length > 0) {
const tag = tagBar.value + document.getElementById("tag-color").value;
saveTag(tag, currentDocToTag).then(() => currentTagCallback(tag));
}
});
})
$("#tag-color").colorpicker({
format: "hex",
sliders: {
saturation: {
selector: '.colorpicker-saturation',
callLeft: 'setSaturationRatio',
callTop: 'setValueRatio'
},
hue: {
selector: '.colorpicker-hue',
maxLeft: 0,
callLeft: false,
callTop: 'setHueRatio'
}
}
}); });
}; };
window.onload = () => { function saveTag(tag, hit) {
$("#theme").on("click", () => { const relPath = hit["_source"]["path"] + "/" + hit["_source"]["name"] + ext(hit);
if (!document.cookie.includes("sist")) {
document.cookie = "sist=dark"; return $.jsonPost("/tag/" + hit["_source"]["index"], {
} else { delete: false,
document.cookie = "sist=; Max-Age=-99999999;"; name: tag,
} doc_id: hit["_id"],
window.location.reload(); relpath: relPath
}).then(() => {
tagBar.blur();
$("#tagModal").modal("hide");
$.toast({
heading: "Tag added",
text: "Tag saved to index storage and updated in ElasticSearch",
stack: 3,
bgColor: "#00a4bc",
textColor: "#fff",
position: 'bottom-right',
hideAfter: 3000,
loaderBg: "#08c7e8",
});
}) })
CONF.load(); }
};
/**
 * Ask the server to remove a tag from a document, then show a confirmation toast.
 *
 * @param {string} tag Raw tag string (including its "#color" suffix, if any).
 * @param {object} hit Search hit whose tag is removed.
 * @returns The promise-like object returned by chaining `.then` on `$.jsonPost`.
 */
function deleteTag(tag, hit) {
    const source = hit["_source"];
    const relPath = source["path"] + "/" + source["name"] + ext(hit);

    return $.jsonPost("/tag/" + source["index"], {
        delete: true,
        name: tag,
        doc_id: hit["_id"],
        relpath: relPath
    }).then(() => {
        $.toast({
            heading: "Tag deleted",
            // Fixed wording: the original message was missing "from".
            text: "Tag deleted from index storage and updated in ElasticSearch",
            stack: 3,
            bgColor: "#00a4bc",
            textColor: "#fff",
            position: 'bottom-right',
            hideAfter: 3000,
            loaderBg: "#08c7e8",
        });
    });
}
function toggleFuzzy() { function toggleFuzzy() {
searchDebounced(); searchDebounced();
} }
$.jsonPost("i").then(resp => { $.get("i").then(resp => {
const urlIndices = (new URLSearchParams(location.search)).get("i"); const urlIndices = (new URLSearchParams(location.search)).get("i");
resp["indices"].forEach(idx => { resp["indices"].forEach(idx => {
@@ -159,10 +242,7 @@ $.jsonPost("i").then(resp => {
}); });
function getDocumentInfo(id) { function getDocumentInfo(id) {
return $.getJSON("d/" + id).fail(e => { return $.getJSON("d/" + id).fail(showEsError)
console.log(e);
showEsError();
})
} }
function handleTreeClick(tree) { function handleTreeClick(tree) {
@@ -256,6 +336,9 @@ $.jsonPost("es", {
selection: { selection: {
mode: 'checkbox' mode: 'checkbox'
}, },
checkbox: {
autoCheckChildren: false
},
data: tagMap data: tagMap
}); });
new InspireTreeDOM(tagTree, { new InspireTreeDOM(tagTree, {
@@ -267,20 +350,70 @@ $.jsonPost("es", {
}); });
function addTag(map, tag, id, count) { function addTag(map, tag, id, count) {
let tags = tag.split("#")[0].split("."); // let tags = tag.split("#")[0].split(".");
let tags = tag.split(".");
let child = { let child = {
id: id, id: id,
text: tags.length !== 1 ? tags[0] : `${tags[0]} (${count})`, values: [id],
children: [] count: count,
text: tags.length !== 1 ? tags[0] : `${tags[0].split("#")[0]} (${count})`,
name: tags[0],
children: [],
isLeaf: tags.length === 1,
//Overwrite base functions
blur: function () {
},
select: function () {
this.state("selected", true);
return this.check()
},
deselect: function () {
this.state("selected", false);
return this.uncheck()
},
uncheck: function () {
if (!this.isLeaf) {
return;
}
baseStateChange('checked', false, 'unchecked', this, false);
this.state('indeterminate', false);
if (this.hasParent()) {
this.getParent().refreshIndeterminateState();
}
this._tree.end();
return this;
},
check: function () {
if (!this.isLeaf) {
return;
}
baseStateChange('checked', true, 'checked', this, false);
if (this.hasParent()) {
this.getParent().refreshIndeterminateState();
}
this._tree.end();
return this;
}
}; };
let found = false; let found = false;
map.forEach(node => { map.forEach(node => {
if (node.text === child.text) { if (node.name.split("#")[0] === child.name.split("#")[0]) {
found = true; found = true;
if (tags.length !== 1) { if (tags.length !== 1) {
addTag(node.children, tags.slice(1).join("."), id, count); addTag(node.children, tags.slice(1).join("."), id, count);
} else {
// Same name, different color
node.count += count;
node.text = `${tags[0].split("#")[0]} (${node.count})`;
node.values.push(id);
} }
} }
}); });
@@ -332,7 +465,11 @@ function getSelectedNodes(tree) {
//Only get children //Only get children
if (selected[i].text.indexOf("(") !== -1) { if (selected[i].text.indexOf("(") !== -1) {
selectedNodes.push(selected[i].id); if (selected[i].values) {
selectedNodes.push(selected[i].values);
} else {
selectedNodes.push(selected[i].id);
}
} }
} }
@@ -372,31 +509,40 @@ function search(after = null) {
"font_name^6" "font_name^6"
]; ];
if (CONF.options.searchInPath) {
fields.push("path.text^5");
}
if ($("#fuzzyToggle").prop("checked")) { if ($("#fuzzyToggle").prop("checked")) {
fields.push("content.nGram"); fields.push("content.nGram");
if (CONF.options.searchInPath) {
fields.push("path.nGram");
}
fields.push("name.nGram^3"); fields.push("name.nGram^3");
} }
let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") { if (path !== "") {
filters.push([{term: {path: path}}]) filters.push({term: {path: path}})
} }
let mimeTypes = getSelectedNodes(mimeTree); let mimeTypes = getSelectedNodes(mimeTree);
if (!mimeTypes.includes("any")) { if (!mimeTypes.includes("any")) {
filters.push([{terms: {"mime": mimeTypes}}]); filters.push({terms: {"mime": mimeTypes}});
} }
let tags = getSelectedNodes(tagTree); let tags = getSelectedNodes(tagTree);
if (!tags.includes("any")) { if (!tags.includes("any")) {
filters.push([{terms: {"tag": tags}}]); tags.forEach(tagGroup => {
filters.push({terms: {"tag": tagGroup}})
})
} }
if (date_min && date_max) { if (date_min && date_max) {
filters.push([{range: {mtime: {gte: date_min, lte: date_max}}}]) filters.push({range: {mtime: {gte: date_min, lte: date_max}}})
} else if (date_min) { } else if (date_min) {
filters.push([{range: {mtime: {gte: date_min}}}]) filters.push({range: {mtime: {gte: date_min}}})
} else if (date_max) { } else if (date_max) {
filters.push([{range: {mtime: {lte: date_max}}}]) filters.push({range: {mtime: {lte: date_max}}})
} }
let q = { let q = {
@@ -432,6 +578,9 @@ function search(after = null) {
q.highlight = { q.highlight = {
pre_tags: ["<mark>"], pre_tags: ["<mark>"],
post_tags: ["</mark>"], post_tags: ["</mark>"],
fragment_size: CONF.options.fragmentSize,
number_of_fragments: 1,
order: "score",
fields: { fields: {
content: {}, content: {},
// "content.nGram": {}, // "content.nGram": {},
@@ -440,6 +589,10 @@ function search(after = null) {
font_name: {}, font_name: {},
} }
}; };
if (CONF.options.searchInPath) {
q.highlight.fields["path.text"] = {};
q.highlight.fields["path.nGram"] = {};
}
} }
$.jsonPost("es", q).then(searchResult => { $.jsonPost("es", q).then(searchResult => {
@@ -448,6 +601,11 @@ function search(after = null) {
lastDoc = hits[hits.length - 1]; lastDoc = hits[hits.length - 1];
} }
hits.forEach(hit => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
});
if (!after) { if (!after) {
preload.remove(); preload.remove();
searchResults.appendChild(makeStatsCard(searchResult)); searchResults.appendChild(makeStatsCard(searchResult));
@@ -479,8 +637,6 @@ let searchDebounced = _.debounce(function () {
search() search()
}, 500); }, 500);
searchBar.addEventListener("keyup", searchDebounced);
pathBar.addEventListener("keyup", searchDebounced);
//Size slider //Size slider
$("#sizeSlider").ionRangeSlider({ $("#sizeSlider").ionRangeSlider({
@@ -586,7 +742,7 @@ function getNextDepth(node) {
} }
}, },
size: 0 size: 0
} };
if (node.depth > 0) { if (node.depth > 0) {
q.query.bool.must = { q.query.bool.must = {
@@ -622,6 +778,7 @@ function getNextDepth(node) {
text: `${name}/ (${bucket.doc_count})`, text: `${name}/ (${bucket.doc_count})`,
depth: node.depth + 1, depth: node.depth + 1,
index: node.index, index: node.index,
values: [bucket.key],
children: true, children: true,
} }
}).filter(x => x !== null) }).filter(x => x !== null)
@@ -645,18 +802,20 @@ function createPathTree(target) {
let pathTree = new InspireTree({ let pathTree = new InspireTree({
data: function (node, resolve, reject) { data: function (node, resolve, reject) {
return getNextDepth(node); return getNextDepth(node);
} },
sort: "text"
}); });
selectedIndices.forEach(index => { selectedIndices.forEach(index => {
pathTree.addNode({ pathTree.addNode({
id: "/" + index, id: "/" + index,
values: ["/" + index],
text: `/[${indexMap[index]}]`, text: `/[${indexMap[index]}]`,
index: index, index: index,
depth: 0, depth: 0,
children: true children: true
}) })
}) });
new InspireTreeDOM(pathTree, { new InspireTreeDOM(pathTree, {
target: target target: target
@@ -665,30 +824,48 @@ function createPathTree(target) {
pathTree.on("node.click", handlePathTreeClick(pathTree)); pathTree.on("node.click", handlePathTreeClick(pathTree));
} }
function updateSettings() { function getPathChoices() {
CONF.options.display = $("#settingDisplay").val(); return new Promise(getPaths => {
CONF.options.fuzzy = $("#settingFuzzy").prop("checked"); $.jsonPost("es", {
CONF.options.highlight = $("#settingHighlight").prop("checked"); suggest: {
CONF.save(); path: {
prefix: pathBar.value,
searchDebounced(); completion: {
field: "suggest-path",
$.toast({ skip_duplicates: true,
heading: "Settings updated", size: 10000
text: "Settings saved to browser storage", }
stack: 3, }
bgColor: "#00a4bc", }
textColor: "#fff", }).then(resp => getPaths(resp["suggest"]["path"][0]["options"].map(opt => opt["_source"]["path"])));
position: 'bottom-right',
hideAfter: 3000,
loaderBg: "#08c7e8",
}); });
} }
function loadSettings() {
CONF.load();
$("#settingDisplay").val(CONF.options.display); function getTagChoices() {
$("#settingFuzzy").prop("checked", CONF.options.fuzzy); return new Promise(getPaths => {
$("#settingHighlight").prop("checked", CONF.options.highlight); $.jsonPost("es", {
suggest: {
tag: {
prefix: tagBar.value,
completion: {
field: "suggest-tag",
skip_duplicates: true,
size: 10000
}
}
}
}).then(resp => {
const result = [];
resp["suggest"]["tag"][0]["options"].map(opt => opt["_source"]["tag"]).forEach(tags => {
tags.forEach(tag => {
const t = tag.split("#")[0];
if (!result.find(x => x.split("#")[0] === t)) {
result.push(tag);
}
});
});
getPaths(result);
});
});
} }

View File

@@ -64,3 +64,142 @@ function lum(c) {
return 0.2126 * r + 0.7152 * g + 0.0722 * b; return 0.2126 * r + 0.7152 * g + 0.0722 * b;
} }
/**
 * Decode a string that uses "]" as an escape marker.
 *
 * "]]" decodes to a literal "]" and "]XX" (two hex digits) decodes to the
 * character with code 0xXX. Every other character is copied unchanged.
 * A "]" that is not followed by "]" or by two hex digits is kept literally.
 *
 * @param {string} str Escaped input.
 * @returns {string} Unescaped string.
 */
function strUnescape(str) {
    let result = "";
    let i = 0;

    while (i < str.length) {
        const c = str[i];

        if (c !== "]") {
            result += c;
            i += 1;
            continue;
        }

        if (str[i + 1] === "]") {
            // Doubled marker: literal "]".
            result += c;
            i += 2;
            continue;
        }

        // The hex digits FOLLOW the marker; slicing from i (as the previous
        // version did) fed "]X" to parseInt and always produced NaN.
        const hex = str.slice(i + 1, i + 3);
        if (/^[0-9a-fA-F]{2}$/.test(hex)) {
            result += String.fromCharCode(parseInt(hex, 16));
            i += 3;
        } else {
            // Malformed or truncated escape: keep the marker as-is.
            result += c;
            i += 1;
        }
    }

    return result;
}
// Fallback values for every user-configurable option.
const _defaults = {
    // Search page
    display: "grid",
    fuzzy: true,
    highlight: true,
    sort: "score",
    searchInPath: false,
    // Treemap options
    treemapType: "cascaded",
    treemapTiling: "squarify",
    treemapGroupingDepth: 3,
    treemapColor: "PuBuGn",
    treemapSize: "large",
    // Path suggestions and highlight fragment size
    suggestPath: true,
    fragmentSize: 100
};

// Single shared settings object used by the pages' scripts.
const CONF = new Settings();
/**
 * Reload the stored options and copy each one into its control of the settings form.
 */
function loadSettings() {
    CONF.load();

    // [control selector, option key, true when the control is a checkbox]
    const bindings = [
        ["#settingDisplay", "display", false],
        ["#settingFuzzy", "fuzzy", true],
        ["#settingHighlight", "highlight", true],
        ["#settingSearchInPath", "searchInPath", true],
        ["#settingTreemapTiling", "treemapTiling", false],
        ["#settingTreemapGroupingDepth", "treemapGroupingDepth", false],
        ["#settingTreemapColor", "treemapColor", false],
        ["#settingTreemapSize", "treemapSize", false],
        ["#settingTreemapType", "treemapType", false],
        ["#settingSuggestPath", "suggestPath", true],
        ["#settingFragmentSize", "fragmentSize", false],
    ];

    for (const [selector, key, isCheckbox] of bindings) {
        const control = $(selector);
        if (isCheckbox) {
            control.prop("checked", CONF.options[key]);
        } else {
            control.val(CONF.options[key]);
        }
    }
}
/**
 * User preferences persisted in window.localStorage under the "options" key.
 *
 * - options: the current option values.
 * - load():  read the stored options, falling back to _defaults.
 * - save():  write the current options back to storage.
 */
function Settings() {
    this.options = {};

    // Keep the fuzzy toggle in sync with the current options.
    this._onUpdate = function () {
        $("#fuzzyToggle").prop("checked", this.options.fuzzy);
    };

    this.load = function () {
        let stored = null;
        try {
            // getItem() returns null when nothing is stored; JSON.parse(null) yields null.
            stored = JSON.parse(window.localStorage.getItem("options"));
        } catch (e) {
            // Corrupt stored value: fall back to the defaults instead of throwing.
            stored = null;
        }

        const usable = stored !== null && typeof stored === "object" && !Array.isArray(stored);

        // Always work on a copy so that later edits never mutate _defaults.
        // Stored values win; options missing from storage take their default value
        // rather than discarding every stored preference.
        this.options = Object.assign({}, _defaults, usable ? stored : {});

        this._onUpdate();
    };

    this.save = function () {
        window.localStorage.setItem("options", JSON.stringify(this.options));
        this._onUpdate();
    };
}
/**
 * Copy the settings form into CONF.options, persist it, refresh whichever
 * page-specific view is available, and show a confirmation toast.
 */
function updateSettings() {
    // [option key, control selector, true when the control is a checkbox]
    const bindings = [
        ["display", "#settingDisplay", false],
        ["fuzzy", "#settingFuzzy", true],
        ["highlight", "#settingHighlight", true],
        ["searchInPath", "#settingSearchInPath", true],
        ["treemapTiling", "#settingTreemapTiling", false],
        ["treemapGroupingDepth", "#settingTreemapGroupingDepth", false],
        ["treemapColor", "#settingTreemapColor", false],
        ["treemapSize", "#settingTreemapSize", false],
        ["treemapType", "#settingTreemapType", false],
        ["suggestPath", "#settingSuggestPath", true],
        ["fragmentSize", "#settingFragmentSize", false],
    ];

    bindings.forEach(([key, selector, isCheckbox]) => {
        const control = $(selector);
        CONF.options[key] = isCheckbox ? control.prop("checked") : control.val();
    });

    CONF.save();

    // Either function may be missing depending on which page loaded this script.
    if (typeof searchDebounced !== "undefined") {
        searchDebounced();
    }
    if (typeof updateStats !== "undefined") {
        updateStats();
    }

    $.toast({
        heading: "Settings updated",
        text: "Settings saved to browser storage",
        stack: 3,
        bgColor: "#00a4bc",
        textColor: "#fff",
        position: 'bottom-right',
        hideAfter: 3000,
        loaderBg: "#08c7e8",
    });
}
/**
 * POST `data` as a JSON body to `url`.
 * On failure, shows the error toast and logs the failed request.
 * Returns the jQuery request object so callers can chain on it.
 */
jQuery["jsonPost"] = (url, data) => {
    const request = jQuery.ajax({
        url: url,
        type: "post",
        data: JSON.stringify(data),
        contentType: "application/json"
    });

    return request.fail(err => {
        showEsError();
        console.log(err);
    });
};
/**
 * Toggle the "sist" theme cookie (set to "dark" when absent, expired when
 * present) and reload the page so the change takes effect.
 */
function toggleTheme() {
    const hasThemeCookie = document.cookie.includes("sist");

    if (hasThemeCookie) {
        // A negative Max-Age makes the browser drop the cookie.
        document.cookie = "sist=; Max-Age=-99999999;";
    } else {
        document.cookie = "sist=dark;SameSite=Strict";
    }

    window.location.reload();
}

View File

@@ -11,10 +11,12 @@
<nav class="navbar navbar-expand-lg"> <nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a> <a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.0.0</span> <span class="badge badge-pill version">2.7.3</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span> <span class="tagline">Lightning-fast file system indexer and search tool </span>
<button style="margin-left: auto" class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button> <a class="btn ml-auto" href="/stats">Stats</a>
<a id="theme" class="btn" title="Toggle theme" href="/">Theme</a> <button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings
</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
</nav> </nav>
<div class="container"> <div class="container">
@@ -47,8 +49,11 @@
<div class="col"> <div class="col">
<div class="input-group" style="margin-bottom: 0.5em; margin-top: 1em"> <div class="input-group" style="margin-bottom: 0.5em; margin-top: 1em">
<div class="input-group-prepend"> <div class="input-group-prepend">
<button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal" data-target="#pathTreeModal"> <button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal"
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px"><path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/></svg> data-target="#pathTreeModal">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px">
<path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/>
</svg>
</button> </button>
</div> </div>
<input id="pathBar" type="search" class="form-control" placeholder="Filter path"> <input id="pathBar" type="search" class="form-control" placeholder="Filter path">
@@ -155,7 +160,8 @@
<i>fried eggs</i> and either <i>eggplant</i> or <i>potato</i>, but will ignore results <i>fried eggs</i> and either <i>eggplant</i> or <i>potato</i>, but will ignore results
containing <i>frittata</i>.</p> containing <i>frittata</i>.</p>
<p>When neither <code>+</code> or <code>|</code> is specified, the default operator is <code>+</code> (and).</p> <p>When neither <code>+</code> or <code>|</code> is specified, the default operator is
<code>+</code> (and).</p>
<p>When the <b>Fuzzy</b> option is checked, partial matches are also returned.</p> <p>When the <b>Fuzzy</b> option is checked, partial matches are also returned.</p>
<br> <br>
<p>For more information, see <a target="_blank" <p>For more information, see <a target="_blank"
@@ -186,14 +192,88 @@
<label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label> <label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
</div> </div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against
document path</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter
bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label> <label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm"> <select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option> <option value="grid">Grid</option>
<option value="list">List</option> <option value="list">List</option>
</select> </select>
<hr/>
<h4>Stats</h4>
<div class="form-group">
<label for="settingTreemapType">Treemap type</label>
<select id="settingTreemapType" class="form-control form-control-sm">
<option value="cascaded">Cascaded</option>
<option value="flat">Flat (compact)</option>
</select>
</div>
<!-- Treemap tiling mode; option values are the keys of TILING_MODES on the stats page -->
<div class="form-group">
<label for="settingTreemapTiling">Treemap tiling</label>
<select id="settingTreemapTiling" class="form-control form-control-sm">
<option value="binary">Binary</option>
<option value="squarify">Squarify</option>
<option value="slice">Slice</option>
<option value="dice">Dice</option>
<option value="sliceDice">Slice &amp; Dice</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
</div>
<div class="form-group">
<label for="settingTreemapColor">Treemap color (cascaded)</label>
<select id="settingTreemapColor" class="form-control form-control-sm">
<option value="PuBuGn">Purple-Blue-Green</option>
<option value="PuRd">Purple-Red</option>
<option value="PuBu">Purple-Blue</option>
<option value="YlOrBr">Yellow-Orange-Brown</option>
<option value="YlOrRd">Yellow-Orange-Red</option>
<option value="YlGn">Yellow-Green</option>
<option value="YlGnBu">Yellow-Green-Blue</option>
<option value="Plasma">Plasma</option>
<option value="Magma">Magma</option>
<option value="Inferno">Inferno</option>
<option value="Viridis">Viridis</option>
<option value="Turbo">Turbo</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapSize">Treemap size</label>
<select id="settingTreemapSize" class="form-control form-control-sm">
<option value="small">Small</option>
<option value="medium">Medium</option>
<option value="large">Large</option>
<option value="x-large">X-Large</option>
<option value="xx-large">XX-Large</option>
</select>
</div>
<br> <br>
<button style="float: right" class="btn btn-primary" onclick="updateSettings()">Update settings</button> <button class="btn btn-primary ml-auto" onclick="updateSettings()">Update settings</button>
</div> </div>
</div> </div>
</div> </div>
@@ -215,9 +295,36 @@
</div> </div>
</div> </div>
<div class="modal" id="tagModal" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Add tag</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div class="form-group">
<div class="row">
<div class="col col-8">
<input type="text" id="tagBar" class="form-control">
</div>
<div class="col col-4">
<input type="text" id="tag-color" value="" class="form-control"/>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div id="searchResults"></div> <div id="searchResults"></div>
</div> </div>
<script src="js" type="text/javascript"></script> <script src="jslib" type="text/javascript"></script>
<script src="jssearch" type="text/javascript"></script>
</body> </body>
</html> </html>

800
src/static/stats.html Normal file
View File

@@ -0,0 +1,800 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>sist2 - Stats</title>
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
<link href="css" rel="stylesheet" type="text/css">
</head>
<body>
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.7.3</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"
onclick="loadSettings()">Settings
</button>
<button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
</nav>
<div class="container pb-3">
<div class="card">
<div class="card-body">
<label for="indices">Index</label>
<select id="indices" onchange="updateStats()"></select>
</div>
</div>
<div id="treemap-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('treemap-card')">Enlarge</button>
<button class="btn stats-btn" onclick="exportTreemap()">Export</button>
<svg id="treemap"></svg>
</div>
<div id="graphs-card" class="stats-card">
<button class="btn stats-btn" onclick="fullScreen('graphs-card')">Enlarge</button>
<div class="graph">
<svg id="agg_mime_size"></svg>
</div>
<div class="graph">
<svg id="agg_mime_count"></svg>
</div>
<div class="graph">
<svg id="date_histogram"></svg>
</div>
<div class="graph">
<svg id="size_histogram"></svg>
</div>
</div>
</div>
<div class="modal" id="settings" tabindex="-1" role="dialog" aria-labelledby="modal-title" aria-hidden="true">
<div class="modal-dialog modal-dialog-centered" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">Settings</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
</div>
<div class="modal-body">
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingHighlight">
<label class="custom-control-label" for="settingHighlight">Enable highlighting</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingFuzzy">
<label class="custom-control-label" for="settingFuzzy">Set fuzzy search by default</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSearchInPath">
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document
path</label>
</div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option>
<option value="list">List</option>
</select>
<hr/>
<h4>Stats</h4>
<div class="form-group">
<label for="settingTreemapType">Treemap type</label>
<select id="settingTreemapType" class="form-control form-control-sm">
<option value="cascaded">Cascaded</option>
<option value="flat">Flat (compact)</option>
</select>
</div>
<!-- Treemap tiling mode; option values are the keys of TILING_MODES in the script below -->
<div class="form-group">
<label for="settingTreemapTiling">Treemap tiling</label>
<select id="settingTreemapTiling" class="form-control form-control-sm">
<option value="binary">Binary</option>
<option value="squarify">Squarify</option>
<option value="slice">Slice</option>
<option value="dice">Dice</option>
<option value="sliceDice">Slice &amp; Dice</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapGroupingDepth">Treemap color grouping depth (flat)</label>
<input type="number" class="form-control" id="settingTreemapGroupingDepth" min="1" max="10">
</div>
<div class="form-group">
<label for="settingTreemapColor">Treemap color (cascaded)</label>
<select id="settingTreemapColor" class="form-control form-control-sm">
<option value="PuBuGn">Purple-Blue-Green</option>
<option value="PuRd">Purple-Red</option>
<option value="PuBu">Purple-Blue</option>
<option value="YlOrBr">Yellow-Orange-Brown</option>
<option value="YlOrRd">Yellow-Orange-Red</option>
<option value="YlGn">Yellow-Green</option>
<option value="YlGnBu">Yellow-Green-Blue</option>
<option value="Plasma">Plasma</option>
<option value="Magma">Magma</option>
<option value="Inferno">Inferno</option>
<option value="Viridis">Viridis</option>
<option value="Turbo">Turbo</option>
</select>
</div>
<div class="form-group">
<label for="settingTreemapSize">Treemap size</label>
<select id="settingTreemapSize" class="form-control form-control-sm">
<option value="small">Small</option>
<option value="medium">Medium</option>
<option value="large">Large</option>
<option value="x-large">X-Large</option>
<option value="xx-large">XX-Large</option>
</select>
</div>
<br>
<button class="btn btn-primary float-right" onclick="updateSettings()">Update settings</button>
</div>
</div>
</div>
</div>
<script src="jslib" type="text/javascript"></script>
<script>
// Treemap drawing area in pixels; assigned in updateStats() from SIZES.
let width;
let height;
// Index id -> index name, filled in once the index list has been fetched.
let indexMap = {};
// Height in pixels of one bar in the MIME bar charts.
const barHeight = 20;
// Categorical palette shared by the flat treemap and the MIME bar charts.
const ordinalColor = d3.scaleOrdinal(d3.schemeCategory10);
// SI-prefix number formatter used for axis ticks and some tooltips.
const formatSI = d3.format("~s");
// Maps the "treemapTiling" option value to the d3 tiling function.
const TILING_MODES = {
"squarify": d3.treemapSquarify,
"binary": d3.treemapBinary,
"sliceDice": d3.treemapSliceDice,
"slice": d3.treemapSlice,
"dice": d3.treemapDice,
};
// Maps the "treemapColor" option value to the d3 color interpolator (cascaded treemap).
const COLORS = {
"PuBuGn": d3.interpolatePuBuGn,
"PuRd": d3.interpolatePuRd,
"PuBu": d3.interpolatePuBu,
"YlOrBr": d3.interpolateYlOrBr,
"YlOrRd": d3.interpolateYlOrRd,
"YlGn": d3.interpolateYlGn,
"YlGnBu": d3.interpolateYlGnBu,
"Plasma": d3.interpolatePlasma,
"Magma": d3.interpolateMagma,
"Inferno": d3.interpolateInferno,
"Viridis": d3.interpolateViridis,
"Turbo": d3.interpolateTurbo,
};
// Maps the "treemapSize" option value to [width, height] of the treemap.
const SIZES = {
"small": [800, 600],
"medium": [1300, 750],
"large": [1900, 900],
"x-large": [2800, 1700],
"xx-large": [3600, 2000],
};
// More opaque fills when the "sist" theme cookie is present.
const fillOpacity = document.cookie.includes("sist") ? 0.9 : 0.6;
// Per-prefix counters backing uid().
const uids = {};
/**
 * Return a page-unique element id of the form `<name><counter>`; each prefix
 * has its own counter starting at 0.
 */
function uid(name) {
    const current = uids[name] || 0;
    uids[name] = current + 1;
    return name + current;
}
/**
 * Turn a flat list of rows into the nested {name, depth, value, children}
 * structure consumed by d3.hierarchy().
 *
 * @param table rows carrying a `taxonomy` array (path components) and a `size`
 * @param addSelfDir when true, a row whose node already has children gets its
 *        own size stored in an extra child named "."
 * @returns the root node, named after the currently selected index
 */
const burrow = function (table, addSelfDir) {
const root = {};
table.forEach(row => {
// Walk/create one nested object per path component.
let layer = root;
row.taxonomy.forEach(key => {
layer[key] = key in layer ? layer[key] : {};
layer = layer[key];
});
// The result depends on row order: a node that has no children *yet* keeps
// its size in "$size$"; otherwise the size only survives when addSelfDir is
// set. The flat-treemap caller sorts rows deepest-first before calling.
if (Object.keys(layer).length === 0) {
layer["$size$"] = row.size;
} else if (addSelfDir) {
layer["."] = {
"$size$": row.size,
};
}
});
// Recursively convert the nested objects into child arrays; "$size$" is
// bookkeeping, not a child.
const descend = function (obj, depth) {
return Object.keys(obj).filter(k => k !== "$size$").map(k => {
const child = {
name: k,
depth: depth,
value: 0,
children: descend(obj[k], depth + 1)
};
if ("$size$" in obj[k]) {
child.value = obj[k]["$size$"];
}
return child;
});
};
return {
name: `[${indexMap[$("#indices").val()]}]`,
children: descend(root, 1),
value: 0,
depth: 0,
}
};
/**
 * Draw the flat (leaves only) treemap into `svg`.
 *
 * @param data tree produced by burrow()
 * @param svg  d3 selection of the target <svg>
 */
function flatTreemap(data, svg) {
// Lay out the hierarchy with the tiling chosen in the settings; larger nodes first.
const root = d3.treemap()
.tile(TILING_MODES[CONF.options.treemapTiling])
.size([width, height])
.padding(1)
.round(true)(
d3.hierarchy(data)
.sum(d => d.value)
.sort((a, b) => b.value - a.value)
);
// One <g> per leaf, positioned at the leaf's top-left corner.
const leaf = svg.selectAll("g")
.data(root.leaves())
.join("g")
.attr("transform", d => `translate(${d.x0},${d.y0})`);
// Tooltip: full path from the root, then the human-readable size.
leaf.append("title")
.text(d => `${d.ancestors().reverse().map(d => d.data.name).join("/")}\n${humanFileSize(d.value)}`);
// Color by the ancestor at the configured grouping depth so that leaves
// under the same ancestor share a color.
leaf.append("rect")
.attr("id", d => (d.leafUid = uid("leaf")))
.attr("fill", d => {
while (d.depth > CONF.options.treemapGroupingDepth) d = d.parent;
return ordinalColor(d.data.name);
})
.attr("fill-opacity", fillOpacity)
.attr("width", d => d.x1 - d.x0)
.attr("height", d => d.y1 - d.y0);
// Clip each label to its own rectangle.
leaf.append("clipPath")
.attr("id", d => (d.clipUid = uid("clip")))
.append("use")
.attr("href", d => `#${d.leafUid}`);
// Two-line label: name, then size. A "." leaf (a directory's own files, see
// burrow) is labelled with its parent's name and size instead.
leaf.append("text")
.attr("clip-path", d => `url(#${d.clipUid})`)
.selectAll("tspan")
.data(d => {
if (d.data.name === ".") {
d = d.parent;
}
return [d.data.name, humanFileSize(d.value)]
})
.join("tspan")
.attr("x", 2)
.attr("y", (d, i, nodes) => `${i === 0 ? 1.1 : 2.3}em`)
.text(d => d);
}
/**
 * Post-process a laid-out treemap: shrink the right/bottom edge (x1/y1) of
 * every node by 2 * offset for each level of descendants sharing that edge.
 * Presumably this produces the staggered "cascaded" look - TODO confirm
 * against the d3 cascaded treemap example this appears to be based on.
 *
 * @param root   laid-out hierarchy (nodes carry x0/x1/y0/y1)
 * @param offset the outer padding used by the layout
 * @returns root, modified in place
 */
function cascade(root, offset) {
const x = new Map;
const y = new Map;
return root.eachAfter(d => {
// Post-order: 1 + the largest count among children whose edge sits exactly
// `offset` inside this node's edge; leaves count 0.
if (d.children && d.children.length !== 0) {
x.set(d, 1 + d3.max(d.children, c => c.x1 === d.x1 - offset ? x.get(c) : NaN));
y.set(d, 1 + d3.max(d.children, c => c.y1 === d.y1 - offset ? y.get(c) : NaN));
} else {
x.set(d, 0);
y.set(d, 0);
}
}).eachBefore(d => {
// Pre-order: pull the right/bottom edges in by the computed amount.
d.x1 -= 2 * offset * x.get(d);
d.y1 -= 2 * offset * y.get(d);
});
}
/**
 * Draw the cascaded (nested, every level visible) treemap into `svg`.
 *
 * @param data tree produced by burrow()
 * @param svg  d3 selection of the target <svg>
 */
function cascadeTreemap(data, svg) {
// Lay out with room for a header strip (paddingTop) on every parent, then
// apply the cascade() edge adjustment.
const root = cascade(
d3.treemap()
.size([width, height])
.tile(TILING_MODES[CONF.options.treemapTiling])
.paddingOuter(3)
.paddingTop(16)
.paddingInner(1)
.round(true)(
d3.hierarchy(data)
.sum(d => d.value)
.sort((a, b) => b.value - a.value)
),
3 // treemap.paddingOuter
);
// Color is a function of depth, using the palette chosen in the settings.
const maxDepth = Math.max(...root.descendants().map(d => d.depth));
const color = d3.scaleSequential([maxDepth, -1], COLORS[CONF.options.treemapColor]);
// Drop shadow applied to each depth layer.
svg.append("filter")
.attr("id", "shadow")
.append("feDropShadow")
.attr("flood-opacity", 0.3)
.attr("dx", 0)
.attr("stdDeviation", 3);
// One outer <g> per depth (ascending), each holding one <g> per node of that depth.
const node = svg.selectAll("g")
.data(
d3.nest()
.key(d => d.depth).sortKeys(d3.ascending)
.entries(root.descendants())
)
.join("g")
.attr("filter", "url(#shadow)")
.selectAll("g")
.data(d => d.values)
.join("g")
.attr("transform", d => `translate(${d.x0},${d.y0})`);
// Tooltip: path below the root (the root entry is spliced off), then the size.
node.append("title")
.text(d => `${d.ancestors().reverse().splice(1).map(d => d.data.name).join("/")}\n${humanFileSize(d.value)}`);
node.append("rect")
.attr("id", d => (d.nodeUid = uid("node")))
.attr("fill", d => color(d.depth))
.attr("width", d => d.x1 - d.x0)
.attr("height", d => d.y1 - d.y0);
// Clip each label to its own rectangle.
node.append("clipPath")
.attr("id", d => (d.clipUid = uid("clip")))
.append("use")
.attr("href", d => `#${d.nodeUid}`);
// Label text color is chosen from the lightness of the fill behind it.
node.append("text")
.attr("fill", d => d3.hsl(color(d.depth)).l > .5 ? "#333" : "#eee")
.attr("clip-path", d => `url(#${d.clipUid})`)
.selectAll("tspan")
.data(d => [d.data.name, humanFileSize(d.value)])
.join("tspan")
.text(d => d);
// Parents: name and size side by side on one line.
node.filter(d => d.children).selectAll("tspan")
.attr("dx", 3)
.attr("y", 13);
// Leaves: name and size stacked on two lines.
node.filter(d => !d.children).selectAll("tspan")
.attr("x", 3)
.attr("y", (d, i, nodes) => `${i === 0 ? 1.1 : 2.3}em`);
}
/**
 * Horizontal bar chart of the 15 MIME types with the largest total size.
 *
 * @param data rows with `mime`, `size` and `count` columns; sorted and
 *        annotated in place (the caller passes a copy)
 * @param svg  d3 selection of the target <svg>; its content is replaced
 */
function mimeBarSize(data, svg) {
// Left margin: 6px per character of the longest label among the top 15 by
// count or by size. Both rankings are considered, presumably so that this
// chart and the count chart end up with the same margin.
const margin = {
top: 50,
right: 0,
bottom: 10,
left: Math.max(
d3.max(data.sort((a, b) => b.count - a.count).slice(0, 15), d => d.mime.length) * 6,
d3.max(data.sort((a, b) => b.size - a.size).slice(0, 15), d => d.mime.length) * 6,
)
};
data.forEach(d => {
d.name = d.mime;
d.value = Number(d.size);
});
data = data.sort((a, b) => b.value - a.value).slice(0, 15);
// These shadow the page-level treemap width/height on purpose: the chart
// has its own fixed coordinate system.
const width = 550;
const height = Math.ceil((data.length + 0.1) * barHeight) + margin.top + margin.bottom;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleBand()
.domain(d3.range(data.length))
.rangeRound([margin.top, height - margin.bottom]);
const x = d3.scaleLinear()
.domain([0, d3.max(data, d => d.value)])
.range([margin.left, width - margin.right]);
// Bars, with the SI-formatted value as tooltip.
svg.append("g")
.attr("fill-opacity", fillOpacity)
.selectAll("rect")
.data(data)
.join("rect")
.attr("fill", d => ordinalColor(d.name))
.attr("x", x(0))
.attr("y", (d, i) => y(i))
.attr("width", d => x(d.value) - x(0))
.attr("height", y.bandwidth())
.append("title")
.text(d => formatSI(d.value));
// Top axis. NOTE(review): `data.format` is not assigned anywhere visible, so
// the second argument to ticks() looks like it is undefined; tickFormat()
// sets the formatter explicitly afterwards.
svg.append("g")
.attr("transform", `translate(0,${margin.top})`)
.call(d3.axisTop(x).ticks(width / 80, data.format).tickFormat(formatSI))
.call(g => g.select(".domain").remove());
// Left axis: one MIME type label per bar.
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(d3.axisLeft(y).tickFormat(i => data[i].name).tickSizeOuter(0));
// Chart title.
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("Size distribution by MIME type");
}
/**
 * Horizontal bar chart of the 15 MIME types with the most files.
 *
 * @param data rows with `mime`, `size` and `count` columns; sorted and
 *        annotated in place (the caller passes a copy)
 * @param svg  d3 selection of the target <svg>; its content is replaced
 */
function mimeBarCount(data, svg) {
// Left margin: 6px per character of the longest label among the top 15 by
// count or by size. Both rankings are considered, presumably so that this
// chart and the size chart end up with the same margin.
const margin = {
top: 50,
right: 0,
bottom: 10,
left: Math.max(
d3.max(data.sort((a, b) => b.count - a.count).slice(0, 15), d => d.mime.length) * 6,
d3.max(data.sort((a, b) => b.size - a.size).slice(0, 15), d => d.mime.length) * 6,
)
};
data.forEach(d => {
d.name = d.mime;
d.value = Number(d.count);
});
data = data.sort((a, b) => b.value - a.value).slice(0, 15);
// These shadow the page-level treemap width/height on purpose: the chart
// has its own fixed coordinate system.
const width = 550;
const height = Math.ceil((data.length + 0.1) * barHeight) + margin.top + margin.bottom;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleBand()
.domain(d3.range(data.length))
.rangeRound([margin.top, height - margin.bottom]);
const x = d3.scaleLinear()
.domain([0, d3.max(data, d => d.value)])
.range([margin.left, width - margin.right]);
// Bars, with the thousands-separated file count as tooltip.
svg.append("g")
.attr("fill-opacity", fillOpacity)
.selectAll("rect")
.data(data)
.join("rect")
.attr("fill", d => ordinalColor(d.name))
.attr("x", x(0))
.attr("y", (d, i) => y(i))
.attr("width", d => x(d.value) - x(0))
.attr("height", y.bandwidth())
.append("title")
.text(d => d3.format(",")(d.value));
// Top axis. NOTE(review): `data.format` is not assigned anywhere visible, so
// the second argument to ticks() looks like it is undefined; tickFormat()
// sets the formatter explicitly afterwards.
svg.append("g")
.attr("transform", `translate(0,${margin.top})`)
.call(d3.axisTop(x).ticks(width / 80, data.format).tickFormat(formatSI))
.call(g => g.select(".domain").remove());
// Left axis: one MIME type label per bar.
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(d3.axisLeft(y).tickFormat(i => data[i].name).tickSizeOuter(0));
// Chart title.
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File count distribution by MIME type");
}
/**
 * Histogram of file counts per modification-time bucket.
 *
 * @param data rows with `bucket` (bucket start) and `count` columns
 * @param svg  d3 selection of the target <svg>; its content is replaced
 */
function dateHistogram(data, svg) {
// Bucket width 2629800 matches DATE_BUCKET in stats.c.
let bins = data.map(d => {
return {
length: Number(d.count),
x0: Number(d.bucket),
x1: Number(d.bucket) + 2629800
}
});
// Sorted ascending by count before taking the quantile below.
bins.sort((a, b) => a.length - b.length);
const margin = {
top: 50,
right: 20,
bottom: 70,
left: 40
};
// Keep only the buckets whose count is strictly above the 0.9 quantile.
// NOTE(review): this hides 90% of the buckets - confirm that is intended.
const thresh = d3.quantile(bins, 0.9, d => d.length);
bins = bins.filter(d => d.length > thresh);
// These shadow the page-level treemap width/height on purpose.
const width = 550;
const height = 450;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleLinear()
.domain([0, d3.max(bins, d => d.length)]).nice()
.range([height - margin.bottom, margin.top]);
const x = d3.scaleLinear()
.domain(d3.extent(bins, d => d.x0)).nice()
.range([margin.left, width - margin.right]);
// Bars (at least 1px wide), with the raw count as tooltip.
svg.append("g")
.attr("fill", "steelblue")
.selectAll("rect")
.data(bins)
.join("rect")
.attr("x", d => x(d.x0) + 1)
.attr("width", d => Math.max(1, x(d.x1) - x(d.x0) - 1))
.attr("y", d => y(d.length))
.attr("height", d => y(0) - y(d.length))
.call(g => g
.append("title")
.text(d => d.length)
);
// Bottom axis: tick values are parsed as UNIX timestamps ("%s") and shown as
// dates; labels are rotated to fit.
svg.append("g")
.attr("transform", `translate(0,${height - margin.bottom})`)
.call(
d3.axisBottom(x)
.ticks(width / 30)
.tickSizeOuter(0)
.tickFormat(t => d3.timeFormat("%Y-%m-%d")(d3.utcParse("%s")(t)))
)
.call(g => g
.selectAll("text")
.style("text-anchor", "end")
.attr("dx", "-.8em")
.attr("dy", ".15em")
.attr("transform", "rotate(-65)")
)
.call(g => g.append("text")
.attr("x", width - margin.right)
.attr("y", -4)
.attr("fill", "currentColor")
.attr("font-weight", "bold")
.attr("text-anchor", "end")
.text("mtime")
);
// Left axis; the topmost tick label is cloned to serve as the axis caption.
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(
d3.axisLeft(y)
.ticks(height / 40)
.tickFormat(t => formatSI(t))
)
.call(g => g.select(".domain").remove())
.call(g => g.select(".tick:last-of-type text").clone()
.attr("x", 4)
.attr("text-anchor", "start")
.attr("font-weight", "bold")
.text("File count"));
// Chart title.
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File modification time distribution");
}
/**
 * Histogram of file counts per file-size bucket.
 *
 * @param data rows with `bucket` (bucket start, bytes) and `count` columns
 * @param svg  d3 selection of the target <svg>; its content is replaced
 */
function sizeHistogram(data, svg) {
// Bucket width of 5 MiB matches SIZE_BUCKET in stats.c.
let bins = data.map(d => {
return {
length: Number(d.count),
x0: Number(d.bucket),
x1: Number(d.bucket) + (5 * 1024 * 1024)
}
});
// Only the 25 most populated buckets are drawn.
bins = bins.sort((a, b) => b.length - a.length).slice(0, 25);
const margin = {
top: 50,
right: 20,
bottom: 70,
left: 40
};
// These shadow the page-level treemap width/height on purpose.
const width = 550;
const height = 450;
svg.selectAll("*").remove();
svg.attr("viewBox", [0, 0, width, height]);
const y = d3.scaleLinear()
.domain([0, d3.max(bins, d => d.length)])
.range([height - margin.bottom, margin.top]);
const x = d3.scaleLinear()
.domain(d3.extent(bins, d => d.x0)).nice()
.range([margin.left, width - margin.right]);
// Bars (at least 1px wide), with the raw count as tooltip.
svg.append("g")
.attr("fill", "steelblue")
.selectAll("rect")
.data(bins)
.join("rect")
.attr("x", d => x(d.x0) + 1)
.attr("width", d => Math.max(1, x(d.x1) - x(d.x0) - 1))
.attr("y", d => y(d.length))
.attr("height", d => y(0) - y(d.length))
.call(g => g
.append("title")
.text(d => d.length)
);
// Bottom axis: SI-formatted byte sizes, labels rotated to fit.
svg.append("g")
.attr("transform", `translate(0,${height - margin.bottom})`)
.call(
d3.axisBottom(x)
.ticks(width / 30)
.tickSizeOuter(0)
.tickFormat(formatSI)
)
.call(g => g
.selectAll("text")
.style("text-anchor", "end")
.attr("dx", "-.8em")
.attr("dy", ".15em")
.attr("transform", "rotate(-65)")
)
.call(g => g.append("text")
.attr("x", width - margin.right)
.attr("y", -4)
.attr("fill", "currentColor")
.attr("font-weight", "bold")
.attr("text-anchor", "end")
.text("size (bytes)")
);
// Left axis; the topmost tick label is cloned to serve as the axis caption.
svg.append("g")
.attr("transform", `translate(${margin.left},0)`)
.call(
d3.axisLeft(y)
.ticks(height / 40)
.tickFormat(t => formatSI(t))
)
.call(g => g.select(".domain").remove())
.call(g => g.select(".tick:last-of-type text").clone()
.attr("x", 4)
.attr("text-anchor", "start")
.attr("font-weight", "bold")
.text("File count"));
// Chart title.
svg.append("text")
.attr("x", (width / 2))
.attr("y", (margin.top / 2))
.attr("text-anchor", "middle")
.style("font-size", "16px")
.text("File size distribution");
}
/**
 * Redraw every chart for the index currently selected in #indices.
 *
 * The four CSV documents are fetched from /s/<index id>/1..4 (treemap, MIME
 * aggregation, size histogram, date histogram). While they load, the treemap
 * <svg> is emptied and re-initialised for the configured size.
 */
function updateStats() {
    [width, height] = SIZES[CONF.options.treemapSize];

    const treemapSvg = d3.select("#treemap");
    const mimeSvgSize = d3.select("#agg_mime_size");
    const mimeSvgCount = d3.select("#agg_mime_count");
    const dateHistogramSvg = d3.select("#date_histogram");
    const sizeHistogramSvg = d3.select("#size_histogram");

    const indexId = $("#indices").val();
    const fetchCsv = n => d3.csv(`/s/${indexId}/${n}`);

    fetchCsv(1).then(rows => {
        for (const row of rows) {
            row.taxonomy = row.path.split("/");
            row.size = Number(row.size);
        }

        if (CONF.options.treemapType === "cascaded") {
            cascadeTreemap(burrow(rows, false), treemapSvg);
            return;
        }

        // Flat mode: deepest paths first, so that burrow() can attach a
        // directory's own size as a "." child.
        const deepestFirst = rows.sort((a, b) => b.taxonomy.length - a.taxonomy.length);
        flatTreemap(burrow(deepestFirst, true), treemapSvg);
    });

    fetchCsv(2).then(rows => {
        // Each chart sorts and annotates its input, hence the copies.
        mimeBarSize(rows.slice(), mimeSvgSize);
        mimeBarCount(rows.slice(), mimeSvgCount);
    });

    fetchCsv(3).then(rows => sizeHistogram(rows, sizeHistogramSvg));

    fetchCsv(4).then(rows => dateHistogram(rows, dateHistogramSvg));

    // Runs before any of the responses above are handled.
    treemapSvg.selectAll("*").remove();
    treemapSvg
        .attr("viewBox", [0, 0, width, height])
        .attr("xmlns", "http://www.w3.org/2000/svg")
        .attr("xmlns:xlink", "http://www.w3.org/1999/xlink")
        .attr("version", "1.1")
        .style("overflow", "visible")
        .style("font", "10px sans-serif");
}
/**
 * Page entry point: load the stored settings, fetch the list of indices,
 * fill the #indices <select> and draw the charts.
 *
 * When the URL carries an `i` query parameter (comma-separated index names),
 * the matching index is pre-selected; with several matches the last one wins.
 */
window.onload = function () {
    CONF.load();
    $.jsonPost("i").then(resp => {
        const select = $("#indices");
        const urlIndices = (new URLSearchParams(location.search)).get("i");
        const requested = urlIndices ? urlIndices.split(",") : [];

        resp["indices"].forEach(idx => {
            indexMap[idx.id] = idx.name;

            // .text() rather than .append(): the index name is data, not markup.
            select.append($("<option>")
                .attr("value", idx.id)
                .text(idx.name));

            if (requested.indexOf(idx.name) !== -1) {
                // .val() actually selects the option; jQuery's .select() only
                // binds or triggers the "select" event.
                select.val(idx.id);
            }
        });

        updateStats();
    });
};
/**
 * Toggle the "full-screen" CSS class on the element with the given id.
 * Despite its name, `selector` is an element id, not a CSS selector.
 */
function fullScreen(selector) {
    document.getElementById(selector).classList.toggle("full-screen");
}
/**
 * Render the treemap <svg> to a blob with dom-to-image and download it as
 * "<index name>_treemap.png".
 */
function exportTreemap() {
    const treemap = document.getElementById("treemap");

    domtoimage.toBlob(treemap, {width: width, height: height}).then(blob => {
        const url = URL.createObjectURL(blob);

        // A temporary link is clicked programmatically to start the download.
        const link = document.createElement("a");
        link.href = url;
        link.download = `${indexMap[$("#indices").val()]}_treemap.png`;
        document.body.appendChild(link);
        link.click();

        // Clean up once the click has been dispatched.
        setTimeout(() => {
            document.body.removeChild(link);
            window.URL.revokeObjectURL(url);
        }, 0);
    });
}
</script>
</body>
</html>

353
src/stats.c Normal file
View File

@@ -0,0 +1,353 @@
#include "sist.h"
#include "io/serialize.h"
#include "ctx.h"
#include <glib.h>
// path -> cumulative size in bytes; the size is stored directly in the value pointer
static GHashTable *FlatTree;
// Staging table used by merge_up() for parents that are not in FlatTree yet
static GHashTable *BufferTable;
// mime string -> agg_t*
static GHashTable *AggMime;
// size bucket start (stored in the key pointer) -> agg_t*
static GHashTable *AggSize;
// date bucket start (stored in the key pointer) -> agg_t*
static GHashTable *AggDate;
// Width of one file-size bucket: 5 MiB
#define SIZE_BUCKET (long)(5 * 1024 * 1024)
// Width of one mtime bucket; 2629800 s is about one month, assuming mtime is in seconds - TODO confirm
#define DATE_BUCKET (long)(2629800)
// Running totals over every document accepted by fill_tables()
static long TotalSize = 0;
static long DocumentCount = 0;
// Aggregated size and document count of one bucket / mime type
typedef struct {
long size;
long count;
} agg_t;
/**
 * Index reader callback: account one document in the treemap table and in the
 * mime, size and date aggregations.
 *
 * Documents that have a "parent" field are ignored.
 *
 * @param document parsed document; "path", "mime", "size" and "mtime" items
 *                 are dereferenced without a NULL check, so they are expected
 *                 to be present ("mime" may hold a NULL string)
 */
void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;
}
// Private copies: the strings are used as hash table keys below.
const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring;
char *path = malloc(strlen(json_path) + 1);
strcpy(path, json_path);
const char *json_mime = cJSON_GetObjectItem(document, "mime")->valuestring;
char *mime;
if (json_mime == NULL) {
mime = NULL;
} else {
mime = malloc(strlen(json_mime) + 1);
strcpy(mime, json_mime);
}
long size = (long) cJSON_GetObjectItem(document, "size")->valuedouble;
int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
// treemap
// The size lives in the value pointer itself, so a stored total of 0 is
// indistinguishable from "not found" here; both branches then store `size`.
// NOTE(review): `path` is handed to the table in both branches; whether the
// superseded key is freed depends on FlatTree's key destroy function, which
// is set where the table is created (not visible here).
void *existing_path = g_hash_table_lookup(FlatTree, path);
if (existing_path == NULL) {
g_hash_table_insert(FlatTree, path, (gpointer) size);
} else {
g_hash_table_replace(FlatTree, path, (gpointer) ((long) existing_path + size));
}
// mime agg
if (mime != NULL) {
agg_t *orig_agg = g_hash_table_lookup(AggMime, mime);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
// The table keeps `mime` as its key.
g_hash_table_insert(AggMime, mime, agg);
} else {
orig_agg->size += size;
orig_agg->count += 1;
// Key already present: our copy is not needed.
free(mime);
}
}
// size agg
// Round down to the start of the SIZE_BUCKET-wide bucket.
long size_bucket = size - (size % SIZE_BUCKET);
agg_t *orig_agg = g_hash_table_lookup(AggSize, (gpointer) size_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggSize, (gpointer) size_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
// date agg
// Round down to the start of the DATE_BUCKET-wide bucket.
long date_bucket = mtime - (mtime % DATE_BUCKET);
orig_agg = g_hash_table_lookup(AggDate, (gpointer) date_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggDate, (gpointer) date_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
TotalSize += size;
DocumentCount += 1;
}
/**
 * Feed every "_index_*" file found in the index directory through
 * fill_tables().
 *
 * @param index index whose directory (index->path) is scanned; a directory
 *              that cannot be opened is reported as a fatal error
 */
void read_index_into_tables(index_t *index) {
    DIR *dir = opendir(index->path);
    if (dir == NULL) {
        // Without this check readdir(NULL) below would crash.
        LOG_FATALF("stats.c", "Could not open index directory %s: %s [%d]", index->path, strerror(errno), errno)
        return;
    }

    struct dirent *de;
    while ((de = readdir(dir)) != NULL) {
        // Only the index data files are of interest.
        if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) != 0) {
            continue;
        }

        char file_path[PATH_MAX];
        snprintf(file_path, PATH_MAX, "%s/%s", index->path, de->d_name);
        read_index(file_path, index->desc.uuid, index->desc.type, fill_tables);
    }

    closedir(dir);
}
/**
 * Return the index of the last occurrence of `c` in `str`, or (size_t) -1
 * when it does not occur. The terminating NUL is part of the search, so
 * looking for 0 yields strlen(str).
 */
static size_t rfind(const char *str, int c) {
    int idx = (int) strlen(str);

    while (idx >= 0) {
        if (str[idx] == c) {
            return idx;
        }
        idx--;
    }

    return -1;
}
/**
 * One pass of treemap simplification: every FlatTree entry smaller than
 * thresh * TotalSize is removed and its size is added to its parent
 * directory.
 *
 * @param thresh minimum size of an entry, as a fraction of TotalSize
 * @return number of entries merged into their parent during this pass
 */
int merge_up(double thresh) {
long min_size = (long) (thresh * (double) TotalSize);
int count = 0;
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
long size = (long) value;
if (size < min_size) {
// Parent path = everything before the last '/'; the empty string when
// there is no '/'.
int stop = rfind(key, '/');
if (stop == -1) {
stop = 0;
}
char *parent = malloc(stop + 1);
strncpy(parent, key, stop);
*(parent + stop) = '\0';
// A parent whose stored size is 0 looks the same as a missing one here,
// because the size is stored in the value pointer itself.
void *existing_parent = g_hash_table_lookup(FlatTree, parent);
if (existing_parent == NULL) {
// Parent not in FlatTree: accumulate in BufferTable; it is moved into
// FlatTree after the iteration.
void *existing_parent2_key;
void *existing_parent2_val;
int found = g_hash_table_lookup_extended(BufferTable, parent, &existing_parent2_key,
&existing_parent2_val);
if (!found) {
g_hash_table_insert(BufferTable, parent, value);
} else {
// replace() makes `parent` the stored key, so the previous key is
// freed by hand.
g_hash_table_replace(BufferTable, parent, (gpointer) ((long) existing_parent2_val + size));
free(existing_parent2_key);
}
} else {
// NOTE(review): this modifies FlatTree while it is being iterated by
// something other than the iterator functions; it only touches an
// existing key - confirm against the GLib iterator rules. Whether the
// superseded key is freed depends on FlatTree's key destroy function
// (set where the table is created, not visible here).
g_hash_table_replace(FlatTree, parent, (gpointer) ((long) existing_parent + size));
}
g_hash_table_iter_remove(&iter);
count += 1;
}
}
// Move the buffered parents into FlatTree.
// NOTE(review): the keys are shared with FlatTree from here on, so
// BufferTable presumably has no key destroy function - confirm.
g_hash_table_iter_init(&iter, BufferTable);
while (g_hash_table_iter_next(&iter, &key, &value)) {
g_hash_table_insert(FlatTree, key, value);
g_hash_table_iter_remove(&iter);
}
int size = g_hash_table_size(FlatTree);
LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size)
return count;
}
/**
 * Writes str to dst as a single CSV field.
 *
 * The field is copied verbatim unless it contains a comma, a double quote or
 * a line break; in that case it is wrapped in double quotes and every
 * embedded double quote is doubled.
 *
 * @param dst output buffer; must hold at least 2 * strlen(str) + 3 bytes
 *            (callers in this file pass PATH_MAX * 4)
 * @param str NUL-terminated field value
 */
void csv_escape(char *dst, const char *str) {
    // A raw line break inside an unquoted field would split the CSV row
    if (strpbrk(str, ",\"\r\n") == NULL) {
        strcpy(dst, str);
        return;
    }

    char *out = dst;
    *out++ = '"';
    for (const char *ptr = str; *ptr != '\0'; ptr++) {
        if (*ptr == '"') {
            // Escape a quote by doubling it
            *out++ = '"';
        }
        *out++ = *ptr;
    }
    *out++ = '"';
    *out = '\0';
}
/**
 * Opens path for writing, creating it with mode 0600 when it does not exist
 * and truncating it when it does. A failure is reported through LOG_FATALF.
 *
 * @param path file to create or overwrite
 * @return the file descriptor returned by open()
 */
int open_or_exit(const char *path) {
    // O_TRUNC: without it, rewriting a shorter file keeps the tail of the previous content
    int fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR);
    if (fd < 0) {
        LOG_FATALF("stats.c", "Error while creating file: %s [%d]\n", strerror(errno), errno)
    }
    return fd;
}
#define TREEMAP_CSV_HEADER "path,size"
#define MIME_AGG_CSV_HEADER "mime,size,count"
#define SIZE_AGG_CSV_HEADER "bucket,size,count"
#define DATE_AGG_CSV_HEADER "bucket,size,count"
/**
 * Dumps FlatTree to out_path as a "path,size" CSV file.
 * Only entries whose size is at least thresh * TotalSize are written.
 *
 * @param thresh   fraction of TotalSize used as the minimum size
 * @param out_path destination file, opened with open_or_exit()
 */
void write_treemap_csv(double thresh, const char *out_path) {
    const long min_size = (long) (thresh * (double) TotalSize);
    const int fd = open_or_exit(out_path);

    if (write(fd, TREEMAP_CSV_HEADER, sizeof(TREEMAP_CSV_HEADER) - 1) == -1) {
        LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
    }

    GHashTableIter iter;
    void *key;
    void *value;
    for (g_hash_table_iter_init(&iter, FlatTree); g_hash_table_iter_next(&iter, &key, &value);) {
        // The size is stored directly in the value pointer
        const long size = (long) value;
        if (size < min_size) {
            continue;
        }

        char escaped[PATH_MAX * 4];
        char line[PATH_MAX * 4 + 16];
        csv_escape(escaped, key);

        // Each row starts with the line break that terminates the previous one
        const size_t line_len = sprintf(line, "\n%s,%ld", escaped, size);
        if (write(fd, line, line_len) == -1) {
            LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
        }
    }

    close(fd);
}
/**
 * Writes an aggregation table with string keys to out_path as CSV:
 * the header, then one "key,size,count" row per table entry.
 *
 * @param out_path destination file, opened with open_or_exit()
 * @param header   first line of the file, written without a trailing line break
 * @param table    hash table whose keys are C strings and whose values are agg_t pointers
 */
void write_agg_csv_str(const char *out_path, const char *header, GHashTable *table) {
    const int fd = open_or_exit(out_path);

    if (write(fd, header, strlen(header)) == -1) {
        LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
    }

    char line[4096];
    GHashTableIter iter;
    void *key;
    void *value;
    for (g_hash_table_iter_init(&iter, table); g_hash_table_iter_next(&iter, &key, &value);) {
        const agg_t *entry = value;
        const size_t line_len = sprintf(line, "\n%s,%ld,%ld", (const char *) key, entry->size, entry->count);
        if (write(fd, line, line_len) == -1) {
            LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
        }
    }

    close(fd);
}
/**
 * Writes an aggregation table with integer keys to out_path as CSV:
 * the header, then one "key,size,count" row per table entry.
 *
 * @param out_path destination file, opened with open_or_exit()
 * @param header   first line of the file, written without a trailing line break
 * @param table    hash table whose keys are integers stored in the key pointer
 *                 and whose values are agg_t pointers
 */
void write_agg_csv_long(const char *out_path, const char *header, GHashTable *table) {
    const int fd = open_or_exit(out_path);

    if (write(fd, header, strlen(header)) == -1) {
        LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
    }

    char line[4096];
    GHashTableIter iter;
    void *key;
    void *value;
    for (g_hash_table_iter_init(&iter, table); g_hash_table_iter_next(&iter, &key, &value);) {
        const agg_t *entry = value;
        const size_t line_len = sprintf(line, "\n%ld,%ld,%ld", (long) key, entry->size, entry->count);
        if (write(fd, line, line_len) == -1) {
            LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
        }
    }

    close(fd);
}
int generate_stats(index_t *index, const double threshold, const char *out_prefix) {
FlatTree = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
BufferTable = g_hash_table_new(g_str_hash, g_str_equal);
AggMime = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
AggSize = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
AggDate = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
LOG_INFO("stats.c", "Generating stats...")
read_index_into_tables(index);
LOG_DEBUG("stats.c", "Read index into tables")
LOG_DEBUGF("stats.c", "Total size is %ld", TotalSize)
LOG_DEBUGF("stats.c", "Document count is %ld", DocumentCount)
LOG_DEBUGF("stats.c", "Merging small directories upwards with a threshold of %f%%", threshold * 100)
while (merge_up(threshold) > 100) {}
char tmp[PATH_MAX];
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "treemap.csv");
write_treemap_csv(threshold, tmp);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "mime_agg.csv");
write_agg_csv_str(tmp, MIME_AGG_CSV_HEADER, AggMime);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "size_agg.csv");
write_agg_csv_long(tmp, SIZE_AGG_CSV_HEADER, AggSize);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "date_agg.csv");
write_agg_csv_long(tmp, DATE_AGG_CSV_HEADER, AggDate);
g_hash_table_remove_all(FlatTree);
g_hash_table_destroy(FlatTree);
g_hash_table_destroy(BufferTable);
g_hash_table_remove_all(AggMime);
g_hash_table_destroy(AggMime);
g_hash_table_remove_all(AggSize);
g_hash_table_destroy(AggSize);
g_hash_table_remove_all(AggDate);
g_hash_table_destroy(AggDate);
return 0;
}

6
src/stats.h Normal file
View File

@@ -0,0 +1,6 @@
#ifndef SIST2_STATS_H
#define SIST2_STATS_H
int generate_stats(index_t *index, double threshold, const char* out_prefix);
#endif

View File

@@ -3,6 +3,8 @@
#include "sist.h" #include "sist.h"
#include <pthread.h> #include <pthread.h>
#define MAX_QUEUE_SIZE 10000
typedef void (*thread_func_t)(void *arg); typedef void (*thread_func_t)(void *arg);
typedef struct tpool_work { typedef struct tpool_work {
@@ -26,6 +28,7 @@ typedef struct tpool {
int work_cnt; int work_cnt;
int done_cnt; int done_cnt;
int free_arg;
int stop; int stop;
void (*cleanup_func)(); void (*cleanup_func)();
@@ -79,6 +82,10 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
return 0; return 0;
} }
while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
usleep(100000);
}
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&(pool->work_mutex));
if (pool->work_head == NULL) { if (pool->work_head == NULL) {
pool->work_head = work; pool->work_head = work;
@@ -121,7 +128,9 @@ static void *tpool_worker(void *arg) {
} }
work->func(work->arg); work->func(work->arg);
free(work->arg); if (pool->free_arg) {
free(work->arg);
}
free(work); free(work);
} }
@@ -138,8 +147,10 @@ static void *tpool_worker(void *arg) {
pthread_mutex_unlock(&(pool->work_mutex)); pthread_mutex_unlock(&(pool->work_mutex));
} }
LOG_INFO("tpool.c", "Executing cleaup function") if (pool->cleanup_func != NULL) {
pool->cleanup_func(); LOG_INFO("tpool.c", "Executing cleanup function")
pool->cleanup_func();
}
pthread_cond_signal(&(pool->working_cond)); pthread_cond_signal(&(pool->working_cond));
pthread_mutex_unlock(&(pool->work_mutex)); pthread_mutex_unlock(&(pool->work_mutex));
@@ -207,13 +218,14 @@ void tpool_destroy(tpool_t *pool) {
* Create a thread pool * Create a thread pool
* @param thread_cnt Worker threads count * @param thread_cnt Worker threads count
*/ */
tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) { tpool_t *tpool_create(size_t thread_cnt, void cleanup_func(), int free_arg) {
tpool_t *pool = malloc(sizeof(tpool_t)); tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt; pool->thread_cnt = thread_cnt;
pool->work_cnt = 0; pool->work_cnt = 0;
pool->done_cnt = 0; pool->done_cnt = 0;
pool->stop = 0; pool->stop = 0;
pool->free_arg = free_arg;
pool->cleanup_func = cleanup_func; pool->cleanup_func = cleanup_func;
pool->threads = calloc(sizeof(pthread_t), thread_cnt); pool->threads = calloc(sizeof(pthread_t), thread_cnt);

View File

@@ -8,7 +8,7 @@ typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg); typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)()); tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg);
void tpool_start(tpool_t *pool); void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *tm); void tpool_destroy(tpool_t *tm);

View File

@@ -19,6 +19,7 @@ typedef struct index_descriptor {
typedef struct index_t { typedef struct index_t {
struct index_descriptor desc; struct index_descriptor desc;
struct store_t *store; struct store_t *store;
struct store_t *tag_store;
char path[PATH_MAX]; char path[PATH_MAX];
} index_t; } index_t;

View File

@@ -26,10 +26,11 @@ dyn_buffer_t url_escape(char *str) {
} }
char *abspath(const char *path) { char *abspath(const char *path) {
wordexp_t w;
wordexp(path, &w, 0);
char *abs = realpath(w.we_wordv[0], NULL); char *expanded = expandpath(path);
char *abs = realpath(expanded, NULL);
free(expanded);
if (abs == NULL) { if (abs == NULL) {
return NULL; return NULL;
} }
@@ -38,16 +39,46 @@ char *abspath(const char *path) {
strcat(abs, "/"); strcat(abs, "/");
} }
wordfree(&w);
return abs; return abs;
} }
char *expandpath(const char *path) { void shell_escape(char *dst, const char *src) {
wordexp_t w; const char *ptr = src;
wordexp(path, &w, 0); char *out = dst;
while ((*ptr)) {
char c = *ptr++;
char *expanded = malloc(strlen(w.we_wordv[0]) + 2); if (c == '&' || c == '\n' || c == '|' || c == ';' || c == '<' ||
strcpy(expanded, w.we_wordv[0]); c == '>' || c == '(' || c == ')' || c == '{' || c == '}') {
*out++ = '\\';
}
*out++ = c;
}
*out = 0;
}
char *expandpath(const char *path) {
char tmp[PATH_MAX * 2];
shell_escape(tmp, path);
wordexp_t w;
wordexp(tmp, &w, 0);
if (w.we_wordv == NULL) {
return NULL;
}
*tmp = '\0';
for (int i = 0; i < w.we_wordc; i++) {
strcat(tmp, w.we_wordv[i]);
if (i != w.we_wordc - 1) {
strcat(tmp, " ");
}
}
char *expanded = malloc(strlen(tmp) + 2);
strcpy(expanded, tmp);
strcat(expanded, "/"); strcat(expanded, "/");
wordfree(&w); wordfree(&w);
@@ -123,4 +154,92 @@ const char *find_file_in_paths(const char *paths[], const char *filename) {
return NULL; return NULL;
} }
#define ESCAPE_CHAR ']'
/**
 * Copies str to dst, replacing every byte that is not part of a valid UTF-8
 * sequence (as judged by utf8_validchr2) with ESCAPE_CHAR followed by two
 * uppercase hex digits, and doubling every literal ESCAPE_CHAR.
 * str_unescape() reverses the transformation.
 *
 * @param dst output buffer; the worst case is three output bytes per input
 *            byte, so it must hold at least 3 * strlen(str) + 1 bytes
 * @param str NUL-terminated input
 */
void str_escape(char *dst, const char *str) {
    const size_t len = strlen(str);

    // Work on a copy padded with 8 zero bytes so that decoding a truncated
    // multi-byte sequence at the end of the input stays inside the buffer.
    // Heap-allocated: the input length is not bounded by this function.
    char *buf = malloc(len + 8);
    if (buf == NULL) {
        *dst = '\0';
        return;
    }
    memcpy(buf, str, len);
    memset(buf + len, 0, 8);

    char *cur = dst;
    const char *ptr = buf;
    const char *oldPtr = ptr;

    utf8_int32_t c;
    char tmp[16];

    do {
        ptr = (char *) utf8codepoint(ptr, &c);

        // Clear the first four bytes, then copy the raw bytes of this code point
        memset(tmp, 0, sizeof(int));
        size_t code_len = (size_t) (ptr - oldPtr);
        memcpy(tmp, oldPtr, code_len);
        oldPtr = ptr;

        if (!utf8_validchr2(tmp)) {
            // Invalid sequence: emit each of its non-zero bytes as ESCAPE_CHAR + 2 hex digits
            for (size_t i = 0; i < code_len; i++) {
                if (tmp[i] == 0) {
                    break;
                }
                cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char) tmp[i]);
            }
            continue;
        }

        if (c == ESCAPE_CHAR) {
            // A literal escape character is represented by doubling it
            *cur++ = ESCAPE_CHAR;
            *cur++ = ESCAPE_CHAR;
            continue;
        }

        // Re-encode the code point as 1 to 4 UTF-8 bytes
        if (((utf8_int32_t) 0xffffff80 & c) == 0) {
            *(cur++) = (char) c;
        } else if (((utf8_int32_t) 0xfffff800 & c) == 0) {
            *(cur++) = 0xc0 | (char) (c >> 6);
            *(cur++) = 0x80 | (char) (c & 0x3f);
        } else if (((utf8_int32_t) 0xffff0000 & c) == 0) {
            *(cur++) = 0xe0 | (char) (c >> 12);
            *(cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
            *(cur++) = 0x80 | (char) (c & 0x3f);
        } else {
            *(cur++) = 0xf0 | (char) (c >> 18);
            *(cur++) = 0x80 | (char) ((c >> 12) & 0x3f);
            *(cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
            *(cur++) = 0x80 | (char) (c & 0x3f);
        }
    } while (*ptr != '\0');

    *cur = '\0';
    free(buf);
}
/**
 * Reverses str_escape(): a doubled ESCAPE_CHAR becomes one literal
 * ESCAPE_CHAR, and ESCAPE_CHAR followed by two hex digits becomes the byte
 * with that value. All other bytes are copied unchanged.
 *
 * Decoding stops at an escape sequence that is cut short by the end of the
 * input; the incomplete sequence is dropped.
 *
 * @param dst output buffer; the output is never longer than the input, so
 *            strlen(str) + 1 bytes are enough
 * @param str NUL-terminated escaped input
 */
void str_unescape(char *dst, const char *str) {
    char *cur = dst;
    const char *ptr = str;

    char tmp[3];
    tmp[2] = '\0';

    while (*ptr != '\0') {
        char c = *ptr++;

        if (c != ESCAPE_CHAR) {
            *cur++ = c;
            continue;
        }

        if (*ptr == ESCAPE_CHAR) {
            *cur++ = c;
            ptr += 1;
            continue;
        }

        // Both hex digits must be present, otherwise reading them (and the
        // "ptr += 2" below) would run past the terminator
        if (ptr[0] == '\0' || ptr[1] == '\0') {
            break;
        }

        tmp[0] = ptr[0];
        tmp[1] = ptr[1];
        *cur++ = (char) strtol(tmp, NULL, 16);
        ptr += 2;
    }

    *cur = '\0';
}

View File

@@ -43,4 +43,9 @@ static int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode
const char *find_file_in_paths(const char **paths, const char *filename); const char *find_file_in_paths(const char **paths, const char *filename);
void str_escape(char *dst, const char *str);
void str_unescape(char *dst, const char *str);
#endif #endif

View File

@@ -53,18 +53,87 @@ store_t *get_store(const char *index_id) {
return NULL; return NULL;
} }
/**
 * Looks up the tag store of an index.
 *
 * @param index_id id passed to get_index_by_id()
 * @return the tag_store of the matching index, or NULL when no index matches
 */
store_t *get_tag_store(const char *index_id) {
    index_t *idx = get_index_by_id(index_id);
    return idx == NULL ? NULL : idx->tag_store;
}
void search_index(struct mg_connection *nc) { void search_index(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html"); send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html");
mg_send(nc, search_html, sizeof(search_html)); mg_send(nc, search_html, sizeof(search_html));
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
} }
void javascript(struct mg_connection *nc) { void stats(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(stats_html), "Content-Type: text/html");
mg_send(nc, stats_html, sizeof(stats_html));
nc->flags |= MG_F_SEND_AND_CLOSE;
}
/**
 * Serves one of the generated stats CSV files of an index.
 *
 * The request path must be exactly UUID_STR_LEN + 4 characters long; the
 * index id is read from offset 3 of the URI and the character that follows
 * it after one separator selects the file:
 * 1 = treemap.csv, 2 = mime_agg.csv, 3 = size_agg.csv, 4 = date_agg.csv.
 * Every rejected request is answered with 404.
 *
 * @param nc   connection to answer on
 * @param hm   parsed HTTP request
 * @param path path component of the request URI
 */
void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
    if (path->len != UUID_STR_LEN + 4) {
        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

    char arg_uuid[UUID_STR_LEN];
    memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
    // The last copied character is the separator after the id; overwrite it with the terminator
    arg_uuid[UUID_STR_LEN - 1] = '\0';

    index_t *index = get_index_by_id(arg_uuid);
    if (index == NULL) {
        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

    // The URI is not NUL-terminated, so look at the selector character
    // directly instead of handing the buffer to atoi()
    const char *file;
    switch (hm->uri.p[3 + UUID_STR_LEN]) {
        case '1':
            file = "treemap.csv";
            break;
        case '2':
            file = "mime_agg.csv";
            break;
        case '3':
            file = "size_agg.csv";
            break;
        case '4':
            file = "date_agg.csv";
            break;
        default:
            // Answer explicitly rather than closing the connection without a response
            mg_http_send_error(nc, 404, NULL);
            nc->flags |= MG_F_SEND_AND_CLOSE;
            return;
    }

    char disposition[128];
    snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"", file);

    char full_path[PATH_MAX];
    int n = snprintf(full_path, sizeof(full_path), "%s%s", index->path, file);
    if (n < 0 || (size_t) n >= sizeof(full_path)) {
        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

    mg_http_serve_file(nc, hm, full_path, mg_mk_str("text/csv"), mg_mk_str(disposition));
    nc->flags |= MG_F_SEND_AND_CLOSE;
}
void javascript_lib(struct mg_connection *nc) {
send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript"); send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript");
mg_send(nc, bundle_js, sizeof(bundle_js)); mg_send(nc, bundle_js, sizeof(bundle_js));
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
} }
/**
 * Answers with the embedded search_js blob as application/javascript and
 * flags the connection to be closed once the data is sent.
 *
 * @param nc connection to answer on
 */
void javascript_search(struct mg_connection *nc) {
    const size_t js_len = sizeof(search_js);

    send_response_line(nc, 200, js_len, "Content-Type: application/javascript");
    mg_send(nc, search_js, js_len);
    nc->flags |= MG_F_SEND_AND_CLOSE;
}
int client_requested_dark_theme(struct http_message *hm) { int client_requested_dark_theme(struct http_message *hm) {
struct mg_str *cookie_header = mg_get_http_header(hm, "cookie"); struct mg_str *cookie_header = mg_get_http_header(hm, "cookie");
if (cookie_header == NULL) { if (cookie_header == NULL) {
@@ -114,6 +183,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
if (path->len != UUID_STR_LEN * 2 + 2) { if (path->len != UUID_STR_LEN * 2 + 2) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -130,6 +200,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
int ret = uuid_parse(arg_uuid, uuid); int ret = uuid_parse(arg_uuid, uuid);
if (ret != 0) { if (ret != 0) {
LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid) LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -137,6 +208,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
store_t *store = get_store(arg_index); store_t *store = get_store(arg_index);
if (store == NULL) { if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index) LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -155,6 +227,7 @@ void search(struct mg_connection *nc, struct http_message *hm) {
if (hm->body.len == 0) { if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request") LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_send_error(nc, 500, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -174,12 +247,19 @@ int serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring; const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring; const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
char name_unescaped[PATH_MAX * 3];
str_unescape(name_unescaped, name);
char path_unescaped[PATH_MAX * 3];
str_unescape(path_unescaped, path);
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring; const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
char url[8196]; char url[8196];
snprintf(url, sizeof(url), snprintf(url, sizeof(url),
"%s%s/%s%s%s", "%s%s/%s%s%s",
idx->desc.rewrite_url, path, name, strlen(ext) == 0 ? "" : ".", ext); idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
dyn_buffer_t encoded = url_escape(url); dyn_buffer_t encoded = url_escape(url);
mg_http_send_redirect( mg_http_send_redirect(
@@ -198,10 +278,16 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring; const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
const char *mime = cJSON_GetObjectItem(json, "mime")->valuestring; const char *mime = cJSON_GetObjectItem(json, "mime")->valuestring;
char name_unescaped[PATH_MAX * 3];
str_unescape(name_unescaped, name);
char path_unescaped[PATH_MAX * 3];
str_unescape(path_unescaped, path);
char full_path[PATH_MAX]; char full_path[PATH_MAX];
snprintf(full_path, PATH_MAX, "%s%s%s%s%s%s", snprintf(full_path, PATH_MAX, "%s%s%s%s%s%s",
idx->desc.root, path, strlen(path) == 0 ? "" : "/", idx->desc.root, path_unescaped, strlen(path_unescaped) == 0 ? "" : "/",
name, strlen(ext) == 0 ? "" : ".", ext); name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path) LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)
@@ -242,6 +328,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
if (path->len != UUID_STR_LEN + 2) { if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -256,6 +343,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
cJSON *index_id = cJSON_GetObjectItem(source, "index"); cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) { if (index_id == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -263,6 +351,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
index_t *idx = get_index_by_id(index_id->valuestring); index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) { if (idx == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -280,6 +369,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
if (path->len != UUID_STR_LEN + 2) { if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -299,6 +389,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
index_id = cJSON_GetObjectItem(source, "index"); index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) { if (index_id == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -314,6 +405,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
if (idx == NULL) { if (idx == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_send_error(nc, 404, NULL);
return; return;
} }
@@ -338,6 +430,177 @@ void status(struct mg_connection *nc) {
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
} }
typedef struct {
char *name;
int delete;
char *relpath;
char *doc_id;
} tag_req_t;
/**
 * Validates a tag request and extracts its fields.
 *
 * The JSON value must be an object with string members "name", "relpath"
 * and "doc_id" and a boolean member "delete".
 *
 * @param json parsed request body (may be NULL)
 * @return a malloc'ed tag_req_t the caller must free(), or NULL when the
 *         request is malformed or memory is exhausted. The string members
 *         point into json and are only valid until json is deleted.
 */
tag_req_t *parse_tag_request(cJSON *json) {
    if (!cJSON_IsObject(json)) {
        return NULL;
    }

    cJSON *arg_name = cJSON_GetObjectItem(json, "name");
    if (arg_name == NULL || !cJSON_IsString(arg_name)) {
        return NULL;
    }

    cJSON *arg_delete = cJSON_GetObjectItem(json, "delete");
    if (arg_delete == NULL || !cJSON_IsBool(arg_delete)) {
        return NULL;
    }

    cJSON *arg_relpath = cJSON_GetObjectItem(json, "relpath");
    if (arg_relpath == NULL || !cJSON_IsString(arg_relpath)) {
        return NULL;
    }

    cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
    if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
        return NULL;
    }

    tag_req_t *req = malloc(sizeof *req);
    if (req == NULL) {
        return NULL;
    }

    req->delete = arg_delete->valueint;
    req->name = arg_name->valuestring;
    req->relpath = arg_relpath->valuestring;
    req->doc_id = arg_doc_id->valuestring;

    return req;
}
void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (path->len != UUID_STR_LEN + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char arg_index[UUID_STR_LEN];
memcpy(arg_index, hm->uri.p + 5, UUID_STR_LEN);
*(arg_index + UUID_STR_LEN - 1) = '\0';
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request")
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
store_t *store = get_tag_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.p, hm->body.len);
*(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body);
tag_req_t *arg_req = parse_tag_request(json);
if (arg_req == NULL) {
LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
cJSON_Delete(json);
free(body);
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
cJSON *arr = NULL;
size_t data_len = 0;
const char *data = store_read(store, arg_req->relpath, strlen(arg_req->relpath), &data_len);
if (data_len == 0) {
arr = cJSON_CreateArray();
} else {
arr = cJSON_Parse(data);
}
if (arg_req->delete) {
if (data_len > 0) {
cJSON *element = NULL;
int i = 0;
cJSON_ArrayForEach(element, arr) {
if (strcmp(element->valuestring, arg_req->name) == 0) {
cJSON_DeleteItemFromArray(arr, i);
break;
}
i++;
}
}
char buf[8196];
snprintf(buf, sizeof(buf),
"{"
" \"script\" : {"
" \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\","
" \"lang\": \"painless\","
" \"params\" : {"
" \"tag\" : \"%s\""
" }"
" }"
"}", arg_req->name
);
char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_update/%s", WebCtx.es_url, arg_req->doc_id);
nc->user_data = web_post_async(url, buf);
} else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
char buf[8196];
snprintf(buf, sizeof(buf),
"{"
" \"script\" : {"
" \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\","
" \"lang\": \"painless\","
" \"params\" : {"
" \"tag\" : \"%s\""
" }"
" }"
"}", arg_req->name
);
char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_update/%s", WebCtx.es_url, arg_req->doc_id);
nc->user_data = web_post_async(url, buf);
}
char *json_str = cJSON_PrintUnformatted(arr);
store_write(store, arg_req->relpath, strlen(arg_req->relpath) + 1, json_str, strlen(json_str) + 1);
free(arg_req);
free(json_str);
cJSON_Delete(json);
cJSON_Delete(arr);
free(body);
}
/**
 * Checks the HTTP basic-auth credentials of a request against
 * WebCtx.auth_user / WebCtx.auth_pass.
 *
 * When the credentials are missing or wrong, a 401 response with a
 * WWW-Authenticate header is written and the connection is flagged to be
 * closed.
 *
 * @param nc connection to answer on
 * @param hm parsed HTTP request
 * @return TRUE when the credentials match, FALSE otherwise
 */
int validate_auth(struct mg_connection *nc, struct http_message *hm) {
    char user[256] = {0,};
    char pass[256] = {0,};

    const int parsed = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass));
    const int authorized = parsed != -1
                           && strcmp(user, WebCtx.auth_user) == 0
                           && strcmp(pass, WebCtx.auth_pass) == 0;
    if (authorized) {
        return TRUE;
    }

    mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n"
                  "WWW-Authenticate: Basic realm=\"sist2\"\r\n"
                  "Content-Length: 0\r\n\r\n");
    nc->flags |= MG_F_SEND_AND_CLOSE;
    return FALSE;
}
static void ev_router(struct mg_connection *nc, int ev, void *p) { static void ev_router(struct mg_connection *nc, int ev, void *p) {
struct mg_str scheme; struct mg_str scheme;
struct mg_str user_info; struct mg_str user_info;
@@ -351,21 +614,14 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
struct http_message *hm = (struct http_message *) p; struct http_message *hm = (struct http_message *) p;
if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) { if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
if (WebCtx.auth_enabled == TRUE) { if (WebCtx.auth_enabled == TRUE) {
char user[256] = {0,}; if (!validate_auth(nc, hm)) {
char pass[256] = {0,};
int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass));
if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n"
"WWW-Authenticate: Basic realm=\"sist2\"\r\n"
"Content-Length: 0\r\n\r\n");
nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
} }
@@ -374,8 +630,12 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
search_index(nc); search_index(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/css")))) { } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/css")))) {
style(nc, hm); style(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/js")))) { } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/stats")))) {
javascript(nc); stats(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jslib")))) {
javascript_lib(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jssearch")))) {
javascript_search(nc);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/img/sprite-skin-flat.png")))) { } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/img/sprite-skin-flat.png")))) {
img_sprite_skin_flat(nc, hm); img_sprite_skin_flat(nc, hm);
} else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/es")))) { } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/es")))) {
@@ -388,9 +648,19 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
file(nc, hm, &path); file(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/t/")))) { } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/t/")))) {
thumbnail(nc, hm, &path); thumbnail(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/s/")))) {
stats_files(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/tag/")))) {
if (WebCtx.tag_auth_enabled == TRUE) {
if (!validate_auth(nc, hm)) {
return;
}
}
tag(nc, hm, &path);
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) { } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
document_info(nc, hm, &path); document_info(nc, hm, &path);
} else { } else {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
} }
@@ -421,7 +691,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
free(json_str); free(json_str);
free(tmp); free(tmp);
} }
//todo return error code mg_http_send_error(nc, 500, NULL);
} }
free_response(r); free_response(r);

File diff suppressed because one or more lines are too long