Compare commits

..

19 Commits

Author SHA1 Message Date
ebfd7e03ce User scripts, bug fixes, docker image 2019-11-12 20:58:43 -05:00
6931d320a2 bugfix with invalid/corrupted index path 2019-11-11 20:49:38 -05:00
fc22e52eae Image placeholder 2019-11-09 23:26:49 -05:00
ba81748a74 Update build 2019-11-09 17:15:20 -05:00
e72fa1587b EXIF metadata for images 2019-11-09 15:18:44 -05:00
ea4fb7fa0d Bug fixes 2019-11-09 12:00:07 -05:00
b0a868bb73 remove 'must match' 2019-11-08 21:46:54 -05:00
d761a3b595 update readme 2019-11-08 19:42:36 -05:00
2d7a8a2fdc fuzzy toggle 2019-11-08 16:15:10 -05:00
152d2ddf8a bug fix in deserialize 2019-11-08 09:03:44 -05:00
bc5f22b759 update readme 2019-11-05 18:59:00 -05:00
534b397876 update readme, UI tweak: don't show broken images 2019-11-03 10:39:02 -05:00
7962a994e2 utf8 update + bug fixes 2019-11-03 07:50:31 -05:00
f8f1a27180 video metadata 2019-10-31 11:54:13 -04:00
784c3c9435 Font rendering fixes 2019-10-31 10:15:01 -04:00
f8b081a3f4 UI tweaks, path autocomplete 2019-10-31 08:26:19 -04:00
5661573b06 Dark theme, pdf meta, de-serialize bugfix 2019-10-30 22:20:22 -04:00
130fb78787 Fix some memory leaks 2019-10-27 15:40:48 -04:00
2943ca9365 UI tweak 2019-10-27 14:10:24 -04:00
57 changed files with 2019 additions and 681 deletions

2
.gitignore vendored
View File

@@ -11,7 +11,7 @@ Makefile
LOG
sist2*
index.sist2/
bundle.css
bundle*.css
bundle.js
*.a
vgcore.*

15
.gitmodules vendored
View File

@@ -16,3 +16,18 @@
[submodule "lmdb"]
path = lmdb
url = https://github.com/LMDB/lmdb
[submodule "utf8.h"]
path = utf8.h
url = https://github.com/sheredom/utf8.h
[submodule "lib/openjpeg"]
path = lib/openjpeg
url = https://github.com/uclouvain/openjpeg
[submodule "lib/harfbuzz"]
path = lib/harfbuzz
url = https://github.com/harfbuzz/harfbuzz
[submodule "lib/libmagic"]
path = lib/libmagic
url = https://github.com/threatstack/libmagic
[submodule "lib/bzip2-1.0.6"]
path = lib/bzip2-1.0.6
url = https://github.com/enthought/bzip2-1.0.6

View File

@@ -37,6 +37,9 @@ if (WITH_SIST2)
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h
# utf8.h
utf8.h/utf8.h
)
endif ()
@@ -67,6 +70,9 @@ if (WITH_SIST2_SCAN)
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h
# utf8.h
utf8.h/utf8.h
)
endif ()
@@ -116,10 +122,10 @@ if (WITH_SIST2)
target_compile_options(sist2
PRIVATE
-O3
# -march=native
-fno-stack-protector
-fomit-frame-pointer
# -Ofast
# -march=native
# -fno-stack-protector
# -fomit-frame-pointer
)
TARGET_LINK_LIBRARIES(
@@ -150,6 +156,9 @@ if (WITH_SIST2)
m
bz2
magic
harfbuzz
openjp2
freetype
)
endif ()
@@ -187,7 +196,7 @@ if (WITH_SIST2_SCAN)
)
target_compile_options(sist2_scan
PRIVATE
-O3
-Ofast
# -march=native
-fno-stack-protector
-fomit-frame-pointer
@@ -215,6 +224,9 @@ if (WITH_SIST2_SCAN)
pthread
m
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype
)
endif ()

9
Docker/Dockerfile Normal file
View File

@@ -0,0 +1,9 @@
# Runtime image for a prebuilt sist2 binary.
# The binary must be present in the build context as ./sist2
# (Docker/build.sh copies it there before running `docker build`).
FROM ubuntu:19.10
LABEL maintainer="simon987 <me@simon987.net>"

# Runtime packages for sist2.
# update + install run in one layer so a cached, stale package index is never
# reused by a later install; the index is removed afterwards to keep the
# image small.
RUN apt-get update \
    && apt-get install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 \
    && rm -rf /var/lib/apt/lists/*

# COPY is preferred over ADD for a plain local file (no archive extraction or URL fetch).
COPY sist2 /root/sist2

ENTRYPOINT ["/root/sist2"]

8
Docker/build.sh Executable file
View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Build the sist2 Docker image and push it, tagged with both the binary's
# reported version and "latest".
# Expects an already-built sist2 binary in the parent directory of this script.

# Abort on the first failing command so a broken binary is never built or pushed.
set -euo pipefail

# Resolve relative paths from the script's own directory, not the caller's.
cd "$(dirname "$0")"

cp ../sist2 .

version=$(./sist2 --version)
if [ -z "${version}" ]; then
    echo "Could not determine sist2 version" >&2
    exit 1
fi
echo "Version ${version}"

docker build . -t "simon987/sist2:${version}" -t simon987/sist2:latest
docker push "simon987/sist2:${version}"
docker push simon987/sist2:latest

View File

@@ -9,11 +9,12 @@ sist2 (Simple incremental search tool)
## Features
* Fast, low memory usage
* Fast, low memory usage, multi-threaded
* Portable (all its features are packaged in a single executable)
* Extracts text from common file types\*
* Generates thumbnails\*
* Incremental scanning
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
\* See [format support](#format-support)
@@ -21,11 +22,13 @@ sist2 (Simple incremental search tool)
## Getting Started
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases)
1.
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* `docker pull simon987/sist2:latest`
*Windows users*: `sist2` runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
*Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
\* *Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
## Example usage
@@ -52,14 +55,40 @@ sist2 index --print ./my_idx > raw_documents.ndjson
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
```
### Use sist2 with docker
**scan**
```bash
docker run -it \
-v /path/to/files/:/files \
-v $PWD/out/:/out \
simon987/sist2 scan -t 4 /files -o /out/my_idx1
```
**index**
```bash
docker run -it --network host \
-v $PWD/out/:/out \
simon987/sist2 index /out/my_idx1
```
**web**
```bash
docker run --rm --network host -d --name sist2 \
-v $PWD/out/my_idx:/idx \
-v $PWD/my/files:/files \
simon987/sist2 web --bind 0.0.0.0 /idx
docker stop sist2
```
## Format support
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,cbz,cbr,fb2,epub | MuPDF | yes | yes, `png` | *planned* |
`audio/*` | libav | - | yes, `jpeg` | ID3 tags |
`video/*` | libav | - | yes, `jpeg` | *planned* |
`image/*` | libav | - | yes, `jpeg` | *planned* |
pdf,xps,cbz,fb2,epub | MuPDF | yes | yes, `png` | title |
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
`image/*` | ffmpeg | - | yes, `jpeg` | `EXIF:Artist`, `EXIF:ImageDescription` |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
docx, xlsx, pptx | | *planned* | no | *planned* |
@@ -79,13 +108,14 @@ binaries.
apt install git cmake pkg-config libglib2.0-dev\
libssl-dev uuid-dev libavformat-dev libswscale-dev \
python3 libmagic-dev libfreetype6-dev libcurl-dev \
libbz2-dev yasm
libbz2-dev yasm libharfbuzz-dev ragel
```
*(FreeBSD)*
```bash
pkg install cmake gcc yasm gmake bash ffmpeg e2fsprogs-uuid
pkg install cmake gcc yasm gmake bash ffmpeg e2fsprogs-uuid \
autotools ragel
```
__
2. Build
```bash
git clone --recurse-submodules https://github.com/simon987/sist2

2
cJSON

Submodule cJSON updated: 2de7d04aaf...533ff8a783

1
lib/bzip2-1.0.6 Submodule

Submodule lib/bzip2-1.0.6 added at 288acf97a1

1
lib/harfbuzz Submodule

Submodule lib/harfbuzz added at 878e3588a3

1
lib/libmagic Submodule

Submodule lib/libmagic added at 1249b5cd02

1
lib/openjpeg Submodule

Submodule lib/openjpeg added at 5875a6b446

View File

@@ -91,7 +91,7 @@ application/x-esrehber, es
application/x-excel, xla|xld|xlk|xlt|xlv
application/x-executable, exe
application/x-font-sfn,
application/x-font-ttf, ttf
application/x-font-ttf, ttf|ttc
application/x-freelance, pre
application/x-git,
application/x-gsp, gsp
@@ -254,6 +254,7 @@ text/mcf, mcf
text/pascal, pas
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc
text/x-awk, awk
!video/x-jng, jng
@@ -263,7 +264,7 @@ image/x-xwindowdump, xwd
!image/vnd.adobe.photoshop, psd
text/tab-separated-values, tsv
text/troff, man|me|ms|roff|t|tr
text/uri-list, uni|unis|uri|uris
text/uri-list, uji|unis|uri|uris
text/vnd.abc, abc
text/vnd.fmi.flexstor, flx
text/vnd.wap.wmlscript, wmls
@@ -359,3 +360,54 @@ image/x-tga,
application/x-wine-extension-ini,
application/x-cbz, cbz
application/x-cbr, cbr
application/x-ms-compress-szdd, fon
application/x-atari-7800-rom, a78
application/x-nes-rom, nes
application/x-font-pfm, pfm
application/x-gettext-translation,
image/wmf,
application/pgp-keys,
image/x-3ds, 3ds
application/x-lz4, lz4
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.oasis.opendocument.presentation, odp
application/x-msaccess, accdb
application/vnd.oasis.opendocument.spreadsheet, ods
audio/x-aiff, aiff|aif
text/x-ms-regedit, reg
application/x-gamecube-rom,
application/x-nintendo-ds-rom,
text/x-objective-c,
application/x-font-gdos,
application/x-apple-diskimage,
application/x-zstd, zst
video/x-m4v, m4v
message/news,
application/vnd.symbian.install,
application/x-lzh-compressed,
application/x-dosdriver,
application/vnd.tcpdump.pcap, pcap
x-epoc/x-sisx-app,
application/x-avira-qua,
video/MP2T,
application/x-snappy-framed,
application/x-lz4+json, jsonlz4
application/x-dmp, dmp
application/zlib, z
application/x-pgp-keyring,
application/x-gdbm,
application/x-font-pf2, pf2
application/x-zip,
application/x-coredump,
application/x-java-jmod, jmod
application/x-terminfo,
application/x-terminfo2,
application/x-arc,
application/vnd.lotus-1-2-3,
image/x-win-bitmap,
application/x-maxis-dbpf,
text/PGP,
audio/x-hx-aac-adts,
application/x-chrome-extension,
image/heic, heic
image/x-gem,
1 application/arj arj
91 application/x-excel xla|xld|xlk|xlt|xlv
92 application/x-executable exe
93 application/x-font-sfn
94 application/x-font-ttf ttf ttf|ttc
95 application/x-freelance pre
96 application/x-git
97 application/x-gsp gsp
254 text/pascal pas
255 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
256 text/richtext rt|rtf|rtx
257 text/rtf
258 text/scriplet wsc
259 text/x-awk awk
260 !video/x-jng jng
264 !image/vnd.adobe.photoshop psd
265 text/tab-separated-values tsv
266 text/troff man|me|ms|roff|t|tr
267 text/uri-list uni|unis|uri|uris uji|unis|uri|uris
268 text/vnd.abc abc
269 text/vnd.fmi.flexstor flx
270 text/vnd.wap.wmlscript wmls
360 application/x-wine-extension-ini
361 application/x-cbz cbz
362 application/x-cbr cbr
363 application/x-ms-compress-szdd fon
364 application/x-atari-7800-rom a78
365 application/x-nes-rom nes
366 application/x-font-pfm pfm
367 application/x-gettext-translation
368 image/wmf
369 application/pgp-keys
370 image/x-3ds 3ds
371 application/x-lz4 lz4
372 application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
373 application/vnd.oasis.opendocument.presentation odp
374 application/x-msaccess accdb
375 application/vnd.oasis.opendocument.spreadsheet ods
376 audio/x-aiff aiff|aif
377 text/x-ms-regedit reg
378 application/x-gamecube-rom
379 application/x-nintendo-ds-rom
380 text/x-objective-c
381 application/x-font-gdos
382 application/x-apple-diskimage
383 application/x-zstd zst
384 video/x-m4v m4v
385 message/news
386 application/vnd.symbian.install
387 application/x-lzh-compressed
388 application/x-dosdriver
389 application/vnd.tcpdump.pcap pcap
390 x-epoc/x-sisx-app
391 application/x-avira-qua
392 video/MP2T
393 application/x-snappy-framed
394 application/x-lz4+json jsonlz4
395 application/x-dmp dmp
396 application/zlib z
397 application/x-pgp-keyring
398 application/x-gdbm
399 application/x-font-pf2 pf2
400 application/x-zip
401 application/x-coredump
402 application/x-java-jmod jmod
403 application/x-terminfo
404 application/x-terminfo2
405 application/x-arc
406 application/vnd.lotus-1-2-3
407 image/x-win-bitmap
408 application/x-maxis-dbpf
409 text/PGP
410 audio/x-hx-aac-adts
411 application/x-chrome-extension
412 image/heic heic
413 image/x-gem

View File

@@ -80,6 +80,9 @@
"analyzer": "my_nGram"
}
}
},
"tag": {
"type": "keyword"
}
}
}

117
scripting/README.md Normal file
View File

@@ -0,0 +1,117 @@
## User scripts
*This document is under construction, more in-depth guide coming soon*
During the `index` step, you can use the `--script-file <script>` option to
modify documents or add user tags. This option is mainly used to
implement automatic tagging based on file attributes.
The scripting language used
([Painless Scripting Language](https://www.elastic.co/guide/en/elasticsearch/painless/7.4/index.html))
is very similar to Java, but you should be able to create user scripts
without programming experience at all if you're somewhat familiar with
regex.
This is the base structure of the documents we're working with:
```json
{
"_id": "e171405c-fdb5-4feb-bb32-82637bc32084",
"_index": "sist2",
"_type": "_doc",
"_source": {
"index": "206b3050-e821-421a-891d-12fcf6c2db0d",
"mime": "application/json",
"size": 1799,
"mtime": 1545443685,
"extension": "md",
"name": "README",
"path": "sist2/scripting",
"content": "..."
}
}
```
**Example script**
This script checks whether the `genre` attribute exists; if it does,
it adds the `genre.<genre>` tag.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase())
}
```
You can use `.` to create a hierarchical tag tree:
![scripting/genre_example](genre_example.png)
To use regular expressions, you need to add this line in `/etc/elasticsearch/elasticsearch.yml`
```yaml
script.painless.regex.enabled: true
```
Or, if you're running Elasticsearch in Docker, add `-e "script.painless.regex.enabled=true"` to its `docker run` command.
### Examples
If `(20XX)` is in the file name, add the `year.<year>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) {
tags.add("year." + m.group(1))
}
```
Use default *Calibre* folder structure to infer author.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
// We expect the book path to look like this:
// /path/to/Calibre Library/Author/Title/Title - Author.pdf
if (ctx._source.name.contains("-") && ctx._source.extension == "pdf") {
String[] names = ctx._source.name.splitOnToken('-');
tags.add("author." + names[1].strip());
}
```
If the file matches a specific pattern `AAAA-000 fName1 lName1, <fName2 lName2>...`, add the `actress.<actress>` and
`studio.<studio>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /([A-Z]{4})-[0-9]{3} (.*)/.matcher(ctx._source.name);
if (m.find()) {
tags.add("studio." + m.group(1));
// Take the matched group (.*), and add a tag for
// each name, separated by comma
for (String name : m.group(2).splitOnToken(',')) {
tags.add("actress." + name);
}
}
```
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```

BIN
scripting/genre_example.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View File

@@ -6,9 +6,11 @@ rm web/js/bundle.js 2> /dev/null
cat `ls web/js/*.min.js` > web/js/bundle.js
cat web/js/{util,dom,search}.js >> web/js/bundle.js
rm web/css/bundle.css 2> /dev/null
rm web/css/bundle*.css 2> /dev/null
cat web/css/*.min.css > web/css/bundle.css
cat web/css/main.css >> web/css/bundle.css
cat web/css/light.css >> web/css/bundle.css
cat web/css/*.min.css > web/css/bundle_dark.css
cat web/css/dark.css >> web/css/bundle_dark.css
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c

View File

@@ -2,12 +2,28 @@
cd lib
cd mupdf
HAVE_X11=no HAVE_GLUT=no make -j 4
USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no make -j 4
cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
make -j 4
cd ..
mv openjpeg/bin/libopenjp2.a .
# harfbuzz
cd harfbuzz
./autogen.sh
./configure --disable-shared --enable-static
make -j 4
cd ..
mv harfbuzz/src/.libs/libharfbuzz.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
@@ -38,14 +54,12 @@ cd ../..
mv onion/build/src/onion/libonion_static.a .
#bzip2
git clone https://github.com/enthought/bzip2-1.0.6
cd bzip2-1.0.6
make -j 4
cd ..
mv bzip2-1.0.6/libbz2.a .
# magic
git clone https://github.com/threatstack/libmagic
cd libmagic
./autogen.sh
./configure --enable-static --disable-shared

View File

@@ -9,6 +9,22 @@ cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
gmake -j 4
cd ..
mv openjpeg/bin/libopenjp2.a .
# harfbuzz
cd harfbuzz
./autogen.sh
./configure --disable-shared --enable-static
gmake -j 4
cd ..
mv harfbuzz/src/.libs/libharfbuzz.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
@@ -26,14 +42,12 @@ mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a .
#bzip2
git clone https://github.com/enthought/bzip2-1.0.6
cd bzip2-1.0.6
make -j 4
cd ..
mv bzip2-1.0.6/libbz2.a .
# magic
git clone https://github.com/threatstack/libmagic
cd libmagic
./autogen.sh
./configure --enable-static --disable-shared

View File

@@ -12,7 +12,8 @@ major_mime = {
"audio": 7,
"image": 8,
"text": 9,
"application": 10
"application": 10,
"x-epoc": 11,
}
pdf = (
@@ -24,6 +25,7 @@ pdf = (
font = (
"application/vnd.ms-opentype",
"application/x-ms-compress-szdd",
"application/x-font-sfn",
"application/x-font-ttf",
"font/otf",

View File

@@ -1,8 +1,9 @@
files = [
"web/css/bundle.css",
"web/css/bundle_dark.css",
"web/js/bundle.js",
"web/img/bg-bars.png",
"web/img/sprite-skin-flat.png",
"web/img/sprite-skin-flat-dark.png",
"web/search.html",
]

View File

@@ -2,8 +2,8 @@
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 4096
#define DEFAULT_QUALITY 15
#define DEFAULT_SIZE 200
#define DEFAULT_QUALITY 5
#define DEFAULT_SIZE 500
#define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200"
@@ -25,7 +25,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char *abs_path = abspath(argv[1]);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", argv[1]);
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
} else {
args->path = abs_path;
@@ -34,7 +34,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->incremental != NULL) {
abs_path = abspath(args->incremental);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", args->incremental);
fprintf(stderr, "File not found: %s\n", args->incremental);
return 1;
}
}
@@ -100,7 +100,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
fprintf(stderr, "File not found: %s", argv[1]);
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
} else {
args->index_path = argv[1];
@@ -109,6 +109,27 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
if (args->es_url == NULL) {
args->es_url = DEFAULT_ES_URL;
}
if (args->script_path != NULL) {
struct stat info;
int res = stat(args->script_path, &info);
if (res == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
int fd = open(args->script_path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
args->script = malloc(info.st_size + 1);
read(fd, args->script, info.st_size);
*(args->script + info.st_size) = '\0';
close(fd);
}
return 0;
}
@@ -137,7 +158,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", abs_path);
/* abs_path is NULL on this branch; report the path that was actually given. */
fprintf(stderr, "File not found: %s\n", args->indices[i]);
return 1;
}
}

View File

@@ -22,6 +22,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
typedef struct index_args {
char *es_url;
const char *index_path;
const char *script_path;
char *script;
int print;
int force_reset;
} index_args_t;

View File

@@ -6,7 +6,6 @@
#include <stdio.h>
#include <string.h>
#include <cJSON/cJSON.h>
#include <src/ctx.h>
#include "static_generated.c"
@@ -54,6 +53,40 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
elastic_index_line(bulk_line);
}
/**
 * Run a user-supplied Painless script against the documents of one index.
 *
 * Builds the request body
 *   {"script": {"lang": "painless", "source": <script>},
 *    "query":  {"term": {"index": <index_id>}}}
 * and POSTs it to <es_url>/sist2/_update_by_query. The HTTP status code is
 * printed to stdout; if the response contains an "error" object, it is
 * printed to stderr.
 *
 * @param script   Painless source code to execute
 * @param index_id value matched against the "index" field of the documents
 */
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {

    cJSON *body = cJSON_CreateObject();
    cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
    cJSON_AddStringToObject(script_obj, "lang", "painless");
    cJSON_AddStringToObject(script_obj, "source", script);

    cJSON *query = cJSON_AddObjectToObject(body, "query");
    cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
    cJSON_AddStringToObject(term_obj, "index", index_id);

    char *payload = cJSON_Print(body);
    cJSON_Delete(body);

    /* cJSON_Print returns NULL on failure; do not hand a NULL body to web_post */
    if (payload == NULL) {
        fprintf(stderr, "Could not serialize user script request\n");
        return;
    }

    char url[4096];
    snprintf(url, sizeof(url), "%s/sist2/_update_by_query?pretty", Indexer->es_url);
    response_t *r = web_post(url, payload, "Content-Type: application/json");
    cJSON_free(payload);

    printf("Executed user script <%d>\n", r->status_code);

    /* Parse before releasing the response: r->body is the parser's input */
    cJSON *resp = cJSON_Parse(r->body);
    free_response(r);

    /* cJSON_GetObjectItem and cJSON_Delete both tolerate a NULL object */
    cJSON *error = cJSON_GetObjectItem(resp, "error");
    if (error != NULL) {
        char *error_str = cJSON_Print(error);
        fprintf(stderr, "User script error: \n%s\n", error_str);
        cJSON_free(error_str);
    }

    cJSON_Delete(resp);
}
void elastic_flush() {
if (Indexer == NULL) {
@@ -102,12 +135,20 @@ void elastic_flush() {
cJSON *ret_json = cJSON_Parse(r->body);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
fprintf(stderr, "%s\n", r->body);
cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
char* str = cJSON_Print(err);
fprintf(stderr, "%s\n", str);
cJSON_free(str);
}
}
}
cJSON_Delete(ret_json);
free_response(r);
free(buf);
}
void elastic_index_line(es_bulk_line_t *line) {
@@ -133,8 +174,7 @@ void elastic_index_line(es_bulk_line_t *line) {
es_indexer_t *create_indexer(const char *url) {
size_t url_len = strlen(url);
char *es_url = malloc(url_len);
char *es_url = malloc(strlen(url) + 1);
strcpy(es_url, url);
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
@@ -147,7 +187,7 @@ es_indexer_t *create_indexer(const char *url) {
return indexer;
}
void destroy_indexer() {
void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {
char url[4096];
@@ -156,6 +196,15 @@ void destroy_indexer() {
printf("Refresh index <%d>\n", r->status_code);
free_response(r);
if (script != NULL) {
execute_update_script(script, index_id);
}
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Refresh index <%d>\n", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Merge index <%d>\n", r->status_code);
@@ -213,8 +262,10 @@ cJSON *elastic_get_document(const char *uuid_str) {
snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);
response_t *r = web_get(url);
cJSON *json = NULL;
if (r->status_code == 200) {
return cJSON_Parse(r->body);
json = cJSON_Parse(r->body);
}
return NULL;
free_response(r);
return json;
}

View File

@@ -24,7 +24,7 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char* es_url);
void destroy_indexer();
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
void elastic_init(int force_reset);

File diff suppressed because one or more lines are too long

View File

@@ -49,18 +49,19 @@ response_t *web_post(const char *url, const char *data, const char *header) {
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = NULL;
if (header != NULL) {
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, header);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
}
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
int r1 = curl_easy_perform(curl);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;

View File

@@ -54,6 +54,12 @@ index_descriptor_t read_index_descriptor(char *path) {
struct stat info;
stat(path, &info);
int fd = open(path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Invalid/corrupt index (Could not find descriptor)\n");
exit(1);
}
char *buf = malloc(info.st_size + 1);
read(fd, buf, info.st_size);
*(buf + info.st_size) = '\0';
@@ -66,7 +72,7 @@ index_descriptor_t read_index_descriptor(char *path) {
strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short)strlen(descriptor.root);
descriptor.root_len = (short) strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
@@ -181,7 +187,7 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
uuid_unparse(line.uuid, uuid_str);
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
cJSON_AddNumberToObject(document, "size", (double)line.size);
cJSON_AddNumberToObject(document, "size", (double) line.size);
cJSON_AddNumberToObject(document, "mtime", line.mtime);
int c;
@@ -197,21 +203,30 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
*(buf.buf + line.ext) = '\0';
}
cJSON_AddStringToObject(document, "name", buf.buf + line.base);
*(buf.buf + line.base - 1) = '\0';
cJSON_AddStringToObject(document, "path", buf.buf);
if (line.base > 0) {
*(buf.buf + line.base - 1) = '\0';
cJSON_AddStringToObject(document, "path", buf.buf);
} else {
cJSON_AddStringToObject(document, "path", "");
}
enum metakey key = getc(file);
while (key != '\n') {
switch (key) {
case MetaWidth:
case MetaHeight:
case MetaMediaDuration:
case MetaMediaBitrate: {
case MetaHeight: {
int value;
fread(&value, sizeof(int), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
break;
}
case MetaMediaDuration:
case MetaMediaBitrate: {
long value;
fread(&value, sizeof(long), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
break;
}
case MetaMediaAudioCodec:
case MetaMediaVideoCodec: {
int value;
@@ -232,7 +247,7 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
if (!(SHOULD_IGNORE_CHAR(c)) || c == ' ') {
if (SHOULD_KEEP_CHAR(c) || c == ' ') {
dyn_buffer_write_char(&buf, (char) c);
}
}
@@ -240,14 +255,18 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf);
break;
}
default:
fprintf(stderr, "Invalid meta key (corrupt index): %x\n", key);
break;
}
key = getc(file);
}
func(document, uuid_str);
cJSON_free(document);
cJSON_Delete(document);
}
dyn_buffer_destroy(&buf);
fclose(file);
}
@@ -291,6 +310,7 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
size_t buf_len;
char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
free(buf);
char c;
while ((c = (char) getc(file))) {

View File

@@ -15,7 +15,7 @@ store_t *store_create(char *path) {
);
if (open_ret != 0) {
fprintf(stderr, "Error while opening store: %s", mdb_strerror(open_ret));
fprintf(stderr, "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path);
exit(1);
}
@@ -64,7 +64,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
// Cannot resize when there is a opened transaction.
// Resize take effect on the next commit.
pthread_rwlock_wrlock(&store->lock);
store->size += 1024 * 1024 * 5;
store->size += 1024 * 1024 * 50;
mdb_env_set_mapsize(store->env, store->size);
mdb_txn_begin(store->env, NULL, 0, &txn);
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);

View File

@@ -3,14 +3,13 @@
parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath);
parse_job_t *job = malloc(sizeof(parse_job_t) + len);
memcpy(&(job->filepath), filepath, len + 1);
strcpy(job->filepath, filepath);
job->base = base;
char *p = strrchr(filepath + base, '.');
if (p != NULL) {
job->ext = (int)(p - filepath + 1);
job->ext = (int) (p - filepath + 1);
} else {
job->ext = len;
}

View File

@@ -10,7 +10,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.0.9";
static const char *const Version = "1.1.5";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -52,11 +52,10 @@ void sist2_scan(scan_args_t *args) {
ScanCtx.tn_qscale = args->quality;
ScanCtx.tn_size = args->size;
ScanCtx.content_size = args->content_size;
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
ScanCtx.threads = args->threads;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strcpy(ScanCtx.index.desc.root, args->path);
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
init_dir(ScanCtx.index.path);
@@ -93,6 +92,8 @@ void sist2_scan(scan_args_t *args) {
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
}
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
@@ -162,10 +163,11 @@ void sist2_index(index_args_t *args) {
read_index(file_path, desc.uuid, f);
}
}
closedir(dir);
if (!args->print) {
elastic_flush();
destroy_indexer();
destroy_indexer(args->script, desc.uuid);
}
}
@@ -207,16 +209,20 @@ int main(int argc, const char *argv[]) {
web_args_t *web_args = web_args_create();
#endif
int arg_version = 0;
char * common_es_url = NULL;
struct argparse_option options[] = {
OPT_HELP(),
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=15"),
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=200"),
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=500"),
OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. DEFAULT=4096"),
OPT_STRING(0, "incremental", &scan_args->incremental,
@@ -229,6 +235,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"),
@@ -246,6 +253,11 @@ int main(int argc, const char *argv[]) {
argparse_describe(&argparse, DESCRIPTION, EPILOG);
argc = argparse_parse(&argparse, argc, argv);
if (arg_version) {
printf(Version);
exit(0);
}
#ifndef SIST_SCAN_ONLY
web_args->es_url = common_es_url;
index_args->es_url = common_es_url;

View File

@@ -15,12 +15,12 @@ typedef struct text_dimensions {
} text_dimensions_t;
typedef struct glyph {
unsigned int top;
unsigned int height;
unsigned int width;
unsigned int descent;
unsigned int ascent;
unsigned int advance_width;
int top;
int height;
int width;
int descent;
int ascent;
int advance_width;
unsigned char *pixmap;
} glyph_t;
@@ -39,10 +39,10 @@ glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
glyph.pixmap = slot->bitmap.buffer;
glyph.width = slot->bitmap.width;
glyph.height = slot->bitmap.rows;
glyph.width = (int) slot->bitmap.width;
glyph.height = (int) slot->bitmap.rows;
glyph.top = slot->bitmap_top;
glyph.advance_width = slot->advance.x / 64;
glyph.advance_width = (int) slot->advance.x / 64;
glyph.descent = MAX(0, glyph.height - glyph.top);
glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
@@ -50,10 +50,6 @@ glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
return glyph;
}
__always_inline
glyph_t get_glyph(char character, FT_Face face) {
}
text_dimensions_t text_dimension(char *text, FT_Face face) {
text_dimensions_t dimensions;
@@ -62,7 +58,7 @@ text_dimensions_t text_dimension(char *text, FT_Face face) {
int num_chars = (int) strlen(text);
unsigned int max_ascent = 0;
unsigned int max_descent = 0;
int max_descent = 0;
char pc = 0;
for (int i = 0; i < num_chars; i++) {
@@ -72,7 +68,7 @@ text_dimensions_t text_dimension(char *text, FT_Face face) {
glyph_t glyph = ft_glyph_to_glyph(face->glyph);
max_descent = MAX(max_descent, glyph.descent);
max_ascent = MAX(max_ascent, glyph.ascent);
max_ascent = MAX(max_ascent, MAX(glyph.height, glyph.ascent));
int kerning_x = kerning_offset(c, pc, face);
dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x;
@@ -146,6 +142,9 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
if (library == NULL) {
FT_Init_FreeType(&library);
}
if (buf == NULL) {
return;
}
FT_Face face;
FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face);
@@ -156,7 +155,11 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
char font_name[1024];
if (face->style_name == NULL || *(face->style_name) == '?') {
strcpy(font_name, face->family_name);
if (face->family_name == NULL) {
strcpy(font_name, "(null)");
} else {
strcpy(font_name, face->family_name);
}
} else {
snprintf(font_name, sizeof(font_name), "%s %s", face->family_name, face->style_name);
}
@@ -186,11 +189,18 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
continue;
c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
continue;
}
}
glyph_t glyph = ft_glyph_to_glyph(face->glyph);
pen.x += kerning_offset(c, pc, face);
if (pen.x <= 0) {
pen.x = ABS(glyph.advance_width - glyph.width);
}
pen.y = dimensions.height - glyph.ascent - dimensions.baseline;
draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

View File

@@ -1,6 +1,9 @@
#include "src/sist.h"
#include "src/ctx.h"
#define MIN_SIZE 32
__always_inline
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
@@ -22,8 +25,8 @@ AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
return jpeg;
}
__always_inline
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
AVFrame *scaled_frame = av_frame_alloc();
int dstW;
int dstH;
@@ -41,16 +44,22 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
}
}
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
return NULL;
}
AVFrame *scaled_frame = av_frame_alloc();
struct SwsContext *ctx = sws_getContext(
decoder->width, decoder->height, decoder->pix_fmt,
dstW, dstH, AV_PIX_FMT_YUVJ420P,
SWS_FAST_BILINEAR, 0, 0, 0
);
int dst_buf_len = avpicture_get_size(AV_PIX_FMT_YUVJ420P, dstW, dstH);
int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
avpicture_fill((AVPicture *) scaled_frame, dst_buf, AV_PIX_FMT_YUVJ420P, dstW, dstH);
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
sws_scale(ctx,
(const uint8_t *const *) frame->data, frame->linesize,
@@ -81,7 +90,7 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
if (read_frame_ret != 0) {
if (read_frame_ret != AVERROR_EOF) {
fprintf(stderr, "Error reading frame: %s\n", av_err2str(read_frame_ret));
fprintf(stderr, "Error reading frame: %d\n", read_frame_ret);
}
av_frame_free(&frame);
av_packet_unref(&avPacket);
@@ -107,43 +116,74 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
return frame;
}
/*
 * APPEND_TAG_META(doc, tag_, keyname)
 *
 * Builds a meta_line_t whose key is `keyname` and whose string value is the
 * text of `tag_->value` after it has been passed through a text_buffer_t,
 * then hands it to APPEND_META(doc, ...).
 *
 *   doc     - document the metadata line is appended to (forwarded to APPEND_META)
 *   tag_    - pointer to an entry exposing a `value` string (e.g. AVDictionaryEntry *)
 *   keyname - metadata key stored in the new line
 *
 * The expansion is wrapped in its own brace block so the temporaries `tex`
 * and `meta_tag` do not leak into (or collide within) the caller's scope, and
 * the arguments are parenthesized so arbitrary expressions can be passed.
 * Call sites use the macro WITHOUT a trailing semicolon.
 *
 * NOTE(review): the allocation reserves tex.dyn_buffer.cur extra bytes and the
 * value is copied with strcpy(); this assumes dyn_buffer.cur already accounts
 * for the terminating NUL written by text_buffer_append_string0 - confirm.
 * NOTE(review): the malloc() result is not checked, matching the other
 * metadata allocations visible in this file.
 */
#define APPEND_TAG_META(doc, tag_, keyname) \
{ \
    text_buffer_t tex = text_buffer_create(-1); \
    text_buffer_append_string0(&tex, (tag_)->value); \
    meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
    meta_tag->key = (keyname); \
    strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
    APPEND_META(doc, meta_tag) \
    text_buffer_destroy(&tex); \
}
__always_inline
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char *key = tag->key;
for (; *key; ++key) *key = (char) tolower(*key);
char key[32];
strncpy(key, tag->key, sizeof(key));
if (strcmp(tag->key, "artist") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaArtist;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "genre") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaGenre;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "title") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaTitle;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "album_artist") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaAlbumArtist;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "album") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaAlbum;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
char *ptr = key;
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
if (strcmp(key, "artist") == 0) {
APPEND_TAG_META(doc, tag, MetaArtist)
} else if (strcmp(key, "genre") == 0) {
APPEND_TAG_META(doc, tag, MetaGenre)
} else if (strcmp(key, "title") == 0) {
APPEND_TAG_META(doc, tag, MetaTitle)
} else if (strcmp(key, "album_artist") == 0) {
APPEND_TAG_META(doc, tag, MetaAlbumArtist)
} else if (strcmp(key, "album") == 0) {
APPEND_TAG_META(doc, tag, MetaAlbum)
}
}
}
/*
 * Appends metadata lines for a video or a still image to `doc`.
 *
 *   pFormatCtx         - container context; its duration, bit_rate and
 *                        metadata dictionary are read when is_video is set
 *   frame              - decoded frame; its metadata dictionary is read when
 *                        is_video is not set
 *   doc                - document receiving the metadata lines
 *   include_audio_tags - when non-zero, title/artist style tags are emitted too
 *   is_video           - non-zero for a video, zero for a still image
 *
 * Video: emits MetaMediaDuration (duration / AV_TIME_BASE) and
 * MetaMediaBitrate, then walks the container tags ("comment" -> MetaContent,
 * and, if requested, "title" -> MetaTitle, "artist" -> MetaArtist).
 * Still image: walks the frame tags ("ImageDescription" -> MetaContent, and,
 * if requested, "Artist" -> MetaArtist).
 * Tag keys are compared case-sensitively, exactly as spelled above.
 */
__always_inline
void append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
    AVDictionaryEntry *entry = NULL;

    if (!is_video) {
        // EXIF metadata
        while ((entry = av_dict_get(frame->metadata, "", entry, AV_DICT_IGNORE_SUFFIX)) != NULL) {
            if (strcmp(entry->key, "ImageDescription") == 0) {
                APPEND_TAG_META(doc, entry, MetaContent)
            } else if (include_audio_tags) {
                if (strcmp(entry->key, "Artist") == 0) {
                    APPEND_TAG_META(doc, entry, MetaArtist)
                }
            }
        }
        return;
    }

    // Container-level numeric properties come first, in the same order as before
    meta_line_t *duration_line = malloc(sizeof(meta_line_t));
    duration_line->key = MetaMediaDuration;
    duration_line->longval = pFormatCtx->duration / AV_TIME_BASE;
    APPEND_META(doc, duration_line)

    meta_line_t *bitrate_line = malloc(sizeof(meta_line_t));
    bitrate_line->key = MetaMediaBitrate;
    bitrate_line->longval = pFormatCtx->bit_rate;
    APPEND_META(doc, bitrate_line)

    // Container tags: "comment" is always kept, title/artist only on request
    while ((entry = av_dict_get(pFormatCtx->metadata, "", entry, AV_DICT_IGNORE_SUFFIX)) != NULL) {
        if (strcmp(entry->key, "comment") == 0) {
            APPEND_TAG_META(doc, entry, MetaContent)
        } else if (include_audio_tags) {
            if (strcmp(entry->key, "title") == 0) {
                APPEND_TAG_META(doc, entry, MetaTitle)
            } else if (strcmp(entry->key, "artist") == 0) {
                APPEND_TAG_META(doc, entry, MetaArtist)
            }
        }
    }
}
@@ -160,7 +200,7 @@ void parse_media(const char *filepath, document_t *doc) {
}
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) {
printf("ERR%s %s\n", filepath, av_err2str(res));
fprintf(stderr, "media error: %s %s\n", filepath, av_err2str(res));
return;
}
@@ -205,20 +245,7 @@ void parse_media(const char *filepath, document_t *doc) {
if (video_stream != -1) {
AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->nb_frames > 1) {
//This is a video (not a still image)
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
APPEND_META(doc, meta_duration)
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->intval = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
if (stream->codecpar->width <= 20 || stream->codecpar->height <= 20) {
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
@@ -250,9 +277,19 @@ void parse_media(const char *filepath, document_t *doc) {
return;
}
append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);
// Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
if (scaled_frame == NULL) {
av_frame_free(&frame);
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
@@ -262,7 +299,8 @@ void parse_media(const char *filepath, document_t *doc) {
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, jpeg_packet.size);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
jpeg_packet.size);
av_packet_unref(&jpeg_packet);
av_frame_free(&frame);

View File

@@ -5,6 +5,7 @@
#include "src/sist.h"
/*
 * Minimum size thresholds for media files (presumably byte counts - confirm
 * against the callers). The expansions are parenthesized so the macros keep
 * their value when used inside larger expressions such as `x / MIN_VIDEO_SIZE`.
 */
#define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2)
void parse_media(const char * filepath, document_t *doc);

View File

@@ -1,10 +1,12 @@
#include "mime.h"
unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) {
char lower[64];
char lower[8];
char *p = lower;
while ((*ext)) {
int cnt = 0;
while ((*ext) != '\0' && cnt + 1 < sizeof(lower)) {
*p++ = (char)tolower(*ext++);
cnt++;
}
*p = '\0';
return (size_t) g_hash_table_lookup(ext_table, lower);

View File

@@ -39,333 +39,385 @@ enum mime {
application_oda=655391,
application_ogg=655392,
application_pdf=655393 | 0x40000000,
application_pgp_signature=655394,
application_pkcs7_signature=655395,
application_pkix_cert=655396,
application_postscript=655397,
application_pro_eng=655398,
application_ringing_tones=655399,
application_smil=655400,
application_solids=655401,
application_sounder=655402,
application_step=655403,
application_streamingmedia=655404,
application_vda=655405,
application_vnd_fdf=655406,
application_vnd_font_fontforge_sfd=655407,
application_vnd_hp_hpgl=655408,
application_vnd_iccprofile=655409,
application_vnd_ms_cab_compressed=655410,
application_vnd_ms_excel=655411,
application_vnd_ms_fontobject=655412,
application_vnd_ms_opentype=655413 | 0x20000000,
application_vnd_ms_pki_certstore=655414,
application_vnd_ms_pki_pko=655415,
application_vnd_ms_pki_seccat=655416,
application_vnd_ms_powerpoint=655417,
application_vnd_ms_project=655418,
application_vnd_oasis_opendocument_base=655419,
application_vnd_oasis_opendocument_formula=655420,
application_vnd_oasis_opendocument_graphics=655421,
application_vnd_oasis_opendocument_text=655422,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655423,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655424,
application_vnd_wap_wmlc=655425,
application_vnd_wap_wmlscriptc=655426,
application_vnd_xara=655427,
application_vocaltec_media_desc=655428,
application_vocaltec_media_file=655429,
application_winhelp=655430,
application_wordperfect=655431,
application_wordperfect6_0=655432,
application_wordperfect6_1=655433,
application_x_123=655434,
application_x_7z_compressed=655435,
application_x_aim=655436,
application_x_archive=655437,
application_x_authorware_bin=655438,
application_x_authorware_map=655439,
application_x_authorware_seg=655440,
application_x_bcpio=655441,
application_x_bittorrent=655442,
application_x_bsh=655443,
application_x_bytecode_python=655444,
application_x_bzip=655445,
application_x_bzip2=655446,
application_x_cbr=655447,
application_x_cbz=655448 | 0x40000000,
application_x_cdlink=655449,
application_x_chat=655450,
application_x_cocoa=655451,
application_x_conference=655452,
application_x_cpio=655453,
application_x_dbf=655454,
application_x_dbt=655455,
application_x_debian_package=655456,
application_x_deepv=655457,
application_x_director=655458,
application_x_dosexec=655459,
application_x_dvi=655460,
application_x_elc=655461,
application_pgp_keys=655394,
application_pgp_signature=655395,
application_pkcs7_signature=655396,
application_pkix_cert=655397,
application_postscript=655398,
application_pro_eng=655399,
application_ringing_tones=655400,
application_smil=655401,
application_solids=655402,
application_sounder=655403,
application_step=655404,
application_streamingmedia=655405,
application_vda=655406,
application_vnd_fdf=655407,
application_vnd_font_fontforge_sfd=655408,
application_vnd_hp_hpgl=655409,
application_vnd_iccprofile=655410,
application_vnd_lotus_1_2_3=655411,
application_vnd_ms_cab_compressed=655412,
application_vnd_ms_excel=655413,
application_vnd_ms_fontobject=655414,
application_vnd_ms_opentype=655415 | 0x20000000,
application_vnd_ms_pki_certstore=655416,
application_vnd_ms_pki_pko=655417,
application_vnd_ms_pki_seccat=655418,
application_vnd_ms_powerpoint=655419,
application_vnd_ms_project=655420,
application_vnd_oasis_opendocument_base=655421,
application_vnd_oasis_opendocument_formula=655422,
application_vnd_oasis_opendocument_graphics=655423,
application_vnd_oasis_opendocument_presentation=655424,
application_vnd_oasis_opendocument_spreadsheet=655425,
application_vnd_oasis_opendocument_text=655426,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655427,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655428,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655429,
application_vnd_symbian_install=655430,
application_vnd_tcpdump_pcap=655431,
application_vnd_wap_wmlc=655432,
application_vnd_wap_wmlscriptc=655433,
application_vnd_xara=655434,
application_vocaltec_media_desc=655435,
application_vocaltec_media_file=655436,
application_winhelp=655437,
application_wordperfect=655438,
application_wordperfect6_0=655439,
application_wordperfect6_1=655440,
application_x_123=655441,
application_x_7z_compressed=655442,
application_x_aim=655443,
application_x_apple_diskimage=655444,
application_x_arc=655445,
application_x_archive=655446,
application_x_atari_7800_rom=655447,
application_x_authorware_bin=655448,
application_x_authorware_map=655449,
application_x_authorware_seg=655450,
application_x_avira_qua=655451,
application_x_bcpio=655452,
application_x_bittorrent=655453,
application_x_bsh=655454,
application_x_bytecode_python=655455,
application_x_bzip=655456,
application_x_bzip2=655457,
application_x_cbr=655458,
application_x_cbz=655459 | 0x40000000,
application_x_cdlink=655460,
application_x_chat=655461,
application_x_chrome_extension=655462,
application_x_cocoa=655463,
application_x_conference=655464,
application_x_coredump=655465,
application_x_cpio=655466,
application_x_dbf=655467,
application_x_dbt=655468,
application_x_debian_package=655469,
application_x_deepv=655470,
application_x_director=655471,
application_x_dmp=655472,
application_x_dosdriver=655473,
application_x_dosexec=655474,
application_x_dvi=655475,
application_x_elc=655476,
application_x_empty=1,
application_x_envoy=655463,
application_x_esrehber=655464,
application_x_excel=655465,
application_x_executable=655466,
application_x_font_sfn=655467 | 0x20000000,
application_x_font_ttf=655468 | 0x20000000,
application_x_freelance=655469,
application_x_git=655470,
application_x_gsp=655471,
application_x_gss=655472,
application_x_gtar=655473,
application_x_gzip=655474,
application_x_hdf=655475,
application_x_helpfile=655476,
application_x_httpd_imap=655477,
application_x_ima=655478,
application_x_innosetup=655479,
application_x_internett_signup=655480,
application_x_inventor=655481,
application_x_ip2=655482,
application_x_java_applet=655483,
application_x_java_commerce=655484,
application_x_java_image=655485,
application_x_java_keystore=655486,
application_x_kdelnk=655487,
application_x_koan=655488,
application_x_latex=655489,
application_x_livescreen=655490,
application_x_lotus=655491,
application_x_lzh=655492,
application_x_lzx=655493,
application_x_mach_binary=655494,
application_x_mach_executable=655495,
application_x_magic_cap_package_1_0=655496,
application_x_mathcad=655497,
application_x_meme=655498,
application_x_midi=655499,
application_x_mif=655500,
application_x_mix_transfer=655501,
application_x_mobipocket_ebook=655502,
application_x_ms_pdb=655503,
application_x_ms_reader=655504,
application_x_navi_animation=655505,
application_x_navidoc=655506,
application_x_navimap=655507,
application_x_navistyle=655508,
application_x_netcdf=655509,
application_x_newton_compatible_pkg=655510,
application_x_object=655511,
application_x_omc=655512,
application_x_omcdatamaker=655513,
application_x_omcregerator=655514,
application_x_pagemaker=655515,
application_x_pcl=655516,
application_x_pixclscript=655517,
application_x_pkcs7_certreqresp=655518,
application_x_pkcs7_signature=655519,
application_x_project=655520,
application_x_qpro=655521,
application_x_rar=655522,
application_x_rpm=655523,
application_x_sdp=655524,
application_x_sea=655525,
application_x_seelogo=655526,
application_x_setupscript=655527,
application_x_shar=655528,
application_x_sharedlib=655529,
application_x_shockwave_flash=655530,
application_x_sprite=655531,
application_x_sqlite3=655532,
application_x_sv4cpio=655533,
application_x_sv4crc=655534,
application_x_tar=655535,
application_x_tbook=655536,
application_x_tex_tfm=655537,
application_x_texinfo=655538,
application_x_ustar=655539,
application_x_visio=655540,
application_x_vnd_audioexplosion_mzz=655541,
application_x_vnd_ls_xpix=655542,
application_x_vrml=655543,
application_x_wais_source=655544,
application_x_wine_extension_ini=655545,
application_x_wintalk=655546,
application_x_world=655547,
application_x_wri=655548,
application_x_x509_ca_cert=655549,
application_x_xz=655550,
application_xml=655551,
application_zip=655552,
audio_it=458945,
audio_make=458946,
audio_mid=458947,
audio_midi=458948,
audio_mp4=458949,
audio_mpeg=458950,
audio_ogg=458951,
audio_s3m=458952,
audio_tsp_audio=458953,
audio_tsplayer=458954,
audio_vnd_qcelp=458955,
audio_voxware=458956,
audio_x_flac=458957,
audio_x_gsm=458958,
audio_x_jam=458959,
audio_x_liveaudio=458960,
audio_x_m4a=458961,
audio_x_midi=458962,
audio_x_mod=458963,
audio_x_mp4a_latm=458964,
audio_x_mpeg_3=458965,
audio_x_mpequrl=458966,
audio_x_nspaudio=458967,
audio_x_pn_realaudio=458968,
audio_x_psid=458969,
audio_x_realaudio=458970,
audio_x_twinvq=458971,
audio_x_twinvq_plugin=458972,
audio_x_voc=458973,
audio_x_wav=458974,
audio_xm=458975,
font_otf=327904 | 0x20000000,
font_sfnt=327905 | 0x20000000,
font_woff=327906 | 0x20000000,
font_woff2=327907 | 0x20000000,
image_cmu_raster=524516,
image_fif=524517,
image_florian=524518,
image_g3fax=524519,
image_gif=524520,
image_ief=524521,
image_jpeg=524522,
image_jutvision=524523,
image_naplps=524524,
image_pict=524525,
image_png=524526,
image_svg=524527 | 0x80000000,
image_svg_xml=524528 | 0x80000000,
image_tiff=524529,
image_vnd_adobe_photoshop=524530 | 0x80000000,
image_vnd_djvu=524531 | 0x80000000,
image_vnd_fpx=524532,
image_vnd_microsoft_icon=524533,
image_vnd_rn_realflash=524534,
image_vnd_rn_realpix=524535,
image_vnd_wap_wbmp=524536,
image_vnd_xiff=524537,
image_webp=524538,
image_x_cmu_raster=524539,
image_x_cur=524540,
image_x_dwg=524541,
image_x_eps=524542,
image_x_exr=524543,
image_x_icns=524544,
image_x_icon=524545 | 0x80000000,
image_x_jg=524546,
image_x_jps=524547,
image_x_ms_bmp=524548,
image_x_niff=524549,
image_x_pcx=524550,
image_x_pict=524551,
image_x_portable_bitmap=524552,
image_x_portable_graymap=524553,
image_x_portable_pixmap=524554,
image_x_quicktime=524555,
image_x_rgb=524556,
image_x_tga=524557,
image_x_tiff=524558,
image_x_xcf=524559 | 0x80000000,
image_x_xpixmap=524560 | 0x80000000,
image_x_xwindowdump=524561,
message_rfc822=196882,
model_vnd_dwf=65811,
model_vnd_gdl=65812,
model_vnd_gs_gdl=65813,
model_vrml=65814,
model_x_pov=65815,
text_asp=590104,
text_css=590105,
text_html=590106,
text_javascript=590107,
text_mcf=590108,
text_pascal=590109,
text_plain=590110,
text_richtext=590111,
text_scriplet=590112,
text_tab_separated_values=590113,
text_troff=590114,
text_uri_list=590115,
text_vnd_abc=590116,
text_vnd_fmi_flexstor=590117,
text_vnd_wap_wml=590118,
text_vnd_wap_wmlscript=590119,
text_webviewhtml=590120,
text_x_Algol68=590121,
text_x_asm=590122,
text_x_audiosoft_intra=590123,
text_x_awk=590124,
text_x_bcpl=590125,
text_x_c=590126,
text_x_c__=590127,
text_x_component=590128,
text_x_diff=590129,
text_x_fortran=590130,
text_x_java=590131,
text_x_la_asf=590132,
text_x_lisp=590133,
text_x_m=590134,
text_x_m4=590135,
text_x_makefile=590136,
text_x_msdos_batch=590137,
text_x_pascal=590138,
text_x_perl=590139,
text_x_php=590140,
text_x_po=590141,
text_x_python=590142,
text_x_ruby=590143,
text_x_sass=590144,
text_x_scss=590145,
text_x_server_parsed_html=590146,
text_x_setext=590147,
text_x_sgml=590148,
text_x_shellscript=590149,
text_x_speech=590150,
text_x_tcl=590151,
text_x_tex=590152,
text_x_uil=590153,
text_x_uuencode=590154,
text_x_vcalendar=590155,
text_x_vcard=590156,
text_xml=590157,
video_animaflex=393550,
video_avi=393551,
video_avs_video=393552,
video_mp4=393553,
video_mpeg=393554,
video_quicktime=393555,
video_vdo=393556,
video_vivo=393557,
video_vnd_rn_realvideo=393558,
video_vosaic=393559,
video_webm=393560,
video_x_amt_demorun=393561,
video_x_amt_showrun=393562,
video_x_atomic3d_feature=393563,
video_x_dl=393564,
video_x_dv=393565,
video_x_fli=393566,
video_x_flv=393567,
video_x_isvideo=393568,
video_x_jng=393569 | 0x80000000,
video_x_matroska=393570,
video_x_mng=393571,
video_x_motion_jpeg=393572,
video_x_ms_asf=393573,
video_x_msvideo=393574,
video_x_qtc=393575,
video_x_sgi_movie=393576,
application_x_envoy=655478,
application_x_esrehber=655479,
application_x_excel=655480,
application_x_executable=655481,
application_x_font_gdos=655482,
application_x_font_pf2=655483,
application_x_font_pfm=655484,
application_x_font_sfn=655485,
application_x_font_ttf=655486 | 0x20000000,
application_x_freelance=655487,
application_x_gamecube_rom=655488,
application_x_gdbm=655489,
application_x_gettext_translation=655490,
application_x_git=655491,
application_x_gsp=655492,
application_x_gss=655493,
application_x_gtar=655494,
application_x_gzip=655495,
application_x_hdf=655496,
application_x_helpfile=655497,
application_x_httpd_imap=655498,
application_x_ima=655499,
application_x_innosetup=655500,
application_x_internett_signup=655501,
application_x_inventor=655502,
application_x_ip2=655503,
application_x_java_applet=655504,
application_x_java_commerce=655505,
application_x_java_image=655506,
application_x_java_jmod=655507,
application_x_java_keystore=655508,
application_x_kdelnk=655509,
application_x_koan=655510,
application_x_latex=655511,
application_x_livescreen=655512,
application_x_lotus=655513,
application_x_lz4=655514,
application_x_lz4_json=655515,
application_x_lzh=655516,
application_x_lzh_compressed=655517,
application_x_lzx=655518,
application_x_mach_binary=655519,
application_x_mach_executable=655520,
application_x_magic_cap_package_1_0=655521,
application_x_mathcad=655522,
application_x_maxis_dbpf=655523,
application_x_meme=655524,
application_x_midi=655525,
application_x_mif=655526,
application_x_mix_transfer=655527,
application_x_mobipocket_ebook=655528,
application_x_ms_compress_szdd=655529,
application_x_ms_pdb=655530,
application_x_ms_reader=655531,
application_x_msaccess=655532,
application_x_navi_animation=655533,
application_x_navidoc=655534,
application_x_navimap=655535,
application_x_navistyle=655536,
application_x_nes_rom=655537,
application_x_netcdf=655538,
application_x_newton_compatible_pkg=655539,
application_x_nintendo_ds_rom=655540,
application_x_object=655541,
application_x_omc=655542,
application_x_omcdatamaker=655543,
application_x_omcregerator=655544,
application_x_pagemaker=655545,
application_x_pcl=655546,
application_x_pgp_keyring=655547,
application_x_pixclscript=655548,
application_x_pkcs7_certreqresp=655549,
application_x_pkcs7_signature=655550,
application_x_project=655551,
application_x_qpro=655552,
application_x_rar=655553,
application_x_rpm=655554,
application_x_sdp=655555,
application_x_sea=655556,
application_x_seelogo=655557,
application_x_setupscript=655558,
application_x_shar=655559,
application_x_sharedlib=655560,
application_x_shockwave_flash=655561,
application_x_snappy_framed=655562,
application_x_sprite=655563,
application_x_sqlite3=655564,
application_x_sv4cpio=655565,
application_x_sv4crc=655566,
application_x_tar=655567,
application_x_tbook=655568,
application_x_terminfo=655569,
application_x_terminfo2=655570,
application_x_tex_tfm=655571,
application_x_texinfo=655572,
application_x_ustar=655573,
application_x_visio=655574,
application_x_vnd_audioexplosion_mzz=655575,
application_x_vnd_ls_xpix=655576,
application_x_vrml=655577,
application_x_wais_source=655578,
application_x_wine_extension_ini=655579,
application_x_wintalk=655580,
application_x_world=655581,
application_x_wri=655582,
application_x_x509_ca_cert=655583,
application_x_xz=655584,
application_x_zip=655585,
application_x_zstd=655586,
application_xml=655587,
application_zip=655588,
application_zlib=655589,
audio_it=458982,
audio_make=458983,
audio_mid=458984,
audio_midi=458985,
audio_mp4=458986,
audio_mpeg=458987,
audio_ogg=458988,
audio_s3m=458989,
audio_tsp_audio=458990,
audio_tsplayer=458991,
audio_vnd_qcelp=458992,
audio_voxware=458993,
audio_x_aiff=458994,
audio_x_flac=458995,
audio_x_gsm=458996,
audio_x_hx_aac_adts=458997,
audio_x_jam=458998,
audio_x_liveaudio=458999,
audio_x_m4a=459000,
audio_x_midi=459001,
audio_x_mod=459002,
audio_x_mp4a_latm=459003,
audio_x_mpeg_3=459004,
audio_x_mpequrl=459005,
audio_x_nspaudio=459006,
audio_x_pn_realaudio=459007,
audio_x_psid=459008,
audio_x_realaudio=459009,
audio_x_twinvq=459010,
audio_x_twinvq_plugin=459011,
audio_x_voc=459012,
audio_x_wav=459013,
audio_xm=459014,
font_otf=327943 | 0x20000000,
font_sfnt=327944 | 0x20000000,
font_woff=327945 | 0x20000000,
font_woff2=327946 | 0x20000000,
image_cmu_raster=524555,
image_fif=524556,
image_florian=524557,
image_g3fax=524558,
image_gif=524559,
image_heic=524560,
image_ief=524561,
image_jpeg=524562,
image_jutvision=524563,
image_naplps=524564,
image_pict=524565,
image_png=524566,
image_svg=524567 | 0x80000000,
image_svg_xml=524568 | 0x80000000,
image_tiff=524569,
image_vnd_adobe_photoshop=524570 | 0x80000000,
image_vnd_djvu=524571 | 0x80000000,
image_vnd_fpx=524572,
image_vnd_microsoft_icon=524573,
image_vnd_rn_realflash=524574,
image_vnd_rn_realpix=524575,
image_vnd_wap_wbmp=524576,
image_vnd_xiff=524577,
image_webp=524578,
image_wmf=524579,
image_x_3ds=524580,
image_x_cmu_raster=524581,
image_x_cur=524582,
image_x_dwg=524583,
image_x_eps=524584,
image_x_exr=524585,
image_x_gem=524586,
image_x_icns=524587,
image_x_icon=524588 | 0x80000000,
image_x_jg=524589,
image_x_jps=524590,
image_x_ms_bmp=524591,
image_x_niff=524592,
image_x_pcx=524593,
image_x_pict=524594,
image_x_portable_bitmap=524595,
image_x_portable_graymap=524596,
image_x_portable_pixmap=524597,
image_x_quicktime=524598,
image_x_rgb=524599,
image_x_tga=524600,
image_x_tiff=524601,
image_x_win_bitmap=524602,
image_x_xcf=524603 | 0x80000000,
image_x_xpixmap=524604 | 0x80000000,
image_x_xwindowdump=524605,
message_news=196926,
message_rfc822=196927,
model_vnd_dwf=65856,
model_vnd_gdl=65857,
model_vnd_gs_gdl=65858,
model_vrml=65859,
model_x_pov=65860,
text_PGP=590149,
text_asp=590150,
text_css=590151,
text_html=590152,
text_javascript=590153,
text_mcf=590154,
text_pascal=590155,
text_plain=590156,
text_richtext=590157,
text_rtf=590158,
text_scriplet=590159,
text_tab_separated_values=590160,
text_troff=590161,
text_uri_list=590162,
text_vnd_abc=590163,
text_vnd_fmi_flexstor=590164,
text_vnd_wap_wml=590165,
text_vnd_wap_wmlscript=590166,
text_webviewhtml=590167,
text_x_Algol68=590168,
text_x_asm=590169,
text_x_audiosoft_intra=590170,
text_x_awk=590171,
text_x_bcpl=590172,
text_x_c=590173,
text_x_c__=590174,
text_x_component=590175,
text_x_diff=590176,
text_x_fortran=590177,
text_x_java=590178,
text_x_la_asf=590179,
text_x_lisp=590180,
text_x_m=590181,
text_x_m4=590182,
text_x_makefile=590183,
text_x_ms_regedit=590184,
text_x_msdos_batch=590185,
text_x_objective_c=590186,
text_x_pascal=590187,
text_x_perl=590188,
text_x_php=590189,
text_x_po=590190,
text_x_python=590191,
text_x_ruby=590192,
text_x_sass=590193,
text_x_scss=590194,
text_x_server_parsed_html=590195,
text_x_setext=590196,
text_x_sgml=590197,
text_x_shellscript=590198,
text_x_speech=590199,
text_x_tcl=590200,
text_x_tex=590201,
text_x_uil=590202,
text_x_uuencode=590203,
text_x_vcalendar=590204,
text_x_vcard=590205,
text_xml=590206,
video_MP2T=393599,
video_animaflex=393600,
video_avi=393601,
video_avs_video=393602,
video_mp4=393603,
video_mpeg=393604,
video_quicktime=393605,
video_vdo=393606,
video_vivo=393607,
video_vnd_rn_realvideo=393608,
video_vosaic=393609,
video_webm=393610,
video_x_amt_demorun=393611,
video_x_amt_showrun=393612,
video_x_atomic3d_feature=393613,
video_x_dl=393614,
video_x_dv=393615,
video_x_fli=393616,
video_x_flv=393617,
video_x_isvideo=393618,
video_x_jng=393619 | 0x80000000,
video_x_m4v=393620,
video_x_matroska=393621,
video_x_mng=393622,
video_x_motion_jpeg=393623,
video_x_ms_asf=393624,
video_x_msvideo=393625,
video_x_qtc=393626,
video_x_sgi_movie=393627,
x_epoc_x_sisx_app=721308,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj";
@@ -624,6 +676,7 @@ case text_mcf: return "text/mcf";
case text_pascal: return "text/pascal";
case text_plain: return "text/plain";
case text_richtext: return "text/richtext";
case text_rtf: return "text/rtf";
case text_scriplet: return "text/scriplet";
case text_x_awk: return "text/x-awk";
case video_x_jng: return "video/x-jng";
@@ -728,6 +781,57 @@ case image_x_tga: return "image/x-tga";
case application_x_wine_extension_ini: return "application/x-wine-extension-ini";
case application_x_cbz: return "application/x-cbz";
case application_x_cbr: return "application/x-cbr";
case application_x_ms_compress_szdd: return "application/x-ms-compress-szdd";
case application_x_atari_7800_rom: return "application/x-atari-7800-rom";
case application_x_nes_rom: return "application/x-nes-rom";
case application_x_font_pfm: return "application/x-font-pfm";
case application_x_gettext_translation: return "application/x-gettext-translation";
case image_wmf: return "image/wmf";
case application_pgp_keys: return "application/pgp-keys";
case image_x_3ds: return "image/x-3ds";
case application_x_lz4: return "application/x-lz4";
case application_vnd_openxmlformats_officedocument_presentationml_presentation: return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
case application_vnd_oasis_opendocument_presentation: return "application/vnd.oasis.opendocument.presentation";
case application_x_msaccess: return "application/x-msaccess";
case application_vnd_oasis_opendocument_spreadsheet: return "application/vnd.oasis.opendocument.spreadsheet";
case audio_x_aiff: return "audio/x-aiff";
case text_x_ms_regedit: return "text/x-ms-regedit";
case application_x_gamecube_rom: return "application/x-gamecube-rom";
case application_x_nintendo_ds_rom: return "application/x-nintendo-ds-rom";
case text_x_objective_c: return "text/x-objective-c";
case application_x_font_gdos: return "application/x-font-gdos";
case application_x_apple_diskimage: return "application/x-apple-diskimage";
case application_x_zstd: return "application/x-zstd";
case video_x_m4v: return "video/x-m4v";
case message_news: return "message/news";
case application_vnd_symbian_install: return "application/vnd.symbian.install";
case application_x_lzh_compressed: return "application/x-lzh-compressed";
case application_x_dosdriver: return "application/x-dosdriver";
case application_vnd_tcpdump_pcap: return "application/vnd.tcpdump.pcap";
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
case application_x_avira_qua: return "application/x-avira-qua";
case video_MP2T: return "video/MP2T";
case application_x_snappy_framed: return "application/x-snappy-framed";
case application_x_lz4_json: return "application/x-lz4+json";
case application_x_dmp: return "application/x-dmp";
case application_zlib: return "application/zlib";
case application_x_pgp_keyring: return "application/x-pgp-keyring";
case application_x_gdbm: return "application/x-gdbm";
case application_x_font_pf2: return "application/x-font-pf2";
case application_x_zip: return "application/x-zip";
case application_x_coredump: return "application/x-coredump";
case application_x_java_jmod: return "application/x-java-jmod";
case application_x_terminfo: return "application/x-terminfo";
case application_x_terminfo2: return "application/x-terminfo2";
case application_x_arc: return "application/x-arc";
case application_vnd_lotus_1_2_3: return "application/vnd.lotus-1-2-3";
case image_x_win_bitmap: return "image/x-win-bitmap";
case application_x_maxis_dbpf: return "application/x-maxis-dbpf";
case text_PGP: return "text/PGP";
case audio_x_hx_aac_adts: return "audio/x-hx-aac-adts";
case application_x_chrome_extension: return "application/x-chrome-extension";
case image_heic: return "image/heic";
case image_x_gem: return "image/x-gem";
default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@@ -857,6 +961,7 @@ g_hash_table_insert(ext_table, "xlt", (gpointer)application_x_excel);
g_hash_table_insert(ext_table, "xlv", (gpointer)application_x_excel);
g_hash_table_insert(ext_table, "exe", (gpointer)application_x_executable);
g_hash_table_insert(ext_table, "ttf", (gpointer)application_x_font_ttf);
g_hash_table_insert(ext_table, "ttc", (gpointer)application_x_font_ttf);
g_hash_table_insert(ext_table, "pre", (gpointer)application_x_freelance);
g_hash_table_insert(ext_table, "gsp", (gpointer)application_x_gsp);
g_hash_table_insert(ext_table, "gss", (gpointer)application_x_gss);
@@ -1094,7 +1199,7 @@ g_hash_table_insert(ext_table, "ms", (gpointer)text_troff);
g_hash_table_insert(ext_table, "roff", (gpointer)text_troff);
g_hash_table_insert(ext_table, "t", (gpointer)text_troff);
g_hash_table_insert(ext_table, "tr", (gpointer)text_troff);
g_hash_table_insert(ext_table, "uni", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "uji", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "unis", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "uri", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "uris", (gpointer)text_uri_list);
@@ -1207,6 +1312,28 @@ g_hash_table_insert(ext_table, "vcf", (gpointer)text_x_vcard);
g_hash_table_insert(ext_table, "hlp", (gpointer)application_winhelp);
g_hash_table_insert(ext_table, "cbz", (gpointer)application_x_cbz);
g_hash_table_insert(ext_table, "cbr", (gpointer)application_x_cbr);
g_hash_table_insert(ext_table, "fon", (gpointer)application_x_ms_compress_szdd);
g_hash_table_insert(ext_table, "a78", (gpointer)application_x_atari_7800_rom);
g_hash_table_insert(ext_table, "nes", (gpointer)application_x_nes_rom);
g_hash_table_insert(ext_table, "pfm", (gpointer)application_x_font_pfm);
g_hash_table_insert(ext_table, "3ds", (gpointer)image_x_3ds);
g_hash_table_insert(ext_table, "lz4", (gpointer)application_x_lz4);
g_hash_table_insert(ext_table, "pptx", (gpointer)application_vnd_openxmlformats_officedocument_presentationml_presentation);
g_hash_table_insert(ext_table, "odp", (gpointer)application_vnd_oasis_opendocument_presentation);
g_hash_table_insert(ext_table, "accdb", (gpointer)application_x_msaccess);
g_hash_table_insert(ext_table, "ods", (gpointer)application_vnd_oasis_opendocument_spreadsheet);
g_hash_table_insert(ext_table, "aiff", (gpointer)audio_x_aiff);
g_hash_table_insert(ext_table, "aif", (gpointer)audio_x_aiff);
g_hash_table_insert(ext_table, "reg", (gpointer)text_x_ms_regedit);
g_hash_table_insert(ext_table, "zst", (gpointer)application_x_zstd);
g_hash_table_insert(ext_table, "m4v", (gpointer)video_x_m4v);
g_hash_table_insert(ext_table, "pcap", (gpointer)application_vnd_tcpdump_pcap);
g_hash_table_insert(ext_table, "jsonlz4", (gpointer)application_x_lz4_json);
g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp);
g_hash_table_insert(ext_table, "z", (gpointer)application_zlib);
g_hash_table_insert(ext_table, "pf2", (gpointer)application_x_font_pf2);
g_hash_table_insert(ext_table, "jmod", (gpointer)application_x_java_jmod);
g_hash_table_insert(ext_table, "heic", (gpointer)image_heic);
return ext_table;}
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@@ -1465,6 +1592,7 @@ g_hash_table_insert(mime_table, "text/mcf", (gpointer)text_mcf);
g_hash_table_insert(mime_table, "text/pascal", (gpointer)text_pascal);
g_hash_table_insert(mime_table, "text/plain", (gpointer)text_plain);
g_hash_table_insert(mime_table, "text/richtext", (gpointer)text_richtext);
g_hash_table_insert(mime_table, "text/rtf", (gpointer)text_rtf);
g_hash_table_insert(mime_table, "text/scriplet", (gpointer)text_scriplet);
g_hash_table_insert(mime_table, "text/x-awk", (gpointer)text_x_awk);
g_hash_table_insert(mime_table, "video/x-jng", (gpointer)video_x_jng);
@@ -1569,5 +1697,56 @@ g_hash_table_insert(mime_table, "image/x-tga", (gpointer)image_x_tga);
g_hash_table_insert(mime_table, "application/x-wine-extension-ini", (gpointer)application_x_wine_extension_ini);
g_hash_table_insert(mime_table, "application/x-cbz", (gpointer)application_x_cbz);
g_hash_table_insert(mime_table, "application/x-cbr", (gpointer)application_x_cbr);
g_hash_table_insert(mime_table, "application/x-ms-compress-szdd", (gpointer)application_x_ms_compress_szdd);
g_hash_table_insert(mime_table, "application/x-atari-7800-rom", (gpointer)application_x_atari_7800_rom);
g_hash_table_insert(mime_table, "application/x-nes-rom", (gpointer)application_x_nes_rom);
g_hash_table_insert(mime_table, "application/x-font-pfm", (gpointer)application_x_font_pfm);
g_hash_table_insert(mime_table, "application/x-gettext-translation", (gpointer)application_x_gettext_translation);
g_hash_table_insert(mime_table, "image/wmf", (gpointer)image_wmf);
g_hash_table_insert(mime_table, "application/pgp-keys", (gpointer)application_pgp_keys);
g_hash_table_insert(mime_table, "image/x-3ds", (gpointer)image_x_3ds);
g_hash_table_insert(mime_table, "application/x-lz4", (gpointer)application_x_lz4);
g_hash_table_insert(mime_table, "application/vnd.openxmlformats-officedocument.presentationml.presentation", (gpointer)application_vnd_openxmlformats_officedocument_presentationml_presentation);
g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.presentation", (gpointer)application_vnd_oasis_opendocument_presentation);
g_hash_table_insert(mime_table, "application/x-msaccess", (gpointer)application_x_msaccess);
g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.spreadsheet", (gpointer)application_vnd_oasis_opendocument_spreadsheet);
g_hash_table_insert(mime_table, "audio/x-aiff", (gpointer)audio_x_aiff);
g_hash_table_insert(mime_table, "text/x-ms-regedit", (gpointer)text_x_ms_regedit);
g_hash_table_insert(mime_table, "application/x-gamecube-rom", (gpointer)application_x_gamecube_rom);
g_hash_table_insert(mime_table, "application/x-nintendo-ds-rom", (gpointer)application_x_nintendo_ds_rom);
g_hash_table_insert(mime_table, "text/x-objective-c", (gpointer)text_x_objective_c);
g_hash_table_insert(mime_table, "application/x-font-gdos", (gpointer)application_x_font_gdos);
g_hash_table_insert(mime_table, "application/x-apple-diskimage", (gpointer)application_x_apple_diskimage);
g_hash_table_insert(mime_table, "application/x-zstd", (gpointer)application_x_zstd);
g_hash_table_insert(mime_table, "video/x-m4v", (gpointer)video_x_m4v);
g_hash_table_insert(mime_table, "message/news", (gpointer)message_news);
g_hash_table_insert(mime_table, "application/vnd.symbian.install", (gpointer)application_vnd_symbian_install);
g_hash_table_insert(mime_table, "application/x-lzh-compressed", (gpointer)application_x_lzh_compressed);
g_hash_table_insert(mime_table, "application/x-dosdriver", (gpointer)application_x_dosdriver);
g_hash_table_insert(mime_table, "application/vnd.tcpdump.pcap", (gpointer)application_vnd_tcpdump_pcap);
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
g_hash_table_insert(mime_table, "application/x-avira-qua", (gpointer)application_x_avira_qua);
g_hash_table_insert(mime_table, "video/MP2T", (gpointer)video_MP2T);
g_hash_table_insert(mime_table, "application/x-snappy-framed", (gpointer)application_x_snappy_framed);
g_hash_table_insert(mime_table, "application/x-lz4+json", (gpointer)application_x_lz4_json);
g_hash_table_insert(mime_table, "application/x-dmp", (gpointer)application_x_dmp);
g_hash_table_insert(mime_table, "application/zlib", (gpointer)application_zlib);
g_hash_table_insert(mime_table, "application/x-pgp-keyring", (gpointer)application_x_pgp_keyring);
g_hash_table_insert(mime_table, "application/x-gdbm", (gpointer)application_x_gdbm);
g_hash_table_insert(mime_table, "application/x-font-pf2", (gpointer)application_x_font_pf2);
g_hash_table_insert(mime_table, "application/x-zip", (gpointer)application_x_zip);
g_hash_table_insert(mime_table, "application/x-coredump", (gpointer)application_x_coredump);
g_hash_table_insert(mime_table, "application/x-java-jmod", (gpointer)application_x_java_jmod);
g_hash_table_insert(mime_table, "application/x-terminfo", (gpointer)application_x_terminfo);
g_hash_table_insert(mime_table, "application/x-terminfo2", (gpointer)application_x_terminfo2);
g_hash_table_insert(mime_table, "application/x-arc", (gpointer)application_x_arc);
g_hash_table_insert(mime_table, "application/vnd.lotus-1-2-3", (gpointer)application_vnd_lotus_1_2_3);
g_hash_table_insert(mime_table, "image/x-win-bitmap", (gpointer)image_x_win_bitmap);
g_hash_table_insert(mime_table, "application/x-maxis-dbpf", (gpointer)application_x_maxis_dbpf);
g_hash_table_insert(mime_table, "text/PGP", (gpointer)text_PGP);
g_hash_table_insert(mime_table, "audio/x-hx-aac-adts", (gpointer)audio_x_hx_aac_adts);
g_hash_table_insert(mime_table, "application/x-chrome-extension", (gpointer)application_x_chrome_extension);
g_hash_table_insert(mime_table, "image/heic", (gpointer)image_heic);
g_hash_table_insert(mime_table, "image/x-gem", (gpointer)image_x_gem);
return mime_table;}
#endif

View File

@@ -1,7 +1,7 @@
#include "src/sist.h"
#include "src/ctx.h"
__thread magic_t Magic;
__thread magic_t Magic = NULL;
void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
@@ -16,7 +16,6 @@ void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
if (*fd == -1) {
perror("open");
printf("%s\n", job->filepath);
free(job);
return NULL;
}
}
@@ -25,6 +24,7 @@ void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
int ret = read(*fd, full_buf + bytes_read, job->info.st_size - bytes_read);
if (ret == -1) {
perror("read");
return NULL;
}
}
@@ -62,7 +62,7 @@ void parse(void *arg) {
if (job->info.st_size == 0) {
doc.mime = MIME_EMPTY;
} else if (*(job->filepath + job->ext) != '\0') {
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
}
@@ -80,11 +80,18 @@ void parse(void *arg) {
bytes_read = read(fd, buf, PARSE_BUF_SIZE);
if (bytes_read == -1) {
perror("read");
close(fd);
free(job);
return;
}
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
if (magic_mime_str != NULL) {
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
if (doc.mime == 0) {
fprintf(stderr, "Couldn't find mime %s, %s!\n", magic_mime_str, job->filepath + job->base);
fprintf(stderr, "Couldn't find mime %s, %s\n", magic_mime_str, job->filepath + job->base);
}
}
}
@@ -93,14 +100,15 @@ void parse(void *arg) {
if (!(SHOULD_PARSE(doc.mime))) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || mmime == MimeAudio || mmime == MimeImage) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
parse_media(job->filepath, &doc);
} else if (IS_PDF(doc.mime)) {
void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd);
parse_pdf(pdf_buf, doc.size, &doc);
if (pdf_buf != buf) {
if (pdf_buf != buf && pdf_buf != NULL) {
free(pdf_buf);
}
@@ -111,7 +119,7 @@ void parse(void *arg) {
void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
parse_font(font_buf, doc.size, &doc);
if (font_buf != buf) {
if (font_buf != buf && font_buf != NULL) {
free(font_buf);
}
}

View File

@@ -1,10 +1,22 @@
#include <src/ctx.h>
#include "pdf.h"
#include "src/ctx.h"
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_page *cover = fz_load_page(ctx, fzdoc, 0);
int err = 0;
fz_page *cover = NULL;
fz_var(cover);
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
err = 1;
if (err != 0) {
fz_drop_page(ctx, cover);
return NULL;
}
fz_rect bounds = fz_bound_page(ctx, cover);
float scale;
@@ -24,24 +36,49 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_var(err);
fz_try(ctx)
{
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_run_page(ctx, cover, dev, fz_identity, NULL);
}
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
}
fz_catch(ctx)
fz_rethrow(ctx);
err = ctx->error.errcode;
fz_drop_device(ctx, dev);
if (err != 0) {
fz_drop_page(ctx, cover);
fz_drop_pixmap(ctx, pixmap);
return NULL;
}
fz_buffer *fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
fz_buffer *fzbuf = NULL;
fz_var(fzbuf);
fz_var(err);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
fz_try(ctx)
fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
fz_catch(ctx)
err = ctx->error.errcode;
if (err == 0) {
unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
}
fz_drop_pixmap(ctx, pixmap);
fz_drop_buffer(ctx, fzbuf);
fz_drop_pixmap(ctx, pixmap);
if (err != 0) {
fz_drop_page(ctx, cover);
return NULL;
}
return cover;
}
@@ -49,103 +86,178 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
// Print callback that deliberately does nothing: both arguments are ignored
// and the body is empty. init_ctx() installs it as the context's warning and
// error printer so those messages are discarded.
void fz_noop_callback(__attribute__((unused)) void *user, __attribute__((unused)) const char *message) {}
// Prepare a freshly created MuPDF context for use by the parser:
//  - calls fz_disable_icc() (presumably turns off ICC colour management -- confirm against the MuPDF docs),
//  - registers the document handlers so documents can be opened through this context,
//  - silences the library by pointing the warning and error printers at fz_noop_callback.
void init_ctx(fz_context *ctx) {
fz_disable_icc(ctx);
fz_register_document_handlers(ctx);
// Route both diagnostic channels to the no-op callback so nothing is printed.
ctx->warn.print = fz_noop_callback;
ctx->error.print = fz_noop_callback;
}
/**
 * Copy every character of one structured-text block into a text buffer.
 *
 * Blocks whose type is not FZ_STEXT_BLOCK_TEXT are ignored. For text blocks,
 * lines and characters are visited in list order and each character's code
 * point is handed to text_buffer_append_char().
 *
 * @param block  structured-text block to read
 * @param tex    destination text buffer
 * @return TEXT_BUF_FULL as soon as the buffer reports it is full, 0 otherwise
 */
int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
    if (block->type == FZ_STEXT_BLOCK_TEXT) {
        for (fz_stext_line *ln = block->u.t.first_line; ln != NULL; ln = ln->next) {
            for (fz_stext_char *ch = ln->first_char; ch != NULL; ch = ch->next) {
                if (text_buffer_append_char(tex, ch->c) == TEXT_BUF_FULL) {
                    return TEXT_BUF_FULL;
                }
            }
        }
    }
    return 0;
}
void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
if (buf == NULL) {
return;
}
static int mu_is_initialized = 0;
if (!mu_is_initialized) {
pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);
mu_is_initialized = 1;
}
fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
fz_stream *stream = NULL;
fz_document *fzdoc = NULL;
fz_var(stream);
init_ctx(ctx);
int err = 0;
fz_document *fzdoc = NULL;
fz_stream *stream = NULL;
fz_var(fzdoc);
fz_var(stream);
fz_var(err);
fz_try(ctx)
{
fz_disable_icc(ctx);
fz_register_document_handlers(ctx);
//disable warnings
ctx->warn.print = fz_noop_callback;
ctx->error.print = fz_noop_callback;
stream = fz_open_memory(ctx, buf, buf_len);
fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
int page_count = fz_count_pages(ctx, fzdoc);
fz_page *cover = render_cover(ctx, doc, fzdoc);
fz_stext_options opts;
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page;
if (current_page == 0) {
page = cover;
} else {
page = fz_load_page(ctx, fzdoc, current_page);
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_try(ctx)
fz_run_page_contents(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
fz_catch(ctx)
fz_rethrow(ctx);
fz_drop_device(ctx, dev);
fz_stext_block *block = stext->first_block;
while (block != NULL) {
if (block->type != FZ_STEXT_BLOCK_TEXT) {
block = block->next;
continue;
}
fz_stext_line *line = block->u.t.first_line;
while (line != NULL) {
fz_stext_char *c = line->first_char;
while (c != NULL) {
if (text_buffer_append_char(&text_buf, c->c) == TEXT_BUF_FULL) {
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
goto write_loop_end;
}
c = c->next;
}
line = line->next;
}
block = block->next;
}
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
}
write_loop_end:;
text_buffer_terminate_string(&text_buf);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
text_buffer_destroy(&text_buf);
APPEND_META(doc, meta_content)
}
fz_always(ctx)
{
fz_catch(ctx)
err = ctx->error.errcode;
if (err) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
} fz_catch(ctx) {
fprintf(stderr, "Error %s %s\n", doc->filepath, ctx->error.message);
return;
}
char title[4096] = {'\0',};
fz_try(ctx)
fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
fz_catch(ctx)
;
if (strlen(title) > 0) {
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title));
meta_content->key = MetaTitle;
strcpy(meta_content->strval, title);
APPEND_META(doc, meta_content)
}
int page_count = -1;
fz_var(err);
fz_try(ctx)
page_count = fz_count_pages(ctx, fzdoc);
fz_catch(ctx)
err = ctx->error.errcode;
if (err) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_page *cover = render_cover(ctx, doc, fzdoc);
if (cover == NULL) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_options opts = {0};
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page = NULL;
if (current_page == 0) {
page = cover;
} else {
fz_var(err);
fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page);
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
fz_var(err);
fz_try(ctx)
fz_run_page(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_block *block = stext->first_block;
while (block != NULL) {
int ret = read_stext_block(block, &text_buf);
if (ret == TEXT_BUF_FULL) {
break;
}
block = block->next;
}
fz_drop_stext_page(ctx, stext);
fz_drop_page(ctx, page);
if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) {
break;
}
}
text_buffer_terminate_string(&text_buf);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
APPEND_META(doc, meta_content)
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
text_buffer_destroy(&text_buf);
}

View File

@@ -27,17 +27,14 @@ void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
read(*fd, intermediate_buf + bytes_read, to_read);
}
text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int i = 0; i < intermediate_buf_len; i++) {
text_buffer_append_char(&text_buf, *(intermediate_buf + i));
}
text_buffer_terminate_string(&text_buf);
meta_line_t *meta = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
meta->key = MetaContent;
strcpy(meta->strval, text_buf.dyn_buffer.buf);
text_buffer_destroy(&text_buf);
free(intermediate_buf);
strcpy(meta->strval, tex.dyn_buffer.buf);
APPEND_META(doc, meta)
free(intermediate_buf);
text_buffer_destroy(&tex);
}

View File

@@ -16,6 +16,7 @@
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <ctype.h>
#include <mupdf/fitz.h>
#include <mupdf/pdf.h>
@@ -49,6 +50,7 @@
#include "parsing/media.h"
#include "parsing/font.h"
#include "cli.h"
#include "utf8.h/utf8.h"
#ifndef SIST_SCAN_ONLY
#include "src/index/elastic.h"

View File

@@ -25,6 +25,7 @@ typedef struct tpool {
int done_cnt;
int stop;
void (*cleanup_func)();
} tpool_t;
@@ -100,7 +101,7 @@ static void *tpool_worker(void *arg) {
tpool_t *pool = arg;
while (1) {
pthread_mutex_lock(&(pool->work_mutex));
pthread_mutex_lock(&pool->work_mutex);
if (pool->stop) {
break;
}
@@ -120,7 +121,7 @@ static void *tpool_worker(void *arg) {
pthread_mutex_lock(&(pool->work_mutex));
pool->done_cnt++;
progress_bar_print((double)pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
if (pool->work_head == NULL) {
pthread_cond_signal(&(pool->working_cond));
@@ -188,11 +189,11 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt;
pool->work_cnt =0;
pool->done_cnt =0;
pool->work_cnt = 0;
pool->done_cnt = 0;
pool->stop = 0;
pool->cleanup_func = cleanup_func;
pool->threads = malloc(sizeof(pthread_t) * thread_cnt);
pool->threads = calloc(sizeof(pthread_t), thread_cnt);
pthread_mutex_init(&(pool->work_mutex), NULL);
@@ -202,11 +203,14 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
pool->work_head = NULL;
pool->work_tail = NULL;
for (size_t i = 0; i < thread_cnt; i++) {
return pool;
}
void tpool_start(tpool_t *pool) {
for (size_t i = 0; i < pool->thread_cnt; i++) {
pthread_t thread = pool->threads[i];
pthread_create(&thread, NULL, tpool_worker, pool);
pthread_detach(thread);
}
return pool;
}

View File

@@ -9,6 +9,7 @@ typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)());
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *tm);
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);

View File

@@ -89,6 +89,85 @@ void text_buffer_terminate_string(text_buffer_t *buf) {
dyn_buffer_write_char(&buf->dyn_buffer, '\0');
}
/**
 * Check whether the bytes at `s` start with one well-formed UTF-8 sequence.
 *
 * A sequence is accepted when:
 *  - it is a single 7-bit byte, or
 *  - its lead byte announces 2, 3 or 4 bytes, every announced trailing byte
 *    has the 10xxxxxx form, the byte right after the sequence is NOT another
 *    10xxxxxx byte, and the value is not an overlong encoding.
 *
 * Bytes are inspected strictly left to right and inspection stops at the
 * first mismatch, so a NUL terminator inside the sequence ends the check
 * before anything beyond it is read.
 *
 * @param s  pointer to the first byte of the sequence
 * @return TRUE if the sequence is acceptable, FALSE otherwise
 */
__always_inline
int utf8_validchr(const char *s) {
    const int lead = *s;
    int tail;

    /* Single-byte (ASCII) code points need no further checks. */
    if ((lead & 0x80) == 0x00) {
        return TRUE;
    }

    /* Number of continuation bytes announced by the lead byte. */
    if ((lead & 0xf8) == 0xf0) {
        tail = 3;
    } else if ((lead & 0xf0) == 0xe0) {
        tail = 2;
    } else if ((lead & 0xe0) == 0xc0) {
        tail = 1;
    } else {
        /* Stray continuation byte or an out-of-range lead byte. */
        return FALSE;
    }

    /* Every announced trailing byte must look like 10xxxxxx. */
    for (int i = 1; i <= tail; i++) {
        if ((s[i] & 0xc0) != 0x80) {
            return FALSE;
        }
    }

    /* The sequence must end where the lead byte said it would. */
    if ((s[tail + 1] & 0xc0) == 0x80) {
        return FALSE;
    }

    /* Reject overlong forms: values that fit in a shorter encoding. */
    switch (tail) {
        case 3:
            if ((lead & 0x07) == 0 && (s[1] & 0x30) == 0) {
                return FALSE;
            }
            break;
        case 2:
            if ((lead & 0x0f) == 0 && (s[1] & 0x20) == 0) {
                return FALSE;
            }
            break;
        default:
            if ((lead & 0x1e) == 0) {
                return FALSE;
            }
            break;
    }

    return TRUE;
}
/**
 * Append UTF-8 text from a length-bounded byte range to a text buffer and
 * NUL-terminate the buffer.
 *
 * Code points are decoded one at a time with utf8codepoint(). A code point is
 * appended only when the byte sequence it was decoded from passes
 * utf8_validchr(); malformed sequences are skipped. Decoding stops at a NUL
 * code point, when fewer than five bytes remain after the current code point
 * (this keeps the decoder's and validator's look-ahead inside `len`), or as
 * soon as the buffer reports TEXT_BUF_FULL.
 *
 * Nothing is appended when `str` is NULL, `len` is 0, the first byte is NUL,
 * or the first byte announces a multi-byte sequence longer than `len`.
 *
 * @param buf  destination text buffer
 * @param str  source bytes (need not be NUL-terminated)
 * @param len  number of readable bytes at `str`
 * @return always 0
 */
int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len) {

    utf8_int32_t c;

    if (str == NULL || len < 1 ||
        (0xf0 == (0xf8 & str[0]) && len < 4) ||
        (0xe0 == (0xf0 & str[0]) && len < 3) ||
        (0xc0 == (0xe0 & str[0]) && len == 1) ||
        *(str) == 0) {
        text_buffer_terminate_string(buf);
        return 0;
    }

    /* `cur` is the start of the code point held in `c`; `next` is the byte after it. */
    const char *cur = str;
    const char *next = utf8codepoint(cur, &c);

    while (c != '\0' && (size_t) (next - str) + 4 < len) {
        /* Validate the sequence that produced `c`, not the one following it. */
        if (utf8_validchr(cur) && text_buffer_append_char(buf, c) == TEXT_BUF_FULL) {
            /* Respect the buffer's size limit instead of writing past it. */
            break;
        }
        cur = next;
        next = utf8codepoint(cur, &c);
    }

    text_buffer_terminate_string(buf);
    return 0;
}
/**
 * Append a NUL-terminated UTF-8 string to a text buffer and NUL-terminate
 * the buffer.
 *
 * Code points are decoded one at a time with utf8codepoint(). A code point is
 * appended only when the byte sequence it was decoded from passes
 * utf8_validchr(); malformed sequences are skipped. Appending stops at the
 * terminating NUL or as soon as the buffer reports TEXT_BUF_FULL.
 *
 * A NULL `str` is treated as an empty string.
 *
 * @param buf  destination text buffer
 * @param str  NUL-terminated source string, may be NULL
 * @return always 0
 */
int text_buffer_append_string0(text_buffer_t *buf, char *str) {

    utf8_int32_t c;

    if (str == NULL) {
        text_buffer_terminate_string(buf);
        return 0;
    }

    /* `cur` is the start of the code point held in `c`; `next` is the byte after it. */
    const char *cur = str;
    const char *next = utf8codepoint(cur, &c);

    while (c != '\0') {
        /* Validate the sequence that produced `c`, not the one following it. */
        if (utf8_validchr(cur) && text_buffer_append_char(buf, c) == TEXT_BUF_FULL) {
            /* Respect the buffer's size limit instead of writing past it. */
            break;
        }
        cur = next;
        next = utf8codepoint(cur, &c);
    }

    text_buffer_terminate_string(buf);
    /* The function is declared int; falling off the end left the result undefined. */
    return 0;
}
int text_buffer_append_char(text_buffer_t *buf, int c) {
if (SHOULD_IGNORE_CHAR(c)) {
@@ -96,15 +175,31 @@ int text_buffer_append_char(text_buffer_t *buf, int c) {
dyn_buffer_write_char(&buf->dyn_buffer, ' ');
buf->last_char_was_whitespace = TRUE;
if (buf->dyn_buffer.cur >= buf->max_size) {
if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL;
}
}
} else {
buf->last_char_was_whitespace = FALSE;
dyn_buffer_write_char(&buf->dyn_buffer, (char) c);
grow_buffer_small(&buf->dyn_buffer);
if (buf->dyn_buffer.cur >= buf->max_size) {
if (0 == ((utf8_int32_t) 0xffffff80 & c)) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = (char) c;
} else if (0 == ((utf8_int32_t) 0xfffff800 & c)) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xc0 | (char) (c >> 6);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
} else if (0 == ((utf8_int32_t) 0xffff0000 & c)) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xe0 | (char) (c >> 12);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
} else {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xf0 | (char) (c >> 18);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 12) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
}
if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL;
}
}
@@ -136,7 +231,7 @@ dyn_buffer_t url_escape(char *str) {
dyn_buffer_t text = dyn_buffer_create();
char * ptr = str;
char *ptr = str;
while (*ptr) {
if (*ptr == '#') {
dyn_buffer_write(&text, "%23", 3);
@@ -169,7 +264,7 @@ char *expandpath(const char *path) {
wordexp_t w;
wordexp(path, &w, 0);
char * expanded = malloc(strlen(w.we_wordv[0]) + 2);
char *expanded = malloc(strlen(w.we_wordv[0]) + 2);
strcpy(expanded, w.we_wordv[0]);
strcat(expanded, "/");

View File

@@ -5,7 +5,10 @@
/* Status code for "the text buffer is full". */
#define TEXT_BUF_FULL (-1)
/* 16 KiB. Parenthesized so the macro behaves as a single value inside larger
 * expressions, e.g. `x / INITIAL_BUF_SIZE`. */
#define INITIAL_BUF_SIZE (1024 * 16)
#define SHOULD_IGNORE_CHAR(c) c < '0' || c > 'z'
/* A character is kept when its code is at or above '!'; anything lower is
 * ignored. The parameter and the results are parenthesized so an expression
 * argument such as `a & b` is compared as a whole. */
#define SHOULD_IGNORE_CHAR(c) (!(SHOULD_KEEP_CHAR(c)))
#define SHOULD_KEEP_CHAR(c) ((c) >= (int) '!')
typedef struct dyn_buffer {
char *buf;
@@ -21,8 +24,10 @@ typedef struct text_buffer {
dyn_buffer_t dyn_buffer;
} text_buffer_t;
char *abspath(const char * path);
char *abspath(const char *path);
char *expandpath(const char *path);
dyn_buffer_t url_escape(char *str);
void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
@@ -56,13 +61,16 @@ text_buffer_t text_buffer_create(int max_size);
void text_buffer_terminate_string(text_buffer_t *buf);
int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len);
int text_buffer_append_string0(text_buffer_t *buf, char *str);
int text_buffer_append_char(text_buffer_t *buf, int c);
void incremental_put(GHashTable *table, unsigned long inode_no, int mtime);
int incremental_get(GHashTable *table, unsigned long inode_no);
int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no);
#endif

View File

@@ -43,27 +43,40 @@ int javascript(void *p, onion_request *req, onion_response *res) {
return OCS_PROCESSED;
}
int style(void *p, onion_request *req, onion_response *res) {
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "text/css");
onion_response_set_length(res, sizeof(bundle_css));
onion_response_write(res, bundle_css, sizeof(bundle_css));
return OCS_PROCESSED;
/**
 * Tell whether the request asks for the dark theme.
 *
 * Reads the "sist" cookie from the request; the dark theme is requested when
 * that cookie is present and its value is exactly "dark".
 *
 * @param req  incoming HTTP request
 * @return 1 when the cookie equals "dark", 0 when it is absent or different
 */
int client_requested_dark_theme(onion_request *req) {
    const char *theme = onion_request_get_cookie(req, "sist");

    return theme != NULL && strcmp(theme, "dark") == 0;
}
int bg_bars(void *p, onion_request *req, onion_response *res) {
int style(void *p, onion_request *req, onion_response *res) {
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "image/png");
onion_response_set_length(res, sizeof(bg_bars_png));
onion_response_write(res, bg_bars_png, sizeof(bg_bars_png));
onion_response_set_header(res, "Content-Type", "text/css");
if (client_requested_dark_theme(req)) {
onion_response_set_length(res, sizeof(bundle_dark_css));
onion_response_write(res, bundle_dark_css, sizeof(bundle_dark_css));
} else {
onion_response_set_length(res, sizeof(bundle_css));
onion_response_write(res, bundle_css, sizeof(bundle_css));
}
return OCS_PROCESSED;
}
int img_sprite_skin_flag(void *p, onion_request *req, onion_response *res) {
set_default_headers(res);
onion_response_set_header(res, "Content-Type", "image/png");
onion_response_set_length(res, sizeof(sprite_skin_flat_png));
onion_response_write(res, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
if (client_requested_dark_theme(req)) {
onion_response_set_length(res, sizeof(sprite_skin_flat_dark_png));
onion_response_write(res, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png));
} else {
onion_response_set_length(res, sizeof(sprite_skin_flat_png));
onion_response_write(res, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
}
return OCS_PROCESSED;
}
@@ -326,7 +339,7 @@ int index_info(void *p, onion_request *req, onion_response *res) {
cJSON_AddStringToObject(idx_json, "name", idx->desc.name);
cJSON_AddStringToObject(idx_json, "version", idx->desc.version);
cJSON_AddStringToObject(idx_json, "id", idx->desc.uuid);
cJSON_AddNumberToObject(idx_json, "timestamp", (double)idx->desc.timestamp);
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
cJSON_AddItemToArray(arr, idx_json);
}
@@ -349,20 +362,26 @@ int file(void *p, onion_request *req, onion_response *res) {
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
return OCS_NOT_PROCESSED;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
return OCS_NOT_PROCESSED;
}
int ret;
if (strlen(idx->desc.rewrite_url) == 0) {
return serve_file_from_disk(source, idx, req, res);
ret = serve_file_from_disk(source, idx, req, res);
} else {
return serve_file_from_url(source, idx, req, res);
ret = serve_file_from_url(source, idx, req, res);
}
cJSON_Delete(doc);
return ret;
}
void serve(const char *hostname, const char *port) {
@@ -378,7 +397,6 @@ void serve(const char *hostname, const char *port) {
onion_url_add(urls, "", search_index);
onion_url_add(urls, "css", style);
onion_url_add(urls, "js", javascript);
onion_url_add(urls, "img/bg-bars.png", bg_bars);
onion_url_add(urls, "img/sprite-skin-flat.png", img_sprite_skin_flag);
onion_url_add(urls, "es", search);

File diff suppressed because one or more lines are too long

1
utf8.h Submodule

Submodule utf8.h added at 2a7c5bfa95

286
web/css/dark.css Normal file
View File

@@ -0,0 +1,286 @@
*:focus {
outline: 0;
}
a {
color: #00BCD4;
}
body {
overflow-y: scroll;
background: black;
}
.progress {
margin-top: 1em;
}
.card {
margin-top: 1em;
background: #212121;
color: #e0e0e0;
border-radius: 1px;
border: none;
}
.navbar-brand {
font-size: 1.75rem;
padding: 0;
color: #f5f5f5;
}
.navbar {
background: #546b7a;
}
.navbar a:hover {
color: #fff;
}
.navbar span {
color: #eee;
}
.document {
padding: 0.5rem;
}
.document p {
margin-bottom: 0;
}
.document:hover p {
text-decoration: underline;
}
.badge-video {
color: #FFFFFF;
background-color: #F27761;
}
.badge-image {
color: #FFFFFF;
background-color: #AA99C9;
}
.badge-audio {
color: #FFFFFF;
background-color: #00ADEF;
}
.badge-resolution {
color: #212529;
background-color: #B0BEC5;
}
.badge-text {
color: #FFFFFF;
background-color: #FAAB3C;
}
.card-img-overlay {
pointer-events: none;
padding: 0.75rem;
bottom: unset;
top: 0;
left: unset;
right: unset;
}
.file-title {
font-size: 10pt;
white-space: nowrap;
text-overflow: ellipsis;
overflow: hidden;
}
.badge {
margin-right: 3px;
}
.badge-user {
color: #212529;
background-color: #e0e0e0;
}
.fit {
display: block;
min-width: 64px;
max-width: 100%;
max-height: 175px;
margin: 0 auto 0;
padding: 3px 3px 0 3px;
width: auto;
height: auto;
}
.audio-fit {
height: 39px;
vertical-align: bottom;
display: inline;
width: 100%;
}
@media (min-width: 1200px) {
.card-columns {
column-count: 4;
}
}
@media (min-width: 1500px) {
.container {
max-width: 1440px;
}
.card-columns {
column-count: 5;
}
}
@media (min-width: 1800px) {
.container {
max-width: 1550px;
}
}
mark {
background: #fff217;
border-radius: 0;
padding: 1px 0;
}
.content-div {
font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
font-size: 13px;
padding: 1em;
background-color: #37474F;
border: 1px solid #616161;
border-radius: 4px;
margin: 3px;
}
.irs-single, .irs-from, .irs-to {
font-size: 13px;
background-color: #00BCD4;
}
.irs-slider {
cursor: col-resize;
}
.irs {
margin-top: 1em;
margin-bottom: 1em;
}
.custom-select {
overflow: auto;
background-color: #37474F;
border: 1px solid #616161;
color: #bdbdbd;
}
.custom-select:focus {
border-color: #757575;
outline: 0;
box-shadow: 0 0 0 .2rem rgba(0, 123, 255, .25);
}
option {
outline: none;
}
.form-control {
background-color: #37474F;
border: 1px solid #616161;
color: #fff;
}
.form-control:focus {
background-color: #546E7A;
color: #fff;
}
.input-group-text {
background: #263238;
border: 1px solid #616161;
color: #dbdbdb;
}
::placeholder {
color: #BDBDBD !important;
opacity: 1;
}
.inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow {
background: none;
}
.inspire-tree .icon-expand::before, .inspire-tree .icon-collapse::before {
background-color: black;
}
.inspire-tree .title {
color: #eee;
}
.inspire-tree {
font-weight: 400;
font-size: 14px;
font-family: Helvetica, Nueue, Verdana, sans-serif;
max-height: 350px;
overflow: auto;
}
.page-indicator {
line-height: 1rem;
padding: 0.5rem;
background: #212121;
color: #eee;
}
.btn-xs {
padding: .1rem .3rem;
font-size: .875rem;
border-radius: .2rem;
}
.btn {
color: #eee;
}
.nav-tabs .nav-link {
color: #e0e0e0;
}
.nav-tabs .nav-item.show .nav-link, .nav-tabs .nav-link.active {
background-color: #212121;
border-color: #616161 #616161 #212121;
color: #e0e0e0;
}
/* Focused tab link: dark-theme border and text colours.
   (The selector used to be listed twice; one occurrence is equivalent.) */
.nav-tabs .nav-link:focus {
    color: #e0e0e0;
    border-color: #616161 #616161 #212121;
}
.nav-tabs .nav-link:focus, .nav-tabs .nav-link:hover {
border-color: #e0e0e0 #e0e0e0 #212121;
color: #e0e0e0;
}
/* Separator line under the tab strip.
   The `border-bottom` shorthand resets every longhand it covers, so giving it
   only a colour leaves border-style at `none` and nothing is drawn. Width and
   style are spelled out so the line shows up in the dark-theme grey. */
.nav-tabs {
    border-bottom: 1px solid #616161;
}
.nav {
margin-top: 0.5rem;
}
@media (min-width: 800px) {
.nav {
min-width: 800px;
}
}

View File

@@ -1,3 +1,7 @@
*:focus {
outline: 0;
}
body {overflow-y:scroll;}
.progress {
@@ -6,6 +10,7 @@ body {overflow-y:scroll;}
.card {
margin-top: 1em;
box-shadow: 0 .125rem .25rem rgba(0,0,0,.075) !important;
}
.navbar-brand {
font-size: 1.75rem;
@@ -46,6 +51,11 @@ body {overflow-y:scroll;}
background-color: #FFC107;
}
.badge-user {
color: #212529;
background-color: #e0e0e0;
}
.badge-text {
color: #FFFFFF;
background-color: #FAAB3C;
@@ -87,6 +97,7 @@ body {overflow-y:scroll;}
height: 39px;
vertical-align: bottom;
display: inline;
width: 100%;
}
@media (min-width: 1200px) {
@@ -159,10 +170,21 @@ mark {
.page-indicator {
line-height: 1rem;
padding: 0.5rem;
background: #f8f9fa;
}
.btn-xs {
padding: .1rem .3rem;
font-size: .875rem;
border-radius: .2rem;
}
}
.nav {
margin-top: 0.5rem;
}
@media (min-width: 800px) {
.nav {
min-width: 800px;
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 595 B

View File

@@ -75,6 +75,18 @@ function shouldPlayVideo(hit) {
return videoc !== "hevc" && videoc !== "mpeg2video" && videoc !== "wmv3";
}
/**
 * Creates an empty <div> with a fixed inline height, used to reserve space
 * in a card while its thumbnail is still loading.
 *
 * @param w thumbnail source width
 * @param h thumbnail source height
 * @returns {HTMLDivElement} the placeholder element
 */
function makePlaceholder(w, h) {
    // Default height is 175px.
    let height = 175;

    // NOTE(review): for ordinary pixel dimensions (w > h >= 1) the ratio
    // 175 / w / h is always below 272, so this branch is effectively never
    // taken and the placeholder is 175px high - confirm the intended formula.
    if (w > h && (175 / w / h) >= 272) {
        height = 175 * w / h;
    }

    const placeholder = document.createElement("div");
    placeholder.setAttribute("style", `height: ${height}px`);
    return placeholder;
}
/**
*
* @param hit
@@ -82,7 +94,7 @@ function shouldPlayVideo(hit) {
*/
function createDocCard(hit) {
let docCard = document.createElement("div");
docCard.setAttribute("class", "card shadow-sm");
docCard.setAttribute("class", "card");
let docCardBody = document.createElement("div");
docCardBody.setAttribute("class", "card-body document");
@@ -119,14 +131,22 @@ function createDocCard(hit) {
thumbnail = document.createElement("video");
addVidSrc("f/" + hit["_id"], hit["_source"]["mime"], thumbnail);
const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"]);
imgWrapper.appendChild(placeholder);
thumbnail.setAttribute("class", "fit");
thumbnail.setAttribute("loop", "");
thumbnail.setAttribute("controls", "");
thumbnail.setAttribute("preload", "none");
thumbnail.setAttribute("poster", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
thumbnail.addEventListener("dblclick", function () {
thumbnail.webkitRequestFullScreen();
});
const poster = new Image();
poster.src = thumbnail.getAttribute('poster');
poster.addEventListener("load", function () {
placeholder.remove();
imgWrapper.appendChild(thumbnail);
});
} else if ((hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 20 && hit["_source"]["height"] > 20)
|| hit["_source"]["mime"] === "application/pdf"
|| hit["_source"]["mime"] === "application/epub+zip"
@@ -136,6 +156,17 @@ function createDocCard(hit) {
thumbnail = document.createElement("img");
thumbnail.setAttribute("class", "card-img-top fit");
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"]);
imgWrapper.appendChild(placeholder);
thumbnail.addEventListener("error", () => {
imgWrapper.remove();
});
thumbnail.addEventListener("load", () => {
placeholder.remove();
imgWrapper.appendChild(thumbnail);
});
}
//Thumbnail overlay
@@ -164,7 +195,7 @@ function createDocCard(hit) {
if (hit["_source"].hasOwnProperty("duration")) {
thumbnailOverlay = document.createElement("div");
thumbnailOverlay.setAttribute("class", "card-img-overlay");
let durationBadge = document.createElement("span");
const durationBadge = document.createElement("span");
durationBadge.setAttribute("class", "badge badge-resolution");
durationBadge.appendChild(document.createTextNode(humanTime(hit["_source"]["duration"])));
thumbnailOverlay.appendChild(durationBadge);
@@ -176,7 +207,7 @@ function createDocCard(hit) {
case "video":
case "image":
if (hit["_source"].hasOwnProperty("videoc")) {
let formatTag = document.createElement("span");
const formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-video");
formatTag.appendChild(document.createTextNode(hit["_source"]["videoc"].replace(" ", "")));
tags.push(formatTag);
@@ -196,14 +227,13 @@ function createDocCard(hit) {
//Content
let contentHl = getContentHighlight(hit);
if (contentHl !== undefined) {
let contentDiv = document.createElement("div");
contentDiv.setAttribute("class", "content-div bg-light");
const contentDiv = document.createElement("div");
contentDiv.setAttribute("class", "content-div");
contentDiv.insertAdjacentHTML('afterbegin', contentHl);
docCard.appendChild(contentDiv);
}
if (thumbnail !== null) {
imgWrapper.appendChild(thumbnail);
docCard.appendChild(imgWrapper);
}
@@ -224,6 +254,26 @@ function createDocCard(hit) {
imgWrapper.appendChild(thumbnailOverlay);
}
// User tags
if (hit["_source"].hasOwnProperty("tag")) {
hit["_source"]["tag"].forEach(tag => {
const userTag = document.createElement("span");
userTag.setAttribute("class", "badge badge-pill badge-user");
const tokens = tag.split("#");
if (tokens.length > 1) {
const bg = "#" + tokens[1];
const fg = lum(tokens[1]) > 40 ? "#000" : "#fff";
userTag.setAttribute("style", `background-color: ${bg}; color: ${fg}`);
}
const name = tokens[0].split(".")[tokens[0].split(".").length - 1];
userTag.appendChild(document.createTextNode(name));
tags.push(userTag);
})
}
for (let i = 0; i < tags.length; i++) {
tagContainer.appendChild(tags[i]);
}
@@ -257,7 +307,7 @@ function makePreloader() {
function makePageIndicator(searchResult) {
let pageIndicator = document.createElement("div");
pageIndicator.setAttribute("class", "page-indicator shadow-sm bg-light font-weight-light");
pageIndicator.setAttribute("class", "page-indicator font-weight-light");
const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
pageIndicator.appendChild(document.createTextNode(docCount + " / " + totalHits));

View File

@@ -1,6 +1,8 @@
const SIZE = 40;
let mimeMap = [];
let tree;
let tagMap = [];
let mimeTree;
let tagTree;
let searchBar = document.getElementById("searchBar");
let pathBar = document.getElementById("pathBar");
@@ -21,21 +23,51 @@ jQuery["jsonPost"] = function (url, data) {
});
};
function toggleSearchBar() {
// Once the page has loaded, wire up the click handler of the element with id "theme".
window.onload = () => {
$("#theme").on("click", () => {
// The theme preference lives in a cookie named "sist" (value "dark").
// NOTE(review): includes() is a substring test on the whole cookie string, so any
// other cookie whose name or value contains "sist" is also treated as "dark is on".
if (!document.cookie.includes("sist")) {
document.cookie = "sist=dark";
} else {
// A negative Max-Age expires the cookie immediately, i.e. back to the default theme.
document.cookie = "sist=; Max-Age=-99999999;";
}
// Reload so the page and its assets are requested again with the updated cookie.
window.location.reload();
})
};
function toggleFuzzy() {
searchDebounced();
}
$.jsonPost("i").then(resp => {
resp["indices"].forEach(idx => {
$("#indices").append($("<option>")
const opt = $("<option>")
.attr("value", idx.id)
.attr("selected", !idx.name.includes("(nsfw)"))
.append(idx.name)
);
selectedIndices.push(idx.id);
.append(idx.name);
if (!idx.name.includes("(nsfw)")) {
opt.attr("selected", !idx.name.includes("(nsfw)"));
selectedIndices.push(idx.id);
}
$("#indices").append(opt);
});
});
/**
 * Builds the "node.click" listener shared by the checkbox trees.
 *
 * The node with id "any" acts as a catch-all entry: clicking it while it is
 * not yet checked clears every selection in the tree, and clicking any other
 * node deselects "any". The tree's own click handler then runs and a
 * debounced search is triggered.
 *
 * @param tree the InspireTree instance the listener is attached to
 * @returns {function(event, node, handler)} listener for "node.click"
 */
function handleTreeClick(tree) {
    return (event, node, handler) => {
        // Take over from the tree's default click behaviour.
        event.preventTreeDefault();

        const clickedAny = node.id === "any";
        if (!clickedAny) {
            tree.node("any").deselect();
        } else if (!node.itree.state.checked) {
            tree.deselect();
        }

        handler();
        searchDebounced();
    };
}
$.jsonPost("es", {
aggs: {
mimeTypes: {
@@ -72,38 +104,90 @@ $.jsonPost("es", {
});
mimeMap.push({"text": "All", "id": "any"});
tree = new InspireTree({
mimeTree = new InspireTree({
selection: {
mode: 'checkbox'
},
data: mimeMap
});
new InspireTreeDOM(tree, {
target: '.tree'
new InspireTreeDOM(mimeTree, {
target: '#mimeTree'
});
tree.on("node.click", function (event, node, handler) {
event.preventTreeDefault();
mimeTree.on("node.click", handleTreeClick(mimeTree));
mimeTree.select();
mimeTree.node("any").deselect();
});
if (node.id === "any") {
if (!node.itree.state.checked) {
tree.deselect();
/**
 * Returns the last dot-separated component of a tag,
 * e.g. "a.b.c" -> "c". A tag without dots is returned unchanged.
 *
 * @param {string} tag dot-separated tag path
 * @returns {string} the final path component
 */
function leafTag(tag) {
    // lastIndexOf yields -1 when there is no dot, so slice(0) keeps the whole string.
    return tag.slice(tag.lastIndexOf(".") + 1);
}
// Tags tree
$.jsonPost("es", {
aggs: {
tags: {
terms: {
field: "tag",
size: 10000
}
} else {
tree.node("any").deselect();
}
handler();
searchDebounced();
},
size: 0,
}).then(resp => {
resp["aggregations"]["tags"]["buckets"]
.sort((a, b) => a["key"].localeCompare(b["key"]))
.forEach(bucket => {
addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
});
tree.select();
tree.node("any").deselect();
tagMap.push({"text": "All", "id": "any"});
tagTree = new InspireTree({
selection: {
mode: 'checkbox'
},
data: tagMap
});
new InspireTreeDOM(tagTree, {
target: '#tagTree'
});
tagTree.on("node.click", handleTreeClick(tagTree));
tagTree.node("any").select();
searchBusy = false;
});
/**
 * Inserts a dot-separated tag into a nested list of tree nodes.
 *
 * Anything after the first "#" in `tag` is ignored. The first path component
 * becomes (or is matched against) a node in `map`; the remaining components
 * are inserted recursively into that node's `children`. Leaf nodes are
 * labelled "<name> (<count>)", intermediate nodes just "<name>".
 *
 * @param {Array} map    list of sibling nodes to insert into (mutated)
 * @param {string} tag   tag path, e.g. "a.b" or "a.b#ff0000"
 * @param id             id stored on every node created for this tag
 * @param count          document count shown on the leaf label
 */
function addTag(map, tag, id, count) {
    const parts = tag.split("#")[0].split(".");
    const isLeaf = parts.length === 1;
    const label = isLeaf ? `${parts[0]} (${count})` : parts[0];

    // Descend into every existing sibling that carries the same label.
    let matched = false;
    for (const node of map) {
        if (node.text !== label) {
            continue;
        }
        matched = true;
        if (!isLeaf) {
            addTag(node.children, parts.slice(1).join("."), id, count);
        }
    }
    if (matched) {
        return;
    }

    // No sibling with this label yet: create it, filling in its subtree first.
    const child = {id: id, text: label, children: []};
    if (!isLeaf) {
        addTag(child.children, parts.slice(1).join("."), id, count);
    }
    map.push(child);
}
new autoComplete({
selector: '#pathBar',
minChars: 1,
delay: 75,
delay: 400,
renderItem: function (item) {
return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>';
},
@@ -168,8 +252,8 @@ function doScroll() {
})
}
function getSelectedMimeTypes() {
let mimeTypes = [];
function getSelectedNodes(tree) {
let selectedNodes = [];
let selected = tree.selected();
@@ -181,11 +265,11 @@ function getSelectedMimeTypes() {
//Only get children
if (selected[i].text.indexOf("(") !== -1) {
mimeTypes.push(selected[i].id);
selectedNodes.push(selected[i].id);
}
}
return mimeTypes
return selectedNodes
}
function search() {
@@ -205,21 +289,37 @@ function search() {
let query = searchBar.value;
let empty = query === "";
let condition = $("#barToggle").prop("checked") && !empty ? "must" : "should";
let condition = empty ? "should" : "must";
let filters = [
{range: {size: {gte: size_min, lte: size_max}}},
{terms: {index: selectedIndices}}
];
let fields = [
"name^8",
"content^3",
"album^8", "artist^8", "title^8", "genre^2", "album_artist^8",
"font_name^6"
];
if ($("#fuzzyToggle").prop("checked")) {
fields.push("content.nGram");
fields.push("name.nGram^3");
}
let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push([{term: {path: path}}])
}
let mimeTypes = getSelectedMimeTypes();
let mimeTypes = getSelectedNodes(mimeTree);
if (!mimeTypes.includes("any")) {
filters.push([{terms: {"mime": mimeTypes}}]);
}
let tags = getSelectedNodes(tagTree);
if (!tags.includes("any")) {
filters.push([{terms: {"tag": tags}}]);
}
$.jsonPost("es?scroll=1", {
"_source": {
excludes: ["content"]
@@ -230,12 +330,7 @@ function search() {
multi_match: {
query: query,
type: "most_fields",
fields: [
"name^8", "name.nGram^3", "content^3",
"content.nGram",
"album^8", "artist^8", "title^8", "genre^2", "album_artist^8",
"font_name^6"
],
fields: fields,
operator: "and"
}
},
@@ -252,7 +347,7 @@ function search() {
content: {},
name: {},
"name.nGram": {},
// font_name: {},
font_name: {},
}
},
aggs: {
@@ -266,14 +361,6 @@ function search() {
//Search stats
searchResults.appendChild(makeStatsCard(searchResult));
//Autocomplete
if (searchResult.hasOwnProperty("suggest") && searchResult["suggest"].hasOwnProperty("path")) {
pathAutoComplete = [];
for (let i = 0; i < searchResult["suggest"]["path"][0]["options"].length; i++) {
pathAutoComplete.push(searchResult["suggest"]["path"][0]["options"][i].text)
}
}
//Setup page
let resultContainer = makeResultContainer();
searchResults.appendChild(resultContainer);
@@ -285,7 +372,6 @@ function search() {
});
}
let pathAutoComplete = [];
let size_min = 0;
let size_max = 10000000000000;
@@ -293,8 +379,8 @@ let searchDebounced = _.debounce(function () {
coolingDown = false;
search()
}, 500);
searchBar.addEventListener("keyup", searchDebounced);
document.getElementById("pathBar").addEventListener("keyup", searchDebounced);
//Size slider
$("#sizeSlider").ionRangeSlider({
@@ -345,15 +431,18 @@ updateIndices();
//Suggest
function getPathChoices() {
return new Promise(getPaths => {
let xhttp = new XMLHttpRequest();
xhttp.onreadystatechange = function () {
if (this.readyState === 4 && this.status === 200) {
getPaths(JSON.parse(xhttp.responseText))
$.jsonPost("es", {
suggest: {
path: {
prefix: pathBar.value,
completion: {
field: "suggest-path",
skip_duplicates: true,
size: 10000
}
}
}
};
xhttp.open("GET", "suggest?prefix=" + pathBar.value, true);
xhttp.send();
});
}).then(resp => getPaths(resp["suggest"]["path"][0]["options"].map(opt => opt["_source"]["path"])));
})
}

View File

@@ -43,9 +43,9 @@ function humanTime(sec_num) {
function debounce(func, wait) {
let timeout;
return function() {
return function () {
let context = this, args = arguments;
let later = function() {
let later = function () {
timeout = null;
func.apply(context, args);
};
@@ -54,3 +54,13 @@ function debounce(func, wait) {
func.apply(context, args);
};
}
/**
 * Computes the relative luminance (0..255 scale) of an RGB hex colour using
 * the Rec. 709 weights 0.2126 / 0.7152 / 0.0722.
 *
 * Accepts the colour with or without a leading "#" ("#rrggbb" or "rrggbb").
 * Previously the first character was always dropped, so a value passed
 * without "#" lost its first hex digit and produced a wrong red channel.
 *
 * @param {string} c hex colour, e.g. "#ff8800" or "ff8800"
 * @returns {number} luminance; 0 when the string is not valid hex
 */
function lum(c) {
    // Strip the "#" only when it is actually there.
    const hex = c.charAt(0) === "#" ? c.substring(1) : c;
    const rgb = parseInt(hex, 16);

    // Bitwise operators coerce NaN to 0, so unparsable input yields 0.
    const r = (rgb >> 16) & 0xff;
    const g = (rgb >> 8) & 0xff;
    const b = (rgb >> 0) & 0xff;

    return 0.2126 * r + 0.7152 * g + 0.0722 * b;
}

View File

@@ -9,9 +9,10 @@
</head>
<body>
<nav class="navbar navbar-expand-lg navbar-light">
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
</nav>
<div class="container">
@@ -23,9 +24,9 @@
<div class="input-group">
<div class="input-group-prepend">
<div class="input-group-text">
<span onclick="document.getElementById('barToggle').click()">Must match&nbsp</span>
<input title="Toggle between 'Should' and 'Must' match mode" type="checkbox" id="barToggle"
onclick="toggleSearchBar()" checked>
<span title="Toggle fuzzy searching" onclick="document.getElementById('fuzzyToggle').click()">Fuzzy&nbsp</span>
<input title="Toggle fuzzy searching" type="checkbox" id="fuzzyToggle"
onclick="toggleFuzzy()" checked>
</div>
</div>
<input id="searchBar" type="search" class="form-control" placeholder="Search">
@@ -41,10 +42,24 @@
</div>
<div class="col">
<label>Mime types</label>
<div class="tree"></div>
<!-- Filter tabs: one tab per tree (mime types / user tags).
     Each tab's aria-controls names the id of its panel, and each panel's
     aria-labelledby names the id of its tab, so assistive technology can
     associate them. The earlier home/profile values referenced ids that do
     not exist in this markup. -->
<ul class="nav nav-tabs" role="tablist">
    <li class="nav-item">
        <a class="nav-link active" id="mime-tab" data-toggle="tab" href="#mime" role="tab" aria-controls="mime" aria-selected="true">Mime Types</a>
    </li>
    <li class="nav-item">
        <a class="nav-link" id="tag-tab" data-toggle="tab" href="#tag" role="tab" aria-controls="tag" aria-selected="false" title="User-defined tags">Tags</a>
    </li>
</ul>
<div class="tab-content" id="myTabContent">
    <div class="tab-pane fade show active" id="mime" role="tabpanel" aria-labelledby="mime-tab">
        <div id="mimeTree" class="tree"></div>
    </div>
    <div class="tab-pane fade" id="tag" role="tabpanel" aria-labelledby="tag-tab">
        <div id="tagTree" class="tree"></div>
    </div>
</div>
</div>
</div>
</div>
</div>