Compare commits

..

1 Commits

Author SHA1 Message Date
c762d0de08 Dark theme, pdf meta, de-serialize bugfix 2019-10-30 21:48:58 -04:00
58 changed files with 822 additions and 2783 deletions

15
.gitmodules vendored
View File

@@ -16,18 +16,3 @@
[submodule "lmdb"] [submodule "lmdb"]
path = lmdb path = lmdb
url = https://github.com/LMDB/lmdb url = https://github.com/LMDB/lmdb
[submodule "utf8.h"]
path = utf8.h
url = https://github.com/sheredom/utf8.h
[submodule "lib/openjpeg"]
path = lib/openjpeg
url = https://github.com/uclouvain/openjpeg
[submodule "lib/harfbuzz"]
path = lib/harfbuzz
url = https://github.com/harfbuzz/harfbuzz
[submodule "lib/libmagic"]
path = lib/libmagic
url = https://github.com/threatstack/libmagic
[submodule "lib/bzip2-1.0.6"]
path = lib/bzip2-1.0.6
url = https://github.com/enthought/bzip2-1.0.6

View File

@@ -23,7 +23,6 @@ if (WITH_SIST2)
src/parsing/text.h src/parsing/text.c src/parsing/text.h src/parsing/text.c
src/index/web.c src/index/web.h src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h src/web/serve.c src/web/serve.h
src/web/auth_basic.h src/web/auth_basic.c
src/index/elastic.c src/index/elastic.h src/index/elastic.c src/index/elastic.h
src/util.c src/util.h src/util.c src/util.h
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
@@ -38,10 +37,7 @@ if (WITH_SIST2)
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h src/cli.c src/cli.h
)
# utf8.h
utf8.h/utf8.h
src/parsing/arc.c src/parsing/arc.h)
endif () endif ()
if (WITH_SIST2_SCAN) if (WITH_SIST2_SCAN)
@@ -71,10 +67,7 @@ if (WITH_SIST2_SCAN)
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h src/cli.c src/cli.h
)
# utf8.h
utf8.h/utf8.h
src/parsing/arc.c src/parsing/arc.h)
endif () endif ()
find_package(PkgConfig REQUIRED) find_package(PkgConfig REQUIRED)
@@ -123,8 +116,8 @@ if (WITH_SIST2)
target_compile_options(sist2 target_compile_options(sist2
PRIVATE PRIVATE
-Ofast -O3
# -march=native # -march=native
-fno-stack-protector -fno-stack-protector
-fomit-frame-pointer -fomit-frame-pointer
) )
@@ -157,10 +150,6 @@ if (WITH_SIST2)
m m
bz2 bz2
magic magic
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype
archive
) )
endif () endif ()
@@ -198,7 +187,7 @@ if (WITH_SIST2_SCAN)
) )
target_compile_options(sist2_scan target_compile_options(sist2_scan
PRIVATE PRIVATE
-Ofast -O3
# -march=native # -march=native
-fno-stack-protector -fno-stack-protector
-fomit-frame-pointer -fomit-frame-pointer
@@ -226,14 +215,6 @@ if (WITH_SIST2_SCAN)
pthread pthread
m m
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype
${PROJECT_SOURCE_DIR}/lib/libarchive.a
${PROJECT_SOURCE_DIR}/lib/liblz4.a
${PROJECT_SOURCE_DIR}/lib/liblzma.a
${PROJECT_SOURCE_DIR}/lib/libzstd.a
) )
endif () endif ()

View File

@@ -1,9 +0,0 @@
FROM ubuntu:19.10
MAINTAINER simon987 <me@simon987.net>
RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7
ADD sist2 /root/sist2
ENTRYPOINT ["/root/sist2"]

View File

@@ -1,9 +0,0 @@
rm ./sist2
cp ../sist2 .
version=$(./sist2 --version)
echo "Version ${version}"
docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
docker push simon987/sist2:${version}
docker push simon987/sist2:latest

View File

@@ -9,32 +9,29 @@ sist2 (Simple incremental search tool)
## Features ## Features
* Fast, low memory usage, multi-threaded * Fast, low memory usage
* Portable (all its features are packaged in a single executable) * Portable (all its features are packaged in a single executable)
* Extracts text from common file types\* * Extracts text from common file types\*
* Generates thumbnails\* * Generates thumbnails\*
* Incremental scanning * Incremental scanning
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
* Recursive scan inside archive files \*\*
\* See [format support](#format-support) \* See [format support](#format-support)
\** See [Archive files](#archive-files)
## Getting Started ## Getting Started
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running 1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
1. 1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases)
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* `docker pull simon987/sist2:latest`
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux) *Windows users*: `sist2` runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
\* *Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
*Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
## Example usage ## Example usage
![demo](demo.gif)
See help page `sist2 --help` for more details. See help page `sist2 --help` for more details.
**Scan a directory** **Scan a directory**
@@ -55,61 +52,19 @@ sist2 index --print ./my_idx > raw_documents.ndjson
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3 sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
``` ```
### Use sist2 with docker
**scan**
```bash
docker run -it \
-v /path/to/files/:/files \
-v $PWD/out/:/out \
simon987/sist2 scan -t 4 /files -o /out/my_idx1
```
**index**
```bash
docker run -it --network host\
-v $PWD/out/:/out \
simon987/sist2 index /out/my_idx1
```
**web**
```bash
docker run --rm --network host -d --name sist2\
-v $PWD/out/my_idx:/idx \
-v $PWD/my/files:/files \
simon987/sist2 web --bind 0.0.0.0 /idx
docker stop sist2
```
## Format support ## Format support
File type | Library | Content | Thumbnail | Metadata File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:--- :---|:---|:---|:---|:---
pdf,xps,cbz,fb2,epub | MuPDF | yes | yes, `png` | title | pdf,xps,cbz,cbr,fb2,epub | MuPDF | yes | yes, `png` | *planned* |
`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags | `audio/*` | libav | - | yes, `jpeg` | ID3 tags |
`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist | `video/*` | libav | - | yes, `jpeg` | *planned* |
`image/*` | ffmpeg | - | yes, `jpeg` | `EXIF:Artist`, `EXIF:ImageDescription` | `image/*` | libav | - | yes, `jpeg` | *planned* |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - | `text/plain` | *(none)* | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no | docx, xlsx, pptx | | *planned* | no | *planned* |
docx, xlsx, pptx | | yes | no | *planned* |
\* *See [Archive files](#archive-files)*
### Archive files
**sist2** will scan files stored in archive files (zip, tar, 7z...) as if
they were directly in the file system. Recursive scanning (archives inside
archives) is also supported.
**Limitations**:
* Parsing media files with formats that require
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
* Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split
into smaller parts.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
## Build from source ## Build from source
@@ -124,18 +79,17 @@ binaries.
apt install git cmake pkg-config libglib2.0-dev\ apt install git cmake pkg-config libglib2.0-dev\
libssl-dev uuid-dev libavformat-dev libswscale-dev \ libssl-dev uuid-dev libavformat-dev libswscale-dev \
python3 libmagic-dev libfreetype6-dev libcurl-dev \ python3 libmagic-dev libfreetype6-dev libcurl-dev \
libbz2-dev yasm libharfbuzz-dev ragel libbz2-dev yasm
``` ```
*(FreeBSD)* *(FreeBSD)*
```bash ```bash
pkg install cmake gcc yasm gmake bash ffmpeg e2fsprogs-uuid\ pkg install cmake gcc yasm gmake bash ffmpeg e2fsprogs-uuid
autotools ragel
``` ```
__
2. Build 2. Build
```bash ```bash
git clone --recurse-submodules https://github.com/simon987/sist2 git clone --recurse-submodules https://github.com/simon987/sist2
./scripts/get_static_libs.sh ./scripts/get_static_libs.sh
cmake . cmake .
make make
``` ```

2
cJSON

Submodule cJSON updated: 533ff8a783...2de7d04aaf

1
lib/ffmpeg Submodule

Submodule lib/ffmpeg added at 0481a1f6e5

1
lib/mupdf Submodule

Submodule lib/mupdf added at 91782a4348

1
lib/onion Submodule

Submodule lib/onion added at d8d4cc9290

View File

@@ -252,9 +252,8 @@ text/html, acgi|htm|html|htmls|htx|shtml
text/javascript, js text/javascript, js
text/mcf, mcf text/mcf, mcf
text/pascal, pas text/pascal, pas
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
text/richtext, rt|rtf|rtx text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc text/scriplet, wsc
text/x-awk, awk text/x-awk, awk
!video/x-jng, jng !video/x-jng, jng
@@ -264,7 +263,7 @@ image/x-xwindowdump, xwd
!image/vnd.adobe.photoshop, psd !image/vnd.adobe.photoshop, psd
text/tab-separated-values, tsv text/tab-separated-values, tsv
text/troff, man|me|ms|roff|t|tr text/troff, man|me|ms|roff|t|tr
text/uri-list, uji|unis|uri|uris text/uri-list, uni|unis|uri|uris
text/vnd.abc, abc text/vnd.abc, abc
text/vnd.fmi.flexstor, flx text/vnd.fmi.flexstor, flx
text/vnd.wap.wmlscript, wmls text/vnd.wap.wmlscript, wmls
@@ -361,58 +360,3 @@ application/x-wine-extension-ini,
application/x-cbz, cbz application/x-cbz, cbz
application/x-cbr, cbr application/x-cbr, cbr
application/x-ms-compress-szdd, fon application/x-ms-compress-szdd, fon
application/x-atari-7800-rom, a78
application/x-nes-rom, nes
application/x-font-pfm, pfm
application/x-gettext-translation,
image/wmf,
application/pgp-keys,
image/x-3ds, 3ds
application/x-lz4, lz4
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.oasis.opendocument.presentation, odp
application/x-msaccess, accdb
application/vnd.oasis.opendocument.spreadsheet, ods
audio/x-aiff, aiff|aif
text/x-ms-regedit, reg
application/x-gamecube-rom,
application/x-nintendo-ds-rom,
text/x-objective-c,
application/x-font-gdos,
application/x-apple-diskimage,
application/x-zstd, zst
video/x-m4v, m4v
message/news,
application/vnd.symbian.install,
application/x-lzh-compressed,
application/x-dosdriver,
application/vnd.tcpdump.pcap, pcap
x-epoc/x-sisx-app,
application/x-avira-qua,
video/MP2T,
application/x-snappy-framed,
application/x-lz4+json, jsonlz4
application/x-dmp, dmp
application/zlib, z
application/x-pgp-keyring,
application/x-gdbm,
application/x-font-pf2, pf2
application/x-zip,
application/x-coredump,
application/x-java-jmod, jmod
application/x-terminfo,
application/x-terminfo2,
application/x-arc,
application/vnd.lotus-1-2-3,
image/x-win-bitmap,
application/x-maxis-dbpf,
text/PGP,
audio/x-hx-aac-adts,
application/x-chrome-extension,
image/heic, heic
image/x-gem,
application/x-lzma, lzma
application/warc, warc
application/x-lz4, lz4
application/x-lzip, lz
application/x-lzop, lzo
1 application/arj arj
252 text/javascript js
253 text/mcf mcf
254 text/pascal pas
255 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
256 text/richtext rt|rtf|rtx
text/rtf
257 text/scriplet wsc
258 text/x-awk awk
259 !video/x-jng jng
263 !image/vnd.adobe.photoshop psd
264 text/tab-separated-values tsv
265 text/troff man|me|ms|roff|t|tr
266 text/uri-list uji|unis|uri|uris uni|unis|uri|uris
267 text/vnd.abc abc
268 text/vnd.fmi.flexstor flx
269 text/vnd.wap.wmlscript wmls
360 application/x-cbz cbz
361 application/x-cbr cbr
362 application/x-ms-compress-szdd fon
application/x-atari-7800-rom a78
application/x-nes-rom nes
application/x-font-pfm pfm
application/x-gettext-translation
image/wmf
application/pgp-keys
image/x-3ds 3ds
application/x-lz4 lz4
application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
application/vnd.oasis.opendocument.presentation odp
application/x-msaccess accdb
application/vnd.oasis.opendocument.spreadsheet ods
audio/x-aiff aiff|aif
text/x-ms-regedit reg
application/x-gamecube-rom
application/x-nintendo-ds-rom
text/x-objective-c
application/x-font-gdos
application/x-apple-diskimage
application/x-zstd zst
video/x-m4v m4v
message/news
application/vnd.symbian.install
application/x-lzh-compressed
application/x-dosdriver
application/vnd.tcpdump.pcap pcap
x-epoc/x-sisx-app
application/x-avira-qua
video/MP2T
application/x-snappy-framed
application/x-lz4+json jsonlz4
application/x-dmp dmp
application/zlib z
application/x-pgp-keyring
application/x-gdbm
application/x-font-pf2 pf2
application/x-zip
application/x-coredump
application/x-java-jmod jmod
application/x-terminfo
application/x-terminfo2
application/x-arc
application/vnd.lotus-1-2-3
image/x-win-bitmap
application/x-maxis-dbpf
text/PGP
audio/x-hx-aac-adts
application/x-chrome-extension
image/heic heic
image/x-gem
application/x-lzma lzma
application/warc warc
application/x-lz4 lz4
application/x-lzip lz
application/x-lzop lzo

View File

@@ -80,9 +80,6 @@
"analyzer": "my_nGram" "analyzer": "my_nGram"
} }
} }
},
"tag": {
"type": "keyword"
} }
} }
} }

View File

@@ -1,117 +0,0 @@
## User scripts
*This document is under construction; a more in-depth guide is coming soon.*
During the `index` step, you can use the `--script-file <script>` option to
modify documents or add user tags. This option is mainly used to
implement automatic tagging based on file attributes.
The scripting language used
([Painless Scripting Language](https://www.elastic.co/guide/en/elasticsearch/painless/7.4/index.html))
is very similar to Java, but you should be able to create user scripts
without programming experience at all if you're somewhat familiar with
regex.
This is the base structure of the documents we're working with:
```json
{
"_id": "e171405c-fdb5-4feb-bb32-82637bc32084",
"_index": "sist2",
"_type": "_doc",
"_source": {
"index": "206b3050-e821-421a-891d-12fcf6c2db0d",
"mime": "application/json",
"size": 1799,
"mtime": 1545443685,
"extension": "md",
"name": "README",
"path": "sist2/scripting",
"content": "..."
}
}
```
**Example script**
This script checks whether the `genre` attribute exists; if it does,
it adds the `genre.<genre>` tag.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase())
}
```
You can use `.` to create a hierarchical tag tree:
![scripting/genre_example](genre_example.png)
To use regular expressions, you need to add this line in `/etc/elasticsearch/elasticsearch.yml`
```yaml
script.painless.regex.enabled: true
```
Or, if you're using Docker, add `-e "script.painless.regex.enabled=true"`
### Examples
If `(20XX)` is in the file name, add the `year.<year>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) {
tags.add("year." + m.group(1))
}
```
Use the default *Calibre* folder structure to infer the author.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
// We expect the book path to look like this:
// /path/to/Calibre Library/Author/Title/Title - Author.pdf
if (ctx._source.name.contains("-") && ctx._source.extension == "pdf") {
String[] names = ctx._source.name.splitOnToken('-');
tags.add("author." + names[1].strip());
}
```
If the file name matches a specific pattern `AAAA-000 fName1 lName1, <fName2 lName2>...`, add the `actress.<actress>` and
`studio.<studio>` tags:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /([A-Z]{4})-[0-9]{3} (.*)/.matcher(ctx._source.name);
if (m.find()) {
tags.add("studio." + m.group(1));
// Take the matched group (.*), and add a tag for
// each name, separated by comma
for (String name : m.group(2).splitOnToken(',')) {
tags.add("actress." + name);
}
}
```
Use the name of the last folder (`/path/to/<studio>/file.mp4`) as the `studio.<studio>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```
Use the name of the last folder (`/path/to/<studio>/file.mp4`) as the `studio.<studio>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

View File

@@ -2,28 +2,12 @@
cd lib cd lib
cd mupdf cd mupdf
USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no make -j 4 HAVE_X11=no HAVE_GLUT=no make -j 4
cd .. cd ..
mv mupdf/build/release/libmupdf.a . mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a . mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
make -j 4
cd ..
mv openjpeg/bin/libopenjp2.a .
# harfbuzz
cd harfbuzz
./autogen.sh
./configure --disable-shared --enable-static
make -j 4
cd ..
mv harfbuzz/src/.libs/libharfbuzz.a .
# ffmpeg # ffmpeg
cd ffmpeg cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \ ./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
@@ -54,12 +38,14 @@ cd ../..
mv onion/build/src/onion/libonion_static.a . mv onion/build/src/onion/libonion_static.a .
#bzip2 #bzip2
git clone https://github.com/enthought/bzip2-1.0.6
cd bzip2-1.0.6 cd bzip2-1.0.6
make -j 4 make -j 4
cd .. cd ..
mv bzip2-1.0.6/libbz2.a . mv bzip2-1.0.6/libbz2.a .
# magic # magic
git clone https://github.com/threatstack/libmagic
cd libmagic cd libmagic
./autogen.sh ./autogen.sh
./configure --enable-static --disable-shared ./configure --enable-static --disable-shared
@@ -67,39 +53,5 @@ make -j 4
cd .. cd ..
mv libmagic/src/.libs/libmagic.a . mv libmagic/src/.libs/libmagic.a .
# libarchive
git clone https://github.com/libarchive/libarchive
cd libarchive/build
./autogen.sh
cd ..
./configure --without-nettle --without-expat --without-xml2 --without-openssl
make -j 4
cd ..
mv libarchive/.libs/libarchive.a .
# lz4
git clone https://github.com/lz4/lz4
cd lz4
make -j 4
cd ..
mv lz4/lib/liblz4.a .
# lzma
wget https://newcontinuum.dl.sourceforge.net/project/lzmautils/xz-5.2.3.tar.gz
tar -xzf xz-5.2.3.tar.gz
rm xz-5.2.3.tar.gz
cd xz-5.2.3
./autogen.sh
./configure
make -j 4
cd ..
mv xz-5.2.3/src/liblzma/.libs/liblzma.a .
# zstd
git clone https://github.com/facebook/zstd
cd zstd
make -j 4
cd ..
mv zstd/lib/libzstd.a .
cd .. cd ..

View File

@@ -9,22 +9,6 @@ cd ..
mv mupdf/build/release/libmupdf.a . mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a . mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
gmake -j 4
cd ..
mv openjpeg/bin/libopenjp2.a .
# harfbuzz
cd harfbuzz
./autogen.sh
./configure --disable-shared --enable-static
gmake -j 4
cd ..
mv harfbuzz/src/.libs/libharfbuzz.a .
# ffmpeg # ffmpeg
cd ffmpeg cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \ ./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
@@ -42,12 +26,14 @@ mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a . mv ffmpeg/libswscale/libswscale.a .
#bzip2 #bzip2
git clone https://github.com/enthought/bzip2-1.0.6
cd bzip2-1.0.6 cd bzip2-1.0.6
make -j 4 make -j 4
cd .. cd ..
mv bzip2-1.0.6/libbz2.a . mv bzip2-1.0.6/libbz2.a .
# magic # magic
git clone https://github.com/threatstack/libmagic
cd libmagic cd libmagic
./autogen.sh ./autogen.sh
./configure --enable-static --disable-shared ./configure --enable-static --disable-shared

View File

@@ -12,8 +12,7 @@ major_mime = {
"audio": 7, "audio": 7,
"image": 8, "image": 8,
"text": 9, "text": 9,
"application": 10, "application": 10
"x-epoc": 11,
} }
pdf = ( pdf = (
@@ -34,28 +33,6 @@ font = (
"font/woff2" "font/woff2"
) )
# Archive "formats"
archive = (
"application/x-tar",
"application/zip",
"application/x-rar",
"application/x-arc",
"application/x-warc",
"application/x-7z-compressed",
)
# Archive "filters"
arc_filter = (
"application/gzip",
"application/x-bzip2",
"application/x-xz",
"application/x-zstd",
"application/x-lzma",
"application/x-lz4",
"application/x-lzip",
"application/x-lzop",
)
cnt = 1 cnt = 1
@@ -70,10 +47,6 @@ def mime_id(mime):
mime_id += " | 0x40000000" mime_id += " | 0x40000000"
elif mime in font: elif mime in font:
mime_id += " | 0x20000000" mime_id += " | 0x20000000"
elif mime in archive:
mime_id += " | 0x10000000"
elif mime in arc_filter:
mime_id += " | 0x08000000"
elif mime == "application/x-empty": elif mime == "application/x-empty":
return "1" return "1"
return mime_id return mime_id

111
src/cli.c
View File

@@ -1,13 +1,12 @@
#include "cli.h" #include "cli.h"
#define DEFAULT_OUTPUT "index.sist2/" #define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 32768 #define DEFAULT_CONTENT_SIZE 4096
#define DEFAULT_QUALITY 5 #define DEFAULT_QUALITY 15
#define DEFAULT_SIZE 500 #define DEFAULT_SIZE 200
#define DEFAULT_REWRITE_URL "" #define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200" #define DEFAULT_ES_URL "http://localhost:9200"
#define DEFAULT_BATCH_SIZE 100
#define DEFAULT_BIND_ADDR "localhost" #define DEFAULT_BIND_ADDR "localhost"
#define DEFAULT_PORT "4090" #define DEFAULT_PORT "4090"
@@ -15,39 +14,9 @@
scan_args_t *scan_args_create() { scan_args_t *scan_args_create() {
scan_args_t *args = calloc(sizeof(scan_args_t), 1); scan_args_t *args = calloc(sizeof(scan_args_t), 1);
args->depth = -1;
return args; return args;
} }
void scan_args_destroy(scan_args_t *args) {
if (args->name != NULL) {
free(args->name);
}
if (args->path != NULL) {
free(args->path);
}
if (args->output != NULL) {
free(args->output);
}
free(args);
}
#ifndef SIST_SCAN_ONLY
void index_args_destroy(index_args_t *args) {
//todo
free(args);
}
void web_args_destroy(web_args_t *args) {
//todo
free(args);
}
#endif
int scan_args_validate(scan_args_t *args, int argc, const char **argv) { int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (argc < 2) { if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n"); fprintf(stderr, "Required positional argument: PATH.\n");
@@ -56,7 +25,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char *abs_path = abspath(argv[1]); char *abs_path = abspath(argv[1]);
if (abs_path == NULL) { if (abs_path == NULL) {
fprintf(stderr, "File not found: %s\n", argv[1]); fprintf(stderr, "File not found: %s", argv[1]);
return 1; return 1;
} else { } else {
args->path = abs_path; args->path = abs_path;
@@ -65,7 +34,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->incremental != NULL) { if (args->incremental != NULL) {
abs_path = abspath(args->incremental); abs_path = abspath(args->incremental);
if (abs_path == NULL) { if (abs_path == NULL) {
fprintf(stderr, "File not found: %s\n", args->incremental); fprintf(stderr, "File not found: %s", args->incremental);
return 1; return 1;
} }
} }
@@ -79,13 +48,16 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->size == 0) { if (args->size == 0) {
args->size = DEFAULT_SIZE; args->size = DEFAULT_SIZE;
} else if (args->size > 0 && args->size < 32) { } else if (args->size <= 0) {
printf("Invalid size: %d\n", args->content_size); fprintf(stderr, "Invalid size: %d\n", args->size);
return 1; return 1;
} }
if (args->content_size == 0) { if (args->content_size == 0) {
args->content_size = DEFAULT_CONTENT_SIZE; args->content_size = DEFAULT_CONTENT_SIZE;
} else if (args->content_size <= 0) {
fprintf(stderr, "Invalid content-size: %d\n", args->content_size);
return 1;
} }
if (args->threads == 0) { if (args->threads == 0) {
@@ -108,12 +80,6 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1; return 1;
} }
if (args->depth < 0) {
args->depth = G_MAXINT32;
} else {
args->depth += 1;
}
if (args->name == NULL) { if (args->name == NULL) {
args->name = g_path_get_basename(args->output); args->name = g_path_get_basename(args->output);
} }
@@ -121,24 +87,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->rewrite_url == NULL) { if (args->rewrite_url == NULL) {
args->rewrite_url = DEFAULT_REWRITE_URL; args->rewrite_url = DEFAULT_REWRITE_URL;
} }
if (args->archive == NULL || strcmp(args->archive, "recurse") == 0) {
args->archive_mode = ARC_MODE_RECURSE;
} else if (strcmp(args->archive, "list") == 0) {
args->archive_mode = ARC_MODE_LIST;
} else if (strcmp(args->archive, "shallow") == 0) {
args->archive_mode = ARC_MODE_SHALLOW;
} else if (strcmp(args->archive, "skip") == 0) {
args->archive_mode = ARC_MODE_SKIP;
} else {
fprintf(stderr, "Archive mode must be one of (skip, list, shallow, recurse), got '%s'", args->archive);
return 1;
}
return 0; return 0;
} }
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
int index_args_validate(index_args_t *args, int argc, const char **argv) { int index_args_validate(index_args_t *args, int argc, const char **argv) {
if (argc < 2) { if (argc < 2) {
@@ -148,47 +100,15 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
char *index_path = abspath(argv[1]); char *index_path = abspath(argv[1]);
if (index_path == NULL) { if (index_path == NULL) {
fprintf(stderr, "File not found: %s\n", argv[1]); fprintf(stderr, "File not found: %s", argv[1]);
return 1; return 1;
} else { } else {
args->index_path = argv[1]; args->index_path = argv[1];
free(index_path);
} }
if (args->es_url == NULL) { if (args->es_url == NULL) {
args->es_url = DEFAULT_ES_URL; args->es_url = DEFAULT_ES_URL;
} }
if (args->script_path != NULL) {
struct stat info;
int res = stat(args->script_path, &info);
if (res == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
int fd = open(args->script_path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
args->script = malloc(info.st_size + 1);
res = read(fd, args->script, info.st_size);
if (res == -1) {
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
*(args->script + info.st_size) = '\0';
close(fd);
}
if (args->batch_size == 0) {
args->batch_size = DEFAULT_BATCH_SIZE;
}
return 0; return 0;
} }
@@ -211,19 +131,13 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
args->port = DEFAULT_PORT; args->port = DEFAULT_PORT;
} }
if (args->credentials != NULL) {
args->b64credentials = onion_base64_encode(args->credentials, (int) strlen(args->credentials));
//Remove trailing newline
*(args->b64credentials + strlen(args->b64credentials) - 1) = '\0';
}
args->index_count = argc - 1; args->index_count = argc - 1;
args->indices = argv + 1; args->indices = argv + 1;
for (int i = 0; i < args->index_count; i++) { for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]); char *abs_path = abspath(args->indices[i]);
if (abs_path == NULL) { if (abs_path == NULL) {
fprintf(stderr, "File not found: %s\n", abs_path); fprintf(stderr, "File not found: %s", abs_path);
return 1; return 1;
} }
} }
@@ -239,6 +153,5 @@ web_args_t *web_args_create() {
web_args_t *args = calloc(sizeof(web_args_t), 1); web_args_t *args = calloc(sizeof(web_args_t), 1);
return args; return args;
} }
#endif #endif

View File

@@ -12,24 +12,17 @@ typedef struct scan_args {
char *output; char *output;
char *rewrite_url; char *rewrite_url;
char *name; char *name;
int depth;
char *path; char *path;
char *archive;
archive_mode_t archive_mode;
} scan_args_t; } scan_args_t;
scan_args_t *scan_args_create(); scan_args_t *scan_args_create();
void scan_args_destroy(scan_args_t *args);
int scan_args_validate(scan_args_t *args, int argc, const char **argv); int scan_args_validate(scan_args_t *args, int argc, const char **argv);
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
typedef struct index_args { typedef struct index_args {
char *es_url; char *es_url;
const char *index_path; const char *index_path;
const char *script_path;
char *script;
int print; int print;
int batch_size;
int force_reset; int force_reset;
} index_args_t; } index_args_t;
@@ -37,17 +30,12 @@ typedef struct web_args {
char *es_url; char *es_url;
char *bind; char *bind;
char *port; char *port;
char *credentials;
char *b64credentials;
int index_count; int index_count;
const char **indices; const char **indices;
} web_args_t; } web_args_t;
index_args_t *index_args_create(); index_args_t *index_args_create();
void index_args_destroy(index_args_t *args);
web_args_t *web_args_create(); web_args_t *web_args_create();
void web_args_destroy(web_args_t *args);
int index_args_validate(index_args_t *args, int argc, const char **argv); int index_args_validate(index_args_t *args, int argc, const char **argv);
int web_args_validate(web_args_t *args, int argc, const char **argv); int web_args_validate(web_args_t *args, int argc, const char **argv);

View File

@@ -15,8 +15,6 @@ struct {
int threads; int threads;
int content_size; int content_size;
float tn_qscale; float tn_qscale;
int depth;
archive_mode_t archive_mode;
size_t stat_tn_size; size_t stat_tn_size;
size_t stat_index_size; size_t stat_index_size;
@@ -31,13 +29,11 @@ struct {
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
struct { struct {
char *es_url; char *es_url;
int batch_size;
} IndexCtx; } IndexCtx;
struct { struct {
char *es_url; char *es_url;
int index_count; int index_count;
char *b64credentials;
struct index_t indices[16]; struct index_t indices[16];
} WebCtx; } WebCtx;
#endif #endif

View File

@@ -6,9 +6,11 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <cJSON/cJSON.h> #include <cJSON/cJSON.h>
#include <src/ctx.h>
#include "static_generated.c" #include "static_generated.c"
#define BULK_INDEX_SIZE 100
typedef struct es_indexer { typedef struct es_indexer {
int queued; int queued;
@@ -52,40 +54,6 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
elastic_index_line(bulk_line); elastic_index_line(bulk_line);
} }
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
cJSON *body = cJSON_CreateObject();
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
cJSON_AddStringToObject(script_obj, "lang", "painless");
cJSON_AddStringToObject(script_obj, "source", script);
cJSON *query = cJSON_AddObjectToObject(body, "query");
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id);
char * str = cJSON_Print(body);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
printf("Executed user script <%d>\n", r->status_code);
cJSON *resp = cJSON_Parse(r->body);
cJSON_free(str);
cJSON_Delete(body);
free_response(r);
cJSON *error = cJSON_GetObjectItem(resp, "error");
if (error != NULL) {
char *error_str = cJSON_Print(error);
fprintf(stderr, "User script error: \n%s\n", error_str);
cJSON_free(error_str);
}
cJSON_Delete(resp);
}
void elastic_flush() { void elastic_flush() {
if (Indexer == NULL) { if (Indexer == NULL) {
@@ -130,30 +98,16 @@ void elastic_flush() {
char bulk_url[4096]; char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url); snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url);
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson"); response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
if (r->status_code == 0) {
fprintf(stderr, "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url);
exit(1);
}
printf("Indexed %3d documents (%zukB) <%d>\n", count, buf_cur / 1024, r->status_code); printf("Indexed %3d documents (%zukB) <%d>\n", count, buf_cur / 1024, r->status_code);
cJSON *ret_json = cJSON_Parse(r->body); cJSON *ret_json = cJSON_Parse(r->body);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) { if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
cJSON *err; fprintf(stderr, "%s\n", r->body);
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
char* str = cJSON_Print(err);
fprintf(stderr, "%s\n", str);
cJSON_free(str);
}
}
} }
cJSON_Delete(ret_json); cJSON_Delete(ret_json);
free_response(r); free_response(r);
free(buf);
} }
void elastic_index_line(es_bulk_line_t *line) { void elastic_index_line(es_bulk_line_t *line) {
@@ -172,14 +126,15 @@ void elastic_index_line(es_bulk_line_t *line) {
Indexer->queued += 1; Indexer->queued += 1;
if (Indexer->queued >= IndexCtx.batch_size) { if (Indexer->queued >= BULK_INDEX_SIZE) {
elastic_flush(); elastic_flush();
} }
} }
es_indexer_t *create_indexer(const char *url) { es_indexer_t *create_indexer(const char *url) {
char *es_url = malloc(strlen(url) + 1); size_t url_len = strlen(url);
char *es_url = malloc(url_len);
strcpy(es_url, url); strcpy(es_url, url);
es_indexer_t *indexer = malloc(sizeof(es_indexer_t)); es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
@@ -192,7 +147,7 @@ es_indexer_t *create_indexer(const char *url) {
return indexer; return indexer;
} }
void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) { void destroy_indexer() {
char url[4096]; char url[4096];
@@ -201,15 +156,6 @@ void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {
printf("Refresh index <%d>\n", r->status_code); printf("Refresh index <%d>\n", r->status_code);
free_response(r); free_response(r);
if (script != NULL) {
execute_update_script(script, index_id);
}
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Refresh index <%d>\n", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url); snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
r = web_post(url, "", NULL); r = web_post(url, "", NULL);
printf("Merge index <%d>\n", r->status_code); printf("Merge index <%d>\n", r->status_code);

View File

@@ -24,7 +24,7 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char* es_url); es_indexer_t *create_indexer(const char* es_url);
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]); void destroy_indexer();
void elastic_init(int force_reset); void elastic_init(int force_reset);

File diff suppressed because one or more lines are too long

View File

@@ -1,7 +1,7 @@
#include "src/ctx.h" #include "src/ctx.h"
#include "serialize.h" #include "serialize.h"
static __thread int index_fd = -1; static __thread int IndexFd = -1;
typedef struct { typedef struct {
unsigned char uuid[16]; unsigned char uuid[16];
@@ -54,12 +54,6 @@ index_descriptor_t read_index_descriptor(char *path) {
struct stat info; struct stat info;
stat(path, &info); stat(path, &info);
int fd = open(path, O_RDONLY); int fd = open(path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Invalid/corrupt index (Could not find descriptor)\n");
exit(1);
}
char *buf = malloc(info.st_size + 1); char *buf = malloc(info.st_size + 1);
read(fd, buf, info.st_size); read(fd, buf, info.st_size);
*(buf + info.st_size) = '\0'; *(buf + info.st_size) = '\0';
@@ -72,7 +66,7 @@ index_descriptor_t read_index_descriptor(char *path) {
strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring); strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring); strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring); strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short) strlen(descriptor.root); descriptor.root_len = (short)strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring); strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring); strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
@@ -111,8 +105,6 @@ char *get_meta_key_text(enum metakey meta_key) {
return "title"; return "title";
case MetaFontName: case MetaFontName:
return "font_name"; return "font_name";
case MetaParent:
return "parent";
default: default:
return NULL; return NULL;
} }
@@ -121,13 +113,13 @@ char *get_meta_key_text(enum metakey meta_key) {
void write_document(document_t *doc) { void write_document(document_t *doc) {
if (index_fd == -1) { if (IndexFd == -1) {
char dstfile[PATH_MAX]; char dstfile[PATH_MAX];
pthread_t self = pthread_self(); pthread_t self = pthread_self();
snprintf(dstfile, PATH_MAX, "%s_index_%lu", ScanCtx.index.path, self); snprintf(dstfile, PATH_MAX, "%s_index_%lu", ScanCtx.index.path, self);
index_fd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR); IndexFd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
if (index_fd == -1) { if (IndexFd == -1) {
perror("open"); perror("open");
} }
} }
@@ -160,16 +152,13 @@ void write_document(document_t *doc) {
} }
dyn_buffer_write_char(&buf, '\n'); dyn_buffer_write_char(&buf, '\n');
int res = write(index_fd, buf.buf, buf.cur); write(IndexFd, buf.buf, buf.cur);
if (res == -1) {
perror("write");
}
ScanCtx.stat_index_size += buf.cur; ScanCtx.stat_index_size += buf.cur;
dyn_buffer_destroy(&buf); dyn_buffer_destroy(&buf);
} }
void thread_cleanup() { void serializer_cleanup() {
close(index_fd); close(IndexFd);
} }
void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func func) { void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func func) {
@@ -192,7 +181,7 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
uuid_unparse(line.uuid, uuid_str); uuid_unparse(line.uuid, uuid_str);
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime)); cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
cJSON_AddNumberToObject(document, "size", (double) line.size); cJSON_AddNumberToObject(document, "size", (double)line.size);
cJSON_AddNumberToObject(document, "mtime", line.mtime); cJSON_AddNumberToObject(document, "mtime", line.mtime);
int c; int c;
@@ -219,16 +208,11 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
while (key != '\n') { while (key != '\n') {
switch (key) { switch (key) {
case MetaWidth: case MetaWidth:
case MetaHeight: { case MetaHeight:
int value;
fread(&value, sizeof(int), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
break;
}
case MetaMediaDuration: case MetaMediaDuration:
case MetaMediaBitrate: { case MetaMediaBitrate: {
long value; int value;
fread(&value, sizeof(long), 1, file); fread(&value, sizeof(int), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), value); cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
break; break;
} }
@@ -249,11 +233,10 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
case MetaAlbumArtist: case MetaAlbumArtist:
case MetaGenre: case MetaGenre:
case MetaFontName: case MetaFontName:
case MetaParent:
case MetaTitle: { case MetaTitle: {
buf.cur = 0; buf.cur = 0;
while ((c = getc(file)) != 0) { while ((c = getc(file)) != 0) {
if (SHOULD_KEEP_CHAR(c) || c == ' ') { if (!(SHOULD_IGNORE_CHAR(c)) || c == ' ') {
dyn_buffer_write_char(&buf, (char) c); dyn_buffer_write_char(&buf, (char) c);
} }
} }
@@ -261,18 +244,14 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf); cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf);
break; break;
} }
default:
fprintf(stderr, "Invalid meta key (corrupt index): %x\n", key);
break;
} }
key = getc(file); key = getc(file);
} }
func(document, uuid_str); func(document, uuid_str);
cJSON_Delete(document); cJSON_free(document);
} }
dyn_buffer_destroy(&buf);
fclose(file); fclose(file);
} }

View File

@@ -18,7 +18,7 @@ void incremental_read(GHashTable *table, const char *filepath);
/** /**
* Must be called after write_document * Must be called after write_document
*/ */
void thread_cleanup(); void serializer_cleanup();
void write_index_descriptor(char *path, index_descriptor_t *desc); void write_index_descriptor(char *path, index_descriptor_t *desc);

View File

@@ -15,7 +15,7 @@ store_t *store_create(char *path) {
); );
if (open_ret != 0) { if (open_ret != 0) {
fprintf(stderr, "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path); fprintf(stderr, "Error while opening store: %s", mdb_strerror(open_ret));
exit(1); exit(1);
} }

View File

@@ -1,36 +1,28 @@
#include "walk.h" #include "walk.h"
#include "src/ctx.h" #include "src/ctx.h"
__always_inline parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) {
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath); int len = (int) strlen(filepath);
parse_job_t *job = malloc(sizeof(parse_job_t) + len); parse_job_t *job = malloc(sizeof(parse_job_t) + len);
strcpy(job->filepath, filepath); memcpy(&(job->filepath), filepath, len + 1);
job->base = base; job->base = base;
char *p = strrchr(filepath + base, '.'); char *p = strrchr(filepath + base, '.');
if (p != NULL) { if (p != NULL) {
job->ext = (int) (p - filepath + 1); job->ext = (int)(p - filepath + 1);
} else { } else {
job->ext = len; job->ext = len;
} }
job->info = *info; memcpy(&(job->info), info, sizeof(struct stat));
memset(job->parent, 0, 16);
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;
job->vfile.close = fs_close;
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;
return job; return job;
} }
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) { int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) { if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base); parse_job_t *job = create_parse_job(filepath, info, ftw->base);
tpool_add_work(ScanCtx.pool, parse, job); tpool_add_work(ScanCtx.pool, parse, job);
} }

View File

@@ -10,7 +10,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0" #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.1.10"; static const char *const Version = "1.0.11";
static const char *const usage[] = { static const char *const usage[] = {
"sist2 scan [OPTION]... PATH", "sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX", "sist2 index [OPTION]... INDEX",
@@ -19,9 +19,9 @@ static const char *const usage[] = {
}; };
void global_init() { void global_init() {
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
curl_global_init(CURL_GLOBAL_NOTHING); curl_global_init(CURL_GLOBAL_NOTHING);
#endif #endif
av_log_set_level(AV_LOG_QUIET); av_log_set_level(AV_LOG_QUIET);
} }
@@ -41,22 +41,10 @@ void init_dir(const char *dirpath) {
void scan_print_header() { void scan_print_header() {
printf("sist2 V%s\n", Version); printf("sist2 V%s\n", Version);
printf("---------------------\n"); printf("---------------------\n");
printf("threads\t\t\t%d\n", ScanCtx.threads); printf("threads\t\t%d\n", ScanCtx.threads);
printf("tn_qscale\t\t%.1f/31.0\n", ScanCtx.tn_qscale); printf("tn_qscale\t%.1f/31.0\n", ScanCtx.tn_qscale);
printf("tn_size\t\t%dpx\n", ScanCtx.tn_size);
if (ScanCtx.tn_size > 0) { printf("output\t\t%s\n", ScanCtx.index.path);
printf("tn_size\t\t\t%dpx\n", ScanCtx.tn_size);
} else {
printf("tn_size\t\t\tdisabled\n");
}
if (ScanCtx.content_size > 0) {
printf("content_size\t\t%d B\n", ScanCtx.content_size);
} else {
printf("content_size\t\t\tdisabled\n");
}
printf("output\t\t\t%s\n", ScanCtx.index.path);
} }
void sist2_scan(scan_args_t *args) { void sist2_scan(scan_args_t *args) {
@@ -64,12 +52,11 @@ void sist2_scan(scan_args_t *args) {
ScanCtx.tn_qscale = args->quality; ScanCtx.tn_qscale = args->quality;
ScanCtx.tn_size = args->size; ScanCtx.tn_size = args->size;
ScanCtx.content_size = args->content_size; ScanCtx.content_size = args->content_size;
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
ScanCtx.threads = args->threads; ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
ScanCtx.archive_mode = args->archive_mode;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path)); strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name)); strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root)); strcpy(ScanCtx.index.desc.root, args->path);
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root); ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
init_dir(ScanCtx.index.path); init_dir(ScanCtx.index.path);
@@ -106,8 +93,6 @@ void sist2_scan(scan_args_t *args) {
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)); printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
} }
ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root); walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool); tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool); tpool_destroy(ScanCtx.pool);
@@ -139,11 +124,9 @@ void sist2_scan(scan_args_t *args) {
} }
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
void sist2_index(index_args_t *args) { void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url; IndexCtx.es_url = args->es_url;
IndexCtx.batch_size = args->batch_size;
if (!args->print) { if (!args->print) {
elastic_init(args->force_reset); elastic_init(args->force_reset);
@@ -179,11 +162,10 @@ void sist2_index(index_args_t *args) {
read_index(file_path, desc.uuid, f); read_index(file_path, desc.uuid, f);
} }
} }
closedir(dir);
if (!args->print) { if (!args->print) {
elastic_flush(); elastic_flush();
destroy_indexer(args->script, desc.uuid); destroy_indexer();
} }
} }
@@ -191,7 +173,6 @@ void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url; WebCtx.es_url = args->es_url;
WebCtx.index_count = args->index_count; WebCtx.index_count = args->index_count;
WebCtx.b64credentials = args->b64credentials;
for (int i = 0; i < args->index_count; i++) { for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]); char *abs_path = abspath(args->indices[i]);
@@ -213,7 +194,6 @@ void sist2_web(web_args_t *args) {
serve(args->bind, args->port); serve(args->bind, args->port);
} }
#endif #endif
@@ -222,54 +202,41 @@ int main(int argc, const char *argv[]) {
global_init(); global_init();
scan_args_t *scan_args = scan_args_create(); scan_args_t *scan_args = scan_args_create();
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
index_args_t *index_args = index_args_create(); index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create(); web_args_t *web_args = web_args_create();
#endif #endif
int arg_version = 0; char * common_es_url = NULL;
char *common_es_url = NULL;
struct argparse_option options[] = { struct argparse_option options[] = {
OPT_HELP(), OPT_HELP(),
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
OPT_GROUP("Scan options"), OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"), OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &scan_args->quality, OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"), "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=15"),
OPT_INTEGER(0, "size", &scan_args->size, OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=200"),
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
OPT_INTEGER(0, "content-size", &scan_args->content_size, OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768"), "Number of bytes to be extracted from text documents. DEFAULT=4096"),
OPT_STRING(0, "incremental", &scan_args->incremental, OPT_STRING(0, "incremental", &scan_args->incremental,
"Reuse an existing index and only scan modified files."), "Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"), OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."), OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"), OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
"Use 0 to only scan files in PATH. DEFAULT: -1"),
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
"skip: Don't parse, list: only get file names as text, "
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
OPT_GROUP("Index options"), OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."), OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. " OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"), "(You must use this option the first time you use the index command)"),
OPT_GROUP("Web options"), OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"), OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"), OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"), #endif
#endif
OPT_END(), OPT_END(),
}; };
@@ -279,15 +246,10 @@ int main(int argc, const char *argv[]) {
argparse_describe(&argparse, DESCRIPTION, EPILOG); argparse_describe(&argparse, DESCRIPTION, EPILOG);
argc = argparse_parse(&argparse, argc, argv); argc = argparse_parse(&argparse, argc, argv);
if (arg_version) { #ifndef SIST_SCAN_ONLY
printf(Version);
exit(0);
}
#ifndef SIST_SCAN_ONLY
web_args->es_url = common_es_url; web_args->es_url = common_es_url;
index_args->es_url = common_es_url; index_args->es_url = common_es_url;
#endif #endif
if (argc == 0) { if (argc == 0) {
argparse_usage(&argparse); argparse_usage(&argparse);
@@ -302,7 +264,7 @@ int main(int argc, const char *argv[]) {
} }
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
else if (strcmp(argv[0], "index") == 0) { else if (strcmp(argv[0], "index") == 0) {
int err = index_args_validate(index_args, argc, argv); int err = index_args_validate(index_args, argc, argv);
@@ -320,20 +282,12 @@ int main(int argc, const char *argv[]) {
sist2_web(web_args); sist2_web(web_args);
} }
#endif #endif
else { else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]); fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse); argparse_usage(&argparse);
return 1; return 1;
} }
printf("\n"); printf("\n");
scan_args_destroy(scan_args);
#ifndef SIST_SCAN_ONLY
index_args_destroy(index_args);
web_args_destroy(web_args);
#endif
return 0; return 0;
} }

View File

@@ -1,152 +0,0 @@
#include "arc.h"
#include "src/ctx.h"
#define ARC_BUF_SIZE 8192
/**
 * Tell whether a compressed ("filtered") file wraps a tar archive, i.e.
 * whether the path with its last extension removed ends in ".tar"
 * (for example "backup.tar.gz").
 *
 * @param filepath Path of the file.
 * @param ext      Offset in filepath of the first character after the last
 *                 '.', as stored in parse_job_t.ext. 0 means "no extension".
 * @return 1 (TRUE) when the stem ends in ".tar", 0 (FALSE) otherwise.
 *
 * The stem is compared in place. The earlier version copied it into a
 * fixed-size stack buffer without checking its length, and a negative ext
 * produced an out-of-range memcpy size. The result is unchanged: the last '.'
 * of the stem is followed by exactly "tar" if and only if the stem ends
 * in ".tar".
 */
int should_parse_filtered_file(const char *filepath, int ext) {
    static const char tar_suffix[] = ".tar";
    const int suffix_len = (int) sizeof(tar_suffix) - 1;

    /* The stem is filepath[0 .. ext - 1), the path without ".<extension>". */
    const int stem_len = ext - 1;

    return stem_len >= suffix_len
           && memcmp(filepath + stem_len - suffix_len, tar_suffix, (size_t) suffix_len) == 0;
}
/**
 * vfile read hook for entries that live inside an archive: pulls up to
 * `size` bytes of the current entry from the libarchive handle stored in
 * the virtual file.
 *
 * @return whatever archive_read_data() returns, converted to int.
 */
int arc_read(struct vfile *f, void *buf, size_t size) {
    struct archive *source = f->arc;
    return archive_read_data(source, buf, size);
}
/*
 * User data handed to the vfile_*_callback functions registered with
 * archive_read_open() in parse_archive().
 */
typedef struct arc_data {
/* Virtual file the archive bytes are read from. */
vfile_t *f;
/* Scratch buffer filled by vfile_read_callback and exposed to libarchive. */
char buf[ARC_BUF_SIZE];
} arc_data_f;
int vfile_open_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->is_fs_file && data->f->fd == -1) {
data->f->fd = open(data->f->filepath, O_RDONLY);
}
return ARCHIVE_OK;
}
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
arc_data_f *data = user_data;
*buf = data->buf;
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
}
int vfile_close_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->close != NULL) {
data->f->close(data->f);
}
return ARCHIVE_OK;
}
/**
 * Index the contents of an archive.
 *
 * The archive is opened directly from disk when `f` is a filesystem file.
 * When `f` is itself an archive entry it is read through the vfile_*
 * callbacks, and only if ScanCtx.archive_mode is ARC_MODE_RECURSE; otherwise
 * the nested archive is ignored.
 *
 * - ARC_MODE_LIST: the paths of all regular entries are joined with '\n'
 *   and attached to `doc` as a MetaContent line.
 * - any other mode reaching this point: every regular entry is handed to
 *   parse() as a virtual file named "<archive path>#/<entry path>", with
 *   `doc`'s uuid recorded as its parent.
 *
 * @param f   Virtual file holding the archive bytes.
 * @param doc Document describing the archive itself.
 */
void parse_archive(vfile_t *f, document_t *doc) {

    struct archive *a;
    struct archive_entry *entry;

    a = archive_read_new();
    archive_read_support_filter_all(a);
    archive_read_support_format_all(a);

    arc_data_f data;
    data.f = f;

    int ret = 0;
    if (data.f->is_fs_file) {
        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
    } else if (ScanCtx.archive_mode == ARC_MODE_RECURSE) {
        ret = archive_read_open(
                a, &data,
                vfile_open_callback,
                vfile_read_callback,
                vfile_close_callback
        );
    } else {
        /* Archive nested in an archive, and recursion is not enabled. */
        archive_read_free(a);
        return;
    }

    if (ret != ARCHIVE_OK) {
        fprintf(stderr, "OPEN[%d]:%s %s\n", ret, archive_error_string(a), doc->filepath);
        archive_read_free(a);
        return;
    }

    if (ScanCtx.archive_mode == ARC_MODE_LIST) {

        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
                const char *path = archive_entry_pathname(entry);
                /* The pathname can be NULL when it cannot be converted; skip such entries. */
                if (path != NULL) {
                    dyn_buffer_append_string(&buf, (char *) path);
                    dyn_buffer_write_char(&buf, '\n');
                }
            }
        }
        dyn_buffer_write_char(&buf, '\0');

        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        if (meta_list != NULL) {
            meta_list->key = MetaContent;
            strcpy(meta_list->strval, buf.buf);
            APPEND_META(doc, meta_list);
        }
        dyn_buffer_destroy(&buf);

    } else {

        /* Room reserved after the job header for the virtual path of an entry. */
        const size_t path_cap = PATH_MAX * 2;

        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + path_cap);
        if (sub_job == NULL) {
            archive_read_free(a);
            return;
        }

        sub_job->vfile.close = NULL;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->info = *archive_entry_stat(entry);
            if (!S_ISREG(sub_job->info.st_mode)) {
                continue;
            }

            const char *entry_path = archive_entry_pathname(entry);
            if (entry_path == NULL) {
                continue;
            }

            /* Bounded write: entry names come from the archive and may be arbitrarily long. */
            int written = snprintf(sub_job->filepath, path_cap, "%s#/%s", f->filepath, entry_path);
            if (written < 0 || (size_t) written >= path_cap) {
                fprintf(stderr, "Path too long, skipping: %s#/%s\n", f->filepath, entry_path);
                continue;
            }

            /* The "#/" separator guarantees that at least one '/' is present. */
            sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

            /* Look for the extension in the file name only, as create_fs_parse_job does,
             * so a '.' in the archive name or in a directory is not taken for one. */
            char *p = strrchr(sub_job->filepath + sub_job->base, '.');
            if (p != NULL) {
                sub_job->ext = (int) (p - sub_job->filepath + 1);
            } else {
                sub_job->ext = (int) strlen(sub_job->filepath);
            }

            parse(sub_job);
        }

        free(sub_job);
    }

    archive_read_free(a);
}

View File

@@ -1,12 +0,0 @@
#ifndef SIST2_ARC_H
#define SIST2_ARC_H
#include "src/sist.h"
/*
 * Returns TRUE when filepath, with its last extension removed, ends in
 * ".tar" (e.g. "x.tar.gz"); FALSE otherwise, including when ext is 0.
 * ext is the offset of the first character after the last '.'.
 */
int should_parse_filtered_file(const char *filepath, int ext);
/*
 * Opens the archive held by f and either lists its regular entries into
 * doc's metadata or parses each of them, depending on ScanCtx.archive_mode.
 */
void parse_archive(vfile_t *f, document_t *doc);
/* vfile read hook that reads from the libarchive handle stored in f->arc. */
int arc_read(struct vfile * f, void *buf, size_t size);
#endif

View File

@@ -1,9 +1,11 @@
#include "font.h" #include "font.h"
#include "ft2build.h"
#include "freetype/freetype.h"
#include "src/ctx.h" #include "src/ctx.h"
__thread FT_Library ft_lib = NULL; __thread FT_Library library = NULL;
typedef struct text_dimensions { typedef struct text_dimensions {
@@ -13,12 +15,12 @@ typedef struct text_dimensions {
} text_dimensions_t; } text_dimensions_t;
typedef struct glyph { typedef struct glyph {
int top; unsigned int top;
int height; unsigned int height;
int width; unsigned int width;
int descent; unsigned int descent;
int ascent; unsigned int ascent;
int advance_width; unsigned int advance_width;
unsigned char *pixmap; unsigned char *pixmap;
} glyph_t; } glyph_t;
@@ -37,10 +39,10 @@ glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
glyph.pixmap = slot->bitmap.buffer; glyph.pixmap = slot->bitmap.buffer;
glyph.width = (int) slot->bitmap.width; glyph.width = slot->bitmap.width;
glyph.height = (int) slot->bitmap.rows; glyph.height = slot->bitmap.rows;
glyph.top = slot->bitmap_top; glyph.top = slot->bitmap_top;
glyph.advance_width = (int) slot->advance.x / 64; glyph.advance_width = slot->advance.x / 64;
glyph.descent = MAX(0, glyph.height - glyph.top); glyph.descent = MAX(0, glyph.height - glyph.top);
glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent); glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
@@ -48,6 +50,10 @@ glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
return glyph; return glyph;
} }
__always_inline
glyph_t get_glyph(char character, FT_Face face) {
}
text_dimensions_t text_dimension(char *text, FT_Face face) { text_dimensions_t text_dimension(char *text, FT_Face face) {
text_dimensions_t dimensions; text_dimensions_t dimensions;
@@ -56,7 +62,7 @@ text_dimensions_t text_dimension(char *text, FT_Face face) {
int num_chars = (int) strlen(text); int num_chars = (int) strlen(text);
unsigned int max_ascent = 0; unsigned int max_ascent = 0;
int max_descent = 0; unsigned int max_descent = 0;
char pc = 0; char pc = 0;
for (int i = 0; i < num_chars; i++) { for (int i = 0; i < num_chars; i++) {
@@ -66,7 +72,7 @@ text_dimensions_t text_dimension(char *text, FT_Face face) {
glyph_t glyph = ft_glyph_to_glyph(face->glyph); glyph_t glyph = ft_glyph_to_glyph(face->glyph);
max_descent = MAX(max_descent, glyph.descent); max_descent = MAX(max_descent, glyph.descent);
max_ascent = MAX(max_ascent, MAX(glyph.height, glyph.ascent)); max_ascent = MAX(max_ascent, glyph.ascent);
int kerning_x = kerning_offset(c, pc, face); int kerning_x = kerning_offset(c, pc, face);
dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x; dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x;
@@ -137,15 +143,12 @@ void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned
} }
void parse_font(const char *buf, size_t buf_len, document_t *doc) { void parse_font(const char *buf, size_t buf_len, document_t *doc) {
if (ft_lib == NULL) { if (library == NULL) {
FT_Init_FreeType(&ft_lib); FT_Init_FreeType(&library);
}
if (buf == NULL) {
return;
} }
FT_Face face; FT_Face face;
FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face); FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face);
if (err != 0) { if (err != 0) {
return; return;
} }
@@ -167,10 +170,6 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
strcpy(meta_name->strval, font_name); strcpy(meta_name->strval, font_name);
APPEND_META(doc, meta_name) APPEND_META(doc, meta_name)
if (ScanCtx.tn_size <= 0) {
return;
}
int pixel = 64; int pixel = 64;
int num_chars = (int) strlen(font_name); int num_chars = (int) strlen(font_name);
@@ -191,18 +190,11 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER); err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) { if (err != 0) {
c = c >= 'a' && c <= 'z' ? c - 32 : c + 32; continue;
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
continue;
}
} }
glyph_t glyph = ft_glyph_to_glyph(face->glyph); glyph_t glyph = ft_glyph_to_glyph(face->glyph);
pen.x += kerning_offset(c, pc, face); pen.x += kerning_offset(c, pc, face);
if (pen.x <= 0) {
pen.x = ABS(glyph.advance_width - glyph.width);
}
pen.y = dimensions.height - glyph.ascent - dimensions.baseline; pen.y = dimensions.height - glyph.ascent - dimensions.baseline;
draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap); draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);

View File

@@ -1,10 +1,6 @@
#include "src/sist.h" #include "src/sist.h"
#include "src/ctx.h" #include "src/ctx.h"
#define MIN_SIZE 32
#define AVIO_BUF_SIZE 8192
__always_inline
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) { AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG); AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
@@ -26,8 +22,8 @@ AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
return jpeg; return jpeg;
} }
__always_inline
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) { AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
AVFrame *scaled_frame = av_frame_alloc();
int dstW; int dstW;
int dstH; int dstH;
@@ -45,22 +41,16 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
} }
} }
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
return NULL;
}
AVFrame *scaled_frame = av_frame_alloc();
struct SwsContext *ctx = sws_getContext( struct SwsContext *ctx = sws_getContext(
decoder->width, decoder->height, decoder->pix_fmt, decoder->width, decoder->height, decoder->pix_fmt,
dstW, dstH, AV_PIX_FMT_YUVJ420P, dstW, dstH, AV_PIX_FMT_YUVJ420P,
SWS_FAST_BILINEAR, 0, 0, 0 SWS_FAST_BILINEAR, 0, 0, 0
); );
int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1); int dst_buf_len = avpicture_get_size(AV_PIX_FMT_YUVJ420P, dstW, dstH);
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len); uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1); avpicture_fill((AVPicture *) scaled_frame, dst_buf, AV_PIX_FMT_YUVJ420P, dstW, dstH);
sws_scale(ctx, sws_scale(ctx,
(const uint8_t *const *) frame->data, frame->linesize, (const uint8_t *const *) frame->data, frame->linesize,
@@ -90,9 +80,9 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
int read_frame_ret = av_read_frame(pFormatCtx, &avPacket); int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
if (read_frame_ret != 0) { if (read_frame_ret != 0) {
// if (read_frame_ret != AVERROR_EOF) { if (read_frame_ret != AVERROR_EOF) {
// fprintf(stderr, "Error reading frame: %d\n", read_frame_ret); fprintf(stderr, "Error reading frame: %s\n", av_err2str(read_frame_ret));
// } }
av_frame_free(&frame); av_frame_free(&frame);
av_packet_unref(&avPacket); av_packet_unref(&avPacket);
return NULL; return NULL;
@@ -117,83 +107,63 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
return frame; return frame;
} }
#define APPEND_TAG_META(doc, tag_, keyname) \
text_buffer_t tex = text_buffer_create(-1); \
text_buffer_append_string0(&tex, tag_->value); \
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
meta_tag->key = keyname; \
strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
APPEND_META(doc, meta_tag) \
text_buffer_destroy(&tex);
__always_inline
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) { void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
AVDictionaryEntry *tag = NULL; AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char key[32]; char *key = tag->key;
strncpy(key, tag->key, sizeof(key)); for (; *key; ++key) *key = (char) tolower(*key);
char *ptr = key; if (strcmp(tag->key, "artist") == 0) {
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr); size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
if (strcmp(key, "artist") == 0) { meta_tag->key = MetaArtist;
APPEND_TAG_META(doc, tag, MetaArtist) memcpy(meta_tag->strval, tag->value, len);
} else if (strcmp(key, "genre") == 0) { APPEND_META(doc, meta_tag)
APPEND_TAG_META(doc, tag, MetaGenre) } else if (strcmp(tag->key, "genre") == 0) {
} else if (strcmp(key, "title") == 0) { size_t len = strlen(tag->value);
APPEND_TAG_META(doc, tag, MetaTitle) meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
} else if (strcmp(key, "album_artist") == 0) { meta_tag->key = MetaGenre;
APPEND_TAG_META(doc, tag, MetaAlbumArtist) memcpy(meta_tag->strval, tag->value, len);
} else if (strcmp(key, "album") == 0) { APPEND_META(doc, meta_tag)
APPEND_TAG_META(doc, tag, MetaAlbum) } else if (strcmp(tag->key, "title") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaTitle;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "album_artist") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaAlbumArtist;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} else if (strcmp(tag->key, "album") == 0) {
size_t len = strlen(tag->value);
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
meta_tag->key = MetaAlbum;
memcpy(meta_tag->strval, tag->value, len);
APPEND_META(doc, meta_tag)
} }
} }
} }
__always_inline void parse_media(const char *filepath, document_t *doc) {
void append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
if (is_video) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
APPEND_META(doc, meta_duration)
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->longval = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
AVDictionaryEntry *tag = NULL;
if (is_video) {
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
if (include_audio_tags && strcmp(tag->key, "title") == 0) {
APPEND_TAG_META(doc, tag, MetaTitle)
} else if (strcmp(tag->key, "comment") == 0) {
APPEND_TAG_META(doc, tag, MetaContent)
} else if (include_audio_tags && strcmp(tag->key, "artist") == 0) {
APPEND_TAG_META(doc, tag, MetaArtist)
}
}
} else {
// EXIF metadata
while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
if (include_audio_tags && strcmp(tag->key, "Artist") == 0) {
APPEND_TAG_META(doc, tag, MetaArtist)
} else if (strcmp(tag->key, "ImageDescription") == 0) {
APPEND_TAG_META(doc, tag, MetaContent)
}
}
}
}
void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {
int video_stream = -1; int video_stream = -1;
int audio_stream = -1; int audio_stream = -1;
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
fprintf(stderr, "Could not allocate AVFormatContext! %s \n", filepath);
return;
}
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) {
printf("ERR%s %s\n", filepath, av_err2str(res));
return;
}
avformat_find_stream_info(pFormatCtx, NULL); avformat_find_stream_info(pFormatCtx, NULL);
for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) { for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
@@ -232,10 +202,23 @@ void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {
} }
} }
if (video_stream != -1 && ScanCtx.tn_size > 0) { if (video_stream != -1) {
AVStream *stream = pFormatCtx->streams[video_stream]; AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) { if (stream->nb_frames > 1) {
//This is a video (not a still image)
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
APPEND_META(doc, meta_duration)
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->intval = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
if (stream->codecpar->width <= 20 || stream->codecpar->height <= 20) {
avformat_close_input(&pFormatCtx); avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx); avformat_free_context(pFormatCtx);
return; return;
@@ -267,19 +250,9 @@ void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {
return; return;
} }
append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);
// Scale frame // Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size); AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
if (scaled_frame == NULL) {
av_frame_free(&frame);
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
// Encode frame to jpeg // Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale); AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame); avcodec_send_frame(jpeg_encoder, scaled_frame);
@@ -289,8 +262,7 @@ void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {
avcodec_receive_packet(jpeg_encoder, &jpeg_packet); avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail // Save thumbnail
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, jpeg_packet.size);
jpeg_packet.size);
av_packet_unref(&jpeg_packet); av_packet_unref(&jpeg_packet);
av_frame_free(&frame); av_frame_free(&frame);
@@ -304,58 +276,3 @@ void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {
avformat_free_context(pFormatCtx); avformat_free_context(pFormatCtx);
} }
void parse_media_filename(const char *filepath, document_t *doc) {
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
fprintf(stderr, "Could not allocate AVFormatContext! %s \n", filepath);
return;
}
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) {
fprintf(stderr, "media error: %s %s\n", filepath, av_err2str(res));
return;
}
parse_media(pFormatCtx, doc);
}
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
struct vfile *f = ptr;
int ret = f->read(f, buf, buf_size);
if (ret == 0) {
return AVERROR_EOF;
}
return ret;
}
void parse_media_vfile(struct vfile *f, document_t *doc) {
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
fprintf(stderr, "Could not allocate AVFormatContext! %s \n", f->filepath);
return;
}
unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
pFormatCtx->pb = io_ctx;
pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;
int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
if (res == -5) {
// Tried to parse media that requires seek
return;
} else if(res < 0) {
fprintf(stderr, "media error: %s %s\n", f->filepath, av_err2str(res));
return;
}
parse_media(pFormatCtx, doc);
av_free(io_ctx);
}

View File

@@ -5,10 +5,7 @@
#include "src/sist.h" #include "src/sist.h"
#define MIN_VIDEO_SIZE 1024 * 64 #define MIN_VIDEO_SIZE 1024 * 64
#define MIN_IMAGE_SIZE 1024 * 2
void parse_media_filename(const char * filepath, document_t *doc); void parse_media(const char * filepath, document_t *doc);
void parse_media_vfile(struct vfile *f, document_t *doc);
#endif #endif

View File

@@ -1,12 +1,10 @@
#include "mime.h" #include "mime.h"
unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) { unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) {
char lower[8]; char lower[64];
char *p = lower; char *p = lower;
int cnt = 0; while ((*ext)) {
while ((*ext) != '\0' && cnt + 1 < sizeof(lower)) {
*p++ = (char)tolower(*ext++); *p++ = (char)tolower(*ext++);
cnt++;
} }
*p = '\0'; *p = '\0';
return (size_t) g_hash_table_lookup(ext_table, lower); return (size_t) g_hash_table_lookup(ext_table, lower);

View File

@@ -8,7 +8,7 @@
#define MIME_EMPTY 1 #define MIME_EMPTY 1
#define DONT_PARSE 0x80000000 #define DONT_PARSE 0x80000000
#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0 #define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE
#define PDF_MASK 0x40000000 #define PDF_MASK 0x40000000
#define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK #define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
@@ -16,12 +16,6 @@
#define FONT_MASK 0x20000000 #define FONT_MASK 0x20000000
#define IS_FONT(mime_id) (mime_id & FONT_MASK) == FONT_MASK #define IS_FONT(mime_id) (mime_id & FONT_MASK) == FONT_MASK
#define ARC_MASK 0x10000000
#define IS_ARC(mime_id) (mime_id & ARC_MASK) == ARC_MASK
#define ARC_FILTER_MASK 0x08000000
#define IS_ARC_FILTER(mime_id) (mime_id & ARC_FILTER_MASK) == ARC_FILTER_MASK
enum major_mime { enum major_mime {
MimeInvalid = 0, MimeInvalid = 0,
MimeModel = 1, MimeModel = 1,

View File

@@ -20,7 +20,7 @@ enum mime {
application_freeloader=655372, application_freeloader=655372,
application_futuresplash=655373, application_futuresplash=655373,
application_groupwise=655374, application_groupwise=655374,
application_gzip=655375 | 0x08000000, application_gzip=655375,
application_hta=655376, application_hta=655376,
application_i_deas=655377, application_i_deas=655377,
application_iges=655378, application_iges=655378,
@@ -39,389 +39,334 @@ enum mime {
application_oda=655391, application_oda=655391,
application_ogg=655392, application_ogg=655392,
application_pdf=655393 | 0x40000000, application_pdf=655393 | 0x40000000,
application_pgp_keys=655394, application_pgp_signature=655394,
application_pgp_signature=655395, application_pkcs7_signature=655395,
application_pkcs7_signature=655396, application_pkix_cert=655396,
application_pkix_cert=655397, application_postscript=655397,
application_postscript=655398, application_pro_eng=655398,
application_pro_eng=655399, application_ringing_tones=655399,
application_ringing_tones=655400, application_smil=655400,
application_smil=655401, application_solids=655401,
application_solids=655402, application_sounder=655402,
application_sounder=655403, application_step=655403,
application_step=655404, application_streamingmedia=655404,
application_streamingmedia=655405, application_vda=655405,
application_vda=655406, application_vnd_fdf=655406,
application_vnd_fdf=655407, application_vnd_font_fontforge_sfd=655407,
application_vnd_font_fontforge_sfd=655408, application_vnd_hp_hpgl=655408,
application_vnd_hp_hpgl=655409, application_vnd_iccprofile=655409,
application_vnd_iccprofile=655410, application_vnd_ms_cab_compressed=655410,
application_vnd_lotus_1_2_3=655411, application_vnd_ms_excel=655411,
application_vnd_ms_cab_compressed=655412, application_vnd_ms_fontobject=655412,
application_vnd_ms_excel=655413, application_vnd_ms_opentype=655413 | 0x20000000,
application_vnd_ms_fontobject=655414, application_vnd_ms_pki_certstore=655414,
application_vnd_ms_opentype=655415 | 0x20000000, application_vnd_ms_pki_pko=655415,
application_vnd_ms_pki_certstore=655416, application_vnd_ms_pki_seccat=655416,
application_vnd_ms_pki_pko=655417, application_vnd_ms_powerpoint=655417,
application_vnd_ms_pki_seccat=655418, application_vnd_ms_project=655418,
application_vnd_ms_powerpoint=655419, application_vnd_oasis_opendocument_base=655419,
application_vnd_ms_project=655420, application_vnd_oasis_opendocument_formula=655420,
application_vnd_oasis_opendocument_base=655421, application_vnd_oasis_opendocument_graphics=655421,
application_vnd_oasis_opendocument_formula=655422, application_vnd_oasis_opendocument_text=655422,
application_vnd_oasis_opendocument_graphics=655423, application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655423,
application_vnd_oasis_opendocument_presentation=655424, application_vnd_openxmlformats_officedocument_wordprocessingml_document=655424,
application_vnd_oasis_opendocument_spreadsheet=655425, application_vnd_wap_wmlc=655425,
application_vnd_oasis_opendocument_text=655426, application_vnd_wap_wmlscriptc=655426,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655427, application_vnd_xara=655427,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655428, application_vocaltec_media_desc=655428,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655429, application_vocaltec_media_file=655429,
application_vnd_symbian_install=655430, application_winhelp=655430,
application_vnd_tcpdump_pcap=655431, application_wordperfect=655431,
application_vnd_wap_wmlc=655432, application_wordperfect6_0=655432,
application_vnd_wap_wmlscriptc=655433, application_wordperfect6_1=655433,
application_vnd_xara=655434, application_x_123=655434,
application_vocaltec_media_desc=655435, application_x_7z_compressed=655435,
application_vocaltec_media_file=655436, application_x_aim=655436,
application_warc=655437, application_x_archive=655437,
application_winhelp=655438, application_x_authorware_bin=655438,
application_wordperfect=655439, application_x_authorware_map=655439,
application_wordperfect6_0=655440, application_x_authorware_seg=655440,
application_wordperfect6_1=655441, application_x_bcpio=655441,
application_x_123=655442, application_x_bittorrent=655442,
application_x_7z_compressed=655443 | 0x10000000, application_x_bsh=655443,
application_x_aim=655444, application_x_bytecode_python=655444,
application_x_apple_diskimage=655445, application_x_bzip=655445,
application_x_arc=655446 | 0x10000000, application_x_bzip2=655446,
application_x_archive=655447, application_x_cbr=655447,
application_x_atari_7800_rom=655448, application_x_cbz=655448 | 0x40000000,
application_x_authorware_bin=655449, application_x_cdlink=655449,
application_x_authorware_map=655450, application_x_chat=655450,
application_x_authorware_seg=655451, application_x_cocoa=655451,
application_x_avira_qua=655452, application_x_conference=655452,
application_x_bcpio=655453, application_x_cpio=655453,
application_x_bittorrent=655454, application_x_dbf=655454,
application_x_bsh=655455, application_x_dbt=655455,
application_x_bytecode_python=655456, application_x_debian_package=655456,
application_x_bzip=655457, application_x_deepv=655457,
application_x_bzip2=655458 | 0x08000000, application_x_director=655458,
application_x_cbr=655459, application_x_dosexec=655459,
application_x_cbz=655460 | 0x40000000, application_x_dvi=655460,
application_x_cdlink=655461, application_x_elc=655461,
application_x_chat=655462,
application_x_chrome_extension=655463,
application_x_cocoa=655464,
application_x_conference=655465,
application_x_coredump=655466,
application_x_cpio=655467,
application_x_dbf=655468,
application_x_dbt=655469,
application_x_debian_package=655470,
application_x_deepv=655471,
application_x_director=655472,
application_x_dmp=655473,
application_x_dosdriver=655474,
application_x_dosexec=655475,
application_x_dvi=655476,
application_x_elc=655477,
application_x_empty=1, application_x_empty=1,
application_x_envoy=655479, application_x_envoy=655463,
application_x_esrehber=655480, application_x_esrehber=655464,
application_x_excel=655481, application_x_excel=655465,
application_x_executable=655482, application_x_executable=655466,
application_x_font_gdos=655483, application_x_font_sfn=655467,
application_x_font_pf2=655484, application_x_font_ttf=655468 | 0x20000000,
application_x_font_pfm=655485, application_x_freelance=655469,
application_x_font_sfn=655486, application_x_git=655470,
application_x_font_ttf=655487 | 0x20000000, application_x_gsp=655471,
application_x_freelance=655488, application_x_gss=655472,
application_x_gamecube_rom=655489, application_x_gtar=655473,
application_x_gdbm=655490, application_x_gzip=655474,
application_x_gettext_translation=655491, application_x_hdf=655475,
application_x_git=655492, application_x_helpfile=655476,
application_x_gsp=655493, application_x_httpd_imap=655477,
application_x_gss=655494, application_x_ima=655478,
application_x_gtar=655495, application_x_innosetup=655479,
application_x_gzip=655496, application_x_internett_signup=655480,
application_x_hdf=655497, application_x_inventor=655481,
application_x_helpfile=655498, application_x_ip2=655482,
application_x_httpd_imap=655499, application_x_java_applet=655483,
application_x_ima=655500, application_x_java_commerce=655484,
application_x_innosetup=655501, application_x_java_image=655485,
application_x_internett_signup=655502, application_x_java_keystore=655486,
application_x_inventor=655503, application_x_kdelnk=655487,
application_x_ip2=655504, application_x_koan=655488,
application_x_java_applet=655505, application_x_latex=655489,
application_x_java_commerce=655506, application_x_livescreen=655490,
application_x_java_image=655507, application_x_lotus=655491,
application_x_java_jmod=655508, application_x_lzh=655492,
application_x_java_keystore=655509, application_x_lzx=655493,
application_x_kdelnk=655510, application_x_mach_binary=655494,
application_x_koan=655511, application_x_mach_executable=655495,
application_x_latex=655512, application_x_magic_cap_package_1_0=655496,
application_x_livescreen=655513, application_x_mathcad=655497,
application_x_lotus=655514, application_x_meme=655498,
application_x_lz4=655515 | 0x08000000, application_x_midi=655499,
application_x_lz4_json=655516, application_x_mif=655500,
application_x_lzh=655517, application_x_mix_transfer=655501,
application_x_lzh_compressed=655518, application_x_mobipocket_ebook=655502,
application_x_lzip=655519 | 0x08000000, application_x_ms_compress_szdd=655503,
application_x_lzma=655520 | 0x08000000, application_x_ms_pdb=655504,
application_x_lzop=655521 | 0x08000000, application_x_ms_reader=655505,
application_x_lzx=655522, application_x_navi_animation=655506,
application_x_mach_binary=655523, application_x_navidoc=655507,
application_x_mach_executable=655524, application_x_navimap=655508,
application_x_magic_cap_package_1_0=655525, application_x_navistyle=655509,
application_x_mathcad=655526, application_x_netcdf=655510,
application_x_maxis_dbpf=655527, application_x_newton_compatible_pkg=655511,
application_x_meme=655528, application_x_object=655512,
application_x_midi=655529, application_x_omc=655513,
application_x_mif=655530, application_x_omcdatamaker=655514,
application_x_mix_transfer=655531, application_x_omcregerator=655515,
application_x_mobipocket_ebook=655532, application_x_pagemaker=655516,
application_x_ms_compress_szdd=655533, application_x_pcl=655517,
application_x_ms_pdb=655534, application_x_pixclscript=655518,
application_x_ms_reader=655535, application_x_pkcs7_certreqresp=655519,
application_x_msaccess=655536, application_x_pkcs7_signature=655520,
application_x_navi_animation=655537, application_x_project=655521,
application_x_navidoc=655538, application_x_qpro=655522,
application_x_navimap=655539, application_x_rar=655523,
application_x_navistyle=655540, application_x_rpm=655524,
application_x_nes_rom=655541, application_x_sdp=655525,
application_x_netcdf=655542, application_x_sea=655526,
application_x_newton_compatible_pkg=655543, application_x_seelogo=655527,
application_x_nintendo_ds_rom=655544, application_x_setupscript=655528,
application_x_object=655545, application_x_shar=655529,
application_x_omc=655546, application_x_sharedlib=655530,
application_x_omcdatamaker=655547, application_x_shockwave_flash=655531,
application_x_omcregerator=655548, application_x_sprite=655532,
application_x_pagemaker=655549, application_x_sqlite3=655533,
application_x_pcl=655550, application_x_sv4cpio=655534,
application_x_pgp_keyring=655551, application_x_sv4crc=655535,
application_x_pixclscript=655552, application_x_tar=655536,
application_x_pkcs7_certreqresp=655553, application_x_tbook=655537,
application_x_pkcs7_signature=655554, application_x_tex_tfm=655538,
application_x_project=655555, application_x_texinfo=655539,
application_x_qpro=655556, application_x_ustar=655540,
application_x_rar=655557 | 0x10000000, application_x_visio=655541,
application_x_rpm=655558, application_x_vnd_audioexplosion_mzz=655542,
application_x_sdp=655559, application_x_vnd_ls_xpix=655543,
application_x_sea=655560, application_x_vrml=655544,
application_x_seelogo=655561, application_x_wais_source=655545,
application_x_setupscript=655562, application_x_wine_extension_ini=655546,
application_x_shar=655563, application_x_wintalk=655547,
application_x_sharedlib=655564, application_x_world=655548,
application_x_shockwave_flash=655565, application_x_wri=655549,
application_x_snappy_framed=655566, application_x_x509_ca_cert=655550,
application_x_sprite=655567, application_x_xz=655551,
application_x_sqlite3=655568, application_xml=655552,
application_x_sv4cpio=655569, application_zip=655553,
application_x_sv4crc=655570, audio_it=458946,
application_x_tar=655571 | 0x10000000, audio_make=458947,
application_x_tbook=655572, audio_mid=458948,
application_x_terminfo=655573, audio_midi=458949,
application_x_terminfo2=655574, audio_mp4=458950,
application_x_tex_tfm=655575, audio_mpeg=458951,
application_x_texinfo=655576, audio_ogg=458952,
application_x_ustar=655577, audio_s3m=458953,
application_x_visio=655578, audio_tsp_audio=458954,
application_x_vnd_audioexplosion_mzz=655579, audio_tsplayer=458955,
application_x_vnd_ls_xpix=655580, audio_vnd_qcelp=458956,
application_x_vrml=655581, audio_voxware=458957,
application_x_wais_source=655582, audio_x_flac=458958,
application_x_wine_extension_ini=655583, audio_x_gsm=458959,
application_x_wintalk=655584, audio_x_jam=458960,
application_x_world=655585, audio_x_liveaudio=458961,
application_x_wri=655586, audio_x_m4a=458962,
application_x_x509_ca_cert=655587, audio_x_midi=458963,
application_x_xz=655588 | 0x08000000, audio_x_mod=458964,
application_x_zip=655589, audio_x_mp4a_latm=458965,
application_x_zstd=655590 | 0x08000000, audio_x_mpeg_3=458966,
application_xml=655591, audio_x_mpequrl=458967,
application_zip=655592 | 0x10000000, audio_x_nspaudio=458968,
application_zlib=655593, audio_x_pn_realaudio=458969,
audio_it=458986, audio_x_psid=458970,
audio_make=458987, audio_x_realaudio=458971,
audio_mid=458988, audio_x_twinvq=458972,
audio_midi=458989, audio_x_twinvq_plugin=458973,
audio_mp4=458990, audio_x_voc=458974,
audio_mpeg=458991, audio_x_wav=458975,
audio_ogg=458992, audio_xm=458976,
audio_s3m=458993, font_otf=327905 | 0x20000000,
audio_tsp_audio=458994, font_sfnt=327906 | 0x20000000,
audio_tsplayer=458995, font_woff=327907 | 0x20000000,
audio_vnd_qcelp=458996, font_woff2=327908 | 0x20000000,
audio_voxware=458997, image_cmu_raster=524517,
audio_x_aiff=458998, image_fif=524518,
audio_x_flac=458999, image_florian=524519,
audio_x_gsm=459000, image_g3fax=524520,
audio_x_hx_aac_adts=459001, image_gif=524521,
audio_x_jam=459002, image_ief=524522,
audio_x_liveaudio=459003, image_jpeg=524523,
audio_x_m4a=459004, image_jutvision=524524,
audio_x_midi=459005, image_naplps=524525,
audio_x_mod=459006, image_pict=524526,
audio_x_mp4a_latm=459007, image_png=524527,
audio_x_mpeg_3=459008, image_svg=524528 | 0x80000000,
audio_x_mpequrl=459009, image_svg_xml=524529 | 0x80000000,
audio_x_nspaudio=459010, image_tiff=524530,
audio_x_pn_realaudio=459011, image_vnd_adobe_photoshop=524531 | 0x80000000,
audio_x_psid=459012, image_vnd_djvu=524532 | 0x80000000,
audio_x_realaudio=459013, image_vnd_fpx=524533,
audio_x_twinvq=459014, image_vnd_microsoft_icon=524534,
audio_x_twinvq_plugin=459015, image_vnd_rn_realflash=524535,
audio_x_voc=459016, image_vnd_rn_realpix=524536,
audio_x_wav=459017, image_vnd_wap_wbmp=524537,
audio_xm=459018, image_vnd_xiff=524538,
font_otf=327947 | 0x20000000, image_webp=524539,
font_sfnt=327948 | 0x20000000, image_x_cmu_raster=524540,
font_woff=327949 | 0x20000000, image_x_cur=524541,
font_woff2=327950 | 0x20000000, image_x_dwg=524542,
image_cmu_raster=524559, image_x_eps=524543,
image_fif=524560, image_x_exr=524544,
image_florian=524561, image_x_icns=524545,
image_g3fax=524562, image_x_icon=524546 | 0x80000000,
image_gif=524563, image_x_jg=524547,
image_heic=524564, image_x_jps=524548,
image_ief=524565, image_x_ms_bmp=524549,
image_jpeg=524566, image_x_niff=524550,
image_jutvision=524567, image_x_pcx=524551,
image_naplps=524568, image_x_pict=524552,
image_pict=524569, image_x_portable_bitmap=524553,
image_png=524570, image_x_portable_graymap=524554,
image_svg=524571 | 0x80000000, image_x_portable_pixmap=524555,
image_svg_xml=524572 | 0x80000000, image_x_quicktime=524556,
image_tiff=524573, image_x_rgb=524557,
image_vnd_adobe_photoshop=524574 | 0x80000000, image_x_tga=524558,
image_vnd_djvu=524575 | 0x80000000, image_x_tiff=524559,
image_vnd_fpx=524576, image_x_xcf=524560 | 0x80000000,
image_vnd_microsoft_icon=524577, image_x_xpixmap=524561 | 0x80000000,
image_vnd_rn_realflash=524578, image_x_xwindowdump=524562,
image_vnd_rn_realpix=524579, message_rfc822=196883,
image_vnd_wap_wbmp=524580, model_vnd_dwf=65812,
image_vnd_xiff=524581, model_vnd_gdl=65813,
image_webp=524582, model_vnd_gs_gdl=65814,
image_wmf=524583, model_vrml=65815,
image_x_3ds=524584, model_x_pov=65816,
image_x_cmu_raster=524585, text_asp=590105,
image_x_cur=524586, text_css=590106,
image_x_dwg=524587, text_html=590107,
image_x_eps=524588, text_javascript=590108,
image_x_exr=524589, text_mcf=590109,
image_x_gem=524590, text_pascal=590110,
image_x_icns=524591, text_plain=590111,
image_x_icon=524592 | 0x80000000, text_richtext=590112,
image_x_jg=524593, text_scriplet=590113,
image_x_jps=524594, text_tab_separated_values=590114,
image_x_ms_bmp=524595, text_troff=590115,
image_x_niff=524596, text_uri_list=590116,
image_x_pcx=524597, text_vnd_abc=590117,
image_x_pict=524598, text_vnd_fmi_flexstor=590118,
image_x_portable_bitmap=524599, text_vnd_wap_wml=590119,
image_x_portable_graymap=524600, text_vnd_wap_wmlscript=590120,
image_x_portable_pixmap=524601, text_webviewhtml=590121,
image_x_quicktime=524602, text_x_Algol68=590122,
image_x_rgb=524603, text_x_asm=590123,
image_x_tga=524604, text_x_audiosoft_intra=590124,
image_x_tiff=524605, text_x_awk=590125,
image_x_win_bitmap=524606, text_x_bcpl=590126,
image_x_xcf=524607 | 0x80000000, text_x_c=590127,
image_x_xpixmap=524608 | 0x80000000, text_x_c__=590128,
image_x_xwindowdump=524609, text_x_component=590129,
message_news=196930, text_x_diff=590130,
message_rfc822=196931, text_x_fortran=590131,
model_vnd_dwf=65860, text_x_java=590132,
model_vnd_gdl=65861, text_x_la_asf=590133,
model_vnd_gs_gdl=65862, text_x_lisp=590134,
model_vrml=65863, text_x_m=590135,
model_x_pov=65864, text_x_m4=590136,
text_PGP=590153, text_x_makefile=590137,
text_asp=590154, text_x_msdos_batch=590138,
text_css=590155, text_x_pascal=590139,
text_html=590156, text_x_perl=590140,
text_javascript=590157, text_x_php=590141,
text_mcf=590158, text_x_po=590142,
text_pascal=590159, text_x_python=590143,
text_plain=590160, text_x_ruby=590144,
text_richtext=590161, text_x_sass=590145,
text_rtf=590162, text_x_scss=590146,
text_scriplet=590163, text_x_server_parsed_html=590147,
text_tab_separated_values=590164, text_x_setext=590148,
text_troff=590165, text_x_sgml=590149,
text_uri_list=590166, text_x_shellscript=590150,
text_vnd_abc=590167, text_x_speech=590151,
text_vnd_fmi_flexstor=590168, text_x_tcl=590152,
text_vnd_wap_wml=590169, text_x_tex=590153,
text_vnd_wap_wmlscript=590170, text_x_uil=590154,
text_webviewhtml=590171, text_x_uuencode=590155,
text_x_Algol68=590172, text_x_vcalendar=590156,
text_x_asm=590173, text_x_vcard=590157,
text_x_audiosoft_intra=590174, text_xml=590158,
text_x_awk=590175, video_animaflex=393551,
text_x_bcpl=590176, video_avi=393552,
text_x_c=590177, video_avs_video=393553,
text_x_c__=590178, video_mp4=393554,
text_x_component=590179, video_mpeg=393555,
text_x_diff=590180, video_quicktime=393556,
text_x_fortran=590181, video_vdo=393557,
text_x_java=590182, video_vivo=393558,
text_x_la_asf=590183, video_vnd_rn_realvideo=393559,
text_x_lisp=590184, video_vosaic=393560,
text_x_m=590185, video_webm=393561,
text_x_m4=590186, video_x_amt_demorun=393562,
text_x_makefile=590187, video_x_amt_showrun=393563,
text_x_ms_regedit=590188, video_x_atomic3d_feature=393564,
text_x_msdos_batch=590189, video_x_dl=393565,
text_x_objective_c=590190, video_x_dv=393566,
text_x_pascal=590191, video_x_fli=393567,
text_x_perl=590192, video_x_flv=393568,
text_x_php=590193, video_x_isvideo=393569,
text_x_po=590194, video_x_jng=393570 | 0x80000000,
text_x_python=590195, video_x_matroska=393571,
text_x_ruby=590196, video_x_mng=393572,
text_x_sass=590197, video_x_motion_jpeg=393573,
text_x_scss=590198, video_x_ms_asf=393574,
text_x_server_parsed_html=590199, video_x_msvideo=393575,
text_x_setext=590200, video_x_qtc=393576,
text_x_sgml=590201, video_x_sgi_movie=393577,
text_x_shellscript=590202,
text_x_speech=590203,
text_x_tcl=590204,
text_x_tex=590205,
text_x_uil=590206,
text_x_uuencode=590207,
text_x_vcalendar=590208,
text_x_vcard=590209,
text_xml=590210,
video_MP2T=393603,
video_animaflex=393604,
video_avi=393605,
video_avs_video=393606,
video_mp4=393607,
video_mpeg=393608,
video_quicktime=393609,
video_vdo=393610,
video_vivo=393611,
video_vnd_rn_realvideo=393612,
video_vosaic=393613,
video_webm=393614,
video_x_amt_demorun=393615,
video_x_amt_showrun=393616,
video_x_atomic3d_feature=393617,
video_x_dl=393618,
video_x_dv=393619,
video_x_fli=393620,
video_x_flv=393621,
video_x_isvideo=393622,
video_x_jng=393623 | 0x80000000,
video_x_m4v=393624,
video_x_matroska=393625,
video_x_mng=393626,
video_x_motion_jpeg=393627,
video_x_ms_asf=393628,
video_x_msvideo=393629,
video_x_qtc=393630,
video_x_sgi_movie=393631,
x_epoc_x_sisx_app=721312,
}; };
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) { char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj"; case application_arj: return "application/arj";
@@ -680,7 +625,6 @@ case text_mcf: return "text/mcf";
case text_pascal: return "text/pascal"; case text_pascal: return "text/pascal";
case text_plain: return "text/plain"; case text_plain: return "text/plain";
case text_richtext: return "text/richtext"; case text_richtext: return "text/richtext";
case text_rtf: return "text/rtf";
case text_scriplet: return "text/scriplet"; case text_scriplet: return "text/scriplet";
case text_x_awk: return "text/x-awk"; case text_x_awk: return "text/x-awk";
case video_x_jng: return "video/x-jng"; case video_x_jng: return "video/x-jng";
@@ -786,60 +730,6 @@ case application_x_wine_extension_ini: return "application/x-wine-extension-ini"
case application_x_cbz: return "application/x-cbz"; case application_x_cbz: return "application/x-cbz";
case application_x_cbr: return "application/x-cbr"; case application_x_cbr: return "application/x-cbr";
case application_x_ms_compress_szdd: return "application/x-ms-compress-szdd"; case application_x_ms_compress_szdd: return "application/x-ms-compress-szdd";
case application_x_atari_7800_rom: return "application/x-atari-7800-rom";
case application_x_nes_rom: return "application/x-nes-rom";
case application_x_font_pfm: return "application/x-font-pfm";
case application_x_gettext_translation: return "application/x-gettext-translation";
case image_wmf: return "image/wmf";
case application_pgp_keys: return "application/pgp-keys";
case image_x_3ds: return "image/x-3ds";
case application_x_lz4: return "application/x-lz4";
case application_vnd_openxmlformats_officedocument_presentationml_presentation: return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
case application_vnd_oasis_opendocument_presentation: return "application/vnd.oasis.opendocument.presentation";
case application_x_msaccess: return "application/x-msaccess";
case application_vnd_oasis_opendocument_spreadsheet: return "application/vnd.oasis.opendocument.spreadsheet";
case audio_x_aiff: return "audio/x-aiff";
case text_x_ms_regedit: return "text/x-ms-regedit";
case application_x_gamecube_rom: return "application/x-gamecube-rom";
case application_x_nintendo_ds_rom: return "application/x-nintendo-ds-rom";
case text_x_objective_c: return "text/x-objective-c";
case application_x_font_gdos: return "application/x-font-gdos";
case application_x_apple_diskimage: return "application/x-apple-diskimage";
case application_x_zstd: return "application/x-zstd";
case video_x_m4v: return "video/x-m4v";
case message_news: return "message/news";
case application_vnd_symbian_install: return "application/vnd.symbian.install";
case application_x_lzh_compressed: return "application/x-lzh-compressed";
case application_x_dosdriver: return "application/x-dosdriver";
case application_vnd_tcpdump_pcap: return "application/vnd.tcpdump.pcap";
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
case application_x_avira_qua: return "application/x-avira-qua";
case video_MP2T: return "video/MP2T";
case application_x_snappy_framed: return "application/x-snappy-framed";
case application_x_lz4_json: return "application/x-lz4+json";
case application_x_dmp: return "application/x-dmp";
case application_zlib: return "application/zlib";
case application_x_pgp_keyring: return "application/x-pgp-keyring";
case application_x_gdbm: return "application/x-gdbm";
case application_x_font_pf2: return "application/x-font-pf2";
case application_x_zip: return "application/x-zip";
case application_x_coredump: return "application/x-coredump";
case application_x_java_jmod: return "application/x-java-jmod";
case application_x_terminfo: return "application/x-terminfo";
case application_x_terminfo2: return "application/x-terminfo2";
case application_x_arc: return "application/x-arc";
case application_vnd_lotus_1_2_3: return "application/vnd.lotus-1-2-3";
case image_x_win_bitmap: return "image/x-win-bitmap";
case application_x_maxis_dbpf: return "application/x-maxis-dbpf";
case text_PGP: return "text/PGP";
case audio_x_hx_aac_adts: return "audio/x-hx-aac-adts";
case application_x_chrome_extension: return "application/x-chrome-extension";
case image_heic: return "image/heic";
case image_x_gem: return "image/x-gem";
case application_x_lzma: return "application/x-lzma";
case application_warc: return "application/warc";
case application_x_lzip: return "application/x-lzip";
case application_x_lzop: return "application/x-lzop";
default: return NULL;}} default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal); GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj); g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@@ -1190,9 +1080,6 @@ g_hash_table_insert(ext_table, "d", (gpointer)text_plain);
g_hash_table_insert(ext_table, "cs", (gpointer)text_plain); g_hash_table_insert(ext_table, "cs", (gpointer)text_plain);
g_hash_table_insert(ext_table, "hpp", (gpointer)text_plain); g_hash_table_insert(ext_table, "hpp", (gpointer)text_plain);
g_hash_table_insert(ext_table, "srt", (gpointer)text_plain); g_hash_table_insert(ext_table, "srt", (gpointer)text_plain);
g_hash_table_insert(ext_table, "nfo", (gpointer)text_plain);
g_hash_table_insert(ext_table, "sfv", (gpointer)text_plain);
g_hash_table_insert(ext_table, "m3u", (gpointer)text_plain);
g_hash_table_insert(ext_table, "rt", (gpointer)text_richtext); g_hash_table_insert(ext_table, "rt", (gpointer)text_richtext);
g_hash_table_insert(ext_table, "rtf", (gpointer)text_richtext); g_hash_table_insert(ext_table, "rtf", (gpointer)text_richtext);
g_hash_table_insert(ext_table, "rtx", (gpointer)text_richtext); g_hash_table_insert(ext_table, "rtx", (gpointer)text_richtext);
@@ -1210,7 +1097,7 @@ g_hash_table_insert(ext_table, "ms", (gpointer)text_troff);
g_hash_table_insert(ext_table, "roff", (gpointer)text_troff); g_hash_table_insert(ext_table, "roff", (gpointer)text_troff);
g_hash_table_insert(ext_table, "t", (gpointer)text_troff); g_hash_table_insert(ext_table, "t", (gpointer)text_troff);
g_hash_table_insert(ext_table, "tr", (gpointer)text_troff); g_hash_table_insert(ext_table, "tr", (gpointer)text_troff);
g_hash_table_insert(ext_table, "uji", (gpointer)text_uri_list); g_hash_table_insert(ext_table, "uni", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "unis", (gpointer)text_uri_list); g_hash_table_insert(ext_table, "unis", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "uri", (gpointer)text_uri_list); g_hash_table_insert(ext_table, "uri", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "uris", (gpointer)text_uri_list); g_hash_table_insert(ext_table, "uris", (gpointer)text_uri_list);
@@ -1324,31 +1211,6 @@ g_hash_table_insert(ext_table, "hlp", (gpointer)application_winhelp);
g_hash_table_insert(ext_table, "cbz", (gpointer)application_x_cbz); g_hash_table_insert(ext_table, "cbz", (gpointer)application_x_cbz);
g_hash_table_insert(ext_table, "cbr", (gpointer)application_x_cbr); g_hash_table_insert(ext_table, "cbr", (gpointer)application_x_cbr);
g_hash_table_insert(ext_table, "fon", (gpointer)application_x_ms_compress_szdd); g_hash_table_insert(ext_table, "fon", (gpointer)application_x_ms_compress_szdd);
g_hash_table_insert(ext_table, "a78", (gpointer)application_x_atari_7800_rom);
g_hash_table_insert(ext_table, "nes", (gpointer)application_x_nes_rom);
g_hash_table_insert(ext_table, "pfm", (gpointer)application_x_font_pfm);
g_hash_table_insert(ext_table, "3ds", (gpointer)image_x_3ds);
g_hash_table_insert(ext_table, "lz4", (gpointer)application_x_lz4);
g_hash_table_insert(ext_table, "pptx", (gpointer)application_vnd_openxmlformats_officedocument_presentationml_presentation);
g_hash_table_insert(ext_table, "odp", (gpointer)application_vnd_oasis_opendocument_presentation);
g_hash_table_insert(ext_table, "accdb", (gpointer)application_x_msaccess);
g_hash_table_insert(ext_table, "ods", (gpointer)application_vnd_oasis_opendocument_spreadsheet);
g_hash_table_insert(ext_table, "aiff", (gpointer)audio_x_aiff);
g_hash_table_insert(ext_table, "aif", (gpointer)audio_x_aiff);
g_hash_table_insert(ext_table, "reg", (gpointer)text_x_ms_regedit);
g_hash_table_insert(ext_table, "zst", (gpointer)application_x_zstd);
g_hash_table_insert(ext_table, "m4v", (gpointer)video_x_m4v);
g_hash_table_insert(ext_table, "pcap", (gpointer)application_vnd_tcpdump_pcap);
g_hash_table_insert(ext_table, "jsonlz4", (gpointer)application_x_lz4_json);
g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp);
g_hash_table_insert(ext_table, "z", (gpointer)application_zlib);
g_hash_table_insert(ext_table, "pf2", (gpointer)application_x_font_pf2);
g_hash_table_insert(ext_table, "jmod", (gpointer)application_x_java_jmod);
g_hash_table_insert(ext_table, "heic", (gpointer)image_heic);
g_hash_table_insert(ext_table, "lzma", (gpointer)application_x_lzma);
g_hash_table_insert(ext_table, "warc", (gpointer)application_warc);
g_hash_table_insert(ext_table, "lz", (gpointer)application_x_lzip);
g_hash_table_insert(ext_table, "lzo", (gpointer)application_x_lzop);
return ext_table;} return ext_table;}
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal); GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj); g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@@ -1607,7 +1469,6 @@ g_hash_table_insert(mime_table, "text/mcf", (gpointer)text_mcf);
g_hash_table_insert(mime_table, "text/pascal", (gpointer)text_pascal); g_hash_table_insert(mime_table, "text/pascal", (gpointer)text_pascal);
g_hash_table_insert(mime_table, "text/plain", (gpointer)text_plain); g_hash_table_insert(mime_table, "text/plain", (gpointer)text_plain);
g_hash_table_insert(mime_table, "text/richtext", (gpointer)text_richtext); g_hash_table_insert(mime_table, "text/richtext", (gpointer)text_richtext);
g_hash_table_insert(mime_table, "text/rtf", (gpointer)text_rtf);
g_hash_table_insert(mime_table, "text/scriplet", (gpointer)text_scriplet); g_hash_table_insert(mime_table, "text/scriplet", (gpointer)text_scriplet);
g_hash_table_insert(mime_table, "text/x-awk", (gpointer)text_x_awk); g_hash_table_insert(mime_table, "text/x-awk", (gpointer)text_x_awk);
g_hash_table_insert(mime_table, "video/x-jng", (gpointer)video_x_jng); g_hash_table_insert(mime_table, "video/x-jng", (gpointer)video_x_jng);
@@ -1713,59 +1574,5 @@ g_hash_table_insert(mime_table, "application/x-wine-extension-ini", (gpointer)ap
g_hash_table_insert(mime_table, "application/x-cbz", (gpointer)application_x_cbz); g_hash_table_insert(mime_table, "application/x-cbz", (gpointer)application_x_cbz);
g_hash_table_insert(mime_table, "application/x-cbr", (gpointer)application_x_cbr); g_hash_table_insert(mime_table, "application/x-cbr", (gpointer)application_x_cbr);
g_hash_table_insert(mime_table, "application/x-ms-compress-szdd", (gpointer)application_x_ms_compress_szdd); g_hash_table_insert(mime_table, "application/x-ms-compress-szdd", (gpointer)application_x_ms_compress_szdd);
g_hash_table_insert(mime_table, "application/x-atari-7800-rom", (gpointer)application_x_atari_7800_rom);
g_hash_table_insert(mime_table, "application/x-nes-rom", (gpointer)application_x_nes_rom);
g_hash_table_insert(mime_table, "application/x-font-pfm", (gpointer)application_x_font_pfm);
g_hash_table_insert(mime_table, "application/x-gettext-translation", (gpointer)application_x_gettext_translation);
g_hash_table_insert(mime_table, "image/wmf", (gpointer)image_wmf);
g_hash_table_insert(mime_table, "application/pgp-keys", (gpointer)application_pgp_keys);
g_hash_table_insert(mime_table, "image/x-3ds", (gpointer)image_x_3ds);
g_hash_table_insert(mime_table, "application/x-lz4", (gpointer)application_x_lz4);
g_hash_table_insert(mime_table, "application/vnd.openxmlformats-officedocument.presentationml.presentation", (gpointer)application_vnd_openxmlformats_officedocument_presentationml_presentation);
g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.presentation", (gpointer)application_vnd_oasis_opendocument_presentation);
g_hash_table_insert(mime_table, "application/x-msaccess", (gpointer)application_x_msaccess);
g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.spreadsheet", (gpointer)application_vnd_oasis_opendocument_spreadsheet);
g_hash_table_insert(mime_table, "audio/x-aiff", (gpointer)audio_x_aiff);
g_hash_table_insert(mime_table, "text/x-ms-regedit", (gpointer)text_x_ms_regedit);
g_hash_table_insert(mime_table, "application/x-gamecube-rom", (gpointer)application_x_gamecube_rom);
g_hash_table_insert(mime_table, "application/x-nintendo-ds-rom", (gpointer)application_x_nintendo_ds_rom);
g_hash_table_insert(mime_table, "text/x-objective-c", (gpointer)text_x_objective_c);
g_hash_table_insert(mime_table, "application/x-font-gdos", (gpointer)application_x_font_gdos);
g_hash_table_insert(mime_table, "application/x-apple-diskimage", (gpointer)application_x_apple_diskimage);
g_hash_table_insert(mime_table, "application/x-zstd", (gpointer)application_x_zstd);
g_hash_table_insert(mime_table, "video/x-m4v", (gpointer)video_x_m4v);
g_hash_table_insert(mime_table, "message/news", (gpointer)message_news);
g_hash_table_insert(mime_table, "application/vnd.symbian.install", (gpointer)application_vnd_symbian_install);
g_hash_table_insert(mime_table, "application/x-lzh-compressed", (gpointer)application_x_lzh_compressed);
g_hash_table_insert(mime_table, "application/x-dosdriver", (gpointer)application_x_dosdriver);
g_hash_table_insert(mime_table, "application/vnd.tcpdump.pcap", (gpointer)application_vnd_tcpdump_pcap);
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
g_hash_table_insert(mime_table, "application/x-avira-qua", (gpointer)application_x_avira_qua);
g_hash_table_insert(mime_table, "video/MP2T", (gpointer)video_MP2T);
g_hash_table_insert(mime_table, "application/x-snappy-framed", (gpointer)application_x_snappy_framed);
g_hash_table_insert(mime_table, "application/x-lz4+json", (gpointer)application_x_lz4_json);
g_hash_table_insert(mime_table, "application/x-dmp", (gpointer)application_x_dmp);
g_hash_table_insert(mime_table, "application/zlib", (gpointer)application_zlib);
g_hash_table_insert(mime_table, "application/x-pgp-keyring", (gpointer)application_x_pgp_keyring);
g_hash_table_insert(mime_table, "application/x-gdbm", (gpointer)application_x_gdbm);
g_hash_table_insert(mime_table, "application/x-font-pf2", (gpointer)application_x_font_pf2);
g_hash_table_insert(mime_table, "application/x-zip", (gpointer)application_x_zip);
g_hash_table_insert(mime_table, "application/x-coredump", (gpointer)application_x_coredump);
g_hash_table_insert(mime_table, "application/x-java-jmod", (gpointer)application_x_java_jmod);
g_hash_table_insert(mime_table, "application/x-terminfo", (gpointer)application_x_terminfo);
g_hash_table_insert(mime_table, "application/x-terminfo2", (gpointer)application_x_terminfo2);
g_hash_table_insert(mime_table, "application/x-arc", (gpointer)application_x_arc);
g_hash_table_insert(mime_table, "application/vnd.lotus-1-2-3", (gpointer)application_vnd_lotus_1_2_3);
g_hash_table_insert(mime_table, "image/x-win-bitmap", (gpointer)image_x_win_bitmap);
g_hash_table_insert(mime_table, "application/x-maxis-dbpf", (gpointer)application_x_maxis_dbpf);
g_hash_table_insert(mime_table, "text/PGP", (gpointer)text_PGP);
g_hash_table_insert(mime_table, "audio/x-hx-aac-adts", (gpointer)audio_x_hx_aac_adts);
g_hash_table_insert(mime_table, "application/x-chrome-extension", (gpointer)application_x_chrome_extension);
g_hash_table_insert(mime_table, "image/heic", (gpointer)image_heic);
g_hash_table_insert(mime_table, "image/x-gem", (gpointer)image_x_gem);
g_hash_table_insert(mime_table, "application/x-lzma", (gpointer)application_x_lzma);
g_hash_table_insert(mime_table, "application/warc", (gpointer)application_warc);
g_hash_table_insert(mime_table, "application/x-lzip", (gpointer)application_x_lzip);
g_hash_table_insert(mime_table, "application/x-lzop", (gpointer)application_x_lzop);
return mime_table;} return mime_table;}
#endif #endif

View File

@@ -1,32 +1,9 @@
#include <src/ctx.h>
#include "src/sist.h" #include "src/sist.h"
#include "src/ctx.h" #include "src/ctx.h"
__thread magic_t Magic = NULL; __thread magic_t Magic;
int fs_read(struct vfile *f, void *buf, size_t size) { void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
if (f->fd == -1) {
f->fd = open(f->filepath, O_RDONLY);
if (f->fd == -1) {
perror("open");
printf("%s\n", f->filepath);
return -1;
}
}
return read(f->fd, buf, size);
}
/*
 * CLOSE_FILE(f): invoke the vfile's own close callback, if it has one.
 *
 * `f` is a vfile object (not a pointer): the macro reads `f.close` and
 * passes `&f` to it. When `f.close` is NULL nothing happens.
 *
 * Caveats visible from the expansion:
 *  - it expands to a bare `if (...) {...}` followed by an empty statement,
 *    so it is meant to be used as a standalone statement;
 *  - the argument is not parenthesized and is expanded more than once, so
 *    pass a simple lvalue expression without side effects.
 */
#define CLOSE_FILE(f) if (f.close != NULL) {f.close(&f);};
/*
 * Close the file descriptor held by a filesystem-backed vfile.
 *
 * f: vfile whose `fd` member is either an open descriptor or -1.
 *
 * Nothing is done when `fd` is -1. After closing, `fd` is reset to -1 (the
 * same "not open" marker fs_read() tests before opening the file), so that
 * calling fs_close() again on the same vfile cannot close a descriptor
 * number that has meanwhile been reused for something else.
 */
void fs_close(struct vfile *f) {
    if (f->fd != -1) {
        close(f->fd);
        f->fd = -1;
    }
}
void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
void *full_buf; void *full_buf;
@@ -34,13 +11,20 @@ void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
full_buf = malloc(job->info.st_size); full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, job->info.st_size); memcpy(full_buf, buf, job->info.st_size);
} else { } else {
if (*fd == -1) {
*fd = open(job->filepath, O_RDONLY);
if (*fd == -1) {
perror("open");
printf("%s\n", job->filepath);
free(job);
return NULL;
}
}
full_buf = malloc(job->info.st_size); full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, bytes_read); memcpy(full_buf, buf, bytes_read);
int ret = read(*fd, full_buf + bytes_read, job->info.st_size - bytes_read);
int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
if (ret == -1) { if (ret == -1) {
perror("read"); perror("read");
return NULL;
} }
} }
@@ -52,14 +36,15 @@ void parse(void *arg) {
parse_job_t *job = arg; parse_job_t *job = arg;
document_t doc; document_t doc;
int inc_ts = incremental_get(ScanCtx.original_table, job->info.st_ino); if (incremental_get(ScanCtx.original_table, job->info.st_ino) == job->info.st_mtim.tv_sec) {
if (inc_ts != 0 && inc_ts == job->info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino); incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
free(job);
return; return;
} }
if (Magic == NULL) { if (Magic == NULL) {
Magic = magic_open(MAGIC_MIME_TYPE); Magic = magic_open(MAGIC_MIME_TYPE);
magic_load(Magic, NULL);
} }
doc.filepath = job->filepath; doc.filepath = job->filepath;
@@ -77,25 +62,29 @@ void parse(void *arg) {
if (job->info.st_size == 0) { if (job->info.st_size == 0) {
doc.mime = MIME_EMPTY; doc.mime = MIME_EMPTY;
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) { } else if (*(job->filepath + job->ext) != '\0') {
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext); doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
} }
int fd = -1;
int bytes_read = 0; int bytes_read = 0;
if (doc.mime == 0) { if (doc.mime == 0) {
// Get mime type with libmagic // Get mime type with libmagic
bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE); fd = open(job->filepath, O_RDONLY);
if (bytes_read == -1) { if (fd == -1) {
CLOSE_FILE(job->vfile) perror("open");
free(job);
return; return;
} }
bytes_read = read(fd, buf, PARSE_BUF_SIZE);
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read); const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
if (magic_mime_str != NULL) { if (magic_mime_str != NULL) {
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str); doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
if (doc.mime == 0) { if (doc.mime == 0) {
fprintf(stderr, "Couldn't find mime %s, %s\n", magic_mime_str, job->filepath + job->base); fprintf(stderr, "Couldn't find mime %s, %s!\n", magic_mime_str, job->filepath + job->base);
} }
} }
} }
@@ -104,53 +93,34 @@ void parse(void *arg) {
if (!(SHOULD_PARSE(doc.mime))) { if (!(SHOULD_PARSE(doc.mime))) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || } else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || mmime == MimeAudio || mmime == MimeImage) {
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) { parse_media(job->filepath, &doc);
if (job->vfile.is_fs_file) {
parse_media_filename(job->filepath, &doc);
} else {
parse_media_vfile(&job->vfile, &doc);
}
} else if (IS_PDF(doc.mime)) { } else if (IS_PDF(doc.mime)) {
void *pdf_buf = read_all(job, (char *) buf, bytes_read); void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd);
parse_pdf(pdf_buf, doc.size, &doc); parse_pdf(pdf_buf, doc.size, &doc);
if (pdf_buf != buf && pdf_buf != NULL) { if (pdf_buf != buf) {
free(pdf_buf); free(pdf_buf);
} }
} else if (mmime == MimeText && ScanCtx.content_size > 0) { } else if (mmime == MimeText && ScanCtx.content_size > 0) {
parse_text(bytes_read, &job->vfile, (char *) buf, &doc); parse_text(bytes_read, &fd, (char *) buf, &doc);
} else if (IS_FONT(doc.mime)) { } else if (IS_FONT(doc.mime)) {
void *font_buf = read_all(job, (char *) buf, bytes_read); void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
parse_font(font_buf, doc.size, &doc); parse_font(font_buf, doc.size, &doc);
if (font_buf != buf && font_buf != NULL) { if (font_buf != buf) {
free(font_buf); free(font_buf);
} }
} else if (
ScanCtx.archive_mode != ARC_MODE_SKIP && (
IS_ARC(doc.mime) ||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
)) {
parse_archive(&job->vfile, &doc);
}
//Parent meta
if (!uuid_is_null(job->parent)) {
char tmp[UUID_STR_LEN];
uuid_unparse(job->parent, tmp);
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
meta_parent->key = MetaParent;
strcpy(meta_parent->strval, tmp);
APPEND_META((&doc), meta_parent)
} }
write_document(&doc); write_document(&doc);
CLOSE_FILE(job->vfile) if (fd != -1) {
close(fd);
}
free(job);
} }

View File

@@ -5,9 +5,6 @@
#define PARSE_BUF_SIZE 4096 #define PARSE_BUF_SIZE 4096
int fs_read(struct vfile *f, void *buf, size_t size);
void fs_close(struct vfile *f);
void parse(void *arg); void parse(void *arg);
#endif #endif

View File

@@ -1,22 +1,10 @@
#include <src/ctx.h>
#include "pdf.h" #include "pdf.h"
#include "src/ctx.h" #include "src/ctx.h"
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) { fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
int err = 0; fz_page *cover = fz_load_page(ctx, fzdoc, 0);
fz_page *cover = NULL;
fz_var(cover);
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
err = 1;
if (err != 0) {
fz_drop_page(ctx, cover);
return NULL;
}
fz_rect bounds = fz_bound_page(ctx, cover); fz_rect bounds = fz_bound_page(ctx, cover);
float scale; float scale;
@@ -36,49 +24,24 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_clear_pixmap_with_value(ctx, pixmap, 0xFF); fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
fz_device *dev = fz_new_draw_device(ctx, m, pixmap); fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
fz_var(err); pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_try(ctx) fz_try(ctx)
{
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_run_page(ctx, cover, dev, fz_identity, NULL); fz_run_page(ctx, cover, dev, fz_identity, NULL);
}
fz_always(ctx) fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
pthread_mutex_unlock(&ScanCtx.mupdf_mu); pthread_mutex_unlock(&ScanCtx.mupdf_mu);
}
fz_catch(ctx) fz_catch(ctx)
err = ctx->error.errcode; fz_rethrow(ctx);
if (err != 0) { fz_drop_device(ctx, dev);
fz_drop_page(ctx, cover);
fz_drop_pixmap(ctx, pixmap);
return NULL;
}
fz_buffer *fzbuf = NULL; fz_buffer *fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
fz_var(fzbuf); unsigned char *tn_buf;
fz_var(err); size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
fz_try(ctx) store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
fz_catch(ctx)
err = ctx->error.errcode;
if (err == 0) {
unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
}
fz_drop_buffer(ctx, fzbuf);
fz_drop_pixmap(ctx, pixmap); fz_drop_pixmap(ctx, pixmap);
fz_drop_buffer(ctx, fzbuf);
if (err != 0) {
fz_drop_page(ctx, cover);
return NULL;
}
return cover; return cover;
} }
@@ -86,190 +49,113 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
void fz_noop_callback(__attribute__((unused)) void *user, __attribute__((unused)) const char *message) {} void fz_noop_callback(__attribute__((unused)) void *user, __attribute__((unused)) const char *message) {}
/*
 * Apply the MuPDF context setup: disable ICC, register the document
 * handlers, and point both the warning printer and the error printer at
 * fz_noop_callback.
 *
 * ctx: the MuPDF context to configure.
 */
void init_ctx(fz_context *ctx) {
    fz_disable_icc(ctx);
    fz_register_document_handlers(ctx);

    /* Both diagnostic printers are replaced by the no-op callback. */
    ctx->error.print = fz_noop_callback;
    ctx->warn.print = fz_noop_callback;
}
/*
 * Append every character of one structured-text block to `tex`.
 *
 * block: block to read; anything whose type is not FZ_STEXT_BLOCK_TEXT is
 *        ignored.
 * tex:   destination text buffer.
 *
 * Returns TEXT_BUF_FULL as soon as text_buffer_append_char() reports it
 * (the remainder of the block is left unread), and 0 otherwise.
 */
int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
    if (block->type == FZ_STEXT_BLOCK_TEXT) {
        /* Walk the block's lines, and each line's characters, in list order. */
        for (fz_stext_line *ln = block->u.t.first_line; ln != NULL; ln = ln->next) {
            for (fz_stext_char *ch = ln->first_char; ch != NULL; ch = ch->next) {
                if (text_buffer_append_char(tex, ch->c) == TEXT_BUF_FULL) {
                    return TEXT_BUF_FULL;
                }
            }
        }
    }
    return 0;
}
void parse_pdf(void *buf, size_t buf_len, document_t *doc) { void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
if (buf == NULL) {
return;
}
static int mu_is_initialized = 0; static int mu_is_initialized = 0;
if (!mu_is_initialized) { if (!mu_is_initialized) {
pthread_mutex_init(&ScanCtx.mupdf_mu, NULL); pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);
mu_is_initialized = 1; mu_is_initialized = 1;
} }
fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
init_ctx(ctx);
int err = 0;
fz_document *fzdoc = NULL;
fz_stream *stream = NULL; fz_stream *stream = NULL;
fz_var(fzdoc); fz_document *fzdoc = NULL;
fz_var(stream); fz_var(stream);
fz_var(err); fz_var(fzdoc);
fz_try(ctx) fz_try(ctx)
{ {
fz_disable_icc(ctx);
fz_register_document_handlers(ctx);
//disable warnings
ctx->warn.print = fz_noop_callback;
ctx->error.print = fz_noop_callback;
stream = fz_open_memory(ctx, buf, buf_len); stream = fz_open_memory(ctx, buf, buf_len);
fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream); fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err) { char title[4096] = {'\0',};
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
char title[4096] = {'\0',};
fz_try(ctx)
fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title)); fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
fz_catch(ctx) printf("Title: %s\n", title); //todo rmv
;
if (strlen(title) > 0) { if (strlen(title) > 0) {
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title)); meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title) + 1);
meta_content->key = MetaTitle; meta_content->key = MetaTitle;
strcpy(meta_content->strval, title); strcpy(meta_content->strval, title);
APPEND_META(doc, meta_content) APPEND_META(doc, meta_content)
} }
int page_count = -1; int page_count = fz_count_pages(ctx, fzdoc);
fz_var(err);
fz_try(ctx)
page_count = fz_count_pages(ctx, fzdoc);
fz_catch(ctx)
err = ctx->error.errcode;
if (err) { fz_page *cover = render_cover(ctx, doc, fzdoc);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_page *cover = NULL; fz_stext_options opts;
if (ScanCtx.tn_size > 0) {
cover = render_cover(ctx, doc, fzdoc);
} else {
fz_var(cover);
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
cover = NULL;
}
if (cover == NULL) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
if (ScanCtx.content_size > 0) {
fz_stext_options opts = {0};
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size); text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) { for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page = NULL; fz_page *page; if (current_page == 0) {
if (current_page == 0) {
page = cover; page = cover;
} else { } else {
fz_var(err); page = fz_load_page(ctx, fzdoc, current_page);
fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page);
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
} }
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page)); fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts); fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
fz_var(err); pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_try(ctx) fz_try(ctx)
fz_run_page(ctx, page, dev, fz_identity, NULL); fz_run_page_contents(ctx, page, dev, fz_identity, NULL);
fz_always(ctx) fz_always(ctx)
{ pthread_mutex_unlock(&ScanCtx.mupdf_mu);
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
}
fz_catch(ctx) fz_catch(ctx)
err = ctx->error.errcode; fz_rethrow(ctx);
if (err != 0) { fz_drop_device(ctx, dev);
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_block *block = stext->first_block; fz_stext_block *block = stext->first_block;
while (block != NULL) { while (block != NULL) {
int ret = read_stext_block(block, &text_buf);
if (ret == TEXT_BUF_FULL) { if (block->type != FZ_STEXT_BLOCK_TEXT) {
break; block = block->next;
continue;
}
fz_stext_line *line = block->u.t.first_line;
while (line != NULL) {
fz_stext_char *c = line->first_char;
while (c != NULL) {
if (text_buffer_append_char(&text_buf, c->c) == TEXT_BUF_FULL) {
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
goto write_loop_end;
}
c = c->next;
}
line = line->next;
} }
block = block->next; block = block->next;
} }
fz_drop_stext_page(ctx, stext);
fz_drop_page(ctx, page); fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) {
break;
}
} }
write_loop_end:;
text_buffer_terminate_string(&text_buf); text_buffer_terminate_string(&text_buf);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur); meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent; meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur); memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
APPEND_META(doc, meta_content)
text_buffer_destroy(&text_buf); text_buffer_destroy(&text_buf);
APPEND_META(doc, meta_content)
}
fz_always(ctx)
{
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
} fz_catch(ctx) {
fprintf(stderr, "Error %s %s\n", doc->filepath, ctx->error.message);
} }
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
} }

View File

@@ -1,7 +1,7 @@
#include "text.h" #include "text.h"
#include "src/ctx.h" #include "src/ctx.h"
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc) { void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
char *intermediate_buf; char *intermediate_buf;
int intermediate_buf_len; int intermediate_buf_len;
@@ -13,6 +13,10 @@ void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc) {
memcpy(intermediate_buf, buf, to_copy); memcpy(intermediate_buf, buf, to_copy);
} else { } else {
if (*fd == -1) {
*fd = open(doc->filepath, O_RDONLY);
}
int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read; int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;
intermediate_buf = malloc(to_read + bytes_read); intermediate_buf = malloc(to_read + bytes_read);
@@ -21,16 +25,19 @@ void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc) {
memcpy(intermediate_buf, buf, bytes_read); memcpy(intermediate_buf, buf, bytes_read);
} }
f->read(f, intermediate_buf + bytes_read, to_read); read(*fd, intermediate_buf + bytes_read, to_read);
} }
text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);
meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int i = 0; i < intermediate_buf_len; i++) {
text_buffer_append_char(&text_buf, *(intermediate_buf + i));
}
text_buffer_terminate_string(&text_buf);
meta_line_t *meta = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta->key = MetaContent; meta->key = MetaContent;
strcpy(meta->strval, tex.dyn_buffer.buf); strcpy(meta->strval, text_buf.dyn_buffer.buf);
APPEND_META(doc, meta) text_buffer_destroy(&text_buf);
free(intermediate_buf); free(intermediate_buf);
text_buffer_destroy(&tex); APPEND_META(doc, meta)
} }

View File

@@ -3,6 +3,6 @@
#include "src/sist.h" #include "src/sist.h"
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc); void parse_text(int bytes_read, int *fd, char *buf, document_t *doc);
#endif #endif

View File

@@ -16,7 +16,6 @@
#include <libswscale/swscale.h> #include <libswscale/swscale.h>
#include <libswresample/swresample.h> #include <libswresample/swresample.h>
#include <libavcodec/avcodec.h> #include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <ctype.h> #include <ctype.h>
#include <mupdf/fitz.h> #include <mupdf/fitz.h>
#include <mupdf/pdf.h> #include <mupdf/pdf.h>
@@ -26,17 +25,12 @@
#include <pthread.h> #include <pthread.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <wordexp.h> #include <wordexp.h>
#include "ft2build.h"
#include "freetype/freetype.h"
#include <archive.h>
#include <archive_entry.h>
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
#include <onion/onion.h> #include <onion/onion.h>
#include <onion/handler.h> #include <onion/handler.h>
#include <onion/block.h> #include <onion/block.h>
#include <onion/shortcuts.h> #include <onion/shortcuts.h>
#include <onion/codecs.h>
#include <curl/curl.h> #include <curl/curl.h>
#endif #endif
@@ -54,15 +48,12 @@
#include "parsing/pdf.h" #include "parsing/pdf.h"
#include "parsing/media.h" #include "parsing/media.h"
#include "parsing/font.h" #include "parsing/font.h"
#include "parsing/arc.h"
#include "cli.h" #include "cli.h"
#include "utf8.h/utf8.h"
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
#include "src/index/elastic.h" #include "src/index/elastic.h"
#include "index/web.h" #include "index/web.h"
#include "web/serve.h" #include "web/serve.h"
#include "web/auth_basic.h"
#endif #endif
; ;

View File

@@ -25,7 +25,6 @@ typedef struct tpool {
int done_cnt; int done_cnt;
int stop; int stop;
void (*cleanup_func)(); void (*cleanup_func)();
} tpool_t; } tpool_t;
@@ -101,7 +100,7 @@ static void *tpool_worker(void *arg) {
tpool_t *pool = arg; tpool_t *pool = arg;
while (1) { while (1) {
pthread_mutex_lock(&pool->work_mutex); pthread_mutex_lock(&(pool->work_mutex));
if (pool->stop) { if (pool->stop) {
break; break;
} }
@@ -114,21 +113,14 @@ static void *tpool_worker(void *arg) {
pthread_mutex_unlock(&(pool->work_mutex)); pthread_mutex_unlock(&(pool->work_mutex));
if (work != NULL) { if (work != NULL) {
if (pool->stop) {
break;
}
work->func(work->arg); work->func(work->arg);
free(work->arg);
free(work); free(work);
} }
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&(pool->work_mutex));
if (work != NULL) { pool->done_cnt++;
pool->done_cnt++;
}
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size); progress_bar_print((double)pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
if (pool->work_head == NULL) { if (pool->work_head == NULL) {
pthread_cond_signal(&(pool->working_cond)); pthread_cond_signal(&(pool->working_cond));
@@ -149,15 +141,11 @@ void tpool_wait(tpool_t *pool) {
if (pool->done_cnt < pool->work_cnt) { if (pool->done_cnt < pool->work_cnt) {
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex)); pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
} else { } else {
usleep(500000); pool->stop = 1;
if (pool->done_cnt == pool->work_cnt) { break;
pool->stop = 1;
usleep(1000000);
break;
}
} }
progress_bar_print(100.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
} }
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
pthread_mutex_unlock(&(pool->work_mutex)); pthread_mutex_unlock(&(pool->work_mutex));
} }
@@ -180,8 +168,7 @@ void tpool_destroy(tpool_t *pool) {
for (size_t i = 0; i < pool->thread_cnt; i++) { for (size_t i = 0; i < pool->thread_cnt; i++) {
pthread_t thread = pool->threads[i]; pthread_t thread = pool->threads[i];
if (thread != 0) { if (thread != 0) {
void *_; pthread_cancel(thread);
pthread_join(thread, &_);
} }
} }
@@ -201,11 +188,11 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
tpool_t *pool = malloc(sizeof(tpool_t)); tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt; pool->thread_cnt = thread_cnt;
pool->work_cnt = 0; pool->work_cnt =0;
pool->done_cnt = 0; pool->done_cnt =0;
pool->stop = 0; pool->stop = 0;
pool->cleanup_func = cleanup_func; pool->cleanup_func = cleanup_func;
pool->threads = calloc(sizeof(pthread_t), thread_cnt); pool->threads = malloc(sizeof(pthread_t) * thread_cnt);
pthread_mutex_init(&(pool->work_mutex), NULL); pthread_mutex_init(&(pool->work_mutex), NULL);
@@ -215,12 +202,11 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
pool->work_head = NULL; pool->work_head = NULL;
pool->work_tail = NULL; pool->work_tail = NULL;
for (size_t i = 0; i < thread_cnt; i++) {
pthread_t thread = pool->threads[i];
pthread_create(&thread, NULL, tpool_worker, pool);
pthread_detach(thread);
}
return pool; return pool;
} }
// Spawn the worker threads of a pool.
//
// Starts pool->thread_cnt threads, each running tpool_worker with the pool as
// its argument, and stores every thread handle in pool->threads.
//
// When pthread_create fails, the handle it was asked to fill in is left
// undefined. The slot is therefore reset to 0, which is the value
// tpool_destroy tests for (thread != 0) before touching a handle, so a worker
// that never started is skipped instead of being acted on through garbage.
void tpool_start(tpool_t *pool) {
    for (size_t i = 0; i < pool->thread_cnt; i++) {
        int rc = pthread_create(&pool->threads[i], NULL, tpool_worker, pool);
        if (rc != 0) {
            pool->threads[i] = 0;
        }
    }
}

View File

@@ -9,7 +9,6 @@ typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg); typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)()); tpool_t *tpool_create(size_t num, void (*cleanup_func)());
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *tm); void tpool_destroy(tpool_t *tm);
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg); int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);

View File

@@ -9,12 +9,6 @@
#define IS_META_LONG(key) (key & META_LONG_MASK) == META_LONG_MASK #define IS_META_LONG(key) (key & META_LONG_MASK) == META_LONG_MASK
#define IS_META_STR(meta) (meta->key & META_STR_MASK) == META_STR_MASK #define IS_META_STR(meta) (meta->key & META_STR_MASK) == META_STR_MASK
#define ARC_MODE_SKIP 0
#define ARC_MODE_LIST 1
#define ARC_MODE_SHALLOW 2
#define ARC_MODE_RECURSE 3
typedef int archive_mode_t;
// This is written to file as a 8bit char! // This is written to file as a 8bit char!
enum metakey { enum metakey {
MetaContent = 1 | META_STR_MASK, MetaContent = 1 | META_STR_MASK,
@@ -30,7 +24,6 @@ enum metakey {
MetaGenre = 11 | META_STR_MASK, MetaGenre = 11 | META_STR_MASK,
MetaTitle = 12 | META_STR_MASK, MetaTitle = 12 | META_STR_MASK,
MetaFontName = 13 | META_STR_MASK, MetaFontName = 13 | META_STR_MASK,
MetaParent = 14 | META_STR_MASK,
}; };
typedef struct index_descriptor { typedef struct index_descriptor {
@@ -70,39 +63,13 @@ typedef struct document {
short ext; short ext;
meta_line_t *meta_head; meta_line_t *meta_head;
meta_line_t *meta_tail; meta_line_t *meta_tail;
struct document *child_head;
struct document *child_tail;
char *filepath; char *filepath;
} document_t; } document_t;
// Handle on a readable input, with the operations stored as function pointers.
typedef struct vfile vfile_t;
// Read callback: receives the vfile, a destination buffer and a byte count.
typedef int (*read_func_t)(struct vfile *, void *buf, size_t size);
// Seek callback type. NOTE(review): struct vfile below has no member of this
// type, so nothing visible here ever stores a seek callback.
typedef int (*seek_func_t)(struct vfile *, size_t size, int whence);
// Close callback: receives the vfile to release.
typedef void (*close_func_t)(struct vfile *);
typedef struct vfile {
// Underlying source; the two members share storage.
union {
int fd;
struct archive *arc;
};
// presumably non-zero when `fd` is the active union member — TODO confirm
int is_fs_file;
char *filepath;
// Operations for this source, called as f->read(f, buf, size).
read_func_t read;
close_func_t close;
} vfile_t;
typedef struct parse_job_t { typedef struct parse_job_t {
int base; int base;
int ext; int ext;
struct stat info; struct stat info;
struct vfile vfile;
uuid_t parent;
char filepath[1]; char filepath[1];
} parse_job_t; } parse_job_t;

View File

@@ -46,10 +46,6 @@ void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) {
dyn_buffer_write_char(buf, '\0'); dyn_buffer_write_char(buf, '\0');
} }
// Append the characters of the NUL-terminated string `str` to `buf`.
// Only strlen(str) bytes are handed to dyn_buffer_write, so the terminating
// NUL itself is not written.
void dyn_buffer_append_string(dyn_buffer_t *buf, char *str) {
    const size_t payload_len = strlen(str);
    dyn_buffer_write(buf, str, payload_len);
}
void dyn_buffer_write_int(dyn_buffer_t *buf, int d) { void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
grow_buffer_small(buf); grow_buffer_small(buf);
@@ -93,85 +89,6 @@ void text_buffer_terminate_string(text_buffer_t *buf) {
dyn_buffer_write_char(&buf->dyn_buffer, '\0'); dyn_buffer_write_char(&buf->dyn_buffer, '\0');
} }
// Check whether the bytes at `s` start one well-formed UTF-8 sequence.
//
// Returns TRUE for a single byte with the high bit clear, or for a 2-, 3- or
// 4-byte sequence that passes the checks below; returns FALSE otherwise.
//
// NOTE(review): there is no length argument. Depending on the lead byte this
// reads as far as s[4], so the caller has to guarantee those bytes are
// readable — confirm at the call sites.
__always_inline
int utf8_validchr(const char *s) {
if (0x00 == (0x80 & *s)) {
// High bit clear: one-byte character.
return TRUE;
} else if (0xf0 == (0xf8 & *s)) {
// Lead byte 11110xxx: the next three bytes must all be 10xxxxxx.
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) ||
(0x80 != (0xc0 & s[3]))) {
return FALSE;
}
// The byte after the sequence must not be another continuation byte.
if (0x80 == (0xc0 & s[4])) {
return FALSE;
}
// Reject the case where the lead byte's payload bits and the top two payload
// bits of s[1] are all zero (an overlong form of a smaller code point).
if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) {
return FALSE;
}
} else if (0xe0 == (0xf0 & *s)) {
// Lead byte 1110xxxx: the next two bytes must both be 10xxxxxx.
if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
return FALSE;
}
// The byte after the sequence must not be another continuation byte.
if (0x80 == (0xc0 & s[3])) {
return FALSE;
}
// Reject the case where the lead byte's payload bits and the top payload bit
// of s[1] are all zero (overlong form).
if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) {
return FALSE;
}
} else if (0xc0 == (0xe0 & *s)) {
// Lead byte 110xxxxx: the next byte must be 10xxxxxx.
if (0x80 != (0xc0 & s[1])) {
return FALSE;
}
// The byte after the sequence must not be another continuation byte.
if (0x80 == (0xc0 & s[2])) {
return FALSE;
}
// Reject the case where the upper four payload bits of the lead byte are
// zero (overlong form of a one-byte value).
if (0 == (0x1e & s[0])) {
return FALSE;
}
} else {
// Anything else (a bare continuation byte or 11111xxx) cannot start a sequence.
return FALSE;
}
return TRUE;
}
// Append up to `len` bytes of UTF-8 text from `str` to `buf`, one code point
// at a time through text_buffer_append_char, then terminate the buffer.
//
// Always returns 0.
//
// NOTE(review): the result of text_buffer_append_char is ignored, so a
// TEXT_BUF_FULL from it does not stop the loop.
int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len) {
utf8_int32_t c;
// Nothing to copy: NULL or empty input, a first byte that is NUL, or a first
// byte announcing a multi-byte sequence longer than `len`. The buffer is
// still terminated before returning.
if (str == NULL || len < 1 ||
(0xf0 == (0xf8 & str[0]) && len < 4) ||
(0xe0 == (0xf0 & str[0]) && len < 3) ||
(0xc0 == (0xe0 & str[0]) && len == 1) ||
*(str) == 0) {
text_buffer_terminate_string(buf);
return 0;
}
// utf8codepoint presumably decodes one code point into `c` and returns a
// pointer to the following one — confirm against utf8.h. The loop ends at a
// NUL code point or once the cursor offset plus 4 reaches `len`.
// NOTE(review): utf8_validchr is given `v`, the cursor returned by
// utf8codepoint, while the value appended is `c`; if `v` points past `c`,
// the validity check applies to the following character — confirm intent.
for (void *v = utf8codepoint(str, &c); c != '\0' && ((char *) v - str + 4) < len; v = utf8codepoint(v, &c)) {
if (utf8_validchr(v)) {
text_buffer_append_char(buf, c);
}
}
text_buffer_terminate_string(buf);
return 0;
}
// Append the NUL-terminated UTF-8 string `str` to `buf`, one code point at a
// time through text_buffer_append_char, then terminate the buffer.
//
// Returns 0, the same value text_buffer_append_string returns. The function
// is declared int, so falling off the end without a return statement left
// the result undefined for any caller that reads it.
//
// NOTE(review): as in text_buffer_append_string, utf8_validchr is given the
// cursor returned by utf8codepoint while the value appended is `c` — confirm
// which character is meant to be validated.
int text_buffer_append_string0(text_buffer_t *buf, char *str) {
    utf8_int32_t c;

    for (void *v = utf8codepoint(str, &c); c != '\0'; v = utf8codepoint(v, &c)) {
        if (utf8_validchr(v)) {
            text_buffer_append_char(buf, c);
        }
    }

    text_buffer_terminate_string(buf);
    return 0;
}
int text_buffer_append_char(text_buffer_t *buf, int c) { int text_buffer_append_char(text_buffer_t *buf, int c) {
if (SHOULD_IGNORE_CHAR(c)) { if (SHOULD_IGNORE_CHAR(c)) {
@@ -179,31 +96,15 @@ int text_buffer_append_char(text_buffer_t *buf, int c) {
dyn_buffer_write_char(&buf->dyn_buffer, ' '); dyn_buffer_write_char(&buf->dyn_buffer, ' ');
buf->last_char_was_whitespace = TRUE; buf->last_char_was_whitespace = TRUE;
if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) { if (buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL; return TEXT_BUF_FULL;
} }
} }
} else { } else {
buf->last_char_was_whitespace = FALSE; buf->last_char_was_whitespace = FALSE;
grow_buffer_small(&buf->dyn_buffer); dyn_buffer_write_char(&buf->dyn_buffer, (char) c);
if (0 == ((utf8_int32_t) 0xffffff80 & c)) { if (buf->dyn_buffer.cur >= buf->max_size) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = (char) c;
} else if (0 == ((utf8_int32_t) 0xfffff800 & c)) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xc0 | (char) (c >> 6);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
} else if (0 == ((utf8_int32_t) 0xffff0000 & c)) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xe0 | (char) (c >> 12);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
} else {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xf0 | (char) (c >> 18);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 12) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
}
if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL; return TEXT_BUF_FULL;
} }
} }
@@ -235,7 +136,7 @@ dyn_buffer_t url_escape(char *str) {
dyn_buffer_t text = dyn_buffer_create(); dyn_buffer_t text = dyn_buffer_create();
char *ptr = str; char * ptr = str;
while (*ptr) { while (*ptr) {
if (*ptr == '#') { if (*ptr == '#') {
dyn_buffer_write(&text, "%23", 3); dyn_buffer_write(&text, "%23", 3);
@@ -268,7 +169,7 @@ char *expandpath(const char *path) {
wordexp_t w; wordexp_t w;
wordexp(path, &w, 0); wordexp(path, &w, 0);
char *expanded = malloc(strlen(w.we_wordv[0]) + 2); char * expanded = malloc(strlen(w.we_wordv[0]) + 2);
strcpy(expanded, w.we_wordv[0]); strcpy(expanded, w.we_wordv[0]);
strcat(expanded, "/"); strcat(expanded, "/");

View File

@@ -5,10 +5,7 @@
#define TEXT_BUF_FULL -1 #define TEXT_BUF_FULL -1
#define INITIAL_BUF_SIZE 1024 * 16 #define INITIAL_BUF_SIZE 1024 * 16
#define SHOULD_IGNORE_CHAR(c) c < '0' || c > 'z'
#define SHOULD_IGNORE_CHAR(c) !(SHOULD_KEEP_CHAR(c))
#define SHOULD_KEEP_CHAR(c) (c >= (int)'!')
typedef struct dyn_buffer { typedef struct dyn_buffer {
char *buf; char *buf;
@@ -24,10 +21,8 @@ typedef struct text_buffer {
dyn_buffer_t dyn_buffer; dyn_buffer_t dyn_buffer;
} text_buffer_t; } text_buffer_t;
char *abspath(const char *path); char *abspath(const char * path);
char *expandpath(const char *path); char *expandpath(const char *path);
dyn_buffer_t url_escape(char *str); dyn_buffer_t url_escape(char *str);
void progress_bar_print(double percentage, size_t tn_size, size_t index_size); void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
@@ -47,8 +42,6 @@ void dyn_buffer_write_char(dyn_buffer_t *buf, char c);
void dyn_buffer_write_str(dyn_buffer_t *buf, char *str); void dyn_buffer_write_str(dyn_buffer_t *buf, char *str);
void dyn_buffer_append_string(dyn_buffer_t *buf, char *str);
void dyn_buffer_write_int(dyn_buffer_t *buf, int d); void dyn_buffer_write_int(dyn_buffer_t *buf, int d);
void dyn_buffer_write_short(dyn_buffer_t *buf, short s); void dyn_buffer_write_short(dyn_buffer_t *buf, short s);
@@ -63,16 +56,13 @@ text_buffer_t text_buffer_create(int max_size);
void text_buffer_terminate_string(text_buffer_t *buf); void text_buffer_terminate_string(text_buffer_t *buf);
int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len);
int text_buffer_append_string0(text_buffer_t *buf, char *str);
int text_buffer_append_char(text_buffer_t *buf, int c); int text_buffer_append_char(text_buffer_t *buf, int c);
void incremental_put(GHashTable *table, unsigned long inode_no, int mtime); void incremental_put(GHashTable *table, unsigned long inode_no, int mtime);
int incremental_get(GHashTable *table, unsigned long inode_no); int incremental_get(GHashTable *table, unsigned long inode_no);
int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no); int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no);
#endif #endif

View File

@@ -1,59 +0,0 @@
#include "auth_basic.h"
#define UNAUTHORIZED_TEXT "Unauthorized"
// Private data attached to the handler built by auth_basic().
typedef struct auth_basic_data {
// Handler the request is forwarded to once the credentials check passes;
// released together with this struct in auth_basic_free().
onion_handler *inside;
// Expected credentials, compared verbatim with the text that follows
// "Basic " in the Authorization header. NULL makes every request pass.
// The pointer is stored as given by the caller, not copied.
const char *b64credentials;
} auth_basic_data_t;
// Decide whether an Authorization header value grants access.
//
// expected:    credentials string to match, or NULL to accept every request.
// credentials: raw header value, may be NULL when the header is absent.
//
// Returns TRUE when `expected` is NULL, or when `credentials` starts with
// "Basic " and the remainder equals `expected`; FALSE otherwise.
int authenticate(const char *expected, const char *credentials) {
    static const char scheme[] = "Basic ";
    const size_t scheme_len = sizeof(scheme) - 1;

    if (expected == NULL) {
        return TRUE;
    }
    if (credentials == NULL) {
        return FALSE;
    }
    if (strncmp(credentials, scheme, scheme_len) != 0) {
        return FALSE;
    }

    return strcmp(credentials + scheme_len, expected) == 0 ? TRUE : FALSE;
}
// Request handler that guards `d->inside` with HTTP Basic authentication.
//
// Reads the Authorization header and checks it with authenticate(). On
// success the request is passed to the wrapped handler and its result is
// returned. Otherwise a 401 response carrying a WWW-Authenticate challenge
// and a short text body is produced and OCS_PROCESSED is returned.
int auth_basic_handler(auth_basic_data_t *d,
                       onion_request *req,
                       onion_response *res) {
    const char *credentials = onion_request_get_header(req, "Authorization");

    if (authenticate(d->b64credentials, credentials)) {
        return onion_handler_handle(d->inside, req, res);
    }

    // sizeof on the literal also counts its terminating NUL, which is not
    // part of the message and must not be sent as a body byte.
    const size_t body_len = sizeof(UNAUTHORIZED_TEXT) - 1;

    onion_response_set_header(res, "WWW-Authenticate", "Basic realm=\"sist2\"");
    onion_response_set_code(res, HTTP_UNAUTHORIZED);
    // Declare the length before writing the body so the header value is
    // settled before any output can be produced.
    onion_response_set_length(res, body_len);
    onion_response_write(res, UNAUTHORIZED_TEXT, body_len);

    return OCS_PROCESSED;
}
// Release the private data of an auth_basic handler: first the wrapped
// handler it forwards to, then the struct itself.
void auth_basic_free(auth_basic_data_t *data) {
    onion_handler *wrapped = data->inside;

    onion_handler_free(wrapped);
    free(data);
}
// Build a handler that puts `inside_level` behind HTTP Basic authentication.
//
// b64credentials: expected credentials string; the pointer is stored as-is,
//                 so it must stay valid for as long as the handler exists.
// inside_level:   handler that receives requests passing the check.
//
// Returns the new handler; auth_basic_free is registered to release its
// private data.
onion_handler *auth_basic(const char *b64credentials, onion_handler *inside_level) {
    auth_basic_data_t *state = malloc(sizeof(auth_basic_data_t));
    state->inside = inside_level;
    state->b64credentials = b64credentials;

    onion_handler_handler on_request = (onion_handler_handler) auth_basic_handler;
    onion_handler_private_data_free on_free = (onion_handler_private_data_free) auth_basic_free;

    return onion_handler_new(on_request, state, on_free);
}

View File

@@ -1,4 +0,0 @@
#include "src/sist.h"
onion_handler *auth_basic(const char *b64credentials, onion_handler *inside_level);

View File

@@ -245,8 +245,6 @@ int search(void *p, onion_request *req, onion_response *res) {
if (r->status_code == 200) { if (r->status_code == 200) {
onion_response_write(res, r->body, r->size); onion_response_write(res, r->body, r->size);
} else {
onion_response_set_code(res, HTTP_INTERNAL_ERROR);
} }
free_response(r); free_response(r);
@@ -360,24 +358,12 @@ int file(void *p, onion_request *req, onion_response *res) {
return OCS_PROCESSED; return OCS_PROCESSED;
} }
char *next = arg_uuid; cJSON *doc = elastic_get_document(arg_uuid);
cJSON *doc = NULL; cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = NULL; cJSON *index_id = cJSON_GetObjectItem(source, "index");
cJSON *source = NULL; if (index_id == NULL) {
cJSON_Delete(doc);
while (true) { return OCS_NOT_PROCESSED;
doc = elastic_get_document(next);
source = cJSON_GetObjectItem(doc, "_source");
index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
return OCS_NOT_PROCESSED;
}
cJSON *parent = cJSON_GetObjectItem(source, "parent");
if (parent == NULL) {
break;
}
next = parent->valuestring;
} }
index_t *idx = get_index_by_id(index_id->valuestring); index_t *idx = get_index_by_id(index_id->valuestring);
@@ -405,11 +391,9 @@ void serve(const char *hostname, const char *port) {
onion_set_hostname(o, hostname); onion_set_hostname(o, hostname);
onion_set_port(o, port); onion_set_port(o, port);
onion_url *urls = onion_url_new(); onion_url *urls = onion_root_url(o);
// Static paths // Static paths
onion_set_root_handler(o, auth_basic(WebCtx.b64credentials, onion_url_to_handler(urls)));
onion_url_add(urls, "", search_index); onion_url_add(urls, "", search_index);
onion_url_add(urls, "css", style); onion_url_add(urls, "css", style);
onion_url_add(urls, "js", javascript); onion_url_add(urls, "js", javascript);
@@ -426,7 +410,6 @@ void serve(const char *hostname, const char *port) {
onion_url_add(urls, "^f/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$", file); onion_url_add(urls, "^f/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$", file);
onion_url_add(urls, "i", index_info); onion_url_add(urls, "i", index_info);
printf("Starting web server @ http://%s:%s\n", hostname, port); printf("Starting web server @ http://%s:%s\n", hostname, port);
onion_listen(o); onion_listen(o);

File diff suppressed because one or more lines are too long

1
utf8.h

Submodule utf8.h deleted from 2a7c5bfa95

View File

@@ -1,7 +1,3 @@
*:focus {
outline: 0;
}
a { a {
color: #00BCD4; color: #00BCD4;
} }
@@ -19,31 +15,6 @@ body {
margin-top: 1em; margin-top: 1em;
background: #212121; background: #212121;
color: #e0e0e0; color: #e0e0e0;
border-radius: 1px;
border: none;
}
.sub-document {
background: #37474F;
}
.sub-document .text-muted {
color: #8a949c !important;
}
.list-group-item {
background: #212121;
color: #e0e0e0;
border-top: 1px solid #424242;
border-bottom: none;
border-left: none;
border-right: none;
}
.list-group-item:first-child {
border-top: none;
} }
.navbar-brand { .navbar-brand {
@@ -116,18 +87,12 @@ body {
white-space: nowrap; white-space: nowrap;
text-overflow: ellipsis; text-overflow: ellipsis;
overflow: hidden; overflow: hidden;
color: #00BCD4;
} }
.badge { .badge {
margin-right: 3px; margin-right: 3px;
} }
.badge-user {
color: #212529;
background-color: #e0e0e0;
}
.fit { .fit {
display: block; display: block;
min-width: 64px; min-width: 64px;
@@ -139,15 +104,6 @@ body {
height: auto; height: auto;
} }
.fit-sm {
display: block;
max-width: 64px;
max-height: 64px;
margin: 0 auto 0;
width: auto;
height: auto;
}
.audio-fit { .audio-fit {
height: 39px; height: 39px;
vertical-align: bottom; vertical-align: bottom;
@@ -191,8 +147,6 @@ mark {
border: 1px solid #616161; border: 1px solid #616161;
border-radius: 4px; border-radius: 4px;
margin: 3px; margin: 3px;
white-space: normal;
color: rgb(224, 224, 224);
} }
.irs-single, .irs-from, .irs-to { .irs-single, .irs-from, .irs-to {
@@ -208,7 +162,6 @@ mark {
margin-top: 1em; margin-top: 1em;
margin-bottom: 1em; margin-bottom: 1em;
} }
.custom-select { .custom-select {
overflow: auto; overflow: auto;
background-color: #37474F; background-color: #37474F;
@@ -274,7 +227,6 @@ option {
padding: 0.5rem; padding: 0.5rem;
background: #212121; background: #212121;
color: #eee; color: #eee;
margin-top: 1em;
} }
.btn-xs { .btn-xs {
@@ -285,76 +237,4 @@ option {
.btn { .btn {
color: #eee; color: #eee;
} }
.nav-tabs .nav-link {
color: #e0e0e0;
}
.nav-tabs .nav-item.show .nav-link, .nav-tabs .nav-link.active {
background-color: #212121;
border-color: #616161 #616161 #212121;
color: #e0e0e0;
}
.nav-tabs .nav-link:focus, .nav-tabs .nav-link:focus {
border-color: #616161 #616161 #212121;
color: #e0e0e0;
}
.nav-tabs .nav-link:focus, .nav-tabs .nav-link:hover {
border-color: #e0e0e0 #e0e0e0 #212121;
color: #e0e0e0;
}
.nav-tabs {
border-bottom: #616161;
}
.nav {
margin-top: 0.5rem;
}
@media (max-width: 800px) {
#treeTabs {
flex-basis: inherit;
flex-grow: inherit;
}
}
.list-group {
margin-top: 1em;
}
.list-group-item {
padding: .25rem 0.5rem;
}
.wrapper-sm {
min-width: 64px;
}
.media-expanded {
display: inherit;
}
.media-expanded .fit {
max-height: 250px;
}
@media (max-width: 600px) {
.media-expanded .fit {
max-height: none;
}
.tagline {
display: none;
}
}
.version {
color: #00BCD4;
margin-left: -18px;
margin-top: -14px;
font-size: 11px;
}

View File

@@ -1,10 +1,4 @@
*:focus { body {overflow-y:scroll;}
outline: 0;
}
body {
overflow-y: scroll;
}
.progress { .progress {
margin-top: 1em; margin-top: 1em;
@@ -12,23 +6,14 @@ body {
.card { .card {
margin-top: 1em; margin-top: 1em;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .075) !important;
} }
.sub-document {
background: #AB47BC1F;
}
.navbar-brand { .navbar-brand {
font-size: 1.75rem; font-size: 1.75rem;
padding: 0; padding: 0;
} }
.navbar { .navbar {
background: #F7F7F7; background: #F7F7F7; border-bottom: solid 1px #dfdfdf;
border-bottom: solid 1px #dfdfdf;
} }
.document { .document {
padding: 0.5rem; padding: 0.5rem;
} }
@@ -61,11 +46,6 @@ body {
background-color: #FFC107; background-color: #FFC107;
} }
.badge-user {
color: #212529;
background-color: #e0e0e0;
}
.badge-text { .badge-text {
color: #FFFFFF; color: #FFFFFF;
background-color: #FAAB3C; background-color: #FAAB3C;
@@ -103,15 +83,6 @@ body {
height: auto; height: auto;
} }
.fit-sm {
display: block;
max-width: 64px;
max-height: 64px;
margin: 0 auto 0;
width: auto;
height: auto;
}
.audio-fit { .audio-fit {
height: 39px; height: 39px;
vertical-align: bottom; vertical-align: bottom;
@@ -126,17 +97,16 @@ body {
} }
@media (min-width: 1500px) { @media (min-width: 1500px) {
.container { .container {
max-width: 1440px; max-width: 1440px;
} }
.card-columns { .card-columns {
column-count: 5; column-count: 5;
} }
} }
@media (min-width: 1800px) { @media (min-width: 1800px) {
.container { .container {
max-width: 1550px; max-width: 1550px;
} }
} }
@@ -148,15 +118,13 @@ mark {
} }
.content-div { .content-div {
font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;
font-size: 13px; font-size: 13px;
padding: 1em; padding: 1em;
background-color: #f5f5f5; background-color: #f5f5f5;
border: 1px solid #ccc; border: 1px solid #ccc;
border-radius: 4px; border-radius: 4px;
margin: 3px; margin: 3px;
white-space: normal;
color: #000;
} }
.irs-single, .irs-from, .irs-to { .irs-single, .irs-from, .irs-to {
@@ -176,7 +144,8 @@ mark {
margin-bottom: 1em; margin-bottom: 1em;
} }
.inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow { .inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow
{
background: none; background: none;
} }
@@ -191,60 +160,11 @@ mark {
.page-indicator { .page-indicator {
line-height: 1rem; line-height: 1rem;
padding: 0.5rem; padding: 0.5rem;
background: #f8f9fa; background: #212121;
margin-top: 1em;
} }
.btn-xs { .btn-xs {
padding: .1rem .3rem; padding: .1rem .3rem;
font-size: .875rem; font-size: .875rem;
border-radius: .2rem; border-radius: .2rem;
} }
.nav {
margin-top: 0.5rem;
}
@media (max-width: 800px) {
#treeTabs {
flex-basis: inherit;
flex-grow: inherit;
}
}
.list-group {
margin-top: 1em;
}
.list-group-item {
padding: .25rem 0.5rem;
}
.wrapper-sm {
min-width: 64px;
}
.media-expanded {
display: inherit;
}
.media-expanded .fit {
max-height: 250px;
}
@media (max-width: 600px) {
.media-expanded .fit {
max-height: none;
}
.tagline {
display: none;
}
}
.version {
color: #007bff;
margin-left: -18px;
margin-top: -14px;
font-size: 11px;
}

View File

@@ -75,84 +75,6 @@ function shouldPlayVideo(hit) {
return videoc !== "hevc" && videoc !== "mpeg2video" && videoc !== "wmv3"; return videoc !== "hevc" && videoc !== "mpeg2video" && videoc !== "wmv3";
} }
/**
 * Build an empty <div> with a fixed inline height.
 *
 * The height is 64px in small mode and 175px otherwise, unless `w > h` and
 * `base / w / h` reaches the mode's threshold (100 or 272), in which case it
 * is `base * w / h`.
 *
 * NOTE(review): the threshold test divides by both dimensions, so the scaled
 * branch is only reached for very small values of w and h — confirm that
 * this is the intended aspect-ratio test.
 *
 * @param w width of the item the placeholder stands in for
 * @param h height of the item the placeholder stands in for
 * @param small truthy to use the 64px variant
 * @returns the new div element
 */
function makePlaceholder(w, h, small) {
    const base = small ? 64 : 175;
    const threshold = small ? 100 : 272;

    let height = base;
    if (w > h && (base / w / h) >= threshold) {
        height = base * w / h;
    }

    const placeholder = document.createElement("div");
    placeholder.setAttribute("style", `height: ${height}px`);
    return placeholder;
}
/**
 * Build the title element for a search hit.
 *
 * Creates a <div class="file-title">, lets applyNameToTitle fill it in, and
 * sets its tooltip to "<path>/<name><extension>", where the extension is
 * prefixed with "." and is empty when the hit has none.
 *
 * @param hit search hit with a `_source` object
 * @returns the title element
 */
function makeTitle(hit) {
    const title = document.createElement("div");
    title.setAttribute("class", "file-title");

    const source = hit["_source"];
    const hasExtension = source.hasOwnProperty("extension") && source["extension"] !== "";
    const extension = hasExtension ? "." + source["extension"] : "";

    applyNameToTitle(hit, title, extension);

    const fullPath = hit["_source"]["path"] + "/" + hit["_source"]["name"] + extension;
    title.setAttribute("title", fullPath);
    return title;
}
/**
 * Build the badge elements shown for a search hit.
 *
 * - "video" / "image": one `badge-video` badge with the `videoc` value (its
 *   first space removed), when the hit has one.
 * - "audio": one `badge-audio` badge with the `audioc` value, when present.
 * - Any category: one `badge-user` badge per entry of `tag`. An entry may be
 *   "name#rrggbb"; the part after "#" becomes the background colour and the
 *   text colour is "#000" when lum() of it exceeds 40, "#fff" otherwise.
 *   Only the last dot-separated segment of the name is displayed.
 *
 * @param hit search hit with a `_source` object
 * @param mimeCategory first component of the hit's mime type
 * @returns array of <span> elements, format badge first, then user tags
 */
function getTags(hit, mimeCategory) {
    const source = hit["_source"];
    const badges = [];

    // Span with the given badge classes, still without text.
    const newBadge = (classes) => {
        const span = document.createElement("span");
        span.setAttribute("class", classes);
        return span;
    };

    if (mimeCategory === "video" || mimeCategory === "image") {
        if (source.hasOwnProperty("videoc")) {
            const videoBadge = newBadge("badge badge-pill badge-video");
            videoBadge.appendChild(document.createTextNode(source["videoc"].replace(" ", "")));
            badges.push(videoBadge);
        }
    } else if (mimeCategory === "audio") {
        if (source.hasOwnProperty("audioc")) {
            const audioBadge = newBadge("badge badge-pill badge-audio");
            audioBadge.appendChild(document.createTextNode(source["audioc"]));
            badges.push(audioBadge);
        }
    }

    // User tags
    if (source.hasOwnProperty("tag")) {
        source["tag"].forEach(function (rawTag) {
            const userBadge = newBadge("badge badge-pill badge-user");

            const parts = rawTag.split("#");
            if (parts.length > 1) {
                const background = "#" + parts[1];
                const foreground = lum(parts[1]) > 40 ? "#000" : "#fff";
                userBadge.setAttribute("style", `background-color: ${background}; color: ${foreground}`);
            }

            const segments = parts[0].split(".");
            const label = segments[segments.length - 1];
            userBadge.appendChild(document.createTextNode(label));
            badges.push(userBadge);
        });
    }

    return badges;
}
/** /**
* *
* @param hit * @param hit
@@ -160,30 +82,32 @@ function getTags(hit, mimeCategory) {
*/ */
function createDocCard(hit) { function createDocCard(hit) {
let docCard = document.createElement("div"); let docCard = document.createElement("div");
docCard.setAttribute("class", "card"); docCard.setAttribute("class", "card shadow-sm");
let docCardBody = document.createElement("div"); let docCardBody = document.createElement("div");
docCardBody.setAttribute("class", "card-body document"); docCardBody.setAttribute("class", "card-body document");
//Title
let title = makeTitle(hit);
let isSubDocument = false;
let link = document.createElement("a"); let link = document.createElement("a");
link.setAttribute("href", "f/" + hit["_id"]); link.setAttribute("href", "f/" + hit["_id"]);
link.setAttribute("target", "_blank"); link.setAttribute("target", "_blank");
link.appendChild(title);
if (hit["_source"].hasOwnProperty("parent")) { //Title
docCard.classList.add("sub-document"); let title = document.createElement("p");
isSubDocument = true; title.setAttribute("class", "file-title");
} let extension = hit["_source"].hasOwnProperty("extension") && hit["_source"]["extension"] !== "" ? "." + hit["_source"]["extension"] : "";
applyNameToTitle(hit, title, extension);
title.setAttribute("title", hit["_source"]["path"] + "/" + hit["_source"]["name"] + extension);
docCard.appendChild(title);
let tagContainer = document.createElement("div"); let tagContainer = document.createElement("div");
tagContainer.setAttribute("class", "card-text"); tagContainer.setAttribute("class", "card-text");
if (hit["_source"].hasOwnProperty("mime") && hit["_source"]["mime"] !== null) { if (hit["_source"].hasOwnProperty("mime") && hit["_source"]["mime"] !== null) {
let tags = [];
let thumbnail = null;
let thumbnailOverlay = null; let thumbnailOverlay = null;
let imgWrapper = document.createElement("div"); let imgWrapper = document.createElement("div");
imgWrapper.setAttribute("style", "position: relative"); imgWrapper.setAttribute("style", "position: relative");
@@ -191,7 +115,28 @@ function createDocCard(hit) {
let mimeCategory = hit["_source"]["mime"].split("/")[0]; let mimeCategory = hit["_source"]["mime"].split("/")[0];
//Thumbnail //Thumbnail
let thumbnail = makeThumbnail(mimeCategory, hit, imgWrapper, false); if (mimeCategory === "video" && shouldPlayVideo(hit)) {
thumbnail = document.createElement("video");
addVidSrc("f/" + hit["_id"], hit["_source"]["mime"], thumbnail);
thumbnail.setAttribute("class", "fit");
thumbnail.setAttribute("loop", "");
thumbnail.setAttribute("controls", "");
thumbnail.setAttribute("preload", "none");
thumbnail.setAttribute("poster", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
thumbnail.addEventListener("dblclick", function () {
thumbnail.webkitRequestFullScreen();
});
} else if ((hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 20 && hit["_source"]["height"] > 20)
|| hit["_source"]["mime"] === "application/pdf"
|| hit["_source"]["mime"] === "application/epub+zip"
|| hit["_source"]["mime"] === "application/x-cbz"
|| hit["_source"].hasOwnProperty("font_name")
) {
thumbnail = document.createElement("img");
thumbnail.setAttribute("class", "card-img-top fit");
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
}
//Thumbnail overlay //Thumbnail overlay
switch (mimeCategory) { switch (mimeCategory) {
@@ -201,17 +146,15 @@ function createDocCard(hit) {
thumbnailOverlay.setAttribute("class", "card-img-overlay"); thumbnailOverlay.setAttribute("class", "card-img-overlay");
//Resolution //Resolution
if (hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 32 && hit["_source"]["height"] > 32) { let resolutionBadge = document.createElement("span");
let resolutionBadge = document.createElement("span"); resolutionBadge.setAttribute("class", "badge badge-resolution");
resolutionBadge.setAttribute("class", "badge badge-resolution"); if (hit["_source"].hasOwnProperty("width")) {
if (hit["_source"].hasOwnProperty("width")) { resolutionBadge.appendChild(document.createTextNode(hit["_source"]["width"] + "x" + hit["_source"]["height"]));
resolutionBadge.appendChild(document.createTextNode(hit["_source"]["width"] + "x" + hit["_source"]["height"]));
}
thumbnailOverlay.appendChild(resolutionBadge);
} }
thumbnailOverlay.appendChild(resolutionBadge);
// Hover // Hover
if (thumbnail && hit["_source"]["videoc"] === "gif" && !isSubDocument) { if (thumbnail && hit["_source"]["videoc"] === "gif") {
gifOver(thumbnail, hit); gifOver(thumbnail, hit);
} }
break; break;
@@ -221,34 +164,51 @@ function createDocCard(hit) {
if (hit["_source"].hasOwnProperty("duration")) { if (hit["_source"].hasOwnProperty("duration")) {
thumbnailOverlay = document.createElement("div"); thumbnailOverlay = document.createElement("div");
thumbnailOverlay.setAttribute("class", "card-img-overlay"); thumbnailOverlay.setAttribute("class", "card-img-overlay");
const durationBadge = document.createElement("span"); let durationBadge = document.createElement("span");
durationBadge.setAttribute("class", "badge badge-resolution"); durationBadge.setAttribute("class", "badge badge-resolution");
durationBadge.appendChild(document.createTextNode(humanTime(hit["_source"]["duration"]))); durationBadge.appendChild(document.createTextNode(humanTime(hit["_source"]["duration"])));
thumbnailOverlay.appendChild(durationBadge); thumbnailOverlay.appendChild(durationBadge);
} }
} }
// Tags //Tags
let tags = getTags(hit, mimeCategory); switch (mimeCategory) {
for (let i = 0; i < tags.length; i++) { case "video":
tagContainer.appendChild(tags[i]); case "image":
if (hit["_source"].hasOwnProperty("videoc")) {
let formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-video");
formatTag.appendChild(document.createTextNode(hit["_source"]["videoc"].replace(" ", "")));
tags.push(formatTag);
}
break;
case "audio": {
if (hit["_source"].hasOwnProperty("audioc")) {
let formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-audio");
formatTag.appendChild(document.createTextNode(hit["_source"]["audioc"]));
tags.push(formatTag);
}
}
break;
} }
//Content //Content
let contentHl = getContentHighlight(hit); let contentHl = getContentHighlight(hit);
if (contentHl !== undefined) { if (contentHl !== undefined) {
const contentDiv = document.createElement("div"); let contentDiv = document.createElement("div");
contentDiv.setAttribute("class", "content-div"); contentDiv.setAttribute("class", "content-div");
contentDiv.insertAdjacentHTML('afterbegin', contentHl); contentDiv.insertAdjacentHTML('afterbegin', contentHl);
docCard.appendChild(contentDiv); docCard.appendChild(contentDiv);
} }
if (thumbnail !== null) { if (thumbnail !== null) {
imgWrapper.appendChild(thumbnail);
docCard.appendChild(imgWrapper); docCard.appendChild(imgWrapper);
} }
//Audio //Audio
if (mimeCategory === "audio" && hit["_source"].hasOwnProperty("audioc") && !isSubDocument) { if (mimeCategory === "audio" && hit["_source"].hasOwnProperty("audioc")) {
let audio = document.createElement("audio"); let audio = document.createElement("audio");
audio.setAttribute("preload", "none"); audio.setAttribute("preload", "none");
@@ -263,6 +223,10 @@ function createDocCard(hit) {
if (thumbnailOverlay !== null) { if (thumbnailOverlay !== null) {
imgWrapper.appendChild(thumbnailOverlay); imgWrapper.appendChild(thumbnailOverlay);
} }
for (let i = 0; i < tags.length; i++) {
tagContainer.appendChild(tags[i]);
}
} }
//Size tag //Size tag
@@ -274,146 +238,12 @@ function createDocCard(hit) {
docCardBody.appendChild(link); docCardBody.appendChild(link);
docCard.appendChild(docCardBody); docCard.appendChild(docCardBody);
link.appendChild(title);
docCardBody.appendChild(tagContainer); docCardBody.appendChild(tagContainer);
return docCard; return docCard;
} }
/**
 * Build the preview element (inline <video> or thumbnail <img>) for a hit.
 *
 * A placeholder from makePlaceholder() is appended to `imgWrapper` right away;
 * it is replaced by the real element once the poster/thumbnail has loaded.
 *
 * @param mimeCategory first component of the hit's mime type (e.g. "video")
 * @param hit          search hit; `_id` and `_source` are read
 * @param imgWrapper   container element that receives placeholder and preview
 * @param small        truthy for the compact variant ("fit-sm", click to enlarge)
 * @returns the created element, or undefined when the hit gets no preview
 */
function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
    const source = hit["_source"];
    const isSubDocument = source.hasOwnProperty("parent");

    // Still-image preview: large enough dimensions, a known document type, or a font.
    const hasStillPreview = () =>
        (source.hasOwnProperty("width") && source["width"] > 32 && source["height"] > 32)
        || ["application/pdf", "application/epub+zip", "application/x-cbz"].includes(source["mime"])
        || source.hasOwnProperty("font_name");

    let thumbnail;

    if (mimeCategory === "video" && shouldPlayVideo(hit) && !isSubDocument) {
        thumbnail = document.createElement("video");
        addVidSrc("f/" + hit["_id"], source["mime"], thumbnail);

        const placeholder = makePlaceholder(source["width"], source["height"], small);
        imgWrapper.appendChild(placeholder);

        if (small) {
            thumbnail.setAttribute("class", "fit-sm");
            thumbnail.style.cursor = "pointer";
            thumbnail.title = "Enlarge";
            // Clicking the compact player expands it and reveals the controls.
            thumbnail.addEventListener("click", function () {
                imgWrapper.classList.remove("wrapper-sm", "mr-1");
                imgWrapper.parentElement.classList.add("media-expanded");
                thumbnail.setAttribute("class", "fit");
                thumbnail.setAttribute("controls", "");
            });
        } else {
            thumbnail.setAttribute("class", "fit");
            thumbnail.setAttribute("controls", "");
        }

        thumbnail.setAttribute("preload", "none");
        thumbnail.setAttribute("poster", `t/${source["index"]}/${hit["_id"]}`);

        // Double click: go fullscreen, using the prefixed API when it exists.
        thumbnail.addEventListener("dblclick", function () {
            thumbnail.setAttribute("controls", "");
            if (!thumbnail.webkitRequestFullScreen) {
                thumbnail.requestFullscreen();
                return;
            }
            thumbnail.webkitRequestFullScreen();
        });

        // Load the poster separately so the placeholder stays until it is ready.
        const poster = new Image();
        poster.src = thumbnail.getAttribute('poster');
        poster.addEventListener("load", function () {
            placeholder.remove();
            imgWrapper.appendChild(thumbnail);
        });
    } else if (hasStillPreview()) {
        thumbnail = document.createElement("img");
        thumbnail.setAttribute("class", small ? "fit-sm" : "card-img-top fit");
        thumbnail.setAttribute("src", `t/${source["index"]}/${hit["_id"]}`);

        const placeholder = makePlaceholder(source["width"], source["height"], small);
        imgWrapper.appendChild(placeholder);

        // A thumbnail that fails to load removes the whole wrapper.
        thumbnail.addEventListener("error", () => {
            imgWrapper.remove();
        });
        thumbnail.addEventListener("load", () => {
            placeholder.remove();
            imgWrapper.appendChild(thumbnail);
        });
    }

    return thumbnail;
}
/**
 * Build the list-mode row for one search hit.
 *
 * Layout: a "list-group-item" holding a "media" box with the optional small
 * thumbnail, followed by the title link, the optional content highlight and
 * the tag line (format tags plus human-readable size).
 *
 * @param hit search hit; `_id` and `_source` are read
 * @returns the row element
 */
function createDocLine(hit) {
    const source = hit["_source"];
    const mime = source["mime"];
    const mimeCategory = mime ? mime.split("/")[0] : null;
    const tags = getTags(hit, mimeCategory);

    // Small factory for the many <div class="..."> elements below.
    const makeDiv = (cssClass) => {
        const div = document.createElement("div");
        div.setAttribute("class", cssClass);
        return div;
    };

    const imgWrapper = makeDiv("align-self-start mr-1 wrapper-sm");
    const media = makeDiv("media");
    const line = makeDiv("list-group-item flex-column align-items-start");

    // Title, wrapped in a link that opens the file in a new tab.
    const title = makeTitle(hit);
    const link = document.createElement("a");
    link.setAttribute("href", "f/" + hit["_id"]);
    link.setAttribute("target", "_blank");
    link.appendChild(title);

    const titleDiv = makeDiv("file-title");
    titleDiv.appendChild(link);

    line.appendChild(media);

    // The wrapper is attached only when a thumbnail element was created.
    if (makeThumbnail(mimeCategory, hit, imgWrapper, true)) {
        media.appendChild(imgWrapper);
    }
    media.appendChild(titleDiv);

    // Content
    const contentHl = getContentHighlight(hit);
    if (contentHl !== undefined) {
        const contentDiv = makeDiv("content-div");
        contentDiv.insertAdjacentHTML('afterbegin', contentHl);
        titleDiv.appendChild(contentDiv);
    }

    const tagContainer = makeDiv("");
    for (let i = 0; i < tags.length; i++) {
        tagContainer.appendChild(tags[i]);
    }

    //Size tag
    const sizeTag = document.createElement("small");
    sizeTag.appendChild(document.createTextNode(humanFileSize(source["size"])));
    sizeTag.setAttribute("class", "text-muted");
    tagContainer.appendChild(sizeTag);

    titleDiv.appendChild(tagContainer);

    return line;
}
function makePreloader() { function makePreloader() {
const elem = document.createElement("div"); const elem = document.createElement("div");
elem.setAttribute("class", "progress"); elem.setAttribute("class", "progress");
@@ -427,7 +257,7 @@ function makePreloader() {
function makePageIndicator(searchResult) { function makePageIndicator(searchResult) {
let pageIndicator = document.createElement("div"); let pageIndicator = document.createElement("div");
pageIndicator.setAttribute("class", "page-indicator font-weight-light"); pageIndicator.setAttribute("class", "page-indicator shadow-sm font-weight-light");
const totalHits = searchResult["hits"]["total"].hasOwnProperty("value") const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"]; ? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
pageIndicator.appendChild(document.createTextNode(docCount + " / " + totalHits)); pageIndicator.appendChild(document.createTextNode(docCount + " / " + totalHits));
@@ -438,53 +268,18 @@ function makePageIndicator(searchResult) {
function makeStatsCard(searchResult) { function makeStatsCard(searchResult) {
let statsCard = document.createElement("div"); let statsCard = document.createElement("div");
statsCard.setAttribute("class", "card stat"); statsCard.setAttribute("class", "card");
let statsCardBody = document.createElement("div"); let statsCardBody = document.createElement("div");
statsCardBody.setAttribute("class", "card-body"); statsCardBody.setAttribute("class", "card-body");
const resultMode = document.createElement("div"); let stat = document.createElement("p");
resultMode.setAttribute("class", "btn-group btn-group-toggle");
resultMode.setAttribute("data-toggle", "buttons");
resultMode.style.cssFloat = "right";
const listMode = document.createElement("label");
listMode.setAttribute("class", "btn btn-primary");
listMode.appendChild(document.createTextNode("List"));
const gridMode = document.createElement("label");
gridMode.setAttribute("class", "btn btn-primary");
gridMode.appendChild(document.createTextNode("Grid"));
resultMode.appendChild(gridMode);
resultMode.appendChild(listMode);
if (mode === "grid") {
gridMode.classList.add("active")
} else {
listMode.classList.add("active")
}
gridMode.addEventListener("click", () => {
mode = "grid";
localStorage.setItem("mode", mode);
searchDebounced();
});
listMode.addEventListener("click", () => {
mode = "list";
localStorage.setItem("mode", mode);
searchDebounced();
});
let stat = document.createElement("span");
const totalHits = searchResult["hits"]["total"].hasOwnProperty("value") const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"]; ? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms")); stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms"));
statsCardBody.appendChild(stat); statsCardBody.appendChild(stat);
statsCardBody.appendChild(resultMode);
if (totalHits !== 0) { if (totalHits !== 0) {
let sizeStat = document.createElement("div"); let sizeStat = document.createElement("span");
sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"]))); sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"])));
statsCardBody.appendChild(sizeStat); statsCardBody.appendChild(sizeStat);
} }
@@ -496,11 +291,7 @@ function makeStatsCard(searchResult) {
function makeResultContainer() { function makeResultContainer() {
let resultContainer = document.createElement("div"); let resultContainer = document.createElement("div");
resultContainer.setAttribute("class", "card-columns");
if (mode === "grid") {
resultContainer.setAttribute("class", "card-columns");
} else {
resultContainer.setAttribute("class", "list-group");
}
return resultContainer; return resultContainer;
} }

View File

@@ -1,8 +1,6 @@
const SIZE = 40; const SIZE = 40;
let mimeMap = []; let mimeMap = [];
let tagMap = []; let tree;
let mimeTree;
let tagTree;
let searchBar = document.getElementById("searchBar"); let searchBar = document.getElementById("searchBar");
let pathBar = document.getElementById("pathBar"); let pathBar = document.getElementById("pathBar");
@@ -12,13 +10,6 @@ let coolingDown = false;
let searchBusy = true; let searchBusy = true;
let selectedIndices = []; let selectedIndices = [];
// Result layout mode, restored from localStorage; "grid" when nothing is stored.
let mode = localStorage.getItem("mode");
if (mode === null) {
    mode = "grid";
}
jQuery["jsonPost"] = function (url, data) { jQuery["jsonPost"] = function (url, data) {
return jQuery.ajax({ return jQuery.ajax({
url: url, url: url,
@@ -32,7 +23,7 @@ jQuery["jsonPost"] = function (url, data) {
window.onload = () => { window.onload = () => {
$("#theme").on("click", () => { $("#theme").on("click", () => {
if (!document.cookie.includes("sist")) { if (document.cookie.length === 0) {
document.cookie = "sist=dark"; document.cookie = "sist=dark";
} else { } else {
document.cookie = "sist=; Max-Age=-99999999;"; document.cookie = "sist=; Max-Age=-99999999;";
@@ -41,7 +32,7 @@ window.onload = () => {
}) })
}; };
function toggleFuzzy() { function toggleSearchBar() {
searchDebounced(); searchDebounced();
} }
@@ -58,23 +49,6 @@ $.jsonPost("i").then(resp => {
}); });
}); });
/**
 * Create the "node.click" listener for a filter tree.
 *
 * The listener keeps the special "any" node and the other nodes mutually
 * exclusive, then runs the tree's own click handler and triggers a new search.
 *
 * @param tree tree instance whose selection the listener manages
 * @returns listener taking (event, node, handler)
 */
function handleTreeClick (tree) {
    const onNodeClick = (event, node, handler) => {
        event.preventTreeDefault();

        const clickedAny = node.id === "any";
        if (!clickedAny) {
            // A specific node was clicked: "any" can no longer be selected.
            tree.node("any").deselect();
        } else if (!node.itree.state.checked) {
            // "any" is about to become checked: clear every other selection.
            tree.deselect();
        }

        handler();
        searchDebounced();
    };
    return onNodeClick;
}
$.jsonPost("es", { $.jsonPost("es", {
aggs: { aggs: {
mimeTypes: { mimeTypes: {
@@ -111,90 +85,38 @@ $.jsonPost("es", {
}); });
mimeMap.push({"text": "All", "id": "any"}); mimeMap.push({"text": "All", "id": "any"});
mimeTree = new InspireTree({ tree = new InspireTree({
selection: { selection: {
mode: 'checkbox' mode: 'checkbox'
}, },
data: mimeMap data: mimeMap
}); });
new InspireTreeDOM(mimeTree, { new InspireTreeDOM(tree, {
target: '#mimeTree' target: '.tree'
}); });
mimeTree.on("node.click", handleTreeClick(mimeTree)); tree.on("node.click", function (event, node, handler) {
mimeTree.select(); event.preventTreeDefault();
mimeTree.node("any").deselect();
});
function leafTag(tag) { if (node.id === "any") {
const tokens = tag.split("."); if (!node.itree.state.checked) {
return tokens[tokens.length-1] tree.deselect();
}
// Tags tree
$.jsonPost("es", {
aggs: {
tags: {
terms: {
field: "tag",
size: 10000
} }
} else {
tree.node("any").deselect();
} }
},
size: 0,
}).then(resp => {
resp["aggregations"]["tags"]["buckets"]
.sort((a, b) => a["key"].localeCompare(b["key"]))
.forEach(bucket => {
addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
});
tagMap.push({"text": "All", "id": "any"}); handler();
tagTree = new InspireTree({ searchDebounced();
selection: {
mode: 'checkbox'
},
data: tagMap
}); });
new InspireTreeDOM(tagTree, { tree.select();
target: '#tagTree' tree.node("any").deselect();
});
tagTree.on("node.click", handleTreeClick(tagTree));
tagTree.node("any").select();
searchBusy = false; searchBusy = false;
}); });
/**
 * Insert a dotted tag path into a tree of {id, text, children} nodes.
 *
 * Anything after the first "#" in `tag` is ignored for the path (presumably a
 * colour suffix - TODO confirm). Each dot-separated segment becomes one level;
 * intermediate nodes are labelled with the bare segment, the final one with
 * "segment (count)". Existing nodes with the same label are reused.
 *
 * @param map   array of sibling nodes to insert into (mutated)
 * @param tag   remaining tag path, e.g. "a.b.c"
 * @param id    id assigned to every node created for this tag
 * @param count document count shown on the leaf label
 */
function addTag(map, tag, id, count) {
    const segments = tag.split("#")[0].split(".");
    const isLeaf = segments.length === 1;
    const label = isLeaf ? `${segments[0]} (${count})` : segments[0];
    const remainder = segments.slice(1).join(".");

    // Descend into every existing sibling that already carries this label.
    let matched = false;
    for (const existing of map) {
        if (existing.text !== label) {
            continue;
        }
        matched = true;
        if (!isLeaf) {
            addTag(existing.children, remainder, id, count);
        }
    }
    if (matched) {
        return;
    }

    // No sibling with this label yet: create it, filling its subtree first.
    const entry = {
        id: id,
        text: label,
        children: []
    };
    if (!isLeaf) {
        addTag(entry.children, remainder, id, count);
    }
    map.push(entry);
}
new autoComplete({ new autoComplete({
selector: '#pathBar', selector: '#pathBar',
minChars: 1, minChars: 1,
delay: 400, delay: 75,
renderItem: function (item) { renderItem: function (item) {
return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>'; return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>';
}, },
@@ -218,12 +140,7 @@ new autoComplete({
function insertHits(resultContainer, hits) { function insertHits(resultContainer, hits) {
for (let i = 0; i < hits.length; i++) { for (let i = 0; i < hits.length; i++) {
resultContainer.appendChild(createDocCard(hits[i]));
if (mode === "grid") {
resultContainer.appendChild(createDocCard(hits[i]));
} else {
resultContainer.appendChild(createDocLine(hits[i]));
}
docCount++; docCount++;
} }
} }
@@ -264,8 +181,8 @@ function doScroll() {
}) })
} }
function getSelectedNodes(tree) { function getSelectedMimeTypes() {
let selectedNodes = []; let mimeTypes = [];
let selected = tree.selected(); let selected = tree.selected();
@@ -277,11 +194,11 @@ function getSelectedNodes(tree) {
//Only get children //Only get children
if (selected[i].text.indexOf("(") !== -1) { if (selected[i].text.indexOf("(") !== -1) {
selectedNodes.push(selected[i].id); mimeTypes.push(selected[i].id);
} }
} }
return selectedNodes return mimeTypes
} }
function search() { function search() {
@@ -301,37 +218,21 @@ function search() {
let query = searchBar.value; let query = searchBar.value;
let empty = query === ""; let empty = query === "";
let condition = empty ? "should" : "must"; let condition = $("#barToggle").prop("checked") && !empty ? "must" : "should";
let filters = [ let filters = [
{range: {size: {gte: size_min, lte: size_max}}}, {range: {size: {gte: size_min, lte: size_max}}},
{terms: {index: selectedIndices}} {terms: {index: selectedIndices}}
]; ];
let fields = [
"name^8",
"content^3",
"album^8", "artist^8", "title^8", "genre^2", "album_artist^8",
"font_name^6"
];
if ($("#fuzzyToggle").prop("checked")) {
fields.push("content.nGram");
fields.push("name.nGram^3");
}
let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") { if (path !== "") {
filters.push([{term: {path: path}}]) filters.push([{term: {path: path}}])
} }
let mimeTypes = getSelectedNodes(mimeTree); let mimeTypes = getSelectedMimeTypes();
if (!mimeTypes.includes("any")) { if (!mimeTypes.includes("any")) {
filters.push([{terms: {"mime": mimeTypes}}]); filters.push([{terms: {"mime": mimeTypes}}]);
} }
let tags = getSelectedNodes(tagTree);
if (!tags.includes("any")) {
filters.push([{terms: {"tag": tags}}]);
}
$.jsonPost("es?scroll=1", { $.jsonPost("es?scroll=1", {
"_source": { "_source": {
excludes: ["content"] excludes: ["content"]
@@ -342,7 +243,12 @@ function search() {
multi_match: { multi_match: {
query: query, query: query,
type: "most_fields", type: "most_fields",
fields: fields, fields: [
"name^8", "name.nGram^3", "content^3",
"content.nGram",
"album^8", "artist^8", "title^8", "genre^2", "album_artist^8",
"font_name^6"
],
operator: "and" operator: "and"
} }
}, },
@@ -357,10 +263,9 @@ function search() {
post_tags: ["</mark>"], post_tags: ["</mark>"],
fields: { fields: {
content: {}, content: {},
// "content.nGram": {},
name: {}, name: {},
"name.nGram": {}, "name.nGram": {},
font_name: {}, // font_name: {},
} }
}, },
aggs: { aggs: {
@@ -374,6 +279,14 @@ function search() {
//Search stats //Search stats
searchResults.appendChild(makeStatsCard(searchResult)); searchResults.appendChild(makeStatsCard(searchResult));
//Autocomplete
if (searchResult.hasOwnProperty("suggest") && searchResult["suggest"].hasOwnProperty("path")) {
pathAutoComplete = [];
for (let i = 0; i < searchResult["suggest"]["path"][0]["options"].length; i++) {
pathAutoComplete.push(searchResult["suggest"]["path"][0]["options"][i].text)
}
}
//Setup page //Setup page
let resultContainer = makeResultContainer(); let resultContainer = makeResultContainer();
searchResults.appendChild(resultContainer); searchResults.appendChild(resultContainer);
@@ -385,6 +298,7 @@ function search() {
}); });
} }
let pathAutoComplete = [];
let size_min = 0; let size_min = 0;
let size_max = 10000000000000; let size_max = 10000000000000;
@@ -392,8 +306,8 @@ let searchDebounced = _.debounce(function () {
coolingDown = false; coolingDown = false;
search() search()
}, 500); }, 500);
searchBar.addEventListener("keyup", searchDebounced); searchBar.addEventListener("keyup", searchDebounced);
document.getElementById("pathBar").addEventListener("keyup", searchDebounced);
//Size slider //Size slider
$("#sizeSlider").ionRangeSlider({ $("#sizeSlider").ionRangeSlider({
@@ -444,18 +358,15 @@ updateIndices();
//Suggest //Suggest
function getPathChoices() { function getPathChoices() {
return new Promise(getPaths => { return new Promise(getPaths => {
$.jsonPost("es", {
suggest: { let xhttp = new XMLHttpRequest();
path: { xhttp.onreadystatechange = function () {
prefix: pathBar.value, if (this.readyState === 4 && this.status === 200) {
completion: { getPaths(JSON.parse(xhttp.responseText))
field: "suggest-path",
skip_duplicates: true,
size: 10000
}
}
} }
}).then(resp => getPaths(resp["suggest"]["path"][0]["options"].map(opt => opt["_source"]["path"]))); };
}) xhttp.open("GET", "suggest?prefix=" + pathBar.value, true);
xhttp.send();
});
} }

View File

@@ -3,7 +3,7 @@
*/ */
function humanFileSize(bytes) { function humanFileSize(bytes) {
if (bytes === 0) { if (bytes === 0) {
return "0 B" return "? B"
} }
let thresh = 1000; let thresh = 1000;
@@ -43,9 +43,9 @@ function humanTime(sec_num) {
function debounce(func, wait) { function debounce(func, wait) {
let timeout; let timeout;
return function () { return function() {
let context = this, args = arguments; let context = this, args = arguments;
let later = function () { let later = function() {
timeout = null; timeout = null;
func.apply(context, args); func.apply(context, args);
}; };
@@ -54,13 +54,3 @@ function debounce(func, wait) {
func.apply(context, args); func.apply(context, args);
}; };
} }
/**
 * Weighted brightness of a CSS hex colour.
 *
 * The first character of `c` (the leading "#") is dropped and the rest is
 * parsed as hexadecimal. Shorthand "#rgb" values are expanded to "rrggbb"
 * first; previously they were parsed as a 12-bit number and produced a wrong
 * result. The channels are combined with the weights 0.2126 / 0.7152 / 0.0722
 * (the Rec. 709 luma coefficients), applied directly to the 0-255 channel
 * values, so the result lies between 0 and 255.
 *
 * @param c colour string such as "#ff8800" or "#f80"
 * @returns weighted sum of the red, green and blue channels
 */
function lum(c) {
    let hex = c.substring(1);
    if (hex.length === 3) {
        // "abc" -> "aabbcc"
        hex = hex[0] + hex[0] + hex[1] + hex[1] + hex[2] + hex[2];
    }

    const rgb = parseInt(hex, 16);
    const r = (rgb >> 16) & 0xff;
    const g = (rgb >> 8) & 0xff;
    const b = rgb & 0xff;

    return 0.2126 * r + 0.7152 * g + 0.0722 * b;
}

View File

@@ -11,7 +11,6 @@
<nav class="navbar navbar-expand-lg"> <nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a> <a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">v1.1.10</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span> <span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" id="theme" class="btn" title="Toggle theme" href="/">Theme</a> <a style="margin-left: auto" id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
</nav> </nav>
@@ -25,9 +24,9 @@
<div class="input-group"> <div class="input-group">
<div class="input-group-prepend"> <div class="input-group-prepend">
<div class="input-group-text"> <div class="input-group-text">
<span title="Toggle fuzzy searching" onclick="document.getElementById('fuzzyToggle').click()">Fuzzy&nbsp</span> <span onclick="document.getElementById('barToggle').click()">Must match&nbsp</span>
<input title="Toggle fuzzy searching" type="checkbox" id="fuzzyToggle" <input title="Toggle between 'Should' and 'Must' match mode" type="checkbox" id="barToggle"
onclick="toggleFuzzy()" checked> onclick="toggleSearchBar()" checked>
</div> </div>
</div> </div>
<input id="searchBar" type="search" class="form-control" placeholder="Search"> <input id="searchBar" type="search" class="form-control" placeholder="Search">
@@ -42,25 +41,11 @@
<select class="custom-select" id="indices" multiple size="6"></select> <select class="custom-select" id="indices" multiple size="6"></select>
</div> </div>
<div class="col" id="treeTabs"> <div class="col">
<ul class="nav nav-tabs" role="tablist"> <label>Mime types</label>
<li class="nav-item">
<a class="nav-link active" data-toggle="tab" href="#mime" role="tab" aria-controls="home" aria-selected="true">Mime Types</a>
</li>
<li class="nav-item">
<a class="nav-link" data-toggle="tab" href="#tag" role="tab" aria-controls="profile" aria-selected="false" title="User-defined tags">Tags</a>
</li>
</ul>
<div class="tab-content" id="myTabContent">
<div class="tab-pane fade show active" id="mime" role="tabpanel" aria-labelledby="home-tab">
<div id="mimeTree" class="tree"></div>
</div>
<div class="tab-pane fade" id="tag" role="tabpanel" aria-labelledby="profile-tab">
<div id="tagTree" class="tree"></div>
</div>
</div>
</div>
<div class="tree"></div>
</div>
</div> </div>
</div> </div>
</div> </div>