Compare commits

...

21 Commits

Author SHA1 Message Date
be23201210 Archive file support 2019-12-13 10:53:51 -05:00
9778acda77 uifix 2019-12-12 19:19:53 -05:00
8d187926d9 Bugfix with incremental comparison 2019-12-12 15:41:31 -05:00
88c37e3523 Update README.md 2019-12-04 20:56:52 -05:00
d816dae8b3 UI fix, disable thumbnail option, batch index size option 2019-12-01 10:57:29 -05:00
4346c3e063 Also use static libraries in sist2 build 2019-11-30 20:02:26 -05:00
1a1032a8a7 Cleaner shutdown 2019-11-30 19:59:11 -05:00
4ab2ba1a02 #8 Skip PDF scan when content-size is 0 2019-11-21 16:06:31 -05:00
d089601dc5 Add sfv & m3u 2019-11-20 12:31:31 -05:00
11df6cc88f Add nfo to ext list 2019-11-20 11:41:50 -05:00
373ac01e4e Fix for #3 and maximum scan depth 2019-11-19 11:23:30 -05:00
893ff145c5 List mode tweak 2019-11-17 16:28:47 -05:00
6111ded77f Merge pull request #6 from simon987/wip
List mode #5
2019-11-17 16:15:36 -05:00
34cc26b2fd List mode #5 wip 2019-11-17 15:03:24 -05:00
204034d859 Add basic auth. Fixes #4 2019-11-17 10:00:17 -05:00
16ccc6c0d3 Show error message on elasticsearch connection fail 2019-11-17 09:55:16 -05:00
94c617fdc3 Bug fix 2019-11-12 22:11:50 -05:00
ebfd7e03ce User scripts, bug fixes, docker image 2019-11-12 20:58:43 -05:00
6931d320a2 bugfix with invalid/corrupted index path 2019-11-11 20:49:38 -05:00
fc22e52eae Image placeholder 2019-11-09 23:26:49 -05:00
ba81748a74 Update build 2019-11-09 17:15:20 -05:00
55 changed files with 2060 additions and 667 deletions

6
.gitmodules vendored
View File

@@ -25,3 +25,9 @@
[submodule "lib/harfbuzz"]
path = lib/harfbuzz
url = https://github.com/harfbuzz/harfbuzz
[submodule "lib/libmagic"]
path = lib/libmagic
url = https://github.com/threatstack/libmagic
[submodule "lib/bzip2-1.0.6"]
path = lib/bzip2-1.0.6
url = https://github.com/enthought/bzip2-1.0.6

View File

@@ -23,6 +23,7 @@ if (WITH_SIST2)
src/parsing/text.h src/parsing/text.c
src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h
src/web/auth_basic.h src/web/auth_basic.c
src/index/elastic.c src/index/elastic.h
src/util.c src/util.h
src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
@@ -40,7 +41,7 @@ if (WITH_SIST2)
# utf8.h
utf8.h/utf8.h
)
src/parsing/arc.c src/parsing/arc.h)
endif ()
if (WITH_SIST2_SCAN)
@@ -73,7 +74,7 @@ if (WITH_SIST2_SCAN)
# utf8.h
utf8.h/utf8.h
)
src/parsing/arc.c src/parsing/arc.h)
endif ()
find_package(PkgConfig REQUIRED)
@@ -156,9 +157,10 @@ if (WITH_SIST2)
m
bz2
magic
harfbuzz
openjp2
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype
archive
)
endif ()
@@ -227,6 +229,11 @@ if (WITH_SIST2_SCAN)
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype
${PROJECT_SOURCE_DIR}/lib/libarchive.a
${PROJECT_SOURCE_DIR}/lib/liblz4.a
${PROJECT_SOURCE_DIR}/lib/liblzma.a
${PROJECT_SOURCE_DIR}/lib/libzstd.a
)
endif ()

9
Docker/Dockerfile Normal file
View File

@@ -0,0 +1,9 @@
# Runtime image for a pre-built sist2 binary taken from the build context.
FROM ubuntu:19.10
# MAINTAINER is deprecated; a label carries the same metadata.
LABEL maintainer="simon987 <me@simon987.net>"
# Refresh the package index and install in ONE layer so a stale cached
# "update" layer is never combined with a newer install step, then drop the
# package lists to keep the image small.
RUN apt-get update && \
    apt-get install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 && \
    rm -rf /var/lib/apt/lists/*
# COPY is the recommended instruction for plain local files.
COPY sist2 /root/sist2
ENTRYPOINT ["/root/sist2"]

9
Docker/build.sh Executable file
View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Builds the sist2 docker image from the binary in the parent directory and
# pushes it, tagged with both the version reported by the binary and `latest`.

# Stop at the first failing step so that a broken build is never pushed.
set -e

# -f: there may be no copy left over from a previous run
rm -f ./sist2
cp ../sist2 .

version=$(./sist2 --version)
echo "Version ${version}"

docker build . -t "simon987/sist2:${version}" -t simon987/sist2:latest
docker push "simon987/sist2:${version}"
docker push simon987/sist2:latest

View File

@@ -14,24 +14,27 @@ sist2 (Simple incremental search tool)
* Extracts text from common file types\*
* Generates thumbnails\*
* Incremental scanning
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
* Recursive scan inside archive files \*\*
\* See [format support](#format-support)
\* See [format support](#format-support)
\** See [Archive files](#archive-files)
## Getting Started
1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases)
1.
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* `docker pull simon987/sist2:latest`
*Windows users*: `sist2` runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
*Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
\* *Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
## Example usage
![demo](demo.gif)
See help page `sist2 --help` for more details.
**Scan a directory**
@@ -52,6 +55,32 @@ sist2 index --print ./my_idx > raw_documents.ndjson
sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
```
### Use sist2 with docker
**scan**
```bash
docker run -it \
-v /path/to/files/:/files \
-v $PWD/out/:/out \
simon987/sist2 scan -t 4 /files -o /out/my_idx1
```
**index**
```bash
docker run -it --network host \
-v $PWD/out/:/out \
simon987/sist2 index /out/my_idx1
```
**web**
```bash
docker run --rm --network host -d --name sist2 \
-v $PWD/out/my_idx:/idx \
-v $PWD/my/files:/files \
simon987/sist2 web --bind 0.0.0.0 /idx
docker stop sist2
```
## Format support
File type | Library | Content | Thumbnail | Metadata
@@ -62,9 +91,25 @@ pdf,xps,cbz,fb2,epub | MuPDF | yes | yes, `png` | title |
`image/*` | ffmpeg | - | yes, `jpeg` | `EXIF:Artist`, `EXIF:ImageDescription` |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
docx, xlsx, pptx | | *planned* | no | *planned* |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | | yes | no | *planned* |
\* *See [Archive files](#archive-files)*
### Archive files
**sist2** will scan files stored in archive files (zip, tar, 7z...) as if
they were directly in the file system. Recursive scanning (archives inside
archives) is also supported.
**Limitations**:
* Parsing media files with formats that require
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
* Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split
into smaller parts.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
## Build from source
@@ -93,4 +138,4 @@ binaries.
./scripts/get_static_libs.sh
cmake .
make
```
```

Submodule lib/ffmpeg deleted from 0481a1f6e5

Submodule lib/harfbuzz deleted from 7cde68f10c

Submodule lib/mupdf deleted from 91782a4348

Submodule lib/onion deleted from d8d4cc9290

Submodule lib/openjpeg deleted from 5875a6b446

View File

@@ -252,7 +252,7 @@ text/html, acgi|htm|html|htmls|htx|shtml
text/javascript, js
text/mcf, mcf
text/pascal, pas
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u
text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc
@@ -410,4 +410,9 @@ text/PGP,
audio/x-hx-aac-adts,
application/x-chrome-extension,
image/heic, heic
image/x-gem,
image/x-gem,
application/x-lzma, lzma
application/warc, warc
application/x-lz4, lz4
application/x-lzip, lz
application/x-lzop, lzo
1 application/arj arj
252 text/javascript js
253 text/mcf mcf
254 text/pascal pas
255 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u
256 text/richtext rt|rtf|rtx
257 text/rtf
258 text/scriplet wsc
410 audio/x-hx-aac-adts
411 application/x-chrome-extension
412 image/heic heic
413 image/x-gem
414 application/x-lzma lzma
415 application/warc warc
416 application/x-lz4 lz4
417 application/x-lzip lz
418 application/x-lzop lzo

View File

@@ -80,6 +80,9 @@
"analyzer": "my_nGram"
}
}
},
"tag": {
"type": "keyword"
}
}
}

117
scripting/README.md Normal file
View File

@@ -0,0 +1,117 @@
## User scripts
*This document is under construction, more in-depth guide coming soon*
During the `index` step, you can use the `--script-file <script>` option to
modify documents or add user tags. This option is mainly used to
implement automatic tagging based on file attributes.
The scripting language used
([Painless Scripting Language](https://www.elastic.co/guide/en/elasticsearch/painless/7.4/index.html))
is very similar to Java, but you should be able to create user scripts
without programming experience at all if you're somewhat familiar with
regex.
This is the base structure of the documents we're working with:
```json
{
"_id": "e171405c-fdb5-4feb-bb32-82637bc32084",
"_index": "sist2",
"_type": "_doc",
"_source": {
"index": "206b3050-e821-421a-891d-12fcf6c2db0d",
"mime": "application/json",
"size": 1799,
"mtime": 1545443685,
"extension": "md",
"name": "README",
"path": "sist2/scripting",
"content": "..."
}
}
```
**Example script**
This script checks whether the `genre` attribute exists; if it does,
it adds the `genre.<genre>` tag.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase())
}
```
You can use `.` to create a hierarchical tag tree:
![scripting/genre_example](genre_example.png)
To use regular expressions, you need to add this line in `/etc/elasticsearch/elasticsearch.yml`
```yaml
script.painless.regex.enabled: true
```
Or, if you're using Docker, add `-e "script.painless.regex.enabled=true"`
### Examples
If `(20XX)` is in the file name, add the `year.<year>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) {
tags.add("year." + m.group(1))
}
```
Use default *Calibre* folder structure to infer author.
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
// We expect the book path to look like this:
// /path/to/Calibre Library/Author/Title/Title - Author.pdf
if (ctx._source.name.contains("-") && ctx._source.extension == "pdf") {
String[] names = ctx._source.name.splitOnToken('-');
tags.add("author." + names[1].strip());
}
```
If the file name matches the pattern `AAAA-000 fName1 lName1, <fName2 lName2>...`, add the `actress.<actress>` and
`studio.<studio>` tags:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /([A-Z]{4})-[0-9]{3} (.*)/.matcher(ctx._source.name);
if (m.find()) {
tags.add("studio." + m.group(1));
// Take the matched group (.*), and add a tag for
// each name, separated by comma
for (String name : m.group(2).splitOnToken(',')) {
tags.add("actress." + name);
}
}
```
Use the name of the last folder (`/path/to/<studio>/file.mp4`) as the `studio.<studio>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```
Use the name of the last folder (`/path/to/<studio>/file.mp4`) as the `studio.<studio>` tag:
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```

BIN
scripting/genre_example.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View File

@@ -54,14 +54,12 @@ cd ../..
mv onion/build/src/onion/libonion_static.a .
#bzip2
git clone https://github.com/enthought/bzip2-1.0.6
cd bzip2-1.0.6
make -j 4
cd ..
mv bzip2-1.0.6/libbz2.a .
# magic
git clone https://github.com/threatstack/libmagic
cd libmagic
./autogen.sh
./configure --enable-static --disable-shared
@@ -69,5 +67,39 @@ make -j 4
cd ..
mv libmagic/src/.libs/libmagic.a .
# libarchive
git clone https://github.com/libarchive/libarchive
cd libarchive/build
./autogen.sh
cd ..
./configure --without-nettle --without-expat --without-xml2 --without-openssl
make -j 4
cd ..
mv libarchive/.libs/libarchive.a .
# lz4
git clone https://github.com/lz4/lz4
cd lz4
make -j 4
cd ..
mv lz4/lib/liblz4.a .
# lzma
wget https://newcontinuum.dl.sourceforge.net/project/lzmautils/xz-5.2.3.tar.gz
tar -xzf xz-5.2.3.tar.gz
rm xz-5.2.3.tar.gz
cd xz-5.2.3
./autogen.sh
./configure
make -j 4
cd ..
mv xz-5.2.3/src/liblzma/.libs/liblzma.a .
# zstd
git clone https://github.com/facebook/zstd
cd zstd
make -j 4
cd ..
mv zstd/lib/libzstd.a .
cd ..

View File

@@ -42,14 +42,12 @@ mv ffmpeg/libswresample/libswresample.a .
mv ffmpeg/libswscale/libswscale.a .
#bzip2
git clone https://github.com/enthought/bzip2-1.0.6
cd bzip2-1.0.6
make -j 4
cd ..
mv bzip2-1.0.6/libbz2.a .
# magic
git clone https://github.com/threatstack/libmagic
cd libmagic
./autogen.sh
./configure --enable-static --disable-shared

View File

@@ -34,6 +34,28 @@ font = (
"font/woff2"
)
# Archive "formats"
archive = (
    "application/x-tar",
    "application/zip",
    "application/x-rar",
    "application/x-arc",
    # The MIME table registers WARC as "application/warc" (no "x-" prefix);
    # list both spellings so that entry actually receives the archive flag.
    "application/warc",
    "application/x-warc",
    "application/x-7z-compressed",
)
# Archive "filters"
arc_filter = (
"application/gzip",
"application/x-bzip2",
"application/x-xz",
"application/x-zstd",
"application/x-lzma",
"application/x-lz4",
"application/x-lzip",
"application/x-lzop",
)
cnt = 1
@@ -48,6 +70,10 @@ def mime_id(mime):
mime_id += " | 0x40000000"
elif mime in font:
mime_id += " | 0x20000000"
elif mime in archive:
mime_id += " | 0x10000000"
elif mime in arc_filter:
mime_id += " | 0x08000000"
elif mime == "application/x-empty":
return "1"
return mime_id

111
src/cli.c
View File

@@ -1,12 +1,13 @@
#include "cli.h"
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 4096
#define DEFAULT_QUALITY 15
#define DEFAULT_SIZE 200
#define DEFAULT_CONTENT_SIZE 32768
#define DEFAULT_QUALITY 5
#define DEFAULT_SIZE 500
#define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200"
#define DEFAULT_BATCH_SIZE 100
#define DEFAULT_BIND_ADDR "localhost"
#define DEFAULT_PORT "4090"
@@ -14,9 +15,39 @@
/**
 * Allocates a zero-filled scan_args_t.
 *
 * depth is preset to -1; scan_args_validate() turns any negative depth
 * into "no limit".
 */
scan_args_t *scan_args_create() {
    scan_args_t *scan_args = calloc(1, sizeof(*scan_args));
    scan_args->depth = -1;
    return scan_args;
}
/**
 * Frees a scan_args_t along with the name, path and output strings it
 * points to.
 */
void scan_args_destroy(scan_args_t *args) {
    /* free(NULL) is a no-op, so fields that were never set need no guard */
    free(args->output);
    free(args->path);
    free(args->name);
    free(args);
}
#ifndef SIST_SCAN_ONLY
/**
 * Frees an index_args_t and the user script buffer that
 * index_args_validate() allocates when a script file is given.
 *
 * es_url, index_path and script_path are not freed here.
 *
 * NOTE(review): relies on index_args_create() zero-initialising the struct
 * (as web_args_create() does) so that script is NULL when unset - confirm.
 */
void index_args_destroy(index_args_t *args) {
    /* malloc'd in index_args_validate(); free(NULL) is a no-op */
    free(args->script);
    free(args);
}
/**
 * Frees the web_args_t structure itself.
 * None of the strings it references are released here.
 */
void web_args_destroy(web_args_t *args) {
//todo: b64credentials is produced by onion_base64_encode() in
// web_args_validate() and presumably should be freed here - confirm ownership
free(args);
}
#endif
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
@@ -25,7 +56,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char *abs_path = abspath(argv[1]);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", argv[1]);
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
} else {
args->path = abs_path;
@@ -34,7 +65,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->incremental != NULL) {
abs_path = abspath(args->incremental);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", args->incremental);
fprintf(stderr, "File not found: %s\n", args->incremental);
return 1;
}
}
@@ -48,16 +79,13 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->size == 0) {
args->size = DEFAULT_SIZE;
} else if (args->size <= 0) {
fprintf(stderr, "Invalid size: %d\n", args->size);
} else if (args->size > 0 && args->size < 32) {
printf("Invalid size: %d\n", args->content_size);
return 1;
}
if (args->content_size == 0) {
args->content_size = DEFAULT_CONTENT_SIZE;
} else if (args->content_size <= 0) {
fprintf(stderr, "Invalid content-size: %d\n", args->content_size);
return 1;
}
if (args->threads == 0) {
@@ -80,6 +108,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->depth < 0) {
args->depth = G_MAXINT32;
} else {
args->depth += 1;
}
if (args->name == NULL) {
args->name = g_path_get_basename(args->output);
}
@@ -87,10 +121,24 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->rewrite_url == NULL) {
args->rewrite_url = DEFAULT_REWRITE_URL;
}
if (args->archive == NULL || strcmp(args->archive, "recurse") == 0) {
args->archive_mode = ARC_MODE_RECURSE;
} else if (strcmp(args->archive, "list") == 0) {
args->archive_mode = ARC_MODE_LIST;
} else if (strcmp(args->archive, "shallow") == 0) {
args->archive_mode = ARC_MODE_SHALLOW;
} else if (strcmp(args->archive, "skip") == 0) {
args->archive_mode = ARC_MODE_SKIP;
} else {
fprintf(stderr, "Archive mode must be one of (skip, list, shallow, recurse), got '%s'", args->archive);
return 1;
}
return 0;
}
#ifndef SIST_SCAN_ONLY
int index_args_validate(index_args_t *args, int argc, const char **argv) {
if (argc < 2) {
@@ -100,15 +148,47 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
fprintf(stderr, "File not found: %s", argv[1]);
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
} else {
args->index_path = argv[1];
free(index_path);
}
if (args->es_url == NULL) {
args->es_url = DEFAULT_ES_URL;
}
if (args->script_path != NULL) {
struct stat info;
int res = stat(args->script_path, &info);
if (res == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
int fd = open(args->script_path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
args->script = malloc(info.st_size + 1);
res = read(fd, args->script, info.st_size);
if (res == -1) {
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
*(args->script + info.st_size) = '\0';
close(fd);
}
if (args->batch_size == 0) {
args->batch_size = DEFAULT_BATCH_SIZE;
}
return 0;
}
@@ -131,13 +211,19 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
args->port = DEFAULT_PORT;
}
if (args->credentials != NULL) {
args->b64credentials = onion_base64_encode(args->credentials, (int) strlen(args->credentials));
//Remove trailing newline
*(args->b64credentials + strlen(args->b64credentials) - 1) = '\0';
}
args->index_count = argc - 1;
args->indices = argv + 1;
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", abs_path);
fprintf(stderr, "File not found: %s\n", abs_path);
return 1;
}
}
@@ -153,5 +239,6 @@ web_args_t *web_args_create() {
web_args_t *args = calloc(sizeof(web_args_t), 1);
return args;
}
#endif

View File

@@ -12,17 +12,24 @@ typedef struct scan_args {
char *output;
char *rewrite_url;
char *name;
int depth;
char *path;
char *archive;
archive_mode_t archive_mode;
} scan_args_t;
scan_args_t *scan_args_create();
void scan_args_destroy(scan_args_t *args);
int scan_args_validate(scan_args_t *args, int argc, const char **argv);
#ifndef SIST_SCAN_ONLY
/* Options of the `index` command. */
typedef struct index_args {
/* Elasticsearch url; index_args_validate() substitutes the default when NULL */
char *es_url;
/* Index location, taken from the positional command line argument */
const char *index_path;
/* Value of the --script-file option, NULL when not given */
const char *script_path;
/* NUL-terminated contents of script_path, allocated by index_args_validate() */
char *script;
/* Non-zero: just print the JSON documents to stdout */
int print;
/* Index batch size; 0 is replaced by the default in index_args_validate() */
int batch_size;
/* Non-zero: reset the Elasticsearch mappings and settings */
int force_reset;
} index_args_t;
@@ -30,12 +37,17 @@ typedef struct web_args {
char *es_url;
char *bind;
char *port;
char *credentials;
char *b64credentials;
int index_count;
const char **indices;
} web_args_t;
index_args_t *index_args_create();
void index_args_destroy(index_args_t *args);
web_args_t *web_args_create();
void web_args_destroy(web_args_t *args);
int index_args_validate(index_args_t *args, int argc, const char **argv);
int web_args_validate(web_args_t *args, int argc, const char **argv);

View File

@@ -15,6 +15,8 @@ struct {
int threads;
int content_size;
float tn_qscale;
int depth;
archive_mode_t archive_mode;
size_t stat_tn_size;
size_t stat_index_size;
@@ -29,11 +31,13 @@ struct {
#ifndef SIST_SCAN_ONLY
struct {
char *es_url;
int batch_size;
} IndexCtx;
struct {
char *es_url;
int index_count;
char *b64credentials;
struct index_t indices[16];
} WebCtx;
#endif

View File

@@ -6,11 +6,9 @@
#include <stdio.h>
#include <string.h>
#include <cJSON/cJSON.h>
#include <src/ctx.h>
#include "static_generated.c"
#define BULK_INDEX_SIZE 100
typedef struct es_indexer {
int queued;
@@ -54,6 +52,40 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
elastic_index_line(bulk_line);
}
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
cJSON *body = cJSON_CreateObject();
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
cJSON_AddStringToObject(script_obj, "lang", "painless");
cJSON_AddStringToObject(script_obj, "source", script);
cJSON *query = cJSON_AddObjectToObject(body, "query");
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id);
char * str = cJSON_Print(body);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
printf("Executed user script <%d>\n", r->status_code);
cJSON *resp = cJSON_Parse(r->body);
cJSON_free(str);
cJSON_Delete(body);
free_response(r);
cJSON *error = cJSON_GetObjectItem(resp, "error");
if (error != NULL) {
char *error_str = cJSON_Print(error);
fprintf(stderr, "User script error: \n%s\n", error_str);
cJSON_free(error_str);
}
cJSON_Delete(resp);
}
void elastic_flush() {
if (Indexer == NULL) {
@@ -98,6 +130,12 @@ void elastic_flush() {
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url);
response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
if (r->status_code == 0) {
fprintf(stderr, "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url);
exit(1);
}
printf("Indexed %3d documents (%zukB) <%d>\n", count, buf_cur / 1024, r->status_code);
cJSON *ret_json = cJSON_Parse(r->body);
@@ -115,6 +153,7 @@ void elastic_flush() {
cJSON_Delete(ret_json);
free_response(r);
free(buf);
}
void elastic_index_line(es_bulk_line_t *line) {
@@ -133,15 +172,14 @@ void elastic_index_line(es_bulk_line_t *line) {
Indexer->queued += 1;
if (Indexer->queued >= BULK_INDEX_SIZE) {
if (Indexer->queued >= IndexCtx.batch_size) {
elastic_flush();
}
}
es_indexer_t *create_indexer(const char *url) {
size_t url_len = strlen(url);
char *es_url = malloc(url_len);
char *es_url = malloc(strlen(url) + 1);
strcpy(es_url, url);
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
@@ -154,7 +192,7 @@ es_indexer_t *create_indexer(const char *url) {
return indexer;
}
void destroy_indexer() {
void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {
char url[4096];
@@ -163,6 +201,15 @@ void destroy_indexer() {
printf("Refresh index <%d>\n", r->status_code);
free_response(r);
if (script != NULL) {
execute_update_script(script, index_id);
}
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Refresh index <%d>\n", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Merge index <%d>\n", r->status_code);

View File

@@ -24,7 +24,7 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char* es_url);
void destroy_indexer();
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
void elastic_init(int force_reset);

File diff suppressed because one or more lines are too long

View File

@@ -1,7 +1,7 @@
#include "src/ctx.h"
#include "serialize.h"
static __thread int IndexFd = -1;
static __thread int index_fd = -1;
typedef struct {
unsigned char uuid[16];
@@ -54,6 +54,12 @@ index_descriptor_t read_index_descriptor(char *path) {
struct stat info;
stat(path, &info);
int fd = open(path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Invalid/corrupt index (Could not find descriptor)\n");
exit(1);
}
char *buf = malloc(info.st_size + 1);
read(fd, buf, info.st_size);
*(buf + info.st_size) = '\0';
@@ -105,6 +111,8 @@ char *get_meta_key_text(enum metakey meta_key) {
return "title";
case MetaFontName:
return "font_name";
case MetaParent:
return "parent";
default:
return NULL;
}
@@ -113,13 +121,13 @@ char *get_meta_key_text(enum metakey meta_key) {
void write_document(document_t *doc) {
if (IndexFd == -1) {
if (index_fd == -1) {
char dstfile[PATH_MAX];
pthread_t self = pthread_self();
snprintf(dstfile, PATH_MAX, "%s_index_%lu", ScanCtx.index.path, self);
IndexFd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
index_fd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
if (IndexFd == -1) {
if (index_fd == -1) {
perror("open");
}
}
@@ -152,13 +160,16 @@ void write_document(document_t *doc) {
}
dyn_buffer_write_char(&buf, '\n');
write(IndexFd, buf.buf, buf.cur);
int res = write(index_fd, buf.buf, buf.cur);
if (res == -1) {
perror("write");
}
ScanCtx.stat_index_size += buf.cur;
dyn_buffer_destroy(&buf);
}
void serializer_cleanup() {
close(IndexFd);
void thread_cleanup() {
close(index_fd);
}
void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func func) {
@@ -238,6 +249,7 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
case MetaAlbumArtist:
case MetaGenre:
case MetaFontName:
case MetaParent:
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
@@ -258,8 +270,9 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
}
func(document, uuid_str);
cJSON_free(document);
cJSON_Delete(document);
}
dyn_buffer_destroy(&buf);
fclose(file);
}

View File

@@ -18,7 +18,7 @@ void incremental_read(GHashTable *table, const char *filepath);
/**
* Must be called after write_document
*/
void serializer_cleanup();
void thread_cleanup();
void write_index_descriptor(char *path, index_descriptor_t *desc);

View File

@@ -15,7 +15,7 @@ store_t *store_create(char *path) {
);
if (open_ret != 0) {
fprintf(stderr, "Error while opening store: %s", mdb_strerror(open_ret));
fprintf(stderr, "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path);
exit(1);
}

View File

@@ -1,7 +1,8 @@
#include "walk.h"
#include "src/ctx.h"
parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) {
__always_inline
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath);
parse_job_t *job = malloc(sizeof(parse_job_t) + len);
@@ -14,14 +15,22 @@ parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int
job->ext = len;
}
memcpy(&(job->info), info, sizeof(struct stat));
job->info = *info;
memset(job->parent, 0, 16);
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;
job->vfile.close = fs_close;
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;
return job;
}
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
parse_job_t *job = create_parse_job(filepath, info, ftw->base);
if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) {
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
tpool_add_work(ScanCtx.pool, parse, job);
}

View File

@@ -10,7 +10,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.1.3";
static const char *const Version = "1.1.10";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -19,9 +19,9 @@ static const char *const usage[] = {
};
void global_init() {
#ifndef SIST_SCAN_ONLY
#ifndef SIST_SCAN_ONLY
curl_global_init(CURL_GLOBAL_NOTHING);
#endif
#endif
av_log_set_level(AV_LOG_QUIET);
}
@@ -41,10 +41,22 @@ void init_dir(const char *dirpath) {
void scan_print_header() {
printf("sist2 V%s\n", Version);
printf("---------------------\n");
printf("threads\t\t%d\n", ScanCtx.threads);
printf("tn_qscale\t%.1f/31.0\n", ScanCtx.tn_qscale);
printf("tn_size\t\t%dpx\n", ScanCtx.tn_size);
printf("output\t\t%s\n", ScanCtx.index.path);
printf("threads\t\t\t%d\n", ScanCtx.threads);
printf("tn_qscale\t\t%.1f/31.0\n", ScanCtx.tn_qscale);
if (ScanCtx.tn_size > 0) {
printf("tn_size\t\t\t%dpx\n", ScanCtx.tn_size);
} else {
printf("tn_size\t\t\tdisabled\n");
}
if (ScanCtx.content_size > 0) {
printf("content_size\t\t%d B\n", ScanCtx.content_size);
} else {
printf("content_size\t\t\tdisabled\n");
}
printf("output\t\t\t%s\n", ScanCtx.index.path);
}
void sist2_scan(scan_args_t *args) {
@@ -53,6 +65,8 @@ void sist2_scan(scan_args_t *args) {
ScanCtx.tn_size = args->size;
ScanCtx.content_size = args->content_size;
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
ScanCtx.archive_mode = args->archive_mode;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
@@ -92,7 +106,7 @@ void sist2_scan(scan_args_t *args) {
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
}
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool);
@@ -125,9 +139,11 @@ void sist2_scan(scan_args_t *args) {
}
#ifndef SIST_SCAN_ONLY
void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.batch_size = args->batch_size;
if (!args->print) {
elastic_init(args->force_reset);
@@ -163,10 +179,11 @@ void sist2_index(index_args_t *args) {
read_index(file_path, desc.uuid, f);
}
}
closedir(dir);
if (!args->print) {
elastic_flush();
destroy_indexer();
destroy_indexer(args->script, desc.uuid);
}
}
@@ -174,6 +191,7 @@ void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
WebCtx.index_count = args->index_count;
WebCtx.b64credentials = args->b64credentials;
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
@@ -195,6 +213,7 @@ void sist2_web(web_args_t *args) {
serve(args->bind, args->port);
}
#endif
@@ -203,41 +222,54 @@ int main(int argc, const char *argv[]) {
global_init();
scan_args_t *scan_args = scan_args_create();
#ifndef SIST_SCAN_ONLY
#ifndef SIST_SCAN_ONLY
index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create();
#endif
#endif
char * common_es_url = NULL;
int arg_version = 0;
char *common_es_url = NULL;
struct argparse_option options[] = {
OPT_HELP(),
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=15"),
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=200"),
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
OPT_INTEGER(0, "size", &scan_args->size,
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. DEFAULT=4096"),
"Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768"),
OPT_STRING(0, "incremental", &scan_args->incremental,
"Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
"Use 0 to only scan files in PATH. DEFAULT: -1"),
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
"skip: Don't parse, list: only get file names as text, "
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
#ifndef SIST_SCAN_ONLY
#ifndef SIST_SCAN_ONLY
OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"),
"(You must use this option the first time you use the index command)"),
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
#endif
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
#endif
OPT_END(),
};
@@ -247,10 +279,15 @@ int main(int argc, const char *argv[]) {
argparse_describe(&argparse, DESCRIPTION, EPILOG);
argc = argparse_parse(&argparse, argc, argv);
#ifndef SIST_SCAN_ONLY
if (arg_version) {
printf(Version);
exit(0);
}
#ifndef SIST_SCAN_ONLY
web_args->es_url = common_es_url;
index_args->es_url = common_es_url;
#endif
#endif
if (argc == 0) {
argparse_usage(&argparse);
@@ -265,7 +302,7 @@ int main(int argc, const char *argv[]) {
}
#ifndef SIST_SCAN_ONLY
#ifndef SIST_SCAN_ONLY
else if (strcmp(argv[0], "index") == 0) {
int err = index_args_validate(index_args, argc, argv);
@@ -283,12 +320,20 @@ int main(int argc, const char *argv[]) {
sist2_web(web_args);
}
#endif
#endif
else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
return 1;
}
printf("\n");
scan_args_destroy(scan_args);
#ifndef SIST_SCAN_ONLY
index_args_destroy(index_args);
web_args_destroy(web_args);
#endif
return 0;
}

152
src/parsing/arc.c Normal file
View File

@@ -0,0 +1,152 @@
#include "arc.h"
#include "src/ctx.h"
#define ARC_BUF_SIZE 8192
/**
 * Decide whether a compressed ("filtered") file wraps a tar archive, judging
 * only by its name: the name with its last extension removed must end in
 * ".tar" (e.g. "backup.tar.gz").
 *
 * @param filepath Path of the file.
 * @param ext      Offset in filepath of the first character of the last
 *                 extension (one past the final '.'). 0 is rejected.
 * @return 1 (TRUE) when the stem ends in ".tar", 0 (FALSE) otherwise.
 */
int should_parse_filtered_file(const char *filepath, int ext) {
    static const char tar_suffix[] = ".tar";
    const int suffix_len = (int) sizeof(tar_suffix) - 1;

    // Length of the name once the trailing ".<ext>" has been stripped.
    const int stem_len = ext - 1;

    // Rejects ext == 0, negative offsets and stems too short to hold ".tar".
    // (A negative length used to be passed straight to memcpy.)
    if (filepath == NULL || stem_len < suffix_len) {
        return 0;
    }

    // Compare in place instead of copying the stem into a fixed-size stack
    // buffer, which could overflow for very long paths.
    return memcmp(filepath + stem_len - suffix_len, tar_suffix, (size_t) suffix_len) == 0;
}
/*
 * vfile read hook for files that live inside an archive: pulls up to `size`
 * bytes of the current entry from f->arc into buf and returns whatever
 * archive_read_data() reported, narrowed to int.
 */
int arc_read(struct vfile *f, void *buf, size_t size) {
    int bytes_read = archive_read_data(f->arc, buf, size);
    return bytes_read;
}
/*
 * State handed to the libarchive open/read/close callbacks as user_data when
 * an archive is read through a vfile instead of directly from a file name.
 */
typedef struct arc_data {
    // Source file the callbacks read from.
    vfile_t *f;
    // Scratch buffer the read callback fills and exposes to libarchive.
    char buf[ARC_BUF_SIZE];
} arc_data_f;
int vfile_open_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->is_fs_file && data->f->fd == -1) {
data->f->fd = open(data->f->filepath, O_RDONLY);
}
return ARCHIVE_OK;
}
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
arc_data_f *data = user_data;
*buf = data->buf;
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
}
int vfile_close_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->close != NULL) {
data->f->close(data->f);
}
return ARCHIVE_OK;
}
/*
 * Parse an archive file.
 *
 * The archive is opened straight from disk when `f` is a file-system file.
 * An archive nested inside another archive is read through the vfile
 * callbacks, and only when ScanCtx.archive_mode is ARC_MODE_RECURSE;
 * otherwise nested archives are ignored.
 *
 * In ARC_MODE_LIST the paths of all regular entries are joined with '\n' and
 * attached to `doc` as MetaContent. In every other mode each regular entry is
 * turned into a sub-job named "<archive path>#/<entry path>" and passed to
 * parse(), with `doc` recorded as its parent.
 *
 * @param f   Virtual file the archive is read from.
 * @param doc Document describing the archive itself.
 */
void parse_archive(vfile_t *f, document_t *doc) {

    struct archive *a;
    struct archive_entry *entry;

    a = archive_read_new();
    if (a == NULL) {
        fprintf(stderr, "Could not allocate archive reader: %s\n", doc->filepath);
        return;
    }
    archive_read_support_filter_all(a);
    archive_read_support_format_all(a);

    // Must outlive every libarchive call below: the callbacks keep a pointer to it.
    arc_data_f data;
    data.f = f;

    int ret = 0;
    if (data.f->is_fs_file) {
        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
    } else if (ScanCtx.archive_mode == ARC_MODE_RECURSE) {
        ret = archive_read_open(
                a, &data,
                vfile_open_callback,
                vfile_read_callback,
                vfile_close_callback
        );
    } else {
        // Nested archive, but recursion is disabled.
        archive_read_free(a);
        return;
    }

    if (ret != ARCHIVE_OK) {
        fprintf(stderr, "OPEN[%d]:%s %s\n", ret, archive_error_string(a), doc->filepath);
        archive_read_free(a);
        return;
    }

    if (ScanCtx.archive_mode == ARC_MODE_LIST) {

        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
                char *path = (char *) archive_entry_pathname(entry);
                if (path == NULL) {
                    // No usable path name for this entry.
                    continue;
                }
                dyn_buffer_append_string(&buf, path);
                dyn_buffer_write_char(&buf, '\n');
            }
        }
        dyn_buffer_write_char(&buf, '\0');

        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        if (meta_list == NULL) {
            dyn_buffer_destroy(&buf);
            archive_read_free(a);
            return;
        }
        meta_list->key = MetaContent;
        strcpy(meta_list->strval, buf.buf);
        APPEND_META(doc, meta_list);
        dyn_buffer_destroy(&buf);

    } else {

        // Capacity of the path storage allocated right behind the job struct.
        const size_t path_cap = PATH_MAX * 2;

        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + path_cap);
        if (sub_job == NULL) {
            archive_read_free(a);
            return;
        }

        sub_job->vfile.close = NULL;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        // Entries have no descriptor of their own; don't leave garbage here.
        sub_job->vfile.fd = -1;
        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->info = *archive_entry_stat(entry);
            if (!S_ISREG(sub_job->info.st_mode)) {
                continue;
            }

            const char *entry_path = archive_entry_pathname(entry);
            if (entry_path == NULL) {
                continue;
            }

            // Bounded formatting: entry names come from the archive and are untrusted.
            int len = snprintf(sub_job->filepath, path_cap, "%s#/%s", f->filepath, entry_path);
            if (len < 0 || (size_t) len >= path_cap) {
                fprintf(stderr, "Path too long, skipping: %s#/%s\n", f->filepath, entry_path);
                continue;
            }

            // The "#/" separator guarantees at least one '/' in the path.
            char *base = strrchr(sub_job->filepath, '/') + 1;
            sub_job->base = (int) (base - sub_job->filepath);

            // Look for the extension in the base name only, so that a dot in a
            // directory or in the archive's own name is not taken for one.
            char *dot = strrchr(base, '.');
            if (dot != NULL) {
                sub_job->ext = (int) (dot - sub_job->filepath + 1);
            } else {
                sub_job->ext = len;
            }

            parse(sub_job);
        }

        free(sub_job);
    }

    archive_read_free(a);
}

12
src/parsing/arc.h Normal file
View File

@@ -0,0 +1,12 @@
#ifndef SIST2_ARC_H
#define SIST2_ARC_H
#include "src/sist.h"
/*
 * Returns TRUE when the name of a compressed file, with its last extension
 * removed, ends in ".tar". `ext` is the offset of the extension in filepath.
 */
int should_parse_filtered_file(const char *filepath, int ext);
/*
 * Parses the archive read from `f`: depending on ScanCtx.archive_mode either
 * lists its entries as text on `doc` or parses each regular entry.
 */
void parse_archive(vfile_t *f, document_t *doc);
/* vfile read hook that reads the current archive entry via f->arc. */
int arc_read(struct vfile * f, void *buf, size_t size);
#endif

View File

@@ -1,11 +1,9 @@
#include "font.h"
#include "ft2build.h"
#include "freetype/freetype.h"
#include "src/ctx.h"
__thread FT_Library library = NULL;
__thread FT_Library ft_lib = NULL;
typedef struct text_dimensions {
@@ -139,15 +137,15 @@ void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned
}
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
if (library == NULL) {
FT_Init_FreeType(&library);
if (ft_lib == NULL) {
FT_Init_FreeType(&ft_lib);
}
if (buf == NULL) {
return;
}
FT_Face face;
FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face);
FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
if (err != 0) {
return;
}
@@ -169,6 +167,10 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
strcpy(meta_name->strval, font_name);
APPEND_META(doc, meta_name)
if (ScanCtx.tn_size <= 0) {
return;
}
int pixel = 64;
int num_chars = (int) strlen(font_name);

View File

@@ -2,6 +2,7 @@
#include "src/ctx.h"
#define MIN_SIZE 32
#define AVIO_BUF_SIZE 8192
__always_inline
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
@@ -89,9 +90,9 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
if (read_frame_ret != 0) {
if (read_frame_ret != AVERROR_EOF) {
fprintf(stderr, "Error reading frame: %d\n", read_frame_ret);
}
// if (read_frame_ret != AVERROR_EOF) {
// fprintf(stderr, "Error reading frame: %d\n", read_frame_ret);
// }
av_frame_free(&frame);
av_packet_unref(&avPacket);
return NULL;
@@ -188,22 +189,11 @@ void append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *
}
}
void parse_media(const char *filepath, document_t *doc) {
void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {
int video_stream = -1;
int audio_stream = -1;
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
fprintf(stderr, "Could not allocate AVFormatContext! %s \n", filepath);
return;
}
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) {
fprintf(stderr, "media error: %s %s\n", filepath, av_err2str(res));
return;
}
avformat_find_stream_info(pFormatCtx, NULL);
for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
@@ -242,7 +232,7 @@ void parse_media(const char *filepath, document_t *doc) {
}
}
if (video_stream != -1) {
if (video_stream != -1 && ScanCtx.tn_size > 0) {
AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
@@ -314,3 +304,58 @@ void parse_media(const char *filepath, document_t *doc) {
avformat_free_context(pFormatCtx);
}
/*
 * Open the media file at `filepath` with libavformat and hand the resulting
 * format context to parse_media(). Failures are reported on stderr and leave
 * `doc` untouched.
 */
void parse_media_filename(const char *filepath, document_t *doc) {
    AVFormatContext *fmt_ctx = avformat_alloc_context();
    if (!fmt_ctx) {
        fprintf(stderr, "Could not allocate AVFormatContext! %s \n", filepath);
        return;
    }

    int open_ret = avformat_open_input(&fmt_ctx, filepath, NULL, NULL);
    if (open_ret >= 0) {
        parse_media(fmt_ctx, doc);
        return;
    }

    fprintf(stderr, "media error: %s %s\n", filepath, av_err2str(open_ret));
}
/*
 * AVIO read callback backed by a vfile. A read of 0 bytes is translated to
 * AVERROR_EOF; any other result of the vfile's read hook is returned as is.
 */
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
    struct vfile *src = ptr;
    int n = src->read(src, buf, buf_size);
    return n == 0 ? AVERROR_EOF : n;
}
/*
 * Parse a media file that is only reachable through a vfile (e.g. an archive
 * entry) by feeding libavformat through a custom, non-seekable AVIOContext.
 *
 * The AVIOContext and its buffer are owned by this function and released on
 * every path; libavformat does not free custom I/O contexts itself.
 *
 * @param f   Virtual file to read from.
 * @param doc Document the extracted metadata is attached to.
 */
void parse_media_vfile(struct vfile *f, document_t *doc) {

    AVFormatContext *pFormatCtx = avformat_alloc_context();
    if (pFormatCtx == NULL) {
        fprintf(stderr, "Could not allocate AVFormatContext! %s \n", f->filepath);
        return;
    }

    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
    if (buffer == NULL) {
        fprintf(stderr, "Could not allocate AVIO buffer! %s \n", f->filepath);
        avformat_free_context(pFormatCtx);
        return;
    }

    // Read-only context (write_flag = 0) with no write or seek callbacks.
    AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
    if (io_ctx == NULL) {
        fprintf(stderr, "Could not allocate AVIOContext! %s \n", f->filepath);
        av_free(buffer);
        avformat_free_context(pFormatCtx);
        return;
    }

    pFormatCtx->pb = io_ctx;
    pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;

    int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
    if (res < 0) {
        // -5: tried to parse media that requires seek; not worth reporting.
        // NOTE(review): presumably AVERROR(EIO) on this platform - confirm.
        if (res != -5) {
            fprintf(stderr, "media error: %s %s\n", f->filepath, av_err2str(res));
        }
        // avformat_open_input() released the format context on failure, but
        // the custom I/O context is still ours.
        av_freep(&io_ctx->buffer);
        av_free(io_ctx);
        return;
    }

    parse_media(pFormatCtx, doc);

    // libavformat may have replaced the buffer, so free the one the context
    // currently holds rather than the pointer allocated above.
    av_freep(&io_ctx->buffer);
    av_free(io_ctx);
}

View File

@@ -7,6 +7,8 @@
#define MIN_VIDEO_SIZE 1024 * 64
#define MIN_IMAGE_SIZE 1024 * 2
void parse_media(const char * filepath, document_t *doc);
/* Parse a media file opened by libavformat directly from its path. */
void parse_media_filename(const char * filepath, document_t *doc);
/* Parse a media file whose bytes are supplied through the vfile's read hook. */
void parse_media_vfile(struct vfile *f, document_t *doc);
#endif

View File

@@ -8,7 +8,7 @@
#define MIME_EMPTY 1
#define DONT_PARSE 0x80000000
#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE
/*
 * True when mime_id is known (non-zero) and does not carry the DONT_PARSE
 * flag. Argument and expansion are parenthesised so the macro composes
 * safely with operators such as '!' at the call site.
 */
#define SHOULD_PARSE(mime_id) (((mime_id) & DONT_PARSE) != DONT_PARSE && (mime_id) != 0)
/* Flag bit set on MIME ids that carry the PDF flag. */
#define PDF_MASK 0x40000000
/* Fully parenthesised so that e.g. !IS_PDF(id) evaluates as intended. */
#define IS_PDF(mime_id) (((mime_id) & PDF_MASK) == PDF_MASK)
@@ -16,6 +16,12 @@
/* Flag bit set on MIME ids of font files. */
#define FONT_MASK 0x20000000
/* Fully parenthesised so that e.g. !IS_FONT(id) evaluates as intended. */
#define IS_FONT(mime_id) (((mime_id) & FONT_MASK) == FONT_MASK)
/* Flag bit set on MIME ids of archive containers (zip, tar, rar, 7z, arc). */
#define ARC_MASK 0x10000000
/* Fully parenthesised so that e.g. !IS_ARC(id) evaluates as intended. */
#define IS_ARC(mime_id) (((mime_id) & ARC_MASK) == ARC_MASK)
/* Flag bit set on MIME ids of compression filters (gzip, bzip2, xz, zstd, ...). */
#define ARC_FILTER_MASK 0x08000000
#define IS_ARC_FILTER(mime_id) (((mime_id) & ARC_FILTER_MASK) == ARC_FILTER_MASK)
enum major_mime {
MimeInvalid = 0,
MimeModel = 1,

View File

@@ -20,7 +20,7 @@ enum mime {
application_freeloader=655372,
application_futuresplash=655373,
application_groupwise=655374,
application_gzip=655375,
application_gzip=655375 | 0x08000000,
application_hta=655376,
application_i_deas=655377,
application_iges=655378,
@@ -82,342 +82,346 @@ enum mime {
application_vnd_xara=655434,
application_vocaltec_media_desc=655435,
application_vocaltec_media_file=655436,
application_winhelp=655437,
application_wordperfect=655438,
application_wordperfect6_0=655439,
application_wordperfect6_1=655440,
application_x_123=655441,
application_x_7z_compressed=655442,
application_x_aim=655443,
application_x_apple_diskimage=655444,
application_x_arc=655445,
application_x_archive=655446,
application_x_atari_7800_rom=655447,
application_x_authorware_bin=655448,
application_x_authorware_map=655449,
application_x_authorware_seg=655450,
application_x_avira_qua=655451,
application_x_bcpio=655452,
application_x_bittorrent=655453,
application_x_bsh=655454,
application_x_bytecode_python=655455,
application_x_bzip=655456,
application_x_bzip2=655457,
application_x_cbr=655458,
application_x_cbz=655459 | 0x40000000,
application_x_cdlink=655460,
application_x_chat=655461,
application_x_chrome_extension=655462,
application_x_cocoa=655463,
application_x_conference=655464,
application_x_coredump=655465,
application_x_cpio=655466,
application_x_dbf=655467,
application_x_dbt=655468,
application_x_debian_package=655469,
application_x_deepv=655470,
application_x_director=655471,
application_x_dmp=655472,
application_x_dosdriver=655473,
application_x_dosexec=655474,
application_x_dvi=655475,
application_x_elc=655476,
application_warc=655437,
application_winhelp=655438,
application_wordperfect=655439,
application_wordperfect6_0=655440,
application_wordperfect6_1=655441,
application_x_123=655442,
application_x_7z_compressed=655443 | 0x10000000,
application_x_aim=655444,
application_x_apple_diskimage=655445,
application_x_arc=655446 | 0x10000000,
application_x_archive=655447,
application_x_atari_7800_rom=655448,
application_x_authorware_bin=655449,
application_x_authorware_map=655450,
application_x_authorware_seg=655451,
application_x_avira_qua=655452,
application_x_bcpio=655453,
application_x_bittorrent=655454,
application_x_bsh=655455,
application_x_bytecode_python=655456,
application_x_bzip=655457,
application_x_bzip2=655458 | 0x08000000,
application_x_cbr=655459,
application_x_cbz=655460 | 0x40000000,
application_x_cdlink=655461,
application_x_chat=655462,
application_x_chrome_extension=655463,
application_x_cocoa=655464,
application_x_conference=655465,
application_x_coredump=655466,
application_x_cpio=655467,
application_x_dbf=655468,
application_x_dbt=655469,
application_x_debian_package=655470,
application_x_deepv=655471,
application_x_director=655472,
application_x_dmp=655473,
application_x_dosdriver=655474,
application_x_dosexec=655475,
application_x_dvi=655476,
application_x_elc=655477,
application_x_empty=1,
application_x_envoy=655478,
application_x_esrehber=655479,
application_x_excel=655480,
application_x_executable=655481,
application_x_font_gdos=655482,
application_x_font_pf2=655483,
application_x_font_pfm=655484,
application_x_font_sfn=655485,
application_x_font_ttf=655486 | 0x20000000,
application_x_freelance=655487,
application_x_gamecube_rom=655488,
application_x_gdbm=655489,
application_x_gettext_translation=655490,
application_x_git=655491,
application_x_gsp=655492,
application_x_gss=655493,
application_x_gtar=655494,
application_x_gzip=655495,
application_x_hdf=655496,
application_x_helpfile=655497,
application_x_httpd_imap=655498,
application_x_ima=655499,
application_x_innosetup=655500,
application_x_internett_signup=655501,
application_x_inventor=655502,
application_x_ip2=655503,
application_x_java_applet=655504,
application_x_java_commerce=655505,
application_x_java_image=655506,
application_x_java_jmod=655507,
application_x_java_keystore=655508,
application_x_kdelnk=655509,
application_x_koan=655510,
application_x_latex=655511,
application_x_livescreen=655512,
application_x_lotus=655513,
application_x_lz4=655514,
application_x_lz4_json=655515,
application_x_lzh=655516,
application_x_lzh_compressed=655517,
application_x_lzx=655518,
application_x_mach_binary=655519,
application_x_mach_executable=655520,
application_x_magic_cap_package_1_0=655521,
application_x_mathcad=655522,
application_x_maxis_dbpf=655523,
application_x_meme=655524,
application_x_midi=655525,
application_x_mif=655526,
application_x_mix_transfer=655527,
application_x_mobipocket_ebook=655528,
application_x_ms_compress_szdd=655529,
application_x_ms_pdb=655530,
application_x_ms_reader=655531,
application_x_msaccess=655532,
application_x_navi_animation=655533,
application_x_navidoc=655534,
application_x_navimap=655535,
application_x_navistyle=655536,
application_x_nes_rom=655537,
application_x_netcdf=655538,
application_x_newton_compatible_pkg=655539,
application_x_nintendo_ds_rom=655540,
application_x_object=655541,
application_x_omc=655542,
application_x_omcdatamaker=655543,
application_x_omcregerator=655544,
application_x_pagemaker=655545,
application_x_pcl=655546,
application_x_pgp_keyring=655547,
application_x_pixclscript=655548,
application_x_pkcs7_certreqresp=655549,
application_x_pkcs7_signature=655550,
application_x_project=655551,
application_x_qpro=655552,
application_x_rar=655553,
application_x_rpm=655554,
application_x_sdp=655555,
application_x_sea=655556,
application_x_seelogo=655557,
application_x_setupscript=655558,
application_x_shar=655559,
application_x_sharedlib=655560,
application_x_shockwave_flash=655561,
application_x_snappy_framed=655562,
application_x_sprite=655563,
application_x_sqlite3=655564,
application_x_sv4cpio=655565,
application_x_sv4crc=655566,
application_x_tar=655567,
application_x_tbook=655568,
application_x_terminfo=655569,
application_x_terminfo2=655570,
application_x_tex_tfm=655571,
application_x_texinfo=655572,
application_x_ustar=655573,
application_x_visio=655574,
application_x_vnd_audioexplosion_mzz=655575,
application_x_vnd_ls_xpix=655576,
application_x_vrml=655577,
application_x_wais_source=655578,
application_x_wine_extension_ini=655579,
application_x_wintalk=655580,
application_x_world=655581,
application_x_wri=655582,
application_x_x509_ca_cert=655583,
application_x_xz=655584,
application_x_zip=655585,
application_x_zstd=655586,
application_xml=655587,
application_zip=655588,
application_zlib=655589,
audio_it=458982,
audio_make=458983,
audio_mid=458984,
audio_midi=458985,
audio_mp4=458986,
audio_mpeg=458987,
audio_ogg=458988,
audio_s3m=458989,
audio_tsp_audio=458990,
audio_tsplayer=458991,
audio_vnd_qcelp=458992,
audio_voxware=458993,
audio_x_aiff=458994,
audio_x_flac=458995,
audio_x_gsm=458996,
audio_x_hx_aac_adts=458997,
audio_x_jam=458998,
audio_x_liveaudio=458999,
audio_x_m4a=459000,
audio_x_midi=459001,
audio_x_mod=459002,
audio_x_mp4a_latm=459003,
audio_x_mpeg_3=459004,
audio_x_mpequrl=459005,
audio_x_nspaudio=459006,
audio_x_pn_realaudio=459007,
audio_x_psid=459008,
audio_x_realaudio=459009,
audio_x_twinvq=459010,
audio_x_twinvq_plugin=459011,
audio_x_voc=459012,
audio_x_wav=459013,
audio_xm=459014,
font_otf=327943 | 0x20000000,
font_sfnt=327944 | 0x20000000,
font_woff=327945 | 0x20000000,
font_woff2=327946 | 0x20000000,
image_cmu_raster=524555,
image_fif=524556,
image_florian=524557,
image_g3fax=524558,
image_gif=524559,
image_heic=524560,
image_ief=524561,
image_jpeg=524562,
image_jutvision=524563,
image_naplps=524564,
image_pict=524565,
image_png=524566,
image_svg=524567 | 0x80000000,
image_svg_xml=524568 | 0x80000000,
image_tiff=524569,
image_vnd_adobe_photoshop=524570 | 0x80000000,
image_vnd_djvu=524571 | 0x80000000,
image_vnd_fpx=524572,
image_vnd_microsoft_icon=524573,
image_vnd_rn_realflash=524574,
image_vnd_rn_realpix=524575,
image_vnd_wap_wbmp=524576,
image_vnd_xiff=524577,
image_webp=524578,
image_wmf=524579,
image_x_3ds=524580,
image_x_cmu_raster=524581,
image_x_cur=524582,
image_x_dwg=524583,
image_x_eps=524584,
image_x_exr=524585,
image_x_gem=524586,
image_x_icns=524587,
image_x_icon=524588 | 0x80000000,
image_x_jg=524589,
image_x_jps=524590,
image_x_ms_bmp=524591,
image_x_niff=524592,
image_x_pcx=524593,
image_x_pict=524594,
image_x_portable_bitmap=524595,
image_x_portable_graymap=524596,
image_x_portable_pixmap=524597,
image_x_quicktime=524598,
image_x_rgb=524599,
image_x_tga=524600,
image_x_tiff=524601,
image_x_win_bitmap=524602,
image_x_xcf=524603 | 0x80000000,
image_x_xpixmap=524604 | 0x80000000,
image_x_xwindowdump=524605,
message_news=196926,
message_rfc822=196927,
model_vnd_dwf=65856,
model_vnd_gdl=65857,
model_vnd_gs_gdl=65858,
model_vrml=65859,
model_x_pov=65860,
text_PGP=590149,
text_asp=590150,
text_css=590151,
text_html=590152,
text_javascript=590153,
text_mcf=590154,
text_pascal=590155,
text_plain=590156,
text_richtext=590157,
text_rtf=590158,
text_scriplet=590159,
text_tab_separated_values=590160,
text_troff=590161,
text_uri_list=590162,
text_vnd_abc=590163,
text_vnd_fmi_flexstor=590164,
text_vnd_wap_wml=590165,
text_vnd_wap_wmlscript=590166,
text_webviewhtml=590167,
text_x_Algol68=590168,
text_x_asm=590169,
text_x_audiosoft_intra=590170,
text_x_awk=590171,
text_x_bcpl=590172,
text_x_c=590173,
text_x_c__=590174,
text_x_component=590175,
text_x_diff=590176,
text_x_fortran=590177,
text_x_java=590178,
text_x_la_asf=590179,
text_x_lisp=590180,
text_x_m=590181,
text_x_m4=590182,
text_x_makefile=590183,
text_x_ms_regedit=590184,
text_x_msdos_batch=590185,
text_x_objective_c=590186,
text_x_pascal=590187,
text_x_perl=590188,
text_x_php=590189,
text_x_po=590190,
text_x_python=590191,
text_x_ruby=590192,
text_x_sass=590193,
text_x_scss=590194,
text_x_server_parsed_html=590195,
text_x_setext=590196,
text_x_sgml=590197,
text_x_shellscript=590198,
text_x_speech=590199,
text_x_tcl=590200,
text_x_tex=590201,
text_x_uil=590202,
text_x_uuencode=590203,
text_x_vcalendar=590204,
text_x_vcard=590205,
text_xml=590206,
video_MP2T=393599,
video_animaflex=393600,
video_avi=393601,
video_avs_video=393602,
video_mp4=393603,
video_mpeg=393604,
video_quicktime=393605,
video_vdo=393606,
video_vivo=393607,
video_vnd_rn_realvideo=393608,
video_vosaic=393609,
video_webm=393610,
video_x_amt_demorun=393611,
video_x_amt_showrun=393612,
video_x_atomic3d_feature=393613,
video_x_dl=393614,
video_x_dv=393615,
video_x_fli=393616,
video_x_flv=393617,
video_x_isvideo=393618,
video_x_jng=393619 | 0x80000000,
video_x_m4v=393620,
video_x_matroska=393621,
video_x_mng=393622,
video_x_motion_jpeg=393623,
video_x_ms_asf=393624,
video_x_msvideo=393625,
video_x_qtc=393626,
video_x_sgi_movie=393627,
x_epoc_x_sisx_app=721308,
application_x_envoy=655479,
application_x_esrehber=655480,
application_x_excel=655481,
application_x_executable=655482,
application_x_font_gdos=655483,
application_x_font_pf2=655484,
application_x_font_pfm=655485,
application_x_font_sfn=655486,
application_x_font_ttf=655487 | 0x20000000,
application_x_freelance=655488,
application_x_gamecube_rom=655489,
application_x_gdbm=655490,
application_x_gettext_translation=655491,
application_x_git=655492,
application_x_gsp=655493,
application_x_gss=655494,
application_x_gtar=655495,
application_x_gzip=655496,
application_x_hdf=655497,
application_x_helpfile=655498,
application_x_httpd_imap=655499,
application_x_ima=655500,
application_x_innosetup=655501,
application_x_internett_signup=655502,
application_x_inventor=655503,
application_x_ip2=655504,
application_x_java_applet=655505,
application_x_java_commerce=655506,
application_x_java_image=655507,
application_x_java_jmod=655508,
application_x_java_keystore=655509,
application_x_kdelnk=655510,
application_x_koan=655511,
application_x_latex=655512,
application_x_livescreen=655513,
application_x_lotus=655514,
application_x_lz4=655515 | 0x08000000,
application_x_lz4_json=655516,
application_x_lzh=655517,
application_x_lzh_compressed=655518,
application_x_lzip=655519 | 0x08000000,
application_x_lzma=655520 | 0x08000000,
application_x_lzop=655521 | 0x08000000,
application_x_lzx=655522,
application_x_mach_binary=655523,
application_x_mach_executable=655524,
application_x_magic_cap_package_1_0=655525,
application_x_mathcad=655526,
application_x_maxis_dbpf=655527,
application_x_meme=655528,
application_x_midi=655529,
application_x_mif=655530,
application_x_mix_transfer=655531,
application_x_mobipocket_ebook=655532,
application_x_ms_compress_szdd=655533,
application_x_ms_pdb=655534,
application_x_ms_reader=655535,
application_x_msaccess=655536,
application_x_navi_animation=655537,
application_x_navidoc=655538,
application_x_navimap=655539,
application_x_navistyle=655540,
application_x_nes_rom=655541,
application_x_netcdf=655542,
application_x_newton_compatible_pkg=655543,
application_x_nintendo_ds_rom=655544,
application_x_object=655545,
application_x_omc=655546,
application_x_omcdatamaker=655547,
application_x_omcregerator=655548,
application_x_pagemaker=655549,
application_x_pcl=655550,
application_x_pgp_keyring=655551,
application_x_pixclscript=655552,
application_x_pkcs7_certreqresp=655553,
application_x_pkcs7_signature=655554,
application_x_project=655555,
application_x_qpro=655556,
application_x_rar=655557 | 0x10000000,
application_x_rpm=655558,
application_x_sdp=655559,
application_x_sea=655560,
application_x_seelogo=655561,
application_x_setupscript=655562,
application_x_shar=655563,
application_x_sharedlib=655564,
application_x_shockwave_flash=655565,
application_x_snappy_framed=655566,
application_x_sprite=655567,
application_x_sqlite3=655568,
application_x_sv4cpio=655569,
application_x_sv4crc=655570,
application_x_tar=655571 | 0x10000000,
application_x_tbook=655572,
application_x_terminfo=655573,
application_x_terminfo2=655574,
application_x_tex_tfm=655575,
application_x_texinfo=655576,
application_x_ustar=655577,
application_x_visio=655578,
application_x_vnd_audioexplosion_mzz=655579,
application_x_vnd_ls_xpix=655580,
application_x_vrml=655581,
application_x_wais_source=655582,
application_x_wine_extension_ini=655583,
application_x_wintalk=655584,
application_x_world=655585,
application_x_wri=655586,
application_x_x509_ca_cert=655587,
application_x_xz=655588 | 0x08000000,
application_x_zip=655589,
application_x_zstd=655590 | 0x08000000,
application_xml=655591,
application_zip=655592 | 0x10000000,
application_zlib=655593,
audio_it=458986,
audio_make=458987,
audio_mid=458988,
audio_midi=458989,
audio_mp4=458990,
audio_mpeg=458991,
audio_ogg=458992,
audio_s3m=458993,
audio_tsp_audio=458994,
audio_tsplayer=458995,
audio_vnd_qcelp=458996,
audio_voxware=458997,
audio_x_aiff=458998,
audio_x_flac=458999,
audio_x_gsm=459000,
audio_x_hx_aac_adts=459001,
audio_x_jam=459002,
audio_x_liveaudio=459003,
audio_x_m4a=459004,
audio_x_midi=459005,
audio_x_mod=459006,
audio_x_mp4a_latm=459007,
audio_x_mpeg_3=459008,
audio_x_mpequrl=459009,
audio_x_nspaudio=459010,
audio_x_pn_realaudio=459011,
audio_x_psid=459012,
audio_x_realaudio=459013,
audio_x_twinvq=459014,
audio_x_twinvq_plugin=459015,
audio_x_voc=459016,
audio_x_wav=459017,
audio_xm=459018,
font_otf=327947 | 0x20000000,
font_sfnt=327948 | 0x20000000,
font_woff=327949 | 0x20000000,
font_woff2=327950 | 0x20000000,
image_cmu_raster=524559,
image_fif=524560,
image_florian=524561,
image_g3fax=524562,
image_gif=524563,
image_heic=524564,
image_ief=524565,
image_jpeg=524566,
image_jutvision=524567,
image_naplps=524568,
image_pict=524569,
image_png=524570,
image_svg=524571 | 0x80000000,
image_svg_xml=524572 | 0x80000000,
image_tiff=524573,
image_vnd_adobe_photoshop=524574 | 0x80000000,
image_vnd_djvu=524575 | 0x80000000,
image_vnd_fpx=524576,
image_vnd_microsoft_icon=524577,
image_vnd_rn_realflash=524578,
image_vnd_rn_realpix=524579,
image_vnd_wap_wbmp=524580,
image_vnd_xiff=524581,
image_webp=524582,
image_wmf=524583,
image_x_3ds=524584,
image_x_cmu_raster=524585,
image_x_cur=524586,
image_x_dwg=524587,
image_x_eps=524588,
image_x_exr=524589,
image_x_gem=524590,
image_x_icns=524591,
image_x_icon=524592 | 0x80000000,
image_x_jg=524593,
image_x_jps=524594,
image_x_ms_bmp=524595,
image_x_niff=524596,
image_x_pcx=524597,
image_x_pict=524598,
image_x_portable_bitmap=524599,
image_x_portable_graymap=524600,
image_x_portable_pixmap=524601,
image_x_quicktime=524602,
image_x_rgb=524603,
image_x_tga=524604,
image_x_tiff=524605,
image_x_win_bitmap=524606,
image_x_xcf=524607 | 0x80000000,
image_x_xpixmap=524608 | 0x80000000,
image_x_xwindowdump=524609,
message_news=196930,
message_rfc822=196931,
model_vnd_dwf=65860,
model_vnd_gdl=65861,
model_vnd_gs_gdl=65862,
model_vrml=65863,
model_x_pov=65864,
text_PGP=590153,
text_asp=590154,
text_css=590155,
text_html=590156,
text_javascript=590157,
text_mcf=590158,
text_pascal=590159,
text_plain=590160,
text_richtext=590161,
text_rtf=590162,
text_scriplet=590163,
text_tab_separated_values=590164,
text_troff=590165,
text_uri_list=590166,
text_vnd_abc=590167,
text_vnd_fmi_flexstor=590168,
text_vnd_wap_wml=590169,
text_vnd_wap_wmlscript=590170,
text_webviewhtml=590171,
text_x_Algol68=590172,
text_x_asm=590173,
text_x_audiosoft_intra=590174,
text_x_awk=590175,
text_x_bcpl=590176,
text_x_c=590177,
text_x_c__=590178,
text_x_component=590179,
text_x_diff=590180,
text_x_fortran=590181,
text_x_java=590182,
text_x_la_asf=590183,
text_x_lisp=590184,
text_x_m=590185,
text_x_m4=590186,
text_x_makefile=590187,
text_x_ms_regedit=590188,
text_x_msdos_batch=590189,
text_x_objective_c=590190,
text_x_pascal=590191,
text_x_perl=590192,
text_x_php=590193,
text_x_po=590194,
text_x_python=590195,
text_x_ruby=590196,
text_x_sass=590197,
text_x_scss=590198,
text_x_server_parsed_html=590199,
text_x_setext=590200,
text_x_sgml=590201,
text_x_shellscript=590202,
text_x_speech=590203,
text_x_tcl=590204,
text_x_tex=590205,
text_x_uil=590206,
text_x_uuencode=590207,
text_x_vcalendar=590208,
text_x_vcard=590209,
text_xml=590210,
video_MP2T=393603,
video_animaflex=393604,
video_avi=393605,
video_avs_video=393606,
video_mp4=393607,
video_mpeg=393608,
video_quicktime=393609,
video_vdo=393610,
video_vivo=393611,
video_vnd_rn_realvideo=393612,
video_vosaic=393613,
video_webm=393614,
video_x_amt_demorun=393615,
video_x_amt_showrun=393616,
video_x_atomic3d_feature=393617,
video_x_dl=393618,
video_x_dv=393619,
video_x_fli=393620,
video_x_flv=393621,
video_x_isvideo=393622,
video_x_jng=393623 | 0x80000000,
video_x_m4v=393624,
video_x_matroska=393625,
video_x_mng=393626,
video_x_motion_jpeg=393627,
video_x_ms_asf=393628,
video_x_msvideo=393629,
video_x_qtc=393630,
video_x_sgi_movie=393631,
x_epoc_x_sisx_app=721312,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj";
@@ -832,6 +836,10 @@ case audio_x_hx_aac_adts: return "audio/x-hx-aac-adts";
case application_x_chrome_extension: return "application/x-chrome-extension";
case image_heic: return "image/heic";
case image_x_gem: return "image/x-gem";
case application_x_lzma: return "application/x-lzma";
case application_warc: return "application/warc";
case application_x_lzip: return "application/x-lzip";
case application_x_lzop: return "application/x-lzop";
default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@@ -1182,6 +1190,9 @@ g_hash_table_insert(ext_table, "d", (gpointer)text_plain);
g_hash_table_insert(ext_table, "cs", (gpointer)text_plain);
g_hash_table_insert(ext_table, "hpp", (gpointer)text_plain);
g_hash_table_insert(ext_table, "srt", (gpointer)text_plain);
g_hash_table_insert(ext_table, "nfo", (gpointer)text_plain);
g_hash_table_insert(ext_table, "sfv", (gpointer)text_plain);
g_hash_table_insert(ext_table, "m3u", (gpointer)text_plain);
g_hash_table_insert(ext_table, "rt", (gpointer)text_richtext);
g_hash_table_insert(ext_table, "rtf", (gpointer)text_richtext);
g_hash_table_insert(ext_table, "rtx", (gpointer)text_richtext);
@@ -1334,6 +1345,10 @@ g_hash_table_insert(ext_table, "z", (gpointer)application_zlib);
g_hash_table_insert(ext_table, "pf2", (gpointer)application_x_font_pf2);
g_hash_table_insert(ext_table, "jmod", (gpointer)application_x_java_jmod);
g_hash_table_insert(ext_table, "heic", (gpointer)image_heic);
g_hash_table_insert(ext_table, "lzma", (gpointer)application_x_lzma);
g_hash_table_insert(ext_table, "warc", (gpointer)application_warc);
g_hash_table_insert(ext_table, "lz", (gpointer)application_x_lzip);
g_hash_table_insert(ext_table, "lzo", (gpointer)application_x_lzop);
return ext_table;}
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@@ -1748,5 +1763,9 @@ g_hash_table_insert(mime_table, "audio/x-hx-aac-adts", (gpointer)audio_x_hx_aac_
g_hash_table_insert(mime_table, "application/x-chrome-extension", (gpointer)application_x_chrome_extension);
g_hash_table_insert(mime_table, "image/heic", (gpointer)image_heic);
g_hash_table_insert(mime_table, "image/x-gem", (gpointer)image_x_gem);
g_hash_table_insert(mime_table, "application/x-lzma", (gpointer)application_x_lzma);
g_hash_table_insert(mime_table, "application/warc", (gpointer)application_warc);
g_hash_table_insert(mime_table, "application/x-lzip", (gpointer)application_x_lzip);
g_hash_table_insert(mime_table, "application/x-lzop", (gpointer)application_x_lzop);
return mime_table;}
#endif

View File

@@ -1,9 +1,32 @@
#include <src/ctx.h>
#include "src/sist.h"
#include "src/ctx.h"
__thread magic_t Magic = NULL;
void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
/**
 * read callback for regular files on disk.
 *
 * The file at f->filepath is opened read-only on the first call (while
 * f->fd is still -1) and the descriptor is kept in f->fd for later calls.
 *
 * @param f    vfile describing the file
 * @param buf  destination buffer
 * @param size maximum number of bytes to read
 * @return the result of read(2), or -1 when the file cannot be opened
 */
int fs_read(struct vfile *f, void *buf, size_t size) {
    if (f->fd == -1) {
        f->fd = open(f->filepath, O_RDONLY);
        if (f->fd == -1) {
            perror("open");
            /* Report the path on stderr too, so it stays next to the
             * perror() message and does not end up in piped stdout. */
            fprintf(stderr, "%s\n", f->filepath);
            return -1;
        }
    }
    return read(f->fd, buf, size);
}
/* Calls the vfile's close callback when one is set. The expansion already ends
 * with ';', which is why call sites are written without a trailing semicolon.
 * `f` is expanded more than once, so pass a plain lvalue without side effects. */
#define CLOSE_FILE(f) if (f.close != NULL) {f.close(&f);};
/**
 * close callback for regular files on disk.
 *
 * Closes the descriptor if fs_read() opened one. The descriptor is reset to
 * -1 afterwards so that a second call is a no-op instead of closing an
 * unrelated, recycled descriptor, and so that fs_read() can reopen the file.
 *
 * @param f vfile whose descriptor should be released
 */
void fs_close(struct vfile *f) {
    if (f->fd != -1) {
        close(f->fd);
        f->fd = -1;
    }
}
void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
void *full_buf;
@@ -11,17 +34,10 @@ void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, job->info.st_size);
} else {
if (*fd == -1) {
*fd = open(job->filepath, O_RDONLY);
if (*fd == -1) {
perror("open");
printf("%s\n", job->filepath);
return NULL;
}
}
full_buf = malloc(job->info.st_size);
memcpy(full_buf, buf, bytes_read);
int ret = read(*fd, full_buf + bytes_read, job->info.st_size - bytes_read);
int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
if (ret == -1) {
perror("read");
return NULL;
@@ -36,15 +52,14 @@ void parse(void *arg) {
parse_job_t *job = arg;
document_t doc;
if (incremental_get(ScanCtx.original_table, job->info.st_ino) == job->info.st_mtim.tv_sec) {
int inc_ts = incremental_get(ScanCtx.original_table, job->info.st_ino);
if (inc_ts != 0 && inc_ts == job->info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
free(job);
return;
}
if (Magic == NULL) {
Magic = magic_open(MAGIC_MIME_TYPE);
magic_load(Magic, NULL);
}
doc.filepath = job->filepath;
@@ -66,24 +81,13 @@ void parse(void *arg) {
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
}
int fd = -1;
int bytes_read = 0;
if (doc.mime == 0) {
// Get mime type with libmagic
fd = open(job->filepath, O_RDONLY);
if (fd == -1) {
perror("open");
free(job);
return;
}
bytes_read = read(fd, buf, PARSE_BUF_SIZE);
bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE);
if (bytes_read == -1) {
perror("read");
close(fd);
free(job);
CLOSE_FILE(job->vfile)
return;
}
@@ -101,11 +105,16 @@ void parse(void *arg) {
if (!(SHOULD_PARSE(doc.mime))) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
parse_media(job->filepath, &doc);
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
if (job->vfile.is_fs_file) {
parse_media_filename(job->filepath, &doc);
} else {
parse_media_vfile(&job->vfile, &doc);
}
} else if (IS_PDF(doc.mime)) {
void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd);
void *pdf_buf = read_all(job, (char *) buf, bytes_read);
parse_pdf(pdf_buf, doc.size, &doc);
if (pdf_buf != buf && pdf_buf != NULL) {
@@ -113,22 +122,35 @@ void parse(void *arg) {
}
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
parse_text(bytes_read, &fd, (char *) buf, &doc);
parse_text(bytes_read, &job->vfile, (char *) buf, &doc);
} else if (IS_FONT(doc.mime)) {
void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
void *font_buf = read_all(job, (char *) buf, bytes_read);
parse_font(font_buf, doc.size, &doc);
if (font_buf != buf && font_buf != NULL) {
free(font_buf);
}
} else if (
ScanCtx.archive_mode != ARC_MODE_SKIP && (
IS_ARC(doc.mime) ||
(IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
)) {
parse_archive(&job->vfile, &doc);
}
//Parent meta
if (!uuid_is_null(job->parent)) {
char tmp[UUID_STR_LEN];
uuid_unparse(job->parent, tmp);
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
meta_parent->key = MetaParent;
strcpy(meta_parent->strval, tmp);
APPEND_META((&doc), meta_parent)
}
write_document(&doc);
if (fd != -1) {
close(fd);
}
free(job);
CLOSE_FILE(job->vfile)
}

View File

@@ -5,6 +5,9 @@
#define PARSE_BUF_SIZE 4096
int fs_read(struct vfile *f, void *buf, size_t size);
void fs_close(struct vfile *f);
void parse(void *arg);
#endif

View File

@@ -177,7 +177,17 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
return;
}
fz_page *cover = render_cover(ctx, doc, fzdoc);
fz_page *cover = NULL;
if (ScanCtx.tn_size > 0) {
cover = render_cover(ctx, doc, fzdoc);
} else {
fz_var(cover);
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
cover = NULL;
}
if (cover == NULL) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
@@ -185,79 +195,81 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
return;
}
fz_stext_options opts = {0};
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
if (ScanCtx.content_size > 0) {
fz_stext_options opts = {0};
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page = NULL;
if (current_page == 0) {
page = cover;
} else {
fz_var(err);
fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page);
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page = NULL;
if (current_page == 0) {
page = cover;
} else {
fz_var(err);
fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page);
fz_run_page(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
fz_var(err);
fz_try(ctx)
fz_run_page(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_stext_block *block = stext->first_block;
while (block != NULL) {
int ret = read_stext_block(block, &text_buf);
if (ret == TEXT_BUF_FULL) {
break;
}
block = block->next;
}
fz_drop_stext_page(ctx, stext);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_drop_page(ctx, page);
fz_stext_block *block = stext->first_block;
while (block != NULL) {
int ret = read_stext_block(block, &text_buf);
if (ret == TEXT_BUF_FULL) {
if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) {
break;
}
block = block->next;
}
fz_drop_stext_page(ctx, stext);
fz_drop_page(ctx, page);
text_buffer_terminate_string(&text_buf);
if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) {
break;
}
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
APPEND_META(doc, meta_content)
text_buffer_destroy(&text_buf);
}
text_buffer_terminate_string(&text_buf);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
APPEND_META(doc, meta_content)
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
text_buffer_destroy(&text_buf);
}

View File

@@ -1,7 +1,7 @@
#include "text.h"
#include "src/ctx.h"
void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc) {
char *intermediate_buf;
int intermediate_buf_len;
@@ -13,10 +13,6 @@ void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
memcpy(intermediate_buf, buf, to_copy);
} else {
if (*fd == -1) {
*fd = open(doc->filepath, O_RDONLY);
}
int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;
intermediate_buf = malloc(to_read + bytes_read);
@@ -25,7 +21,7 @@ void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
memcpy(intermediate_buf, buf, bytes_read);
}
read(*fd, intermediate_buf + bytes_read, to_read);
f->read(f, intermediate_buf + bytes_read, to_read);
}
text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);

View File

@@ -3,6 +3,6 @@
#include "src/sist.h"
void parse_text(int bytes_read, int *fd, char *buf, document_t *doc);
void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc);
#endif

View File

@@ -26,12 +26,17 @@
#include <pthread.h>
#include <sys/stat.h>
#include <wordexp.h>
#include "ft2build.h"
#include "freetype/freetype.h"
#include <archive.h>
#include <archive_entry.h>
#ifndef SIST_SCAN_ONLY
#include <onion/onion.h>
#include <onion/handler.h>
#include <onion/block.h>
#include <onion/shortcuts.h>
#include <onion/codecs.h>
#include <curl/curl.h>
#endif
@@ -49,6 +54,7 @@
#include "parsing/pdf.h"
#include "parsing/media.h"
#include "parsing/font.h"
#include "parsing/arc.h"
#include "cli.h"
#include "utf8.h/utf8.h"
@@ -56,6 +62,7 @@
#include "src/index/elastic.h"
#include "index/web.h"
#include "web/serve.h"
#include "web/auth_basic.h"
#endif
;

View File

@@ -114,12 +114,19 @@ static void *tpool_worker(void *arg) {
pthread_mutex_unlock(&(pool->work_mutex));
if (work != NULL) {
if (pool->stop) {
break;
}
work->func(work->arg);
free(work->arg);
free(work);
}
pthread_mutex_lock(&(pool->work_mutex));
pool->done_cnt++;
if (work != NULL) {
pool->done_cnt++;
}
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
@@ -142,11 +149,15 @@ void tpool_wait(tpool_t *pool) {
if (pool->done_cnt < pool->work_cnt) {
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
} else {
pool->stop = 1;
break;
usleep(500000);
if (pool->done_cnt == pool->work_cnt) {
pool->stop = 1;
usleep(1000000);
break;
}
}
progress_bar_print(100.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
}
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
pthread_mutex_unlock(&(pool->work_mutex));
}
@@ -169,7 +180,8 @@ void tpool_destroy(tpool_t *pool) {
for (size_t i = 0; i < pool->thread_cnt; i++) {
pthread_t thread = pool->threads[i];
if (thread != 0) {
pthread_cancel(thread);
void *_;
pthread_join(thread, &_);
}
}
@@ -209,8 +221,6 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
void tpool_start(tpool_t *pool) {
for (size_t i = 0; i < pool->thread_cnt; i++) {
pthread_t thread = pool->threads[i];
pthread_create(&thread, NULL, tpool_worker, pool);
pthread_detach(thread);
pthread_create(&pool->threads[i], NULL, tpool_worker, pool);
}
}

View File

@@ -9,6 +9,12 @@
#define IS_META_LONG(key) (key & META_LONG_MASK) == META_LONG_MASK
#define IS_META_STR(meta) (meta->key & META_STR_MASK) == META_STR_MASK
#define ARC_MODE_SKIP 0
#define ARC_MODE_LIST 1
#define ARC_MODE_SHALLOW 2
#define ARC_MODE_RECURSE 3
typedef int archive_mode_t;
// This is written to file as a 8bit char!
enum metakey {
MetaContent = 1 | META_STR_MASK,
@@ -24,6 +30,7 @@ enum metakey {
MetaGenre = 11 | META_STR_MASK,
MetaTitle = 12 | META_STR_MASK,
MetaFontName = 13 | META_STR_MASK,
MetaParent = 14 | META_STR_MASK,
};
typedef struct index_descriptor {
@@ -63,13 +70,39 @@ typedef struct document {
short ext;
meta_line_t *meta_head;
meta_line_t *meta_tail;
struct document *child_head;
struct document *child_tail;
char *filepath;
} document_t;
/* Forward declaration so the callback typedefs below can name struct vfile */
typedef struct vfile vfile_t;
/* Callback signatures for vfile operations. fs_read() and fs_close() are the
 * read/close implementations for plain files. seek_func_t is declared here
 * but struct vfile has no member of that type. */
typedef int (*read_func_t)(struct vfile *, void *buf, size_t size);
typedef int (*seek_func_t)(struct vfile *, size_t size, int whence);
typedef void (*close_func_t)(struct vfile *);
/* File-like handle that carries its own read/close callbacks */
typedef struct vfile {
union {
/* Descriptor used by fs_read()/fs_close(); -1 means "not opened yet" */
int fd;
/* NOTE(review): presumably set instead of fd for archive members - confirm */
struct archive *arc;
};
/* parse() calls parse_media_filename() when set, parse_media_vfile() otherwise */
int is_fs_file;
/* Path opened by fs_read() */
char *filepath;
read_func_t read;
/* May be NULL; CLOSE_FILE checks for that before calling it */
close_func_t close;
} vfile_t;
/* Work item handed to parse() */
typedef struct parse_job_t {
/* NOTE(review): presumably the offset of the base name within filepath - confirm */
int base;
/* Offset of the extension within filepath (parse() uses filepath + ext) */
int ext;
/* stat data; parse() reads st_ino, st_mtim and st_size from it */
struct stat info;
/* Handle through which parse() reads and closes the file content */
struct vfile vfile;
/* When not the null UUID, parse() emits it as a MetaParent meta line */
uuid_t parent;
/* NOTE(review): looks like a trailing variable-length string allocated past
 * the end of the struct - confirm against the code that creates jobs */
char filepath[1];
} parse_job_t;

View File

@@ -46,6 +46,10 @@ void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) {
dyn_buffer_write_char(buf, '\0');
}
/**
 * Appends the characters of a NUL-terminated string to the buffer.
 * Only strlen(str) bytes are passed to dyn_buffer_write(); this function
 * does not write a terminator itself.
 */
void dyn_buffer_append_string(dyn_buffer_t *buf, char *str) {
    size_t len = strlen(str);
    dyn_buffer_write(buf, str, len);
}
void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
grow_buffer_small(buf);

View File

@@ -47,6 +47,8 @@ void dyn_buffer_write_char(dyn_buffer_t *buf, char c);
void dyn_buffer_write_str(dyn_buffer_t *buf, char *str);
void dyn_buffer_append_string(dyn_buffer_t *buf, char *str);
void dyn_buffer_write_int(dyn_buffer_t *buf, int d);
void dyn_buffer_write_short(dyn_buffer_t *buf, short s);

59
src/web/auth_basic.c Normal file
View File

@@ -0,0 +1,59 @@
#include "auth_basic.h"
#define UNAUTHORIZED_TEXT "Unauthorized"
/* Private data attached to the handler created by auth_basic() */
typedef struct auth_basic_data {
/* Handler that receives the request once authentication succeeded */
onion_handler *inside;
/* Expected text after "Basic " in the Authorization header; NULL disables
 * authentication. Only the pointer is stored, the string is not copied. */
const char *b64credentials;
} auth_basic_data_t;
/**
 * Checks an Authorization header value against the expected credentials.
 *
 * @param expected    expected text after the "Basic " prefix, or NULL to
 *                    accept every request
 * @param credentials raw Authorization header value, may be NULL
 * @return TRUE when access is granted, FALSE otherwise
 */
int authenticate(const char *expected, const char *credentials) {
    if (expected == NULL) {
        return TRUE;
    }

    if (credentials == NULL || strncmp(credentials, "Basic ", 6) != 0) {
        return FALSE;
    }

    const char *supplied = credentials + 6;
    size_t expected_len = strlen(expected);
    size_t supplied_len = strlen(supplied);

    /* Accumulate the length mismatch and every byte difference instead of
     * returning at the first mismatch (as strcmp does), so the response time
     * does not reveal how many leading characters were guessed correctly. */
    unsigned char diff = (unsigned char) (expected_len != supplied_len);
    for (size_t i = 0; i < supplied_len; i++) {
        /* expected[expected_len] is the terminating NUL, so the clamped
         * index always stays inside the string. */
        size_t j = i < expected_len ? i : expected_len;
        diff |= (unsigned char) (supplied[i] ^ expected[j]);
    }

    return diff == 0 ? TRUE : FALSE;
}
/**
 * Handler that guards another handler with HTTP basic authentication.
 *
 * Authenticated requests are forwarded to d->inside. Every other request is
 * answered with 401 and a WWW-Authenticate challenge.
 *
 * @param d   private data created by auth_basic()
 * @param req incoming request
 * @param res response being built
 * @return result of the inner handler, or OCS_PROCESSED for the 401 response
 */
int auth_basic_handler(auth_basic_data_t *d,
                       onion_request *req,
                       onion_response *res) {
    const char *credentials = onion_request_get_header(req, "Authorization");

    if (authenticate(d->b64credentials, credentials)) {
        return onion_handler_handle(d->inside, req, res);
    }

    /* sizeof() on the literal counts its terminating NUL; leave it out so no
     * stray '\0' byte is sent and Content-Length matches the visible text. */
    const size_t body_len = sizeof(UNAUTHORIZED_TEXT) - 1;

    onion_response_set_header(res, "WWW-Authenticate", "Basic realm=\"sist2\"");
    onion_response_set_code(res, HTTP_UNAUTHORIZED);
    /* Headers have to be complete before the first body byte is written */
    onion_response_set_length(res, body_len);
    onion_response_write(res, UNAUTHORIZED_TEXT, body_len);
    return OCS_PROCESSED;
}
/**
 * Private-data destructor registered by auth_basic(): frees the wrapped
 * handler first, then the private data block itself.
 */
void auth_basic_free(auth_basic_data_t *data) {
    onion_handler *wrapped = data->inside;

    onion_handler_free(wrapped);
    free(data);
}
/**
 * Wraps a handler so that it is only reached by authenticated requests.
 *
 * @param b64credentials expected text after "Basic " in the Authorization
 *                       header, or NULL to disable authentication. The
 *                       pointer is stored, so the string must outlive the
 *                       returned handler.
 * @param inside_level   handler to protect; it is freed together with the
 *                       returned handler
 * @return the new handler, or NULL when the private data cannot be allocated
 *         (inside_level is left untouched in that case)
 */
onion_handler *auth_basic(const char *b64credentials, onion_handler *inside_level) {
    auth_basic_data_t *privdata = malloc(sizeof(auth_basic_data_t));
    if (privdata == NULL) {
        return NULL;
    }

    privdata->b64credentials = b64credentials;
    privdata->inside = inside_level;

    return onion_handler_new((onion_handler_handler) auth_basic_handler, privdata,
                             (onion_handler_private_data_free) auth_basic_free);
}

4
src/web/auth_basic.h Normal file
View File

@@ -0,0 +1,4 @@
#include "src/sist.h"
onion_handler *auth_basic(const char *b64credentials, onion_handler *inside_level);

View File

@@ -245,6 +245,8 @@ int search(void *p, onion_request *req, onion_response *res) {
if (r->status_code == 200) {
onion_response_write(res, r->body, r->size);
} else {
onion_response_set_code(res, HTTP_INTERNAL_ERROR);
}
free_response(r);
@@ -358,12 +360,24 @@ int file(void *p, onion_request *req, onion_response *res) {
return OCS_PROCESSED;
}
cJSON *doc = elastic_get_document(arg_uuid);
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
return OCS_NOT_PROCESSED;
char *next = arg_uuid;
cJSON *doc = NULL;
cJSON *index_id = NULL;
cJSON *source = NULL;
while (true) {
doc = elastic_get_document(next);
source = cJSON_GetObjectItem(doc, "_source");
index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
return OCS_NOT_PROCESSED;
}
cJSON *parent = cJSON_GetObjectItem(source, "parent");
if (parent == NULL) {
break;
}
next = parent->valuestring;
}
index_t *idx = get_index_by_id(index_id->valuestring);
@@ -391,9 +405,11 @@ void serve(const char *hostname, const char *port) {
onion_set_hostname(o, hostname);
onion_set_port(o, port);
onion_url *urls = onion_root_url(o);
onion_url *urls = onion_url_new();
// Static paths
onion_set_root_handler(o, auth_basic(WebCtx.b64credentials, onion_url_to_handler(urls)));
onion_url_add(urls, "", search_index);
onion_url_add(urls, "css", style);
onion_url_add(urls, "js", javascript);
@@ -410,6 +426,7 @@ void serve(const char *hostname, const char *port) {
onion_url_add(urls, "^f/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$", file);
onion_url_add(urls, "i", index_info);
printf("Starting web server @ http://%s:%s\n", hostname, port);
onion_listen(o);

File diff suppressed because one or more lines are too long

View File

@@ -1,3 +1,7 @@
*:focus {
outline: 0;
}
a {
color: #00BCD4;
}
@@ -19,6 +23,29 @@ body {
border: none;
}
.sub-document {
background: #37474F;
}
.sub-document .text-muted {
color: #8a949c !important;
}
.list-group-item {
background: #212121;
color: #e0e0e0;
border-top: 1px solid #424242;
border-bottom: none;
border-left: none;
border-right: none;
}
.list-group-item:first-child {
border-top: none;
}
.navbar-brand {
font-size: 1.75rem;
padding: 0;
@@ -89,12 +116,18 @@ body {
white-space: nowrap;
text-overflow: ellipsis;
overflow: hidden;
color: #00BCD4;
}
.badge {
margin-right: 3px;
}
.badge-user {
color: #212529;
background-color: #e0e0e0;
}
.fit {
display: block;
min-width: 64px;
@@ -106,6 +139,15 @@ body {
height: auto;
}
.fit-sm {
display: block;
max-width: 64px;
max-height: 64px;
margin: 0 auto 0;
width: auto;
height: auto;
}
.audio-fit {
height: 39px;
vertical-align: bottom;
@@ -149,6 +191,8 @@ mark {
border: 1px solid #616161;
border-radius: 4px;
margin: 3px;
white-space: normal;
color: rgb(224, 224, 224);
}
.irs-single, .irs-from, .irs-to {
@@ -164,6 +208,7 @@ mark {
margin-top: 1em;
margin-bottom: 1em;
}
.custom-select {
overflow: auto;
background-color: #37474F;
@@ -229,6 +274,7 @@ option {
padding: 0.5rem;
background: #212121;
color: #eee;
margin-top: 1em;
}
.btn-xs {
@@ -239,4 +285,76 @@ option {
.btn {
color: #eee;
}
}
.nav-tabs .nav-link {
color: #e0e0e0;
}
.nav-tabs .nav-item.show .nav-link, .nav-tabs .nav-link.active {
background-color: #212121;
border-color: #616161 #616161 #212121;
color: #e0e0e0;
}
/* Focused tab; the selector was listed twice, once is enough */
.nav-tabs .nav-link:focus {
    border-color: #616161 #616161 #212121;
    color: #e0e0e0;
}
.nav-tabs .nav-link:focus, .nav-tabs .nav-link:hover {
border-color: #e0e0e0 #e0e0e0 #212121;
color: #e0e0e0;
}
.nav-tabs {
    /* A colour-only shorthand resets border-style to none, which hides the
       line completely; spell out width and style as well. */
    border-bottom: 1px solid #616161;
}
.nav {
margin-top: 0.5rem;
}
@media (max-width: 800px) {
#treeTabs {
flex-basis: inherit;
flex-grow: inherit;
}
}
.list-group {
margin-top: 1em;
}
.list-group-item {
padding: .25rem 0.5rem;
}
.wrapper-sm {
min-width: 64px;
}
.media-expanded {
display: inherit;
}
.media-expanded .fit {
max-height: 250px;
}
@media (max-width: 600px) {
.media-expanded .fit {
max-height: none;
}
.tagline {
display: none;
}
}
.version {
color: #00BCD4;
margin-left: -18px;
margin-top: -14px;
font-size: 11px;
}

View File

@@ -1,4 +1,10 @@
body {overflow-y:scroll;}
*:focus {
outline: 0;
}
body {
overflow-y: scroll;
}
.progress {
margin-top: 1em;
@@ -6,15 +12,23 @@ body {overflow-y:scroll;}
.card {
margin-top: 1em;
box-shadow: 0 .125rem .25rem rgba(0,0,0,.075) !important;
box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .075) !important;
}
.sub-document {
background: #AB47BC1F;
}
.navbar-brand {
font-size: 1.75rem;
padding: 0;
}
.navbar {
background: #F7F7F7; border-bottom: solid 1px #dfdfdf;
background: #F7F7F7;
border-bottom: solid 1px #dfdfdf;
}
.document {
padding: 0.5rem;
}
@@ -47,6 +61,11 @@ body {overflow-y:scroll;}
background-color: #FFC107;
}
.badge-user {
color: #212529;
background-color: #e0e0e0;
}
.badge-text {
color: #FFFFFF;
background-color: #FAAB3C;
@@ -84,6 +103,15 @@ body {overflow-y:scroll;}
height: auto;
}
.fit-sm {
display: block;
max-width: 64px;
max-height: 64px;
margin: 0 auto 0;
width: auto;
height: auto;
}
.audio-fit {
height: 39px;
vertical-align: bottom;
@@ -98,16 +126,17 @@ body {overflow-y:scroll;}
}
@media (min-width: 1500px) {
.container {
.container {
max-width: 1440px;
}
.card-columns {
column-count: 5;
}
}
@media (min-width: 1800px) {
.container {
.container {
max-width: 1550px;
}
}
@@ -119,13 +148,15 @@ mark {
}
.content-div {
font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;
font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
font-size: 13px;
padding: 1em;
background-color: #f5f5f5;
border: 1px solid #ccc;
border-radius: 4px;
margin: 3px;
white-space: normal;
color: #000;
}
.irs-single, .irs-from, .irs-to {
@@ -145,8 +176,7 @@ mark {
margin-bottom: 1em;
}
.inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow
{
.inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow {
background: none;
}
@@ -162,10 +192,59 @@ mark {
line-height: 1rem;
padding: 0.5rem;
background: #f8f9fa;
margin-top: 1em;
}
.btn-xs {
padding: .1rem .3rem;
font-size: .875rem;
border-radius: .2rem;
}
}
.nav {
margin-top: 0.5rem;
}
@media (max-width: 800px) {
#treeTabs {
flex-basis: inherit;
flex-grow: inherit;
}
}
.list-group {
margin-top: 1em;
}
.list-group-item {
padding: .25rem 0.5rem;
}
.wrapper-sm {
min-width: 64px;
}
.media-expanded {
display: inherit;
}
.media-expanded .fit {
max-height: 250px;
}
@media (max-width: 600px) {
.media-expanded .fit {
max-height: none;
}
.tagline {
display: none;
}
}
.version {
color: #007bff;
margin-left: -18px;
margin-top: -14px;
font-size: 11px;
}

View File

@@ -75,6 +75,84 @@ function shouldPlayVideo(hit) {
return videoc !== "hevc" && videoc !== "mpeg2video" && videoc !== "wmv3";
}
/**
 * Creates an empty div that reserves vertical space until a thumbnail loads.
 *
 * @param w width of the media (may be undefined)
 * @param h height of the media (may be undefined)
 * @param small true for list mode (64px base), false for cards (175px base)
 * @returns {HTMLDivElement}
 */
function makePlaceholder(w, h, small) {
    const base = small ? 64 : 175;
    const threshold = small ? 100 : 272;

    let height = base;
    // NOTE(review): base / w / h is tiny for realistic pixel sizes, so this
    // branch looks practically unreachable - confirm the intended formula.
    if (w > h && (base / w / h) >= threshold) {
        height = base * w / h;
    }

    const placeholder = document.createElement("div");
    placeholder.setAttribute("style", `height: ${height}px`);
    return placeholder;
}
/**
 * Builds the title element of a search hit. The visible text is filled in by
 * applyNameToTitle(); the tooltip is the full path including the extension.
 *
 * @param hit search hit
 * @returns {HTMLDivElement}
 */
function makeTitle(hit) {
    const source = hit["_source"];
    const hasExtension = source.hasOwnProperty("extension") && source["extension"] !== "";
    const extension = hasExtension ? "." + source["extension"] : "";

    const title = document.createElement("div");
    title.setAttribute("class", "file-title");
    applyNameToTitle(hit, title, extension);
    title.setAttribute("title", source["path"] + "/" + source["name"] + extension);

    return title;
}
/**
 * Collects the badge elements of a search hit: one codec badge for
 * video/image/audio documents, followed by one badge per user tag.
 *
 * @param hit search hit
 * @param mimeCategory first part of the mime type ("video", "audio", ...) or null
 * @returns {Array} badge elements, in display order
 */
function getTags(hit, mimeCategory) {
    const source = hit["_source"];
    const badges = [];

    const makeBadge = (cssClass, text) => {
        const badge = document.createElement("span");
        badge.setAttribute("class", cssClass);
        badge.appendChild(document.createTextNode(text));
        return badge;
    };

    // Codec badge
    if (mimeCategory === "video" || mimeCategory === "image") {
        if (source.hasOwnProperty("videoc")) {
            badges.push(makeBadge("badge badge-pill badge-video", source["videoc"].replace(" ", "")));
        }
    } else if (mimeCategory === "audio") {
        if (source.hasOwnProperty("audioc")) {
            badges.push(makeBadge("badge badge-pill badge-audio", source["audioc"]));
        }
    }

    // User tags, written as "some.dotted.name" or "some.dotted.name#rrggbb"
    if (source.hasOwnProperty("tag")) {
        source["tag"].forEach(tag => {
            const tokens = tag.split("#");
            const nameParts = tokens[0].split(".");
            const badge = makeBadge("badge badge-pill badge-user", nameParts[nameParts.length - 1]);

            if (tokens.length > 1) {
                // Pick a readable text colour for the custom background
                const bg = "#" + tokens[1];
                const fg = lum(tokens[1]) > 40 ? "#000" : "#fff";
                badge.setAttribute("style", `background-color: ${bg}; color: ${fg}`);
            }

            badges.push(badge);
        });
    }

    return badges;
}
/**
*
* @param hit
@@ -87,27 +165,25 @@ function createDocCard(hit) {
let docCardBody = document.createElement("div");
docCardBody.setAttribute("class", "card-body document");
//Title
let title = makeTitle(hit);
let isSubDocument = false;
let link = document.createElement("a");
link.setAttribute("href", "f/" + hit["_id"]);
link.setAttribute("target", "_blank");
link.appendChild(title);
//Title
let title = document.createElement("p");
title.setAttribute("class", "file-title");
let extension = hit["_source"].hasOwnProperty("extension") && hit["_source"]["extension"] !== "" ? "." + hit["_source"]["extension"] : "";
applyNameToTitle(hit, title, extension);
title.setAttribute("title", hit["_source"]["path"] + "/" + hit["_source"]["name"] + extension);
docCard.appendChild(title);
if (hit["_source"].hasOwnProperty("parent")) {
docCard.classList.add("sub-document");
isSubDocument = true;
}
let tagContainer = document.createElement("div");
tagContainer.setAttribute("class", "card-text");
if (hit["_source"].hasOwnProperty("mime") && hit["_source"]["mime"] !== null) {
let tags = [];
let thumbnail = null;
let thumbnailOverlay = null;
let imgWrapper = document.createElement("div");
imgWrapper.setAttribute("style", "position: relative");
@@ -115,31 +191,7 @@ function createDocCard(hit) {
let mimeCategory = hit["_source"]["mime"].split("/")[0];
//Thumbnail
if (mimeCategory === "video" && shouldPlayVideo(hit)) {
thumbnail = document.createElement("video");
addVidSrc("f/" + hit["_id"], hit["_source"]["mime"], thumbnail);
thumbnail.setAttribute("class", "fit");
thumbnail.setAttribute("loop", "");
thumbnail.setAttribute("controls", "");
thumbnail.setAttribute("preload", "none");
thumbnail.setAttribute("poster", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
thumbnail.addEventListener("dblclick", function () {
thumbnail.webkitRequestFullScreen();
});
} else if ((hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 20 && hit["_source"]["height"] > 20)
|| hit["_source"]["mime"] === "application/pdf"
|| hit["_source"]["mime"] === "application/epub+zip"
|| hit["_source"]["mime"] === "application/x-cbz"
|| hit["_source"].hasOwnProperty("font_name")
) {
thumbnail = document.createElement("img");
thumbnail.setAttribute("class", "card-img-top fit");
thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
thumbnail.addEventListener("error", () => {
imgWrapper.remove();
});
}
let thumbnail = makeThumbnail(mimeCategory, hit, imgWrapper, false);
//Thumbnail overlay
switch (mimeCategory) {
@@ -149,15 +201,17 @@ function createDocCard(hit) {
thumbnailOverlay.setAttribute("class", "card-img-overlay");
//Resolution
let resolutionBadge = document.createElement("span");
resolutionBadge.setAttribute("class", "badge badge-resolution");
if (hit["_source"].hasOwnProperty("width")) {
resolutionBadge.appendChild(document.createTextNode(hit["_source"]["width"] + "x" + hit["_source"]["height"]));
if (hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 32 && hit["_source"]["height"] > 32) {
let resolutionBadge = document.createElement("span");
resolutionBadge.setAttribute("class", "badge badge-resolution");
if (hit["_source"].hasOwnProperty("width")) {
resolutionBadge.appendChild(document.createTextNode(hit["_source"]["width"] + "x" + hit["_source"]["height"]));
}
thumbnailOverlay.appendChild(resolutionBadge);
}
thumbnailOverlay.appendChild(resolutionBadge);
// Hover
if (thumbnail && hit["_source"]["videoc"] === "gif") {
if (thumbnail && hit["_source"]["videoc"] === "gif" && !isSubDocument) {
gifOver(thumbnail, hit);
}
break;
@@ -167,51 +221,34 @@ function createDocCard(hit) {
if (hit["_source"].hasOwnProperty("duration")) {
thumbnailOverlay = document.createElement("div");
thumbnailOverlay.setAttribute("class", "card-img-overlay");
let durationBadge = document.createElement("span");
const durationBadge = document.createElement("span");
durationBadge.setAttribute("class", "badge badge-resolution");
durationBadge.appendChild(document.createTextNode(humanTime(hit["_source"]["duration"])));
thumbnailOverlay.appendChild(durationBadge);
}
}
//Tags
switch (mimeCategory) {
case "video":
case "image":
if (hit["_source"].hasOwnProperty("videoc")) {
let formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-video");
formatTag.appendChild(document.createTextNode(hit["_source"]["videoc"].replace(" ", "")));
tags.push(formatTag);
}
break;
case "audio": {
if (hit["_source"].hasOwnProperty("audioc")) {
let formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-audio");
formatTag.appendChild(document.createTextNode(hit["_source"]["audioc"]));
tags.push(formatTag);
}
}
break;
// Tags
let tags = getTags(hit, mimeCategory);
for (let i = 0; i < tags.length; i++) {
tagContainer.appendChild(tags[i]);
}
//Content
let contentHl = getContentHighlight(hit);
if (contentHl !== undefined) {
let contentDiv = document.createElement("div");
const contentDiv = document.createElement("div");
contentDiv.setAttribute("class", "content-div");
contentDiv.insertAdjacentHTML('afterbegin', contentHl);
docCard.appendChild(contentDiv);
}
if (thumbnail !== null) {
imgWrapper.appendChild(thumbnail);
docCard.appendChild(imgWrapper);
}
//Audio
if (mimeCategory === "audio" && hit["_source"].hasOwnProperty("audioc")) {
if (mimeCategory === "audio" && hit["_source"].hasOwnProperty("audioc") && !isSubDocument) {
let audio = document.createElement("audio");
audio.setAttribute("preload", "none");
@@ -226,10 +263,6 @@ function createDocCard(hit) {
if (thumbnailOverlay !== null) {
imgWrapper.appendChild(thumbnailOverlay);
}
for (let i = 0; i < tags.length; i++) {
tagContainer.appendChild(tags[i]);
}
}
//Size tag
@@ -241,12 +274,146 @@ function createDocCard(hit) {
docCardBody.appendChild(link);
docCard.appendChild(docCardBody);
link.appendChild(title);
docCardBody.appendChild(tagContainer);
return docCard;
}
/**
 * Creates the preview element (video player or image) for a search hit.
 *
 * A placeholder is put into imgWrapper right away; the preview element is
 * attached to imgWrapper by this function once its thumbnail has loaded.
 *
 * @param mimeCategory first part of the mime type, or null
 * @param hit search hit
 * @param imgWrapper element that receives the placeholder and the preview
 * @param small true for list mode (64px preview that enlarges on click)
 * @returns the preview element, or null when the hit has no preview
 */
function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
    const source = hit["_source"];
    const isSubDocument = source.hasOwnProperty("parent");
    const thumbnailUrl = `t/${source["index"]}/${hit["_id"]}`;

    // null (not undefined) so that callers testing `thumbnail !== null`
    // correctly detect hits without a preview.
    let thumbnail = null;

    if (mimeCategory === "video" && shouldPlayVideo(hit) && !isSubDocument) {
        thumbnail = document.createElement("video");
        addVidSrc("f/" + hit["_id"], source["mime"], thumbnail);

        const placeholder = makePlaceholder(source["width"], source["height"], small);
        imgWrapper.appendChild(placeholder);

        thumbnail.setAttribute("class", small ? "fit-sm" : "fit");

        if (small) {
            // List mode: first click expands the player and enables controls
            thumbnail.style.cursor = "pointer";
            thumbnail.title = "Enlarge";
            thumbnail.addEventListener("click", function () {
                imgWrapper.classList.remove("wrapper-sm", "mr-1");
                imgWrapper.parentElement.classList.add("media-expanded");
                thumbnail.setAttribute("class", "fit");
                thumbnail.setAttribute("controls", "");
            });
        } else {
            thumbnail.setAttribute("controls", "");
        }

        thumbnail.setAttribute("preload", "none");
        thumbnail.setAttribute("poster", thumbnailUrl);
        thumbnail.addEventListener("dblclick", function () {
            thumbnail.setAttribute("controls", "");
            if (thumbnail.webkitRequestFullScreen) {
                thumbnail.webkitRequestFullScreen();
            } else {
                thumbnail.requestFullscreen();
            }
        });

        const showVideo = function () {
            placeholder.remove();
            imgWrapper.appendChild(thumbnail);
        };

        // Preload the poster so the player only replaces the placeholder once
        // there is something to show. The video itself is still playable when
        // the poster is missing, so show the player on error as well instead
        // of leaving the placeholder in place forever.
        const poster = new Image();
        poster.addEventListener("load", showVideo);
        poster.addEventListener("error", showVideo);
        poster.src = thumbnailUrl;

    } else if ((source.hasOwnProperty("width") && source["width"] > 32 && source["height"] > 32)
        || source["mime"] === "application/pdf"
        || source["mime"] === "application/epub+zip"
        || source["mime"] === "application/x-cbz"
        || source.hasOwnProperty("font_name")
    ) {
        thumbnail = document.createElement("img");
        thumbnail.setAttribute("class", small ? "fit-sm" : "card-img-top fit");
        thumbnail.setAttribute("src", thumbnailUrl);

        const placeholder = makePlaceholder(source["width"], source["height"], small);
        imgWrapper.appendChild(placeholder);

        // No thumbnail available: drop the whole wrapper
        thumbnail.addEventListener("error", () => {
            imgWrapper.remove();
        });
        thumbnail.addEventListener("load", () => {
            placeholder.remove();
            imgWrapper.appendChild(thumbnail);
        });
    }

    return thumbnail;
}
/**
 * Builds the list-mode entry of a search hit: optional small preview on the
 * left; title link, content highlight, tags and file size on the right.
 *
 * @param hit search hit
 * @returns {HTMLDivElement} the list-group item
 */
function createDocLine(hit) {
    const source = hit["_source"];

    const newDiv = (cssClass) => {
        const div = document.createElement("div");
        div.setAttribute("class", cssClass);
        return div;
    };

    const mime = source["mime"];
    const mimeCategory = mime ? mime.split("/")[0] : null;
    const tags = getTags(hit, mimeCategory);

    const imgWrapper = newDiv("align-self-start mr-1 wrapper-sm");
    const media = newDiv("media");
    const line = newDiv("list-group-item flex-column align-items-start");
    line.appendChild(media);

    // Title, wrapped in a link to the file
    const link = document.createElement("a");
    link.setAttribute("href", "f/" + hit["_id"]);
    link.setAttribute("target", "_blank");
    link.appendChild(makeTitle(hit));

    const titleDiv = newDiv("file-title");
    titleDiv.appendChild(link);

    // The preview wrapper is only added when there is a preview
    if (makeThumbnail(mimeCategory, hit, imgWrapper, true)) {
        media.appendChild(imgWrapper);
    }
    media.appendChild(titleDiv);

    // Content
    const contentHl = getContentHighlight(hit);
    if (contentHl !== undefined) {
        const contentDiv = newDiv("content-div");
        contentDiv.insertAdjacentHTML('afterbegin', contentHl);
        titleDiv.appendChild(contentDiv);
    }

    // Tags, followed by the size tag
    const tagContainer = newDiv("");
    tags.forEach(tag => tagContainer.appendChild(tag));

    const sizeTag = document.createElement("small");
    sizeTag.appendChild(document.createTextNode(humanFileSize(source["size"])));
    sizeTag.setAttribute("class", "text-muted");
    tagContainer.appendChild(sizeTag);

    titleDiv.appendChild(tagContainer);

    return line;
}
function makePreloader() {
const elem = document.createElement("div");
elem.setAttribute("class", "progress");
@@ -271,18 +438,53 @@ function makePageIndicator(searchResult) {
function makeStatsCard(searchResult) {
let statsCard = document.createElement("div");
statsCard.setAttribute("class", "card");
statsCard.setAttribute("class", "card stat");
let statsCardBody = document.createElement("div");
statsCardBody.setAttribute("class", "card-body");
let stat = document.createElement("p");
const resultMode = document.createElement("div");
resultMode.setAttribute("class", "btn-group btn-group-toggle");
resultMode.setAttribute("data-toggle", "buttons");
resultMode.style.cssFloat = "right";
const listMode = document.createElement("label");
listMode.setAttribute("class", "btn btn-primary");
listMode.appendChild(document.createTextNode("List"));
const gridMode = document.createElement("label");
gridMode.setAttribute("class", "btn btn-primary");
gridMode.appendChild(document.createTextNode("Grid"));
resultMode.appendChild(gridMode);
resultMode.appendChild(listMode);
if (mode === "grid") {
gridMode.classList.add("active")
} else {
listMode.classList.add("active")
}
gridMode.addEventListener("click", () => {
mode = "grid";
localStorage.setItem("mode", mode);
searchDebounced();
});
listMode.addEventListener("click", () => {
mode = "list";
localStorage.setItem("mode", mode);
searchDebounced();
});
let stat = document.createElement("span");
const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms"));
statsCardBody.appendChild(stat);
statsCardBody.appendChild(resultMode);
if (totalHits !== 0) {
let sizeStat = document.createElement("span");
let sizeStat = document.createElement("div");
sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"])));
statsCardBody.appendChild(sizeStat);
}
@@ -294,7 +496,11 @@ function makeStatsCard(searchResult) {
/**
 * Create the empty container element that search results are appended to.
 * Its CSS class follows the global `mode`: "card-columns" when mode is
 * "grid", "list-group" otherwise.
 */
function makeResultContainer() {
let resultContainer = document.createElement("div");
// NOTE(review): this unconditional assignment is overwritten by the branch
// right below; it looks like the pre-change line of the diff hunk - confirm.
resultContainer.setAttribute("class", "card-columns");
if (mode === "grid") {
resultContainer.setAttribute("class", "card-columns");
} else {
resultContainer.setAttribute("class", "list-group");
}
return resultContainer;
}

View File

@@ -1,6 +1,8 @@
const SIZE = 40;
let mimeMap = [];
let tree;
let tagMap = [];
let mimeTree;
let tagTree;
let searchBar = document.getElementById("searchBar");
let pathBar = document.getElementById("pathBar");
@@ -10,6 +12,13 @@ let coolingDown = false;
let searchBusy = true;
let selectedIndices = [];
// Result display mode, restored from localStorage; falls back to "grid"
// when nothing has been stored yet.
let mode = localStorage.getItem("mode");
if (mode === null) {
    mode = "grid";
}
jQuery["jsonPost"] = function (url, data) {
return jQuery.ajax({
url: url,
@@ -49,6 +58,23 @@ $.jsonPost("i").then(resp => {
});
});
/**
 * Create a "node.click" listener for the given tree.
 *
 * The listener suppresses the tree's default handling, keeps the special
 * "any" node and the other nodes mutually exclusive, then invokes the
 * supplied handler and schedules a new search.
 *
 * @param tree tree whose selection the listener manipulates
 * @returns {function(event, node, handler)} the click listener
 */
function handleTreeClick (tree) {
    return (event, node, handler) => {
        event.preventTreeDefault();

        const clickedAny = node.id === "any";
        if (!clickedAny) {
            // A specific node was clicked: "any" must no longer be selected
            tree.node("any").deselect();
        } else if (!node.itree.state.checked) {
            // "any" was clicked while unchecked: clear the whole selection first
            tree.deselect();
        }

        handler();
        searchDebounced();
    };
}
$.jsonPost("es", {
aggs: {
mimeTypes: {
@@ -85,34 +111,86 @@ $.jsonPost("es", {
});
mimeMap.push({"text": "All", "id": "any"});
tree = new InspireTree({
mimeTree = new InspireTree({
selection: {
mode: 'checkbox'
},
data: mimeMap
});
new InspireTreeDOM(tree, {
target: '.tree'
new InspireTreeDOM(mimeTree, {
target: '#mimeTree'
});
tree.on("node.click", function (event, node, handler) {
event.preventTreeDefault();
mimeTree.on("node.click", handleTreeClick(mimeTree));
mimeTree.select();
mimeTree.node("any").deselect();
});
if (node.id === "any") {
if (!node.itree.state.checked) {
tree.deselect();
/**
 * Return the last dot-separated segment of a tag,
 * e.g. "a.b.c" yields "c"; a tag without dots is returned unchanged.
 *
 * @param {string} tag dot-separated tag
 * @returns {string} the final segment
 */
function leafTag(tag) {
    const segments = tag.split(".");
    return segments.pop();
}
// Tags tree
$.jsonPost("es", {
aggs: {
tags: {
terms: {
field: "tag",
size: 10000
}
} else {
tree.node("any").deselect();
}
handler();
searchDebounced();
},
size: 0,
}).then(resp => {
resp["aggregations"]["tags"]["buckets"]
.sort((a, b) => a["key"].localeCompare(b["key"]))
.forEach(bucket => {
addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
});
tree.select();
tree.node("any").deselect();
tagMap.push({"text": "All", "id": "any"});
tagTree = new InspireTree({
selection: {
mode: 'checkbox'
},
data: tagMap
});
new InspireTreeDOM(tagTree, {
target: '#tagTree'
});
tagTree.on("node.click", handleTreeClick(tagTree));
tagTree.node("any").select();
searchBusy = false;
});
/**
 * Insert a dot-separated tag into a nested array of tree nodes.
 *
 * Anything after the first "#" in `tag` is ignored. Each path segment
 * becomes a node `{id, text, children}`; intermediate nodes use the bare
 * segment as text, while the final segment is labelled "<segment> (<count>)".
 * Existing nodes are matched by their text, so tags sharing a prefix end
 * up under the same parent.
 *
 * @param {Array} map    node list to insert into (mutated in place)
 * @param {string} tag   remaining tag path, segments separated by "."
 * @param id             id assigned to every node created for this tag
 * @param count          number shown in the leaf node's label
 */
function addTag(map, tag, id, count) {
    const [head, ...tail] = tag.split("#")[0].split(".");
    const isLeaf = tail.length === 0;
    const remainder = tail.join(".");

    const entry = {
        id: id,
        text: isLeaf ? `${head} (${count})` : head,
        children: []
    };

    const matches = map.filter(node => node.text === entry.text);

    if (matches.length === 0) {
        // New node: build its subtree (if any) before attaching it
        if (!isLeaf) {
            addTag(entry.children, remainder, id, count);
        }
        map.push(entry);
        return;
    }

    // Node already present: only descend when there are segments left
    if (!isLeaf) {
        matches.forEach(node => addTag(node.children, remainder, id, count));
    }
}
new autoComplete({
selector: '#pathBar',
minChars: 1,
@@ -140,7 +218,12 @@ new autoComplete({
/**
 * Append one result element per hit to `resultContainer` and increment the
 * global `docCount` for each hit. Hits are rendered with createDocCard when
 * the global `mode` is "grid" and with createDocLine otherwise.
 */
function insertHits(resultContainer, hits) {
for (let i = 0; i < hits.length; i++) {
// NOTE(review): this unconditional append duplicates the "grid" branch
// below; it looks like the pre-change line of the diff hunk - confirm.
resultContainer.appendChild(createDocCard(hits[i]));
if (mode === "grid") {
resultContainer.appendChild(createDocCard(hits[i]));
} else {
resultContainer.appendChild(createDocLine(hits[i]));
}
docCount++;
}
}
@@ -181,8 +264,8 @@ function doScroll() {
})
}
function getSelectedMimeTypes() {
let mimeTypes = [];
function getSelectedNodes(tree) {
let selectedNodes = [];
let selected = tree.selected();
@@ -194,11 +277,11 @@ function getSelectedMimeTypes() {
//Only get children
if (selected[i].text.indexOf("(") !== -1) {
mimeTypes.push(selected[i].id);
selectedNodes.push(selected[i].id);
}
}
return mimeTypes
return selectedNodes
}
function search() {
@@ -239,11 +322,16 @@ function search() {
if (path !== "") {
filters.push([{term: {path: path}}])
}
let mimeTypes = getSelectedMimeTypes();
let mimeTypes = getSelectedNodes(mimeTree);
if (!mimeTypes.includes("any")) {
filters.push([{terms: {"mime": mimeTypes}}]);
}
let tags = getSelectedNodes(tagTree);
if (!tags.includes("any")) {
filters.push([{terms: {"tag": tags}}]);
}
$.jsonPost("es?scroll=1", {
"_source": {
excludes: ["content"]
@@ -269,6 +357,7 @@ function search() {
post_tags: ["</mark>"],
fields: {
content: {},
// "content.nGram": {},
name: {},
"name.nGram": {},
font_name: {},

View File

@@ -3,7 +3,7 @@
*/
function humanFileSize(bytes) {
if (bytes === 0) {
return "? B"
return "0 B"
}
let thresh = 1000;
@@ -43,9 +43,9 @@ function humanTime(sec_num) {
function debounce(func, wait) {
let timeout;
return function() {
return function () {
let context = this, args = arguments;
let later = function() {
let later = function () {
timeout = null;
func.apply(context, args);
};
@@ -54,3 +54,13 @@ function debounce(func, wait) {
func.apply(context, args);
};
}
/**
 * Compute a weighted brightness value for a colour string.
 *
 * The first character of `c` is skipped and the rest is parsed as a
 * hexadecimal number whose three low bytes are read as red, green and
 * blue. The channels are combined with the weights 0.2126, 0.7152 and
 * 0.0722.
 *
 * @param {string} c colour string such as "#rrggbb"
 * @returns {number} weighted sum of the three channels
 */
function lum(c) {
    const packed = parseInt(c.substring(1), 16);
    const channel = shift => (packed >> shift) & 0xff;

    return 0.2126 * channel(16) + 0.7152 * channel(8) + 0.0722 * channel(0);
}

View File

@@ -11,6 +11,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">v1.1.10</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
</nav>
@@ -24,7 +25,7 @@
<div class="input-group">
<div class="input-group-prepend">
<div class="input-group-text">
<span onclick="document.getElementById('fuzzyToggle').click()">Fuzzy&nbsp</span>
<span title="Toggle fuzzy searching" onclick="document.getElementById('fuzzyToggle').click()">Fuzzy&nbsp</span>
<input title="Toggle fuzzy searching" type="checkbox" id="fuzzyToggle"
onclick="toggleFuzzy()" checked>
</div>
@@ -41,11 +42,25 @@
<select class="custom-select" id="indices" multiple size="6"></select>
</div>
<div class="col">
<label>Mime types</label>
<div class="tree"></div>
<div class="col" id="treeTabs">
<!-- Filter tabs: one tab per filter tree (mime types / tags).
     Each tab's aria-controls names the pane it toggles and each pane's
     aria-labelledby names its tab, so the ARIA references resolve to
     elements that exist in this markup. -->
<ul class="nav nav-tabs" role="tablist">
    <li class="nav-item">
        <a class="nav-link active" id="mime-tab" data-toggle="tab" href="#mime" role="tab" aria-controls="mime" aria-selected="true">Mime Types</a>
    </li>
    <li class="nav-item">
        <a class="nav-link" id="tag-tab" data-toggle="tab" href="#tag" role="tab" aria-controls="tag" aria-selected="false" title="User-defined tags">Tags</a>
    </li>
</ul>
<div class="tab-content" id="myTabContent">
    <div class="tab-pane fade show active" id="mime" role="tabpanel" aria-labelledby="mime-tab">
        <div id="mimeTree" class="tree"></div>
    </div>
    <div class="tab-pane fade" id="tag" role="tabpanel" aria-labelledby="tag-tab">
        <div id="tagTree" class="tree"></div>
    </div>
</div>
</div>
</div>
</div>
</div>