mirror of
https://github.com/simon987/sist2.git
synced 2025-12-12 15:08:53 +00:00
Compare commits
30 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7b49a0dc49 | |||
| eb559b53aa | |||
| 6d01f9c0df | |||
| e724fec668 | |||
| fe5e93b300 | |||
| ecad85fd7d | |||
| 74cc898259 | |||
| dc2e4443c4 | |||
| 1a64431b52 | |||
|
|
9bad515e06 | ||
| 648559cedb | |||
| 3e6cd9cd5c | |||
| f249992798 | |||
|
|
e9645ecdaa | ||
| 046edea0e2 | |||
| a011b7e97b | |||
| 8c1c1697e0 | |||
| 018b49fa4c | |||
| 27b4e6403e | |||
| 13fdbd9e69 | |||
| 5e7fdaf8dd | |||
| 19d5c8ac9f | |||
| 99497049a8 | |||
|
|
1a3181d78b | ||
| 449aa77c8f | |||
| 3058c55510 | |||
| dedf9287b2 | |||
| ab199b0c0c | |||
| c4fbae123e | |||
| dd2397ef5c |
4
.github/ISSUE_TEMPLATE/issue-template.md
vendored
4
.github/ISSUE_TEMPLATE/issue-template.md
vendored
@@ -9,7 +9,9 @@ assignees: ''
|
||||
|
||||
sist2 version:
|
||||
|
||||
Platform (please indicate if you're using Docker):
|
||||
Platform (Linux or Docker):
|
||||
|
||||
Elasticsearch version:
|
||||
|
||||
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0"`
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ add_executable(
|
||||
third-party/argparse/argparse.h third-party/argparse/argparse.c
|
||||
|
||||
src/cli.c src/cli.h
|
||||
src/stats.c src/stats.h)
|
||||
src/stats.c src/stats.h src/ctx.c)
|
||||
|
||||
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
|
||||
|
||||
14
README.md
14
README.md
@@ -18,7 +18,7 @@ sist2 (Simple incremental search tool)
|
||||
* Extracts text from common file types \*
|
||||
* Generates thumbnails \*
|
||||
* Incremental scanning
|
||||
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
|
||||
* Automatic tagging from file attributes via [user scripts](docs/scripting.md)
|
||||
* Recursive scan inside archive files \*\*
|
||||
* OCR support with tesseract \*\*\*
|
||||
* Stats page & disk utilisation visualization
|
||||
@@ -53,7 +53,7 @@ sist2 (Simple incremental search tool)
|
||||
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
|
||||
1. *(or)* `docker pull simon987/sist2:latest`
|
||||
|
||||
1. See [Usage guide](DOCS/USAGE.md)
|
||||
1. See [Usage guide](docs/USAGE.md)
|
||||
|
||||
|
||||
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
|
||||
@@ -61,7 +61,7 @@ sist2 (Simple incremental search tool)
|
||||
|
||||
## Example usage
|
||||
|
||||
See [Usage guide](DOCS/USAGE.md) for more details
|
||||
See [Usage guide](docs/USAGE.md) for more details
|
||||
|
||||
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
|
||||
1. Push index to Elasticsearch: `sist2 index ./docs_idx`
|
||||
@@ -91,14 +91,12 @@ they were directly in the file system. Recursive (archives inside archives)
|
||||
scan is also supported.
|
||||
|
||||
**Limitations**:
|
||||
* Parsing media files with formats that require
|
||||
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
|
||||
* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
|
||||
is limited (see `--mem-buffer` option)
|
||||
* Archive files are scanned sequentially, by a single thread. On systems where
|
||||
**sist2** is not I/O bound, scans might be faster when larger archives are split
|
||||
into smaller parts.
|
||||
|
||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||
|
||||
|
||||
### OCR
|
||||
|
||||
@@ -130,6 +128,6 @@ binaries (GCC 7+ required).
|
||||
2. Build
|
||||
```bash
|
||||
git clone --recursive https://github.com/simon987/sist2/
|
||||
cmake -D <VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
|
||||
make
|
||||
```
|
||||
|
||||
@@ -14,11 +14,13 @@
|
||||
* [examples](#web-examples)
|
||||
* [rewrite_url](#rewrite_url)
|
||||
* [link to specific indices](#link-to-specific-indices)
|
||||
* [exec-script](#exec-script)
|
||||
|
||||
```
|
||||
Usage: sist2 scan [OPTION]... PATH
|
||||
or: sist2 index [OPTION]... INDEX
|
||||
or: sist2 web [OPTION]... INDEX...
|
||||
or: sist2 exec-script [OPTION]... INDEX
|
||||
Lightning-fast file system indexer and search tool.
|
||||
|
||||
-h, --help show this help message and exit
|
||||
@@ -40,6 +42,8 @@ Scan options
|
||||
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
|
||||
-e, --exclude=<str> Files that match this regex will not be scanned
|
||||
--fast Only index file names & mime type
|
||||
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
|
||||
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
|
||||
|
||||
Index options
|
||||
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
|
||||
@@ -48,13 +52,14 @@ Index options
|
||||
--batch-size=<int> Index batch size. DEFAULT: 100
|
||||
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
|
||||
|
||||
|
||||
Web options
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--bind=<str> Listen on this address. DEFAULT=localhost:4090
|
||||
--auth=<str> Basic auth in user:password format
|
||||
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
|
||||
Exec-script options
|
||||
--script-file=<str> Path to user script.
|
||||
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
```
|
||||
|
||||
## Scan
|
||||
@@ -102,6 +107,11 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
In effect, smaller `treemap-threshold` values will yield a more detailed
|
||||
(but also a more cluttered and harder to read) visualization.
|
||||
|
||||
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
|
||||
larger than this number will be read sequentially and no *seek* operations will be supported.
|
||||
|
||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||
|
||||
### Scan examples
|
||||
|
||||
Simple scan
|
||||
@@ -226,7 +236,7 @@ it is currently unsupported and has no guaranties of back/forward compatibility.
|
||||
* `-p, --print`
|
||||
Print index in JSON format to stdout.
|
||||
* `--script-file`
|
||||
Path to user script. See [Scripting](scripting/README.md).
|
||||
Path to user script. See [Scripting](scripting.md).
|
||||
* `--batch-size=<int>`
|
||||
Index batch size. Indexing is generally faster with larger batches, but payloads that
|
||||
are too large will fail and additional overhead for retrying with smaller sizes may slow
|
||||
@@ -286,3 +296,7 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
|
||||
To link to specific indices, you can add a list of comma-separated index names to
|
||||
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
|
||||
not displayed.
|
||||
|
||||
## exec-script
|
||||
|
||||
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
|
||||
|
||||
@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
|
||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
if (ctx._source?.genre != null) {
|
||||
tags.add("genre." + ctx._source.genre.toLowerCase())
|
||||
tags.add("genre." + ctx._source.genre.toLowerCase());
|
||||
}
|
||||
```
|
||||
|
||||
@@ -67,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
|
||||
if (m.find()) {
|
||||
tags.add("year." + m.group(1))
|
||||
tags.add("year." + m.group(1));
|
||||
}
|
||||
```
|
||||
|
||||
@@ -111,16 +111,6 @@ if (ctx._source.path != "") {
|
||||
}
|
||||
```
|
||||
|
||||
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
|
||||
```Java
|
||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||
|
||||
if (ctx._source.path != "") {
|
||||
String[] names = ctx._source.path.splitOnToken('/');
|
||||
tags.add("studio." + names[names.length-1]);
|
||||
}
|
||||
```
|
||||
|
||||
Parse `EXIF:F Number` tag
|
||||
```Java
|
||||
if (ctx._source?.exif_fnumber != null) {
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
"path": {
|
||||
"type": "text",
|
||||
"analyzer": "path_analyzer",
|
||||
"copy_to": "suggest-path",
|
||||
"fielddata": true,
|
||||
"fields": {
|
||||
"nGram": {
|
||||
@@ -22,6 +23,10 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"suggest-path": {
|
||||
"type": "completion",
|
||||
"analyzer": "case_insensitive_kw_analyzer"
|
||||
},
|
||||
"mime": {
|
||||
"type": "keyword"
|
||||
},
|
||||
|
||||
@@ -13,7 +13,7 @@ application/epub+zip, epub
|
||||
application/freeloader, frl
|
||||
application/futuresplash, spl
|
||||
application/groupwise, vew
|
||||
application/gzip, gz
|
||||
application/gzip, gz|tgz
|
||||
application/hta, hta
|
||||
application/i-deas, unv
|
||||
application/iges, iges|igs
|
||||
@@ -111,7 +111,7 @@ application/x-dbf, dbf
|
||||
application/x-dbt,
|
||||
application/x-debian-package, deb
|
||||
application/x-deepv, deepv
|
||||
application/x-director, dcr|dir|dxr
|
||||
application/x-director, dir|dxr
|
||||
application/x-dmp, dmp
|
||||
application/x-dosdriver,
|
||||
application/x-dosexec, dll
|
||||
@@ -429,4 +429,22 @@ video/x-qtc, qtc
|
||||
video/x-sgi-movie, movie|mv
|
||||
x-epoc/x-sisx-app,
|
||||
application/x-zstd-dictionary,
|
||||
application/vnd.ms-outlook,
|
||||
application/vnd.ms-outlook, msg
|
||||
image/x-olympus-orf, orf
|
||||
image/x-nikon-nef, nef
|
||||
image/x-fuji-raf, raf
|
||||
image/x-panasonic-raw, rw2|raw
|
||||
image/x-adobe-dng, dng
|
||||
image/x-canon-cr2, cr2
|
||||
image/x-canon-crw, crw
|
||||
image/x-dcraw,
|
||||
image/x-kodak-dcr, dcr
|
||||
image/x-kodak-k25, k25
|
||||
image/x-kodak-kdc, kdc
|
||||
image/x-minolta-mrw, mrw
|
||||
image/x-pentax-pef, pef
|
||||
image/x-sigma-x3f, x3f
|
||||
image/x-sony-arw, arw
|
||||
image/x-sony-sr2, sr2
|
||||
image/x-sony-srf, srf
|
||||
image/x-epson-erf, erf
|
||||
|
||||
|
@@ -73,6 +73,29 @@ markup = (
|
||||
"text/x-sgml"
|
||||
)
|
||||
|
||||
raw = (
|
||||
"image/x-olympus-orf",
|
||||
"image/x-nikon-nef",
|
||||
"image/x-fuji-raf",
|
||||
"image/x-panasonic-raw",
|
||||
"image/x-adobe-dng",
|
||||
"image/x-canon-cr2",
|
||||
"image/x-canon-crw",
|
||||
"image/x-dcraw",
|
||||
"image/x-kodak-dcr",
|
||||
"image/x-kodak-k25",
|
||||
"image/x-kodak-kdc",
|
||||
"image/x-minolta-mrw",
|
||||
"image/x-pentax-pef",
|
||||
"image/x-sigma-x3f",
|
||||
"image/x-sony-arw",
|
||||
"image/x-sony-sr2",
|
||||
"image/x-sony-srf",
|
||||
"image/x-minolta-mrw",
|
||||
"image/x-pentax-pef",
|
||||
"image/x-epson-erf",
|
||||
)
|
||||
|
||||
cnt = 1
|
||||
|
||||
|
||||
@@ -97,6 +120,8 @@ def mime_id(mime):
|
||||
mime_id += " | 0x02000000"
|
||||
elif mime in markup:
|
||||
mime_id += " | 0x01000000"
|
||||
elif mime in raw:
|
||||
mime_id += " | 0x00800000"
|
||||
elif mime == "application/x-empty":
|
||||
return "1"
|
||||
return mime_id
|
||||
|
||||
101
src/cli.c
101
src/cli.c
@@ -14,7 +14,9 @@
|
||||
#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
|
||||
#define DEFAULT_TREEMAP_THRESHOLD 0.0005
|
||||
|
||||
const char* TESS_DATAPATHS[] = {
|
||||
#define DEFAULT_MAX_MEM_BUFFER 2000
|
||||
|
||||
const char *TESS_DATAPATHS[] = {
|
||||
"/usr/share/tessdata/",
|
||||
"/usr/share/tesseract-ocr/tessdata/",
|
||||
"./",
|
||||
@@ -30,6 +32,11 @@ scan_args_t *scan_args_create() {
|
||||
return args;
|
||||
}
|
||||
|
||||
exec_args_t *exec_args_create() {
|
||||
exec_args_t *args = calloc(sizeof(exec_args_t), 1);
|
||||
return args;
|
||||
}
|
||||
|
||||
void scan_args_destroy(scan_args_t *args) {
|
||||
if (args->name != NULL) {
|
||||
free(args->name);
|
||||
@@ -53,6 +60,10 @@ void web_args_destroy(web_args_t *args) {
|
||||
free(args);
|
||||
}
|
||||
|
||||
void exec_args_destroy(exec_args_t *args) {
|
||||
free(args);
|
||||
}
|
||||
|
||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "Required positional argument: PATH.\n");
|
||||
@@ -113,7 +124,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->depth < 0) {
|
||||
if (args->depth <= 0) {
|
||||
args->depth = G_MAXINT32;
|
||||
} else {
|
||||
args->depth += 1;
|
||||
@@ -145,7 +156,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
char filename[128];
|
||||
sprintf(filename, "%s.traineddata", args->tesseract_lang);
|
||||
const char * path = find_file_in_paths(TESS_DATAPATHS, filename);
|
||||
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
|
||||
if (path == NULL) {
|
||||
LOG_FATAL("cli.c", "Could not find tesseract language file!");
|
||||
}
|
||||
@@ -187,6 +198,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
args->treemap_threshold = atof(args->treemap_threshold_str);
|
||||
}
|
||||
|
||||
if (args->max_memory_buffer == 0) {
|
||||
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
|
||||
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
|
||||
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
|
||||
@@ -203,6 +218,35 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
|
||||
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
|
||||
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
|
||||
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int load_script(const char *script_path, char **dst) {
|
||||
struct stat info;
|
||||
int res = stat(script_path, &info);
|
||||
|
||||
if (res == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
int fd = open(script_path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
*dst = malloc(info.st_size + 1);
|
||||
res = read(fd, *dst, info.st_size);
|
||||
if (res < 0) {
|
||||
fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
*(*dst + info.st_size) = '\0';
|
||||
close(fd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -230,29 +274,9 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
if (args->script_path != NULL) {
|
||||
struct stat info;
|
||||
int res = stat(args->script_path, &info);
|
||||
|
||||
if (res == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
if (load_script(args->script_path, &args->script) != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int fd = open(args->script_path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
args->script = malloc(info.st_size + 1);
|
||||
res = read(fd, args->script, info.st_size);
|
||||
if (res < 0) {
|
||||
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
*(args->script + info.st_size) = '\0';
|
||||
close(fd);
|
||||
}
|
||||
|
||||
if (args->batch_size == 0) {
|
||||
@@ -288,7 +312,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
if (args->credentials != NULL) {
|
||||
char * ptr = strstr(args->credentials, ":");
|
||||
char *ptr = strstr(args->credentials, ":");
|
||||
if (ptr == NULL) {
|
||||
fprintf(stderr, "Invalid --auth format, see usage\n");
|
||||
return 1;
|
||||
@@ -341,3 +365,30 @@ web_args_t *web_args_create() {
|
||||
return args;
|
||||
}
|
||||
|
||||
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
|
||||
|
||||
char *index_path = abspath(argv[1]);
|
||||
if (index_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||
return 1;
|
||||
} else {
|
||||
args->index_path = argv[1];
|
||||
free(index_path);
|
||||
}
|
||||
|
||||
if (args->es_url == NULL) {
|
||||
args->es_url = DEFAULT_ES_URL;
|
||||
}
|
||||
|
||||
if (args->script_path == NULL) {
|
||||
LOG_FATAL("cli.c", "--script-file argument is required");
|
||||
}
|
||||
|
||||
if (load_script(args->script_path, &args->script) != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
|
||||
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
|
||||
return 0;
|
||||
}
|
||||
|
||||
14
src/cli.h
14
src/cli.h
@@ -24,6 +24,7 @@ typedef struct scan_args {
|
||||
int fast;
|
||||
const char* treemap_threshold_str;
|
||||
double treemap_threshold;
|
||||
int max_memory_buffer;
|
||||
} scan_args_t;
|
||||
|
||||
scan_args_t *scan_args_create();
|
||||
@@ -53,6 +54,13 @@ typedef struct web_args {
|
||||
const char **indices;
|
||||
} web_args_t;
|
||||
|
||||
typedef struct exec_args {
|
||||
char *es_url;
|
||||
const char *index_path;
|
||||
const char *script_path;
|
||||
char *script;
|
||||
} exec_args_t;
|
||||
|
||||
index_args_t *index_args_create();
|
||||
|
||||
void index_args_destroy(index_args_t *args);
|
||||
@@ -65,4 +73,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv);
|
||||
|
||||
int web_args_validate(web_args_t *args, int argc, const char **argv);
|
||||
|
||||
exec_args_t *exec_args_create();
|
||||
|
||||
void exec_args_destroy(exec_args_t *args);
|
||||
|
||||
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
|
||||
|
||||
#endif
|
||||
|
||||
6
src/ctx.c
Normal file
6
src/ctx.c
Normal file
@@ -0,0 +1,6 @@
|
||||
#include "ctx.h"
|
||||
|
||||
ScanCtx_t ScanCtx;
|
||||
WebCtx_t WebCtx;
|
||||
IndexCtx_t IndexCtx;
|
||||
LogCtx_t LogCtx;
|
||||
26
src/ctx.h
26
src/ctx.h
@@ -12,12 +12,12 @@
|
||||
#include "libscan/ooxml/ooxml.h"
|
||||
#include "libscan/text/text.h"
|
||||
#include "libscan/mobi/scan_mobi.h"
|
||||
#include "libscan/raw/raw.h"
|
||||
|
||||
#include <glib.h>
|
||||
#include <pcre.h>
|
||||
|
||||
//TODO Move to individual scan ctx
|
||||
struct {
|
||||
typedef struct {
|
||||
struct index_t index;
|
||||
|
||||
GHashTable *mime_table;
|
||||
@@ -46,27 +46,33 @@ struct {
|
||||
scan_ooxml_ctx_t ooxml_ctx;
|
||||
scan_text_ctx_t text_ctx;
|
||||
scan_mobi_ctx_t mobi_ctx;
|
||||
} ScanCtx;
|
||||
scan_raw_ctx_t raw_ctx;
|
||||
} ScanCtx_t;
|
||||
|
||||
struct {
|
||||
typedef struct {
|
||||
int verbose;
|
||||
int very_verbose;
|
||||
int no_color;
|
||||
} LogCtx;
|
||||
} LogCtx_t;
|
||||
|
||||
struct {
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
int batch_size;
|
||||
} IndexCtx;
|
||||
} IndexCtx_t;
|
||||
|
||||
struct {
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
int index_count;
|
||||
char *auth_user;
|
||||
char *auth_pass;
|
||||
int auth_enabled;
|
||||
struct index_t indices[16];
|
||||
} WebCtx;
|
||||
struct index_t indices[64];
|
||||
} WebCtx_t;
|
||||
|
||||
extern ScanCtx_t ScanCtx;
|
||||
extern WebCtx_t WebCtx;
|
||||
extern IndexCtx_t IndexCtx;
|
||||
extern LogCtx_t LogCtx;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -53,6 +53,10 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
|
||||
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url);
|
||||
}
|
||||
|
||||
cJSON *body = cJSON_CreateObject();
|
||||
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
|
||||
cJSON_AddStringToObject(script_obj, "lang", "painless");
|
||||
@@ -202,9 +206,8 @@ void delete_queue(int max) {
|
||||
Indexer->line_head = tmp->next;
|
||||
if (Indexer->line_head == NULL) {
|
||||
Indexer->line_tail = NULL;
|
||||
} else {
|
||||
free(tmp);
|
||||
}
|
||||
free(tmp);
|
||||
Indexer->queued -= 1;
|
||||
}
|
||||
}
|
||||
@@ -265,6 +268,7 @@ void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
|
||||
|
||||
if (script != NULL) {
|
||||
execute_update_script(script, index_id);
|
||||
free(script);
|
||||
}
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
|
||||
|
||||
@@ -32,4 +32,6 @@ cJSON *elastic_get_document(const char *uuid_str);
|
||||
|
||||
char *elastic_get_status();
|
||||
|
||||
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]);
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because one or more lines are too long
49
src/main.c
49
src/main.c
@@ -2,7 +2,6 @@
|
||||
#include "ctx.h"
|
||||
|
||||
#include <third-party/argparse/argparse.h>
|
||||
#include <glib.h>
|
||||
#include <locale.h>
|
||||
|
||||
#include "cli.h"
|
||||
@@ -22,11 +21,12 @@
|
||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
|
||||
|
||||
static const char *const Version = "2.3.0";
|
||||
static const char *const Version = "2.5.0";
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
"sist2 index [OPTION]... INDEX",
|
||||
"sist2 web [OPTION]... INDEX...",
|
||||
"sist2 exec-script [OPTION]... INDEX",
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -127,6 +127,7 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.media_ctx.log = _log;
|
||||
ScanCtx.media_ctx.logf = _logf;
|
||||
ScanCtx.media_ctx.store = _store;
|
||||
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
|
||||
init_media();
|
||||
|
||||
// OOXML
|
||||
@@ -153,6 +154,13 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
|
||||
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
|
||||
ScanCtx.fast = args->fast;
|
||||
|
||||
// Raw
|
||||
ScanCtx.raw_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.raw_ctx.tn_size = args->size;
|
||||
ScanCtx.raw_ctx.log = _log;
|
||||
ScanCtx.raw_ctx.logf = _logf;
|
||||
ScanCtx.raw_ctx.store = _store;
|
||||
}
|
||||
|
||||
|
||||
@@ -286,6 +294,22 @@ void sist2_index(index_args_t *args) {
|
||||
}
|
||||
}
|
||||
|
||||
void sist2_exec_script(exec_args_t *args) {
|
||||
|
||||
LogCtx.verbose = TRUE;
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
|
||||
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
|
||||
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
||||
|
||||
execute_update_script(args->script, desc.uuid);
|
||||
free(args->script);
|
||||
}
|
||||
|
||||
void sist2_web(web_args_t *args) {
|
||||
|
||||
WebCtx.es_url = args->es_url;
|
||||
@@ -322,10 +346,12 @@ int main(int argc, const char *argv[]) {
|
||||
scan_args_t *scan_args = scan_args_create();
|
||||
index_args_t *index_args = index_args_create();
|
||||
web_args_t *web_args = web_args_create();
|
||||
exec_args_t *exec_args = exec_args_create();
|
||||
|
||||
int arg_version = 0;
|
||||
|
||||
char *common_es_url = NULL;
|
||||
char *common_script_path = NULL;
|
||||
|
||||
struct argparse_option options[] = {
|
||||
OPT_HELP(),
|
||||
@@ -358,11 +384,14 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
|
||||
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
|
||||
"(see USAGE.md). DEFAULT: 0.0005"),
|
||||
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
|
||||
"Maximum memory buffer size per thread in MB for files inside archives "
|
||||
"(see USAGE.md). DEFAULT: 2000"),
|
||||
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
||||
"(You must use this option the first time you use the index command)"),
|
||||
@@ -372,6 +401,9 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
|
||||
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
|
||||
|
||||
OPT_GROUP("Exec-script options"),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
|
||||
OPT_END(),
|
||||
};
|
||||
|
||||
@@ -391,6 +423,8 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
web_args->es_url = common_es_url;
|
||||
index_args->es_url = common_es_url;
|
||||
index_args->script_path = common_script_path;
|
||||
exec_args->script_path = common_script_path;
|
||||
|
||||
if (argc == 0) {
|
||||
argparse_usage(&argparse);
|
||||
@@ -419,6 +453,14 @@ int main(int argc, const char *argv[]) {
|
||||
}
|
||||
sist2_web(web_args);
|
||||
|
||||
} else if (strcmp(argv[0], "exec-script") == 0) {
|
||||
|
||||
int err = exec_args_validate(exec_args, argc, argv);
|
||||
if (err != 0) {
|
||||
goto end;
|
||||
}
|
||||
sist2_exec_script(exec_args);
|
||||
|
||||
} else {
|
||||
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
|
||||
argparse_usage(&argparse);
|
||||
@@ -430,6 +472,7 @@ int main(int argc, const char *argv[]) {
|
||||
scan_args_destroy(scan_args);
|
||||
index_args_destroy(index_args);
|
||||
web_args_destroy(web_args);
|
||||
exec_args_destroy(exec_args);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include "../sist.h"
|
||||
|
||||
#define MAJOR_MIME(mime_id) (mime_id & 0x00FF0000) >> 16
|
||||
#define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
|
||||
|
||||
#define MIME_EMPTY 1
|
||||
|
||||
@@ -31,6 +31,9 @@
|
||||
#define MARKUP_MASK 0x01000000
|
||||
#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
|
||||
|
||||
#define RAW_MASK 0x00800000
|
||||
#define IS_RAW(mime_id) (mime_id & RAW_MASK) == RAW_MASK
|
||||
|
||||
enum major_mime {
|
||||
MimeInvalid = 0,
|
||||
MimeModel = 1,
|
||||
|
||||
@@ -315,127 +315,145 @@ enum mime {
|
||||
image_webp=524595,
|
||||
image_wmf=524596,
|
||||
image_x_3ds=524597,
|
||||
image_x_award_bioslogo=524598,
|
||||
image_x_cmu_raster=524599,
|
||||
image_x_cur=524600,
|
||||
image_x_dwg=524601,
|
||||
image_x_eps=524602,
|
||||
image_x_exr=524603,
|
||||
image_x_gem=524604,
|
||||
image_x_icns=524605,
|
||||
image_x_icon=524606 | 0x80000000,
|
||||
image_x_jg=524607,
|
||||
image_x_jps=524608,
|
||||
image_x_ms_bmp=524609,
|
||||
image_x_niff=524610,
|
||||
image_x_pcx=524611,
|
||||
image_x_pict=524612,
|
||||
image_x_portable_bitmap=524613,
|
||||
image_x_portable_graymap=524614,
|
||||
image_x_portable_pixmap=524615,
|
||||
image_x_quicktime=524616,
|
||||
image_x_rgb=524617,
|
||||
image_x_tga=524618,
|
||||
image_x_tiff=524619,
|
||||
image_x_win_bitmap=524620,
|
||||
image_x_xcf=524621 | 0x80000000,
|
||||
image_x_xpixmap=524622 | 0x80000000,
|
||||
image_x_xwindowdump=524623,
|
||||
message_news=196944,
|
||||
message_rfc822=196945,
|
||||
model_vnd_dwf=65874,
|
||||
model_vnd_gdl=65875,
|
||||
model_vnd_gs_gdl=65876,
|
||||
model_vrml=65877,
|
||||
model_x_pov=65878,
|
||||
text_PGP=590167,
|
||||
text_asp=590168,
|
||||
text_css=590169,
|
||||
text_html=590170 | 0x01000000,
|
||||
text_javascript=590171,
|
||||
text_mcf=590172,
|
||||
text_pascal=590173,
|
||||
text_plain=590174,
|
||||
text_richtext=590175,
|
||||
text_rtf=590176,
|
||||
text_scriplet=590177,
|
||||
text_tab_separated_values=590178,
|
||||
text_troff=590179,
|
||||
text_uri_list=590180,
|
||||
text_vnd_abc=590181,
|
||||
text_vnd_fmi_flexstor=590182,
|
||||
text_vnd_wap_wml=590183,
|
||||
text_vnd_wap_wmlscript=590184,
|
||||
text_webviewhtml=590185,
|
||||
text_x_Algol68=590186,
|
||||
text_x_asm=590187,
|
||||
text_x_audiosoft_intra=590188,
|
||||
text_x_awk=590189,
|
||||
text_x_bcpl=590190,
|
||||
text_x_c=590191,
|
||||
text_x_c__=590192,
|
||||
text_x_component=590193,
|
||||
text_x_diff=590194,
|
||||
text_x_fortran=590195,
|
||||
text_x_java=590196,
|
||||
text_x_la_asf=590197,
|
||||
text_x_lisp=590198,
|
||||
text_x_m=590199,
|
||||
text_x_m4=590200,
|
||||
text_x_makefile=590201,
|
||||
text_x_ms_regedit=590202,
|
||||
text_x_msdos_batch=590203,
|
||||
text_x_objective_c=590204,
|
||||
text_x_pascal=590205,
|
||||
text_x_perl=590206,
|
||||
text_x_php=590207,
|
||||
text_x_po=590208,
|
||||
text_x_python=590209,
|
||||
text_x_ruby=590210,
|
||||
text_x_sass=590211,
|
||||
text_x_scss=590212,
|
||||
text_x_server_parsed_html=590213,
|
||||
text_x_setext=590214,
|
||||
text_x_sgml=590215 | 0x01000000,
|
||||
text_x_shellscript=590216,
|
||||
text_x_speech=590217,
|
||||
text_x_tcl=590218,
|
||||
text_x_tex=590219,
|
||||
text_x_uil=590220,
|
||||
text_x_uuencode=590221,
|
||||
text_x_vcalendar=590222,
|
||||
text_x_vcard=590223,
|
||||
text_xml=590224 | 0x01000000,
|
||||
video_MP2T=393617,
|
||||
video_animaflex=393618,
|
||||
video_avi=393619,
|
||||
video_avs_video=393620,
|
||||
video_mp4=393621,
|
||||
video_mpeg=393622,
|
||||
video_quicktime=393623,
|
||||
video_vdo=393624,
|
||||
video_vivo=393625,
|
||||
video_vnd_rn_realvideo=393626,
|
||||
video_vosaic=393627,
|
||||
video_webm=393628,
|
||||
video_x_amt_demorun=393629,
|
||||
video_x_amt_showrun=393630,
|
||||
video_x_atomic3d_feature=393631,
|
||||
video_x_dl=393632,
|
||||
video_x_dv=393633,
|
||||
video_x_fli=393634,
|
||||
video_x_flv=393635,
|
||||
video_x_isvideo=393636,
|
||||
video_x_jng=393637 | 0x80000000,
|
||||
video_x_m4v=393638,
|
||||
video_x_matroska=393639,
|
||||
video_x_mng=393640,
|
||||
video_x_motion_jpeg=393641,
|
||||
video_x_ms_asf=393642,
|
||||
video_x_msvideo=393643,
|
||||
video_x_qtc=393644,
|
||||
video_x_sgi_movie=393645,
|
||||
x_epoc_x_sisx_app=721326,
|
||||
image_x_adobe_dng=524598 | 0x00800000,
|
||||
image_x_award_bioslogo=524599,
|
||||
image_x_canon_cr2=524600 | 0x00800000,
|
||||
image_x_canon_crw=524601 | 0x00800000,
|
||||
image_x_cmu_raster=524602,
|
||||
image_x_cur=524603,
|
||||
image_x_dcraw=524604 | 0x00800000,
|
||||
image_x_dwg=524605,
|
||||
image_x_eps=524606,
|
||||
image_x_epson_erf=524607 | 0x00800000,
|
||||
image_x_exr=524608,
|
||||
image_x_fuji_raf=524609 | 0x00800000,
|
||||
image_x_gem=524610,
|
||||
image_x_icns=524611,
|
||||
image_x_icon=524612 | 0x80000000,
|
||||
image_x_jg=524613,
|
||||
image_x_jps=524614,
|
||||
image_x_kodak_dcr=524615 | 0x00800000,
|
||||
image_x_kodak_k25=524616 | 0x00800000,
|
||||
image_x_kodak_kdc=524617 | 0x00800000,
|
||||
image_x_minolta_mrw=524618 | 0x00800000,
|
||||
image_x_ms_bmp=524619,
|
||||
image_x_niff=524620,
|
||||
image_x_nikon_nef=524621 | 0x00800000,
|
||||
image_x_olympus_orf=524622 | 0x00800000,
|
||||
image_x_panasonic_raw=524623 | 0x00800000,
|
||||
image_x_pcx=524624,
|
||||
image_x_pentax_pef=524625 | 0x00800000,
|
||||
image_x_pict=524626,
|
||||
image_x_portable_bitmap=524627,
|
||||
image_x_portable_graymap=524628,
|
||||
image_x_portable_pixmap=524629,
|
||||
image_x_quicktime=524630,
|
||||
image_x_rgb=524631,
|
||||
image_x_sigma_x3f=524632 | 0x00800000,
|
||||
image_x_sony_arw=524633 | 0x00800000,
|
||||
image_x_sony_sr2=524634 | 0x00800000,
|
||||
image_x_sony_srf=524635 | 0x00800000,
|
||||
image_x_tga=524636,
|
||||
image_x_tiff=524637,
|
||||
image_x_win_bitmap=524638,
|
||||
image_x_xcf=524639 | 0x80000000,
|
||||
image_x_xpixmap=524640 | 0x80000000,
|
||||
image_x_xwindowdump=524641,
|
||||
message_news=196962,
|
||||
message_rfc822=196963,
|
||||
model_vnd_dwf=65892,
|
||||
model_vnd_gdl=65893,
|
||||
model_vnd_gs_gdl=65894,
|
||||
model_vrml=65895,
|
||||
model_x_pov=65896,
|
||||
text_PGP=590185,
|
||||
text_asp=590186,
|
||||
text_css=590187,
|
||||
text_html=590188 | 0x01000000,
|
||||
text_javascript=590189,
|
||||
text_mcf=590190,
|
||||
text_pascal=590191,
|
||||
text_plain=590192,
|
||||
text_richtext=590193,
|
||||
text_rtf=590194,
|
||||
text_scriplet=590195,
|
||||
text_tab_separated_values=590196,
|
||||
text_troff=590197,
|
||||
text_uri_list=590198,
|
||||
text_vnd_abc=590199,
|
||||
text_vnd_fmi_flexstor=590200,
|
||||
text_vnd_wap_wml=590201,
|
||||
text_vnd_wap_wmlscript=590202,
|
||||
text_webviewhtml=590203,
|
||||
text_x_Algol68=590204,
|
||||
text_x_asm=590205,
|
||||
text_x_audiosoft_intra=590206,
|
||||
text_x_awk=590207,
|
||||
text_x_bcpl=590208,
|
||||
text_x_c=590209,
|
||||
text_x_c__=590210,
|
||||
text_x_component=590211,
|
||||
text_x_diff=590212,
|
||||
text_x_fortran=590213,
|
||||
text_x_java=590214,
|
||||
text_x_la_asf=590215,
|
||||
text_x_lisp=590216,
|
||||
text_x_m=590217,
|
||||
text_x_m4=590218,
|
||||
text_x_makefile=590219,
|
||||
text_x_ms_regedit=590220,
|
||||
text_x_msdos_batch=590221,
|
||||
text_x_objective_c=590222,
|
||||
text_x_pascal=590223,
|
||||
text_x_perl=590224,
|
||||
text_x_php=590225,
|
||||
text_x_po=590226,
|
||||
text_x_python=590227,
|
||||
text_x_ruby=590228,
|
||||
text_x_sass=590229,
|
||||
text_x_scss=590230,
|
||||
text_x_server_parsed_html=590231,
|
||||
text_x_setext=590232,
|
||||
text_x_sgml=590233 | 0x01000000,
|
||||
text_x_shellscript=590234,
|
||||
text_x_speech=590235,
|
||||
text_x_tcl=590236,
|
||||
text_x_tex=590237,
|
||||
text_x_uil=590238,
|
||||
text_x_uuencode=590239,
|
||||
text_x_vcalendar=590240,
|
||||
text_x_vcard=590241,
|
||||
text_xml=590242 | 0x01000000,
|
||||
video_MP2T=393635,
|
||||
video_animaflex=393636,
|
||||
video_avi=393637,
|
||||
video_avs_video=393638,
|
||||
video_mp4=393639,
|
||||
video_mpeg=393640,
|
||||
video_quicktime=393641,
|
||||
video_vdo=393642,
|
||||
video_vivo=393643,
|
||||
video_vnd_rn_realvideo=393644,
|
||||
video_vosaic=393645,
|
||||
video_webm=393646,
|
||||
video_x_amt_demorun=393647,
|
||||
video_x_amt_showrun=393648,
|
||||
video_x_atomic3d_feature=393649,
|
||||
video_x_dl=393650,
|
||||
video_x_dv=393651,
|
||||
video_x_fli=393652,
|
||||
video_x_flv=393653,
|
||||
video_x_isvideo=393654,
|
||||
video_x_jng=393655 | 0x80000000,
|
||||
video_x_m4v=393656,
|
||||
video_x_matroska=393657,
|
||||
video_x_mng=393658,
|
||||
video_x_motion_jpeg=393659,
|
||||
video_x_ms_asf=393660,
|
||||
video_x_msvideo=393661,
|
||||
video_x_qtc=393662,
|
||||
video_x_sgi_movie=393663,
|
||||
x_epoc_x_sisx_app=721344,
|
||||
};
|
||||
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
|
||||
case application_arj: return "application/arj";
|
||||
@@ -868,6 +886,24 @@ case video_x_sgi_movie: return "video/x-sgi-movie";
|
||||
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
|
||||
case application_x_zstd_dictionary: return "application/x-zstd-dictionary";
|
||||
case application_vnd_ms_outlook: return "application/vnd.ms-outlook";
|
||||
case image_x_olympus_orf: return "image/x-olympus-orf";
|
||||
case image_x_nikon_nef: return "image/x-nikon-nef";
|
||||
case image_x_fuji_raf: return "image/x-fuji-raf";
|
||||
case image_x_panasonic_raw: return "image/x-panasonic-raw";
|
||||
case image_x_adobe_dng: return "image/x-adobe-dng";
|
||||
case image_x_canon_cr2: return "image/x-canon-cr2";
|
||||
case image_x_canon_crw: return "image/x-canon-crw";
|
||||
case image_x_dcraw: return "image/x-dcraw";
|
||||
case image_x_kodak_dcr: return "image/x-kodak-dcr";
|
||||
case image_x_kodak_k25: return "image/x-kodak-k25";
|
||||
case image_x_kodak_kdc: return "image/x-kodak-kdc";
|
||||
case image_x_minolta_mrw: return "image/x-minolta-mrw";
|
||||
case image_x_pentax_pef: return "image/x-pentax-pef";
|
||||
case image_x_sigma_x3f: return "image/x-sigma-x3f";
|
||||
case image_x_sony_arw: return "image/x-sony-arw";
|
||||
case image_x_sony_sr2: return "image/x-sony-sr2";
|
||||
case image_x_sony_srf: return "image/x-sony-srf";
|
||||
case image_x_epson_erf: return "image/x-epson-erf";
|
||||
default: return NULL;}}
|
||||
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
|
||||
@@ -885,6 +921,7 @@ g_hash_table_insert(ext_table, "frl", (gpointer)application_freeloader);
|
||||
g_hash_table_insert(ext_table, "spl", (gpointer)application_futuresplash);
|
||||
g_hash_table_insert(ext_table, "vew", (gpointer)application_groupwise);
|
||||
g_hash_table_insert(ext_table, "gz", (gpointer)application_gzip);
|
||||
g_hash_table_insert(ext_table, "tgz", (gpointer)application_gzip);
|
||||
g_hash_table_insert(ext_table, "hta", (gpointer)application_hta);
|
||||
g_hash_table_insert(ext_table, "unv", (gpointer)application_i_deas);
|
||||
g_hash_table_insert(ext_table, "iges", (gpointer)application_iges);
|
||||
@@ -999,7 +1036,6 @@ g_hash_table_insert(ext_table, "cpio", (gpointer)application_x_cpio);
|
||||
g_hash_table_insert(ext_table, "dbf", (gpointer)application_x_dbf);
|
||||
g_hash_table_insert(ext_table, "deb", (gpointer)application_x_debian_package);
|
||||
g_hash_table_insert(ext_table, "deepv", (gpointer)application_x_deepv);
|
||||
g_hash_table_insert(ext_table, "dcr", (gpointer)application_x_director);
|
||||
g_hash_table_insert(ext_table, "dir", (gpointer)application_x_director);
|
||||
g_hash_table_insert(ext_table, "dxr", (gpointer)application_x_director);
|
||||
g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp);
|
||||
@@ -1387,6 +1423,25 @@ g_hash_table_insert(ext_table, "divx", (gpointer)video_x_msvideo);
|
||||
g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc);
|
||||
g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie);
|
||||
g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie);
|
||||
g_hash_table_insert(ext_table, "msg", (gpointer)application_vnd_ms_outlook);
|
||||
g_hash_table_insert(ext_table, "orf", (gpointer)image_x_olympus_orf);
|
||||
g_hash_table_insert(ext_table, "nef", (gpointer)image_x_nikon_nef);
|
||||
g_hash_table_insert(ext_table, "raf", (gpointer)image_x_fuji_raf);
|
||||
g_hash_table_insert(ext_table, "rw2", (gpointer)image_x_panasonic_raw);
|
||||
g_hash_table_insert(ext_table, "raw", (gpointer)image_x_panasonic_raw);
|
||||
g_hash_table_insert(ext_table, "dng", (gpointer)image_x_adobe_dng);
|
||||
g_hash_table_insert(ext_table, "cr2", (gpointer)image_x_canon_cr2);
|
||||
g_hash_table_insert(ext_table, "crw", (gpointer)image_x_canon_crw);
|
||||
g_hash_table_insert(ext_table, "dcr", (gpointer)image_x_kodak_dcr);
|
||||
g_hash_table_insert(ext_table, "k25", (gpointer)image_x_kodak_k25);
|
||||
g_hash_table_insert(ext_table, "kdc", (gpointer)image_x_kodak_kdc);
|
||||
g_hash_table_insert(ext_table, "mrw", (gpointer)image_x_minolta_mrw);
|
||||
g_hash_table_insert(ext_table, "pef", (gpointer)image_x_pentax_pef);
|
||||
g_hash_table_insert(ext_table, "xf3", (gpointer)image_x_sigma_x3f);
|
||||
g_hash_table_insert(ext_table, "arw", (gpointer)image_x_sony_arw);
|
||||
g_hash_table_insert(ext_table, "sr2", (gpointer)image_x_sony_sr2);
|
||||
g_hash_table_insert(ext_table, "srf", (gpointer)image_x_sony_srf);
|
||||
g_hash_table_insert(ext_table, "erf", (gpointer)image_x_epson_erf);
|
||||
return ext_table;}
|
||||
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
|
||||
@@ -1819,5 +1874,23 @@ g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie
|
||||
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
|
||||
g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary);
|
||||
g_hash_table_insert(mime_table, "application/vnd.ms-outlook", (gpointer)application_vnd_ms_outlook);
|
||||
g_hash_table_insert(mime_table, "image/x-olympus-orf", (gpointer)image_x_olympus_orf);
|
||||
g_hash_table_insert(mime_table, "image/x-nikon-nef", (gpointer)image_x_nikon_nef);
|
||||
g_hash_table_insert(mime_table, "image/x-fuji-raf", (gpointer)image_x_fuji_raf);
|
||||
g_hash_table_insert(mime_table, "image/x-panasonic-raw", (gpointer)image_x_panasonic_raw);
|
||||
g_hash_table_insert(mime_table, "image/x-adobe-dng", (gpointer)image_x_adobe_dng);
|
||||
g_hash_table_insert(mime_table, "image/x-canon-cr2", (gpointer)image_x_canon_cr2);
|
||||
g_hash_table_insert(mime_table, "image/x-canon-crw", (gpointer)image_x_canon_crw);
|
||||
g_hash_table_insert(mime_table, "image/x-dcraw", (gpointer)image_x_dcraw);
|
||||
g_hash_table_insert(mime_table, "image/x-kodak-dcr", (gpointer)image_x_kodak_dcr);
|
||||
g_hash_table_insert(mime_table, "image/x-kodak-k25", (gpointer)image_x_kodak_k25);
|
||||
g_hash_table_insert(mime_table, "image/x-kodak-kdc", (gpointer)image_x_kodak_kdc);
|
||||
g_hash_table_insert(mime_table, "image/x-minolta-mrw", (gpointer)image_x_minolta_mrw);
|
||||
g_hash_table_insert(mime_table, "image/x-pentax-pef", (gpointer)image_x_pentax_pef);
|
||||
g_hash_table_insert(mime_table, "image/x-sigma-x3f", (gpointer)image_x_sigma_x3f);
|
||||
g_hash_table_insert(mime_table, "image/x-sony-arw", (gpointer)image_x_sony_arw);
|
||||
g_hash_table_insert(mime_table, "image/x-sony-sr2", (gpointer)image_x_sony_sr2);
|
||||
g_hash_table_insert(mime_table, "image/x-sony-srf", (gpointer)image_x_sony_srf);
|
||||
g_hash_table_insert(mime_table, "image/x-epson-erf", (gpointer)image_x_epson_erf);
|
||||
return mime_table;}
|
||||
#endif
|
||||
|
||||
@@ -78,6 +78,11 @@ void parse(void *arg) {
|
||||
|
||||
if (doc.mime == 0 && !ScanCtx.fast) {
|
||||
// Get mime type with libmagic
|
||||
if (!job->vfile.is_fs_file) {
|
||||
LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
|
||||
goto abort;
|
||||
}
|
||||
|
||||
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
|
||||
if (bytes_read < 0) {
|
||||
|
||||
@@ -114,6 +119,8 @@ void parse(void *arg) {
|
||||
|
||||
if (!(SHOULD_PARSE(doc.mime))) {
|
||||
|
||||
} else if (IS_RAW(doc.mime)) {
|
||||
parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
|
||||
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
|
||||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||
|
||||
@@ -147,14 +154,13 @@ void parse(void *arg) {
|
||||
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
|
||||
}
|
||||
|
||||
abort:
|
||||
|
||||
//Parent meta
|
||||
if (!uuid_is_null(job->parent)) {
|
||||
char tmp[UUID_STR_LEN];
|
||||
uuid_unparse(job->parent, tmp);
|
||||
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
|
||||
meta_parent->key = MetaParent;
|
||||
strcpy(meta_parent->str_val, tmp);
|
||||
uuid_unparse(job->parent, meta_parent->str_val);
|
||||
APPEND_META((&doc), meta_parent)
|
||||
}
|
||||
|
||||
|
||||
4
src/static/css/autocomplete.min.css
vendored
Normal file
4
src/static/css/autocomplete.min.css
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
.autocomplete-suggestions { text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1); position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box; }
|
||||
.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
|
||||
.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
|
||||
.autocomplete-suggestion.selected { background: #f0f0f0; }
|
||||
@@ -266,6 +266,7 @@ mark {
|
||||
margin: 3px;
|
||||
white-space: normal;
|
||||
color: rgb(224, 224, 224);
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.irs-single, .irs-from, .irs-to {
|
||||
|
||||
@@ -205,6 +205,7 @@ mark {
|
||||
margin: 3px;
|
||||
white-space: normal;
|
||||
color: #000;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.irs-single, .irs-from, .irs-to {
|
||||
|
||||
3
src/static/js/auto-complete.min.js
vendored
Normal file
3
src/static/js/auto-complete.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
@@ -27,18 +27,12 @@ function gifOver(thumbnail, hit) {
|
||||
}
|
||||
|
||||
function getContentHighlight(hit) {
|
||||
const re = RegExp(/<mark>/g);
|
||||
|
||||
const sortByMathCount = (a, b) => {
|
||||
return b.match(re).length - a.match(re).length;
|
||||
};
|
||||
|
||||
if (hit.hasOwnProperty("highlight")) {
|
||||
if (hit["highlight"].hasOwnProperty("content")) {
|
||||
return hit["highlight"]["content"].sort(sortByMathCount)[0];
|
||||
return hit["highlight"]["content"][0];
|
||||
|
||||
} else if (hit["highlight"].hasOwnProperty("content.nGram")) {
|
||||
return hit["highlight"]["content.nGram"].sort(sortByMathCount)[0];
|
||||
return hit["highlight"]["content.nGram"][0];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -77,6 +71,7 @@ function shouldPlayVideo(hit) {
|
||||
|
||||
return mime &&
|
||||
mime.startsWith("video/") &&
|
||||
!("parent" in hit["_source"]) &&
|
||||
hit["_source"]["extension"] !== "mkv" &&
|
||||
hit["_source"]["extension"] !== "avi" &&
|
||||
videoc !== "hevc" &&
|
||||
@@ -92,6 +87,7 @@ function shouldDisplayRawImage(hit) {
|
||||
hit["_source"]["mime"] &&
|
||||
!hit["_source"]["parent"] &&
|
||||
hit["_source"]["videoc"] !== "tiff" &&
|
||||
hit["_source"]["videoc"] !== "raw" &&
|
||||
hit["_source"]["videoc"] !== "ppm";
|
||||
}
|
||||
|
||||
|
||||
@@ -74,6 +74,41 @@ function showEsError() {
|
||||
|
||||
window.onload = () => {
|
||||
CONF.load();
|
||||
new autoComplete({
|
||||
selector: '#pathBar',
|
||||
minChars: 1,
|
||||
delay: 400,
|
||||
renderItem: function (item) {
|
||||
return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>';
|
||||
},
|
||||
source: async function (term, suggest) {
|
||||
|
||||
if (!CONF.options.suggestPath) {
|
||||
return []
|
||||
}
|
||||
|
||||
term = term.toLowerCase();
|
||||
|
||||
const choices = await getPathChoices();
|
||||
|
||||
let matches = [];
|
||||
for (let i = 0; i < choices.length; i++) {
|
||||
if (~choices[i].toLowerCase().indexOf(term)) {
|
||||
matches.push(choices[i]);
|
||||
}
|
||||
}
|
||||
suggest(matches.sort());
|
||||
},
|
||||
onSelect: function () {
|
||||
searchDebounced();
|
||||
}
|
||||
});
|
||||
searchBar.addEventListener("keyup", searchDebounced);
|
||||
pathBar.addEventListener("keyup", e => {
|
||||
if (e.key === "Enter") {
|
||||
searchDebounced();
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
function toggleFuzzy() {
|
||||
@@ -105,10 +140,7 @@ $.jsonPost("i").then(resp => {
|
||||
});
|
||||
|
||||
function getDocumentInfo(id) {
|
||||
return $.getJSON("d/" + id).fail(e => {
|
||||
console.log(e);
|
||||
showEsError();
|
||||
})
|
||||
return $.getJSON("d/" + id).fail(showEsError)
|
||||
}
|
||||
|
||||
function handleTreeClick(tree) {
|
||||
@@ -218,12 +250,13 @@ function addTag(map, tag, id, count) {
|
||||
let child = {
|
||||
id: id,
|
||||
text: tags.length !== 1 ? tags[0] : `${tags[0]} (${count})`,
|
||||
name: tags[0],
|
||||
children: []
|
||||
};
|
||||
|
||||
let found = false;
|
||||
map.forEach(node => {
|
||||
if (node.text === child.text) {
|
||||
if (node.name === child.name) {
|
||||
found = true;
|
||||
if (tags.length !== 1) {
|
||||
addTag(node.children, tags.slice(1).join("."), id, count);
|
||||
@@ -332,24 +365,24 @@ function search(after = null) {
|
||||
|
||||
let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
|
||||
if (path !== "") {
|
||||
filters.push([{term: {path: path}}])
|
||||
filters.push({term: {path: path}})
|
||||
}
|
||||
let mimeTypes = getSelectedNodes(mimeTree);
|
||||
if (!mimeTypes.includes("any")) {
|
||||
filters.push([{terms: {"mime": mimeTypes}}]);
|
||||
filters.push({terms: {"mime": mimeTypes}});
|
||||
}
|
||||
|
||||
let tags = getSelectedNodes(tagTree);
|
||||
if (!tags.includes("any")) {
|
||||
filters.push([{terms: {"tag": tags}}]);
|
||||
filters.push({terms: {"tag": tags}});
|
||||
}
|
||||
|
||||
if (date_min && date_max) {
|
||||
filters.push([{range: {mtime: {gte: date_min, lte: date_max}}}])
|
||||
filters.push({range: {mtime: {gte: date_min, lte: date_max}}})
|
||||
} else if (date_min) {
|
||||
filters.push([{range: {mtime: {gte: date_min}}}])
|
||||
filters.push({range: {mtime: {gte: date_min}}})
|
||||
} else if (date_max) {
|
||||
filters.push([{range: {mtime: {lte: date_max}}}])
|
||||
filters.push({range: {mtime: {lte: date_max}}})
|
||||
}
|
||||
|
||||
let q = {
|
||||
@@ -385,6 +418,9 @@ function search(after = null) {
|
||||
q.highlight = {
|
||||
pre_tags: ["<mark>"],
|
||||
post_tags: ["</mark>"],
|
||||
fragment_size: CONF.options.fragmentSize,
|
||||
number_of_fragments: 1,
|
||||
order: "score",
|
||||
fields: {
|
||||
content: {},
|
||||
// "content.nGram": {},
|
||||
@@ -441,8 +477,6 @@ let searchDebounced = _.debounce(function () {
|
||||
search()
|
||||
}, 500);
|
||||
|
||||
searchBar.addEventListener("keyup", searchDebounced);
|
||||
pathBar.addEventListener("keyup", searchDebounced);
|
||||
|
||||
//Size slider
|
||||
$("#sizeSlider").ionRangeSlider({
|
||||
@@ -607,7 +641,8 @@ function createPathTree(target) {
|
||||
let pathTree = new InspireTree({
|
||||
data: function (node, resolve, reject) {
|
||||
return getNextDepth(node);
|
||||
}
|
||||
},
|
||||
sort: "text"
|
||||
});
|
||||
|
||||
selectedIndices.forEach(index => {
|
||||
@@ -627,3 +662,19 @@ function createPathTree(target) {
|
||||
pathTree.on("node.click", handlePathTreeClick(pathTree));
|
||||
}
|
||||
|
||||
function getPathChoices() {
|
||||
return new Promise(getPaths => {
|
||||
$.jsonPost("es", {
|
||||
suggest: {
|
||||
path: {
|
||||
prefix: pathBar.value,
|
||||
completion: {
|
||||
field: "suggest-path",
|
||||
skip_duplicates: true,
|
||||
size: 10000
|
||||
}
|
||||
}
|
||||
}
|
||||
}).then(resp => getPaths(resp["suggest"]["path"][0]["options"].map(opt => opt["_source"]["path"])));
|
||||
})
|
||||
}
|
||||
|
||||
@@ -100,6 +100,8 @@ const _defaults = {
|
||||
treemapGroupingDepth: 3,
|
||||
treemapColor: "PuBuGn",
|
||||
treemapSize: "large",
|
||||
suggestPath: true,
|
||||
fragmentSize: 100
|
||||
};
|
||||
|
||||
function loadSettings() {
|
||||
@@ -114,6 +116,8 @@ function loadSettings() {
|
||||
$("#settingTreemapColor").val(CONF.options.treemapColor);
|
||||
$("#settingTreemapSize").val(CONF.options.treemapSize);
|
||||
$("#settingTreemapType").val(CONF.options.treemapType);
|
||||
$("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
|
||||
$("#settingFragmentSize").val(CONF.options.fragmentSize);
|
||||
}
|
||||
|
||||
function Settings() {
|
||||
@@ -155,6 +159,8 @@ function updateSettings() {
|
||||
CONF.options.treemapColor = $("#settingTreemapColor").val();
|
||||
CONF.options.treemapSize = $("#settingTreemapSize").val();
|
||||
CONF.options.treemapType = $("#settingTreemapType").val();
|
||||
CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
|
||||
CONF.options.fragmentSize = $("#settingFragmentSize").val();
|
||||
CONF.save();
|
||||
|
||||
if (typeof searchDebounced !== "undefined") {
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
<nav class="navbar navbar-expand-lg">
|
||||
<a class="navbar-brand" href="/">sist2</a>
|
||||
<span class="badge badge-pill version">2.3.0</span>
|
||||
<span class="badge badge-pill version">2.5.0</span>
|
||||
<span class="tagline">Lightning-fast file system indexer and search tool </span>
|
||||
<a class="btn ml-auto" href="/stats">Stats</a>
|
||||
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
|
||||
@@ -192,6 +192,17 @@
|
||||
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document path</label>
|
||||
</div>
|
||||
|
||||
<div class="custom-control custom-checkbox">
|
||||
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
|
||||
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
|
||||
</div>
|
||||
|
||||
<br/>
|
||||
<div class="form-group">
|
||||
<input type="number" class="form-control" id="settingFragmentSize">
|
||||
<label for="settingFragmentSize">Highlight context size in characters</label>
|
||||
</div>
|
||||
|
||||
<label for="settingDisplay">Display</label>
|
||||
<select id="settingDisplay" class="form-control form-control-sm">
|
||||
<option value="grid">Grid</option>
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
|
||||
<nav class="navbar navbar-expand-lg">
|
||||
<a class="navbar-brand" href="/">sist2</a>
|
||||
<span class="badge badge-pill version">2.3.0</span>
|
||||
<span class="badge badge-pill version">2.5.0</span>
|
||||
<span class="tagline">Lightning-fast file system indexer and search tool </span>
|
||||
<a style="margin-left: auto" class="btn" href="/">Back</a>
|
||||
<button class="btn" type="button" data-toggle="modal" data-target="#settings"
|
||||
@@ -77,6 +77,17 @@
|
||||
path</label>
|
||||
</div>
|
||||
|
||||
<div class="custom-control custom-checkbox">
|
||||
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
|
||||
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
|
||||
</div>
|
||||
|
||||
<br/>
|
||||
<div class="form-group">
|
||||
<input type="number" class="form-control" id="settingFragmentSize">
|
||||
<label for="settingFragmentSize">Highlight context size in characters</label>
|
||||
</div>
|
||||
|
||||
<label for="settingDisplay">Display</label>
|
||||
<select id="settingDisplay" class="form-control form-control-sm">
|
||||
<option value="grid">Grid</option>
|
||||
|
||||
14
src/stats.c
14
src/stats.c
@@ -24,6 +24,10 @@ typedef struct {
|
||||
|
||||
void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {
|
||||
|
||||
if (cJSON_GetObjectItem(document, "parent") != NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring;
|
||||
char *path = malloc(strlen(json_path) + 1);
|
||||
strcpy(path, json_path);
|
||||
@@ -167,7 +171,7 @@ int merge_up(double thresh) {
|
||||
|
||||
int size = g_hash_table_size(FlatTree);
|
||||
|
||||
LOG_DEBUGF("stats.h", "Merge up iteration (%d merged, %d in tree)", count, size)
|
||||
LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size)
|
||||
return count;
|
||||
}
|
||||
|
||||
@@ -184,9 +188,9 @@ void csv_escape(char *dst, const char *str) {
|
||||
return;
|
||||
}
|
||||
|
||||
while (*ptr++ != 0) {
|
||||
char c = *ptr;
|
||||
|
||||
*out++ = '"';
|
||||
char c;
|
||||
while ((c = *ptr++) != 0) {
|
||||
if (c == '"') {
|
||||
*out++ = '"';
|
||||
*out++ = '"';
|
||||
@@ -194,6 +198,8 @@ void csv_escape(char *dst, const char *str) {
|
||||
*out++ = c;
|
||||
}
|
||||
}
|
||||
*out++ = '"';
|
||||
*out = '\0';
|
||||
}
|
||||
|
||||
int open_or_exit(const char *path) {
|
||||
|
||||
55
src/util.c
55
src/util.c
@@ -26,10 +26,11 @@ dyn_buffer_t url_escape(char *str) {
|
||||
}
|
||||
|
||||
char *abspath(const char *path) {
|
||||
wordexp_t w;
|
||||
wordexp(path, &w, 0);
|
||||
|
||||
char *abs = realpath(w.we_wordv[0], NULL);
|
||||
char *expanded = expandpath(path);
|
||||
|
||||
char *abs = realpath(expanded, NULL);
|
||||
free(expanded);
|
||||
if (abs == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
@@ -38,16 +39,46 @@ char *abspath(const char *path) {
|
||||
strcat(abs, "/");
|
||||
}
|
||||
|
||||
wordfree(&w);
|
||||
return abs;
|
||||
}
|
||||
|
||||
char *expandpath(const char *path) {
|
||||
wordexp_t w;
|
||||
wordexp(path, &w, 0);
|
||||
void shell_escape(char *dst, const char *src) {
|
||||
const char *ptr = src;
|
||||
char *out = dst;
|
||||
while ((*ptr)) {
|
||||
char c = *ptr++;
|
||||
|
||||
char *expanded = malloc(strlen(w.we_wordv[0]) + 2);
|
||||
strcpy(expanded, w.we_wordv[0]);
|
||||
if (c == '&' || c == '\n' || c == '|' || c == ';' || c == '<' ||
|
||||
c == '>' || c == '(' || c == ')' || c == '{' || c == '}') {
|
||||
*out++ = '\\';
|
||||
}
|
||||
*out++ = c;
|
||||
}
|
||||
*out = 0;
|
||||
}
|
||||
|
||||
char *expandpath(const char *path) {
|
||||
char tmp[PATH_MAX * 2];
|
||||
|
||||
shell_escape(tmp, path);
|
||||
|
||||
wordexp_t w;
|
||||
wordexp(tmp, &w, 0);
|
||||
|
||||
if (w.we_wordv == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*tmp = '\0';
|
||||
for (int i = 0; i < w.we_wordc; i++) {
|
||||
strcat(tmp, w.we_wordv[i]);
|
||||
if (i != w.we_wordc - 1) {
|
||||
strcat(tmp, " ");
|
||||
}
|
||||
}
|
||||
|
||||
char *expanded = malloc(strlen(tmp) + 2);
|
||||
strcpy(expanded, tmp);
|
||||
strcat(expanded, "/");
|
||||
|
||||
wordfree(&w);
|
||||
@@ -152,7 +183,7 @@ void str_escape(char *dst, const char *str) {
|
||||
break;
|
||||
}
|
||||
|
||||
cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char)tmp[i]);
|
||||
cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char) tmp[i]);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@@ -198,12 +229,12 @@ void str_unescape(char *dst, const char *str) {
|
||||
char next = *ptr;
|
||||
|
||||
if (next == ESCAPE_CHAR) {
|
||||
*cur++ = (char)c;
|
||||
*cur++ = (char) c;
|
||||
ptr += 1;
|
||||
} else {
|
||||
tmp[0] = *(ptr);
|
||||
tmp[1] = *(ptr + 1);
|
||||
*cur++ = (char)strtol(tmp, NULL, 16);
|
||||
*cur++ = (char) strtol(tmp, NULL, 16);
|
||||
ptr += 2;
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -68,6 +68,7 @@ void stats(struct mg_connection *nc) {
|
||||
void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
|
||||
|
||||
if (path->len != UUID_STR_LEN + 4) {
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -78,6 +79,7 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st
|
||||
|
||||
index_t *index = get_index_by_id(arg_uuid);
|
||||
if (index == NULL) {
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -173,6 +175,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
|
||||
|
||||
if (path->len != UUID_STR_LEN * 2 + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -189,6 +192,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
|
||||
int ret = uuid_parse(arg_uuid, uuid);
|
||||
if (ret != 0) {
|
||||
LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -196,6 +200,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
|
||||
store_t *store = get_store(arg_index);
|
||||
if (store == NULL) {
|
||||
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -214,6 +219,7 @@ void search(struct mg_connection *nc, struct http_message *hm) {
|
||||
|
||||
if (hm->body.len == 0) {
|
||||
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
|
||||
mg_http_send_error(nc, 500, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -314,6 +320,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
|
||||
|
||||
if (path->len != UUID_STR_LEN + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -328,6 +335,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
|
||||
cJSON *index_id = cJSON_GetObjectItem(source, "index");
|
||||
if (index_id == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -335,6 +343,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
|
||||
index_t *idx = get_index_by_id(index_id->valuestring);
|
||||
if (idx == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -352,6 +361,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
|
||||
|
||||
if (path->len != UUID_STR_LEN + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -371,6 +381,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
|
||||
index_id = cJSON_GetObjectItem(source, "index");
|
||||
if (index_id == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -386,6 +397,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
|
||||
if (idx == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -423,6 +435,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
|
||||
struct http_message *hm = (struct http_message *) p;
|
||||
|
||||
if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
|
||||
mg_http_send_error(nc, 400, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
return;
|
||||
}
|
||||
@@ -469,6 +482,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
|
||||
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
|
||||
document_info(nc, hm, &path);
|
||||
} else {
|
||||
mg_http_send_error(nc, 404, NULL);
|
||||
nc->flags |= MG_F_SEND_AND_CLOSE;
|
||||
}
|
||||
|
||||
@@ -499,7 +513,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
|
||||
free(json_str);
|
||||
free(tmp);
|
||||
}
|
||||
//todo return error code
|
||||
mg_http_send_error(nc, 500, NULL);
|
||||
}
|
||||
|
||||
free_response(r);
|
||||
|
||||
File diff suppressed because one or more lines are too long
2
third-party/libscan
vendored
2
third-party/libscan
vendored
Submodule third-party/libscan updated: fe6232ed82...59fd5252a5
Reference in New Issue
Block a user