Compare commits

...

30 Commits

Author SHA1 Message Date
7b49a0dc49 Build fix 2020-06-21 12:56:13 -04:00
eb559b53aa RAW picture file support 2020-06-21 10:46:11 -04:00
6d01f9c0df whoops 2020-06-19 22:12:19 -04:00
e724fec668 Fix web return codes 2020-06-19 21:41:17 -04:00
fe5e93b300 Update USAGE.md 2020-06-19 21:29:09 -04:00
ecad85fd7d version bump 2020-06-19 21:10:03 -04:00
74cc898259 Fix tag display issue 2020-06-19 21:07:19 -04:00
dc2e4443c4 Add exec-script command 2020-06-19 21:07:19 -04:00
1a64431b52 Merge pull request #63 from dpieski/patch-3
Correct typos in example
2020-06-19 18:26:10 -04:00
Andrew
9bad515e06 Correct typos in example
Correct typos in examples.
2020-06-19 17:22:02 -05:00
648559cedb Update README.md 2020-06-17 13:25:20 -04:00
3e6cd9cd5c Merge pull request #60 from dpieski/patch-2
update Usage.md
2020-06-17 13:04:46 -04:00
f249992798 Update scripting.md 2020-06-17 13:00:07 -04:00
Andrew
e9645ecdaa update Usage.md
Fixing a link.
2020-06-17 10:58:25 -05:00
046edea0e2 Handle special characters in file paths 2020-06-10 19:45:36 -04:00
a011b7e97b Fragment size setting 2020-06-09 21:40:53 -04:00
8c1c1697e0 Fix file wordexp in some paths #59 2020-06-05 19:41:02 -04:00
018b49fa4c Fix csv_escape #58 2020-06-05 19:13:03 -04:00
27b4e6403e Re-enable path autocomplete #54 2020-06-02 19:46:58 -04:00
13fdbd9e69 Fix for ES 7.7 #54 2020-06-01 18:14:34 -04:00
5e7fdaf8dd Update issue-template.md 2020-06-01 10:45:43 -04:00
19d5c8ac9f Update issue-template.md 2020-05-29 18:19:21 -04:00
99497049a8 Merge pull request #53 from dpieski/patch-1
Update README
2020-05-29 18:16:13 -04:00
Andrew
1a3181d78b Update README
changed case of path in a link to the usage guide to fix 404 error.
2020-05-29 15:37:20 -05:00
449aa77c8f Fix for unknown mime inside archives 2020-05-25 17:36:04 -04:00
3058c55510 Memory leak fix #37 2020-05-24 15:42:42 -04:00
dedf9287b2 Fix name separation in --archive list mode 2020-05-24 14:36:59 -04:00
ab199b0c0c Remove arc_reset() function because seek() inside archive doesn't work 2020-05-24 14:18:31 -04:00
c4fbae123e Better support for media files inside archives 2020-05-24 14:10:23 -04:00
dd2397ef5c handle .tgz #44, ignore files inside archives for stats page 2020-05-24 10:10:28 -04:00
33 changed files with 665 additions and 270 deletions

View File

@@ -9,7 +9,9 @@ assignees: ''
sist2 version: sist2 version:
Platform (please indicate if you're using Docker): Platform (Linux or Docker):
Elasticsearch version:
Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0` Command with arguments: `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0`

View File

@@ -30,7 +30,7 @@ add_executable(
third-party/argparse/argparse.h third-party/argparse/argparse.c third-party/argparse/argparse.h third-party/argparse/argparse.c
src/cli.c src/cli.h src/cli.c src/cli.h
src/stats.c src/stats.h) src/stats.c src/stats.h src/ctx.c)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)

View File

@@ -18,7 +18,7 @@ sist2 (Simple incremental search tool)
* Extracts text from common file types \* * Extracts text from common file types \*
* Generates thumbnails \* * Generates thumbnails \*
* Incremental scanning * Incremental scanning
* Automatic tagging from file attributes via [user scripts](scripting/README.md) * Automatic tagging from file attributes via [user scripts](docs/scripting.md)
* Recursive scan inside archive files \*\* * Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\* * OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization * Stats page & disk utilisation visualization
@@ -53,7 +53,7 @@ sist2 (Simple incremental search tool)
1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)* 1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:latest` 1. *(or)* `docker pull simon987/sist2:latest`
1. See [Usage guide](DOCS/USAGE.md) 1. See [Usage guide](docs/USAGE.md)
\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux) \* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
@@ -61,7 +61,7 @@ sist2 (Simple incremental search tool)
## Example usage ## Example usage
See [Usage guide](DOCS/USAGE.md) for more details See [Usage guide](docs/USAGE.md) for more details
1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx` 1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
1. Push index to Elasticsearch: `sist2 index ./docs_idx` 1. Push index to Elasticsearch: `sist2 index ./docs_idx`
@@ -91,14 +91,12 @@ they were directly in the file system. Recursive (archives inside archives)
scan is also supported. scan is also supported.
**Limitations**: **Limitations**:
* Parsing media files with formats that require * Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.)
*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported. is limited (see `--mem-buffer` option)
* Archive files are scanned sequentially, by a single thread. On systems where * Archive files are scanned sequentially, by a single thread. On systems where
**sist2** is not I/O bound, scans might be faster when larger archives are split **sist2** is not I/O bound, scans might be faster when larger archives are split
into smaller parts. into smaller parts.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### OCR ### OCR
@@ -130,6 +128,6 @@ binaries (GCC 7+ required).
2. Build 2. Build
```bash ```bash
git clone --recursive https://github.com/simon987/sist2/ git clone --recursive https://github.com/simon987/sist2/
cmake -D <VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake . cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make make
``` ```

View File

@@ -14,11 +14,13 @@
* [examples](#web-examples) * [examples](#web-examples)
* [rewrite_url](#rewrite_url) * [rewrite_url](#rewrite_url)
* [link to specific indices](#link-to-specific-indices) * [link to specific indices](#link-to-specific-indices)
* [exec-script](#exec-script)
``` ```
Usage: sist2 scan [OPTION]... PATH Usage: sist2 scan [OPTION]... PATH
or: sist2 index [OPTION]... INDEX or: sist2 index [OPTION]... INDEX
or: sist2 web [OPTION]... INDEX... or: sist2 web [OPTION]... INDEX...
or: sist2 exec-script [OPTION]... INDEX
Lightning-fast file system indexer and search tool. Lightning-fast file system indexer and search tool.
-h, --help show this help message and exit -h, --help show this help message and exit
@@ -40,6 +42,8 @@ Scan options
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine) --ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-e, --exclude=<str> Files that match this regex will not be scanned -e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type --fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
Index options Index options
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200 --es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
@@ -48,13 +52,14 @@ Index options
--batch-size=<int> Index batch size. DEFAULT: 100 --batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command) -f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
Web options Web options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200 --es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--bind=<str> Listen on this address. DEFAULT=localhost:4090 --bind=<str> Listen on this address. DEFAULT=localhost:4090
--auth=<str> Basic auth in user:password format --auth=<str> Basic auth in user:password format
Made by simon987 <me@simon987.net>. Released under GPL-3.0
Exec-script options
--script-file=<str> Path to user script.
Made by simon987 <me@simon987.net>. Released under GPL-3.0
``` ```
## Scan ## Scan
@@ -102,6 +107,11 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
In effect, smaller `treemap-threshold` values will yield a more detailed In effect, smaller `treemap-threshold` values will yield a more detailed
(but also a more cluttered and harder to read) visualization. (but also a more cluttered and harder to read) visualization.
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
larger than this number will be read sequentially and no *seek* operations will be supported.
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
### Scan examples ### Scan examples
Simple scan Simple scan
@@ -226,7 +236,7 @@ it is currently unsupported and has no guaranties of back/forward compatibility.
* `-p, --print` * `-p, --print`
Print index in JSON format to stdout. Print index in JSON format to stdout.
* `--script-file` * `--script-file`
Path to user script. See [Scripting](scripting/README.md). Path to user script. See [Scripting](scripting.md).
* `--batch-size=<int>` * `--batch-size=<int>`
Index batch size. Indexing is generally faster with larger batches, but payloads that Index batch size. Indexing is generally faster with larger batches, but payloads that
are too large will fail and additional overhead for retrying with smaller sizes may slow are too large will fail and additional overhead for retrying with smaller sizes may slow
@@ -286,3 +296,7 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
To link to specific indices, you can add a list of comma-separated index names to To link to specific indices, you can add a list of comma-separated index names to
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
not displayed. not displayed.
## exec-script
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.

View File

@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
ArrayList tags = ctx._source.tag = new ArrayList(); ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) { if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase()) tags.add("genre." + ctx._source.genre.toLowerCase());
} }
``` ```
@@ -67,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name); Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) { if (m.find()) {
tags.add("year." + m.group(1)) tags.add("year." + m.group(1));
} }
``` ```
@@ -111,16 +111,6 @@ if (ctx._source.path != "") {
} }
``` ```
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```
Parse `EXIF:F Number` tag Parse `EXIF:F Number` tag
```Java ```Java
if (ctx._source?.exif_fnumber != null) { if (ctx._source?.exif_fnumber != null) {

View File

@@ -10,6 +10,7 @@
"path": { "path": {
"type": "text", "type": "text",
"analyzer": "path_analyzer", "analyzer": "path_analyzer",
"copy_to": "suggest-path",
"fielddata": true, "fielddata": true,
"fields": { "fields": {
"nGram": { "nGram": {
@@ -22,6 +23,10 @@
} }
} }
}, },
"suggest-path": {
"type": "completion",
"analyzer": "case_insensitive_kw_analyzer"
},
"mime": { "mime": {
"type": "keyword" "type": "keyword"
}, },

View File

@@ -13,7 +13,7 @@ application/epub+zip, epub
application/freeloader, frl application/freeloader, frl
application/futuresplash, spl application/futuresplash, spl
application/groupwise, vew application/groupwise, vew
application/gzip, gz application/gzip, gz|tgz
application/hta, hta application/hta, hta
application/i-deas, unv application/i-deas, unv
application/iges, iges|igs application/iges, iges|igs
@@ -111,7 +111,7 @@ application/x-dbf, dbf
application/x-dbt, application/x-dbt,
application/x-debian-package, deb application/x-debian-package, deb
application/x-deepv, deepv application/x-deepv, deepv
application/x-director, dcr|dir|dxr application/x-director, dir|dxr
application/x-dmp, dmp application/x-dmp, dmp
application/x-dosdriver, application/x-dosdriver,
application/x-dosexec, dll application/x-dosexec, dll
@@ -429,4 +429,22 @@ video/x-qtc, qtc
video/x-sgi-movie, movie|mv video/x-sgi-movie, movie|mv
x-epoc/x-sisx-app, x-epoc/x-sisx-app,
application/x-zstd-dictionary, application/x-zstd-dictionary,
application/vnd.ms-outlook, application/vnd.ms-outlook, msg
image/x-olympus-orf, orf
image/x-nikon-nef, nef
image/x-fuji-raf, raf
image/x-panasonic-raw, rw2|raw
image/x-adobe-dng, dng
image/x-canon-cr2, cr2
image/x-canon-crw, crw
image/x-dcraw,
image/x-kodak-dcr, dcr
image/x-kodak-k25, k25
image/x-kodak-kdc, kdc
image/x-minolta-mrw, mrw
image/x-pentax-pef, pef
image/x-sigma-x3f, x3f
image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
1 application/arj arj
13 application/freeloader frl
14 application/futuresplash spl
15 application/groupwise vew
16 application/gzip gz gz|tgz
17 application/hta hta
18 application/i-deas unv
19 application/iges iges|igs
111 application/x-dbt
112 application/x-debian-package deb
113 application/x-deepv deepv
114 application/x-director dcr|dir|dxr dir|dxr
115 application/x-dmp dmp
116 application/x-dosdriver
117 application/x-dosexec dll
429 video/x-sgi-movie movie|mv
430 x-epoc/x-sisx-app
431 application/x-zstd-dictionary
432 application/vnd.ms-outlook msg
433 image/x-olympus-orf orf
434 image/x-nikon-nef nef
435 image/x-fuji-raf raf
436 image/x-panasonic-raw rw2|raw
437 image/x-adobe-dng dng
438 image/x-canon-cr2 cr2
439 image/x-canon-crw crw
440 image/x-dcraw
441 image/x-kodak-dcr dcr
442 image/x-kodak-k25 k25
443 image/x-kodak-kdc kdc
444 image/x-minolta-mrw mrw
445 image/x-pentax-pef pef
446 image/x-sigma-x3f x3f
447 image/x-sony-arw arw
448 image/x-sony-sr2 sr2
449 image/x-sony-srf srf
450 image/x-epson-erf erf

View File

@@ -73,6 +73,29 @@ markup = (
"text/x-sgml" "text/x-sgml"
) )
# MIME types of camera RAW image formats. mime_id() ORs the 0x00800000 flag
# into the generated id of every type listed here. Each type is listed once.
raw = (
    "image/x-olympus-orf",
    "image/x-nikon-nef",
    "image/x-fuji-raf",
    "image/x-panasonic-raw",
    "image/x-adobe-dng",
    "image/x-canon-cr2",
    "image/x-canon-crw",
    "image/x-dcraw",
    "image/x-kodak-dcr",
    "image/x-kodak-k25",
    "image/x-kodak-kdc",
    "image/x-minolta-mrw",
    "image/x-pentax-pef",
    "image/x-sigma-x3f",
    "image/x-sony-arw",
    "image/x-sony-sr2",
    "image/x-sony-srf",
    "image/x-epson-erf",
)
cnt = 1 cnt = 1
@@ -97,6 +120,8 @@ def mime_id(mime):
mime_id += " | 0x02000000" mime_id += " | 0x02000000"
elif mime in markup: elif mime in markup:
mime_id += " | 0x01000000" mime_id += " | 0x01000000"
elif mime in raw:
mime_id += " | 0x00800000"
elif mime == "application/x-empty": elif mime == "application/x-empty":
return "1" return "1"
return mime_id return mime_id

101
src/cli.c
View File

@@ -14,7 +14,9 @@
#define DEFAULT_LISTEN_ADDRESS "localhost:4090" #define DEFAULT_LISTEN_ADDRESS "localhost:4090"
#define DEFAULT_TREEMAP_THRESHOLD 0.0005 #define DEFAULT_TREEMAP_THRESHOLD 0.0005
const char* TESS_DATAPATHS[] = { #define DEFAULT_MAX_MEM_BUFFER 2000
const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/", "/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/", "/usr/share/tesseract-ocr/tessdata/",
"./", "./",
@@ -30,6 +32,11 @@ scan_args_t *scan_args_create() {
return args; return args;
} }
/**
 * Allocate the argument holder for the exec-script command.
 *
 * @return a zero-filled exec_args_t (every pointer member starts out NULL),
 *         or NULL if the allocation fails
 */
exec_args_t *exec_args_create() {
    return calloc(1, sizeof(exec_args_t));
}
void scan_args_destroy(scan_args_t *args) { void scan_args_destroy(scan_args_t *args) {
if (args->name != NULL) { if (args->name != NULL) {
free(args->name); free(args->name);
@@ -53,6 +60,10 @@ void web_args_destroy(web_args_t *args) {
free(args); free(args);
} }
/**
 * Release an exec_args_t.
 *
 * Only the struct itself is freed; none of its members are freed here.
 */
void exec_args_destroy(exec_args_t *args) {
    free(args);
}
int scan_args_validate(scan_args_t *args, int argc, const char **argv) { int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (argc < 2) { if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n"); fprintf(stderr, "Required positional argument: PATH.\n");
@@ -113,7 +124,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1; return 1;
} }
if (args->depth < 0) { if (args->depth <= 0) {
args->depth = G_MAXINT32; args->depth = G_MAXINT32;
} else { } else {
args->depth += 1; args->depth += 1;
@@ -145,7 +156,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char filename[128]; char filename[128];
sprintf(filename, "%s.traineddata", args->tesseract_lang); sprintf(filename, "%s.traineddata", args->tesseract_lang);
const char * path = find_file_in_paths(TESS_DATAPATHS, filename); const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
if (path == NULL) { if (path == NULL) {
LOG_FATAL("cli.c", "Could not find tesseract language file!"); LOG_FATAL("cli.c", "Could not find tesseract language file!");
} }
@@ -187,6 +198,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->treemap_threshold = atof(args->treemap_threshold_str); args->treemap_threshold = atof(args->treemap_threshold_str);
} }
if (args->max_memory_buffer == 0) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality) LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size) LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size) LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
@@ -203,6 +218,35 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex) LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast) LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold) LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
return 0;
}
/**
 * Read the whole file at script_path into a newly allocated, NUL-terminated buffer.
 *
 * @param script_path path of the script file to load
 * @param dst         receives the buffer on success; the caller owns it and must
 *                    free() it. Left untouched on failure.
 * @return 0 on success, 1 on failure (a message is printed to stderr)
 */
int load_script(const char *script_path, char **dst) {
    struct stat info;
    if (stat(script_path, &info) == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
        return 1;
    }

    int fd = open(script_path, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
        return 1;
    }

    size_t size = (size_t) info.st_size;
    char *buf = malloc(size + 1);
    if (buf == NULL) {
        // Report before close() so errno still describes the allocation failure
        fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
        close(fd);
        return 1;
    }

    // read() may return fewer bytes than requested: keep going until the
    // expected size is reached or end-of-file is hit
    size_t total = 0;
    while (total < size) {
        ssize_t n = read(fd, buf + total, size - total);
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
            free(buf);
            close(fd);
            return 1;
        }
        if (n == 0) {
            break;
        }
        total += (size_t) n;
    }
    close(fd);

    // Terminate after the bytes actually read so no uninitialised memory is exposed
    buf[total] = '\0';
    *dst = buf;
    return 0;
}
@@ -230,29 +274,9 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
} }
if (args->script_path != NULL) { if (args->script_path != NULL) {
struct stat info; if (load_script(args->script_path, &args->script) != 0) {
int res = stat(args->script_path, &info);
if (res == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1; return 1;
} }
int fd = open(args->script_path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
args->script = malloc(info.st_size + 1);
res = read(fd, args->script, info.st_size);
if (res < 0) {
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
*(args->script + info.st_size) = '\0';
close(fd);
} }
if (args->batch_size == 0) { if (args->batch_size == 0) {
@@ -288,7 +312,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
} }
if (args->credentials != NULL) { if (args->credentials != NULL) {
char * ptr = strstr(args->credentials, ":"); char *ptr = strstr(args->credentials, ":");
if (ptr == NULL) { if (ptr == NULL) {
fprintf(stderr, "Invalid --auth format, see usage\n"); fprintf(stderr, "Invalid --auth format, see usage\n");
return 1; return 1;
@@ -341,3 +365,30 @@ web_args_t *web_args_create() {
return args; return args;
} }
/**
 * Validate and complete the arguments of the exec-script command.
 *
 * Checks that the positional INDEX argument is present and resolvable,
 * falls back to DEFAULT_ES_URL when no URL was given, and loads the script
 * file into args->script.
 *
 * @param args argument holder to validate and fill in
 * @param argc number of remaining command-line arguments (command name included)
 * @param argv remaining command-line arguments; argv[1] is the index path
 * @return 0 when the arguments are usable, 1 otherwise
 */
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
    // argv[1] must exist before it is handed to abspath()
    if (argc < 2) {
        fprintf(stderr, "Required positional argument: INDEX.\n");
        return 1;
    }

    char *index_path = abspath(argv[1]);
    if (index_path == NULL) {
        fprintf(stderr, "File not found: %s\n", argv[1]);
        return 1;
    }
    // The abspath() result is only tested against NULL; the path is stored as given
    free(index_path);
    args->index_path = argv[1];

    if (args->es_url == NULL) {
        args->es_url = DEFAULT_ES_URL;
    }

    if (args->script_path == NULL) {
        LOG_FATAL("cli.c", "--script-file argument is required");
    }

    if (load_script(args->script_path, &args->script) != 0) {
        return 1;
    }

    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
    LOG_DEBUGF("cli.c", "arg script=%s", args->script)
    return 0;
}

View File

@@ -24,6 +24,7 @@ typedef struct scan_args {
int fast; int fast;
const char* treemap_threshold_str; const char* treemap_threshold_str;
double treemap_threshold; double treemap_threshold;
int max_memory_buffer;
} scan_args_t; } scan_args_t;
scan_args_t *scan_args_create(); scan_args_t *scan_args_create();
@@ -53,6 +54,13 @@ typedef struct web_args {
const char **indices; const char **indices;
} web_args_t; } web_args_t;
/* Arguments of the exec-script command. */
typedef struct exec_args {
/* Elasticsearch URL; exec_args_validate() substitutes DEFAULT_ES_URL when NULL */
char *es_url;
/* Index path, stored from argv[1] by exec_args_validate() */
const char *index_path;
/* Path of the user script file (the --script-file option) */
const char *script_path;
/* Script contents, filled in by load_script(); released by sist2_exec_script() */
char *script;
} exec_args_t;
index_args_t *index_args_create(); index_args_t *index_args_create();
void index_args_destroy(index_args_t *args); void index_args_destroy(index_args_t *args);
@@ -65,4 +73,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv);
int web_args_validate(web_args_t *args, int argc, const char **argv); int web_args_validate(web_args_t *args, int argc, const char **argv);
/* Allocate a zero-filled exec_args_t. */
exec_args_t *exec_args_create();
/* Free an exec_args_t (the struct only, not its members). */
void exec_args_destroy(exec_args_t *args);
/* Validate exec-script arguments and load the script; returns 0 on success, 1 on error. */
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
#endif #endif

6
src/ctx.c Normal file
View File

@@ -0,0 +1,6 @@
#include "ctx.h"
/* Definitions of the global context objects that the header declares `extern`. */
ScanCtx_t ScanCtx;
WebCtx_t WebCtx;
IndexCtx_t IndexCtx;
LogCtx_t LogCtx;

View File

@@ -12,12 +12,12 @@
#include "libscan/ooxml/ooxml.h" #include "libscan/ooxml/ooxml.h"
#include "libscan/text/text.h" #include "libscan/text/text.h"
#include "libscan/mobi/scan_mobi.h" #include "libscan/mobi/scan_mobi.h"
#include "libscan/raw/raw.h"
#include <glib.h> #include <glib.h>
#include <pcre.h> #include <pcre.h>
//TODO Move to individual scan ctx typedef struct {
struct {
struct index_t index; struct index_t index;
GHashTable *mime_table; GHashTable *mime_table;
@@ -46,27 +46,33 @@ struct {
scan_ooxml_ctx_t ooxml_ctx; scan_ooxml_ctx_t ooxml_ctx;
scan_text_ctx_t text_ctx; scan_text_ctx_t text_ctx;
scan_mobi_ctx_t mobi_ctx; scan_mobi_ctx_t mobi_ctx;
} ScanCtx; scan_raw_ctx_t raw_ctx;
} ScanCtx_t;
struct { typedef struct {
int verbose; int verbose;
int very_verbose; int very_verbose;
int no_color; int no_color;
} LogCtx; } LogCtx_t;
struct { typedef struct {
char *es_url; char *es_url;
int batch_size; int batch_size;
} IndexCtx; } IndexCtx_t;
struct { typedef struct {
char *es_url; char *es_url;
int index_count; int index_count;
char *auth_user; char *auth_user;
char *auth_pass; char *auth_pass;
int auth_enabled; int auth_enabled;
struct index_t indices[16]; struct index_t indices[64];
} WebCtx; } WebCtx_t;
extern ScanCtx_t ScanCtx;
extern WebCtx_t WebCtx;
extern IndexCtx_t IndexCtx;
extern LogCtx_t LogCtx;
#endif #endif

View File

@@ -53,6 +53,10 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) { void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
}
cJSON *body = cJSON_CreateObject(); cJSON *body = cJSON_CreateObject();
cJSON *script_obj = cJSON_AddObjectToObject(body, "script"); cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
cJSON_AddStringToObject(script_obj, "lang", "painless"); cJSON_AddStringToObject(script_obj, "lang", "painless");
@@ -202,9 +206,8 @@ void delete_queue(int max) {
Indexer->line_head = tmp->next; Indexer->line_head = tmp->next;
if (Indexer->line_head == NULL) { if (Indexer->line_head == NULL) {
Indexer->line_tail = NULL; Indexer->line_tail = NULL;
} else {
free(tmp);
} }
free(tmp);
Indexer->queued -= 1; Indexer->queued -= 1;
} }
} }
@@ -265,6 +268,7 @@ void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
if (script != NULL) { if (script != NULL) {
execute_update_script(script, index_id); execute_update_script(script, index_id);
free(script);
} }
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url); snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);

View File

@@ -32,4 +32,6 @@ cJSON *elastic_get_document(const char *uuid_str);
char *elastic_get_status(); char *elastic_get_status();
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]);
#endif #endif

File diff suppressed because one or more lines are too long

View File

@@ -2,7 +2,6 @@
#include "ctx.h" #include "ctx.h"
#include <third-party/argparse/argparse.h> #include <third-party/argparse/argparse.h>
#include <glib.h>
#include <locale.h> #include <locale.h>
#include "cli.h" #include "cli.h"
@@ -22,11 +21,12 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0" #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.3.0"; static const char *const Version = "2.5.0";
static const char *const usage[] = { static const char *const usage[] = {
"sist2 scan [OPTION]... PATH", "sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX", "sist2 index [OPTION]... INDEX",
"sist2 web [OPTION]... INDEX...", "sist2 web [OPTION]... INDEX...",
"sist2 exec-script [OPTION]... INDEX",
NULL, NULL,
}; };
@@ -127,6 +127,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.media_ctx.log = _log; ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf; ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store; ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
init_media(); init_media();
// OOXML // OOXML
@@ -153,6 +154,13 @@ void initialize_scan_context(scan_args_t *args) {
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url)); strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root); ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
ScanCtx.fast = args->fast; ScanCtx.fast = args->fast;
// Raw
ScanCtx.raw_ctx.tn_qscale = args->quality;
ScanCtx.raw_ctx.tn_size = args->size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
} }
@@ -286,6 +294,22 @@ void sist2_index(index_args_t *args) {
} }
} }
/**
 * Run the exec-script command: execute the user script against the index
 * described by <index_path>/descriptor.json.
 *
 * Turns verbose logging on, points IndexCtx at the requested Elasticsearch
 * URL and frees args->script once the script has been executed.
 *
 * @param args validated exec-script arguments
 */
void sist2_exec_script(exec_args_t *args) {
    LogCtx.verbose = TRUE;

    char desc_file[PATH_MAX];
    snprintf(desc_file, sizeof(desc_file), "%s/descriptor.json", args->index_path);
    index_descriptor_t index_desc = read_index_descriptor(desc_file);

    IndexCtx.es_url = args->es_url;

    LOG_DEBUGF("main.c", "descriptor version %s (%s)", index_desc.version, index_desc.type)

    // The script runs against the index identified by the descriptor's uuid
    execute_update_script(args->script, index_desc.uuid);
    free(args->script);
}
void sist2_web(web_args_t *args) { void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url; WebCtx.es_url = args->es_url;
@@ -322,10 +346,12 @@ int main(int argc, const char *argv[]) {
scan_args_t *scan_args = scan_args_create(); scan_args_t *scan_args = scan_args_create();
index_args_t *index_args = index_args_create(); index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create(); web_args_t *web_args = web_args_create();
exec_args_t *exec_args = exec_args_create();
int arg_version = 0; int arg_version = 0;
char *common_es_url = NULL; char *common_es_url = NULL;
char *common_script_path = NULL;
struct argparse_option options[] = { struct argparse_option options[] = {
OPT_HELP(), OPT_HELP(),
@@ -358,11 +384,14 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"), OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap " OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"), "(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
OPT_GROUP("Index options"), OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."), OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."), OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"), OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. " OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"), "(You must use this option the first time you use the index command)"),
@@ -372,6 +401,9 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"), OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"), OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_END(), OPT_END(),
}; };
@@ -391,6 +423,8 @@ int main(int argc, const char *argv[]) {
web_args->es_url = common_es_url; web_args->es_url = common_es_url;
index_args->es_url = common_es_url; index_args->es_url = common_es_url;
index_args->script_path = common_script_path;
exec_args->script_path = common_script_path;
if (argc == 0) { if (argc == 0) {
argparse_usage(&argparse); argparse_usage(&argparse);
@@ -419,6 +453,14 @@ int main(int argc, const char *argv[]) {
} }
sist2_web(web_args); sist2_web(web_args);
} else if (strcmp(argv[0], "exec-script") == 0) {
int err = exec_args_validate(exec_args, argc, argv);
if (err != 0) {
goto end;
}
sist2_exec_script(exec_args);
} else { } else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]); fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse); argparse_usage(&argparse);
@@ -430,6 +472,7 @@ int main(int argc, const char *argv[]) {
scan_args_destroy(scan_args); scan_args_destroy(scan_args);
index_args_destroy(index_args); index_args_destroy(index_args);
web_args_destroy(web_args); web_args_destroy(web_args);
exec_args_destroy(exec_args);
return 0; return 0;
} }

View File

@@ -3,7 +3,7 @@
#include "../sist.h" #include "../sist.h"
#define MAJOR_MIME(mime_id) (mime_id & 0x00FF0000) >> 16 #define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16
#define MIME_EMPTY 1 #define MIME_EMPTY 1
@@ -31,6 +31,9 @@
#define MARKUP_MASK 0x01000000 #define MARKUP_MASK 0x01000000
#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK #define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
/* Bit flag OR-ed into the mime ids of the RAW picture formats (see enum mime). */
#define RAW_MASK 0x00800000
/*
 * Evaluates to 1 when mime_id has the RAW_MASK bit set, 0 otherwise.
 * Both the argument and the whole expansion are parenthesized so the macro
 * behaves as a single expression: `!IS_RAW(x)` and `IS_RAW(a | b)` would
 * otherwise bind to the wrong operands.
 */
#define IS_RAW(mime_id) (((mime_id) & RAW_MASK) == RAW_MASK)
enum major_mime { enum major_mime {
MimeInvalid = 0, MimeInvalid = 0,
MimeModel = 1, MimeModel = 1,

View File

@@ -315,127 +315,145 @@ enum mime {
image_webp=524595, image_webp=524595,
image_wmf=524596, image_wmf=524596,
image_x_3ds=524597, image_x_3ds=524597,
image_x_award_bioslogo=524598, image_x_adobe_dng=524598 | 0x00800000,
image_x_cmu_raster=524599, image_x_award_bioslogo=524599,
image_x_cur=524600, image_x_canon_cr2=524600 | 0x00800000,
image_x_dwg=524601, image_x_canon_crw=524601 | 0x00800000,
image_x_eps=524602, image_x_cmu_raster=524602,
image_x_exr=524603, image_x_cur=524603,
image_x_gem=524604, image_x_dcraw=524604 | 0x00800000,
image_x_icns=524605, image_x_dwg=524605,
image_x_icon=524606 | 0x80000000, image_x_eps=524606,
image_x_jg=524607, image_x_epson_erf=524607 | 0x00800000,
image_x_jps=524608, image_x_exr=524608,
image_x_ms_bmp=524609, image_x_fuji_raf=524609 | 0x00800000,
image_x_niff=524610, image_x_gem=524610,
image_x_pcx=524611, image_x_icns=524611,
image_x_pict=524612, image_x_icon=524612 | 0x80000000,
image_x_portable_bitmap=524613, image_x_jg=524613,
image_x_portable_graymap=524614, image_x_jps=524614,
image_x_portable_pixmap=524615, image_x_kodak_dcr=524615 | 0x00800000,
image_x_quicktime=524616, image_x_kodak_k25=524616 | 0x00800000,
image_x_rgb=524617, image_x_kodak_kdc=524617 | 0x00800000,
image_x_tga=524618, image_x_minolta_mrw=524618 | 0x00800000,
image_x_tiff=524619, image_x_ms_bmp=524619,
image_x_win_bitmap=524620, image_x_niff=524620,
image_x_xcf=524621 | 0x80000000, image_x_nikon_nef=524621 | 0x00800000,
image_x_xpixmap=524622 | 0x80000000, image_x_olympus_orf=524622 | 0x00800000,
image_x_xwindowdump=524623, image_x_panasonic_raw=524623 | 0x00800000,
message_news=196944, image_x_pcx=524624,
message_rfc822=196945, image_x_pentax_pef=524625 | 0x00800000,
model_vnd_dwf=65874, image_x_pict=524626,
model_vnd_gdl=65875, image_x_portable_bitmap=524627,
model_vnd_gs_gdl=65876, image_x_portable_graymap=524628,
model_vrml=65877, image_x_portable_pixmap=524629,
model_x_pov=65878, image_x_quicktime=524630,
text_PGP=590167, image_x_rgb=524631,
text_asp=590168, image_x_sigma_x3f=524632 | 0x00800000,
text_css=590169, image_x_sony_arw=524633 | 0x00800000,
text_html=590170 | 0x01000000, image_x_sony_sr2=524634 | 0x00800000,
text_javascript=590171, image_x_sony_srf=524635 | 0x00800000,
text_mcf=590172, image_x_tga=524636,
text_pascal=590173, image_x_tiff=524637,
text_plain=590174, image_x_win_bitmap=524638,
text_richtext=590175, image_x_xcf=524639 | 0x80000000,
text_rtf=590176, image_x_xpixmap=524640 | 0x80000000,
text_scriplet=590177, image_x_xwindowdump=524641,
text_tab_separated_values=590178, message_news=196962,
text_troff=590179, message_rfc822=196963,
text_uri_list=590180, model_vnd_dwf=65892,
text_vnd_abc=590181, model_vnd_gdl=65893,
text_vnd_fmi_flexstor=590182, model_vnd_gs_gdl=65894,
text_vnd_wap_wml=590183, model_vrml=65895,
text_vnd_wap_wmlscript=590184, model_x_pov=65896,
text_webviewhtml=590185, text_PGP=590185,
text_x_Algol68=590186, text_asp=590186,
text_x_asm=590187, text_css=590187,
text_x_audiosoft_intra=590188, text_html=590188 | 0x01000000,
text_x_awk=590189, text_javascript=590189,
text_x_bcpl=590190, text_mcf=590190,
text_x_c=590191, text_pascal=590191,
text_x_c__=590192, text_plain=590192,
text_x_component=590193, text_richtext=590193,
text_x_diff=590194, text_rtf=590194,
text_x_fortran=590195, text_scriplet=590195,
text_x_java=590196, text_tab_separated_values=590196,
text_x_la_asf=590197, text_troff=590197,
text_x_lisp=590198, text_uri_list=590198,
text_x_m=590199, text_vnd_abc=590199,
text_x_m4=590200, text_vnd_fmi_flexstor=590200,
text_x_makefile=590201, text_vnd_wap_wml=590201,
text_x_ms_regedit=590202, text_vnd_wap_wmlscript=590202,
text_x_msdos_batch=590203, text_webviewhtml=590203,
text_x_objective_c=590204, text_x_Algol68=590204,
text_x_pascal=590205, text_x_asm=590205,
text_x_perl=590206, text_x_audiosoft_intra=590206,
text_x_php=590207, text_x_awk=590207,
text_x_po=590208, text_x_bcpl=590208,
text_x_python=590209, text_x_c=590209,
text_x_ruby=590210, text_x_c__=590210,
text_x_sass=590211, text_x_component=590211,
text_x_scss=590212, text_x_diff=590212,
text_x_server_parsed_html=590213, text_x_fortran=590213,
text_x_setext=590214, text_x_java=590214,
text_x_sgml=590215 | 0x01000000, text_x_la_asf=590215,
text_x_shellscript=590216, text_x_lisp=590216,
text_x_speech=590217, text_x_m=590217,
text_x_tcl=590218, text_x_m4=590218,
text_x_tex=590219, text_x_makefile=590219,
text_x_uil=590220, text_x_ms_regedit=590220,
text_x_uuencode=590221, text_x_msdos_batch=590221,
text_x_vcalendar=590222, text_x_objective_c=590222,
text_x_vcard=590223, text_x_pascal=590223,
text_xml=590224 | 0x01000000, text_x_perl=590224,
video_MP2T=393617, text_x_php=590225,
video_animaflex=393618, text_x_po=590226,
video_avi=393619, text_x_python=590227,
video_avs_video=393620, text_x_ruby=590228,
video_mp4=393621, text_x_sass=590229,
video_mpeg=393622, text_x_scss=590230,
video_quicktime=393623, text_x_server_parsed_html=590231,
video_vdo=393624, text_x_setext=590232,
video_vivo=393625, text_x_sgml=590233 | 0x01000000,
video_vnd_rn_realvideo=393626, text_x_shellscript=590234,
video_vosaic=393627, text_x_speech=590235,
video_webm=393628, text_x_tcl=590236,
video_x_amt_demorun=393629, text_x_tex=590237,
video_x_amt_showrun=393630, text_x_uil=590238,
video_x_atomic3d_feature=393631, text_x_uuencode=590239,
video_x_dl=393632, text_x_vcalendar=590240,
video_x_dv=393633, text_x_vcard=590241,
video_x_fli=393634, text_xml=590242 | 0x01000000,
video_x_flv=393635, video_MP2T=393635,
video_x_isvideo=393636, video_animaflex=393636,
video_x_jng=393637 | 0x80000000, video_avi=393637,
video_x_m4v=393638, video_avs_video=393638,
video_x_matroska=393639, video_mp4=393639,
video_x_mng=393640, video_mpeg=393640,
video_x_motion_jpeg=393641, video_quicktime=393641,
video_x_ms_asf=393642, video_vdo=393642,
video_x_msvideo=393643, video_vivo=393643,
video_x_qtc=393644, video_vnd_rn_realvideo=393644,
video_x_sgi_movie=393645, video_vosaic=393645,
x_epoc_x_sisx_app=721326, video_webm=393646,
video_x_amt_demorun=393647,
video_x_amt_showrun=393648,
video_x_atomic3d_feature=393649,
video_x_dl=393650,
video_x_dv=393651,
video_x_fli=393652,
video_x_flv=393653,
video_x_isvideo=393654,
video_x_jng=393655 | 0x80000000,
video_x_m4v=393656,
video_x_matroska=393657,
video_x_mng=393658,
video_x_motion_jpeg=393659,
video_x_ms_asf=393660,
video_x_msvideo=393661,
video_x_qtc=393662,
video_x_sgi_movie=393663,
x_epoc_x_sisx_app=721344,
}; };
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) { char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj"; case application_arj: return "application/arj";
@@ -868,6 +886,24 @@ case video_x_sgi_movie: return "video/x-sgi-movie";
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app"; case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
case application_x_zstd_dictionary: return "application/x-zstd-dictionary"; case application_x_zstd_dictionary: return "application/x-zstd-dictionary";
case application_vnd_ms_outlook: return "application/vnd.ms-outlook"; case application_vnd_ms_outlook: return "application/vnd.ms-outlook";
case image_x_olympus_orf: return "image/x-olympus-orf";
case image_x_nikon_nef: return "image/x-nikon-nef";
case image_x_fuji_raf: return "image/x-fuji-raf";
case image_x_panasonic_raw: return "image/x-panasonic-raw";
case image_x_adobe_dng: return "image/x-adobe-dng";
case image_x_canon_cr2: return "image/x-canon-cr2";
case image_x_canon_crw: return "image/x-canon-crw";
case image_x_dcraw: return "image/x-dcraw";
case image_x_kodak_dcr: return "image/x-kodak-dcr";
case image_x_kodak_k25: return "image/x-kodak-k25";
case image_x_kodak_kdc: return "image/x-kodak-kdc";
case image_x_minolta_mrw: return "image/x-minolta-mrw";
case image_x_pentax_pef: return "image/x-pentax-pef";
case image_x_sigma_x3f: return "image/x-sigma-x3f";
case image_x_sony_arw: return "image/x-sony-arw";
case image_x_sony_sr2: return "image/x-sony-sr2";
case image_x_sony_srf: return "image/x-sony-srf";
case image_x_epson_erf: return "image/x-epson-erf";
default: return NULL;}} default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal); GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj); g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@@ -885,6 +921,7 @@ g_hash_table_insert(ext_table, "frl", (gpointer)application_freeloader);
g_hash_table_insert(ext_table, "spl", (gpointer)application_futuresplash); g_hash_table_insert(ext_table, "spl", (gpointer)application_futuresplash);
g_hash_table_insert(ext_table, "vew", (gpointer)application_groupwise); g_hash_table_insert(ext_table, "vew", (gpointer)application_groupwise);
g_hash_table_insert(ext_table, "gz", (gpointer)application_gzip); g_hash_table_insert(ext_table, "gz", (gpointer)application_gzip);
g_hash_table_insert(ext_table, "tgz", (gpointer)application_gzip);
g_hash_table_insert(ext_table, "hta", (gpointer)application_hta); g_hash_table_insert(ext_table, "hta", (gpointer)application_hta);
g_hash_table_insert(ext_table, "unv", (gpointer)application_i_deas); g_hash_table_insert(ext_table, "unv", (gpointer)application_i_deas);
g_hash_table_insert(ext_table, "iges", (gpointer)application_iges); g_hash_table_insert(ext_table, "iges", (gpointer)application_iges);
@@ -999,7 +1036,6 @@ g_hash_table_insert(ext_table, "cpio", (gpointer)application_x_cpio);
g_hash_table_insert(ext_table, "dbf", (gpointer)application_x_dbf); g_hash_table_insert(ext_table, "dbf", (gpointer)application_x_dbf);
g_hash_table_insert(ext_table, "deb", (gpointer)application_x_debian_package); g_hash_table_insert(ext_table, "deb", (gpointer)application_x_debian_package);
g_hash_table_insert(ext_table, "deepv", (gpointer)application_x_deepv); g_hash_table_insert(ext_table, "deepv", (gpointer)application_x_deepv);
g_hash_table_insert(ext_table, "dcr", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dir", (gpointer)application_x_director); g_hash_table_insert(ext_table, "dir", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dxr", (gpointer)application_x_director); g_hash_table_insert(ext_table, "dxr", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp); g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp);
@@ -1387,6 +1423,25 @@ g_hash_table_insert(ext_table, "divx", (gpointer)video_x_msvideo);
g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc); g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc);
g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie); g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie);
g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie); g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie);
g_hash_table_insert(ext_table, "msg", (gpointer)application_vnd_ms_outlook);
/* File extensions of the RAW picture formats. */
g_hash_table_insert(ext_table, "orf", (gpointer)image_x_olympus_orf);
g_hash_table_insert(ext_table, "nef", (gpointer)image_x_nikon_nef);
g_hash_table_insert(ext_table, "raf", (gpointer)image_x_fuji_raf);
g_hash_table_insert(ext_table, "rw2", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(ext_table, "raw", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(ext_table, "dng", (gpointer)image_x_adobe_dng);
g_hash_table_insert(ext_table, "cr2", (gpointer)image_x_canon_cr2);
g_hash_table_insert(ext_table, "crw", (gpointer)image_x_canon_crw);
g_hash_table_insert(ext_table, "dcr", (gpointer)image_x_kodak_dcr);
g_hash_table_insert(ext_table, "k25", (gpointer)image_x_kodak_k25);
g_hash_table_insert(ext_table, "kdc", (gpointer)image_x_kodak_kdc);
g_hash_table_insert(ext_table, "mrw", (gpointer)image_x_minolta_mrw);
g_hash_table_insert(ext_table, "pef", (gpointer)image_x_pentax_pef);
/* Sigma files use the ".x3f" extension, matching the image/x-sigma-x3f mime name. */
g_hash_table_insert(ext_table, "x3f", (gpointer)image_x_sigma_x3f);
g_hash_table_insert(ext_table, "arw", (gpointer)image_x_sony_arw);
g_hash_table_insert(ext_table, "sr2", (gpointer)image_x_sony_sr2);
g_hash_table_insert(ext_table, "srf", (gpointer)image_x_sony_srf);
g_hash_table_insert(ext_table, "erf", (gpointer)image_x_epson_erf);
return ext_table;} return ext_table;}
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal); GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj); g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@@ -1819,5 +1874,23 @@ g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app); g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary); g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary);
g_hash_table_insert(mime_table, "application/vnd.ms-outlook", (gpointer)application_vnd_ms_outlook); g_hash_table_insert(mime_table, "application/vnd.ms-outlook", (gpointer)application_vnd_ms_outlook);
g_hash_table_insert(mime_table, "image/x-olympus-orf", (gpointer)image_x_olympus_orf);
g_hash_table_insert(mime_table, "image/x-nikon-nef", (gpointer)image_x_nikon_nef);
g_hash_table_insert(mime_table, "image/x-fuji-raf", (gpointer)image_x_fuji_raf);
g_hash_table_insert(mime_table, "image/x-panasonic-raw", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(mime_table, "image/x-adobe-dng", (gpointer)image_x_adobe_dng);
g_hash_table_insert(mime_table, "image/x-canon-cr2", (gpointer)image_x_canon_cr2);
g_hash_table_insert(mime_table, "image/x-canon-crw", (gpointer)image_x_canon_crw);
g_hash_table_insert(mime_table, "image/x-dcraw", (gpointer)image_x_dcraw);
g_hash_table_insert(mime_table, "image/x-kodak-dcr", (gpointer)image_x_kodak_dcr);
g_hash_table_insert(mime_table, "image/x-kodak-k25", (gpointer)image_x_kodak_k25);
g_hash_table_insert(mime_table, "image/x-kodak-kdc", (gpointer)image_x_kodak_kdc);
g_hash_table_insert(mime_table, "image/x-minolta-mrw", (gpointer)image_x_minolta_mrw);
g_hash_table_insert(mime_table, "image/x-pentax-pef", (gpointer)image_x_pentax_pef);
g_hash_table_insert(mime_table, "image/x-sigma-x3f", (gpointer)image_x_sigma_x3f);
g_hash_table_insert(mime_table, "image/x-sony-arw", (gpointer)image_x_sony_arw);
g_hash_table_insert(mime_table, "image/x-sony-sr2", (gpointer)image_x_sony_sr2);
g_hash_table_insert(mime_table, "image/x-sony-srf", (gpointer)image_x_sony_srf);
g_hash_table_insert(mime_table, "image/x-epson-erf", (gpointer)image_x_epson_erf);
return mime_table;} return mime_table;}
#endif #endif

View File

@@ -78,6 +78,11 @@ void parse(void *arg) {
if (doc.mime == 0 && !ScanCtx.fast) { if (doc.mime == 0 && !ScanCtx.fast) {
// Get mime type with libmagic // Get mime type with libmagic
if (!job->vfile.is_fs_file) {
LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
goto abort;
}
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE); bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) { if (bytes_read < 0) {
@@ -114,6 +119,8 @@ void parse(void *arg) {
if (!(SHOULD_PARSE(doc.mime))) { if (!(SHOULD_PARSE(doc.mime))) {
} else if (IS_RAW(doc.mime)) {
parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || } else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) { (mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
@@ -147,14 +154,13 @@ void parse(void *arg) {
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc); parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
} }
abort:
//Parent meta //Parent meta
if (!uuid_is_null(job->parent)) { if (!uuid_is_null(job->parent)) {
char tmp[UUID_STR_LEN];
uuid_unparse(job->parent, tmp);
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1); meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
meta_parent->key = MetaParent; meta_parent->key = MetaParent;
strcpy(meta_parent->str_val, tmp); uuid_unparse(job->parent, meta_parent->str_val);
APPEND_META((&doc), meta_parent) APPEND_META((&doc), meta_parent)
} }

4
src/static/css/autocomplete.min.css vendored Normal file
View File

@@ -0,0 +1,4 @@
.autocomplete-suggestions { text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1); position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box; }
.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
.autocomplete-suggestion.selected { background: #f0f0f0; }

View File

@@ -266,6 +266,7 @@ mark {
margin: 3px; margin: 3px;
white-space: normal; white-space: normal;
color: rgb(224, 224, 224); color: rgb(224, 224, 224);
overflow: hidden;
} }
.irs-single, .irs-from, .irs-to { .irs-single, .irs-from, .irs-to {

View File

@@ -205,6 +205,7 @@ mark {
margin: 3px; margin: 3px;
white-space: normal; white-space: normal;
color: #000; color: #000;
overflow: hidden;
} }
.irs-single, .irs-from, .irs-to { .irs-single, .irs-from, .irs-to {

3
src/static/js/auto-complete.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -27,18 +27,12 @@ function gifOver(thumbnail, hit) {
} }
function getContentHighlight(hit) { function getContentHighlight(hit) {
const re = RegExp(/<mark>/g);
const sortByMathCount = (a, b) => {
return b.match(re).length - a.match(re).length;
};
if (hit.hasOwnProperty("highlight")) { if (hit.hasOwnProperty("highlight")) {
if (hit["highlight"].hasOwnProperty("content")) { if (hit["highlight"].hasOwnProperty("content")) {
return hit["highlight"]["content"].sort(sortByMathCount)[0]; return hit["highlight"]["content"][0];
} else if (hit["highlight"].hasOwnProperty("content.nGram")) { } else if (hit["highlight"].hasOwnProperty("content.nGram")) {
return hit["highlight"]["content.nGram"].sort(sortByMathCount)[0]; return hit["highlight"]["content.nGram"][0];
} }
} }
@@ -77,6 +71,7 @@ function shouldPlayVideo(hit) {
return mime && return mime &&
mime.startsWith("video/") && mime.startsWith("video/") &&
!("parent" in hit["_source"]) &&
hit["_source"]["extension"] !== "mkv" && hit["_source"]["extension"] !== "mkv" &&
hit["_source"]["extension"] !== "avi" && hit["_source"]["extension"] !== "avi" &&
videoc !== "hevc" && videoc !== "hevc" &&
@@ -92,6 +87,7 @@ function shouldDisplayRawImage(hit) {
hit["_source"]["mime"] && hit["_source"]["mime"] &&
!hit["_source"]["parent"] && !hit["_source"]["parent"] &&
hit["_source"]["videoc"] !== "tiff" && hit["_source"]["videoc"] !== "tiff" &&
hit["_source"]["videoc"] !== "raw" &&
hit["_source"]["videoc"] !== "ppm"; hit["_source"]["videoc"] !== "ppm";
} }

View File

@@ -74,6 +74,41 @@ function showEsError() {
window.onload = () => { window.onload = () => {
CONF.load(); CONF.load();
// Path auto-completion for the path filter bar.
new autoComplete({
    selector: '#pathBar',
    minChars: 1,
    delay: 400,
    // Suggestions are document paths, i.e. arbitrary file names: escape them
    // before placing them in markup and in the data-val attribute.
    renderItem: function (item) {
        const escaped = String(item)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
        return '<div class="autocomplete-suggestion" data-val="' + escaped + '">' + escaped + '</div>';
    },
    // Fetch the candidate paths, keep those containing the typed text
    // (case-insensitive) and hand them to the widget in sorted order.
    source: async function (term, suggest) {
        if (!CONF.options.suggestPath) {
            // Auto-complete disabled in the settings: suggest nothing.
            return;
        }

        const needle = term.toLowerCase();

        let choices;
        try {
            choices = await getPathChoices();
        } catch (e) {
            // The lookup failed: report it instead of leaving a rejected promise unhandled.
            showEsError();
            return;
        }

        const matches = choices.filter(choice => choice.toLowerCase().includes(needle));
        suggest(matches.sort());
    },
    onSelect: function () {
        searchDebounced();
    }
});
searchBar.addEventListener("keyup", searchDebounced);
pathBar.addEventListener("keyup", e => {
if (e.key === "Enter") {
searchDebounced();
}
});
}; };
function toggleFuzzy() { function toggleFuzzy() {
@@ -105,10 +140,7 @@ $.jsonPost("i").then(resp => {
}); });
function getDocumentInfo(id) { function getDocumentInfo(id) {
return $.getJSON("d/" + id).fail(e => { return $.getJSON("d/" + id).fail(showEsError)
console.log(e);
showEsError();
})
} }
function handleTreeClick(tree) { function handleTreeClick(tree) {
@@ -218,12 +250,13 @@ function addTag(map, tag, id, count) {
let child = { let child = {
id: id, id: id,
text: tags.length !== 1 ? tags[0] : `${tags[0]} (${count})`, text: tags.length !== 1 ? tags[0] : `${tags[0]} (${count})`,
name: tags[0],
children: [] children: []
}; };
let found = false; let found = false;
map.forEach(node => { map.forEach(node => {
if (node.text === child.text) { if (node.name === child.name) {
found = true; found = true;
if (tags.length !== 1) { if (tags.length !== 1) {
addTag(node.children, tags.slice(1).join("."), id, count); addTag(node.children, tags.slice(1).join("."), id, count);
@@ -332,24 +365,24 @@ function search(after = null) {
let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") { if (path !== "") {
filters.push([{term: {path: path}}]) filters.push({term: {path: path}})
} }
let mimeTypes = getSelectedNodes(mimeTree); let mimeTypes = getSelectedNodes(mimeTree);
if (!mimeTypes.includes("any")) { if (!mimeTypes.includes("any")) {
filters.push([{terms: {"mime": mimeTypes}}]); filters.push({terms: {"mime": mimeTypes}});
} }
let tags = getSelectedNodes(tagTree); let tags = getSelectedNodes(tagTree);
if (!tags.includes("any")) { if (!tags.includes("any")) {
filters.push([{terms: {"tag": tags}}]); filters.push({terms: {"tag": tags}});
} }
if (date_min && date_max) { if (date_min && date_max) {
filters.push([{range: {mtime: {gte: date_min, lte: date_max}}}]) filters.push({range: {mtime: {gte: date_min, lte: date_max}}})
} else if (date_min) { } else if (date_min) {
filters.push([{range: {mtime: {gte: date_min}}}]) filters.push({range: {mtime: {gte: date_min}}})
} else if (date_max) { } else if (date_max) {
filters.push([{range: {mtime: {lte: date_max}}}]) filters.push({range: {mtime: {lte: date_max}}})
} }
let q = { let q = {
@@ -385,6 +418,9 @@ function search(after = null) {
q.highlight = { q.highlight = {
pre_tags: ["<mark>"], pre_tags: ["<mark>"],
post_tags: ["</mark>"], post_tags: ["</mark>"],
fragment_size: CONF.options.fragmentSize,
number_of_fragments: 1,
order: "score",
fields: { fields: {
content: {}, content: {},
// "content.nGram": {}, // "content.nGram": {},
@@ -441,8 +477,6 @@ let searchDebounced = _.debounce(function () {
search() search()
}, 500); }, 500);
searchBar.addEventListener("keyup", searchDebounced);
pathBar.addEventListener("keyup", searchDebounced);
//Size slider //Size slider
$("#sizeSlider").ionRangeSlider({ $("#sizeSlider").ionRangeSlider({
@@ -607,7 +641,8 @@ function createPathTree(target) {
let pathTree = new InspireTree({ let pathTree = new InspireTree({
data: function (node, resolve, reject) { data: function (node, resolve, reject) {
return getNextDepth(node); return getNextDepth(node);
} },
sort: "text"
}); });
selectedIndices.forEach(index => { selectedIndices.forEach(index => {
@@ -627,3 +662,19 @@ function createPathTree(target) {
pathTree.on("node.click", handlePathTreeClick(pathTree)); pathTree.on("node.click", handlePathTreeClick(pathTree));
} }
/**
 * Query the "suggest-path" completion field for paths starting with the
 * current content of the path bar.
 *
 * @returns {Promise<string[]>} resolves with the path of every returned
 *          suggestion option (duplicates skipped, at most 10000 options);
 *          rejects when the request fails or the response does not contain
 *          the expected "path" suggestion, so callers are never left waiting
 *          on a promise that cannot settle.
 */
function getPathChoices() {
    const request = $.jsonPost("es", {
        suggest: {
            path: {
                prefix: pathBar.value,
                completion: {
                    field: "suggest-path",
                    skip_duplicates: true,
                    size: 10000
                }
            }
        }
    });

    // Promise.resolve() adopts the request's outcome, failures included.
    return Promise.resolve(request)
        .then(resp => resp["suggest"]["path"][0]["options"].map(opt => opt["_source"]["path"]));
}

View File

@@ -100,6 +100,8 @@ const _defaults = {
treemapGroupingDepth: 3, treemapGroupingDepth: 3,
treemapColor: "PuBuGn", treemapColor: "PuBuGn",
treemapSize: "large", treemapSize: "large",
suggestPath: true,
fragmentSize: 100
}; };
function loadSettings() { function loadSettings() {
@@ -114,6 +116,8 @@ function loadSettings() {
$("#settingTreemapColor").val(CONF.options.treemapColor); $("#settingTreemapColor").val(CONF.options.treemapColor);
$("#settingTreemapSize").val(CONF.options.treemapSize); $("#settingTreemapSize").val(CONF.options.treemapSize);
$("#settingTreemapType").val(CONF.options.treemapType); $("#settingTreemapType").val(CONF.options.treemapType);
$("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
$("#settingFragmentSize").val(CONF.options.fragmentSize);
} }
function Settings() { function Settings() {
@@ -155,6 +159,8 @@ function updateSettings() {
CONF.options.treemapColor = $("#settingTreemapColor").val(); CONF.options.treemapColor = $("#settingTreemapColor").val();
CONF.options.treemapSize = $("#settingTreemapSize").val(); CONF.options.treemapSize = $("#settingTreemapSize").val();
CONF.options.treemapType = $("#settingTreemapType").val(); CONF.options.treemapType = $("#settingTreemapType").val();
CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
// .val() returns a string that may be empty; store a number and fall back to
// the default when the field is blank, zero or not numeric.
CONF.options.fragmentSize = parseInt($("#settingFragmentSize").val(), 10) || _defaults.fragmentSize;
CONF.save(); CONF.save();
if (typeof searchDebounced !== "undefined") { if (typeof searchDebounced !== "undefined") {

View File

@@ -11,7 +11,7 @@
<nav class="navbar navbar-expand-lg"> <nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a> <a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.3.0</span> <span class="badge badge-pill version">2.5.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span> <span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="/stats">Stats</a> <a class="btn ml-auto" href="/stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button> <button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
@@ -192,6 +192,17 @@
<label class="custom-control-label" for="settingSearchInPath">Enable matching query against document path</label> <label class="custom-control-label" for="settingSearchInPath">Enable matching query against document path</label>
</div> </div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label> <label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm"> <select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option> <option value="grid">Grid</option>

View File

@@ -10,7 +10,7 @@
<nav class="navbar navbar-expand-lg"> <nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a> <a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.3.0</span> <span class="badge badge-pill version">2.5.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span> <span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a> <a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" <button class="btn" type="button" data-toggle="modal" data-target="#settings"
@@ -77,6 +77,17 @@
path</label> path</label>
</div> </div>
<div class="custom-control custom-checkbox">
<input type="checkbox" class="custom-control-input" id="settingSuggestPath">
<label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
</div>
<br/>
<div class="form-group">
<input type="number" class="form-control" id="settingFragmentSize">
<label for="settingFragmentSize">Highlight context size in characters</label>
</div>
<label for="settingDisplay">Display</label> <label for="settingDisplay">Display</label>
<select id="settingDisplay" class="form-control form-control-sm"> <select id="settingDisplay" class="form-control form-control-sm">
<option value="grid">Grid</option> <option value="grid">Grid</option>

View File

@@ -24,6 +24,10 @@ typedef struct {
void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) { void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;
}
const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring; const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring;
char *path = malloc(strlen(json_path) + 1); char *path = malloc(strlen(json_path) + 1);
strcpy(path, json_path); strcpy(path, json_path);
@@ -167,7 +171,7 @@ int merge_up(double thresh) {
int size = g_hash_table_size(FlatTree); int size = g_hash_table_size(FlatTree);
LOG_DEBUGF("stats.h", "Merge up iteration (%d merged, %d in tree)", count, size) LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size)
return count; return count;
} }
@@ -184,9 +188,9 @@ void csv_escape(char *dst, const char *str) {
return; return;
} }
while (*ptr++ != 0) { *out++ = '"';
char c = *ptr; char c;
while ((c = *ptr++) != 0) {
if (c == '"') { if (c == '"') {
*out++ = '"'; *out++ = '"';
*out++ = '"'; *out++ = '"';
@@ -194,6 +198,8 @@ void csv_escape(char *dst, const char *str) {
*out++ = c; *out++ = c;
} }
} }
*out++ = '"';
*out = '\0';
} }
int open_or_exit(const char *path) { int open_or_exit(const char *path) {

View File

@@ -26,10 +26,11 @@ dyn_buffer_t url_escape(char *str) {
} }
char *abspath(const char *path) { char *abspath(const char *path) {
wordexp_t w;
wordexp(path, &w, 0);
char *abs = realpath(w.we_wordv[0], NULL); char *expanded = expandpath(path);
char *abs = realpath(expanded, NULL);
free(expanded);
if (abs == NULL) { if (abs == NULL) {
return NULL; return NULL;
} }
@@ -38,16 +39,46 @@ char *abspath(const char *path) {
strcat(abs, "/"); strcat(abs, "/");
} }
wordfree(&w);
return abs; return abs;
} }
char *expandpath(const char *path) { void shell_escape(char *dst, const char *src) {
wordexp_t w; const char *ptr = src;
wordexp(path, &w, 0); char *out = dst;
while ((*ptr)) {
char c = *ptr++;
char *expanded = malloc(strlen(w.we_wordv[0]) + 2); if (c == '&' || c == '\n' || c == '|' || c == ';' || c == '<' ||
strcpy(expanded, w.we_wordv[0]); c == '>' || c == '(' || c == ')' || c == '{' || c == '}') {
*out++ = '\\';
}
*out++ = c;
}
*out = 0;
}
char *expandpath(const char *path) {
char tmp[PATH_MAX * 2];
shell_escape(tmp, path);
wordexp_t w;
wordexp(tmp, &w, 0);
if (w.we_wordv == NULL) {
return NULL;
}
*tmp = '\0';
for (int i = 0; i < w.we_wordc; i++) {
strcat(tmp, w.we_wordv[i]);
if (i != w.we_wordc - 1) {
strcat(tmp, " ");
}
}
char *expanded = malloc(strlen(tmp) + 2);
strcpy(expanded, tmp);
strcat(expanded, "/"); strcat(expanded, "/");
wordfree(&w); wordfree(&w);
@@ -152,7 +183,7 @@ void str_escape(char *dst, const char *str) {
break; break;
} }
cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char)tmp[i]); cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char) tmp[i]);
} }
continue; continue;
} }
@@ -198,12 +229,12 @@ void str_unescape(char *dst, const char *str) {
char next = *ptr; char next = *ptr;
if (next == ESCAPE_CHAR) { if (next == ESCAPE_CHAR) {
*cur++ = (char)c; *cur++ = (char) c;
ptr += 1; ptr += 1;
} else { } else {
tmp[0] = *(ptr); tmp[0] = *(ptr);
tmp[1] = *(ptr + 1); tmp[1] = *(ptr + 1);
*cur++ = (char)strtol(tmp, NULL, 16); *cur++ = (char) strtol(tmp, NULL, 16);
ptr += 2; ptr += 2;
} }
} else { } else {

View File

@@ -68,6 +68,7 @@ void stats(struct mg_connection *nc) {
void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (path->len != UUID_STR_LEN + 4) { if (path->len != UUID_STR_LEN + 4) {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -78,6 +79,7 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st
index_t *index = get_index_by_id(arg_uuid); index_t *index = get_index_by_id(arg_uuid);
if (index == NULL) { if (index == NULL) {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -173,6 +175,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
if (path->len != UUID_STR_LEN * 2 + 2) { if (path->len != UUID_STR_LEN * 2 + 2) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -189,6 +192,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
int ret = uuid_parse(arg_uuid, uuid); int ret = uuid_parse(arg_uuid, uuid);
if (ret != 0) { if (ret != 0) {
LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid) LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -196,6 +200,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
store_t *store = get_store(arg_index); store_t *store = get_store(arg_index);
if (store == NULL) { if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index) LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -214,6 +219,7 @@ void search(struct mg_connection *nc, struct http_message *hm) {
if (hm->body.len == 0) { if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request") LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_send_error(nc, 500, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -314,6 +320,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
if (path->len != UUID_STR_LEN + 2) { if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -328,6 +335,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
cJSON *index_id = cJSON_GetObjectItem(source, "index"); cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) { if (index_id == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -335,6 +343,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
index_t *idx = get_index_by_id(index_id->valuestring); index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) { if (idx == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -352,6 +361,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
if (path->len != UUID_STR_LEN + 2) { if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p) LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -371,6 +381,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
index_id = cJSON_GetObjectItem(source, "index"); index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) { if (index_id == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -386,6 +397,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
if (idx == NULL) { if (idx == NULL) {
cJSON_Delete(doc); cJSON_Delete(doc);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_send_error(nc, 404, NULL);
return; return;
} }
@@ -423,6 +435,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
struct http_message *hm = (struct http_message *) p; struct http_message *hm = (struct http_message *) p;
if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) { if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
return; return;
} }
@@ -469,6 +482,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) { } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
document_info(nc, hm, &path); document_info(nc, hm, &path);
} else { } else {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE; nc->flags |= MG_F_SEND_AND_CLOSE;
} }
@@ -499,7 +513,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
free(json_str); free(json_str);
free(tmp); free(tmp);
} }
//todo return error code mg_http_send_error(nc, 500, NULL);
} }
free_response(r); free_response(r);

File diff suppressed because one or more lines are too long