Compare commits

...

14 Commits

Author SHA1 Message Date
7b49a0dc49 Build fix 2020-06-21 12:56:13 -04:00
eb559b53aa RAW picture file support 2020-06-21 10:46:11 -04:00
6d01f9c0df whoops 2020-06-19 22:12:19 -04:00
e724fec668 Fix web return codes 2020-06-19 21:41:17 -04:00
fe5e93b300 Update USAGE.md 2020-06-19 21:29:09 -04:00
ecad85fd7d version bump 2020-06-19 21:10:03 -04:00
74cc898259 Fix tag display issue 2020-06-19 21:07:19 -04:00
dc2e4443c4 Add exec-script command 2020-06-19 21:07:19 -04:00
1a64431b52 Merge pull request #63 from dpieski/patch-3
Correct typos in example
2020-06-19 18:26:10 -04:00
Andrew
9bad515e06 Correct typos in example
Correct typos in examples.
2020-06-19 17:22:02 -05:00
648559cedb Update README.md 2020-06-17 13:25:20 -04:00
3e6cd9cd5c Merge pull request #60 from dpieski/patch-2
update Usage.md
2020-06-17 13:04:46 -04:00
f249992798 Update scripting.md 2020-06-17 13:00:07 -04:00
Andrew
e9645ecdaa update Usage.md
Fixing a link.
2020-06-17 10:58:25 -05:00
23 changed files with 462 additions and 216 deletions

View File

@@ -30,7 +30,7 @@ add_executable(
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/cli.c src/cli.h
src/stats.c src/stats.h)
src/stats.c src/stats.h src/ctx.c)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)

View File

@@ -18,7 +18,7 @@ sist2 (Simple incremental search tool)
* Extracts text from common file types \*
* Generates thumbnails \*
* Incremental scanning
* Automatic tagging from file attributes via [user scripts](scripting/README.md)
* Automatic tagging from file attributes via [user scripts](docs/scripting.md)
* Recursive scan inside archive files \*\*
* OCR support with tesseract \*\*\*
* Stats page & disk utilisation visualization
@@ -128,6 +128,6 @@ binaries (GCC 7+ required).
2. Build
```bash
git clone --recursive https://github.com/simon987/sist2/
cmake -D <VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make
```

View File

@@ -14,50 +14,52 @@
* [examples](#web-examples)
* [rewrite_url](#rewrite_url)
* [link to specific indices](#link-to-specific-indices)
* [exec-script](#exec-script)
```
Usage: sist2 scan [OPTION]... PATH
or: sist2 index [OPTION]... INDEX
or: sist2 web [OPTION]... INDEX...
or: sist2 exec-script [OPTION]... INDEX
Lightning-fast file system indexer and search tool.
-h, --help show this help message and exit
-v, --version Show version and exit
--verbose Turn on logging
--very-verbose Turn on debug messages
-h, --help show this help message and exit
-v, --version Show version and exit
--verbose Turn on logging
--very-verbose Turn on debug messages
Scan options
-t, --threads=<int> Number of threads. DEFAULT=1
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
--incremental=<str> Reuse an existing index and only scan modified files.
-o, --output=<str> Output directory. DEFAULT=index.sist2/
--rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: (name of the directory)
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type
-t, --threads=<int> Number of threads. DEFAULT=1
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
--incremental=<str> Reuse an existing index and only scan modified files.
-o, --output=<str> Output directory. DEFAULT=index.sist2/
--rewrite-url=<str> Serve files from this url instead of from disk.
--name=<str> Index display name. DEFAULT: (name of the directory)
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
--mem-buffer=<int> Maximum memory buffer size in MB for files inside archives (see USAGE.md). DEFAULT: 2000
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
Index options
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
-p, --print Just print JSON documents to stdout.
--script-file=<str> Path to user script.
--batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
-p, --print Just print JSON documents to stdout.
--script-file=<str> Path to user script.
--batch-size=<int> Index batch size. DEFAULT: 100
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
Web options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--bind=<str> Listen on this address. DEFAULT=localhost:4090
--auth=<str> Basic auth in user:password format
Made by simon987 <me@simon987.net>. Released under GPL-3.0
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--bind=<str> Listen on this address. DEFAULT=localhost:4090
--auth=<str> Basic auth in user:password format
Exec-script options
--script-file=<str> Path to user script.
Made by simon987 <me@simon987.net>. Released under GPL-3.0
```
## Scan
@@ -234,7 +236,7 @@ it is currently unsupported and has no guaranties of back/forward compatibility.
* `-p, --print`
Print index in JSON format to stdout.
* `--script-file`
Path to user script. See [Scripting](scripting/README.md).
Path to user script. See [Scripting](scripting.md).
* `--batch-size=<int>`
Index batch size. Indexing is generally faster with larger batches, but payloads that
are too large will fail and additional overhead for retrying with smaller sizes may slow
@@ -294,3 +296,7 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
To link to specific indices, you can add a list of comma-separated index names to
the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
not displayed.
## exec-script
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.

View File

@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source?.genre != null) {
tags.add("genre." + ctx._source.genre.toLowerCase())
tags.add("genre." + ctx._source.genre.toLowerCase());
}
```
@@ -67,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();
Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
if (m.find()) {
tags.add("year." + m.group(1))
tags.add("year." + m.group(1));
}
```
@@ -111,16 +111,6 @@ if (ctx._source.path != "") {
}
```
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
```Java
ArrayList tags = ctx._source.tag = new ArrayList();
if (ctx._source.path != "") {
String[] names = ctx._source.path.splitOnToken('/');
tags.add("studio." + names[names.length-1]);
}
```
Parse `EXIF:F Number` tag
```Java
if (ctx._source?.exif_fnumber != null) {

View File

@@ -111,7 +111,7 @@ application/x-dbf, dbf
application/x-dbt,
application/x-debian-package, deb
application/x-deepv, deepv
application/x-director, dcr|dir|dxr
application/x-director, dir|dxr
application/x-dmp, dmp
application/x-dosdriver,
application/x-dosexec, dll
@@ -430,3 +430,21 @@ video/x-sgi-movie, movie|mv
x-epoc/x-sisx-app,
application/x-zstd-dictionary,
application/vnd.ms-outlook, msg
image/x-olympus-orf, orf
image/x-nikon-nef, nef
image/x-fuji-raf, raf
image/x-panasonic-raw, rw2|raw
image/x-adobe-dng, dng
image/x-canon-cr2, cr2
image/x-canon-crw, crw
image/x-dcraw,
image/x-kodak-dcr, dcr
image/x-kodak-k25, k25
image/x-kodak-kdc, kdc
image/x-minolta-mrw, mrw
image/x-pentax-pef, pef
image/x-sigma-x3f, x3f
image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
1 application/arj arj
111 application/x-dbt
112 application/x-debian-package deb
113 application/x-deepv deepv
114 application/x-director dcr|dir|dxr dir|dxr
115 application/x-dmp dmp
116 application/x-dosdriver
117 application/x-dosexec dll
430 x-epoc/x-sisx-app
431 application/x-zstd-dictionary
432 application/vnd.ms-outlook msg
433 image/x-olympus-orf orf
434 image/x-nikon-nef nef
435 image/x-fuji-raf raf
436 image/x-panasonic-raw rw2|raw
437 image/x-adobe-dng dng
438 image/x-canon-cr2 cr2
439 image/x-canon-crw crw
440 image/x-dcraw
441 image/x-kodak-dcr dcr
442 image/x-kodak-k25 k25
443 image/x-kodak-kdc kdc
444 image/x-minolta-mrw mrw
445 image/x-pentax-pef pef
446 image/x-sigma-x3f x3f
447 image/x-sony-arw arw
448 image/x-sony-sr2 sr2
449 image/x-sony-srf srf
450 image/x-epson-erf erf

View File

@@ -73,6 +73,29 @@ markup = (
"text/x-sgml"
)
# MIME types of RAW camera image formats; members get the 0x00800000 flag in mime_id().
# Each type is listed exactly once.
raw = (
    "image/x-olympus-orf",
    "image/x-nikon-nef",
    "image/x-fuji-raf",
    "image/x-panasonic-raw",
    "image/x-adobe-dng",
    "image/x-canon-cr2",
    "image/x-canon-crw",
    "image/x-dcraw",
    "image/x-kodak-dcr",
    "image/x-kodak-k25",
    "image/x-kodak-kdc",
    "image/x-minolta-mrw",
    "image/x-pentax-pef",
    "image/x-sigma-x3f",
    "image/x-sony-arw",
    "image/x-sony-sr2",
    "image/x-sony-srf",
    "image/x-epson-erf",
)
cnt = 1
@@ -97,6 +120,8 @@ def mime_id(mime):
mime_id += " | 0x02000000"
elif mime in markup:
mime_id += " | 0x01000000"
elif mime in raw:
mime_id += " | 0x00800000"
elif mime == "application/x-empty":
return "1"
return mime_id

View File

@@ -16,7 +16,7 @@
#define DEFAULT_MAX_MEM_BUFFER 2000
const char* TESS_DATAPATHS[] = {
const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
"./",
@@ -32,6 +32,11 @@ scan_args_t *scan_args_create() {
return args;
}
/**
 * Allocate a zero-initialised argument block for the exec-script command.
 *
 * @return the new exec_args_t, or NULL when the allocation fails.
 */
exec_args_t *exec_args_create() {
    return calloc(1, sizeof(exec_args_t));
}
void scan_args_destroy(scan_args_t *args) {
if (args->name != NULL) {
free(args->name);
@@ -55,6 +60,10 @@ void web_args_destroy(web_args_t *args) {
free(args);
}
// Release the exec_args_t container itself. Only the struct is freed here;
// the members it points to (e.g. the loaded script text) are not.
void exec_args_destroy(exec_args_t *args) {
free(args);
}
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
@@ -115,7 +124,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->depth < 0) {
if (args->depth <= 0) {
args->depth = G_MAXINT32;
} else {
args->depth += 1;
@@ -147,7 +156,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char filename[128];
sprintf(filename, "%s.traineddata", args->tesseract_lang);
const char * path = find_file_in_paths(TESS_DATAPATHS, filename);
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
if (path == NULL) {
LOG_FATAL("cli.c", "Could not find tesseract language file!");
}
@@ -214,6 +223,34 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 0;
}
/**
 * Read the whole user script at `script_path` into a freshly allocated,
 * NUL-terminated buffer.
 *
 * On success `*dst` points to the buffer, which the caller must free().
 * On failure a message is printed to stderr, every resource acquired here is
 * released and `*dst` is left untouched.
 *
 * @param script_path path of the script file to load
 * @param dst         receives the allocated script text
 * @return 0 on success, 1 on any error
 */
int load_script(const char *script_path, char **dst) {
    int fd = open(script_path, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
        return 1;
    }

    // Query the size through the descriptor we actually read from, so the
    // size and the content always refer to the same file.
    struct stat info;
    if (fstat(fd, &info) == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
        close(fd);
        return 1;
    }

    size_t size = (size_t) info.st_size;
    char *buf = malloc(size + 1);
    if (buf == NULL) {
        fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
        close(fd);
        return 1;
    }

    // read() may legitimately return fewer bytes than requested: keep going
    // until the expected size is reached, EOF is hit, or a real error occurs.
    size_t total = 0;
    while (total < size) {
        ssize_t n = read(fd, buf + total, size - total);
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
            free(buf);
            close(fd);
            return 1;
        }
        if (n == 0) {
            break;
        }
        total += (size_t) n;
    }
    close(fd);

    // Terminate after the bytes actually read, never past initialised data.
    buf[total] = '\0';
    *dst = buf;
    return 0;
}
int index_args_validate(index_args_t *args, int argc, const char **argv) {
LogCtx.verbose = 1;
@@ -237,29 +274,9 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
}
if (args->script_path != NULL) {
struct stat info;
int res = stat(args->script_path, &info);
if (res == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
if (load_script(args->script_path, &args->script) != 0) {
return 1;
}
int fd = open(args->script_path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
args->script = malloc(info.st_size + 1);
res = read(fd, args->script, info.st_size);
if (res < 0) {
fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
return 1;
}
*(args->script + info.st_size) = '\0';
close(fd);
}
if (args->batch_size == 0) {
@@ -295,7 +312,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
}
if (args->credentials != NULL) {
char * ptr = strstr(args->credentials, ":");
char *ptr = strstr(args->credentials, ":");
if (ptr == NULL) {
fprintf(stderr, "Invalid --auth format, see usage\n");
return 1;
@@ -348,3 +365,30 @@ web_args_t *web_args_create() {
return args;
}
/**
 * Validate and complete the arguments of the `exec-script` command.
 *
 * Checks that the INDEX positional argument is present and resolvable, falls
 * back to DEFAULT_ES_URL when no Elasticsearch url was supplied, and loads the
 * script file into args->script.
 *
 * @param args parsed exec-script arguments, updated in place
 * @param argc number of entries in argv
 * @param argv remaining command line; argv[1] is used as the index path
 * @return 0 when the arguments are usable, 1 otherwise
 */
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
    // argv[1] is dereferenced below, so make sure it exists first.
    if (argc < 2) {
        fprintf(stderr, "Required positional argument: INDEX.\n");
        return 1;
    }

    // abspath() is only used as an existence check; the path is stored as typed.
    char *index_path = abspath(argv[1]);
    if (index_path == NULL) {
        fprintf(stderr, "File not found: %s\n", argv[1]);
        return 1;
    } else {
        args->index_path = argv[1];
        free(index_path);
    }

    if (args->es_url == NULL) {
        args->es_url = DEFAULT_ES_URL;
    }

    // NOTE(review): LOG_FATAL presumably terminates the program - confirm.
    if (args->script_path == NULL) {
        LOG_FATAL("cli.c", "--script-file argument is required");
    }

    if (load_script(args->script_path, &args->script) != 0) {
        return 1;
    }

    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
    LOG_DEBUGF("cli.c", "arg script=%s", args->script)
    return 0;
}

View File

@@ -54,6 +54,13 @@ typedef struct web_args {
const char **indices;
} web_args_t;
// Arguments of the `exec-script` command.
typedef struct exec_args {
// Elasticsearch url; exec_args_validate() substitutes DEFAULT_ES_URL when NULL.
char *es_url;
// Index path as given on the command line (argv[1]); not owned by this struct.
const char *index_path;
// Path of the user script file (--script-file).
const char *script_path;
// Script text loaded from script_path by load_script(); heap-allocated.
char *script;
} exec_args_t;
index_args_t *index_args_create();
void index_args_destroy(index_args_t *args);
@@ -66,4 +73,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv);
int web_args_validate(web_args_t *args, int argc, const char **argv);
exec_args_t *exec_args_create();
void exec_args_destroy(exec_args_t *args);
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
#endif

6
src/ctx.c Normal file
View File

@@ -0,0 +1,6 @@
#include "ctx.h"
// Storage for the program-wide context objects; their types come from ctx.h.
// They have static storage duration and are therefore zero-initialised.
ScanCtx_t ScanCtx;
WebCtx_t WebCtx;
IndexCtx_t IndexCtx;
LogCtx_t LogCtx;

View File

@@ -12,12 +12,12 @@
#include "libscan/ooxml/ooxml.h"
#include "libscan/text/text.h"
#include "libscan/mobi/scan_mobi.h"
#include "libscan/raw/raw.h"
#include <glib.h>
#include <pcre.h>
//TODO Move to individual scan ctx
struct {
typedef struct {
struct index_t index;
GHashTable *mime_table;
@@ -46,27 +46,33 @@ struct {
scan_ooxml_ctx_t ooxml_ctx;
scan_text_ctx_t text_ctx;
scan_mobi_ctx_t mobi_ctx;
} ScanCtx;
scan_raw_ctx_t raw_ctx;
} ScanCtx_t;
struct {
typedef struct {
int verbose;
int very_verbose;
int no_color;
} LogCtx;
} LogCtx_t;
struct {
typedef struct {
char *es_url;
int batch_size;
} IndexCtx;
} IndexCtx_t;
struct {
typedef struct {
char *es_url;
int index_count;
char *auth_user;
char *auth_pass;
int auth_enabled;
struct index_t indices[16];
} WebCtx;
struct index_t indices[64];
} WebCtx_t;
extern ScanCtx_t ScanCtx;
extern WebCtx_t WebCtx;
extern IndexCtx_t IndexCtx;
extern LogCtx_t LogCtx;
#endif

View File

@@ -53,6 +53,10 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
}
cJSON *body = cJSON_CreateObject();
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
cJSON_AddStringToObject(script_obj, "lang", "painless");
@@ -264,6 +268,7 @@ void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
if (script != NULL) {
execute_update_script(script, index_id);
free(script);
}
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);

View File

@@ -32,4 +32,6 @@ cJSON *elastic_get_document(const char *uuid_str);
char *elastic_get_status();
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]);
#endif

View File

@@ -2,7 +2,6 @@
#include "ctx.h"
#include <third-party/argparse/argparse.h>
#include <glib.h>
#include <locale.h>
#include "cli.h"
@@ -22,11 +21,12 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.4.2";
static const char *const Version = "2.5.0";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
"sist2 web [OPTION]... INDEX...",
"sist2 exec-script [OPTION]... INDEX",
NULL,
};
@@ -154,6 +154,13 @@ void initialize_scan_context(scan_args_t *args) {
strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
ScanCtx.fast = args->fast;
// Raw
ScanCtx.raw_ctx.tn_qscale = args->quality;
ScanCtx.raw_ctx.tn_size = args->size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
}
@@ -287,6 +294,22 @@ void sist2_index(index_args_t *args) {
}
}
/**
 * Run the `exec-script` command: read the descriptor of the index at
 * args->index_path, point the indexer at args->es_url and execute the
 * already-loaded user script against that index's uuid.
 *
 * The script buffer (args->script) is freed before returning.
 */
void sist2_exec_script(exec_args_t *args) {
    char desc_file[PATH_MAX];
    index_descriptor_t index_desc;

    LogCtx.verbose = TRUE;

    snprintf(desc_file, sizeof(desc_file), "%s/descriptor.json", args->index_path);
    index_desc = read_index_descriptor(desc_file);

    IndexCtx.es_url = args->es_url;

    LOG_DEBUGF("main.c", "descriptor version %s (%s)", index_desc.version, index_desc.type)

    execute_update_script(args->script, index_desc.uuid);
    free(args->script);
}
void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
@@ -323,10 +346,12 @@ int main(int argc, const char *argv[]) {
scan_args_t *scan_args = scan_args_create();
index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create();
exec_args_t *exec_args = exec_args_create();
int arg_version = 0;
char *common_es_url = NULL;
char *common_script_path = NULL;
struct argparse_option options[] = {
OPT_HELP(),
@@ -366,7 +391,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"),
@@ -376,6 +401,9 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_END(),
};
@@ -395,6 +423,8 @@ int main(int argc, const char *argv[]) {
web_args->es_url = common_es_url;
index_args->es_url = common_es_url;
index_args->script_path = common_script_path;
exec_args->script_path = common_script_path;
if (argc == 0) {
argparse_usage(&argparse);
@@ -423,6 +453,14 @@ int main(int argc, const char *argv[]) {
}
sist2_web(web_args);
} else if (strcmp(argv[0], "exec-script") == 0) {
int err = exec_args_validate(exec_args, argc, argv);
if (err != 0) {
goto end;
}
sist2_exec_script(exec_args);
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
@@ -434,6 +472,7 @@ int main(int argc, const char *argv[]) {
scan_args_destroy(scan_args);
index_args_destroy(index_args);
web_args_destroy(web_args);
exec_args_destroy(exec_args);
return 0;
}

View File

@@ -3,7 +3,7 @@
#include "../sist.h"
#define MAJOR_MIME(mime_id) (mime_id & 0x00FF0000) >> 16
// Major MIME class: bits 16-19 of the id, shifted down to a small integer.
// Argument and expansion are parenthesised so the macro composes safely
// inside larger expressions (e.g. MAJOR_MIME(a | b) or MAJOR_MIME(x) + 1).
#define MAJOR_MIME(mime_id) (((mime_id) & 0x000F0000) >> 16)
#define MIME_EMPTY 1
@@ -31,6 +31,9 @@
// Flag bit OR-ed into the ids of markup MIME types.
#define MARKUP_MASK 0x01000000
// Fully parenthesised so the test stays correct under `!`, `&&`, `||`, etc.
#define IS_MARKUP(mime_id) (((mime_id) & MARKUP_MASK) == MARKUP_MASK)
// Flag bit OR-ed into the ids of RAW camera image MIME types.
#define RAW_MASK 0x00800000
// Fully parenthesised so the test stays correct under `!`, `&&`, `||`, etc.
#define IS_RAW(mime_id) (((mime_id) & RAW_MASK) == RAW_MASK)
enum major_mime {
MimeInvalid = 0,
MimeModel = 1,

View File

@@ -315,127 +315,145 @@ enum mime {
image_webp=524595,
image_wmf=524596,
image_x_3ds=524597,
image_x_award_bioslogo=524598,
image_x_cmu_raster=524599,
image_x_cur=524600,
image_x_dwg=524601,
image_x_eps=524602,
image_x_exr=524603,
image_x_gem=524604,
image_x_icns=524605,
image_x_icon=524606 | 0x80000000,
image_x_jg=524607,
image_x_jps=524608,
image_x_ms_bmp=524609,
image_x_niff=524610,
image_x_pcx=524611,
image_x_pict=524612,
image_x_portable_bitmap=524613,
image_x_portable_graymap=524614,
image_x_portable_pixmap=524615,
image_x_quicktime=524616,
image_x_rgb=524617,
image_x_tga=524618,
image_x_tiff=524619,
image_x_win_bitmap=524620,
image_x_xcf=524621 | 0x80000000,
image_x_xpixmap=524622 | 0x80000000,
image_x_xwindowdump=524623,
message_news=196944,
message_rfc822=196945,
model_vnd_dwf=65874,
model_vnd_gdl=65875,
model_vnd_gs_gdl=65876,
model_vrml=65877,
model_x_pov=65878,
text_PGP=590167,
text_asp=590168,
text_css=590169,
text_html=590170 | 0x01000000,
text_javascript=590171,
text_mcf=590172,
text_pascal=590173,
text_plain=590174,
text_richtext=590175,
text_rtf=590176,
text_scriplet=590177,
text_tab_separated_values=590178,
text_troff=590179,
text_uri_list=590180,
text_vnd_abc=590181,
text_vnd_fmi_flexstor=590182,
text_vnd_wap_wml=590183,
text_vnd_wap_wmlscript=590184,
text_webviewhtml=590185,
text_x_Algol68=590186,
text_x_asm=590187,
text_x_audiosoft_intra=590188,
text_x_awk=590189,
text_x_bcpl=590190,
text_x_c=590191,
text_x_c__=590192,
text_x_component=590193,
text_x_diff=590194,
text_x_fortran=590195,
text_x_java=590196,
text_x_la_asf=590197,
text_x_lisp=590198,
text_x_m=590199,
text_x_m4=590200,
text_x_makefile=590201,
text_x_ms_regedit=590202,
text_x_msdos_batch=590203,
text_x_objective_c=590204,
text_x_pascal=590205,
text_x_perl=590206,
text_x_php=590207,
text_x_po=590208,
text_x_python=590209,
text_x_ruby=590210,
text_x_sass=590211,
text_x_scss=590212,
text_x_server_parsed_html=590213,
text_x_setext=590214,
text_x_sgml=590215 | 0x01000000,
text_x_shellscript=590216,
text_x_speech=590217,
text_x_tcl=590218,
text_x_tex=590219,
text_x_uil=590220,
text_x_uuencode=590221,
text_x_vcalendar=590222,
text_x_vcard=590223,
text_xml=590224 | 0x01000000,
video_MP2T=393617,
video_animaflex=393618,
video_avi=393619,
video_avs_video=393620,
video_mp4=393621,
video_mpeg=393622,
video_quicktime=393623,
video_vdo=393624,
video_vivo=393625,
video_vnd_rn_realvideo=393626,
video_vosaic=393627,
video_webm=393628,
video_x_amt_demorun=393629,
video_x_amt_showrun=393630,
video_x_atomic3d_feature=393631,
video_x_dl=393632,
video_x_dv=393633,
video_x_fli=393634,
video_x_flv=393635,
video_x_isvideo=393636,
video_x_jng=393637 | 0x80000000,
video_x_m4v=393638,
video_x_matroska=393639,
video_x_mng=393640,
video_x_motion_jpeg=393641,
video_x_ms_asf=393642,
video_x_msvideo=393643,
video_x_qtc=393644,
video_x_sgi_movie=393645,
x_epoc_x_sisx_app=721326,
image_x_adobe_dng=524598 | 0x00800000,
image_x_award_bioslogo=524599,
image_x_canon_cr2=524600 | 0x00800000,
image_x_canon_crw=524601 | 0x00800000,
image_x_cmu_raster=524602,
image_x_cur=524603,
image_x_dcraw=524604 | 0x00800000,
image_x_dwg=524605,
image_x_eps=524606,
image_x_epson_erf=524607 | 0x00800000,
image_x_exr=524608,
image_x_fuji_raf=524609 | 0x00800000,
image_x_gem=524610,
image_x_icns=524611,
image_x_icon=524612 | 0x80000000,
image_x_jg=524613,
image_x_jps=524614,
image_x_kodak_dcr=524615 | 0x00800000,
image_x_kodak_k25=524616 | 0x00800000,
image_x_kodak_kdc=524617 | 0x00800000,
image_x_minolta_mrw=524618 | 0x00800000,
image_x_ms_bmp=524619,
image_x_niff=524620,
image_x_nikon_nef=524621 | 0x00800000,
image_x_olympus_orf=524622 | 0x00800000,
image_x_panasonic_raw=524623 | 0x00800000,
image_x_pcx=524624,
image_x_pentax_pef=524625 | 0x00800000,
image_x_pict=524626,
image_x_portable_bitmap=524627,
image_x_portable_graymap=524628,
image_x_portable_pixmap=524629,
image_x_quicktime=524630,
image_x_rgb=524631,
image_x_sigma_x3f=524632 | 0x00800000,
image_x_sony_arw=524633 | 0x00800000,
image_x_sony_sr2=524634 | 0x00800000,
image_x_sony_srf=524635 | 0x00800000,
image_x_tga=524636,
image_x_tiff=524637,
image_x_win_bitmap=524638,
image_x_xcf=524639 | 0x80000000,
image_x_xpixmap=524640 | 0x80000000,
image_x_xwindowdump=524641,
message_news=196962,
message_rfc822=196963,
model_vnd_dwf=65892,
model_vnd_gdl=65893,
model_vnd_gs_gdl=65894,
model_vrml=65895,
model_x_pov=65896,
text_PGP=590185,
text_asp=590186,
text_css=590187,
text_html=590188 | 0x01000000,
text_javascript=590189,
text_mcf=590190,
text_pascal=590191,
text_plain=590192,
text_richtext=590193,
text_rtf=590194,
text_scriplet=590195,
text_tab_separated_values=590196,
text_troff=590197,
text_uri_list=590198,
text_vnd_abc=590199,
text_vnd_fmi_flexstor=590200,
text_vnd_wap_wml=590201,
text_vnd_wap_wmlscript=590202,
text_webviewhtml=590203,
text_x_Algol68=590204,
text_x_asm=590205,
text_x_audiosoft_intra=590206,
text_x_awk=590207,
text_x_bcpl=590208,
text_x_c=590209,
text_x_c__=590210,
text_x_component=590211,
text_x_diff=590212,
text_x_fortran=590213,
text_x_java=590214,
text_x_la_asf=590215,
text_x_lisp=590216,
text_x_m=590217,
text_x_m4=590218,
text_x_makefile=590219,
text_x_ms_regedit=590220,
text_x_msdos_batch=590221,
text_x_objective_c=590222,
text_x_pascal=590223,
text_x_perl=590224,
text_x_php=590225,
text_x_po=590226,
text_x_python=590227,
text_x_ruby=590228,
text_x_sass=590229,
text_x_scss=590230,
text_x_server_parsed_html=590231,
text_x_setext=590232,
text_x_sgml=590233 | 0x01000000,
text_x_shellscript=590234,
text_x_speech=590235,
text_x_tcl=590236,
text_x_tex=590237,
text_x_uil=590238,
text_x_uuencode=590239,
text_x_vcalendar=590240,
text_x_vcard=590241,
text_xml=590242 | 0x01000000,
video_MP2T=393635,
video_animaflex=393636,
video_avi=393637,
video_avs_video=393638,
video_mp4=393639,
video_mpeg=393640,
video_quicktime=393641,
video_vdo=393642,
video_vivo=393643,
video_vnd_rn_realvideo=393644,
video_vosaic=393645,
video_webm=393646,
video_x_amt_demorun=393647,
video_x_amt_showrun=393648,
video_x_atomic3d_feature=393649,
video_x_dl=393650,
video_x_dv=393651,
video_x_fli=393652,
video_x_flv=393653,
video_x_isvideo=393654,
video_x_jng=393655 | 0x80000000,
video_x_m4v=393656,
video_x_matroska=393657,
video_x_mng=393658,
video_x_motion_jpeg=393659,
video_x_ms_asf=393660,
video_x_msvideo=393661,
video_x_qtc=393662,
video_x_sgi_movie=393663,
x_epoc_x_sisx_app=721344,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj";
@@ -868,6 +886,24 @@ case video_x_sgi_movie: return "video/x-sgi-movie";
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
case application_x_zstd_dictionary: return "application/x-zstd-dictionary";
case application_vnd_ms_outlook: return "application/vnd.ms-outlook";
case image_x_olympus_orf: return "image/x-olympus-orf";
case image_x_nikon_nef: return "image/x-nikon-nef";
case image_x_fuji_raf: return "image/x-fuji-raf";
case image_x_panasonic_raw: return "image/x-panasonic-raw";
case image_x_adobe_dng: return "image/x-adobe-dng";
case image_x_canon_cr2: return "image/x-canon-cr2";
case image_x_canon_crw: return "image/x-canon-crw";
case image_x_dcraw: return "image/x-dcraw";
case image_x_kodak_dcr: return "image/x-kodak-dcr";
case image_x_kodak_k25: return "image/x-kodak-k25";
case image_x_kodak_kdc: return "image/x-kodak-kdc";
case image_x_minolta_mrw: return "image/x-minolta-mrw";
case image_x_pentax_pef: return "image/x-pentax-pef";
case image_x_sigma_x3f: return "image/x-sigma-x3f";
case image_x_sony_arw: return "image/x-sony-arw";
case image_x_sony_sr2: return "image/x-sony-sr2";
case image_x_sony_srf: return "image/x-sony-srf";
case image_x_epson_erf: return "image/x-epson-erf";
default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@@ -1000,7 +1036,6 @@ g_hash_table_insert(ext_table, "cpio", (gpointer)application_x_cpio);
g_hash_table_insert(ext_table, "dbf", (gpointer)application_x_dbf);
g_hash_table_insert(ext_table, "deb", (gpointer)application_x_debian_package);
g_hash_table_insert(ext_table, "deepv", (gpointer)application_x_deepv);
g_hash_table_insert(ext_table, "dcr", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dir", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dxr", (gpointer)application_x_director);
g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp);
@@ -1389,6 +1424,24 @@ g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc);
g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie);
g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie);
g_hash_table_insert(ext_table, "msg", (gpointer)application_vnd_ms_outlook);
g_hash_table_insert(ext_table, "orf", (gpointer)image_x_olympus_orf);
g_hash_table_insert(ext_table, "nef", (gpointer)image_x_nikon_nef);
g_hash_table_insert(ext_table, "raf", (gpointer)image_x_fuji_raf);
g_hash_table_insert(ext_table, "rw2", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(ext_table, "raw", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(ext_table, "dng", (gpointer)image_x_adobe_dng);
g_hash_table_insert(ext_table, "cr2", (gpointer)image_x_canon_cr2);
g_hash_table_insert(ext_table, "crw", (gpointer)image_x_canon_crw);
g_hash_table_insert(ext_table, "dcr", (gpointer)image_x_kodak_dcr);
g_hash_table_insert(ext_table, "k25", (gpointer)image_x_kodak_k25);
g_hash_table_insert(ext_table, "kdc", (gpointer)image_x_kodak_kdc);
g_hash_table_insert(ext_table, "mrw", (gpointer)image_x_minolta_mrw);
g_hash_table_insert(ext_table, "pef", (gpointer)image_x_pentax_pef);
g_hash_table_insert(ext_table, "xf3", (gpointer)image_x_sigma_x3f);
g_hash_table_insert(ext_table, "arw", (gpointer)image_x_sony_arw);
g_hash_table_insert(ext_table, "sr2", (gpointer)image_x_sony_sr2);
g_hash_table_insert(ext_table, "srf", (gpointer)image_x_sony_srf);
g_hash_table_insert(ext_table, "erf", (gpointer)image_x_epson_erf);
return ext_table;}
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@@ -1821,5 +1874,23 @@ g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary);
g_hash_table_insert(mime_table, "application/vnd.ms-outlook", (gpointer)application_vnd_ms_outlook);
g_hash_table_insert(mime_table, "image/x-olympus-orf", (gpointer)image_x_olympus_orf);
g_hash_table_insert(mime_table, "image/x-nikon-nef", (gpointer)image_x_nikon_nef);
g_hash_table_insert(mime_table, "image/x-fuji-raf", (gpointer)image_x_fuji_raf);
g_hash_table_insert(mime_table, "image/x-panasonic-raw", (gpointer)image_x_panasonic_raw);
g_hash_table_insert(mime_table, "image/x-adobe-dng", (gpointer)image_x_adobe_dng);
g_hash_table_insert(mime_table, "image/x-canon-cr2", (gpointer)image_x_canon_cr2);
g_hash_table_insert(mime_table, "image/x-canon-crw", (gpointer)image_x_canon_crw);
g_hash_table_insert(mime_table, "image/x-dcraw", (gpointer)image_x_dcraw);
g_hash_table_insert(mime_table, "image/x-kodak-dcr", (gpointer)image_x_kodak_dcr);
g_hash_table_insert(mime_table, "image/x-kodak-k25", (gpointer)image_x_kodak_k25);
g_hash_table_insert(mime_table, "image/x-kodak-kdc", (gpointer)image_x_kodak_kdc);
g_hash_table_insert(mime_table, "image/x-minolta-mrw", (gpointer)image_x_minolta_mrw);
g_hash_table_insert(mime_table, "image/x-pentax-pef", (gpointer)image_x_pentax_pef);
g_hash_table_insert(mime_table, "image/x-sigma-x3f", (gpointer)image_x_sigma_x3f);
g_hash_table_insert(mime_table, "image/x-sony-arw", (gpointer)image_x_sony_arw);
g_hash_table_insert(mime_table, "image/x-sony-sr2", (gpointer)image_x_sony_sr2);
g_hash_table_insert(mime_table, "image/x-sony-srf", (gpointer)image_x_sony_srf);
g_hash_table_insert(mime_table, "image/x-epson-erf", (gpointer)image_x_epson_erf);
return mime_table;}
#endif

View File

@@ -119,6 +119,8 @@ void parse(void *arg) {
if (!(SHOULD_PARSE(doc.mime))) {
} else if (IS_RAW(doc.mime)) {
parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {

View File

@@ -87,6 +87,7 @@ function shouldDisplayRawImage(hit) {
hit["_source"]["mime"] &&
!hit["_source"]["parent"] &&
hit["_source"]["videoc"] !== "tiff" &&
hit["_source"]["videoc"] !== "raw" &&
hit["_source"]["videoc"] !== "ppm";
}

View File

@@ -250,12 +250,13 @@ function addTag(map, tag, id, count) {
let child = {
id: id,
text: tags.length !== 1 ? tags[0] : `${tags[0]} (${count})`,
name: tags[0],
children: []
};
let found = false;
map.forEach(node => {
if (node.text === child.text) {
if (node.name === child.name) {
found = true;
if (tags.length !== 1) {
addTag(node.children, tags.slice(1).join("."), id, count);

View File

@@ -11,7 +11,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.4.2</span>
<span class="badge badge-pill version">2.5.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="/stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>

View File

@@ -10,7 +10,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.4.2</span>
<span class="badge badge-pill version">2.5.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"

View File

@@ -68,6 +68,7 @@ void stats(struct mg_connection *nc) {
void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
if (path->len != UUID_STR_LEN + 4) {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -78,6 +79,7 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st
index_t *index = get_index_by_id(arg_uuid);
if (index == NULL) {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -173,6 +175,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
if (path->len != UUID_STR_LEN * 2 + 2) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -189,6 +192,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
int ret = uuid_parse(arg_uuid, uuid);
if (ret != 0) {
LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -196,6 +200,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
store_t *store = get_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -214,6 +219,7 @@ void search(struct mg_connection *nc, struct http_message *hm) {
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_send_error(nc, 500, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -314,6 +320,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -328,6 +335,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -335,6 +343,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -352,6 +361,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
if (path->len != UUID_STR_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -371,6 +381,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -386,6 +397,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
if (idx == NULL) {
cJSON_Delete(doc);
nc->flags |= MG_F_SEND_AND_CLOSE;
mg_http_send_error(nc, 404, NULL);
return;
}
@@ -423,6 +435,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
struct http_message *hm = (struct http_message *) p;
if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
mg_http_send_error(nc, 400, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
return;
}
@@ -469,6 +482,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
} else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
document_info(nc, hm, &path);
} else {
mg_http_send_error(nc, 404, NULL);
nc->flags |= MG_F_SEND_AND_CLOSE;
}
@@ -499,7 +513,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
free(json_str);
free(tmp);
}
//todo return error code
mg_http_send_error(nc, 500, NULL);
}
free_response(r);

File diff suppressed because one or more lines are too long