Compare commits

...

32 Commits

Author SHA1 Message Date
4b4ab12fac Version bump 2020-09-22 21:08:24 -04:00
ae283f77ad Fix #112 2020-09-22 21:08:24 -04:00
d3bd53a5ea Fix arm Dockerfile 2020-09-13 16:16:26 -04:00
f7887f24d1 sync libscan 2020-09-13 16:16:26 -04:00
5c8de19188 Update build instructions 2020-09-13 16:16:26 -04:00
d861d278a4 version bump 2020-09-13 16:16:26 -04:00
b6ddeee0e0 Use async curl for ES requests #108 2020-09-13 16:16:26 -04:00
0cd2523b05 arm64 build 2020-09-13 16:16:26 -04:00
5e798f9367 Update issue-template.md 2020-09-13 10:29:42 -04:00
5da6c1488b Handle null mime in document info dialog 2020-08-29 10:34:58 -04:00
9568e25f84 Fix #99 2020-08-29 10:17:28 -04:00
6a8027789a Limited support for UTF16 2020-08-29 10:17:28 -04:00
b1d16d8abf Fix #100 2020-08-29 10:17:28 -04:00
b2a157e24d Update docs 2020-08-25 10:38:38 -04:00
9aead9389a Fix typo in elastic.c 2020-08-25 10:38:38 -04:00
a32c68cba8 Build fixes 2020-08-25 10:38:38 -04:00
d116cf9d91 Default index for web & exec 2020-08-25 10:38:38 -04:00
Andrew
a020a8b32c Update USAGE.md
Fix link to scripting.
2020-08-25 10:38:38 -04:00
5d5d9c3092 Fix heap buffer overflow warning 2020-08-25 10:38:38 -04:00
3379d5ce71 Fix #97 2020-08-25 10:38:38 -04:00
a0ff4a1f01 Fix heap buffer overflow warning 2020-08-25 10:38:38 -04:00
4589f3bde7 Fix #94 2020-08-25 10:38:38 -04:00
1c898640cf Fix #88 2020-08-25 10:38:38 -04:00
a0739d5177 Fix #92 2020-08-25 10:38:38 -04:00
8f9d29dbc6 Fix #91 2020-08-25 10:38:38 -04:00
3ff4b70223 Update README.md 2020-08-25 10:38:38 -04:00
02ad035b09 Workaround when first ebook page is blank 2020-08-25 10:38:38 -04:00
c11feb213d Gracefully handle archive errors in comic.c 2020-08-25 10:38:38 -04:00
72902947cd Fix for #90 2020-08-25 10:38:38 -04:00
a18bb81222 remove warning 2020-08-25 10:38:38 -04:00
1520288f19 Fix #89 2020-08-25 10:38:38 -04:00
e507de194b Fix log colors 2020-08-25 10:38:38 -04:00
26 changed files with 309 additions and 146 deletions

View File

@@ -9,7 +9,7 @@ assignees: ''
sist2 version:
Platform (Linux or Docker):
Platform (Linux or Docker, x86-64 or arm64):
Elasticsearch version:

22
DockerArm64/Dockerfile Normal file
View File

@@ -0,0 +1,22 @@
FROM ubuntu:19.10
MAINTAINER simon987 <me@simon987.net>
RUN apt update
RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
curl libtiff5 libpng16-16 libpcre3
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
ADD sist2_arm64 /root/sist2
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]

13
DockerArm64/build.sh Executable file
View File

@@ -0,0 +1,13 @@
rm ./sist2_arm64
cp ../sist2_arm64.gz .
gzip -d sist2_arm64.gz
version=$(./sist2_arm64 --version)
echo "Version ${version}"
docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest
docker push simon987/sist2-arm64:"${version}"
docker push simon987/sist2-arm64:latest
docker run --rm simon987/sist2-arm64 -v

View File

@@ -2,6 +2,8 @@
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/app/rest/builds/buildType(Sist2_Build)/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
# sist2
sist2 (Simple incremental search tool)
@@ -124,7 +126,7 @@ binaries (GCC 7+ required).
1. Install compile-time dependencies
```bash
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw
vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf
```
2. Build

12
ci/build_arm64.sh Executable file
View File

@@ -0,0 +1,12 @@
#!/usr/bin/env bash
VCPKG_ROOT="/vcpkg"
rm *.gz
rm -rf CMakeFiles CMakeCache.txt
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
make -j 4
strip sist2
mv sist2 sist2_arm64
gzip -9 sist2_arm64

View File

@@ -49,6 +49,7 @@ Scan options
Index options
-t, --threads=<int> Number of threads. DEFAULT=1
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
-p, --print Just print JSON documents to stdout.
--script-file=<str> Path to user script.
--async-script Execute user script asynchronously.
@@ -57,11 +58,14 @@ Index options
Web options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
--bind=<str> Listen on this address. DEFAULT=localhost:4090
--auth=<str> Basic auth in user:password format
--tag-auth=<str> Basic auth in user:password format for tagging
Exec-script options
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
--script-file=<str> Path to user script.
--async-script Execute user script asynchronously.
Made by simon987 <me@simon987.net>. Released under GPL-3.0
@@ -241,6 +245,8 @@ it is currently unsupported and has no guaranties of back/forward compatibility.
* `--es-url`
Elasticsearch url and port. If you are using docker, make sure that both containers are on the
same network.
* `--es-index`
Elasticsearch index name. DEFAULT=sist2
* `-p, --print`
Print index in JSON format to stdout.
* `--script-file`
@@ -278,6 +284,8 @@ sist2 index --print ./my_index/ | jq | less
### Web options
* `--es-url=<str>` Elasticsearch url.
* `--es-index`
Elasticsearch index name. DEFAULT=sist2
* `--bind=<str>` Listen on this address.
* `--auth=<str>` Basic auth in user:password format
* `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the
@@ -340,4 +348,4 @@ See [Automatic tagging](#automatic-tagging) for information about tag
### Automatic tagging
See [scripting](docs/scripting.md) documentation.
See [scripting](scripting.md) documentation.

View File

@@ -54,6 +54,10 @@
"type": "integer",
"index": false
},
"pages": {
"type": "integer",
"index": false
},
"mtime": {
"type": "integer"
},

View File

@@ -9,6 +9,7 @@
#define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200"
#define DEFAULT_ES_INDEX "sist2"
#define DEFAULT_BATCH_SIZE 100
#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
@@ -287,6 +288,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
args->es_url = DEFAULT_ES_URL;
}
if (args->es_index == NULL) {
args->es_index = DEFAULT_ES_INDEX;
}
if (args->script_path != NULL) {
if (load_script(args->script_path, &args->script) != 0) {
return 1;
@@ -298,6 +303,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg async_script=%s", args->async_script)
@@ -326,6 +332,10 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
args->listen_address = DEFAULT_LISTEN_ADDRESS;
}
if (args->es_index == NULL) {
args->es_index = DEFAULT_ES_INDEX;
}
if (args->credentials != NULL) {
char *ptr = strstr(args->credentials, ":");
if (ptr == NULL) {
@@ -383,6 +393,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
@@ -426,6 +437,10 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
args->es_url = DEFAULT_ES_URL;
}
if (args->es_index == NULL) {
args->es_index = DEFAULT_ES_INDEX;
}
if (args->script_path == NULL) {
LOG_FATAL("cli.c", "--script-file argument is required");
}

View File

@@ -35,6 +35,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
typedef struct index_args {
char *es_url;
char *es_index;
const char *index_path;
const char *script_path;
char *script;
@@ -47,6 +48,7 @@ typedef struct index_args {
typedef struct web_args {
char *es_url;
char *es_index;
char *listen_address;
char *credentials;
char *tag_credentials;
@@ -60,6 +62,7 @@ typedef struct web_args {
typedef struct exec_args {
char *es_url;
char *es_index;
const char *index_path;
const char *script_path;
int async_script;

View File

@@ -58,6 +58,7 @@ typedef struct {
typedef struct {
char *es_url;
char *es_index;
int batch_size;
tpool_t *pool;
store_t *tag_store;
@@ -66,6 +67,7 @@ typedef struct {
typedef struct {
char *es_url;
char *es_index;
int index_count;
char *auth_user;
char *auth_pass;

View File

@@ -9,6 +9,7 @@
typedef struct es_indexer {
int queued;
char *es_url;
char *es_index;
es_bulk_line_t *line_head;
es_bulk_line_t *line_tail;
} es_indexer_t;
@@ -17,11 +18,13 @@ typedef struct es_indexer {
static __thread es_indexer_t *Indexer;
void delete_queue(int max);
void elastic_flush();
void elastic_cleanup() {
elastic_flush();
if (Indexer != NULL) {
free(Indexer->es_index);
free(Indexer->es_url);
free(Indexer);
}
@@ -32,7 +35,7 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
cJSON *line = cJSON_CreateObject();
cJSON_AddStringToObject(line, "_id", uuid_str);
cJSON_AddStringToObject(line, "_index", "sist2");
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
cJSON_AddStringToObject(line, "_type", "_doc");
cJSON_AddItemReferenceToObject(line, "_source", document);
@@ -67,7 +70,7 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
}
cJSON *body = cJSON_CreateObject();
@@ -83,9 +86,10 @@ void execute_update_script(const char *script, int async, const char index_id[UU
char bulk_url[4096];
if (async) {
snprintf(bulk_url, sizeof(bulk_url), "%s/sist2/_update_by_query?wait_for_completion=false", Indexer->es_url);
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
Indexer->es_index);
} else {
snprintf(bulk_url, sizeof(bulk_url), "%s/sist2/_update_by_query", Indexer->es_url);
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
}
response_t *r = web_post(bulk_url, str);
if (!async) {
@@ -113,8 +117,6 @@ void execute_update_script(const char *script, int async, const char index_id[UU
cJSON_Delete(resp);
}
#define ACTION_STR_LEN 91
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
es_bulk_line_t *line = Indexer->line_head;
*count = 0;
@@ -126,20 +128,24 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
while (line != NULL && *count < max) {
char action_str[256];
snprintf(action_str, 256,
"{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
snprintf(
action_str, 256,
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->uuid_str, Indexer->es_index
);
size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line);
while (buf_size + line_len + ACTION_STR_LEN > buf_capacity) {
while (buf_size + line_len + action_str_len > buf_capacity) {
buf_capacity *= 2;
buf = realloc(buf, buf_capacity);
}
buf_size += line_len + ACTION_STR_LEN;
buf_size += line_len + action_str_len;
memcpy(buf + buf_cur, action_str, ACTION_STR_LEN);
buf_cur += ACTION_STR_LEN;
memcpy(buf + buf_cur, action_str, action_str_len);
buf_cur += action_str_len;
memcpy(buf + buf_cur, line->line, line_len);
buf_cur += line_len;
@@ -177,6 +183,21 @@ void print_errors(response_t *r) {
free(tmp);
}
void print_error(response_t *r) {
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
cJSON *ret_json = cJSON_Parse(tmp);
if (cJSON_GetObjectItem(ret_json, "error") != NULL) {
char *str = cJSON_Print(cJSON_GetObjectItem(ret_json, "error"));
LOG_ERRORF("elastic.c", "%s\n", str);
cJSON_free(str);
}
cJSON_Delete(ret_json);
free(tmp);
}
void _elastic_flush(int max) {
if (max == 0) {
@@ -189,7 +210,7 @@ void _elastic_flush(int max) {
void *buf = create_bulk_buffer(max, &count, &buf_len);
char bulk_url[4096];
snprintf(bulk_url, 4096, "%s/sist2/_bulk?pipeline=tie", Indexer->es_url);
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
response_t *r = web_post(bulk_url, buf);
if (r->status_code == 0) {
@@ -259,7 +280,7 @@ void delete_queue(int max) {
void elastic_flush() {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
}
_elastic_flush(Indexer->queued);
@@ -268,7 +289,7 @@ void elastic_flush() {
void elastic_index_line(es_bulk_line_t *line) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url);
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
}
if (Indexer->line_head == NULL) {
@@ -286,14 +307,18 @@ void elastic_index_line(es_bulk_line_t *line) {
}
}
es_indexer_t *create_indexer(const char *url) {
es_indexer_t *create_indexer(const char *url, const char *index) {
char *es_url = malloc(strlen(url) + 1);
strcpy(es_url, url);
char *es_index = malloc(strlen(index) + 1);
strcpy(es_index, index);
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
indexer->es_url = es_url;
indexer->es_index = es_index;
indexer->queued = 0;
indexer->line_head = NULL;
indexer->line_tail = NULL;
@@ -305,7 +330,7 @@ void finish_indexer(char *script, int async_script, char *index_id) {
char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
@@ -314,18 +339,18 @@ void finish_indexer(char *script, int async_script, char *index_id) {
execute_update_script(script, async_script, index_id);
free(script);
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
free_response(r);
}
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/sist2/_settings", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
free_response(r);
@@ -335,7 +360,7 @@ void elastic_init(int force_reset) {
// Check if index exists
char url[4096];
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
response_t *r = web_get(url, 30);
int index_exists = r->status_code == 200;
free_response(r);
@@ -345,32 +370,38 @@ void elastic_init(int force_reset) {
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, "");
if (r->status_code != 200) {
print_error(r);
LOG_FATAL("elastic.c", "Could not create index")
}
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/_ingest/pipeline/tie", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
r = web_put(url, pipeline_json);
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, settings_json);
LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
r = web_put(url, mappings_json);
LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
free_response(r);
snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
r = web_post(url, "");
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
free_response(r);
@@ -379,12 +410,16 @@ void elastic_init(int force_reset) {
cJSON *elastic_get_document(const char *uuid_str) {
char url[4096];
snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, uuid_str);
response_t *r = web_get(url, 3);
cJSON *json = NULL;
if (r->status_code == 200) {
json = cJSON_Parse(r->body);
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
json = cJSON_Parse(tmp);
free(tmp);
}
free_response(r);
return json;
@@ -392,8 +427,8 @@ cJSON *elastic_get_document(const char *uuid_str) {
char *elastic_get_status() {
char url[4096];
snprintf(url, 4096,
"%s/_cluster/state/metadata/sist2?filter_path=metadata.indices.*.state", WebCtx.es_url);
snprintf(url, sizeof(url),
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
response_t *r = web_get(url, 30);
cJSON *json = NULL;
@@ -401,12 +436,16 @@ char *elastic_get_status() {
status[0] = '\0';
if (r->status_code == 200) {
json = cJSON_Parse(r->body);
char *tmp = malloc(r->size + 1);
memcpy(tmp, r->body, r->size);
*(tmp + r->size) = '\0';
json = cJSON_Parse(tmp);
free(tmp);
const cJSON *metadata = cJSON_GetObjectItem(json, "metadata");
if (metadata != NULL) {
const cJSON *indices = cJSON_GetObjectItem(metadata, "indices");
const cJSON *sist2 = cJSON_GetObjectItem(indices, "sist2");
const cJSON *state = cJSON_GetObjectItem(sist2, "state");
const cJSON *index = cJSON_GetObjectItem(indices, WebCtx.es_index);
const cJSON *state = cJSON_GetObjectItem(index, "state");
strcpy(status, state->valuestring);
}
}

View File

@@ -20,7 +20,7 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char* es_url);
es_indexer_t *create_indexer(const char *url, const char *index);
void elastic_cleanup();
void finish_indexer(char *script, int async_script, char *index_id);

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,6 @@
#include "web.h"
#include "src/sist.h"
#include "src/ctx.h"
#include <mongoose.h>
#include <pthread.h>
@@ -21,95 +22,82 @@ void free_response(response_t *resp) {
free(resp);
}
#define SIST2_HEADERS "User-Agent: sist2\r\nContent-Type: application/json\r\n"
void web_post_async_poll(subreq_ctx_t* req) {
fd_set fdread;
fd_set fdwrite;
fd_set fdexcep;
int maxfd = -1;
FD_ZERO(&fdread);
FD_ZERO(&fdwrite);
FD_ZERO(&fdexcep);
void http_req_ev(struct mg_connection *nc, int ev, void *ptr) {
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
http_ev_data_t *ev_data = (http_ev_data_t *) nc->user_data;
if(mc != CURLM_OK) {
req->done = TRUE;
return;
}
switch (ev) {
case MG_EV_CONNECT: {
int connect_status = *(int *) ptr;
if (connect_status != 0) {
ev_data->done = TRUE;
ev_data->resp->status_code = 0;
}
if (maxfd == -1) {
// no fds ready yet
return;
}
struct timeval timeout = {1, 0};
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
switch(rc) {
case -1:
req->done = TRUE;
break;
}
case MG_EV_HTTP_REPLY: {
struct http_message *hm = (struct http_message *) ptr;
//TODO: Check errors?
ev_data->resp->size = hm->body.len;
ev_data->resp->status_code = hm->resp_code;
ev_data->resp->body = malloc(hm->body.len + 1);
memcpy(ev_data->resp->body, hm->body.p, hm->body.len);
*(ev_data->resp->body + hm->body.len) = '\0';
ev_data->done = TRUE;
case 0:
break;
}
case MG_EV_CLOSE: {
ev_data->done = TRUE;
break;
}
default:
curl_multi_perform(req->multi, &req->running_handles);
break;
}
if (req->running_handles == 0) {
req->done = TRUE;
req->response->body = req->response_buf.buf;
req->response->size = req->response_buf.cur;
curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);
curl_multi_cleanup(req->multi);
curl_easy_cleanup(req->handle);
curl_slist_free_all(req->headers);
return;
}
}
subreq_ctx_t *http_req(const char *url, const char *extra_headers, const char *post_data, const char *method) {
subreq_ctx_t *web_post_async(const char *url, char *data) {
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
req->response = calloc(1, sizeof(response_t));
req->data = data;
req->response_buf = dyn_buffer_create();
struct mg_str scheme;
struct mg_str user_info;
struct mg_str host;
unsigned int port;
struct mg_str path;
struct mg_str query;
struct mg_str fragment;
req->handle = curl_easy_init();
CURL *curl = req->handle;
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&req->response_buf));
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (post_data == NULL) post_data = "";
if (extra_headers == NULL) extra_headers = "";
if (path.len == 0) path = mg_mk_str("/");
if (host.len == 0) host = mg_mk_str("");
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
// [scheme://[user_info@]]host[:port][/path][?query][#fragment]
mg_parse_uri(mg_mk_str(url), &scheme, &user_info, &host, &port, &path, &query, &fragment);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
if (query.len > 0) path.len += query.len + 1;
req->multi = curl_multi_init();
curl_multi_add_handle(req->multi, curl);
curl_multi_perform(req->multi, &req->running_handles);
subreq_ctx_t *ctx = malloc(sizeof(subreq_ctx_t));
mg_mgr_init(&ctx->mgr, NULL);
LOG_DEBUGF("web.c", "async request POST %s", url)
char address[8192];
snprintf(address, sizeof(address), "tcp://%.*s:%u", (int) host.len, host.p, port);
struct mg_connection *nc = mg_connect(&ctx->mgr, address, http_req_ev);
nc->user_data = &ctx->ev_data;
mg_set_protocol_http_websocket(nc);
ctx->ev_data.resp = calloc(1, sizeof(response_t));
ctx->ev_data.done = FALSE;
mg_printf(
nc, "%s %.*s HTTP/1.1\r\n"
"Host: %.*s\r\n"
"Content-Length: %zu\r\n"
"%s\r\n"
"%s",
method, (int) path.len, path.p,
(int) (path.p - host.p), host.p,
strlen(post_data),
extra_headers,
post_data
);
return ctx;
}
subreq_ctx_t *web_post_async(const char *url, const char *data) {
return http_req(url, SIST2_HEADERS, data, "POST");
return req;
}
response_t *web_get(const char *url, int timeout) {
@@ -125,7 +113,8 @@ response_t *web_get(const char *url, int timeout) {
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_perform(curl);
@@ -153,7 +142,8 @@ response_t *web_post(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
@@ -187,7 +177,8 @@ response_t *web_put(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
@@ -218,7 +209,8 @@ response_t *web_delete(const char *url) {
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
struct curl_slist *headers = curl_slist_append(headers, "Content-Type: application/json");
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_perform(curl);

View File

@@ -3,6 +3,7 @@
#include "src/sist.h"
#include <mongoose.h>
#include <curl/curl.h>
typedef struct response {
char *body;
@@ -16,13 +17,20 @@ typedef struct {
} http_ev_data_t;
typedef struct {
http_ev_data_t ev_data;
struct mg_mgr mgr;
char* data;
dyn_buffer_t response_buf;
struct curl_slist *headers;
CURL *handle;
CURLM *multi;
response_t *response;
int running_handles;
int done;
} subreq_ctx_t;
response_t *web_get(const char *url, int timeout);
response_t *web_post(const char * url, const char * data);
subreq_ctx_t *web_post_async(const char *url, const char *data);
void web_post_async_poll(subreq_ctx_t* req);
subreq_ctx_t *web_post_async(const char *url, char *data);
response_t *web_put(const char *url, const char *data);
response_t *web_delete(const char *url);

View File

@@ -150,6 +150,8 @@ char *get_meta_key_text(enum metakey meta_key) {
return "modified_by";
case MetaThumbnail:
return "thumbnail";
case MetaPages:
return "pages";
default:
return NULL;
}
@@ -278,6 +280,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
size_t ret = 0;
while (key != '\n') {
switch (key) {
case MetaPages:
case MetaWidth:
case MetaHeight: {
int value;

View File

@@ -4,8 +4,7 @@
#include <stdarg.h>
const char *log_colors[] = {
"\033[34m", "\033[01;34m", "\033[0m",
"\033[01;33m", "\033[31m", "\033[01;31m"
"\033[34m", "\033[01;34m", "\033[01;33m", "\033[0m", "\033[31m", "\033[01;31m"
};
const char *log_levels[] = {

View File

@@ -21,7 +21,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.8.0";
static const char *const Version = "2.8.4";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -259,6 +259,7 @@ void sist2_scan(scan_args_t *args) {
void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.batch_size = args->batch_size;
if (!args->print) {
@@ -347,6 +348,7 @@ void sist2_exec_script(exec_args_t *args) {
void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
WebCtx.es_index = args->es_index;
WebCtx.index_count = args->index_count;
WebCtx.auth_user = args->auth_user;
WebCtx.auth_pass = args->auth_pass;
@@ -390,6 +392,7 @@ int main(int argc, const char *argv[]) {
int arg_version = 0;
char *common_es_url = NULL;
char *common_es_index = NULL;
char *common_script_path = NULL;
int common_async_script = 0;
int common_threads = 0;
@@ -432,6 +435,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
@@ -441,11 +445,14 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
@@ -469,6 +476,11 @@ int main(int argc, const char *argv[]) {
web_args->es_url = common_es_url;
index_args->es_url = common_es_url;
exec_args->es_url = common_es_url;
web_args->es_index = common_es_index;
index_args->es_index = common_es_index;
exec_args->es_index = common_es_index;
index_args->script_path = common_script_path;
exec_args->script_path = common_script_path;
index_args->threads = common_threads;

View File

@@ -229,6 +229,7 @@ a:hover, .btn:hover {
max-width: 100%;
max-height: 400px;
margin: 0 auto 0;
width: auto;
height: auto;
}
@@ -537,3 +538,7 @@ svg {
.stat > .card-body {
padding: 0.7em 1.25em;
}
#modal-body > .img-wrapper {
margin-bottom: 1em;
}

View File

@@ -416,4 +416,8 @@ mark {
.stat > .card-body {
padding: 0.7em 1.25em;
}
#modal-body > .img-wrapper {
margin-bottom: 1em;
}

View File

@@ -75,6 +75,7 @@ function shouldPlayVideo(hit) {
hit["_source"]["extension"] !== "mkv" &&
hit["_source"]["extension"] !== "avi" &&
videoc !== "hevc" &&
videoc !== "mpeg1video" &&
videoc !== "mpeg2video" &&
videoc !== "wmv3";
}
@@ -194,10 +195,21 @@ function makeUserTag(tag, hit) {
function infoButtonCb(hit) {
return () => {
getDocumentInfo(hit["_id"]).then(doc => {
$("#modal-body").empty()
$("#modal-title").text(doc["name"] + ext(hit));
if (doc["mime"]) {
const mimeCategory = doc["mime"].split("/")[0];
const imgWrapper = document.createElement("div");
imgWrapper.setAttribute("style", "position: relative");
imgWrapper.setAttribute("class", "img-wrapper");
makeThumbnail(mimeCategory, hit, imgWrapper, false);
$("#modal-body").append(imgWrapper);
}
const tbody = $("<tbody>");
$("#modal-body").empty()
$("#modal-body")
.append($("<table class='table table-sm'>")
.append($("<thead>")
.append($("<tr>")
@@ -208,10 +220,19 @@ function infoButtonCb(hit) {
.append(tbody)
);
tbody.append($("<tr>")
.append($("<td>").text("index"))
.append($("<td>").text(`[${indexMap[doc["index"]]}]`))
).append($("<tr>")
.append($("<td>").text("mtime"))
.append($("<td>")
.text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " "))
.attr("title", doc["mtime"]))
);
const displayFields = new Set([
"mime", "size", "mtime", "path", "title", "width", "height", "duration", "audioc", "videoc",
"mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
"modified_by"
"modified_by", "pages"
]);
Object.keys(doc)
.filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))

View File

@@ -12,7 +12,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.8.0</span>
<span class="badge badge-pill version">2.8.4</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="/stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings

View File

@@ -10,7 +10,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.8.0</span>
<span class="badge badge-pill version">2.8.4</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"

View File

@@ -10,8 +10,6 @@
#include <mongoose.h>
#define CHUNK_SIZE 1024 * 1024 * 10
static int has_prefix(const struct mg_str *str, const struct mg_str *prefix) {
return str->len > prefix->len && memcmp(str->p, prefix->p, prefix->len) == 0;
@@ -237,13 +235,12 @@ void search(struct mg_connection *nc, struct http_message *hm) {
*(body + hm->body.len) = '\0';
char url[4096];
snprintf(url, 4096, "%s/sist2/_search", WebCtx.es_url);
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->user_data = web_post_async(url, body);
free(body);
}
int serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
@@ -538,8 +535,8 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
}
}
char buf[8192];
snprintf(buf, sizeof(buf),
char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192,
"{"
" \"script\" : {"
" \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\","
@@ -552,14 +549,14 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
);
char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_update/%s", WebCtx.es_url, arg_req->doc_id);
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->user_data = web_post_async(url, buf);
} else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
char buf[8192];
snprintf(buf, sizeof(buf),
char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192,
"{"
" \"script\" : {"
" \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\","
@@ -572,7 +569,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
);
char url[4096];
snprintf(url, sizeof(url), "%s/sist2/_update/%s", WebCtx.es_url, arg_req->doc_id);
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->user_data = web_post_async(url, buf);
}
@@ -668,11 +665,11 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
if (nc->user_data != NULL) {
//Waiting for ES reply
subreq_ctx_t *ctx = (subreq_ctx_t *) nc->user_data;
mg_mgr_poll(&ctx->mgr, 0);
web_post_async_poll(ctx);
if (ctx->ev_data.done == TRUE) {
if (ctx->done == TRUE) {
response_t *r = ctx->ev_data.resp;
response_t *r = ctx->response;
if (r->status_code == 200) {
send_response_line(nc, 200, r->size, "Content-Type: application/json");
@@ -695,6 +692,8 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
}
free_response(r);
free(ctx->data);
free(ctx);
nc->flags |= MG_F_SEND_AND_CLOSE;
nc->user_data = NULL;
}

File diff suppressed because one or more lines are too long