From 75e4e93dddc115cb6e096e0cf87e2d9bf906a45f Mon Sep 17 00:00:00 2001 From: simon987 Date: Sat, 16 Jan 2021 10:57:55 -0500 Subject: [PATCH 1/7] Enable docker image builds --- .drone.yml | 19 +++++++++++++++++++ Docker/Dockerfile | 2 +- Docker/build.sh | 14 -------------- ci/build.sh | 2 ++ 4 files changed, 22 insertions(+), 15 deletions(-) delete mode 100755 Docker/build.sh diff --git a/.drone.yml b/.drone.yml index 4532e63..647e567 100644 --- a/.drone.yml +++ b/.drone.yml @@ -11,6 +11,20 @@ steps: image: simon987/ubuntu_ci commands: - ./ci/build.sh + - name: docker + image: plugins/docker + settings: + username: + from_secret: DOCKER_USER + password: + from_secret: DOCKER_PASSWORD + repo: simon987/sist2 + context: ./Docker/ + dockerfile: ./Docker/Dockerfile + autotag: true + when: + event: + - tag - name: scp files image: appleboy/drone-scp settings: @@ -24,6 +38,7 @@ steps: from_secret: SSH_KEY target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ source: + - ./VERSION - ./sist2-x64-linux - ./sist2-x64-linux-debug.tar.gz @@ -54,3 +69,7 @@ steps: target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ source: - ./sist2-arm64-linux + +volumes: + - name: dockersock + temp: {} \ No newline at end of file diff --git a/Docker/Dockerfile b/Docker/Dockerfile index d8a0519..08f6e12 100644 --- a/Docker/Dockerfile +++ b/Docker/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:19.10 +FROM ubuntu:20.04 MAINTAINER simon987 RUN apt update diff --git a/Docker/build.sh b/Docker/build.sh deleted file mode 100755 index 20312c4..0000000 --- a/Docker/build.sh +++ /dev/null @@ -1,14 +0,0 @@ -rm ./sist2 sist2_debug -cp ../sist2.gz . -gzip -d sist2.gz -strip sist2 - -version=$(./sist2 --version) - -echo "Version ${version}" -docker build . -t simon987/sist2:${version} -t simon987/sist2:latest - -docker push simon987/sist2:${version} -docker push simon987/sist2:latest - -docker run --rm simon987/sist2 -v \ No newline at end of file diff --git a/ci/build.sh b/ci/build.sh index 290af14..91c3df8 100755 --- a/ci/build.sh +++ b/ci/build.sh @@ -10,6 +10,8 @@ rm -rf CMakeFiles CMakeCache.txt cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" . make -j 33 strip sist2 +./sist2 -v > VERSION +cp sist2 Docker/ mv sist2 sist2-x64-linux rm -rf CMakeFiles CMakeCache.txt From 5fe9c9efa31a59330ba097f757fea3e958bbc4e1 Mon Sep 17 00:00:00 2001 From: simon987 Date: Sat, 16 Jan 2021 11:14:18 -0500 Subject: [PATCH 2/7] Tweak CI settings --- .drone.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.drone.yml b/.drone.yml index 647e567..b56731d 100644 --- a/.drone.yml +++ b/.drone.yml @@ -21,7 +21,8 @@ steps: repo: simon987/sist2 context: ./Docker/ dockerfile: ./Docker/Dockerfile - autotag: true + auto_tag: true + auto_tag_suffix: x64-linux when: event: - tag @@ -66,10 +67,6 @@ steps: from_secret: SSH_USER key: from_secret: SSH_KEY - target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ + target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ source: - ./sist2-arm64-linux - -volumes: - - name: dockersock - temp: {} \ No newline at end of file From 8111a6c1432b1f3c8d236cf212e97e084120ce7f Mon Sep 17 00:00:00 2001 From: simon987 Date: Sat, 16 Jan 2021 12:13:56 -0500 Subject: [PATCH 3/7] Workaround for UTF8 .doc files --- third-party/libscan | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third-party/libscan b/third-party/libscan index ae9fade..0a9742b 160000 --- a/third-party/libscan +++ b/third-party/libscan @@ -1 +1 @@ -Subproject commit ae9fadec473e6e4ade05259fe359c5366c3f3af6 +Subproject commit 0a9742b6865da7b60b77790b8288ca6d8b17471c From f83912712957bbe349e68597ce7c8fc5c711d93b Mon Sep 17 00:00:00 2001 From: simon987 Date: Sat, 16 Jan 2021 12:17:43 -0500 Subject: [PATCH 4/7] Change encoding for antiword PDF --- third-party/libscan | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third-party/libscan b/third-party/libscan index 0a9742b..49d4f1a 160000 --- a/third-party/libscan +++ b/third-party/libscan @@ -1 +1 @@ -Subproject commit 0a9742b6865da7b60b77790b8288ca6d8b17471c +Subproject commit 49d4f1ae4834b1716887db28f4e1156b1d47816e From 32c9cb28a35a83911f3e09ca9d0eed4baa18ae29 Mon Sep 17 00:00:00 2001 From: simon987 Date: Fri, 26 Mar 2021 19:48:16 -0400 Subject: [PATCH 5/7] Read subtitles from media files, fix bug in text_buffer --- docs/USAGE.md | 4 +++- src/cli.h | 1 + src/main.c | 2 ++ third-party/argparse | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/USAGE.md b/docs/USAGE.md index bc7278b..a1cb6de 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -46,6 +46,7 @@ Scan options --fast Only index file names & mime type --treemap-threshold= Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005 --mem-buffer= Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000 + --read-subtitles Read subtitles from media files Index options -t, --threads= Number of threads. DEFAULT=1 @@ -91,7 +92,7 @@ Made by simon987 . Released under GPL-3.0 Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute) will be copied to the new index and will not be parsed again. * `-o, --output` Output directory. -* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url)) +* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url)) * `--name` Set the `name` option for the web module * `--depth` Maximum scan dept. Set to 0 only scan files directly in the root directory, set to -1 for infinite depth * `--archive` Archive file mode. @@ -123,6 +124,7 @@ Made by simon987 . Released under GPL-3.0 larger than this number will be read sequentially and no *seek* operations will be supported. To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -` +* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files. ### Scan examples diff --git a/src/cli.h b/src/cli.h index 27139ef..90242aa 100644 --- a/src/cli.h +++ b/src/cli.h @@ -25,6 +25,7 @@ typedef struct scan_args { const char* treemap_threshold_str; double treemap_threshold; int max_memory_buffer; + int read_subtitles; } scan_args_t; scan_args_t *scan_args_create(); diff --git a/src/main.c b/src/main.c index d5b5593..54d8b14 100644 --- a/src/main.c +++ b/src/main.c @@ -132,6 +132,7 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.media_ctx.logf = _logf; ScanCtx.media_ctx.store = _store; ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024; + ScanCtx.media_ctx.read_subtitles = args->read_subtitles; init_media(); // OOXML @@ -448,6 +449,7 @@ int main(int argc, const char *argv[]) { OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer, "Maximum memory buffer size per thread in MB for files inside archives " "(see USAGE.md). DEFAULT: 2000"), + OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."), OPT_GROUP("Index options"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), diff --git a/third-party/argparse b/third-party/argparse index 3f4e359..ffd9c23 160000 --- a/third-party/argparse +++ b/third-party/argparse @@ -1 +1 @@ -Subproject commit 3f4e3594a6891b942d5a711781d5425111aa13bf +Subproject commit ffd9c23427d0cb105e27f27f0cf97b463b6a8bf8 From db3d3128357e28a66430da73e79cb613ed1d4d70 Mon Sep 17 00:00:00 2001 From: simon987 Date: Wed, 14 Apr 2021 19:52:39 -0400 Subject: [PATCH 6/7] wip --- src/web/serve.c | 266 ++++++++++++++++++------------------------------ 1 file changed, 98 insertions(+), 168 deletions(-) diff --git a/src/web/serve.c b/src/web/serve.c index 63b79d9..412b188 100644 --- a/src/web/serve.c +++ b/src/web/serve.c @@ -8,18 +8,8 @@ #include -#include - -static int has_prefix(const struct mg_str *str, const struct mg_str *prefix) { - return str->len > prefix->len && memcmp(str->p, prefix->p, prefix->len) == 0; -} - -static int is_equal(const struct mg_str *s1, const struct mg_str *s2) { - return s1->len == s2->len && memcmp(s1->p, s2->p, s2->len) == 0; -} - -static void send_response_line(struct mg_connection *nc, int status_code, int length, char *extra_headers) { +static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) { mg_printf( nc, "HTTP/1.1 %d %s\r\n" @@ -62,36 +52,32 @@ store_t *get_tag_store(const char *index_id) { void search_index(struct mg_connection *nc) { send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html"); mg_send(nc, search_html, sizeof(search_html)); - nc->flags |= MG_F_SEND_AND_CLOSE; } void stats(struct mg_connection *nc) { send_response_line(nc, 200, sizeof(stats_html), "Content-Type: text/html"); mg_send(nc, stats_html, sizeof(stats_html)); - nc->flags |= MG_F_SEND_AND_CLOSE; } -void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { +void stats_files(struct mg_connection *nc, struct mg_http_message *hm) { - if (path->len != MD5_STR_LENGTH + 4) { - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + if (hm->uri.len != MD5_STR_LENGTH + 4) { + mg_http_reply(nc, 404, "", ""); return; } char arg_md5[MD5_STR_LENGTH]; - memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH); + memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH); *(arg_md5 + MD5_STR_LENGTH - 1) = '\0'; index_t *index = get_index_by_id(arg_md5); if (index == NULL) { - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_reply(nc, 404, "", ""); return; } const char *file; - switch (atoi(hm->uri.p + 3 + MD5_STR_LENGTH)) { + switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) { case 1: file = "treemap.csv"; break; @@ -105,7 +91,6 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st file = "date_agg.csv"; break; default: - nc->flags |= MG_F_SEND_AND_CLOSE; return; } @@ -116,43 +101,31 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st strcpy(full_path, index->path); strcat(full_path, file); - mg_http_serve_file(nc, hm, full_path, mg_mk_str("text/csv"), mg_mk_str(disposition)); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_serve_file(nc, hm, full_path, "text/csv", disposition); } void javascript_lib(struct mg_connection *nc) { send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript"); mg_send(nc, bundle_js, sizeof(bundle_js)); - nc->flags |= MG_F_SEND_AND_CLOSE; } void javascript_search(struct mg_connection *nc) { send_response_line(nc, 200, sizeof(search_js), "Content-Type: application/javascript"); mg_send(nc, search_js, sizeof(search_js)); - nc->flags |= MG_F_SEND_AND_CLOSE; } -int client_requested_dark_theme(struct http_message *hm) { - struct mg_str *cookie_header = mg_get_http_header(hm, "cookie"); +int client_requested_dark_theme(struct mg_http_message *hm) { + struct mg_str *cookie_header = mg_http_get_header(hm, "cookie"); if (cookie_header == NULL) { return FALSE; } - char buf[4096]; - char *sist_cookie = buf; - if (mg_http_parse_header2(cookie_header, "sist", &sist_cookie, sizeof(buf)) == 0) { - return FALSE; - } + struct mg_str sist_cookie = http_get_header_var(*cookie_header, mg_str_n("sist", 4)); - int ret = strcmp(sist_cookie, "dark") == 0; - if (sist_cookie != buf) { - free(sist_cookie); - } - - return ret; + return mg_strcmp(sist_cookie, mg_str_n("dark", 4)) == 0; } -void style(struct mg_connection *nc, struct http_message *hm) { +void style(struct mg_connection *nc, struct mg_http_message *hm) { if (client_requested_dark_theme(hm)) { send_response_line(nc, 200, sizeof(bundle_dark_css), "Content-Type: text/css"); @@ -161,11 +134,9 @@ void style(struct mg_connection *nc, struct http_message *hm) { send_response_line(nc, 200, sizeof(bundle_css), "Content-Type: text/css"); mg_send(nc, bundle_css, sizeof(bundle_css)); } - - nc->flags |= MG_F_SEND_AND_CLOSE; } -void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) { +void img_sprite_skin_flat(struct mg_connection *nc, struct mg_http_message *hm) { if (client_requested_dark_theme(hm)) { send_response_line(nc, 200, sizeof(sprite_skin_flat_dark_png), "Content-Type: image/png"); mg_send(nc, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png)); @@ -173,25 +144,22 @@ void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) { send_response_line(nc, 200, sizeof(sprite_skin_flat_png), "Content-Type: image/png"); mg_send(nc, sprite_skin_flat_png, sizeof(sprite_skin_flat_png)); } - - nc->flags |= MG_F_SEND_AND_CLOSE; } -void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { +void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) { - if (path->len != 68) { - LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p) - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + if (hm->uri.len != 68) { + LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr) + mg_http_reply(nc, 404, "", "Not found"); return; } char arg_file_md5[MD5_STR_LENGTH]; char arg_index[MD5_STR_LENGTH]; - memcpy(arg_index, hm->uri.p + 3, MD5_STR_LENGTH); + memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH); *(arg_index + MD5_STR_LENGTH - 1) = '\0'; - memcpy(arg_file_md5, hm->uri.p + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH); + memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH); *(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0'; unsigned char md5_buf[MD5_DIGEST_LENGTH]; @@ -200,8 +168,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str store_t *store = get_store(arg_index); if (store == NULL) { LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index) - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_reply(nc, 404, "", "Not found"); return; } @@ -212,26 +179,24 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str mg_send(nc, data, data_len); free(data); } - nc->flags |= MG_F_SEND_AND_CLOSE; } -void search(struct mg_connection *nc, struct http_message *hm) { +void search(struct mg_connection *nc, struct mg_http_message *hm) { if (hm->body.len == 0) { LOG_DEBUG("serve.c", "Client sent empty body, ignoring request") - mg_http_send_error(nc, 500, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_reply(nc, 500, "", "Invalid request"); return; } char *body = malloc(hm->body.len + 1); - memcpy(body, hm->body.p, hm->body.len); + memcpy(body, hm->body.ptr, hm->body.len); *(body + hm->body.len) = '\0'; char url[4096]; snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index); - nc->user_data = web_post_async(url, body); + nc->fn_data = web_post_async(url, body); } void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) { @@ -253,16 +218,13 @@ void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) { idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext); dyn_buffer_t encoded = url_escape(url); - mg_http_send_redirect( - nc, 308, - (struct mg_str) MG_MK_STR_N(encoded.buf, encoded.cur), - (struct mg_str) MG_NULL_STR - ); + dyn_buffer_write_char(&encoded, '\0'); + + mg_http_reply(nc, 308, "Location: %s", encoded.buf); dyn_buffer_destroy(&encoded); - nc->flags |= MG_F_SEND_AND_CLOSE; } -void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct http_message *hm) { +void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct mg_http_message *hm) { const char *path = cJSON_GetObjectItem(json, "path")->valuestring; const char *name = cJSON_GetObjectItem(json, "name")->valuestring; @@ -286,7 +248,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"", name, strlen(ext) == 0 ? "" : ".", ext); - mg_http_serve_file(nc, hm, full_path, mg_mk_str(mime), mg_mk_str(disposition)); + mg_http_serve_file(nc, hm, full_path, mime, ""); } void index_info(struct mg_connection *nc) { @@ -310,22 +272,19 @@ void index_info(struct mg_connection *nc) { mg_send(nc, json_str, strlen(json_str)); free(json_str); cJSON_Delete(json); - - nc->flags |= MG_F_SEND_AND_CLOSE; } -void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { +void document_info(struct mg_connection *nc, struct mg_http_message *hm) { - if (path->len != MD5_STR_LENGTH + 2) { - LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p) - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + if (hm->uri.len != MD5_STR_LENGTH + 2) { + LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr) + mg_http_reply(nc, 404, "", "Not found"); return; } char arg_md5[MD5_STR_LENGTH]; - memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH); + memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH); *(arg_md5 + MD5_STR_LENGTH - 1) = '\0'; cJSON *doc = elastic_get_document(arg_md5); @@ -334,16 +293,14 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_ cJSON *index_id = cJSON_GetObjectItem(source, "index"); if (index_id == NULL) { cJSON_Delete(doc); - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_reply(nc, 404, "", "Not found"); return; } index_t *idx = get_index_by_id(index_id->valuestring); if (idx == NULL) { cJSON_Delete(doc); - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_reply(nc, 404, "", "Not found"); return; } @@ -352,21 +309,18 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_ mg_send(nc, json_str, (int) strlen(json_str)); free(json_str); cJSON_Delete(doc); - - nc->flags |= MG_F_SEND_AND_CLOSE; } -void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { +void file(struct mg_connection *nc, struct mg_http_message *hm) { - if (path->len != MD5_STR_LENGTH + 2) { - LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p) - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + if (hm->uri.len != MD5_STR_LENGTH + 2) { + LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr) + mg_http_reply(nc, 404, "", "Not found"); return; } char arg_md5[MD5_STR_LENGTH]; - memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH); + memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH); *(arg_md5 + MD5_STR_LENGTH - 1) = '\0'; const char *next = arg_md5; @@ -380,8 +334,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path index_id = cJSON_GetObjectItem(source, "index"); if (index_id == NULL) { cJSON_Delete(doc); - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_reply(nc, 404, "", "Not found"); return; } cJSON *parent = cJSON_GetObjectItem(source, "parent"); @@ -395,8 +348,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path if (idx == NULL) { cJSON_Delete(doc); - nc->flags |= MG_F_SEND_AND_CLOSE; - mg_http_send_error(nc, 404, NULL); + mg_http_reply(nc, 404, "", "Not found"); return; } @@ -417,8 +369,6 @@ void status(struct mg_connection *nc) { } free(status); - - nc->flags |= MG_F_SEND_AND_CLOSE; } typedef struct { @@ -464,35 +414,32 @@ tag_req_t *parse_tag_request(cJSON *json) { return req; } -void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) { - if (path->len != MD5_STR_LENGTH + 4) { - LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) path->len, path->p) - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; +void tag(struct mg_connection *nc, struct mg_http_message *hm) { + if (hm->uri.len != MD5_STR_LENGTH + 4) { + LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr) + mg_http_reply(nc, 404, "", "Not found"); return; } char arg_index[MD5_STR_LENGTH]; - memcpy(arg_index, hm->uri.p + 5, MD5_STR_LENGTH); + memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH); *(arg_index + MD5_STR_LENGTH - 1) = '\0'; if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) { LOG_DEBUG("serve.c", "Invalid tag request") - mg_http_send_error(nc, 400, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_reply(nc, 404, "", "Not found"); return; } store_t *store = get_tag_store(arg_index); if (store == NULL) { LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index) - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_reply(nc, 404, "", "Not found"); return; } char *body = malloc(hm->body.len + 1); - memcpy(body, hm->body.p, hm->body.len); + memcpy(body, hm->body.ptr, hm->body.len); *(body + hm->body.len) = '\0'; cJSON *json = cJSON_Parse(body); @@ -501,8 +448,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index) cJSON_Delete(json); free(body); - mg_http_send_error(nc, 400, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_reply(nc, 400, "", "Invalid request"); return; } @@ -545,7 +491,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) char url[4096]; snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id); - nc->user_data = web_post_async(url, buf); + nc->fn_data = web_post_async(url, buf); } else { cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name)); @@ -565,7 +511,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) char url[4096]; snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id); - nc->user_data = web_post_async(url, buf); + nc->fn_data = web_post_async(url, buf); } char *json_str = cJSON_PrintUnformatted(arr); @@ -579,92 +525,73 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) free(body); } -int validate_auth(struct mg_connection *nc, struct http_message *hm) { +int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) { char user[256] = {0,}; char pass[256] = {0,}; - int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass)); - if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) { - mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n" - "WWW-Authenticate: Basic realm=\"sist2\"\r\n" - "Content-Length: 0\r\n\r\n"); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_creds(hm, user, sizeof(user), pass, sizeof(pass)); + if (strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) { + mg_http_reply(nc, 401, "WWW-Authenticate: Basic realm=\"sist2\"", ""); return FALSE; } return TRUE; } -static void ev_router(struct mg_connection *nc, int ev, void *p) { - struct mg_str scheme; - struct mg_str user_info; - struct mg_str host; - unsigned int port; - struct mg_str path; - struct mg_str query; - struct mg_str fragment; - - if (ev == MG_EV_HTTP_REQUEST) { - struct http_message *hm = (struct http_message *) p; - - if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) { - mg_http_send_error(nc, 400, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; - return; - } +static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(void *fn_data)) { + if (ev == MG_EV_HTTP_MSG) { + struct mg_http_message *hm = (struct mg_http_message *) ev_data; if (WebCtx.auth_enabled == TRUE) { if (!validate_auth(nc, hm)) { + nc->is_closing = 1; return; } } - if (is_equal(&path, &((struct mg_str) MG_MK_STR("/")))) { + if (mg_http_match_uri(hm, "/")) { search_index(nc); - } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/css")))) { + } else if (mg_http_match_uri(hm, "/css")) { style(nc, hm); - } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/stats")))) { + } else if (mg_http_match_uri(hm, "/stats")) { stats(nc); - } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jslib")))) { + } else if (mg_http_match_uri(hm, "/jslib")) { javascript_lib(nc); - } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jssearch")))) { + } else if (mg_http_match_uri(hm, "/jssearch")) { javascript_search(nc); - } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/img/sprite-skin-flat.png")))) { + } else if (mg_http_match_uri(hm, "/img/sprite-skin-flat.png")) { img_sprite_skin_flat(nc, hm); - } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/es")))) { + } else if (mg_http_match_uri(hm, "/es")) { search(nc, hm); - } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/i")))) { + } else if (mg_http_match_uri(hm, "/i")) { index_info(nc); - } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/status")))) { + } else if (mg_http_match_uri(hm, "/status")) { status(nc); - } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/f/")))) { - file(nc, hm, &path); - } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/t/")))) { - thumbnail(nc, hm, &path); - } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/s/")))) { - stats_files(nc, hm, &path); - } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/tag/")))) { - if (WebCtx.tag_auth_enabled == TRUE) { - if (!validate_auth(nc, hm)) { - return; - } + } else if (mg_http_match_uri(hm, "/f/*")) { + file(nc, hm); + } else if (mg_http_match_uri(hm, "/t/*/*")) { + thumbnail(nc, hm); + } else if (mg_http_match_uri(hm, "/s/*")) { + stats_files(nc, hm); + } else if (mg_http_match_uri(hm, "/tag/*")) { + if (WebCtx.tag_auth_enabled == TRUE && !validate_auth(nc, hm)) { + nc->is_closing = 1; + return; } - tag(nc, hm, &path); - } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) { - document_info(nc, hm, &path); + tag(nc, hm); + } else if (mg_http_match_uri(hm, "/d/*")) { + document_info(nc, hm); } else { - mg_http_send_error(nc, 404, NULL); - nc->flags |= MG_F_SEND_AND_CLOSE; + mg_http_reply(nc, 404, "", "Page not found"); } } else if (ev == MG_EV_POLL) { - if (nc->user_data != NULL) { + if (nc->fn_data != NULL) { //Waiting for ES reply - subreq_ctx_t *ctx = (subreq_ctx_t *) nc->user_data; + subreq_ctx_t *ctx = (subreq_ctx_t *) nc->fn_data; web_post_async_poll(ctx); if (ctx->done == TRUE) { - response_t *r = ctx->response; if (r->status_code == 200) { @@ -684,14 +611,14 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) { free(json_str); free(tmp); } - mg_http_send_error(nc, 500, NULL); + + mg_http_reply(nc, 500, "", ""); } free_response(r); free(ctx->data); free(ctx); - nc->flags |= MG_F_SEND_AND_CLOSE; - nc->user_data = NULL; + nc->fn_data = NULL; } } } @@ -702,15 +629,18 @@ void serve(const char *listen_address) { printf("Starting web server @ http://%s\n", listen_address); struct mg_mgr mgr; - mg_mgr_init(&mgr, NULL); + mg_mgr_init(&mgr); - struct mg_connection *nc = mg_bind(&mgr, listen_address, ev_router); + int ok = 1; + + struct mg_connection *nc = mg_http_listen(&mgr, listen_address, ev_router, NULL); if (nc == NULL) { LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address) } - mg_set_protocol_http_websocket(nc); - for (;;) { + while (ok) { mg_mgr_poll(&mgr, 10); } + mg_mgr_free(&mgr); + LOG_INFO("serve.c", "Finished web event loop") } From 908def1016ca81b746b89dc2839d232aceaef0ce Mon Sep 17 00:00:00 2001 From: simon987 Date: Wed, 5 May 2021 14:13:46 -0400 Subject: [PATCH 7/7] Fix build, update dockerfile --- .dockerignore | 23 +++++++++++++++++++++++ CMakeLists.txt | 12 +++++++----- DockerArm64/Dockerfile | 22 ---------------------- DockerArm64/build.sh | 13 ------------- Docker/Dockerfile => Dockerfile | 14 +++++++++++--- Dockerfile.arm64 | 1 + src/cli.c | 1 + src/cli.h | 1 + src/main.c | 5 ++++- src/static/search.html | 2 +- src/static/stats.html | 2 +- src/web/serve.c | 2 +- 12 files changed, 51 insertions(+), 47 deletions(-) create mode 100644 .dockerignore delete mode 100644 DockerArm64/Dockerfile delete mode 100755 DockerArm64/build.sh rename Docker/Dockerfile => Dockerfile (76%) create mode 100644 Dockerfile.arm64 diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..571043a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,23 @@ +.idea +*/thumbs +*.cbp +CMakeCache.txt +CMakeFiles +cmake-build-debug +cmake_install.cmake +Makefile +*.out +LOG +sist2* +index.sist2/ +bundle*.css +bundle.js +**/*.a +**/vgcore.* +build/ +.git/ +third-party/libscan/libscan-test-files/ +**/ext_ffmpeg +**/ext_libmobi +**/scan_a_test +Dockerfile \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a02c37..34516e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ project(sist2 C) option(SIST_DEBUG "Build a debug executable" on) -set(BUILD_TESTS off) +set(BUILD_TESTS on) add_subdirectory(third-party/libscan) set(ARGPARSE_SHARED off) add_subdirectory(third-party/argparse) @@ -36,14 +36,15 @@ add_executable(sist2 target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) +find_package(PkgConfig REQUIRED) + +pkg_search_module(GLIB REQUIRED glib-2.0) + find_package(lmdb CONFIG REQUIRED) find_package(cJSON CONFIG REQUIRED) -find_package(unofficial-glib CONFIG REQUIRED) find_package(unofficial-mongoose CONFIG REQUIRED) find_package(CURL CONFIG REQUIRED) -#find_package(OpenSSL REQUIRED) - target_include_directories( sist2 PUBLIC @@ -51,6 +52,7 @@ target_include_directories( ${CMAKE_SOURCE_DIR}/third-party/utf8.h/ ${CMAKE_SOURCE_DIR}/third-party/libscan/ ${CMAKE_SOURCE_DIR}/ + ${GLIB_INCLUDE_DIRS} ) target_compile_options( @@ -103,7 +105,7 @@ target_link_libraries( lmdb cjson argparse - unofficial::glib::glib + ${GLIB_LDFLAGS} unofficial::mongoose::mongoose CURL::libcurl diff --git a/DockerArm64/Dockerfile b/DockerArm64/Dockerfile deleted file mode 100644 index bb910ec..0000000 --- a/DockerArm64/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -FROM ubuntu:19.10 -MAINTAINER simon987 - -RUN apt update -RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \ - curl libtiff5 libpng16-16 libpcre3 - -RUN mkdir -p /usr/share/tessdata && \ - cd /usr/share/tessdata/ && \ - curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\ - curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\ - curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\ - curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\ - curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\ - curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh - -ADD sist2_arm64 /root/sist2 - -ENV LANG C.UTF-8 -ENV LC_ALL C.UTF-8 - -ENTRYPOINT ["/root/sist2"] diff --git a/DockerArm64/build.sh b/DockerArm64/build.sh deleted file mode 100755 index 55d30ab..0000000 --- a/DockerArm64/build.sh +++ /dev/null @@ -1,13 +0,0 @@ -rm ./sist2_arm64 -cp ../sist2_arm64.gz . -gzip -d sist2_arm64.gz - -version=$(./sist2_arm64 --version) - -echo "Version ${version}" -docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest - -docker push simon987/sist2-arm64:"${version}" -docker push simon987/sist2-arm64:latest - -docker run --rm simon987/sist2-arm64 -v \ No newline at end of file diff --git a/Docker/Dockerfile b/Dockerfile similarity index 76% rename from Docker/Dockerfile rename to Dockerfile index 08f6e12..4b525cd 100644 --- a/Docker/Dockerfile +++ b/Dockerfile @@ -1,6 +1,14 @@ -FROM ubuntu:20.04 +FROM simon987/sist2-build as build MAINTAINER simon987 +WORKDIR /build/ +ADD . /build/ +RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake . +RUN make -j$(nproc) +RUN strip sist2 + +FROM ubuntu:20.10 + RUN apt update RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \ curl libtiff5 libpng16-16 libpcre3 @@ -12,9 +20,9 @@ RUN mkdir -p /usr/share/tessdata && \ curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\ curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\ curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\ - curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh + curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata -ADD sist2 /root/sist2 +COPY --from=build /build/sist2 /root/sist2 ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 diff --git a/Dockerfile.arm64 b/Dockerfile.arm64 new file mode 100644 index 0000000..f87f5c1 --- /dev/null +++ b/Dockerfile.arm64 @@ -0,0 +1 @@ +# TODO \ No newline at end of file diff --git a/src/cli.c b/src/cli.c index 1cd8f2c..2aa3c54 100644 --- a/src/cli.c +++ b/src/cli.c @@ -227,6 +227,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { LOG_DEBUGF("cli.c", "arg depth=%d", args->depth) LOG_DEBUGF("cli.c", "arg path=%s", args->path) LOG_DEBUGF("cli.c", "arg archive=%s", args->archive) + LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase) LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang) LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path) LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex) diff --git a/src/cli.h b/src/cli.h index 90242aa..085807b 100644 --- a/src/cli.h +++ b/src/cli.h @@ -18,6 +18,7 @@ typedef struct scan_args { char *path; char *archive; archive_mode_t archive_mode; + char *archive_passphrase; char *tesseract_lang; const char *tesseract_path; char *exclude_regex; diff --git a/src/main.c b/src/main.c index 54d8b14..b3e2bdb 100644 --- a/src/main.c +++ b/src/main.c @@ -21,7 +21,7 @@ #define EPILOG "Made by simon987 . Released under GPL-3.0" -static const char *const Version = "2.9.0"; +static const char *const Version = "2.9.1"; static const char *const usage[] = { "sist2 scan [OPTION]... PATH", "sist2 index [OPTION]... INDEX", @@ -99,6 +99,7 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.arc_ctx.log = _log; ScanCtx.arc_ctx.logf = _logf; ScanCtx.arc_ctx.parse = (parse_callback_t) parse; + memset(ScanCtx.arc_ctx.passphrase, 0, sizeof(ScanCtx.arc_ctx.passphrase)); // Comic ScanCtx.comic_ctx.log = _log; @@ -440,6 +441,8 @@ int main(int argc, const char *argv[]) { OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). " "skip: Don't parse, list: only get file names as text, " "shallow: Don't parse archives inside archives. DEFAULT: recurse"), + OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase, "Passphrase for encrypted archive files"), + OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see " "which are installed on your machine)"), OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"), diff --git a/src/static/search.html b/src/static/search.html index d55a765..b675a76 100644 --- a/src/static/search.html +++ b/src/static/search.html @@ -12,7 +12,7 @@