Rework user scripts, update DB schema to support embeddings

This commit is contained in:
2023-08-19 15:46:19 -04:00
parent 27188b6fa0
commit 857f3315c2
62 changed files with 1842 additions and 1250 deletions

View File

@@ -36,9 +36,52 @@ static struct mg_http_serve_opts IndexServeOpts = {
.ssi_pattern = NULL,
.root_dir = NULL,
.mime_types = "",
.extra_headers = HTTP_SERVER_HEADER "Cross-Origin-Embedder-Policy: require-corp\r\nCross-Origin-Opener-Policy: same-origin\r\n"
.extra_headers = HTTP_SERVER_HEADER HTTP_CROSS_ORIGIN_HEADERS
};
/**
 * HTTP handler for "/e/<index_id>/<doc_id>/<model_id>".
 *
 * Fetches the embedding of a document for a given model through database_get_embedding()
 * and sends it back as JSON. Replies "not found" when the URI does not have the expected
 * length, when web_get_database() returns NULL for the index, or when
 * database_get_embedding() returns NULL.
 *
 * @param nc connection the reply is written to
 * @param hm parsed HTTP request; only hm->uri is read
 */
void get_embedding(struct mg_connection *nc, struct mg_http_message *hm) {

    // Emitted on every call while the Elasticsearch backend lacks kNN support.
    if (WebCtx.search_backend == ES_SEARCH_BACKEND && WebCtx.es_version != NULL && !HAS_KNN(WebCtx.es_version)) {
        LOG_WARNINGF("serve.c",
                     "Your Elasticsearch version (%d.%d.%d) does not support approximate kNN search and will"
                     " fallback to a brute-force search. Please install ES 8.x.x+ for better search performance.",
                     WebCtx.es_version->major, WebCtx.es_version->minor, WebCtx.es_version->patch);
    }

    // Expected layout: "/e/" + index id (SIST_INDEX_ID_LEN - 1 chars) + "/"
    //                + doc id (SIST_DOC_ID_LEN - 1 chars) + "/" + 3-char model id.
    if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
        LOG_DEBUGF("serve.c", "Invalid embedding path: %.*s", (int) hm->uri.len, hm->uri.ptr);
        HTTP_REPLY_NOT_FOUND
        return;
    }

    char doc_id[SIST_DOC_ID_LEN];
    char index_id[SIST_INDEX_ID_LEN];

    // Each copy also grabs the trailing '/' separator, which is then overwritten
    // with the string terminator.
    memcpy(index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
    *(index_id + SIST_INDEX_ID_LEN - 1) = '\0';
    memcpy(doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
    *(doc_id + SIST_DOC_ID_LEN - 1) = '\0';

    // The length check above leaves exactly 3 characters for the model id. Parse them from a
    // NUL-terminated copy so that strtol() can never scan past the end of the URI.
    // NOTE(review): hm->uri is presumably a slice of the request buffer without its own
    // terminator - confirm against the mongoose version in use.
    char model_str[4];
    memcpy(model_str, hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 3, sizeof(model_str) - 1);
    model_str[sizeof(model_str) - 1] = '\0';
    int model_id = (int) strtol(model_str, NULL, 10);

    database_t *db = web_get_database(index_id);
    if (db == NULL) {
        LOG_DEBUGF("serve.c", "Could not get database for index: %s", index_id);
        HTTP_REPLY_NOT_FOUND
        return;
    }

    cJSON *json = database_get_embedding(db, doc_id, model_id);
    if (json == NULL) {
        HTTP_REPLY_NOT_FOUND
        return;
    }

    mg_send_json(nc, json);
    // The JSON tree is released here once the reply has been handed to mg_send_json().
    cJSON_Delete(json);
}
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_INDEX_ID_LEN + 7) {
@@ -316,6 +359,7 @@ void index_info(struct mg_connection *nc) {
cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
cJSON_AddBoolToObject(json, "esVersionLegacy", IS_LEGACY_VERSION(WebCtx.es_version));
cJSON_AddBoolToObject(json, "esVersionHasKnn", HAS_KNN(WebCtx.es_version));
cJSON_AddStringToObject(json, "lang", WebCtx.lang);
cJSON_AddBoolToObject(json, "auth0Enabled", WebCtx.auth0_enabled);
@@ -708,6 +752,9 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
return;
}
tag(nc, hm);
} else if (mg_http_match_uri(hm, "/e/*/*/*")) {
get_embedding(nc, hm);
return;
} else {
HTTP_REPLY_NOT_FOUND
}

View File

@@ -262,8 +262,8 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
: DEFAULT_HIGHLIGHT_CONTEXT_SIZE;
req->model = req_model.val ? req_model.val->valueint : 0;
req->embedding = req_model.val
? get_float_buffer(req_embedding.val, &req->embedding_size)
: NULL;
? get_float_buffer(req_embedding.val, &req->embedding_size)
: NULL;
cJSON_Delete(json);

View File

@@ -3,7 +3,7 @@
// Serves the index_html asset on the given connection: response headers
// (status 200, body size sizeof(index_html)) followed by the index_html bytes.
void web_serve_asset_index_html(struct mg_connection *nc) {
// NOTE(review): the two web_send_headers() lines below differ only by the
// HTTP_CROSS_ORIGIN_HEADERS prefix. In this diff view they look like the before/after
// versions of a single changed line rather than two consecutive calls - confirm.
web_send_headers(nc, 200, sizeof(index_html), "Content-Type: text/html");
web_send_headers(nc, 200, sizeof(index_html), HTTP_CROSS_ORIGIN_HEADERS "Content-Type: text/html");
mg_send(nc, index_html, sizeof(index_html));
}

View File

@@ -7,6 +7,8 @@
#include <mongoose.h>
#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n"
// See https://web.dev/coop-coep/
#define HTTP_CROSS_ORIGIN_HEADERS "Cross-Origin-Embedder-Policy: require-corp\r\nCross-Origin-Opener-Policy: same-origin\r\n"
index_t *web_get_index_by_id(const char *index_id);