SQLite search backend

This commit is contained in:
2023-05-18 14:16:11 -04:00
parent 1cfceba518
commit 944c224904
46 changed files with 3261 additions and 1296 deletions

View File

@@ -432,7 +432,6 @@ int sqlite_index_args_validate(sqlite_index_args_t *args, int argc, const char *
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path);
LOG_DEBUGF("cli.c", "arg search_index_path=%s", args->search_index_path);
LOG_DEBUGF("cli.c", "arg optimize_index=%d", args->optimize_database);
return 0;
}
@@ -446,6 +445,16 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->search_index_path != NULL && args->es_url != NULL) {
LOG_FATAL("cli.c", "--search-index and --es-url arguments are mutually exclusive.");
}
if (args->search_index_path != NULL && args->es_index != NULL) {
LOG_FATAL("cli.c", "--search-index and --es-index arguments are mutually exclusive.");
}
if (args->search_index_path != NULL && args->es_insecure_ssl == TRUE) {
LOG_FATAL("cli.c", "--search-index and --es-insecure_ssl arguments are mutually exclusive.");
}
if (args->es_url == NULL) {
args->es_url = DEFAULT_ES_URL;
}
@@ -558,9 +567,25 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
free(abs_path);
}
if (args->search_index_path != NULL) {
char *abs_path = abspath(args->search_index_path);
if (abs_path == NULL) {
LOG_FATALF("cli.c", "Search index not found: %s", args->search_index_path);
}
args->es_index = NULL;
args->es_url = NULL;
args->es_insecure_ssl = FALSE;
args->search_backend = SQLITE_SEARCH_BACKEND;
} else {
args->search_backend = ES_SEARCH_BACKEND;
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url);
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index);
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl);
LOG_DEBUGF("cli.c", "arg search_index_path=%s", args->search_index_path);
LOG_DEBUGF("cli.c", "arg search_backend=%d", args->search_backend);
LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline);
LOG_DEBUGF("cli.c", "arg dev=%d", args->dev);
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address);

View File

@@ -69,13 +69,18 @@ typedef struct index_args {
typedef struct {
char *index_path;
char *search_index_path;
int optimize_database;
} sqlite_index_args_t;
typedef enum {
ES_SEARCH_BACKEND,
SQLITE_SEARCH_BACKEND,
} search_backend_t;
typedef struct web_args {
char *es_url;
char *es_index;
int es_insecure_ssl;
char *search_index_path;
char *listen_address;
char *credentials;
char *tag_credentials;
@@ -94,6 +99,7 @@ typedef struct web_args {
int index_count;
int dev;
const char **indices;
search_backend_t search_backend;
} web_args_t;
typedef struct exec_args {

View File

@@ -76,6 +76,7 @@ typedef struct {
char *es_url;
es_version_t *es_version;
char *es_index;
database_t *search_db;
int es_insecure_ssl;
int index_count;
char *auth_user;
@@ -93,6 +94,7 @@ typedef struct {
struct index_t indices[256];
char lang[10];
int dev;
int search_backend;
} WebCtx_t;

View File

@@ -14,12 +14,45 @@ database_t *database_create(const char *filename, database_type_t type) {
strcpy(db->filename, filename);
db->type = type;
db->select_thumbnail_stmt = NULL;
db->db = NULL;
db->tag_array = NULL;
db->ipc_ctx = NULL;
return db;
}
int tag_matches(const char *query, const char *tag) {
size_t query_len = strlen(query);
size_t tag_len = strlen(tag);
if (query_len >= tag_len) {
return FALSE;
}
return strncmp(tag, query, query_len) == 0 && *(tag + query_len) == '.';
}
void tag_matches_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
const char *tag = (const char *) sqlite3_value_text(argv[0]);
char **tags = *(char ***) sqlite3_user_data(ctx);
array_foreach(tags) {
if (tag_matches(tags[i], tag)) {
sqlite3_result_int(ctx, TRUE);
return;
}
}
sqlite3_result_int(ctx, FALSE);
}
__always_inline
static int sep_rfind(const char *str) {
for (int i = (int) strlen(str); i >= 0; i--) {
@@ -48,6 +81,24 @@ void path_parent_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
sqlite3_result_text(ctx, parent, stop, SQLITE_TRANSIENT);
}
void random_func(sqlite3_context *ctx, int argc, UNUSED(sqlite3_value **argv)) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_INTEGER) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
char state_buf[128] = {0,};
struct random_data buf;
int result;
long seed = sqlite3_value_int64(argv[0]);
initstate_r((int) seed, state_buf, sizeof(state_buf), &buf);
random_r(&buf, &result);
sqlite3_result_int(ctx, result);
}
void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
@@ -87,7 +138,8 @@ void database_open(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
sqlite3_busy_timeout(db->db, 1000);
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA cache_size = -200000;", NULL, NULL, NULL));
// TODO: Optional argument?
// CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA cache_size = -200000;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA synchronous = OFF;", NULL, NULL, NULL));
if (db->type == INDEX_DATABASE) {
@@ -119,6 +171,10 @@ void database_open(database_t *db) {
-1,
&db->write_thumbnail_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT json_data FROM document WHERE id=?", -1,
&db->get_document, NULL));
// Create functions
sqlite3_create_function(
db->db,
@@ -170,6 +226,61 @@ void database_open(database_t *db) {
db->db, "INSERT INTO index_job (doc_id,type,line) VALUES (?,?,?);", -1,
&db->insert_index_job_stmt, NULL));
} else if (db->type == FTS_DATABASE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT path, count FROM path_index"
" WHERE index_id=? AND depth BETWEEN ? AND ?"
" LIMIT 65536", -1,
&db->fts_search_paths, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT json_data FROM document_index"
" WHERE id=?", -1,
&db->fts_get_document, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT DISTINCT tag FROM tag"
" WHERE tag GLOB (? || '*') ORDER BY tag LIMIT 100", -1,
&db->fts_suggest_tag, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT tag, count(*) FROM tag GROUP BY tag", -1,
&db->fts_get_tags, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT path, count FROM path_index"
" WHERE (index_id=?1 OR ?1 IS NULL) AND depth BETWEEN ? AND ?"
" AND (path = ?4 or path GLOB ?5)"
" LIMIT 65536", -1,
&db->fts_search_paths_w_prefix, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT path, count FROM path_index"
" WHERE depth BETWEEN ? AND ?"
" AND path GLOB ?"
" LIMIT 65536", -1,
&db->fts_suggest_paths, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT * FROM stats", -1,
&db->fts_date_range, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT mime, sum(count) FROM mime_index WHERE mime is not NULL GROUP BY mime", -1,
&db->fts_get_mimetypes, NULL));
sqlite3_create_function(
db->db,
"random_seeded",
1,
SQLITE_UTF8,
NULL,
random_func,
NULL,
NULL
);
sqlite3_create_function(
db->db,
"path_parent",
@@ -180,8 +291,33 @@ void database_open(database_t *db) {
NULL,
NULL
);
sqlite3_create_function(
db->db,
"tag_matches",
1,
SQLITE_UTF8,
&db->tag_array,
tag_matches_func,
NULL,
NULL
);
}
if (db->type == FTS_DATABASE || db->type == INDEX_DATABASE) {
// Tag table is the same schema for FTS database & index database
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"INSERT INTO tag (id, tag) VALUES (?,?) ON CONFLICT DO NOTHING;",
-1,
&db->write_tag_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"DELETE FROM tag WHERE id=? AND tag=?;",
-1,
&db->delete_tag_stmt, NULL));
}
}
void database_close(database_t *db, int optimize) {
@@ -193,7 +329,9 @@ void database_close(database_t *db, int optimize) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA optimize;", NULL, NULL, NULL));
}
sqlite3_close(db->db);
if (db->db) {
sqlite3_close(db->db);
}
if (db->type == IPC_PRODUCER_DATABASE) {
remove(db->filename);
@@ -622,3 +760,39 @@ void database_add_work(database_t *db, job_t *job) {
pthread_cond_signal(&db->ipc_ctx->has_work_cond);
pthread_mutex_unlock(&db->ipc_ctx->mutex);
}
void database_write_tag(database_t *db, char *doc_id, char *tag) {
sqlite3_bind_text(db->write_tag_stmt, 1, doc_id, -1, SQLITE_STATIC);
sqlite3_bind_text(db->write_tag_stmt, 2, tag, -1, SQLITE_STATIC);
CRASH_IF_STMT_FAIL(sqlite3_step(db->write_tag_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_tag_stmt));
}
void database_delete_tag(database_t *db, char *doc_id, char *tag) {
sqlite3_bind_text(db->delete_tag_stmt, 1, doc_id, -1, SQLITE_STATIC);
sqlite3_bind_text(db->delete_tag_stmt, 2, tag, -1, SQLITE_STATIC);
CRASH_IF_STMT_FAIL(sqlite3_step(db->delete_tag_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->delete_tag_stmt));
}
cJSON *database_get_document(database_t *db, char *doc_id) {
sqlite3_bind_text(db->get_document, 1, doc_id, -1, SQLITE_STATIC);
int ret = sqlite3_step(db->get_document);
CRASH_IF_STMT_FAIL(ret);
cJSON *json;
if (ret == SQLITE_ROW) {
const char *json_str = sqlite3_column_text(db->get_document, 0);
json = cJSON_Parse(json_str);
} else {
json = NULL;
}
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->get_document));
return json;
}

View File

@@ -33,6 +33,16 @@ typedef enum {
JOB_PARSE_JOB
} job_type_t;
typedef enum {
FTS_SORT_INVALID,
FTS_SORT_SCORE,
FTS_SORT_SIZE,
FTS_SORT_MTIME,
FTS_SORT_RANDOM,
FTS_SORT_NAME,
FTS_SORT_ID,
} fts_sort_t;
typedef struct {
job_type_t type;
union {
@@ -53,6 +63,11 @@ typedef struct {
char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t;
typedef struct {
double date_min;
double date_max;
} database_summary_stats_t;
typedef struct database {
char filename[PATH_MAX];
database_type_t type;
@@ -67,12 +82,27 @@ typedef struct database {
sqlite3_stmt *write_document_stmt;
sqlite3_stmt *write_document_sidecar_stmt;
sqlite3_stmt *write_thumbnail_stmt;
sqlite3_stmt *get_document;
sqlite3_stmt *delete_tag_stmt;
sqlite3_stmt *write_tag_stmt;
sqlite3_stmt *insert_parse_job_stmt;
sqlite3_stmt *insert_index_job_stmt;
sqlite3_stmt *pop_parse_job_stmt;
sqlite3_stmt *pop_index_job_stmt;
sqlite3_stmt *fts_search_paths;
sqlite3_stmt *fts_search_paths_w_prefix;
sqlite3_stmt *fts_suggest_paths;
sqlite3_stmt *fts_date_range;
sqlite3_stmt *fts_get_mimetypes;
sqlite3_stmt *fts_get_document;
sqlite3_stmt *fts_suggest_tag;
sqlite3_stmt *fts_get_tags;
char **tag_array;
database_ipc_ctx_t *ipc_ctx;
} database_t;
@@ -134,7 +164,7 @@ database_iterator_t *database_create_treemap_iterator(database_t *db, long thres
treemap_row_t database_treemap_iter(database_iterator_t *iter);
#define database_treemap_iter_foreach(element, iter) \
for (treemap_row_t element = database_treemap_iter(iter); element.path != NULL; element = database_treemap_iter(iter))
for (treemap_row_t element = database_treemap_iter(iter); (element).path != NULL; (element) = database_treemap_iter(iter))
void database_generate_stats(database_t *db, double treemap_threshold);
@@ -145,14 +175,12 @@ job_t *database_get_work(database_t *db, job_type_t job_type);
void database_add_work(database_t *db, job_t *job);
//void database_index(database_t *db);
cJSON *database_get_stats(database_t *db, database_stat_type_d type);
#define CRASH_IF_STMT_FAIL(x) do { \
int return_value = x; \
if (return_value != SQLITE_DONE && return_value != SQLITE_ROW) { \
LOG_FATALF("database.c", "Sqlite error @ database.c:%d : (%d) %s", __LINE__, return_value, sqlite3_errmsg(db->db)); \
LOG_FATALF("database.c", "Sqlite error @ %s:%d : (%d) %s", __BASE_FILE__, __LINE__, return_value, sqlite3_errmsg(db->db)); \
} \
} while (0)
@@ -169,4 +197,33 @@ void database_fts_index(database_t *db);
void database_fts_optimize(database_t *db);
#endif //SIST2_DATABASE_H
cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_min, int depth_max, const char *prefix,
int suggest);
cJSON *database_fts_get_mimetypes(database_t *db);
database_summary_stats_t database_fts_get_date_range(database_t *db);
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
long size_max, long date_min, long date_max, int page_size,
char **index_ids, char **mime_types, char **tags, int sort_asc,
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
int highlight, int highlight_context_size);
void database_write_tag(database_t *db, char *doc_id, char *tag);
void database_delete_tag(database_t *db, char *doc_id, char *tag);
void database_fts_detach(database_t *db);
cJSON *database_fts_get_document(database_t *db, char *doc_id);
database_summary_stats_t database_fts_sync_tags(database_t *db);
cJSON *database_fts_suggest_tag(database_t *db, char *prefix);
cJSON *database_fts_get_tags(database_t *db);
cJSON *database_get_document(database_t *db, char *doc_id);
#endif

View File

@@ -1,6 +1,13 @@
#include "database.h"
#include "src/ctx.h"
void database_fts_detach(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "DETACH DATABASE fts",
NULL, NULL, NULL
));
}
void database_fts_attach(database_t *db, const char *fts_database_path) {
LOG_DEBUGF("database_fts.c", "Attaching to %s", fts_database_path);
@@ -16,62 +23,137 @@ void database_fts_attach(database_t *db, const char *fts_database_path) {
sqlite3_finalize(stmt);
}
int database_fts_get_max_path_depth(database_t *db) {
sqlite3_stmt *stmt;
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT MAX(depth) FROM path_tmp", -1, &stmt, NULL));
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
int max_depth = sqlite3_column_int(stmt, 0);
sqlite3_finalize(stmt);
return max_depth;
}
void database_fts_index(database_t *db) {
LOG_INFO("database_fts.c", "Creating content table.");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"WITH docs AS (SELECT document.id as id,\n"
" (SELECT id FROM descriptor) as index_id,\n"
" size,\n"
" document.json_data ->> 'path' as path,\n"
" length(document.json_data->>'path') - length(REPLACE(document.json_data->>'path', '/', '')) as path_depth,\n"
" document.json_data ->> 'mime' as mime,\n"
" mtime,\n"
" CASE\n"
" WHEN sc.json_data IS NULL THEN CASE\n"
" WHEN t.tag IS NULL THEN json_set(\n"
" document.json_data, '$._id',\n"
" document.id, '$.size',\n"
" document.size, '$.mtime',\n"
" document.mtime)\n"
" ELSE json_set(document.json_data, '$._id',\n"
" document.id, '$.size',\n"
" document.size, '$.mtime',\n"
" document.mtime, '$.tag',\n"
" json_group_array(t.tag)) END\n"
" ELSE CASE\n"
" WHEN t.tag IS NULL THEN json_patch(\n"
" json_set(document.json_data, '$._id', document.id, '$.size',\n"
" document.size, '$.mtime', document.mtime),\n"
" sc.json_data)\n"
" ELSE json_set(json_patch(document.json_data, sc.json_data), '$._id',\n"
" document.id, '$.size', document.size, '$.mtime',\n"
" document.mtime, '$.tag',\n"
" json_group_array(t.tag)) END END as json_data\n"
" FROM document\n"
" LEFT JOIN document_sidecar sc ON document.id = sc.id\n"
" LEFT JOIN tag t ON document.id = t.id\n"
" GROUP BY document.id)\n"
"INSERT\n"
"INTO fts.document_index (id, index_id, size, path, path_depth, mtime, mime, json_data)\n"
"SELECT *\n"
"FROM docs\n"
"WHERE true\n"
"on conflict (id, index_id) do update set size=excluded.size,\n"
" mtime=excluded.mtime,\n"
" json_data=excluded.json_data;",
"WITH docs AS ("
" SELECT document.id as id, (SELECT id FROM descriptor) as index_id, size,"
" document.json_data ->> 'name' as name,"
" document.json_data ->> 'path' as path,"
" mtime,"
" document.json_data ->> 'mime' as mime,"
" CASE"
" WHEN sc.json_data IS NULL THEN"
" json_set(document.json_data, "
" '$._id',document.id,"
" '$.size',document.size, "
" '$.mtime',document.mtime)"
" ELSE json_patch("
" json_set(document.json_data,"
" '$._id',document.id,"
" '$.size',document.size,"
" '$.mtime', document.mtime),"
" sc.json_data) END"
" FROM document"
" LEFT JOIN document_sidecar sc ON document.id = sc.id"
" GROUP BY document.id)"
" INSERT"
" INTO fts.document_index (id, index_id, size, name, path, mtime, mime, json_data)"
" SELECT * FROM docs WHERE true"
" on conflict (id, index_id) do update set "
" size=excluded.size, mtime=excluded.mtime, mime=excluded.mime, json_data=excluded.json_data;",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Deleting old documents.");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM fts.document_index"
" WHERE id IN (SELECT id FROM delete_list)"
" AND index_id = (SELECT id FROM descriptor);",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Generating summary stats");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM fts.stats", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "INSERT INTO fts.stats "
"SELECT min(mtime), max(mtime) FROM fts.document_index",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Generating mime index");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "DELETE FROM fts.mime_index;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "INSERT INTO fts.mime_index (index_id, mime, count) "
"SELECT index_id, mime, count(*) FROM fts.document_index GROUP BY index_id, mime",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Generating path index");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"CREATE TEMP TABLE path_tmp ("
" path TEXT,"
" index_id TEXT,"
" count INTEGER NOT NULL,"
" depth INTEGER NOT NULL,"
" children INTEGER NOT NULL DEFAULT(0),"
" total INTEGER AS (count + children),"
" PRIMARY KEY (path, index_id)"
");", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"INSERT INTO path_tmp (path, index_id, count, depth)"
" SELECT path, index_id, count(*), CASE WHEN length(json_data->>'path') == 0 THEN 0"
" ELSE 1 + length(json_data->>'path') - length(REPLACE(json_data->>'path', '/', ''))"
" END as depth FROM document_index WHERE depth > 0"
" GROUP BY path", NULL, NULL, NULL));
int max_depth = database_fts_get_max_path_depth(db);
for (int i = max_depth; i > 1; i--) {
sqlite3_stmt *stmt;
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"INSERT INTO path_tmp (path, index_id, children, depth, count)"
" SELECT path_parent(path) parent, index_id, (SELECT COALESCE(sum(count), 0) FROM path_tmp WHERE path "
" BETWEEN path_parent(p.path) || '/' AND path_parent(p.path) || '/𘚟' AND index_id = p.index_id) as cnt, depth-1, 0 "
" FROM path_tmp p WHERE depth=? GROUP BY parent"
" ON CONFLICT(path, index_id) DO UPDATE SET children=excluded.children",
-1, &stmt, NULL));
sqlite3_bind_int(stmt, 1, i);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
LOG_DEBUGF("database_fts.c", "Path index depth %d (%d)", i, sqlite3_changes(db->db));
}
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM path_index;"
"INSERT INTO path_index (path, index_id, count, depth) SELECT path, index_id, total, depth FROM path_tmp",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Generating search index.");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "INSERT INTO search(search) VALUES ('delete-all')",
NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE\n"
"FROM fts.document_index\n"
"WHERE id IN (SELECT id FROM delete_list)\n"
" AND index_id = (SELECT id FROM descriptor);",
NULL, NULL, NULL
));
"INSERT INTO search(rowid, name, content, title) SELECT id, name, content, title from document_view",
NULL, NULL, NULL));
}
void database_fts_optimize(database_t *db) {
@@ -81,8 +163,642 @@ void database_fts_optimize(database_t *db) {
db->db,
"INSERT INTO search(search) VALUES('optimize');",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Optimized fts5 table.");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA fts.optimize;", NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "optimized indices.");
}
cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_min, int depth_max, const char *prefix,
int suggest) {
sqlite3_stmt *stmt;
if (suggest) {
stmt = db->fts_suggest_paths;
sqlite3_bind_int(stmt, 1, depth_min);
sqlite3_bind_int(stmt, 2, depth_max);
if (prefix) {
char *prefix_glob = malloc(strlen(prefix) + 2);
sprintf(prefix_glob, "%s*", prefix);
sqlite3_bind_text(stmt, 3, prefix_glob, -1, SQLITE_TRANSIENT);
free(prefix_glob);
}
} else if (prefix) {
stmt = db->fts_search_paths_w_prefix;
if (index_id) {
sqlite3_bind_text(stmt, 1, index_id, -1, SQLITE_STATIC);
} else {
sqlite3_bind_null(stmt, 1);
}
sqlite3_bind_int(stmt, 2, depth_min);
sqlite3_bind_int(stmt, 3, depth_max);
char *prefix_glob = malloc(strlen(prefix) + 3);
sprintf(prefix_glob, "%s/*", prefix);
sqlite3_bind_text(stmt, 4, prefix, -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 5, prefix_glob, -1, SQLITE_TRANSIENT);
free(prefix_glob);
} else {
stmt = db->fts_search_paths;
if (index_id) {
sqlite3_bind_text(stmt, 1, index_id, -1, SQLITE_STATIC);
} else {
sqlite3_bind_null(stmt, 1);
}
sqlite3_bind_int(stmt, 2, depth_min);
sqlite3_bind_int(stmt, 3, depth_max);
}
cJSON *json = cJSON_CreateArray();
int ret;
do {
ret = sqlite3_step(stmt);
CRASH_IF_STMT_FAIL(ret);
if (ret == SQLITE_DONE) {
break;
}
cJSON *row = cJSON_CreateObject();
cJSON_AddStringToObject(row, "path", (const char *) sqlite3_column_text(stmt, 0));
cJSON_AddNumberToObject(row, "count", (double) sqlite3_column_int64(stmt, 1));
cJSON_AddItemToArray(json, row);
} while (TRUE);
sqlite3_reset(stmt);
return json;
}
cJSON *database_fts_get_mimetypes(database_t *db) {
cJSON *json = cJSON_CreateArray();
int ret;
do {
ret = sqlite3_step(db->fts_get_mimetypes);
CRASH_IF_STMT_FAIL(ret);
if (ret == SQLITE_DONE) {
break;
}
cJSON *row = cJSON_CreateObject();
cJSON_AddStringToObject(row, "mime", (const char *) sqlite3_column_text(db->fts_get_mimetypes, 0));
cJSON_AddNumberToObject(row, "count", (double) sqlite3_column_int64(db->fts_get_mimetypes, 1));
cJSON_AddItemToArray(json, row);
} while (TRUE);
sqlite3_reset(db->fts_get_mimetypes);
return json;
}
const char *size_where_clause(long size_min, long size_max) {
if (size_min > 0 && size_max > 0) {
return "size BETWEEN @size_min AND @size_max";
} else if (size_min > 0) {
return "size >= @size_min";
} else if (size_max > 0) {
return "size <= @size_max";
}
return NULL;
}
const char *date_where_clause(long date_min, long date_max) {
if (date_min > 0 && date_max > 0) {
return "mtime BETWEEN @date_min AND @date_max";
} else if (date_min > 0) {
return "mtime >= @date_min";
} else if (date_max > 0) {
return "mtime <= @date_max";
}
return NULL;
}
int array_length(char **arr) {
if (arr == NULL) {
return 0;
}
int count = -1;
while (arr[++count] != NULL);
return count;
}
#define INDEX_ID_PARAM_OFFSET (10)
#define MIME_PARAM_OFFSET (INDEX_ID_PARAM_OFFSET + 1000)
char *build_where_clause(const char *path_where, const char *size_where, const char *date_where,
const char *index_id_where, const char *mime_where, const char *query_where,
const char *after_where, const char *tags_where) {
char *where = calloc(
strlen(index_id_where)
+ (query_where ? strlen(query_where) + sizeof(" AND ") : 0)
+ (path_where ? strlen(path_where) + sizeof(" AND ") : 0)
+ (size_where ? strlen(size_where) + sizeof(" AND ") : 0)
+ (date_where ? strlen(date_where) + sizeof(" AND ") : 0)
+ (after_where ? strlen(after_where) + sizeof(" AND ") : 0)
+ (tags_where ? strlen(tags_where) + sizeof(" AND ") : 0)
+ (mime_where ? strlen(mime_where) + sizeof(" AND ") : 0) + 1,
sizeof(char)
);
strcat(where, index_id_where);
if (query_where) {
strcat(where, " AND ");
strcat(where, query_where);
}
if (path_where) {
strcat(where, " AND ");
strcat(where, path_where);
}
if (size_where) {
strcat(where, " AND ");
strcat(where, size_where);
}
if (date_where) {
strcat(where, " AND ");
strcat(where, date_where);
}
if (mime_where) {
strcat(where, " AND ");
strcat(where, mime_where);
}
if (after_where) {
strcat(where, " AND ");
strcat(where, after_where);
}
if (tags_where) {
strcat(where, " AND ");
strcat(where, tags_where);
}
return where;
}
char *index_ids_where_clause(char **index_ids) {
int param_count = array_length(index_ids);
char *clause = malloc(13 + 2 + 6 * param_count);
strcpy(clause, "index_id IN (");
for (int i = 0; i < param_count; i++) {
char param[10];
snprintf(param, sizeof(param), "?%d%s",
INDEX_ID_PARAM_OFFSET + i, i == param_count - 1 ? "" : ",");
strcat(clause, param);
}
strcat(clause, ")");
return clause;
}
char *mime_types_where_clause(char **mime_types) {
int param_count = array_length(mime_types);
if (param_count == 0) {
return NULL;
}
char *clause = malloc(9 + 2 + 6 * param_count);
strcpy(clause, "mime IN (");
for (int i = 0; i < param_count; i++) {
char param[10];
snprintf(param, sizeof(param), "?%d%s",
MIME_PARAM_OFFSET + i, i == param_count - 1 ? "" : ",");
strcat(clause, param);
}
strcat(clause, ")");
return clause;
}
const char *path_where_clause(const char *path) {
if (path == NULL || strlen(path) == 0) {
return NULL;
}
return "(path = @path or path GLOB @path_glob)";
}
const char *get_sort_var(fts_sort_t sort) {
switch (sort) {
case FTS_SORT_SCORE:
// Round to 14 decimal places to avoid precision problems when converting to JSON...
return "round(rank, 14)";
case FTS_SORT_SIZE:
return "size";
case FTS_SORT_MTIME:
return "mtime";
case FTS_SORT_RANDOM:
return "random_seeded(doc.ROWID + ?5)";
case FTS_SORT_NAME:
return "doc.name";
case FTS_SORT_ID:
return "doc.id";
default:
return NULL;
}
}
const char *match_where(const char *query) {
if (query == NULL || strlen(query) == 0) {
return NULL;
} else {
return "search MATCH ?1";
}
}
char *tags_where_clause(char **tags) {
if (tags == NULL) {
return NULL;
}
return "EXISTS (SELECT 1 FROM tag WHERE id=doc.id AND tag_matches(tag))";
}
database_summary_stats_t database_fts_get_date_range(database_t *db) {
int ret = sqlite3_step(db->fts_date_range);
CRASH_IF_STMT_FAIL(ret);
if (ret == SQLITE_DONE) {
return (database_summary_stats_t) {0, 0};
}
database_summary_stats_t stats;
stats.date_min = (double) sqlite3_column_int64(db->fts_date_range, 0);
stats.date_max = (double) sqlite3_column_int64(db->fts_date_range, 1);
sqlite3_reset(db->fts_date_range);
return stats;
}
char *get_after_where(char **after, fts_sort_t sort) {
if (after == NULL) {
return NULL;
}
return "(sort_var, doc.ROWID) > (?3, ?4)";
}
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
long size_max, long date_min, long date_max, int page_size,
char **index_ids, char **mime_types, char **tags, int sort_asc,
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
int highlight, int highlight_context_size) {
char path_glob[PATH_MAX * 2];
snprintf(path_glob, sizeof(path_glob), "%s/*", path);
const char *path_where = path_where_clause(path);
const char *size_where = size_where_clause(size_min, size_max);
const char *date_where = date_where_clause(date_min, date_max);
const char *index_id_where = index_ids_where_clause(index_ids);
const char *mime_where = mime_types_where_clause(mime_types);
const char *query_where = match_where(query);
const char *after_where = get_after_where(after, sort);
const char *tags_where = tags_where_clause(tags);
if (!query_where && sort == FTS_SORT_SCORE) {
// If query is NULL, then sort by id instead
sort = FTS_SORT_ID;
}
char *agg_where;
char *where = build_where_clause(path_where, size_where, date_where, index_id_where, mime_where, query_where,
after_where, tags_where);
if (fetch_aggregations) {
agg_where = build_where_clause(path_where, size_where, date_where, index_id_where, mime_where, query_where,
NULL, tags_where);
}
const char *json_object_sql;
if (highlight && query_where != NULL) {
json_object_sql = "json_remove(json_set(doc.json_data,"
"'$.index', doc.index_id,"
"'$._highlight.name', snippet(search, 0, '<mark>', '</mark>', '', ?6),"
"'$._highlight.content', snippet(search, 1, '<mark>', '</mark>', '', ?6)),"
"'$.content')";
} else {
json_object_sql = "json_remove(json_set(doc.json_data,"
"'$.index', doc.index_id),"
"'$.content')";
}
char *sql;
char *agg_sql;
if (query_where) {
asprintf(
&sql,
"SELECT"
" %s, %s as sort_var, doc.ROWID"
" FROM search"
" INNER JOIN document_index doc on doc.ROWID = search.ROWID"
" WHERE %s"
" ORDER BY sort_var%s, doc.ROWID"
" LIMIT ?2",
json_object_sql, get_sort_var(sort),
where,
sort_asc ? "" : "DESC");
if (fetch_aggregations) {
asprintf(&agg_sql,
"SELECT count(*), sum(size)"
" FROM search"
" INNER JOIN document_index doc on doc.ROWID = search.ROWID"
" WHERE search MATCH ?1"
" AND %s", agg_where);
}
} else {
asprintf(
&sql,
"SELECT"
" %s, %s as sort_var, doc.ROWID"
" FROM document_index doc"
" WHERE %s"
" ORDER BY sort_var%s,doc.ROWID"
" LIMIT ?2",
json_object_sql, get_sort_var(sort),
where,
sort_asc ? "" : " DESC");
if (fetch_aggregations) {
asprintf(&agg_sql,
"SELECT count(*), sum(size)"
" FROM document_index doc"
" WHERE %s", agg_where);
}
}
sqlite3_stmt *stmt;
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, sql, -1, &stmt, NULL));
if (query_where) {
sqlite3_bind_text(stmt, 1, query, -1, SQLITE_STATIC);
}
sqlite3_bind_int(stmt, 2, page_size);
if (index_ids) {
array_foreach(index_ids) {
sqlite3_bind_text(stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i], -1, SQLITE_STATIC);
}
}
if (mime_types) {
array_foreach(mime_types) {
sqlite3_bind_text(stmt, MIME_PARAM_OFFSET + i, mime_types[i], -1, SQLITE_STATIC);
}
}
if (tags) {
db->tag_array = tags;
}
if (size_min > 0) {
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@size_min"), size_min);
}
if (size_max > 0) {
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@size_max"), size_max);
}
if (date_min > 0) {
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@date_min"), date_min);
}
if (date_max > 0) {
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@date_max"), date_max);
}
if (path_where) {
sqlite3_bind_text(stmt, sqlite3_bind_parameter_index(stmt, "@path"), path, -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, sqlite3_bind_parameter_index(stmt, "@path_glob"), path_glob, -1, SQLITE_STATIC);
}
if (after_where) {
if (sort == FTS_SORT_NAME || sort == FTS_SORT_ID) {
sqlite3_bind_text(stmt, 3, after[0], -1, SQLITE_STATIC);
} else if (sort == FTS_SORT_SCORE) {
sqlite3_bind_double(stmt, 3, strtod(after[0], NULL));
} else {
sqlite3_bind_int64(stmt, 3, strtol(after[0], NULL, 10));
}
sqlite3_bind_int64(stmt, 4, strtol(after[1], NULL, 10));
}
if (sort == FTS_SORT_RANDOM) {
sqlite3_bind_int(stmt, 5, seed);
}
if (highlight) {
sqlite3_bind_int(stmt, 6, highlight_context_size);
}
cJSON *json = cJSON_CreateObject();
cJSON *hits_hits = cJSON_CreateArray();
int ret;
do {
ret = sqlite3_step(stmt);
if (ret != SQLITE_DONE && ret != SQLITE_ROW) {
break;
}
if (ret == SQLITE_DONE) {
break;
}
const char *json_str = (const char *) sqlite3_column_text(stmt, 0);
cJSON *row = cJSON_CreateObject();
cJSON *source = cJSON_Parse(json_str);
if (highlight) {
cJSON *hl = cJSON_DetachItemFromObject(source, "_highlight");
cJSON_AddItemToObject(row, "highlight", hl);
}
cJSON *id = cJSON_DetachItemFromObject(source, "_id");
cJSON_AddItemToObject(row, "_id", id);
cJSON_AddItemToObject(row, "_source", source);
cJSON *sort_info = cJSON_AddArrayToObject(row, "sort");
cJSON_AddItemToArray(
sort_info,
cJSON_CreateString((char *) sqlite3_column_text(stmt, 1))
);
cJSON_AddItemToArray(
sort_info,
cJSON_CreateString((char *) sqlite3_column_text(stmt, 2))
);
cJSON_AddItemToArray(hits_hits, row);
} while (TRUE);
sqlite3_finalize(stmt);
cJSON *hits = cJSON_AddObjectToObject(json, "hits");
cJSON_AddItemToObject(hits, "hits", hits_hits);
// Aggregations
if (fetch_aggregations) {
sqlite3_stmt *agg_stmt;
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, agg_sql, -1, &agg_stmt, NULL));
if (index_ids) {
array_foreach(index_ids) {
sqlite3_bind_text(agg_stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i], -1, SQLITE_STATIC);
}
}
if (mime_types) {
array_foreach(mime_types) {
sqlite3_bind_text(agg_stmt, MIME_PARAM_OFFSET + i, mime_types[i], -1, SQLITE_STATIC);
}
}
if (query_where) {
sqlite3_bind_text(agg_stmt, 1, query, -1, SQLITE_STATIC);
}
if (size_min > 0) {
sqlite3_bind_int64(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@size_min"), size_min);
}
if (size_max > 0) {
sqlite3_bind_int64(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@size_max"), size_max);
}
if (date_min > 0) {
sqlite3_bind_int64(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@date_min"), date_min);
}
if (date_max > 0) {
sqlite3_bind_int64(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@date_max"), date_max);
}
if (path_where) {
sqlite3_bind_text(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@path"), path, -1, SQLITE_STATIC);
sqlite3_bind_text(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@path_glob"), path_glob, -1,
SQLITE_STATIC);
}
int agg_ret = sqlite3_step(agg_stmt);
if (agg_ret == SQLITE_ROW) {
cJSON *aggregations = cJSON_AddObjectToObject(json, "aggregations");
cJSON *total_count = cJSON_AddObjectToObject(aggregations, "total_count");
cJSON_AddNumberToObject(total_count, "value", sqlite3_column_double(agg_stmt, 0));
cJSON *total_size = cJSON_AddObjectToObject(aggregations, "total_size");
cJSON_AddNumberToObject(total_size, "value", sqlite3_column_double(agg_stmt, 1));
} else {
cJSON *aggregations = cJSON_AddObjectToObject(json, "aggregations");
cJSON *total_count = cJSON_AddObjectToObject(aggregations, "total_count");
cJSON_AddNumberToObject(total_count, "value", 0);
cJSON *total_size = cJSON_AddObjectToObject(aggregations, "total_size");
cJSON_AddNumberToObject(total_size, "value", 0);
}
sqlite3_finalize(agg_stmt);
}
// Cleanup
if (index_id_where) {
free(index_id_where);
}
if (mime_where) {
free(mime_where);
}
free(where);
free(sql);
if (fetch_aggregations) {
free(agg_where);
free(agg_sql);
}
return json;
}
database_summary_stats_t database_fts_sync_tags(database_t *db) {
LOG_INFO("database_fts.c", "Syncing tags.");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM fts.tag WHERE"
" (id, tag) NOT IN (SELECT id, tag FROM tag)",
NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"INSERT INTO fts.tag (id, tag) "
" SELECT id, tag FROM tag "
" WHERE (id, tag) NOT IN (SELECT * FROM fts.tag)",
NULL, NULL, NULL));
}
cJSON *database_fts_get_document(database_t *db, char *doc_id) {
sqlite3_bind_text(db->fts_get_document, 1, doc_id, -1, NULL);
int ret = sqlite3_step(db->fts_get_document);
cJSON *json = NULL;
if (ret == SQLITE_ROW) {
const char *json_data = (const char *) sqlite3_column_text(db->fts_get_document, 0);
json = cJSON_Parse(json_data);
} else {
CRASH_IF_STMT_FAIL(ret);
}
sqlite3_reset(db->fts_get_document);
return json;
}
cJSON *database_fts_suggest_tag(database_t *db, char *prefix) {
sqlite3_bind_text(db->fts_suggest_tag, 1, prefix, -1, NULL);
cJSON *json = cJSON_CreateArray();
int ret;
do {
ret = sqlite3_step(db->fts_suggest_tag);
CRASH_IF_STMT_FAIL(ret);
if (ret == SQLITE_DONE) {
break;
}
cJSON_AddItemToArray(
json,
cJSON_CreateString((const char *) sqlite3_column_text(db->fts_suggest_tag, 0))
);
} while (TRUE);
sqlite3_reset(db->fts_suggest_tag);
return json;
}
cJSON *database_fts_get_tags(database_t *db) {
cJSON *json = cJSON_CreateArray();
int ret;
do {
ret = sqlite3_step(db->fts_get_tags);
CRASH_IF_STMT_FAIL(ret);
if (ret == SQLITE_DONE) {
break;
}
cJSON *row = cJSON_CreateObject();
cJSON_AddStringToObject(row, "tag", (const char *) sqlite3_column_text(db->fts_get_tags, 0));
cJSON_AddNumberToObject(row, "count", sqlite3_column_int(db->fts_get_tags, 1));
cJSON_AddItemToArray(json, row);
} while (TRUE);
sqlite3_reset(db->fts_get_tags);
return json;
}

View File

@@ -3,43 +3,75 @@ const char *FtsDatabaseSchema =
" id TEXT NOT NULL,"
" index_id TEXT NOT NULL,"
" size INTEGER NOT NULL,"
" name TEXT NOT NULL,"
" path TEXT NOT NULL,"
" path_depth INT NOT NULL,"
" mtime INTEGER NOT NULL,"
" mime TEXT NOT NULL,"
" mime TEXT,"
" json_data TEXT NOT NULL,"
" PRIMARY KEY (id, index_id)"
");"
""
"CREATE VIEW IF NOT EXISTS document_view (rowid, name, content)"
"CREATE TABLE IF NOT EXISTS stats ("
" mtime_min INTEGER,"
" mtime_max INTEGER"
");"
""
"CREATE TABLE IF NOT EXISTS path_index ("
" path TEXT,"
" index_id TEXT,"
" count INTEGER NOT NULL,"
" depth INTEGER NOT NULL,"
" PRIMARY KEY (path, index_id)"
");"
""
"CREATE TABLE IF NOT EXISTS mime_index ("
" index_id TEXT,"
" mime TEXT,"
" count INT,"
" PRIMARY KEY(index_id, mime)"
");"
""
"CREATE TABLE IF NOT EXISTS tag ("
" id TEXT NOT NULL,"
" tag TEXT NOT NULL,"
" PRIMARY KEY (id, tag)"
");"
"CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag);"
"CREATE INDEX IF NOT EXISTS tag_id_idx ON tag(id);"
"CREATE TRIGGER IF NOT EXISTS tag_write_trigger"
" AFTER INSERT ON tag"
" BEGIN"
" UPDATE document_index"
" SET json_data = json_set(json_data, '$.tag', (SELECT json_group_array(tag) FROM tag WHERE id = NEW.id))"
" WHERE id = NEW.id;"
" END;"
""
"CREATE TRIGGER IF NOT EXISTS tag_delete_trigger"
" AFTER DELETE ON tag"
" BEGIN"
" UPDATE document_index"
" SET json_data = json_set(json_data, '$.tag', (SELECT json_group_array(tag) FROM tag WHERE id = OLD.id))"
" WHERE id = OLD.id;"
" END;"
""
"CREATE VIEW IF NOT EXISTS document_view (id, name, content, title)"
" AS"
" SELECT rowid,"
" json_data->>'name',"
" json_data->>'content'"
" json_data->>'content',"
" json_data->>'title'"
" FROM document_index;"
""
"CREATE INDEX IF NOT EXISTS document_index_size_idx ON document_index (size);"
"CREATE INDEX IF NOT EXISTS document_index_mtime_idx ON document_index (mtime);"
"CREATE INDEX IF NOT EXISTS document_index_mime_idx ON document_index (mime);"
"CREATE INDEX IF NOT EXISTS document_index_path_idx ON document_index (path);"
"CREATE INDEX IF NOT EXISTS document_index_path_depth_idx ON document_index (path_depth);"
""
"CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5 ("
" name,"
" content,"
" content='document_view'"
" name,"
" content,"
" title,"
" content='document_view',"
" content_rowid='id'"
");"
""
"CREATE TRIGGER IF NOT EXISTS on_insert AFTER INSERT ON document_index BEGIN"
" INSERT INTO search(rowid, name, content) VALUES (new.rowid, new.json_data->>'name', new.json_data->>'content');"
"END;"
"CREATE TRIGGER IF NOT EXISTS on_delete AFTER DELETE ON document_index BEGIN"
" INSERT INTO search(search, name, content) VALUES('delete', old.json_data->>'name', old.json_data->>'content');"
"END;"
"CREATE TRIGGER IF NOT EXISTS on_update AFTER UPDATE ON document_index BEGIN"
" INSERT INTO search(search, rowid, name, content) VALUES('delete', old.rowid, old.json_data->>'name', old.json_data->>'content');"
" INSERT INTO search(rowid, name, content) VALUES (new.rowid, new.json_data->>'name', new.json_data->>'content');"
"END;";
// name^8, content^3, title^8
"INSERT INTO search(search, rank) VALUES('rank', 'bm25(8, 3, 8)');"
"";
const char *IpcDatabaseSchema =
"CREATE TABLE parse_job ("
@@ -78,7 +110,8 @@ const char *IndexDatabaseSchema =
""
"CREATE TABLE tag ("
" id TEXT NOT NULL,"
" tag TEXT NOT NULL"
" tag TEXT NOT NULL,"
" PRIMARY KEY (id, tag)"
");"
""
"CREATE TABLE document_sidecar ("

View File

@@ -238,5 +238,7 @@ cJSON *database_get_stats(database_t *db, database_stat_type_d type) {
cJSON_AddItemToArray(json, row);
} while (TRUE);
sqlite3_finalize(stmt);
return json;
}

View File

@@ -92,8 +92,8 @@ void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
cJSON_free(json);
tpool_add_work(IndexCtx.pool, &(job_t) {
.type = JOB_BULK_LINE,
.bulk_line = bulk_line,
.type = JOB_BULK_LINE,
.bulk_line = bulk_line,
});
free(bulk_line);
}
@@ -606,4 +606,4 @@ char *elastic_get_status() {
free_response(r);
cJSON_Delete(json);
return status;
}
}

View File

@@ -362,11 +362,10 @@ void sist2_sqlite_index(sqlite_index_args_t *args) {
database_fts_attach(db, args->search_index_path);
database_fts_index(db);
if (args->optimize_database) {
database_fts_optimize(db);
}
database_fts_optimize(db);
database_close(db, FALSE);
database_close(search_db, FALSE);
}
void sist2_exec_script(exec_args_t *args) {
@@ -391,6 +390,7 @@ void sist2_exec_script(exec_args_t *args) {
void sist2_web(web_args_t *args) {
WebCtx.es_url = args->es_url;
WebCtx.search_backend = args->search_backend;
WebCtx.es_index = args->es_index;
WebCtx.es_insecure_ssl = args->es_insecure_ssl;
WebCtx.index_count = args->index_count;
@@ -407,15 +407,27 @@ void sist2_web(web_args_t *args) {
WebCtx.auth0_audience = args->auth0_audience;
strcpy(WebCtx.lang, args->lang);
if (args->search_backend == SQLITE_SEARCH_BACKEND) {
WebCtx.search_db = database_create(args->search_index_path, FTS_DATABASE);
database_open(WebCtx.search_db);
}
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
strcpy(WebCtx.indices[i].path, abs_path);
WebCtx.indices[i].db = database_create(abs_path, INDEX_DATABASE);
database_open(WebCtx.indices[i].db);
database_t *db = database_create(abs_path, INDEX_DATABASE);
database_open(db);
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {
database_fts_attach(db, args->search_index_path);
database_fts_sync_tags(db);
database_fts_detach(db);
}
index_descriptor_t *desc = database_read_index_descriptor(WebCtx.indices[i].db);
WebCtx.indices[i].db = db;
index_descriptor_t *desc = database_read_index_descriptor(db);
WebCtx.indices[i].desc = *desc;
free(desc);
@@ -465,6 +477,7 @@ int main(int argc, const char *argv[]) {
int common_async_script = 0;
int common_threads = 0;
int common_optimize_database = 0;
char *common_search_index = NULL;
struct argparse_option options[] = {
OPT_HELP(),
@@ -541,14 +554,14 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."),
OPT_GROUP("sqlite-index options"),
OPT_STRING(0, "search-index", &sqlite_index_args->search_index_path, "Path to search index. Will be created if it does not exist yet."),
OPT_BOOLEAN(0, "optimize-index", &common_optimize_database,
"Optimize search index file for smaller size and faster queries."),
OPT_STRING(0, "search-index", &common_search_index, "Path to search index. Will be created if it does not exist yet."),
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
"Do not verify SSL connections to Elasticsearch."),
// TODO: change arg name (?)
OPT_STRING(0, "search-index", &common_search_index, "Path to SQLite search index."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
OPT_STRING(0, "bind", &web_args->listen_address,
"Listen for connections on this address. DEFAULT: localhost:4090"),
@@ -584,7 +597,7 @@ int main(int argc, const char *argv[]) {
argc = argparse_parse(&argparse, argc, argv);
if (arg_version) {
printf(Version);
printf("%s", Version);
goto end;
}
@@ -612,7 +625,9 @@ int main(int argc, const char *argv[]) {
index_args->async_script = common_async_script;
scan_args->optimize_database = common_optimize_database;
sqlite_index_args->optimize_database = common_optimize_database;
sqlite_index_args->search_index_path = common_search_index;
web_args->search_index_path = common_search_index;
if (argc == 0) {
argparse_usage(&argparse);

View File

@@ -51,11 +51,11 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "3.0.6"
#define VERSION "3.0.7"
static const char *const Version = VERSION;
static const int VersionMajor = 3;
static const int VersionMinor = 0;
static const int VersionPatch = 6;
static const int VersionPatch = 7;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown

View File

@@ -114,4 +114,7 @@ struct timespec timespec_add(struct timespec ts1, long usec);
pthread_cond_timedwait(cond, mutex, &end_time); \
} while (0)
#define array_foreach(arr) \
for (int i = 0; (arr)[i] != NULL; i++)
#endif

View File

@@ -5,11 +5,24 @@
#include "src/index/web.h"
#include "src/auth0/auth0_c_api.h"
#include "src/web/web_util.h"
#include "src/cli.h"
#include <time.h>
#include <src/ctx.h>
#define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n"
#define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found");
void fts_search_paths(struct mg_connection *nc, struct mg_http_message *hm);
void fts_search_mimetypes(struct mg_connection *nc, struct mg_http_message *hm);
void fts_search_summary_stats(struct mg_connection *nc, struct mg_http_message *hm);
void fts_search(struct mg_connection *nc, struct mg_http_message *hm);
void fts_get_document(struct mg_connection *nc, struct mg_http_message *hm);
void fts_suggest_tag(struct mg_connection *nc, struct mg_http_message *hm);
void fts_get_tags(struct mg_connection *nc, struct mg_http_message *hm);
static struct mg_http_serve_opts DefaultServeOpts = {
.fs = NULL,
@@ -47,12 +60,8 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
}
cJSON *json = database_get_stats(db, stat_type);
char *json_str = cJSON_PrintUnformatted(json);
mg_send_json(nc, json);
web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, strlen(json_str));
free(json_str);
cJSON_Delete(json);
}
@@ -93,7 +102,7 @@ void serve_chunk_vendors_css(struct mg_connection *nc, struct mg_http_message *h
}
void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, const char *arg_index,
const char *arg_doc_id, int arg_num) {
const char *arg_doc_id, int arg_num) {
database_t *db = web_get_database(arg_index);
if (db == NULL) {
@@ -252,6 +261,10 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
}
void cache_es_version() {
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {
return;
}
static int is_cached = FALSE;
if (is_cached == TRUE) {
@@ -321,6 +334,12 @@ void index_info(struct mg_connection *nc) {
cJSON_AddItemToArray(arr, idx_json);
}
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {
cJSON_AddStringToObject(json, "searchBackend", "sqlite");
} else {
cJSON_AddStringToObject(json, "searchBackend", "elasticsearch");
}
char *json_str = cJSON_PrintUnformatted(json);
web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
@@ -329,54 +348,63 @@ void index_info(struct mg_connection *nc) {
cJSON_Delete(json);
}
cJSON *get_root_document_by_id(const char *index_id, const char *doc_id) {
database_t *db = web_get_database(index_id);
if (!db) {
return NULL;
}
char next_id[SIST_DOC_ID_LEN];
strcpy(next_id, doc_id);
while (TRUE) {
cJSON *doc = database_get_document(db, next_id);
if (doc == NULL) {
return NULL;
}
cJSON *parent = cJSON_GetObjectItem(doc, "parent");
if (parent == NULL || cJSON_IsNull(parent)) {
return doc;
}
strcpy(next_id, parent->valuestring);
cJSON_Delete(parent);
}
}
void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
char arg_doc_id[SIST_DOC_ID_LEN];
memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
char arg_index[SIST_INDEX_ID_LEN];
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
const char *next = arg_doc_id;
cJSON *doc = NULL;
cJSON *index_id = NULL;
cJSON *source = NULL;
while (true) {
doc = elastic_get_document(next);
source = cJSON_GetObjectItem(doc, "_source");
index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
cJSON *parent = cJSON_GetObjectItem(source, "parent");
if (parent == NULL) {
break;
}
next = parent->valuestring;
}
index_t *idx = web_get_index_by_id(index_id->valuestring);
index_t *idx = web_get_index_by_id(arg_index);
if (idx == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
cJSON *source = get_root_document_by_id(arg_index, arg_doc_id);
if (strlen(idx->desc.rewrite_url) == 0) {
serve_file_from_disk(source, idx, nc, hm);
} else {
serve_file_from_url(source, idx, nc);
}
cJSON_Delete(doc);
cJSON_Delete(source);
}
void status(struct mg_connection *nc) {
@@ -398,6 +426,10 @@ typedef struct {
tag_req_t *parse_tag_request(cJSON *json) {
if (json == NULL) {
return NULL;
}
if (!cJSON_IsObject(json)) {
return NULL;
}
@@ -425,115 +457,101 @@ tag_req_t *parse_tag_request(cJSON *json) {
return req;
}
subreq_ctx_t *elastic_delete_tag(const tag_req_t *req) {
char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192,
"{"
" \"script\" : {"
" \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\","
" \"lang\": \"painless\","
" \"params\" : {"
" \"tag\" : \"%s\""
" }"
" }"
"}", req->name
);
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, req->doc_id);
return web_post_async(url, buf, WebCtx.es_insecure_ssl);
}
subreq_ctx_t *elastic_write_tag(const tag_req_t *req) {
char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192,
"{"
" \"script\" : {"
" \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\","
" \"lang\": \"painless\","
" \"params\" : {"
" \"tag\" : \"%s\""
" }"
" }"
"}", req->name
);
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, req->doc_id);
return web_post_async(url, buf, WebCtx.es_insecure_ssl);
}
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
// if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
// LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
// HTTP_REPLY_NOT_FOUND
// return;
// }
//
// char arg_index[SIST_INDEX_ID_LEN];
// memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
// *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
//
// if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
// LOG_DEBUG("serve.c", "Invalid tag request")
// HTTP_REPLY_NOT_FOUND
// return;
// }
//
// store_t *store = get_tag_store(arg_index);
// if (store == NULL) {
// LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
// HTTP_REPLY_NOT_FOUND
// return;
// }
//
// char *body = malloc(hm->body.len + 1);
// memcpy(body, hm->body.ptr, hm->body.len);
// *(body + hm->body.len) = '\0';
// cJSON *json = cJSON_Parse(body);
//
// tag_req_t *arg_req = parse_tag_request(json);
// if (arg_req == NULL) {
// LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
// cJSON_Delete(json);
// free(body);
// mg_http_reply(nc, 400, "", "Invalid request");
// return;
// }
//
// cJSON *arr = NULL;
//
// size_t data_len = 0;
// const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
// if (data_len == 0) {
// arr = cJSON_CreateArray();
// } else {
// arr = cJSON_Parse(data);
// }
//
// if (arg_req->delete) {
//
// if (data_len > 0) {
// cJSON *element = NULL;
// int i = 0;
// cJSON_ArrayForEach(element, arr) {
// if (strcmp(element->valuestring, arg_req->name) == 0) {
// cJSON_DeleteItemFromArray(arr, i);
// break;
// }
// i++;
// }
// }
//
// char *buf = malloc(sizeof(char) * 8192);
// snprintf(buf, 8192,
// "{"
// " \"script\" : {"
// " \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\","
// " \"lang\": \"painless\","
// " \"params\" : {"
// " \"tag\" : \"%s\""
// " }"
// " }"
// "}", arg_req->name
// );
//
// char url[4096];
// snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
// nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
//
// } else {
// cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
//
// char *buf = malloc(sizeof(char) * 8192);
// snprintf(buf, 8192,
// "{"
// " \"script\" : {"
// " \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\","
// " \"lang\": \"painless\","
// " \"params\" : {"
// " \"tag\" : \"%s\""
// " }"
// " }"
// "}", arg_req->name
// );
//
// char url[4096];
// snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
// nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
// }
//
// char *json_str = cJSON_PrintUnformatted(arr);
// store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
// store_flush(store);
//
// free(arg_req);
// free(json_str);
// cJSON_Delete(json);
// cJSON_Delete(arr);
// free(body);
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
char arg_index[SIST_INDEX_ID_LEN];
memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body);
free(body);
if (json == NULL) {
HTTP_REPLY_BAD_REQUEST
return;
}
database_t *db = web_get_database(arg_index);
if (db == NULL) {
LOG_DEBUGF("serve.c", "Could not get database for index: %s", arg_index);
HTTP_REPLY_NOT_FOUND
return;
}
tag_req_t *req = parse_tag_request(json);
cJSON_Delete(json);
if (req == NULL) {
LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index);
HTTP_REPLY_BAD_REQUEST
return;
}
if (req->delete) {
database_delete_tag(db, req->doc_id, req->name);
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {
database_delete_tag(WebCtx.search_db, req->doc_id, req->name);
HTTP_REPLY_OK
} else {
nc->fn_data = elastic_delete_tag(req);
}
} else {
database_write_tag(db, req->doc_id, req->name);
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {
database_write_tag(WebCtx.search_db, req->doc_id, req->name);
HTTP_REPLY_OK
} else {
nc->fn_data = elastic_write_tag(req);
}
}
free(req);
}
int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) {
@@ -631,11 +649,39 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
return;
}
if (mg_http_match_uri(hm, "/es")) {
search(nc, hm);
} else if (mg_http_match_uri(hm, "/status")) {
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {
if (mg_http_match_uri(hm, "/fts/paths")) {
fts_search_paths(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/fts/mimetypes")) {
fts_search_mimetypes(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/fts/dateRange")) {
fts_search_summary_stats(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/fts/search")) {
fts_search(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/fts/d/*")) {
fts_get_document(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/fts/suggestTags")) {
fts_suggest_tag(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/fts/tags")) {
fts_get_tags(nc, hm);
return;
}
} else if (WebCtx.search_backend == ES_SEARCH_BACKEND) {
if (mg_http_match_uri(hm, "/es")) {
search(nc, hm);
return;
}
}
if (mg_http_match_uri(hm, "/status")) {
status(nc);
} else if (mg_http_match_uri(hm, "/f/*")) {
} else if (mg_http_match_uri(hm, "/f/*/*")) {
file(nc, hm);
} else if (mg_http_match_uri(hm, "/t/*/*/*")) {
thumbnail_with_num(nc, hm);
@@ -702,14 +748,12 @@ void serve(const char *listen_address) {
struct mg_mgr mgr;
mg_mgr_init(&mgr);
int ok = 1;
struct mg_connection *nc = mg_http_listen(&mgr, listen_address, ev_router, NULL);
if (nc == NULL) {
LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address);
}
while (ok) {
while (TRUE) {
mg_mgr_poll(&mgr, 10);
}
mg_mgr_free(&mgr);

View File

@@ -3,6 +3,11 @@
#include "src/sist.h"
#define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n"
#define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found");
#define HTTP_REPLY_BAD_REQUEST mg_http_reply(nc, 400, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
#define HTTP_REPLY_OK mg_http_reply(nc, 200, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "ok");
void serve(const char *listen_address);
#endif

378
src/web/web_fts.c Normal file
View File

@@ -0,0 +1,378 @@
#include "serve.h"
#include <mongoose.h>
#include "src/web/web_util.h"
typedef struct {
char *index_id;
char *prefix;
int min_depth;
int max_depth;
} fts_search_paths_req_t;
typedef struct {
cJSON *val;
int invalid;
} json_value;
typedef struct {
char *query;
char *path;
fts_sort_t sort;
double size_min;
double size_max;
double date_min;
double date_max;
int page_size;
char **index_ids;
char **mime_types;
char **tags;
int sort_asc;
int seed;
char **after;
int fetch_aggregations;
int highlight;
int highlight_context_size;
} fts_search_req_t;
fts_sort_t get_sort_mode(const cJSON *req_sort) {
if (strcmp(req_sort->valuestring, "score") == 0) {
return FTS_SORT_SCORE;
} else if (strcmp(req_sort->valuestring, "size") == 0) {
return FTS_SORT_SIZE;
} else if (strcmp(req_sort->valuestring, "mtime") == 0) {
return FTS_SORT_MTIME;
} else if (strcmp(req_sort->valuestring, "random") == 0) {
return FTS_SORT_RANDOM;
} else if (strcmp(req_sort->valuestring, "name") == 0) {
return FTS_SORT_NAME;
}
return FTS_SORT_INVALID;
}
static json_value get_json_string(cJSON *object, const char *name) {
cJSON *item = cJSON_GetObjectItem(object, name);
if (item == NULL || cJSON_IsNull(item)) {
return (json_value) {NULL, FALSE};
}
if (!cJSON_IsString(item)) {
return (json_value) {NULL, TRUE};
}
return (json_value) {item, FALSE};
}
static json_value get_json_number(cJSON *object, const char *name) {
cJSON *item = cJSON_GetObjectItem(object, name);
if (item == NULL || cJSON_IsNull(item)) {
return (json_value) {NULL, FALSE};
}
if (!cJSON_IsNumber(item)) {
return (json_value) {NULL, TRUE};
}
return (json_value) {item, FALSE};
}
static json_value get_json_bool(cJSON *object, const char *name) {
cJSON *item = cJSON_GetObjectItem(object, name);
if (item == NULL || cJSON_IsNull(item)) {
return (json_value) {NULL, FALSE};
}
if (!cJSON_IsBool(item)) {
return (json_value) {NULL, TRUE};
}
return (json_value) {item, FALSE};
}
static json_value get_json_array(cJSON *object, const char *name) {
cJSON *item = cJSON_GetObjectItem(object, name);
if (item == NULL || cJSON_IsNull(item)) {
return (json_value) {NULL, FALSE};
}
if (!cJSON_IsArray(item) || cJSON_GetArraySize(item) == 0) {
return (json_value) {NULL, TRUE};
}
cJSON *elem;
cJSON_ArrayForEach(elem, item) {
if (!cJSON_IsString(elem)) {
return (json_value) {NULL, TRUE};
}
}
return (json_value) {item, FALSE};
}
char **json_array_to_c_array(cJSON *json) {
cJSON *element;
char **arr = calloc(cJSON_GetArraySize(json) + 1, sizeof(char *));
int i = 0;
cJSON_ArrayForEach(element, json) {
arr[i++] = strdup(element->valuestring);
}
return arr;
}
#define DEFAULT_HIGHLIGHT_CONTEXT_SIZE 20
fts_search_req_t *get_search_req(struct mg_http_message *hm) {
cJSON *json = web_get_json_body(hm);
if (json == NULL) {
return NULL;
}
json_value req_query, req_path, req_size_min, req_size_max, req_date_min, req_date_max, req_page_size,
req_index_ids, req_mime_types, req_tags, req_sort_asc, req_sort, req_seed, req_after,
req_fetch_aggregations, req_highlight, req_highlight_context_size;
if (!cJSON_IsObject(json) ||
(req_query = get_json_string(json, "query")).invalid ||
(req_path = get_json_string(json, "path")).invalid ||
(req_sort = get_json_string(json, "sort")).val == NULL ||
(req_size_min = get_json_number(json, "sizeMin")).invalid ||
(req_size_max = get_json_number(json, "sizeMax")).invalid ||
(req_date_min = get_json_number(json, "dateMin")).invalid ||
(req_date_max = get_json_number(json, "dateMax")).invalid ||
(req_page_size = get_json_number(json, "pageSize")).val == NULL ||
(req_after = get_json_array(json, "after")).invalid ||
(req_seed = get_json_number(json, "seed")).invalid ||
(req_fetch_aggregations = get_json_bool(json, "fetchAggregations")).invalid ||
(req_sort_asc = get_json_bool(json, "sortAsc")).invalid ||
(req_index_ids = get_json_array(json, "indexIds")).invalid ||
(req_mime_types = get_json_array(json, "mimeTypes")).invalid ||
(req_highlight = get_json_bool(json, "highlight")).invalid ||
(req_highlight_context_size = get_json_number(json, "highlightContextSize")).invalid ||
(req_tags = get_json_array(json, "tags")).invalid) {
cJSON_Delete(json);
return NULL;
}
int index_id_count = cJSON_GetArraySize(req_index_ids.val);
if (index_id_count > 999) {
cJSON_Delete(json);
return NULL;
}
int mime_count = req_mime_types.val ? 0 : cJSON_GetArraySize(req_mime_types.val);
if (mime_count > 999) {
cJSON_Delete(json);
return NULL;
}
int tag_count = req_tags.val ? 0 : cJSON_GetArraySize(req_tags.val);
if (tag_count > 9999) {
cJSON_Delete(json);
return NULL;
}
if (req_path.val && (strstr(req_path.val->valuestring, "*") || strlen(req_path.val) >= PATH_MAX)) {
cJSON_Delete(json);
return NULL;
}
fts_sort_t sort = get_sort_mode(req_sort.val);
if (sort == FTS_SORT_INVALID) {
cJSON_Delete(json);
return NULL;
}
if (req_after.val && cJSON_GetArraySize(req_after.val) != 2) {
cJSON_Delete(json);
return NULL;
}
if (req_page_size.val->valueint > 1000 || req_page_size.val->valueint < 0) {
cJSON_Delete(json);
return NULL;
}
if (req_highlight_context_size.val->valueint < 0) {
cJSON_Delete(json);
return NULL;
}
fts_search_req_t *req = malloc(sizeof(fts_search_req_t));
req->sort = sort;
req->query = req_query.val ? strdup(req_query.val->valuestring) : NULL;
req->path = req_path.val ? strdup(req_path.val->valuestring) : NULL;
req->size_min = req_size_min.val ? req_size_min.val->valuedouble : 0;
req->size_max = req_size_max.val ? req_size_max.val->valuedouble : 0;
req->seed = (int) (req_seed.val ? req_seed.val->valuedouble : 0);
req->date_min = req_date_min.val ? req_date_min.val->valuedouble : 0;
req->date_max = req_date_max.val ? req_date_max.val->valuedouble : 0;
req->page_size = (int) req_page_size.val->valuedouble;
req->sort_asc = req_sort_asc.val ? req_sort_asc.val->valueint : TRUE;
req->index_ids = req_index_ids.val ? json_array_to_c_array(req_index_ids.val) : NULL;
req->after = req_after.val ? json_array_to_c_array(req_after.val) : NULL;
req->mime_types = req_mime_types.val ? json_array_to_c_array(req_mime_types.val) : NULL;
req->tags = req_tags.val ? json_array_to_c_array(req_tags.val) : NULL;
req->fetch_aggregations = req_fetch_aggregations.val ? req_fetch_aggregations.val->valueint : FALSE;
req->highlight = req_highlight.val ? req_highlight.val->valueint : FALSE;
req->highlight_context_size = req_highlight_context_size.val
? req_highlight_context_size.val->valueint
: DEFAULT_HIGHLIGHT_CONTEXT_SIZE;
cJSON_Delete(json);
return req;
}
void destroy_array(char **array) {
if (array == NULL) {
return;
}
array_foreach(array) { free(array[i]); }
free(array);
}
void destroy_search_req(fts_search_req_t *req) {
free(req->query);
free(req->path);
destroy_array(req->index_ids);
destroy_array(req->mime_types);
destroy_array(req->tags);
free(req);
}
fts_search_paths_req_t *get_search_paths_req(struct mg_http_message *hm) {
cJSON *json = web_get_json_body(hm);
if (json == NULL) {
return NULL;
}
json_value req_index_id, req_min_depth, req_max_depth, req_prefix;
if (!cJSON_IsObject(json) ||
(req_index_id = get_json_string(json, "indexId")).invalid ||
(req_prefix = get_json_string(json, "prefix")).invalid ||
(req_min_depth = get_json_number(json, "minDepth")).val == NULL ||
(req_max_depth = get_json_number(json, "maxDepth")).val == NULL) {
cJSON_Delete(json);
return NULL;
}
fts_search_paths_req_t *req = malloc(sizeof(fts_search_paths_req_t));
req->index_id = req_index_id.val ? strdup(req_index_id.val->valuestring) : NULL;
req->min_depth = req_min_depth.val->valueint;
req->max_depth = req_max_depth.val->valueint;
req->prefix = req_prefix.val ? strdup(req_prefix.val->valuestring) : NULL;
cJSON_Delete(json);
return req;
}
void destroy_search_paths_req(fts_search_paths_req_t *req) {
if (req->index_id) {
free(req->index_id);
}
if (req->prefix) {
free(req->prefix);
}
free(req);
}
void fts_search_paths(struct mg_connection *nc, struct mg_http_message *hm) {
fts_search_paths_req_t *req = get_search_paths_req(hm);
if (req == NULL) {
HTTP_REPLY_BAD_REQUEST
return;
}
cJSON *json = database_fts_get_paths(WebCtx.search_db, req->index_id, req->min_depth,
req->max_depth, req->prefix, req->max_depth == 10000);
destroy_search_paths_req(req);
mg_send_json(nc, json);
cJSON_Delete(json);
}
void fts_search_mimetypes(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *json = database_fts_get_mimetypes(WebCtx.search_db);
mg_send_json(nc, json);
cJSON_Delete(json);
}
void fts_search_summary_stats(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) {
database_summary_stats_t stats = database_fts_get_date_range(WebCtx.search_db);
cJSON *json = cJSON_CreateObject();
cJSON_AddNumberToObject(json, "dateMin", stats.date_min);
cJSON_AddNumberToObject(json, "dateMax", stats.date_max);
mg_send_json(nc, json);
cJSON_Delete(json);
}
void fts_search(struct mg_connection *nc, struct mg_http_message *hm) {
fts_search_req_t *req = get_search_req(hm);
if (req == NULL) {
HTTP_REPLY_BAD_REQUEST
return;
}
cJSON *json = database_fts_search(WebCtx.search_db, req->query, req->path,
(long) req->size_min, (long) req->size_max,
(long) req->date_min, (long) req->date_max,
req->page_size, req->index_ids, req->mime_types,
req->tags, req->sort_asc, req->sort, req->seed,
req->after, req->fetch_aggregations, req->highlight,
req->highlight_context_size);
destroy_search_req(req);
mg_send_json(nc, json);
cJSON_Delete(json);
}
void fts_get_document(struct mg_connection *nc, struct mg_http_message *hm) {
char doc_id[SIST_DOC_ID_LEN];
memcpy(doc_id, hm->uri.ptr + 7, SIST_INDEX_ID_LEN);
*(doc_id + SIST_INDEX_ID_LEN - 1) = '\0';
cJSON *json = database_fts_get_document(WebCtx.search_db, doc_id);
if (!json) {
HTTP_REPLY_NOT_FOUND
return;
}
mg_send_json(nc, json);
cJSON_Delete(json);
}
void fts_suggest_tag(struct mg_connection *nc, struct mg_http_message *hm) {
char *body = web_get_string_body(hm);
if (body == NULL) {
HTTP_REPLY_BAD_REQUEST
return;
}
cJSON *json = database_fts_suggest_tag(WebCtx.search_db, body);
mg_send_json(nc, json);
cJSON_Delete(json);
free(body);
}
void fts_get_tags(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *json = database_fts_get_tags(WebCtx.search_db);
mg_send_json(nc, json);
cJSON_Delete(json);
}

View File

@@ -61,3 +61,38 @@ void web_send_headers(struct mg_connection *nc, int status_code, size_t length,
extra_headers
);
}
cJSON *web_get_json_body(struct mg_http_message *hm) {
if (hm->body.len == 0) {
return NULL;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body);
free(body);
return json;
}
char *web_get_string_body(struct mg_http_message *hm) {
if (hm->body.len == 0) {
return NULL;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0';
return body;
}
void mg_send_json(struct mg_connection *nc, const cJSON *json) {
char *json_str = cJSON_PrintUnformatted(json);
web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, strlen(json_str));
free(json_str);
}

View File

@@ -14,10 +14,9 @@ database_t *web_get_database(const char *index_id);
__always_inline
static char *web_address_to_string(struct mg_addr *addr) {
return "TODO";
// static char address_to_string_buf[INET6_ADDRSTRLEN];
//
// return mg_ntoa(addr, address_to_string_buf, sizeof(address_to_string_buf));
static char address_to_string_buf[INET6_ADDRSTRLEN];
return mg_ntoa(addr, address_to_string_buf, sizeof(address_to_string_buf));
}
void web_send_headers(struct mg_connection *nc, int status_code, size_t length, char *extra_headers);
@@ -29,4 +28,8 @@ void web_serve_asset_favicon_ico(struct mg_connection *nc);
void web_serve_asset_style_css(struct mg_connection *nc);
void web_serve_asset_chunk_vendors_css(struct mg_connection *nc);
cJSON *web_get_json_body(struct mg_http_message *hm);
char *web_get_string_body(struct mg_http_message *hm);
void mg_send_json(struct mg_connection *nc, const cJSON *json);
#endif //SIST2_WEB_UTIL_H