SQLite search backend

This commit is contained in:
2023-05-18 14:16:11 -04:00
parent 1cfceba518
commit 944c224904
46 changed files with 3261 additions and 1296 deletions

View File

@@ -14,12 +14,45 @@ database_t *database_create(const char *filename, database_type_t type) {
strcpy(db->filename, filename);
db->type = type;
db->select_thumbnail_stmt = NULL;
db->db = NULL;
db->tag_array = NULL;
db->ipc_ctx = NULL;
return db;
}
int tag_matches(const char *query, const char *tag) {
size_t query_len = strlen(query);
size_t tag_len = strlen(tag);
if (query_len >= tag_len) {
return FALSE;
}
return strncmp(tag, query, query_len) == 0 && *(tag + query_len) == '.';
}
void tag_matches_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
const char *tag = (const char *) sqlite3_value_text(argv[0]);
char **tags = *(char ***) sqlite3_user_data(ctx);
array_foreach(tags) {
if (tag_matches(tags[i], tag)) {
sqlite3_result_int(ctx, TRUE);
return;
}
}
sqlite3_result_int(ctx, FALSE);
}
__always_inline
static int sep_rfind(const char *str) {
for (int i = (int) strlen(str); i >= 0; i--) {
@@ -48,6 +81,24 @@ void path_parent_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
sqlite3_result_text(ctx, parent, stop, SQLITE_TRANSIENT);
}
void random_func(sqlite3_context *ctx, int argc, UNUSED(sqlite3_value **argv)) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_INTEGER) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
char state_buf[128] = {0,};
struct random_data buf;
int result;
long seed = sqlite3_value_int64(argv[0]);
initstate_r((int) seed, state_buf, sizeof(state_buf), &buf);
random_r(&buf, &result);
sqlite3_result_int(ctx, result);
}
void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
@@ -87,7 +138,8 @@ void database_open(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
sqlite3_busy_timeout(db->db, 1000);
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA cache_size = -200000;", NULL, NULL, NULL));
// TODO: Optional argument?
// CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA cache_size = -200000;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA synchronous = OFF;", NULL, NULL, NULL));
if (db->type == INDEX_DATABASE) {
@@ -119,6 +171,10 @@ void database_open(database_t *db) {
-1,
&db->write_thumbnail_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT json_data FROM document WHERE id=?", -1,
&db->get_document, NULL));
// Create functions
sqlite3_create_function(
db->db,
@@ -170,6 +226,61 @@ void database_open(database_t *db) {
db->db, "INSERT INTO index_job (doc_id,type,line) VALUES (?,?,?);", -1,
&db->insert_index_job_stmt, NULL));
} else if (db->type == FTS_DATABASE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT path, count FROM path_index"
" WHERE index_id=? AND depth BETWEEN ? AND ?"
" LIMIT 65536", -1,
&db->fts_search_paths, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT json_data FROM document_index"
" WHERE id=?", -1,
&db->fts_get_document, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT DISTINCT tag FROM tag"
" WHERE tag GLOB (? || '*') ORDER BY tag LIMIT 100", -1,
&db->fts_suggest_tag, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT tag, count(*) FROM tag GROUP BY tag", -1,
&db->fts_get_tags, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT path, count FROM path_index"
" WHERE (index_id=?1 OR ?1 IS NULL) AND depth BETWEEN ? AND ?"
" AND (path = ?4 or path GLOB ?5)"
" LIMIT 65536", -1,
&db->fts_search_paths_w_prefix, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT path, count FROM path_index"
" WHERE depth BETWEEN ? AND ?"
" AND path GLOB ?"
" LIMIT 65536", -1,
&db->fts_suggest_paths, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT * FROM stats", -1,
&db->fts_date_range, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT mime, sum(count) FROM mime_index WHERE mime is not NULL GROUP BY mime", -1,
&db->fts_get_mimetypes, NULL));
sqlite3_create_function(
db->db,
"random_seeded",
1,
SQLITE_UTF8,
NULL,
random_func,
NULL,
NULL
);
sqlite3_create_function(
db->db,
"path_parent",
@@ -180,8 +291,33 @@ void database_open(database_t *db) {
NULL,
NULL
);
sqlite3_create_function(
db->db,
"tag_matches",
1,
SQLITE_UTF8,
&db->tag_array,
tag_matches_func,
NULL,
NULL
);
}
if (db->type == FTS_DATABASE || db->type == INDEX_DATABASE) {
// Tag table is the same schema for FTS database & index database
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"INSERT INTO tag (id, tag) VALUES (?,?) ON CONFLICT DO NOTHING;",
-1,
&db->write_tag_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"DELETE FROM tag WHERE id=? AND tag=?;",
-1,
&db->delete_tag_stmt, NULL));
}
}
void database_close(database_t *db, int optimize) {
@@ -193,7 +329,9 @@ void database_close(database_t *db, int optimize) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA optimize;", NULL, NULL, NULL));
}
sqlite3_close(db->db);
if (db->db) {
sqlite3_close(db->db);
}
if (db->type == IPC_PRODUCER_DATABASE) {
remove(db->filename);
@@ -622,3 +760,39 @@ void database_add_work(database_t *db, job_t *job) {
pthread_cond_signal(&db->ipc_ctx->has_work_cond);
pthread_mutex_unlock(&db->ipc_ctx->mutex);
}
void database_write_tag(database_t *db, char *doc_id, char *tag) {
sqlite3_bind_text(db->write_tag_stmt, 1, doc_id, -1, SQLITE_STATIC);
sqlite3_bind_text(db->write_tag_stmt, 2, tag, -1, SQLITE_STATIC);
CRASH_IF_STMT_FAIL(sqlite3_step(db->write_tag_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_tag_stmt));
}
void database_delete_tag(database_t *db, char *doc_id, char *tag) {
sqlite3_bind_text(db->delete_tag_stmt, 1, doc_id, -1, SQLITE_STATIC);
sqlite3_bind_text(db->delete_tag_stmt, 2, tag, -1, SQLITE_STATIC);
CRASH_IF_STMT_FAIL(sqlite3_step(db->delete_tag_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->delete_tag_stmt));
}
cJSON *database_get_document(database_t *db, char *doc_id) {
sqlite3_bind_text(db->get_document, 1, doc_id, -1, SQLITE_STATIC);
int ret = sqlite3_step(db->get_document);
CRASH_IF_STMT_FAIL(ret);
cJSON *json;
if (ret == SQLITE_ROW) {
const char *json_str = sqlite3_column_text(db->get_document, 0);
json = cJSON_Parse(json_str);
} else {
json = NULL;
}
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->get_document));
return json;
}

View File

@@ -33,6 +33,16 @@ typedef enum {
JOB_PARSE_JOB
} job_type_t;
typedef enum {
FTS_SORT_INVALID,
FTS_SORT_SCORE,
FTS_SORT_SIZE,
FTS_SORT_MTIME,
FTS_SORT_RANDOM,
FTS_SORT_NAME,
FTS_SORT_ID,
} fts_sort_t;
typedef struct {
job_type_t type;
union {
@@ -53,6 +63,11 @@ typedef struct {
char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t;
typedef struct {
double date_min;
double date_max;
} database_summary_stats_t;
typedef struct database {
char filename[PATH_MAX];
database_type_t type;
@@ -67,12 +82,27 @@ typedef struct database {
sqlite3_stmt *write_document_stmt;
sqlite3_stmt *write_document_sidecar_stmt;
sqlite3_stmt *write_thumbnail_stmt;
sqlite3_stmt *get_document;
sqlite3_stmt *delete_tag_stmt;
sqlite3_stmt *write_tag_stmt;
sqlite3_stmt *insert_parse_job_stmt;
sqlite3_stmt *insert_index_job_stmt;
sqlite3_stmt *pop_parse_job_stmt;
sqlite3_stmt *pop_index_job_stmt;
sqlite3_stmt *fts_search_paths;
sqlite3_stmt *fts_search_paths_w_prefix;
sqlite3_stmt *fts_suggest_paths;
sqlite3_stmt *fts_date_range;
sqlite3_stmt *fts_get_mimetypes;
sqlite3_stmt *fts_get_document;
sqlite3_stmt *fts_suggest_tag;
sqlite3_stmt *fts_get_tags;
char **tag_array;
database_ipc_ctx_t *ipc_ctx;
} database_t;
@@ -134,7 +164,7 @@ database_iterator_t *database_create_treemap_iterator(database_t *db, long thres
treemap_row_t database_treemap_iter(database_iterator_t *iter);
#define database_treemap_iter_foreach(element, iter) \
for (treemap_row_t element = database_treemap_iter(iter); element.path != NULL; element = database_treemap_iter(iter))
for (treemap_row_t element = database_treemap_iter(iter); (element).path != NULL; (element) = database_treemap_iter(iter))
void database_generate_stats(database_t *db, double treemap_threshold);
@@ -145,14 +175,12 @@ job_t *database_get_work(database_t *db, job_type_t job_type);
void database_add_work(database_t *db, job_t *job);
//void database_index(database_t *db);
cJSON *database_get_stats(database_t *db, database_stat_type_d type);
#define CRASH_IF_STMT_FAIL(x) do { \
int return_value = x; \
if (return_value != SQLITE_DONE && return_value != SQLITE_ROW) { \
LOG_FATALF("database.c", "Sqlite error @ database.c:%d : (%d) %s", __LINE__, return_value, sqlite3_errmsg(db->db)); \
LOG_FATALF("database.c", "Sqlite error @ %s:%d : (%d) %s", __BASE_FILE__, __LINE__, return_value, sqlite3_errmsg(db->db)); \
} \
} while (0)
@@ -169,4 +197,33 @@ void database_fts_index(database_t *db);
void database_fts_optimize(database_t *db);
#endif //SIST2_DATABASE_H
cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_min, int depth_max, const char *prefix,
int suggest);
cJSON *database_fts_get_mimetypes(database_t *db);
database_summary_stats_t database_fts_get_date_range(database_t *db);
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
long size_max, long date_min, long date_max, int page_size,
char **index_ids, char **mime_types, char **tags, int sort_asc,
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
int highlight, int highlight_context_size);
void database_write_tag(database_t *db, char *doc_id, char *tag);
void database_delete_tag(database_t *db, char *doc_id, char *tag);
void database_fts_detach(database_t *db);
cJSON *database_fts_get_document(database_t *db, char *doc_id);
database_summary_stats_t database_fts_sync_tags(database_t *db);
cJSON *database_fts_suggest_tag(database_t *db, char *prefix);
cJSON *database_fts_get_tags(database_t *db);
cJSON *database_get_document(database_t *db, char *doc_id);
#endif

View File

@@ -1,6 +1,13 @@
#include "database.h"
#include "src/ctx.h"
void database_fts_detach(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "DETACH DATABASE fts",
NULL, NULL, NULL
));
}
void database_fts_attach(database_t *db, const char *fts_database_path) {
LOG_DEBUGF("database_fts.c", "Attaching to %s", fts_database_path);
@@ -16,62 +23,137 @@ void database_fts_attach(database_t *db, const char *fts_database_path) {
sqlite3_finalize(stmt);
}
int database_fts_get_max_path_depth(database_t *db) {
sqlite3_stmt *stmt;
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT MAX(depth) FROM path_tmp", -1, &stmt, NULL));
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
int max_depth = sqlite3_column_int(stmt, 0);
sqlite3_finalize(stmt);
return max_depth;
}
void database_fts_index(database_t *db) {
LOG_INFO("database_fts.c", "Creating content table.");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"WITH docs AS (SELECT document.id as id,\n"
" (SELECT id FROM descriptor) as index_id,\n"
" size,\n"
" document.json_data ->> 'path' as path,\n"
" length(document.json_data->>'path') - length(REPLACE(document.json_data->>'path', '/', '')) as path_depth,\n"
" document.json_data ->> 'mime' as mime,\n"
" mtime,\n"
" CASE\n"
" WHEN sc.json_data IS NULL THEN CASE\n"
" WHEN t.tag IS NULL THEN json_set(\n"
" document.json_data, '$._id',\n"
" document.id, '$.size',\n"
" document.size, '$.mtime',\n"
" document.mtime)\n"
" ELSE json_set(document.json_data, '$._id',\n"
" document.id, '$.size',\n"
" document.size, '$.mtime',\n"
" document.mtime, '$.tag',\n"
" json_group_array(t.tag)) END\n"
" ELSE CASE\n"
" WHEN t.tag IS NULL THEN json_patch(\n"
" json_set(document.json_data, '$._id', document.id, '$.size',\n"
" document.size, '$.mtime', document.mtime),\n"
" sc.json_data)\n"
" ELSE json_set(json_patch(document.json_data, sc.json_data), '$._id',\n"
" document.id, '$.size', document.size, '$.mtime',\n"
" document.mtime, '$.tag',\n"
" json_group_array(t.tag)) END END as json_data\n"
" FROM document\n"
" LEFT JOIN document_sidecar sc ON document.id = sc.id\n"
" LEFT JOIN tag t ON document.id = t.id\n"
" GROUP BY document.id)\n"
"INSERT\n"
"INTO fts.document_index (id, index_id, size, path, path_depth, mtime, mime, json_data)\n"
"SELECT *\n"
"FROM docs\n"
"WHERE true\n"
"on conflict (id, index_id) do update set size=excluded.size,\n"
" mtime=excluded.mtime,\n"
" json_data=excluded.json_data;",
"WITH docs AS ("
" SELECT document.id as id, (SELECT id FROM descriptor) as index_id, size,"
" document.json_data ->> 'name' as name,"
" document.json_data ->> 'path' as path,"
" mtime,"
" document.json_data ->> 'mime' as mime,"
" CASE"
" WHEN sc.json_data IS NULL THEN"
" json_set(document.json_data, "
" '$._id',document.id,"
" '$.size',document.size, "
" '$.mtime',document.mtime)"
" ELSE json_patch("
" json_set(document.json_data,"
" '$._id',document.id,"
" '$.size',document.size,"
" '$.mtime', document.mtime),"
" sc.json_data) END"
" FROM document"
" LEFT JOIN document_sidecar sc ON document.id = sc.id"
" GROUP BY document.id)"
" INSERT"
" INTO fts.document_index (id, index_id, size, name, path, mtime, mime, json_data)"
" SELECT * FROM docs WHERE true"
" on conflict (id, index_id) do update set "
" size=excluded.size, mtime=excluded.mtime, mime=excluded.mime, json_data=excluded.json_data;",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Deleting old documents.");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM fts.document_index"
" WHERE id IN (SELECT id FROM delete_list)"
" AND index_id = (SELECT id FROM descriptor);",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Generating summary stats");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM fts.stats", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "INSERT INTO fts.stats "
"SELECT min(mtime), max(mtime) FROM fts.document_index",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Generating mime index");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "DELETE FROM fts.mime_index;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "INSERT INTO fts.mime_index (index_id, mime, count) "
"SELECT index_id, mime, count(*) FROM fts.document_index GROUP BY index_id, mime",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Generating path index");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"CREATE TEMP TABLE path_tmp ("
" path TEXT,"
" index_id TEXT,"
" count INTEGER NOT NULL,"
" depth INTEGER NOT NULL,"
" children INTEGER NOT NULL DEFAULT(0),"
" total INTEGER AS (count + children),"
" PRIMARY KEY (path, index_id)"
");", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"INSERT INTO path_tmp (path, index_id, count, depth)"
" SELECT path, index_id, count(*), CASE WHEN length(json_data->>'path') == 0 THEN 0"
" ELSE 1 + length(json_data->>'path') - length(REPLACE(json_data->>'path', '/', ''))"
" END as depth FROM document_index WHERE depth > 0"
" GROUP BY path", NULL, NULL, NULL));
int max_depth = database_fts_get_max_path_depth(db);
for (int i = max_depth; i > 1; i--) {
sqlite3_stmt *stmt;
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"INSERT INTO path_tmp (path, index_id, children, depth, count)"
" SELECT path_parent(path) parent, index_id, (SELECT COALESCE(sum(count), 0) FROM path_tmp WHERE path "
" BETWEEN path_parent(p.path) || '/' AND path_parent(p.path) || '/𘚟' AND index_id = p.index_id) as cnt, depth-1, 0 "
" FROM path_tmp p WHERE depth=? GROUP BY parent"
" ON CONFLICT(path, index_id) DO UPDATE SET children=excluded.children",
-1, &stmt, NULL));
sqlite3_bind_int(stmt, 1, i);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
LOG_DEBUGF("database_fts.c", "Path index depth %d (%d)", i, sqlite3_changes(db->db));
}
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM path_index;"
"INSERT INTO path_index (path, index_id, count, depth) SELECT path, index_id, total, depth FROM path_tmp",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Generating search index.");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "INSERT INTO search(search) VALUES ('delete-all')",
NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE\n"
"FROM fts.document_index\n"
"WHERE id IN (SELECT id FROM delete_list)\n"
" AND index_id = (SELECT id FROM descriptor);",
NULL, NULL, NULL
));
"INSERT INTO search(rowid, name, content, title) SELECT id, name, content, title from document_view",
NULL, NULL, NULL));
}
void database_fts_optimize(database_t *db) {
@@ -81,8 +163,642 @@ void database_fts_optimize(database_t *db) {
db->db,
"INSERT INTO search(search) VALUES('optimize');",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Optimized fts5 table.");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA fts.optimize;", NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "optimized indices.");
}
cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_min, int depth_max, const char *prefix,
int suggest) {
sqlite3_stmt *stmt;
if (suggest) {
stmt = db->fts_suggest_paths;
sqlite3_bind_int(stmt, 1, depth_min);
sqlite3_bind_int(stmt, 2, depth_max);
if (prefix) {
char *prefix_glob = malloc(strlen(prefix) + 2);
sprintf(prefix_glob, "%s*", prefix);
sqlite3_bind_text(stmt, 3, prefix_glob, -1, SQLITE_TRANSIENT);
free(prefix_glob);
}
} else if (prefix) {
stmt = db->fts_search_paths_w_prefix;
if (index_id) {
sqlite3_bind_text(stmt, 1, index_id, -1, SQLITE_STATIC);
} else {
sqlite3_bind_null(stmt, 1);
}
sqlite3_bind_int(stmt, 2, depth_min);
sqlite3_bind_int(stmt, 3, depth_max);
char *prefix_glob = malloc(strlen(prefix) + 3);
sprintf(prefix_glob, "%s/*", prefix);
sqlite3_bind_text(stmt, 4, prefix, -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 5, prefix_glob, -1, SQLITE_TRANSIENT);
free(prefix_glob);
} else {
stmt = db->fts_search_paths;
if (index_id) {
sqlite3_bind_text(stmt, 1, index_id, -1, SQLITE_STATIC);
} else {
sqlite3_bind_null(stmt, 1);
}
sqlite3_bind_int(stmt, 2, depth_min);
sqlite3_bind_int(stmt, 3, depth_max);
}
cJSON *json = cJSON_CreateArray();
int ret;
do {
ret = sqlite3_step(stmt);
CRASH_IF_STMT_FAIL(ret);
if (ret == SQLITE_DONE) {
break;
}
cJSON *row = cJSON_CreateObject();
cJSON_AddStringToObject(row, "path", (const char *) sqlite3_column_text(stmt, 0));
cJSON_AddNumberToObject(row, "count", (double) sqlite3_column_int64(stmt, 1));
cJSON_AddItemToArray(json, row);
} while (TRUE);
sqlite3_reset(stmt);
return json;
}
cJSON *database_fts_get_mimetypes(database_t *db) {
cJSON *json = cJSON_CreateArray();
int ret;
do {
ret = sqlite3_step(db->fts_get_mimetypes);
CRASH_IF_STMT_FAIL(ret);
if (ret == SQLITE_DONE) {
break;
}
cJSON *row = cJSON_CreateObject();
cJSON_AddStringToObject(row, "mime", (const char *) sqlite3_column_text(db->fts_get_mimetypes, 0));
cJSON_AddNumberToObject(row, "count", (double) sqlite3_column_int64(db->fts_get_mimetypes, 1));
cJSON_AddItemToArray(json, row);
} while (TRUE);
sqlite3_reset(db->fts_get_mimetypes);
return json;
}
const char *size_where_clause(long size_min, long size_max) {
if (size_min > 0 && size_max > 0) {
return "size BETWEEN @size_min AND @size_max";
} else if (size_min > 0) {
return "size >= @size_min";
} else if (size_max > 0) {
return "size <= @size_max";
}
return NULL;
}
const char *date_where_clause(long date_min, long date_max) {
if (date_min > 0 && date_max > 0) {
return "mtime BETWEEN @date_min AND @date_max";
} else if (date_min > 0) {
return "mtime >= @date_min";
} else if (date_max > 0) {
return "mtime <= @date_max";
}
return NULL;
}
int array_length(char **arr) {
if (arr == NULL) {
return 0;
}
int count = -1;
while (arr[++count] != NULL);
return count;
}
#define INDEX_ID_PARAM_OFFSET (10)
#define MIME_PARAM_OFFSET (INDEX_ID_PARAM_OFFSET + 1000)
char *build_where_clause(const char *path_where, const char *size_where, const char *date_where,
const char *index_id_where, const char *mime_where, const char *query_where,
const char *after_where, const char *tags_where) {
char *where = calloc(
strlen(index_id_where)
+ (query_where ? strlen(query_where) + sizeof(" AND ") : 0)
+ (path_where ? strlen(path_where) + sizeof(" AND ") : 0)
+ (size_where ? strlen(size_where) + sizeof(" AND ") : 0)
+ (date_where ? strlen(date_where) + sizeof(" AND ") : 0)
+ (after_where ? strlen(after_where) + sizeof(" AND ") : 0)
+ (tags_where ? strlen(tags_where) + sizeof(" AND ") : 0)
+ (mime_where ? strlen(mime_where) + sizeof(" AND ") : 0) + 1,
sizeof(char)
);
strcat(where, index_id_where);
if (query_where) {
strcat(where, " AND ");
strcat(where, query_where);
}
if (path_where) {
strcat(where, " AND ");
strcat(where, path_where);
}
if (size_where) {
strcat(where, " AND ");
strcat(where, size_where);
}
if (date_where) {
strcat(where, " AND ");
strcat(where, date_where);
}
if (mime_where) {
strcat(where, " AND ");
strcat(where, mime_where);
}
if (after_where) {
strcat(where, " AND ");
strcat(where, after_where);
}
if (tags_where) {
strcat(where, " AND ");
strcat(where, tags_where);
}
return where;
}
char *index_ids_where_clause(char **index_ids) {
int param_count = array_length(index_ids);
char *clause = malloc(13 + 2 + 6 * param_count);
strcpy(clause, "index_id IN (");
for (int i = 0; i < param_count; i++) {
char param[10];
snprintf(param, sizeof(param), "?%d%s",
INDEX_ID_PARAM_OFFSET + i, i == param_count - 1 ? "" : ",");
strcat(clause, param);
}
strcat(clause, ")");
return clause;
}
char *mime_types_where_clause(char **mime_types) {
int param_count = array_length(mime_types);
if (param_count == 0) {
return NULL;
}
char *clause = malloc(9 + 2 + 6 * param_count);
strcpy(clause, "mime IN (");
for (int i = 0; i < param_count; i++) {
char param[10];
snprintf(param, sizeof(param), "?%d%s",
MIME_PARAM_OFFSET + i, i == param_count - 1 ? "" : ",");
strcat(clause, param);
}
strcat(clause, ")");
return clause;
}
const char *path_where_clause(const char *path) {
if (path == NULL || strlen(path) == 0) {
return NULL;
}
return "(path = @path or path GLOB @path_glob)";
}
const char *get_sort_var(fts_sort_t sort) {
switch (sort) {
case FTS_SORT_SCORE:
// Round to 14 decimal places to avoid precision problems when converting to JSON...
return "round(rank, 14)";
case FTS_SORT_SIZE:
return "size";
case FTS_SORT_MTIME:
return "mtime";
case FTS_SORT_RANDOM:
return "random_seeded(doc.ROWID + ?5)";
case FTS_SORT_NAME:
return "doc.name";
case FTS_SORT_ID:
return "doc.id";
default:
return NULL;
}
}
const char *match_where(const char *query) {
if (query == NULL || strlen(query) == 0) {
return NULL;
} else {
return "search MATCH ?1";
}
}
char *tags_where_clause(char **tags) {
if (tags == NULL) {
return NULL;
}
return "EXISTS (SELECT 1 FROM tag WHERE id=doc.id AND tag_matches(tag))";
}
database_summary_stats_t database_fts_get_date_range(database_t *db) {
int ret = sqlite3_step(db->fts_date_range);
CRASH_IF_STMT_FAIL(ret);
if (ret == SQLITE_DONE) {
return (database_summary_stats_t) {0, 0};
}
database_summary_stats_t stats;
stats.date_min = (double) sqlite3_column_int64(db->fts_date_range, 0);
stats.date_max = (double) sqlite3_column_int64(db->fts_date_range, 1);
sqlite3_reset(db->fts_date_range);
return stats;
}
char *get_after_where(char **after, fts_sort_t sort) {
if (after == NULL) {
return NULL;
}
return "(sort_var, doc.ROWID) > (?3, ?4)";
}
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
long size_max, long date_min, long date_max, int page_size,
char **index_ids, char **mime_types, char **tags, int sort_asc,
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
int highlight, int highlight_context_size) {
char path_glob[PATH_MAX * 2];
snprintf(path_glob, sizeof(path_glob), "%s/*", path);
const char *path_where = path_where_clause(path);
const char *size_where = size_where_clause(size_min, size_max);
const char *date_where = date_where_clause(date_min, date_max);
const char *index_id_where = index_ids_where_clause(index_ids);
const char *mime_where = mime_types_where_clause(mime_types);
const char *query_where = match_where(query);
const char *after_where = get_after_where(after, sort);
const char *tags_where = tags_where_clause(tags);
if (!query_where && sort == FTS_SORT_SCORE) {
// If query is NULL, then sort by id instead
sort = FTS_SORT_ID;
}
char *agg_where;
char *where = build_where_clause(path_where, size_where, date_where, index_id_where, mime_where, query_where,
after_where, tags_where);
if (fetch_aggregations) {
agg_where = build_where_clause(path_where, size_where, date_where, index_id_where, mime_where, query_where,
NULL, tags_where);
}
const char *json_object_sql;
if (highlight && query_where != NULL) {
json_object_sql = "json_remove(json_set(doc.json_data,"
"'$.index', doc.index_id,"
"'$._highlight.name', snippet(search, 0, '<mark>', '</mark>', '', ?6),"
"'$._highlight.content', snippet(search, 1, '<mark>', '</mark>', '', ?6)),"
"'$.content')";
} else {
json_object_sql = "json_remove(json_set(doc.json_data,"
"'$.index', doc.index_id),"
"'$.content')";
}
char *sql;
char *agg_sql;
if (query_where) {
asprintf(
&sql,
"SELECT"
" %s, %s as sort_var, doc.ROWID"
" FROM search"
" INNER JOIN document_index doc on doc.ROWID = search.ROWID"
" WHERE %s"
" ORDER BY sort_var%s, doc.ROWID"
" LIMIT ?2",
json_object_sql, get_sort_var(sort),
where,
sort_asc ? "" : "DESC");
if (fetch_aggregations) {
asprintf(&agg_sql,
"SELECT count(*), sum(size)"
" FROM search"
" INNER JOIN document_index doc on doc.ROWID = search.ROWID"
" WHERE search MATCH ?1"
" AND %s", agg_where);
}
} else {
asprintf(
&sql,
"SELECT"
" %s, %s as sort_var, doc.ROWID"
" FROM document_index doc"
" WHERE %s"
" ORDER BY sort_var%s,doc.ROWID"
" LIMIT ?2",
json_object_sql, get_sort_var(sort),
where,
sort_asc ? "" : " DESC");
if (fetch_aggregations) {
asprintf(&agg_sql,
"SELECT count(*), sum(size)"
" FROM document_index doc"
" WHERE %s", agg_where);
}
}
sqlite3_stmt *stmt;
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, sql, -1, &stmt, NULL));
if (query_where) {
sqlite3_bind_text(stmt, 1, query, -1, SQLITE_STATIC);
}
sqlite3_bind_int(stmt, 2, page_size);
if (index_ids) {
array_foreach(index_ids) {
sqlite3_bind_text(stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i], -1, SQLITE_STATIC);
}
}
if (mime_types) {
array_foreach(mime_types) {
sqlite3_bind_text(stmt, MIME_PARAM_OFFSET + i, mime_types[i], -1, SQLITE_STATIC);
}
}
if (tags) {
db->tag_array = tags;
}
if (size_min > 0) {
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@size_min"), size_min);
}
if (size_max > 0) {
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@size_max"), size_max);
}
if (date_min > 0) {
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@date_min"), date_min);
}
if (date_max > 0) {
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@date_max"), date_max);
}
if (path_where) {
sqlite3_bind_text(stmt, sqlite3_bind_parameter_index(stmt, "@path"), path, -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, sqlite3_bind_parameter_index(stmt, "@path_glob"), path_glob, -1, SQLITE_STATIC);
}
if (after_where) {
if (sort == FTS_SORT_NAME || sort == FTS_SORT_ID) {
sqlite3_bind_text(stmt, 3, after[0], -1, SQLITE_STATIC);
} else if (sort == FTS_SORT_SCORE) {
sqlite3_bind_double(stmt, 3, strtod(after[0], NULL));
} else {
sqlite3_bind_int64(stmt, 3, strtol(after[0], NULL, 10));
}
sqlite3_bind_int64(stmt, 4, strtol(after[1], NULL, 10));
}
if (sort == FTS_SORT_RANDOM) {
sqlite3_bind_int(stmt, 5, seed);
}
if (highlight) {
sqlite3_bind_int(stmt, 6, highlight_context_size);
}
cJSON *json = cJSON_CreateObject();
cJSON *hits_hits = cJSON_CreateArray();
int ret;
do {
ret = sqlite3_step(stmt);
if (ret != SQLITE_DONE && ret != SQLITE_ROW) {
break;
}
if (ret == SQLITE_DONE) {
break;
}
const char *json_str = (const char *) sqlite3_column_text(stmt, 0);
cJSON *row = cJSON_CreateObject();
cJSON *source = cJSON_Parse(json_str);
if (highlight) {
cJSON *hl = cJSON_DetachItemFromObject(source, "_highlight");
cJSON_AddItemToObject(row, "highlight", hl);
}
cJSON *id = cJSON_DetachItemFromObject(source, "_id");
cJSON_AddItemToObject(row, "_id", id);
cJSON_AddItemToObject(row, "_source", source);
cJSON *sort_info = cJSON_AddArrayToObject(row, "sort");
cJSON_AddItemToArray(
sort_info,
cJSON_CreateString((char *) sqlite3_column_text(stmt, 1))
);
cJSON_AddItemToArray(
sort_info,
cJSON_CreateString((char *) sqlite3_column_text(stmt, 2))
);
cJSON_AddItemToArray(hits_hits, row);
} while (TRUE);
sqlite3_finalize(stmt);
cJSON *hits = cJSON_AddObjectToObject(json, "hits");
cJSON_AddItemToObject(hits, "hits", hits_hits);
// Aggregations
if (fetch_aggregations) {
sqlite3_stmt *agg_stmt;
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, agg_sql, -1, &agg_stmt, NULL));
if (index_ids) {
array_foreach(index_ids) {
sqlite3_bind_text(agg_stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i], -1, SQLITE_STATIC);
}
}
if (mime_types) {
array_foreach(mime_types) {
sqlite3_bind_text(agg_stmt, MIME_PARAM_OFFSET + i, mime_types[i], -1, SQLITE_STATIC);
}
}
if (query_where) {
sqlite3_bind_text(agg_stmt, 1, query, -1, SQLITE_STATIC);
}
if (size_min > 0) {
sqlite3_bind_int64(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@size_min"), size_min);
}
if (size_max > 0) {
sqlite3_bind_int64(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@size_max"), size_max);
}
if (date_min > 0) {
sqlite3_bind_int64(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@date_min"), date_min);
}
if (date_max > 0) {
sqlite3_bind_int64(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@date_max"), date_max);
}
if (path_where) {
sqlite3_bind_text(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@path"), path, -1, SQLITE_STATIC);
sqlite3_bind_text(agg_stmt, sqlite3_bind_parameter_index(agg_stmt, "@path_glob"), path_glob, -1,
SQLITE_STATIC);
}
int agg_ret = sqlite3_step(agg_stmt);
if (agg_ret == SQLITE_ROW) {
cJSON *aggregations = cJSON_AddObjectToObject(json, "aggregations");
cJSON *total_count = cJSON_AddObjectToObject(aggregations, "total_count");
cJSON_AddNumberToObject(total_count, "value", sqlite3_column_double(agg_stmt, 0));
cJSON *total_size = cJSON_AddObjectToObject(aggregations, "total_size");
cJSON_AddNumberToObject(total_size, "value", sqlite3_column_double(agg_stmt, 1));
} else {
cJSON *aggregations = cJSON_AddObjectToObject(json, "aggregations");
cJSON *total_count = cJSON_AddObjectToObject(aggregations, "total_count");
cJSON_AddNumberToObject(total_count, "value", 0);
cJSON *total_size = cJSON_AddObjectToObject(aggregations, "total_size");
cJSON_AddNumberToObject(total_size, "value", 0);
}
sqlite3_finalize(agg_stmt);
}
// Cleanup
if (index_id_where) {
free(index_id_where);
}
if (mime_where) {
free(mime_where);
}
free(where);
free(sql);
if (fetch_aggregations) {
free(agg_where);
free(agg_sql);
}
return json;
}
database_summary_stats_t database_fts_sync_tags(database_t *db) {
LOG_INFO("database_fts.c", "Syncing tags.");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM fts.tag WHERE"
" (id, tag) NOT IN (SELECT id, tag FROM tag)",
NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"INSERT INTO fts.tag (id, tag) "
" SELECT id, tag FROM tag "
" WHERE (id, tag) NOT IN (SELECT * FROM fts.tag)",
NULL, NULL, NULL));
}
cJSON *database_fts_get_document(database_t *db, char *doc_id) {
sqlite3_bind_text(db->fts_get_document, 1, doc_id, -1, NULL);
int ret = sqlite3_step(db->fts_get_document);
cJSON *json = NULL;
if (ret == SQLITE_ROW) {
const char *json_data = (const char *) sqlite3_column_text(db->fts_get_document, 0);
json = cJSON_Parse(json_data);
} else {
CRASH_IF_STMT_FAIL(ret);
}
sqlite3_reset(db->fts_get_document);
return json;
}
cJSON *database_fts_suggest_tag(database_t *db, char *prefix) {
sqlite3_bind_text(db->fts_suggest_tag, 1, prefix, -1, NULL);
cJSON *json = cJSON_CreateArray();
int ret;
do {
ret = sqlite3_step(db->fts_suggest_tag);
CRASH_IF_STMT_FAIL(ret);
if (ret == SQLITE_DONE) {
break;
}
cJSON_AddItemToArray(
json,
cJSON_CreateString((const char *) sqlite3_column_text(db->fts_suggest_tag, 0))
);
} while (TRUE);
sqlite3_reset(db->fts_suggest_tag);
return json;
}
cJSON *database_fts_get_tags(database_t *db) {
cJSON *json = cJSON_CreateArray();
int ret;
do {
ret = sqlite3_step(db->fts_get_tags);
CRASH_IF_STMT_FAIL(ret);
if (ret == SQLITE_DONE) {
break;
}
cJSON *row = cJSON_CreateObject();
cJSON_AddStringToObject(row, "tag", (const char *) sqlite3_column_text(db->fts_get_tags, 0));
cJSON_AddNumberToObject(row, "count", sqlite3_column_int(db->fts_get_tags, 1));
cJSON_AddItemToArray(json, row);
} while (TRUE);
sqlite3_reset(db->fts_get_tags);
return json;
}

View File

@@ -3,43 +3,75 @@ const char *FtsDatabaseSchema =
" id TEXT NOT NULL,"
" index_id TEXT NOT NULL,"
" size INTEGER NOT NULL,"
" name TEXT NOT NULL,"
" path TEXT NOT NULL,"
" path_depth INT NOT NULL,"
" mtime INTEGER NOT NULL,"
" mime TEXT NOT NULL,"
" mime TEXT,"
" json_data TEXT NOT NULL,"
" PRIMARY KEY (id, index_id)"
");"
""
"CREATE VIEW IF NOT EXISTS document_view (rowid, name, content)"
"CREATE TABLE IF NOT EXISTS stats ("
" mtime_min INTEGER,"
" mtime_max INTEGER"
");"
""
"CREATE TABLE IF NOT EXISTS path_index ("
" path TEXT,"
" index_id TEXT,"
" count INTEGER NOT NULL,"
" depth INTEGER NOT NULL,"
" PRIMARY KEY (path, index_id)"
");"
""
"CREATE TABLE IF NOT EXISTS mime_index ("
" index_id TEXT,"
" mime TEXT,"
" count INT,"
" PRIMARY KEY(index_id, mime)"
");"
""
"CREATE TABLE IF NOT EXISTS tag ("
" id TEXT NOT NULL,"
" tag TEXT NOT NULL,"
" PRIMARY KEY (id, tag)"
");"
"CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag);"
"CREATE INDEX IF NOT EXISTS tag_id_idx ON tag(id);"
"CREATE TRIGGER IF NOT EXISTS tag_write_trigger"
" AFTER INSERT ON tag"
" BEGIN"
" UPDATE document_index"
" SET json_data = json_set(json_data, '$.tag', (SELECT json_group_array(tag) FROM tag WHERE id = NEW.id))"
" WHERE id = NEW.id;"
" END;"
""
"CREATE TRIGGER IF NOT EXISTS tag_delete_trigger"
" AFTER DELETE ON tag"
" BEGIN"
" UPDATE document_index"
" SET json_data = json_set(json_data, '$.tag', (SELECT json_group_array(tag) FROM tag WHERE id = OLD.id))"
" WHERE id = OLD.id;"
" END;"
""
"CREATE VIEW IF NOT EXISTS document_view (id, name, content, title)"
" AS"
" SELECT rowid,"
" json_data->>'name',"
" json_data->>'content'"
" json_data->>'content',"
" json_data->>'title'"
" FROM document_index;"
""
"CREATE INDEX IF NOT EXISTS document_index_size_idx ON document_index (size);"
"CREATE INDEX IF NOT EXISTS document_index_mtime_idx ON document_index (mtime);"
"CREATE INDEX IF NOT EXISTS document_index_mime_idx ON document_index (mime);"
"CREATE INDEX IF NOT EXISTS document_index_path_idx ON document_index (path);"
"CREATE INDEX IF NOT EXISTS document_index_path_depth_idx ON document_index (path_depth);"
""
"CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5 ("
" name,"
" content,"
" content='document_view'"
" name,"
" content,"
" title,"
" content='document_view',"
" content_rowid='id'"
");"
""
"CREATE TRIGGER IF NOT EXISTS on_insert AFTER INSERT ON document_index BEGIN"
" INSERT INTO search(rowid, name, content) VALUES (new.rowid, new.json_data->>'name', new.json_data->>'content');"
"END;"
"CREATE TRIGGER IF NOT EXISTS on_delete AFTER DELETE ON document_index BEGIN"
" INSERT INTO search(search, name, content) VALUES('delete', old.json_data->>'name', old.json_data->>'content');"
"END;"
"CREATE TRIGGER IF NOT EXISTS on_update AFTER UPDATE ON document_index BEGIN"
" INSERT INTO search(search, rowid, name, content) VALUES('delete', old.rowid, old.json_data->>'name', old.json_data->>'content');"
" INSERT INTO search(rowid, name, content) VALUES (new.rowid, new.json_data->>'name', new.json_data->>'content');"
"END;";
// name^8, content^3, title^8
"INSERT INTO search(search, rank) VALUES('rank', 'bm25(8, 3, 8)');"
"";
const char *IpcDatabaseSchema =
"CREATE TABLE parse_job ("
@@ -78,7 +110,8 @@ const char *IndexDatabaseSchema =
""
"CREATE TABLE tag ("
" id TEXT NOT NULL,"
" tag TEXT NOT NULL"
" tag TEXT NOT NULL,"
" PRIMARY KEY (id, tag)"
");"
""
"CREATE TABLE document_sidecar ("

View File

@@ -238,5 +238,7 @@ cJSON *database_get_stats(database_t *db, database_stat_type_d type) {
cJSON_AddItemToArray(json, row);
} while (TRUE);
sqlite3_finalize(stmt);
return json;
}