refactor index schema, remove sidecar parsing, remove TS

This commit is contained in:
2023-09-05 18:59:18 -04:00
parent b81ccebdb1
commit 8fdb832c85
84 changed files with 1420 additions and 2445 deletions

View File

@@ -4,6 +4,7 @@
#include <string.h>
#include <pthread.h>
#include "src/util.h"
#include "src/parsing/mime.h"
#include <time.h>
@@ -64,9 +65,11 @@ static int sep_rfind(const char *str) {
}
void path_parent_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
#ifdef SIST_DEBUG
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
#endif
const char *value = (const char *) sqlite3_value_text(argv[0]);
@@ -82,28 +85,27 @@ void path_parent_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
}
void random_func(sqlite3_context *ctx, int argc, UNUSED(sqlite3_value **argv)) {
#ifdef SIST_DEBUG
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_INTEGER) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
#endif
char state_buf[128] = {0,};
struct random_data buf;
int result;
char state_buf[8] = {0,};
long seed = sqlite3_value_int64(argv[0]);
initstate_r((int) seed, state_buf, sizeof(state_buf), &buf);
initstate((int) seed, state_buf, sizeof(state_buf));
random_r(&buf, &result);
sqlite3_result_int(ctx, result);
sqlite3_result_int(ctx, (int) random());
}
void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
#ifdef SIST_DEBUG
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
#endif
database_ipc_ctx_t *ipc_ctx = sqlite3_user_data(ctx);
@@ -146,6 +148,12 @@ void database_open(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA temp_store = memory;", NULL, NULL, NULL));
}
#ifdef SIST_DEBUG
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA foreign_keys = ON;", NULL, NULL, NULL));
#else
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA ignore_check_constraints = ON;", NULL, NULL, NULL));
#endif
if (db->type == INDEX_DATABASE) {
// Prepare statements;
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
@@ -154,16 +162,15 @@ void database_open(database_t *db) {
&db->select_thumbnail_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"UPDATE document SET marked=1 WHERE id=? AND mtime=? RETURNING id",
"UPDATE marked SET marked=1 WHERE id=(SELECT ROWID FROM document WHERE path=?) AND mtime=? RETURNING id",
-1,
&db->mark_document_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"REPLACE INTO document_sidecar (id, json_data) VALUES (?,?)", -1,
&db->write_document_sidecar_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"REPLACE INTO document (id, mtime, size, json_data, version) VALUES (?, ?, ?, ?, (SELECT max(id) FROM version));",
"INSERT INTO document (path, parent, mime, mtime, size, thumbnail_count, json_data, version) "
"VALUES (?, (SELECT id FROM document WHERE path=?), ?, ?, ?, ?, ?, (SELECT max(id) FROM version)) "
"ON CONFLICT (path) DO UPDATE SET json_data=excluded.json_data "
"RETURNING id;",
-1,
&db->write_document_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
@@ -173,7 +180,12 @@ void database_open(database_t *db) {
&db->write_thumbnail_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "SELECT json_data FROM document WHERE id=?", -1,
db->db, "SELECT json_set(json_data, "
"'$._id', CAST (doc.id AS TEXT),"
"'$.thumbnail', doc.thumbnail_count,"
"'$.mime', m.name,"
"'$.size', doc.size"
") FROM document doc LEFT JOIN mime m ON m.id=doc.mime WHERE doc.id=?", -1,
&db->get_document, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
@@ -184,6 +196,12 @@ void database_open(database_t *db) {
db->db, "SELECT embedding FROM embedding WHERE id=? AND model_id=? AND start=0", -1,
&db->get_embedding, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"INSERT INTO tag (id, tag) VALUES (?,?) ON CONFLICT DO NOTHING;",
-1,
&db->write_tag_stmt, NULL));
// Create functions
sqlite3_create_function(
db->db,
@@ -228,7 +246,7 @@ void database_open(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"DELETE FROM index_job WHERE id = (SELECT MIN(id) FROM index_job)"
" RETURNING doc_id,type,line;",
" RETURNING sid,type,line;",
-1, &db->pop_index_job_stmt, NULL
));
@@ -243,7 +261,7 @@ void database_open(database_t *db) {
db->db, "INSERT INTO parse_job (filepath,mtime,st_size) VALUES (?,?,?);", -1,
&db->insert_parse_job_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "INSERT INTO index_job (doc_id,type,line) VALUES (?,?,?);", -1,
db->db, "INSERT INTO index_job (sid,type,line) VALUES (?,?,?);", -1,
&db->insert_index_job_stmt, NULL));
} else if (db->type == FTS_DATABASE) {
@@ -294,6 +312,12 @@ void database_open(database_t *db) {
db->db, "SELECT mime, sum(count) FROM mime_index WHERE mime is not NULL GROUP BY mime", -1,
&db->fts_get_mimetypes, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"INSERT INTO tag (id, index_id, tag) VALUES (?,?,?) ON CONFLICT DO NOTHING;",
-1,
&db->fts_write_tag_stmt, NULL));
sqlite3_create_function(
db->db,
"random_seeded",
@@ -340,13 +364,6 @@ void database_open(database_t *db) {
}
if (db->type == FTS_DATABASE || db->type == INDEX_DATABASE) {
// Tag table is the same schema for FTS database & index database
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"INSERT INTO tag (id, tag) VALUES (?,?) ON CONFLICT DO NOTHING;",
-1,
&db->write_tag_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"DELETE FROM tag WHERE id=? AND tag=?;",
@@ -376,8 +393,8 @@ void database_close(database_t *db, int optimize) {
db = NULL;
}
void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *return_value_len) {
sqlite3_bind_text(db->select_thumbnail_stmt, 1, id, -1, SQLITE_STATIC);
void *database_read_thumbnail(database_t *db, int doc_id, int num, size_t *return_value_len) {
sqlite3_bind_int(db->select_thumbnail_stmt, 1, doc_id);
sqlite3_bind_int(db->select_thumbnail_stmt, 2, num);
int ret = sqlite3_step(db->select_thumbnail_stmt);
@@ -410,7 +427,7 @@ void database_write_index_descriptor(database_t *db, index_descriptor_t *desc) {
sqlite3_prepare_v2(db->db, "INSERT INTO descriptor (id, version_major, version_minor, version_patch,"
" root, name, rewrite_url, timestamp) VALUES (?,?,?,?,?,?,?,?);", -1, &stmt, NULL);
sqlite3_bind_text(stmt, 1, desc->id, -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, 1, desc->id);
sqlite3_bind_int(stmt, 2, desc->version_major);
sqlite3_bind_int(stmt, 3, desc->version_minor);
sqlite3_bind_int(stmt, 4, desc->version_patch);
@@ -433,7 +450,7 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
const char *id = (char *) sqlite3_column_text(stmt, 0);
int id = sqlite3_column_int(stmt, 0);
int v_major = sqlite3_column_int(stmt, 1);
int v_minor = sqlite3_column_int(stmt, 2);
int v_patch = sqlite3_column_int(stmt, 3);
@@ -443,7 +460,7 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
int timestamp = sqlite3_column_int(stmt, 7);
index_descriptor_t *desc = malloc(sizeof(index_descriptor_t));
strcpy(desc->id, id);
desc->id = id;
snprintf(desc->version, sizeof(desc->version), "%d.%d.%d", v_major, v_minor, v_patch);
desc->version_major = v_major;
desc->version_minor = v_minor;
@@ -461,7 +478,8 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
database_iterator_t *database_create_delete_list_iterator(database_t *db) {
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list;", -1, &stmt, NULL);
sqlite3_prepare_v2(db->db, "SELECT doc.id FROM delete_list "
"INNER JOIN document doc ON doc.ROWID = delete_list.id;", -1, &stmt, NULL);
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
@@ -471,14 +489,11 @@ database_iterator_t *database_create_delete_list_iterator(database_t *db) {
return iter;
}
char *database_delete_list_iter(database_iterator_t *iter) {
int database_delete_list_iter(database_iterator_t *iter) {
int ret = sqlite3_step(iter->stmt);
if (ret == SQLITE_ROW) {
const char *id = (const char *) sqlite3_column_text(iter->stmt, 0);
char *id_heap = malloc(strlen(id) + 1);
strcpy(id_heap, id);
return id_heap;
return sqlite3_column_int(iter->stmt, 0);
}
if (ret != SQLITE_DONE) {
@@ -491,7 +506,7 @@ char *database_delete_list_iter(database_iterator_t *iter) {
iter->stmt = NULL;
return NULL;
return 0;
}
database_iterator_t *database_create_document_iterator(database_t *db) {
@@ -507,12 +522,16 @@ database_iterator_t *database_create_document_iterator(database_t *db) {
" '$._id', document.id, "
" '$.size', document.size, "
" '$.mtime', document.mtime, "
" '$.mime', mim.name,"
" '$.thumbnail', document.thumbnail_count, "
" '$.tag', json_group_array((SELECT tag FROM tag WHERE document.id = tag.id)))"
" ELSE"
" json_set(document.json_data,"
" '$._id', document.id,"
" '$.size', document.size,"
" '$.mtime', document.mtime,"
" '$.mime', mim.name,"
" '$.thumbnail', document.thumbnail_count, "
" '$.tag', json_group_array((SELECT tag FROM tag WHERE document.id = tag.id)),"
" '$.emb', json_group_object(m.path, json(emb_to_json(emb.embedding))),"
" '$.embedding', 1)"
@@ -520,6 +539,7 @@ database_iterator_t *database_create_document_iterator(database_t *db) {
" FROM document"
" LEFT JOIN embedding emb ON document.id = emb.id"
" LEFT JOIN model m ON emb.model_id = m.id"
" LEFT JOIN mime mim ON mim.id = document.mime"
" GROUP BY document.id)"
" SELECT json_set(j, '$.index', (SELECT id FROM descriptor)) FROM doc",
-1, &stmt, NULL));
@@ -573,43 +593,48 @@ cJSON *database_document_iter(database_iterator_t *iter) {
cJSON *database_incremental_scan_begin(database_t *db) {
LOG_DEBUG("database.c", "Preparing database for incremental scan");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "UPDATE document SET marked=0;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM marked;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(
sqlite3_exec(db->db, "INSERT INTO marked SELECT ROWID, 0, mtime FROM document;", NULL, NULL, NULL));
}
cJSON *database_incremental_scan_end(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM delete_list WHERE id IN (SELECT id FROM document WHERE marked=1);",
"DELETE FROM delete_list WHERE id IN (SELECT id FROM marked WHERE marked = 1);",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM thumbnail WHERE id IN (SELECT id FROM document WHERE marked=0);",
"DELETE FROM thumbnail WHERE EXISTS ("
" SELECT document.id FROM document INNER JOIN marked m ON m.id = document.ROWID"
" WHERE marked=0 and document.id = thumbnail.id)",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"INSERT INTO delete_list (id) SELECT id FROM document WHERE marked=0 ON CONFLICT DO NOTHING;",
"INSERT INTO delete_list (id) "
"SELECT id FROM marked WHERE marked=0 ON CONFLICT DO NOTHING;",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM document_sidecar WHERE id IN (SELECT id FROM document WHERE marked=0);",
"DELETE FROM document WHERE ROWID IN (SELECT id FROM marked WHERE marked=0);",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM document WHERE marked=0;",
"DELETE FROM marked;",
NULL, NULL, NULL
));
}
int database_mark_document(database_t *db, const char *id, int mtime) {
sqlite3_bind_text(db->mark_document_stmt, 1, id, -1, SQLITE_STATIC);
int database_mark_document(database_t *db, const char *path, int mtime) {
sqlite3_bind_text(db->mark_document_stmt, 1, path, -1, SQLITE_STATIC);
sqlite3_bind_int(db->mark_document_stmt, 2, mtime);
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
@@ -631,31 +656,38 @@ int database_mark_document(database_t *db, const char *id, int mtime) {
CRASH_IF_STMT_FAIL(ret);
}
void database_write_document(database_t *db, document_t *doc, const char *json_data) {
sqlite3_bind_text(db->write_document_stmt, 1, doc->doc_id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->write_document_stmt, 2, doc->mtime);
sqlite3_bind_int64(db->write_document_stmt, 3, (long) doc->size);
sqlite3_bind_text(db->write_document_stmt, 4, json_data, -1, SQLITE_STATIC);
/**
 * Insert (or, on a path conflict, update the json_data of) one document row
 * through the prepared write_document_stmt and return its id.
 *
 * @param db        database whose write_document_stmt and ipc_ctx mutex are used
 * @param doc       document metadata (filepath, parent, mime, mtime, size,
 *                  thumbnail_count are read)
 * @param json_data serialized JSON payload, or NULL to store SQL NULL
 * @return first result column of the statement (its RETURNING id)
 */
int database_write_document(database_t *db, document_t *doc, const char *json_data) {
    sqlite3_stmt *stmt = db->write_document_stmt;

    // Both paths skip the first root_len bytes
    // (presumably the index root prefix -- confirm against the scanner).
    const char *rel_path = doc->filepath + ScanCtx.index.desc.root_len;
    const char *parent_rel_path = NULL;
    if (doc->parent[0] != '\0') {
        parent_rel_path = doc->parent + ScanCtx.index.desc.root_len;
    }

    // Parameter order: path, parent path, mime, mtime, size, thumbnail_count, json_data
    sqlite3_bind_text(stmt, 1, rel_path, -1, SQLITE_STATIC);
    sqlite3_bind_text(stmt, 2, parent_rel_path, -1, SQLITE_STATIC);
    sqlite3_bind_int64(stmt, 3, doc->mime);
    sqlite3_bind_int(stmt, 4, doc->mtime);
    sqlite3_bind_int64(stmt, 5, (long) doc->size);
    sqlite3_bind_int(stmt, 6, doc->thumbnail_count);

    if (json_data == NULL) {
        sqlite3_bind_null(stmt, 7);
    } else {
        sqlite3_bind_text(stmt, 7, json_data, -1, SQLITE_STATIC);
    }

    // Step, read the returned id and reset while holding the index DB mutex.
    pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
    const int id = sqlite3_column_int(stmt, 0);
    CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(stmt));
    pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);

    return id;
}
void database_write_document_sidecar(database_t *db, const char *id, const char *json_data) {
sqlite3_bind_text(db->write_document_sidecar_stmt, 1, id, -1, SQLITE_STATIC);
sqlite3_bind_text(db->write_document_sidecar_stmt, 2, json_data, -1, SQLITE_STATIC);
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
CRASH_IF_STMT_FAIL(sqlite3_step(db->write_document_sidecar_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_document_sidecar_stmt));
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
}
void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size) {
sqlite3_bind_text(db->write_thumbnail_stmt, 1, id, -1, SQLITE_STATIC);
void database_write_thumbnail(database_t *db, int doc_id, int num, void *data, size_t data_size) {
sqlite3_bind_int(db->write_thumbnail_stmt, 1, doc_id);
sqlite3_bind_int(db->write_thumbnail_stmt, 2, num);
sqlite3_bind_blob(db->write_thumbnail_stmt, 3, data, (int) data_size, SQLITE_STATIC);
@@ -716,7 +748,7 @@ job_t *database_get_work(database_t *db, job_type_t job_type) {
} else {
job->bulk_line = malloc(sizeof(es_bulk_line_t));
}
strcpy(job->bulk_line->doc_id, (const char *) sqlite3_column_text(db->pop_index_job_stmt, 0));
strcpy(job->bulk_line->sid, (const char *) sqlite3_column_text(db->pop_index_job_stmt, 0));
job->bulk_line->type = sqlite3_column_int(db->pop_index_job_stmt, 1);
job->bulk_line->next = NULL;
@@ -767,7 +799,7 @@ void database_add_work(database_t *db, job_t *job) {
} while (ret != SQLITE_DONE && ret != SQLITE_OK);
} else if (job->type == JOB_BULK_LINE) {
do {
sqlite3_bind_text(db->insert_index_job_stmt, 1, job->bulk_line->doc_id, -1, SQLITE_STATIC);
sqlite3_bind_text(db->insert_index_job_stmt, 1, job->bulk_line->sid, -1, SQLITE_STATIC);
sqlite3_bind_int(db->insert_index_job_stmt, 2, job->bulk_line->type);
if (job->bulk_line->type != ES_BULK_LINE_DELETE) {
sqlite3_bind_text(db->insert_index_job_stmt, 3, job->bulk_line->line, -1, SQLITE_STATIC);
@@ -808,24 +840,25 @@ void database_add_work(database_t *db, job_t *job) {
pthread_mutex_unlock(&db->ipc_ctx->mutex);
}
void database_write_tag(database_t *db, char *doc_id, char *tag) {
sqlite3_bind_text(db->write_tag_stmt, 1, doc_id, -1, SQLITE_STATIC);
sqlite3_bind_text(db->write_tag_stmt, 2, tag, -1, SQLITE_STATIC);
/**
 * Attach a tag to a document through the prepared write_tag_stmt.
 *
 * write_tag_stmt is prepared as
 *   INSERT INTO tag (id, tag) VALUES (?,?) ON CONFLICT DO NOTHING;
 * i.e. it has exactly two placeholders. The previous body bound three values
 * (id, sid >> 32, tag), which put the integer into the tag column and sent the
 * real tag to the out-of-range index 3. Bind only (id, tag), mirroring
 * database_delete_tag.
 *
 * NOTE(review): the index schema declares tag.id as a reference to
 * document(id); confirm callers pass an id in that domain rather than a
 * combined (index_id << 32 | doc_id) value.
 *
 * @param db  database whose write_tag_stmt is used
 * @param sid id stored in tag.id
 * @param tag NUL-terminated tag text; must stay valid until the statement is reset
 */
void database_write_tag(database_t *db, long sid, char *tag) {
    sqlite3_stmt *stmt = db->write_tag_stmt;

    // Placeholder 1 = id, placeholder 2 = tag
    sqlite3_bind_int64(stmt, 1, sid);
    sqlite3_bind_text(stmt, 2, tag, -1, SQLITE_STATIC);

    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(stmt));
}
void database_delete_tag(database_t *db, char *doc_id, char *tag) {
sqlite3_bind_text(db->delete_tag_stmt, 1, doc_id, -1, SQLITE_STATIC);
/**
 * Remove one (id, tag) pair through the prepared delete_tag_stmt.
 *
 * @param db  database whose delete_tag_stmt is used
 * @param sid id matched against tag.id
 * @param tag NUL-terminated tag text to remove
 */
void database_delete_tag(database_t *db, long sid, char *tag) {
    sqlite3_stmt *stmt = db->delete_tag_stmt;

    sqlite3_bind_int64(stmt, 1, sid);
    sqlite3_bind_text(stmt, 2, tag, -1, SQLITE_STATIC);

    const int rc = sqlite3_step(stmt);
    CRASH_IF_STMT_FAIL(rc);

    CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(stmt));
}
cJSON *database_get_document(database_t *db, char *doc_id) {
sqlite3_bind_text(db->get_document, 1, doc_id, -1, SQLITE_STATIC);
cJSON *database_get_document(database_t *db, int doc_id) {
sqlite3_bind_int(db->get_document, 1, doc_id);
int ret = sqlite3_step(db->get_document);
CRASH_IF_STMT_FAIL(ret);
@@ -833,7 +866,7 @@ cJSON *database_get_document(database_t *db, char *doc_id) {
cJSON *json;
if (ret == SQLITE_ROW) {
const char *json_str = sqlite3_column_text(db->get_document, 0);
const char *json_str = (char *) sqlite3_column_text(db->get_document, 0);
json = cJSON_Parse(json_str);
} else {
json = NULL;
@@ -847,4 +880,24 @@ cJSON *database_get_document(database_t *db, char *doc_id) {
void database_increment_version(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "INSERT INTO version DEFAULT VALUES", NULL, NULL, NULL));
}
/**
 * Write every (id, name) pair from the built-in mime list into the `mime`
 * table, replacing rows that already exist.
 *
 * The id list returned by get_mime_ids() is walked until a 0 entry is found.
 *
 * @param db database that receives the rows
 */
void database_sync_mime_table(database_t *db) {
    unsigned int *cur = get_mime_ids();

    sqlite3_stmt *stmt;
    // Use the v2 interface like every other prepared statement in this file;
    // the legacy sqlite3_prepare() reports less specific errors from sqlite3_step().
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
            db->db,
            "REPLACE INTO mime (id, name) VALUES (?,?)", -1, &stmt, NULL));

    for (; *cur != 0; cur++) {
        sqlite3_bind_int64(stmt, 1, (long) *cur);
        // SQLITE_STATIC: SQLite does not copy the text
        // (assumes mime_get_mime_text() returns storage that outlives the step -- TODO confirm).
        sqlite3_bind_text(stmt, 2, mime_get_mime_text(*cur), -1, SQLITE_STATIC);

        CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
        // Check the reset result, as the other writers in this file do.
        CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(stmt));
    }

    sqlite3_finalize(stmt);
}

View File

@@ -81,7 +81,6 @@ typedef struct database {
sqlite3_stmt *mark_document_stmt;
sqlite3_stmt *write_document_stmt;
sqlite3_stmt *write_document_sidecar_stmt;
sqlite3_stmt *write_thumbnail_stmt;
sqlite3_stmt *get_document;
sqlite3_stmt *get_models;
@@ -103,6 +102,7 @@ typedef struct database {
sqlite3_stmt *fts_get_document;
sqlite3_stmt *fts_suggest_tag;
sqlite3_stmt *fts_get_tags;
sqlite3_stmt *fts_write_tag_stmt;
sqlite3_stmt *fts_model_size;
@@ -133,15 +133,15 @@ void database_close(database_t *, int optimize);
void database_increment_version(database_t *db);
void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size);
void database_write_thumbnail(database_t *db, int doc_id, int num, void *data, size_t data_size);
void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *return_value_len);
void *database_read_thumbnail(database_t *db, int doc_id, int num, size_t *return_value_len);
void database_write_index_descriptor(database_t *db, index_descriptor_t *desc);
index_descriptor_t *database_read_index_descriptor(database_t *db);
void database_write_document(database_t *db, document_t *doc, const char *json_data);
int database_write_document(database_t *db, document_t *doc, const char *json_data);
database_iterator_t *database_create_document_iterator(database_t *db);
@@ -154,10 +154,10 @@ cJSON *database_document_iter(database_iterator_t *);
database_iterator_t *database_create_delete_list_iterator(database_t *db);
char *database_delete_list_iter(database_iterator_t *iter);
int database_delete_list_iter(database_iterator_t *iter);
#define database_delete_list_iter_foreach(element, iter) \
for (char *(element) = database_delete_list_iter(iter); (element) != NULL; (element) = database_delete_list_iter(iter))
for (int (element) = database_delete_list_iter(iter); (element) != 0; (element) = database_delete_list_iter(iter))
cJSON *database_incremental_scan_begin(database_t *db);
@@ -166,8 +166,6 @@ cJSON *database_incremental_scan_end(database_t *db);
int database_mark_document(database_t *db, const char *id, int mtime);
void database_write_document_sidecar(database_t *db, const char *id, const char *json_data);
database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold);
treemap_row_t database_treemap_iter(database_iterator_t *iter);
@@ -206,7 +204,7 @@ void database_fts_index(database_t *db);
void database_fts_optimize(database_t *db);
cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_min, int depth_max, const char *prefix,
cJSON *database_fts_get_paths(database_t *db, int index_id, int depth_min, int depth_max, const char *prefix,
int suggest);
cJSON *database_fts_get_mimetypes(database_t *db);
@@ -215,18 +213,20 @@ database_summary_stats_t database_fts_get_date_range(database_t *db);
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
long size_max, long date_min, long date_max, int page_size,
char **index_ids, char **mime_types, char **tags, int sort_asc,
int *index_ids, char **mime_types, char **tags, int sort_asc,
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
int highlight, int highlight_context_size, int model,
const float *embedding, int embedding_size);
void database_write_tag(database_t *db, char *doc_id, char *tag);
void database_write_tag(database_t *db, long sid, char *tag);
void database_delete_tag(database_t *db, char *doc_id, char *tag);
void database_fts_write_tag(database_t *db, long sid, char *tag);
void database_delete_tag(database_t *db, long sid, char *tag);
void database_fts_detach(database_t *db);
cJSON *database_fts_get_document(database_t *db, char *doc_id);
cJSON *database_fts_get_document(database_t *db, long sid);
database_summary_stats_t database_fts_sync_tags(database_t *db);
@@ -234,7 +234,7 @@ cJSON *database_fts_suggest_tag(database_t *db, char *prefix);
cJSON *database_fts_get_tags(database_t *db);
cJSON *database_get_document(database_t *db, char *doc_id);
cJSON *database_get_document(database_t *db, int doc_id);
void cosine_sim_func(sqlite3_context *ctx, int argc, sqlite3_value **argv);
@@ -242,6 +242,8 @@ cJSON *database_get_models(database_t *db);
int database_fts_get_model_size(database_t *db, int model_id);
cJSON *database_get_embedding(database_t *db, char *doc_id, int model_id);
cJSON *database_get_embedding(database_t *db, int doc_id, int model_id);
void database_sync_mime_table(database_t *db);
#endif

View File

@@ -69,9 +69,9 @@ cJSON *database_get_models(database_t *db) {
return json;
}
cJSON *database_get_embedding(database_t *db, char *doc_id, int model_id) {
cJSON *database_get_embedding(database_t *db, int doc_id, int model_id) {
sqlite3_bind_text(db->get_embedding, 1, doc_id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->get_embedding, 1, doc_id);
sqlite3_bind_int(db->get_embedding, 2, model_id);
int ret = sqlite3_step(db->get_embedding);
CRASH_IF_STMT_FAIL(ret);

View File

@@ -42,21 +42,23 @@ void database_fts_index(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"WITH docs AS ("
" SELECT document.id as id, (SELECT id FROM descriptor) as index_id, size,"
" SELECT "
" ((SELECT id FROM descriptor) << 32) | document.id as id,"
" (SELECT id FROM descriptor) as index_id,"
" size,"
" document.json_data ->> 'name' as name,"
" document.json_data ->> 'path' as path,"
" mtime,"
" document.json_data ->> 'mime' as mime,"
" json_set(document.json_data, "
" '$._id',document.id,"
" '$.size',document.size, "
" '$.mtime',document.mtime)"
" m.name as mime,"
" thumbnail_count,"
" document.json_data"
" FROM document"
" LEFT JOIN mime m ON m.id=document.mime"
" )"
" INSERT"
" INTO fts.document_index (id, index_id, size, name, path, mtime, mime, json_data)"
" INTO fts.document_index (id, index_id, size, name, path, mtime, mime, thumbnail_count, json_data)"
" SELECT * FROM docs WHERE true"
" on conflict (id, index_id) do update set "
" on conflict (id) do update set "
" size=excluded.size, mtime=excluded.mtime, mime=excluded.mime, json_data=excluded.json_data;",
NULL, NULL, NULL));
@@ -64,13 +66,14 @@ void database_fts_index(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"REPLACE INTO fts.embedding (id, model_id, start, end, embedding)"
" SELECT id, model_id, start, end, embedding FROM embedding", NULL, NULL, NULL));
"REPLACE INTO fts.model (id, size)"
" SELECT id, size FROM model", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"INSERT INTO fts.model (id, size)"
" SELECT id, size FROM model WHERE TRUE ON CONFLICT (id) DO NOTHING", NULL, NULL, NULL));
"REPLACE INTO fts.embedding (id, model_id, start, end, embedding)"
" SELECT (SELECT id FROM descriptor) << 32 | id, model_id, start, end, embedding FROM embedding "
" WHERE TRUE ON CONFLICT (id, model_id, start) DO NOTHING;", NULL, NULL, NULL));
// TODO: delete old embeddings
@@ -172,7 +175,7 @@ void database_fts_optimize(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA fts.optimize;", NULL, NULL, NULL));
}
cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_min, int depth_max, const char *prefix,
cJSON *database_fts_get_paths(database_t *db, int index_id, int depth_min, int depth_max, const char *prefix,
int suggest) {
sqlite3_stmt *stmt;
@@ -192,7 +195,7 @@ cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_mi
} else if (prefix) {
stmt = db->fts_search_paths_w_prefix;
if (index_id) {
sqlite3_bind_text(stmt, 1, index_id, -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, 1, index_id);
} else {
sqlite3_bind_null(stmt, 1);
}
@@ -207,7 +210,7 @@ cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_mi
} else {
stmt = db->fts_search_paths;
if (index_id) {
sqlite3_bind_text(stmt, 1, index_id, -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, 1, index_id);
} else {
sqlite3_bind_null(stmt, 1);
}
@@ -290,7 +293,6 @@ const char *date_where_clause(long date_min, long date_max) {
}
int array_length(char **arr) {
if (arr == NULL) {
return 0;
}
@@ -301,6 +303,17 @@ int array_length(char **arr) {
return count;
}
/**
 * Count the entries of a 0-terminated int array.
 *
 * @param arr array whose last element is 0, or NULL
 * @return number of elements before the first 0; 0 when arr is NULL
 */
int int_array_length(const int *arr) {
    if (arr == NULL) {
        return 0;
    }

    int len = 0;
    while (arr[len] != 0) {
        len++;
    }
    return len;
}
#define INDEX_ID_PARAM_OFFSET (10)
#define MIME_PARAM_OFFSET (INDEX_ID_PARAM_OFFSET + 1000)
@@ -351,8 +364,8 @@ char *build_where_clause(const char *path_where, const char *size_where, const c
return where;
}
char *index_ids_where_clause(char **index_ids) {
int param_count = array_length(index_ids);
char *index_ids_where_clause(int *index_ids) {
int param_count = int_array_length(index_ids);
char *clause = malloc(13 + 2 + 6 * param_count);
@@ -483,7 +496,7 @@ int database_fts_get_model_size(database_t *db, int model_id) {
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
long size_max, long date_min, long date_max, int page_size,
char **index_ids, char **mime_types, char **tags, int sort_asc,
int *index_ids, char **mime_types, char **tags, int sort_asc,
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
int highlight, int highlight_context_size, int model,
const float *embedding, int embedding_size) {
@@ -524,13 +537,21 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
const char *json_object_sql;
if (highlight && query_where != NULL) {
json_object_sql = "json_set(json_remove(doc.json_data, '$.content'),"
"'$._id', CAST(doc.id AS TEXT),"
"'$.index', doc.index_id,"
"'$.thumbnail', doc.thumbnail_count,"
"'$.mime', doc.mime,"
"'$.size', doc.size,"
"'$.embedding', (CASE WHEN emb.id IS NOT NULL THEN 1 ELSE 0 END),"
"'$._highlight.name', snippet(search, 0, '<mark>', '</mark>', '', ?6),"
"'$._highlight.content', snippet(search, 1, '<mark>', '</mark>', '', ?6))";
} else {
json_object_sql = "json_set(json_remove(doc.json_data, '$.content'),"
"'$._id', CAST(doc.id AS TEXT),"
"'$.index', doc.index_id,"
"'$.thumbnail', doc.thumbnail_count,"
"'$.mime', doc.mime,"
"'$.size', doc.size,"
"'$.embedding', (CASE WHEN emb.id IS NOT NULL THEN 1 ELSE 0 END))";
}
@@ -592,7 +613,7 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
if (index_ids) {
array_foreach(index_ids) {
sqlite3_bind_text(stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i], -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i]);
}
}
if (mime_types) {
@@ -692,7 +713,7 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
if (index_ids) {
array_foreach(index_ids) {
sqlite3_bind_text(agg_stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i], -1, SQLITE_STATIC);
sqlite3_bind_int(agg_stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i]);
}
}
if (mime_types) {
@@ -764,19 +785,20 @@ database_summary_stats_t database_fts_sync_tags(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM fts.tag WHERE"
" (id, tag) NOT IN (SELECT id, tag FROM tag)",
" (id, index_id, tag) NOT IN (SELECT ((SELECT id FROM descriptor) << 32) | id, (SELECT id FROM descriptor), tag FROM tag)"
" AND index_id = (SELECT id FROM descriptor)",
NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"INSERT INTO fts.tag (id, tag) "
" SELECT id, tag FROM tag "
" WHERE (id, tag) NOT IN (SELECT * FROM fts.tag)",
"INSERT INTO fts.tag (id, index_id, tag) "
" SELECT (((SELECT id FROM descriptor) << 32) | id) as sid, (SELECT id FROM descriptor), tag FROM tag "
" WHERE (sid, tag) NOT IN (SELECT id, tag FROM fts.tag)",
NULL, NULL, NULL));
}
cJSON *database_fts_get_document(database_t *db, char *doc_id) {
sqlite3_bind_text(db->fts_get_document, 1, doc_id, -1, NULL);
cJSON *database_fts_get_document(database_t *db, long sid) {
sqlite3_bind_int64(db->fts_get_document, 1, sid);
int ret = sqlite3_step(db->fts_get_document);
cJSON *json = NULL;
@@ -844,3 +866,11 @@ cJSON *database_fts_get_tags(database_t *db) {
return json;
}
/**
 * Attach a tag to a document through the prepared fts_write_tag_stmt,
 * whose placeholders are (id, index_id, tag).
 *
 * @param db  database whose fts_write_tag_stmt is used
 * @param sid 64-bit id; its upper 32 bits are bound as index_id
 * @param tag NUL-terminated tag text
 */
void database_fts_write_tag(database_t *db, long sid, char *tag) {
    sqlite3_stmt *stmt = db->fts_write_tag_stmt;
    const int index_id = (int) (sid >> 32);

    sqlite3_bind_int64(stmt, 1, sid);
    sqlite3_bind_int(stmt, 2, index_id);
    sqlite3_bind_text(stmt, 3, tag, -1, SQLITE_STATIC);

    const int rc = sqlite3_step(stmt);
    CRASH_IF_STMT_FAIL(rc);

    CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(stmt));
}

View File

@@ -1,57 +1,63 @@
// Suffix spliced after the closing parenthesis of CREATE TABLE statements in
// the schema strings of this file. When SIST_DEBUG is defined it expands to
// " STRICT" (SQLite's STRICT table option); otherwise it expands to an empty
// string and the tables are created without that option.
#ifdef SIST_DEBUG
#define STRICT " STRICT"
#else
#define STRICT ""
#endif
const char *FtsDatabaseSchema =
"CREATE TABLE IF NOT EXISTS document_index ("
" id TEXT NOT NULL,"
" index_id TEXT NOT NULL,"
" id INTEGER PRIMARY KEY,"
" index_id INTEGER NOT NULL,"
" size INTEGER NOT NULL,"
" name TEXT NOT NULL,"
" path TEXT NOT NULL,"
" mtime INTEGER NOT NULL,"
" mime TEXT,"
" json_data TEXT NOT NULL,"
" PRIMARY KEY (id, index_id)"
");"
" thumbnail_count INTEGER NOT NULL,"
" json_data TEXT NOT NULL"
")"STRICT";"
""
"CREATE TABLE IF NOT EXISTS stats ("
" mtime_min INTEGER,"
" mtime_max INTEGER"
");"
")"STRICT";"
""
"CREATE TABLE IF NOT EXISTS path_index ("
" path TEXT,"
" index_id TEXT,"
" index_id INTEGER,"
" count INTEGER NOT NULL,"
" depth INTEGER NOT NULL,"
" PRIMARY KEY (path, index_id)"
");"
")"STRICT";"
""
"CREATE TABLE IF NOT EXISTS mime_index ("
" index_id TEXT,"
" index_id INTEGER,"
" mime TEXT,"
" count INT,"
" count INTEGER,"
" PRIMARY KEY(index_id, mime)"
");"
")"STRICT";"
""
"CREATE TABLE IF NOT EXISTS tag ("
" id TEXT NOT NULL,"
" id INTEGER NOT NULL,"
" index_id INTEGER NOT NULL,"
" tag TEXT NOT NULL,"
" PRIMARY KEY (id, tag)"
");"
")"STRICT";"
"CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag);"
"CREATE INDEX IF NOT EXISTS tag_id_idx ON tag(id);"
""
"CREATE TABLE IF NOT EXISTS embedding ("
" id TEXT REFERENCES document(id),"
" id INTEGER REFERENCES document_index(id),"
" model_id INTEGER NOT NULL REFERENCES model(id),"
" start INTEGER NOT NULL,"
" end INTEGER,"
" embedding BLOB NOT NULL,"
" PRIMARY KEY (id, model_id, start)"
");"
")"STRICT";"
""
"CREATE TABLE IF NOT EXISTS model ("
" id INTEGER PRIMARY KEY CHECK (id > 0 AND id < 1000),"
" size INTEGER NOT NULL"
");"
")"STRICT";"
""
"CREATE TRIGGER IF NOT EXISTS tag_write_trigger"
" AFTER INSERT ON tag"
@@ -71,7 +77,7 @@ const char *FtsDatabaseSchema =
""
"CREATE VIEW IF NOT EXISTS document_view (id, name, content, title)"
" AS"
" SELECT rowid,"
" SELECT id,"
" json_data->>'name',"
" json_data->>'content',"
" json_data->>'title'"
@@ -94,18 +100,18 @@ const char *IpcDatabaseSchema =
" filepath TEXT NOT NULL,"
" mtime INTEGER NOT NULL,"
" st_size INTEGER NOT NULL"
");"
")"STRICT";"
""
"CREATE TABLE index_job ("
" id INTEGER PRIMARY KEY,"
" doc_id TEXT NOT NULL CHECK ( length(doc_id) = 32 ),"
" sid TEXT NOT NULL,"
" type INTEGER NOT NULL,"
" line TEXT"
");";
")"STRICT";";
/*
 * IndexDatabaseSchema: SQL DDL text assembled from adjacent string literals
 * (a sequence of CREATE TABLE / CREATE INDEX statements).
 *
 * STRICT is a preprocessor macro spliced in after the closing parenthesis of
 * a table definition. The #define lines earlier in this file give it either
 * the text STRICT preceded by a space, or an empty string.
 *
 * NOTE(review): this listing looks like a rendered diff rather than plain
 * source. Lines starting with @@ are hunk headers that hide unchanged lines,
 * and removed and added lines appear side by side without +/- markers (for
 * example two id columns in thumbnail and in document, and both a plain
 * terminator and a STRICT terminator after several tables). Confirm every
 * column list against the real file before relying on it.
 */
const char *IndexDatabaseSchema =
/* thumbnail: blob rows keyed by (id, num). */
"CREATE TABLE thumbnail ("
" id TEXT NOT NULL CHECK ( length(id) = 32 ),"
" id INTEGER REFERENCES document(id),"
" num INTEGER NOT NULL,"
" data BLOB NOT NULL,"
" PRIMARY KEY(id, num)"
@@ -114,34 +120,46 @@ const char *IndexDatabaseSchema =
/* version: id auto-increments and date defaults to CURRENT_TIMESTAMP. */
"CREATE TABLE version ("
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
" date TEXT NOT NULL DEFAULT (CURRENT_TIMESTAMP)"
");"
")"STRICT";"
""
/* mime: id-to-name table; mime_name_idx makes name unique. */
"CREATE TABLE mime("
" id INTEGER PRIMARY KEY,"
" name TEXT"
")"STRICT";"
"CREATE UNIQUE INDEX mime_name_idx ON mime(name);"
""
/*
 * document: parent, mime and version are declared as references to
 * document(id), mime(id) and version(id); document_path_idx makes path unique.
 */
"CREATE TABLE document ("
" id TEXT PRIMARY KEY CHECK ( length(id) = 32 ),"
" marked INTEGER NOT NULL DEFAULT (1),"
" id INTEGER PRIMARY KEY,"
" parent INTEGER REFERENCES document(id),"
" mime INTEGER REFERENCES mime(id),"
" path TEXT NOT NULL,"
" version INTEGER NOT NULL REFERENCES version(id),"
" mtime INTEGER NOT NULL,"
" size INTEGER NOT NULL,"
" json_data TEXT NOT NULL CHECK ( json_valid(json_data) )"
") WITHOUT ROWID;"
" thumbnail_count INTEGER NOT NULL,"
" json_data TEXT CHECK ( json_data IS NULL OR json_valid(json_data) )"
")"STRICT";"
"CREATE UNIQUE INDEX document_path_idx ON document(path);"
/* marked: per-id marked flag and mtime, with an index on the marked column. */
"CREATE TABLE marked ("
" id INTEGER PRIMARY KEY,"
" marked INTEGER NOT NULL,"
" mtime INTEGER NOT NULL"
")"STRICT";"
""
"CREATE INDEX marked_marked ON marked(marked);"
""
/* delete_list: a single id column used as the primary key. */
"CREATE TABLE delete_list ("
" id TEXT PRIMARY KEY CHECK ( length(id) = 32 )"
") WITHOUT ROWID;"
" id INTEGER PRIMARY KEY"
")"STRICT";"
""
/* tag: (id, tag) pairs; the pair is the primary key. */
"CREATE TABLE tag ("
" id TEXT NOT NULL,"
" id INTEGER NOT NULL REFERENCES document(id),"
" tag TEXT NOT NULL,"
" PRIMARY KEY (id, tag)"
");"
""
/*
 * NOTE(review): document_sidecar is presumably on the removed side of the
 * diff (the commit title mentions removing sidecar parsing) - confirm.
 */
"CREATE TABLE document_sidecar ("
" id TEXT PRIMARY KEY NOT NULL,"
" json_data TEXT NOT NULL"
") WITHOUT ROWID;"
")"STRICT";"
""
/* descriptor: some of its columns are hidden by the hunk header below. */
"CREATE TABLE descriptor ("
" id TEXT NOT NULL,"
" id INTEGER PRIMARY KEY,"
" version_major INTEGER NOT NULL,"
" version_minor INTEGER NOT NULL,"
" version_patch INTEGER NOT NULL,"
@@ -149,37 +167,37 @@ const char *IndexDatabaseSchema =
" name TEXT NOT NULL,"
" rewrite_url TEXT,"
" timestamp INTEGER NOT NULL"
");"
")"STRICT";"
""
/* stats_* tables: path/size pairs and per-bucket or per-mime aggregates. */
"CREATE TABLE stats_treemap ("
" path TEXT NOT NULL,"
" size INTEGER NOT NULL"
");"
")"STRICT";"
""
"CREATE TABLE stats_size_agg ("
" bucket INTEGER NOT NULL,"
" count INTEGER NOT NULL"
");"
")"STRICT";"
""
"CREATE TABLE stats_date_agg ("
" bucket INTEGER NOT NULL,"
" count INTEGER NOT NULL"
");"
")"STRICT";"
""
"CREATE TABLE stats_mime_agg ("
" mime TEXT NOT NULL,"
" size INTEGER NOT NULL,"
" count INTEGER NOT NULL"
");"
")"STRICT";"
""
/*
 * embedding: keyed by (id, model_id, start); id references document(id) and
 * model_id references model(id).
 */
"CREATE TABLE embedding ("
" id TEXT REFERENCES document(id),"
" id INTEGER REFERENCES document(id),"
" model_id INTEGER NOT NULL references model(id),"
" start INTEGER NOT NULL,"
" end INTEGER,"
" embedding BLOB NOT NULL,"
" PRIMARY KEY (id, model_id, start)"
");"
")"STRICT";"
""
/* model: the CHECK constraint limits id to values from 1 through 999. */
"CREATE TABLE model ("
" id INTEGER PRIMARY KEY CHECK (id > 0 AND id < 1000),"
@@ -188,5 +206,5 @@ const char *IndexDatabaseSchema =
/* Tail of a table whose CREATE TABLE line is hidden by the hunk header above. */
" path TEXT NOT NULL UNIQUE,"
" size INTEGER NOT NULL,"
" type TEXT NOT NULL CHECK ( type IN ('flat', 'nested') )"
");";
")"STRICT";";

View File

@@ -98,10 +98,10 @@ void database_generate_stats(database_t *db, double treemap_threshold) {
// mime aggregation
sqlite3_prepare_v2(db->db, "INSERT INTO stats_mime_agg"
" SELECT"
" (json_data->>'mime') as bucket,"
" m.name as bucket,"
" sum(size),"
" count(*)"
" FROM document"
" FROM document INNER JOIN mime m ON m.id=document.mime"
" WHERE bucket IS NOT NULL"
" GROUP BY bucket", -1, &stmt, NULL);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
@@ -117,8 +117,8 @@ void database_generate_stats(database_t *db, double treemap_threshold) {
// flat map
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
"INSERT INTO tm (path, size) SELECT json_data->>'path' as path, sum(size)"
" FROM document WHERE json_data->>'parent' IS NULL GROUP BY path;",
"INSERT INTO tm (path, size) SELECT path, sum(size)"
" FROM document WHERE parent IS NULL GROUP BY path;",
NULL, NULL, NULL));
// Merge up