mirror of
https://github.com/simon987/sist2.git
synced 2025-12-12 06:58:54 +00:00
refactor index schema, remove sidecar parsing, remove TS
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
#include "src/util.h"
|
||||
#include "src/parsing/mime.h"
|
||||
|
||||
#include <time.h>
|
||||
|
||||
@@ -64,9 +65,11 @@ static int sep_rfind(const char *str) {
|
||||
}
|
||||
|
||||
void path_parent_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
|
||||
#ifdef SIST_DEBUG
|
||||
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
|
||||
sqlite3_result_error(ctx, "Invalid parameters", -1);
|
||||
}
|
||||
#endif
|
||||
|
||||
const char *value = (const char *) sqlite3_value_text(argv[0]);
|
||||
|
||||
@@ -82,28 +85,27 @@ void path_parent_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
|
||||
}
|
||||
|
||||
void random_func(sqlite3_context *ctx, int argc, UNUSED(sqlite3_value **argv)) {
|
||||
#ifdef SIST_DEBUG
|
||||
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_INTEGER) {
|
||||
sqlite3_result_error(ctx, "Invalid parameters", -1);
|
||||
}
|
||||
#endif
|
||||
|
||||
char state_buf[128] = {0,};
|
||||
struct random_data buf;
|
||||
int result;
|
||||
|
||||
char state_buf[8] = {0,};
|
||||
long seed = sqlite3_value_int64(argv[0]);
|
||||
|
||||
initstate_r((int) seed, state_buf, sizeof(state_buf), &buf);
|
||||
initstate((int) seed, state_buf, sizeof(state_buf));
|
||||
|
||||
random_r(&buf, &result);
|
||||
|
||||
sqlite3_result_int(ctx, result);
|
||||
sqlite3_result_int(ctx, (int) random());
|
||||
}
|
||||
|
||||
|
||||
void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
|
||||
#ifdef SIST_DEBUG
|
||||
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
|
||||
sqlite3_result_error(ctx, "Invalid parameters", -1);
|
||||
}
|
||||
#endif
|
||||
|
||||
database_ipc_ctx_t *ipc_ctx = sqlite3_user_data(ctx);
|
||||
|
||||
@@ -146,6 +148,12 @@ void database_open(database_t *db) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA temp_store = memory;", NULL, NULL, NULL));
|
||||
}
|
||||
|
||||
#ifdef SIST_DEBUG
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA foreign_keys = ON;", NULL, NULL, NULL));
|
||||
#else
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA ignore_check_constraints = ON;", NULL, NULL, NULL));
|
||||
#endif
|
||||
|
||||
if (db->type == INDEX_DATABASE) {
|
||||
// Prepare statements;
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
@@ -154,16 +162,15 @@ void database_open(database_t *db) {
|
||||
&db->select_thumbnail_stmt, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
"UPDATE document SET marked=1 WHERE id=? AND mtime=? RETURNING id",
|
||||
"UPDATE marked SET marked=1 WHERE id=(SELECT ROWID FROM document WHERE path=?) AND mtime=? RETURNING id",
|
||||
-1,
|
||||
&db->mark_document_stmt, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
"REPLACE INTO document_sidecar (id, json_data) VALUES (?,?)", -1,
|
||||
&db->write_document_sidecar_stmt, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
"REPLACE INTO document (id, mtime, size, json_data, version) VALUES (?, ?, ?, ?, (SELECT max(id) FROM version));",
|
||||
"INSERT INTO document (path, parent, mime, mtime, size, thumbnail_count, json_data, version) "
|
||||
"VALUES (?, (SELECT id FROM document WHERE path=?), ?, ?, ?, ?, ?, (SELECT max(id) FROM version)) "
|
||||
"ON CONFLICT (path) DO UPDATE SET json_data=excluded.json_data "
|
||||
"RETURNING id;",
|
||||
-1,
|
||||
&db->write_document_stmt, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
@@ -173,7 +180,12 @@ void database_open(database_t *db) {
|
||||
&db->write_thumbnail_stmt, NULL));
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db, "SELECT json_data FROM document WHERE id=?", -1,
|
||||
db->db, "SELECT json_set(json_data, "
|
||||
"'$._id', CAST (doc.id AS TEXT),"
|
||||
"'$.thumbnail', doc.thumbnail_count,"
|
||||
"'$.mime', m.name,"
|
||||
"'$.size', doc.size"
|
||||
") FROM document doc LEFT JOIN mime m ON m.id=doc.mime WHERE doc.id=?", -1,
|
||||
&db->get_document, NULL));
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
@@ -184,6 +196,12 @@ void database_open(database_t *db) {
|
||||
db->db, "SELECT embedding FROM embedding WHERE id=? AND model_id=? AND start=0", -1,
|
||||
&db->get_embedding, NULL));
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
"INSERT INTO tag (id, tag) VALUES (?,?) ON CONFLICT DO NOTHING;",
|
||||
-1,
|
||||
&db->write_tag_stmt, NULL));
|
||||
|
||||
// Create functions
|
||||
sqlite3_create_function(
|
||||
db->db,
|
||||
@@ -228,7 +246,7 @@ void database_open(database_t *db) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
"DELETE FROM index_job WHERE id = (SELECT MIN(id) FROM index_job)"
|
||||
" RETURNING doc_id,type,line;",
|
||||
" RETURNING sid,type,line;",
|
||||
-1, &db->pop_index_job_stmt, NULL
|
||||
));
|
||||
|
||||
@@ -243,7 +261,7 @@ void database_open(database_t *db) {
|
||||
db->db, "INSERT INTO parse_job (filepath,mtime,st_size) VALUES (?,?,?);", -1,
|
||||
&db->insert_parse_job_stmt, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db, "INSERT INTO index_job (doc_id,type,line) VALUES (?,?,?);", -1,
|
||||
db->db, "INSERT INTO index_job (sid,type,line) VALUES (?,?,?);", -1,
|
||||
&db->insert_index_job_stmt, NULL));
|
||||
|
||||
} else if (db->type == FTS_DATABASE) {
|
||||
@@ -294,6 +312,12 @@ void database_open(database_t *db) {
|
||||
db->db, "SELECT mime, sum(count) FROM mime_index WHERE mime is not NULL GROUP BY mime", -1,
|
||||
&db->fts_get_mimetypes, NULL));
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
"INSERT INTO tag (id, index_id, tag) VALUES (?,?,?) ON CONFLICT DO NOTHING;",
|
||||
-1,
|
||||
&db->fts_write_tag_stmt, NULL));
|
||||
|
||||
sqlite3_create_function(
|
||||
db->db,
|
||||
"random_seeded",
|
||||
@@ -340,13 +364,6 @@ void database_open(database_t *db) {
|
||||
}
|
||||
|
||||
if (db->type == FTS_DATABASE || db->type == INDEX_DATABASE) {
|
||||
// Tag table is the same schema for FTS database & index database
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
"INSERT INTO tag (id, tag) VALUES (?,?) ON CONFLICT DO NOTHING;",
|
||||
-1,
|
||||
&db->write_tag_stmt, NULL));
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
"DELETE FROM tag WHERE id=? AND tag=?;",
|
||||
@@ -376,8 +393,8 @@ void database_close(database_t *db, int optimize) {
|
||||
db = NULL;
|
||||
}
|
||||
|
||||
void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *return_value_len) {
|
||||
sqlite3_bind_text(db->select_thumbnail_stmt, 1, id, -1, SQLITE_STATIC);
|
||||
void *database_read_thumbnail(database_t *db, int doc_id, int num, size_t *return_value_len) {
|
||||
sqlite3_bind_int(db->select_thumbnail_stmt, 1, doc_id);
|
||||
sqlite3_bind_int(db->select_thumbnail_stmt, 2, num);
|
||||
|
||||
int ret = sqlite3_step(db->select_thumbnail_stmt);
|
||||
@@ -410,7 +427,7 @@ void database_write_index_descriptor(database_t *db, index_descriptor_t *desc) {
|
||||
|
||||
sqlite3_prepare_v2(db->db, "INSERT INTO descriptor (id, version_major, version_minor, version_patch,"
|
||||
" root, name, rewrite_url, timestamp) VALUES (?,?,?,?,?,?,?,?);", -1, &stmt, NULL);
|
||||
sqlite3_bind_text(stmt, 1, desc->id, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(stmt, 1, desc->id);
|
||||
sqlite3_bind_int(stmt, 2, desc->version_major);
|
||||
sqlite3_bind_int(stmt, 3, desc->version_minor);
|
||||
sqlite3_bind_int(stmt, 4, desc->version_patch);
|
||||
@@ -433,7 +450,7 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
|
||||
|
||||
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
|
||||
|
||||
const char *id = (char *) sqlite3_column_text(stmt, 0);
|
||||
int id = sqlite3_column_int(stmt, 0);
|
||||
int v_major = sqlite3_column_int(stmt, 1);
|
||||
int v_minor = sqlite3_column_int(stmt, 2);
|
||||
int v_patch = sqlite3_column_int(stmt, 3);
|
||||
@@ -443,7 +460,7 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
|
||||
int timestamp = sqlite3_column_int(stmt, 7);
|
||||
|
||||
index_descriptor_t *desc = malloc(sizeof(index_descriptor_t));
|
||||
strcpy(desc->id, id);
|
||||
desc->id = id;
|
||||
snprintf(desc->version, sizeof(desc->version), "%d.%d.%d", v_major, v_minor, v_patch);
|
||||
desc->version_major = v_major;
|
||||
desc->version_minor = v_minor;
|
||||
@@ -461,7 +478,8 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
|
||||
database_iterator_t *database_create_delete_list_iterator(database_t *db) {
|
||||
|
||||
sqlite3_stmt *stmt;
|
||||
sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list;", -1, &stmt, NULL);
|
||||
sqlite3_prepare_v2(db->db, "SELECT doc.id FROM delete_list "
|
||||
"INNER JOIN document doc ON doc.ROWID = delete_list.id;", -1, &stmt, NULL);
|
||||
|
||||
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
|
||||
|
||||
@@ -471,14 +489,11 @@ database_iterator_t *database_create_delete_list_iterator(database_t *db) {
|
||||
return iter;
|
||||
}
|
||||
|
||||
char *database_delete_list_iter(database_iterator_t *iter) {
|
||||
int database_delete_list_iter(database_iterator_t *iter) {
|
||||
int ret = sqlite3_step(iter->stmt);
|
||||
|
||||
if (ret == SQLITE_ROW) {
|
||||
const char *id = (const char *) sqlite3_column_text(iter->stmt, 0);
|
||||
char *id_heap = malloc(strlen(id) + 1);
|
||||
strcpy(id_heap, id);
|
||||
return id_heap;
|
||||
return sqlite3_column_int(iter->stmt, 0);
|
||||
}
|
||||
|
||||
if (ret != SQLITE_DONE) {
|
||||
@@ -491,7 +506,7 @@ char *database_delete_list_iter(database_iterator_t *iter) {
|
||||
|
||||
iter->stmt = NULL;
|
||||
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
database_iterator_t *database_create_document_iterator(database_t *db) {
|
||||
@@ -507,12 +522,16 @@ database_iterator_t *database_create_document_iterator(database_t *db) {
|
||||
" '$._id', document.id, "
|
||||
" '$.size', document.size, "
|
||||
" '$.mtime', document.mtime, "
|
||||
" '$.mime', mim.name,"
|
||||
" '$.thumbnail', document.thumbnail_count, "
|
||||
" '$.tag', json_group_array((SELECT tag FROM tag WHERE document.id = tag.id)))"
|
||||
" ELSE"
|
||||
" json_set(document.json_data,"
|
||||
" '$._id', document.id,"
|
||||
" '$.size', document.size,"
|
||||
" '$.mtime', document.mtime,"
|
||||
" '$.mime', mim.name,"
|
||||
" '$.thumbnail', document.thumbnail_count, "
|
||||
" '$.tag', json_group_array((SELECT tag FROM tag WHERE document.id = tag.id)),"
|
||||
" '$.emb', json_group_object(m.path, json(emb_to_json(emb.embedding))),"
|
||||
" '$.embedding', 1)"
|
||||
@@ -520,6 +539,7 @@ database_iterator_t *database_create_document_iterator(database_t *db) {
|
||||
" FROM document"
|
||||
" LEFT JOIN embedding emb ON document.id = emb.id"
|
||||
" LEFT JOIN model m ON emb.model_id = m.id"
|
||||
" LEFT JOIN mime mim ON mim.id = document.mime"
|
||||
" GROUP BY document.id)"
|
||||
" SELECT json_set(j, '$.index', (SELECT id FROM descriptor)) FROM doc",
|
||||
-1, &stmt, NULL));
|
||||
@@ -573,43 +593,48 @@ cJSON *database_document_iter(database_iterator_t *iter) {
|
||||
|
||||
/**
 * Prepare the index database for an incremental scan.
 *
 * Empties the `marked` table and refills it with one row per document
 * (document ROWID, marked=0, mtime). Rows are later switched to marked=1
 * through the prepared mark_document statement.
 *
 * Any SQLite failure is handled by CRASH_IF_NOT_SQLITE_OK.
 *
 * @param db index database handle
 * @return always NULL. The function is declared as returning cJSON * but
 *         has no JSON to report; the explicit return avoids falling off the
 *         end of a non-void function.
 */
cJSON *database_incremental_scan_begin(database_t *db) {
    LOG_DEBUG("database.c", "Preparing database for incremental scan");

    // Start from a clean slate, then snapshot every known document as "not seen yet"
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM marked;", NULL, NULL, NULL));
    CRASH_IF_NOT_SQLITE_OK(
            sqlite3_exec(db->db, "INSERT INTO marked SELECT ROWID, 0, mtime FROM document;", NULL, NULL, NULL));

    return NULL;
}
|
||||
|
||||
/**
 * Finish an incremental scan by reconciling the `marked` table with the
 * rest of the index database.
 *
 * The statements run in this order, each one crashing the process through
 * CRASH_IF_NOT_SQLITE_OK if SQLite reports an error:
 *   1. remove from delete_list every id that is marked=1;
 *   2. delete thumbnails belonging to documents that are marked=0;
 *   3. add every marked=0 id to delete_list (conflicts ignored);
 *   4. delete the marked=0 documents themselves;
 *   5. empty the `marked` table.
 *
 * The order matters: steps 2 and 3 read rows that steps 4 and 5 delete.
 *
 * @param db index database handle
 * @return always NULL. The function is declared as returning cJSON * but
 *         has no JSON to report; the explicit return avoids falling off the
 *         end of a non-void function.
 */
cJSON *database_incremental_scan_end(database_t *db) {
    // 1. Documents seen again must not stay queued for deletion
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "DELETE FROM delete_list WHERE id IN (SELECT id FROM marked WHERE marked = 1);",
            NULL, NULL, NULL
    ));

    // 2. Drop thumbnails of documents that were not seen during the scan
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "DELETE FROM thumbnail WHERE EXISTS ("
            "   SELECT document.id FROM document INNER JOIN marked m ON m.id = document.ROWID"
            "   WHERE marked=0 and document.id = thumbnail.id)",
            NULL, NULL, NULL
    ));

    // 3. Queue the unseen documents for deletion
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "INSERT INTO delete_list (id) "
            "SELECT id FROM marked WHERE marked=0 ON CONFLICT DO NOTHING;",
            NULL, NULL, NULL
    ));

    // 4. Remove the unseen documents from the document table
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "DELETE FROM document WHERE ROWID IN (SELECT id FROM marked WHERE marked=0);",
            NULL, NULL, NULL
    ));

    // 5. The marked table is only meaningful for the duration of one scan
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "DELETE FROM marked;",
            NULL, NULL, NULL
    ));

    return NULL;
}
|
||||
|
||||
int database_mark_document(database_t *db, const char *id, int mtime) {
|
||||
sqlite3_bind_text(db->mark_document_stmt, 1, id, -1, SQLITE_STATIC);
|
||||
int database_mark_document(database_t *db, const char *path, int mtime) {
|
||||
sqlite3_bind_text(db->mark_document_stmt, 1, path, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(db->mark_document_stmt, 2, mtime);
|
||||
|
||||
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
|
||||
@@ -631,31 +656,38 @@ int database_mark_document(database_t *db, const char *id, int mtime) {
|
||||
CRASH_IF_STMT_FAIL(ret);
|
||||
}
|
||||
|
||||
void database_write_document(database_t *db, document_t *doc, const char *json_data) {
|
||||
sqlite3_bind_text(db->write_document_stmt, 1, doc->doc_id, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(db->write_document_stmt, 2, doc->mtime);
|
||||
sqlite3_bind_int64(db->write_document_stmt, 3, (long) doc->size);
|
||||
sqlite3_bind_text(db->write_document_stmt, 4, json_data, -1, SQLITE_STATIC);
|
||||
/**
 * Insert (or update on path conflict) a document row through the prepared
 * write_document statement and return the id reported by its RETURNING
 * clause.
 *
 * Bound parameters, in order:
 *   1. path            - doc->filepath with the index root prefix skipped
 *   2. parent path     - doc->parent with the root prefix skipped, or NULL
 *                        when doc->parent is the empty string
 *   3. mime            - doc->mime
 *   4. mtime           - doc->mtime
 *   5. size            - doc->size
 *   6. thumbnail_count - doc->thumbnail_count
 *   7. json_data       - the given JSON text, or SQL NULL when json_data is NULL
 *
 * The step/column/reset sequence runs under ipc_ctx->index_db_mutex; the
 * binds above happen before the lock is taken.
 * NOTE(review): presumably each caller owns its own database_t so the
 * unlocked binds cannot race - confirm against callers.
 *
 * @param db        index database handle
 * @param doc       document metadata to persist
 * @param json_data serialized document JSON, may be NULL
 * @return value of column 0 of the statement result (the document id)
 */
int database_write_document(database_t *db, document_t *doc, const char *json_data) {

    sqlite3_stmt *stmt = db->write_document_stmt;

    // Paths are stored relative to the index root
    const char *relative_path = doc->filepath + ScanCtx.index.desc.root_len;

    const char *relative_parent = NULL;
    if (doc->parent[0] != '\0') {
        relative_parent = doc->parent + ScanCtx.index.desc.root_len;
    }

    sqlite3_bind_text(stmt, 1, relative_path, -1, SQLITE_STATIC);
    sqlite3_bind_text(stmt, 2, relative_parent, -1, SQLITE_STATIC);
    sqlite3_bind_int64(stmt, 3, doc->mime);
    sqlite3_bind_int(stmt, 4, doc->mtime);
    sqlite3_bind_int64(stmt, 5, (long) doc->size);
    sqlite3_bind_int(stmt, 6, doc->thumbnail_count);

    if (json_data == NULL) {
        sqlite3_bind_null(stmt, 7);
    } else {
        sqlite3_bind_text(stmt, 7, json_data, -1, SQLITE_STATIC);
    }

    pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
    // Read the RETURNING value before the reset invalidates the row
    int written_id = sqlite3_column_int(stmt, 0);
    CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(stmt));
    pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);

    return written_id;
}
|
||||
|
||||
|
||||
void database_write_document_sidecar(database_t *db, const char *id, const char *json_data) {
|
||||
sqlite3_bind_text(db->write_document_sidecar_stmt, 1, id, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(db->write_document_sidecar_stmt, 2, json_data, -1, SQLITE_STATIC);
|
||||
|
||||
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
|
||||
CRASH_IF_STMT_FAIL(sqlite3_step(db->write_document_sidecar_stmt));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_document_sidecar_stmt));
|
||||
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
|
||||
}
|
||||
|
||||
void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size) {
|
||||
sqlite3_bind_text(db->write_thumbnail_stmt, 1, id, -1, SQLITE_STATIC);
|
||||
void database_write_thumbnail(database_t *db, int doc_id, int num, void *data, size_t data_size) {
|
||||
sqlite3_bind_int(db->write_thumbnail_stmt, 1, doc_id);
|
||||
sqlite3_bind_int(db->write_thumbnail_stmt, 2, num);
|
||||
sqlite3_bind_blob(db->write_thumbnail_stmt, 3, data, (int) data_size, SQLITE_STATIC);
|
||||
|
||||
@@ -716,7 +748,7 @@ job_t *database_get_work(database_t *db, job_type_t job_type) {
|
||||
} else {
|
||||
job->bulk_line = malloc(sizeof(es_bulk_line_t));
|
||||
}
|
||||
strcpy(job->bulk_line->doc_id, (const char *) sqlite3_column_text(db->pop_index_job_stmt, 0));
|
||||
strcpy(job->bulk_line->sid, (const char *) sqlite3_column_text(db->pop_index_job_stmt, 0));
|
||||
job->bulk_line->type = sqlite3_column_int(db->pop_index_job_stmt, 1);
|
||||
job->bulk_line->next = NULL;
|
||||
|
||||
@@ -767,7 +799,7 @@ void database_add_work(database_t *db, job_t *job) {
|
||||
} while (ret != SQLITE_DONE && ret != SQLITE_OK);
|
||||
} else if (job->type == JOB_BULK_LINE) {
|
||||
do {
|
||||
sqlite3_bind_text(db->insert_index_job_stmt, 1, job->bulk_line->doc_id, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(db->insert_index_job_stmt, 1, job->bulk_line->sid, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(db->insert_index_job_stmt, 2, job->bulk_line->type);
|
||||
if (job->bulk_line->type != ES_BULK_LINE_DELETE) {
|
||||
sqlite3_bind_text(db->insert_index_job_stmt, 3, job->bulk_line->line, -1, SQLITE_STATIC);
|
||||
@@ -808,24 +840,25 @@ void database_add_work(database_t *db, job_t *job) {
|
||||
pthread_mutex_unlock(&db->ipc_ctx->mutex);
|
||||
}
|
||||
|
||||
void database_write_tag(database_t *db, char *doc_id, char *tag) {
|
||||
sqlite3_bind_text(db->write_tag_stmt, 1, doc_id, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_text(db->write_tag_stmt, 2, tag, -1, SQLITE_STATIC);
|
||||
/**
 * Attach a tag to a document through the prepared write_tag statement.
 *
 * write_tag_stmt is prepared as
 *   "INSERT INTO tag (id, tag) VALUES (?,?) ON CONFLICT DO NOTHING;"
 * so it has exactly two placeholders: the id (1) and the tag text (2).
 * The three-column (id, index_id, tag) form belongs to fts_write_tag_stmt;
 * binding an index id at position 2 here would store it in the tag column
 * and leave the tag text bound to an out-of-range index.
 *
 * NOTE(review): sid is bound unchanged as the id - confirm whether the index
 * database expects the full sid or only its low 32 bits.
 *
 * @param db  database handle whose write_tag_stmt has been prepared
 * @param sid identifier stored in the tag row's id column
 * @param tag NUL-terminated tag text; must stay valid until the statement
 *            has been stepped (bound with SQLITE_STATIC)
 */
void database_write_tag(database_t *db, long sid, char *tag) {
    sqlite3_bind_int64(db->write_tag_stmt, 1, sid);
    sqlite3_bind_text(db->write_tag_stmt, 2, tag, -1, SQLITE_STATIC);

    CRASH_IF_STMT_FAIL(sqlite3_step(db->write_tag_stmt));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_tag_stmt));
}
|
||||
|
||||
void database_delete_tag(database_t *db, char *doc_id, char *tag) {
|
||||
sqlite3_bind_text(db->delete_tag_stmt, 1, doc_id, -1, SQLITE_STATIC);
|
||||
/**
 * Remove one tag from a document through the prepared delete_tag statement.
 *
 * @param db  database handle whose delete_tag_stmt has been prepared
 * @param sid identifier bound to the statement's first placeholder (id)
 * @param tag NUL-terminated tag text bound to the second placeholder; must
 *            stay valid until the statement has been stepped
 */
void database_delete_tag(database_t *db, long sid, char *tag) {
    sqlite3_stmt *delete_stmt = db->delete_tag_stmt;

    sqlite3_bind_int64(delete_stmt, 1, sid);
    sqlite3_bind_text(delete_stmt, 2, tag, -1, SQLITE_STATIC);

    // Execute, then rewind the statement so it can be reused
    CRASH_IF_STMT_FAIL(sqlite3_step(delete_stmt));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(delete_stmt));
}
|
||||
|
||||
cJSON *database_get_document(database_t *db, char *doc_id) {
|
||||
sqlite3_bind_text(db->get_document, 1, doc_id, -1, SQLITE_STATIC);
|
||||
cJSON *database_get_document(database_t *db, int doc_id) {
|
||||
sqlite3_bind_int(db->get_document, 1, doc_id);
|
||||
|
||||
int ret = sqlite3_step(db->get_document);
|
||||
CRASH_IF_STMT_FAIL(ret);
|
||||
@@ -833,7 +866,7 @@ cJSON *database_get_document(database_t *db, char *doc_id) {
|
||||
cJSON *json;
|
||||
|
||||
if (ret == SQLITE_ROW) {
|
||||
const char *json_str = sqlite3_column_text(db->get_document, 0);
|
||||
const char *json_str = (char *) sqlite3_column_text(db->get_document, 0);
|
||||
json = cJSON_Parse(json_str);
|
||||
} else {
|
||||
json = NULL;
|
||||
@@ -847,4 +880,24 @@ cJSON *database_get_document(database_t *db, char *doc_id) {
|
||||
void database_increment_version(database_t *db) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db, "INSERT INTO version DEFAULT VALUES", NULL, NULL, NULL));
|
||||
}
|
||||
|
||||
/**
 * Write every known mime id and its textual name into the `mime` table.
 *
 * Iterates over the zero-terminated id list returned by get_mime_ids() and
 * issues one "REPLACE INTO mime (id, name)" per entry, so existing rows are
 * overwritten.
 *
 * The statement is compiled with sqlite3_prepare_v2, like the other
 * statements in this file, so that sqlite3_step reports specific error codes
 * directly. The reset result is checked in the same way as in the other
 * write helpers.
 *
 * @param db database handle containing the `mime` table
 */
void database_sync_mime_table(database_t *db) {
    unsigned int *cur = get_mime_ids();

    sqlite3_stmt *stmt;
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
            db->db,
            "REPLACE INTO mime (id, name) VALUES (?,?)", -1, &stmt, NULL));

    // The id list ends with a 0 sentinel
    for (; *cur != 0; cur++) {
        sqlite3_bind_int64(stmt, 1, (long) *cur);
        sqlite3_bind_text(stmt, 2, mime_get_mime_text(*cur), -1, SQLITE_STATIC);

        CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
        CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(stmt));
    }

    sqlite3_finalize(stmt);
}
|
||||
@@ -81,7 +81,6 @@ typedef struct database {
|
||||
|
||||
sqlite3_stmt *mark_document_stmt;
|
||||
sqlite3_stmt *write_document_stmt;
|
||||
sqlite3_stmt *write_document_sidecar_stmt;
|
||||
sqlite3_stmt *write_thumbnail_stmt;
|
||||
sqlite3_stmt *get_document;
|
||||
sqlite3_stmt *get_models;
|
||||
@@ -103,6 +102,7 @@ typedef struct database {
|
||||
sqlite3_stmt *fts_get_document;
|
||||
sqlite3_stmt *fts_suggest_tag;
|
||||
sqlite3_stmt *fts_get_tags;
|
||||
sqlite3_stmt *fts_write_tag_stmt;
|
||||
sqlite3_stmt *fts_model_size;
|
||||
|
||||
|
||||
@@ -133,15 +133,15 @@ void database_close(database_t *, int optimize);
|
||||
|
||||
void database_increment_version(database_t *db);
|
||||
|
||||
void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size);
|
||||
void database_write_thumbnail(database_t *db, int doc_id, int num, void *data, size_t data_size);
|
||||
|
||||
void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *return_value_len);
|
||||
void *database_read_thumbnail(database_t *db, int doc_id, int num, size_t *return_value_len);
|
||||
|
||||
void database_write_index_descriptor(database_t *db, index_descriptor_t *desc);
|
||||
|
||||
index_descriptor_t *database_read_index_descriptor(database_t *db);
|
||||
|
||||
void database_write_document(database_t *db, document_t *doc, const char *json_data);
|
||||
int database_write_document(database_t *db, document_t *doc, const char *json_data);
|
||||
|
||||
database_iterator_t *database_create_document_iterator(database_t *db);
|
||||
|
||||
@@ -154,10 +154,10 @@ cJSON *database_document_iter(database_iterator_t *);
|
||||
|
||||
database_iterator_t *database_create_delete_list_iterator(database_t *db);
|
||||
|
||||
char *database_delete_list_iter(database_iterator_t *iter);
|
||||
int database_delete_list_iter(database_iterator_t *iter);
|
||||
|
||||
#define database_delete_list_iter_foreach(element, iter) \
|
||||
for (char *(element) = database_delete_list_iter(iter); (element) != NULL; (element) = database_delete_list_iter(iter))
|
||||
for (int (element) = database_delete_list_iter(iter); (element) != 0; (element) = database_delete_list_iter(iter))
|
||||
|
||||
|
||||
cJSON *database_incremental_scan_begin(database_t *db);
|
||||
@@ -166,8 +166,6 @@ cJSON *database_incremental_scan_end(database_t *db);
|
||||
|
||||
int database_mark_document(database_t *db, const char *id, int mtime);
|
||||
|
||||
void database_write_document_sidecar(database_t *db, const char *id, const char *json_data);
|
||||
|
||||
database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold);
|
||||
|
||||
treemap_row_t database_treemap_iter(database_iterator_t *iter);
|
||||
@@ -206,7 +204,7 @@ void database_fts_index(database_t *db);
|
||||
|
||||
void database_fts_optimize(database_t *db);
|
||||
|
||||
cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_min, int depth_max, const char *prefix,
|
||||
cJSON *database_fts_get_paths(database_t *db, int index_id, int depth_min, int depth_max, const char *prefix,
|
||||
int suggest);
|
||||
|
||||
cJSON *database_fts_get_mimetypes(database_t *db);
|
||||
@@ -215,18 +213,20 @@ database_summary_stats_t database_fts_get_date_range(database_t *db);
|
||||
|
||||
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
|
||||
long size_max, long date_min, long date_max, int page_size,
|
||||
char **index_ids, char **mime_types, char **tags, int sort_asc,
|
||||
int *index_ids, char **mime_types, char **tags, int sort_asc,
|
||||
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
|
||||
int highlight, int highlight_context_size, int model,
|
||||
const float *embedding, int embedding_size);
|
||||
|
||||
void database_write_tag(database_t *db, char *doc_id, char *tag);
|
||||
void database_write_tag(database_t *db, long sid, char *tag);
|
||||
|
||||
void database_delete_tag(database_t *db, char *doc_id, char *tag);
|
||||
void database_fts_write_tag(database_t *db, long sid, char *tag);
|
||||
|
||||
void database_delete_tag(database_t *db, long sid, char *tag);
|
||||
|
||||
void database_fts_detach(database_t *db);
|
||||
|
||||
cJSON *database_fts_get_document(database_t *db, char *doc_id);
|
||||
cJSON *database_fts_get_document(database_t *db, long sid);
|
||||
|
||||
database_summary_stats_t database_fts_sync_tags(database_t *db);
|
||||
|
||||
@@ -234,7 +234,7 @@ cJSON *database_fts_suggest_tag(database_t *db, char *prefix);
|
||||
|
||||
cJSON *database_fts_get_tags(database_t *db);
|
||||
|
||||
cJSON *database_get_document(database_t *db, char *doc_id);
|
||||
cJSON *database_get_document(database_t *db, int doc_id);
|
||||
|
||||
void cosine_sim_func(sqlite3_context *ctx, int argc, sqlite3_value **argv);
|
||||
|
||||
@@ -242,6 +242,8 @@ cJSON *database_get_models(database_t *db);
|
||||
|
||||
int database_fts_get_model_size(database_t *db, int model_id);
|
||||
|
||||
cJSON *database_get_embedding(database_t *db, char *doc_id, int model_id);
|
||||
cJSON *database_get_embedding(database_t *db, int doc_id, int model_id);
|
||||
|
||||
void database_sync_mime_table(database_t *db);
|
||||
|
||||
#endif
|
||||
@@ -69,9 +69,9 @@ cJSON *database_get_models(database_t *db) {
|
||||
return json;
|
||||
}
|
||||
|
||||
cJSON *database_get_embedding(database_t *db, char *doc_id, int model_id) {
|
||||
cJSON *database_get_embedding(database_t *db, int doc_id, int model_id) {
|
||||
|
||||
sqlite3_bind_text(db->get_embedding, 1, doc_id, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(db->get_embedding, 1, doc_id);
|
||||
sqlite3_bind_int(db->get_embedding, 2, model_id);
|
||||
int ret = sqlite3_step(db->get_embedding);
|
||||
CRASH_IF_STMT_FAIL(ret);
|
||||
|
||||
@@ -42,21 +42,23 @@ void database_fts_index(database_t *db) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db,
|
||||
"WITH docs AS ("
|
||||
" SELECT document.id as id, (SELECT id FROM descriptor) as index_id, size,"
|
||||
" SELECT "
|
||||
" ((SELECT id FROM descriptor) << 32) | document.id as id,"
|
||||
" (SELECT id FROM descriptor) as index_id,"
|
||||
" size,"
|
||||
" document.json_data ->> 'name' as name,"
|
||||
" document.json_data ->> 'path' as path,"
|
||||
" mtime,"
|
||||
" document.json_data ->> 'mime' as mime,"
|
||||
" json_set(document.json_data, "
|
||||
" '$._id',document.id,"
|
||||
" '$.size',document.size, "
|
||||
" '$.mtime',document.mtime)"
|
||||
" m.name as mime,"
|
||||
" thumbnail_count,"
|
||||
" document.json_data"
|
||||
" FROM document"
|
||||
" LEFT JOIN mime m ON m.id=document.mime"
|
||||
" )"
|
||||
" INSERT"
|
||||
" INTO fts.document_index (id, index_id, size, name, path, mtime, mime, json_data)"
|
||||
" INTO fts.document_index (id, index_id, size, name, path, mtime, mime, thumbnail_count, json_data)"
|
||||
" SELECT * FROM docs WHERE true"
|
||||
" on conflict (id, index_id) do update set "
|
||||
" on conflict (id) do update set "
|
||||
" size=excluded.size, mtime=excluded.mtime, mime=excluded.mime, json_data=excluded.json_data;",
|
||||
NULL, NULL, NULL));
|
||||
|
||||
@@ -64,13 +66,14 @@ void database_fts_index(database_t *db) {
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db,
|
||||
"REPLACE INTO fts.embedding (id, model_id, start, end, embedding)"
|
||||
" SELECT id, model_id, start, end, embedding FROM embedding", NULL, NULL, NULL));
|
||||
"REPLACE INTO fts.model (id, size)"
|
||||
" SELECT id, size FROM model", NULL, NULL, NULL));
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db,
|
||||
"INSERT INTO fts.model (id, size)"
|
||||
" SELECT id, size FROM model WHERE TRUE ON CONFLICT (id) DO NOTHING", NULL, NULL, NULL));
|
||||
"REPLACE INTO fts.embedding (id, model_id, start, end, embedding)"
|
||||
" SELECT (SELECT id FROM descriptor) << 32 | id, model_id, start, end, embedding FROM embedding "
|
||||
" WHERE TRUE ON CONFLICT (id, model_id, start) DO NOTHING;", NULL, NULL, NULL));
|
||||
|
||||
// TODO: delete old embeddings
|
||||
|
||||
@@ -172,7 +175,7 @@ void database_fts_optimize(database_t *db) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA fts.optimize;", NULL, NULL, NULL));
|
||||
}
|
||||
|
||||
cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_min, int depth_max, const char *prefix,
|
||||
cJSON *database_fts_get_paths(database_t *db, int index_id, int depth_min, int depth_max, const char *prefix,
|
||||
int suggest) {
|
||||
|
||||
sqlite3_stmt *stmt;
|
||||
@@ -192,7 +195,7 @@ cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_mi
|
||||
} else if (prefix) {
|
||||
stmt = db->fts_search_paths_w_prefix;
|
||||
if (index_id) {
|
||||
sqlite3_bind_text(stmt, 1, index_id, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(stmt, 1, index_id);
|
||||
} else {
|
||||
sqlite3_bind_null(stmt, 1);
|
||||
}
|
||||
@@ -207,7 +210,7 @@ cJSON *database_fts_get_paths(database_t *db, const char *index_id, int depth_mi
|
||||
} else {
|
||||
stmt = db->fts_search_paths;
|
||||
if (index_id) {
|
||||
sqlite3_bind_text(stmt, 1, index_id, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(stmt, 1, index_id);
|
||||
} else {
|
||||
sqlite3_bind_null(stmt, 1);
|
||||
}
|
||||
@@ -290,7 +293,6 @@ const char *date_where_clause(long date_min, long date_max) {
|
||||
}
|
||||
|
||||
int array_length(char **arr) {
|
||||
|
||||
if (arr == NULL) {
|
||||
return 0;
|
||||
}
|
||||
@@ -301,6 +303,17 @@ int array_length(char **arr) {
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
 * Count the elements of a zero-terminated int array.
 *
 * @param arr array whose last element is 0, or NULL
 * @return number of elements before the first 0; 0 when arr is NULL
 */
int int_array_length(const int *arr) {
    if (!arr) {
        return 0;
    }

    int length = 0;
    for (const int *cursor = arr; *cursor != 0; cursor++) {
        length++;
    }

    return length;
}
|
||||
|
||||
#define INDEX_ID_PARAM_OFFSET (10)
|
||||
#define MIME_PARAM_OFFSET (INDEX_ID_PARAM_OFFSET + 1000)
|
||||
|
||||
@@ -351,8 +364,8 @@ char *build_where_clause(const char *path_where, const char *size_where, const c
|
||||
return where;
|
||||
}
|
||||
|
||||
char *index_ids_where_clause(char **index_ids) {
|
||||
int param_count = array_length(index_ids);
|
||||
char *index_ids_where_clause(int *index_ids) {
|
||||
int param_count = int_array_length(index_ids);
|
||||
|
||||
char *clause = malloc(13 + 2 + 6 * param_count);
|
||||
|
||||
@@ -483,7 +496,7 @@ int database_fts_get_model_size(database_t *db, int model_id) {
|
||||
|
||||
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
|
||||
long size_max, long date_min, long date_max, int page_size,
|
||||
char **index_ids, char **mime_types, char **tags, int sort_asc,
|
||||
int *index_ids, char **mime_types, char **tags, int sort_asc,
|
||||
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
|
||||
int highlight, int highlight_context_size, int model,
|
||||
const float *embedding, int embedding_size) {
|
||||
@@ -524,13 +537,21 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
||||
const char *json_object_sql;
|
||||
if (highlight && query_where != NULL) {
|
||||
json_object_sql = "json_set(json_remove(doc.json_data, '$.content'),"
|
||||
"'$._id', CAST(doc.id AS TEXT),"
|
||||
"'$.index', doc.index_id,"
|
||||
"'$.thumbnail', doc.thumbnail_count,"
|
||||
"'$.mime', doc.mime,"
|
||||
"'$.size', doc.size,"
|
||||
"'$.embedding', (CASE WHEN emb.id IS NOT NULL THEN 1 ELSE 0 END),"
|
||||
"'$._highlight.name', snippet(search, 0, '<mark>', '</mark>', '', ?6),"
|
||||
"'$._highlight.content', snippet(search, 1, '<mark>', '</mark>', '', ?6))";
|
||||
} else {
|
||||
json_object_sql = "json_set(json_remove(doc.json_data, '$.content'),"
|
||||
"'$._id', CAST(doc.id AS TEXT),"
|
||||
"'$.index', doc.index_id,"
|
||||
"'$.thumbnail', doc.thumbnail_count,"
|
||||
"'$.mime', doc.mime,"
|
||||
"'$.size', doc.size,"
|
||||
"'$.embedding', (CASE WHEN emb.id IS NOT NULL THEN 1 ELSE 0 END))";
|
||||
}
|
||||
|
||||
@@ -592,7 +613,7 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
||||
|
||||
if (index_ids) {
|
||||
array_foreach(index_ids) {
|
||||
sqlite3_bind_text(stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i], -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i]);
|
||||
}
|
||||
}
|
||||
if (mime_types) {
|
||||
@@ -692,7 +713,7 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
||||
|
||||
if (index_ids) {
|
||||
array_foreach(index_ids) {
|
||||
sqlite3_bind_text(agg_stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i], -1, SQLITE_STATIC);
|
||||
sqlite3_bind_int(agg_stmt, INDEX_ID_PARAM_OFFSET + i, index_ids[i]);
|
||||
}
|
||||
}
|
||||
if (mime_types) {
|
||||
@@ -764,19 +785,20 @@ database_summary_stats_t database_fts_sync_tags(database_t *db) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db,
|
||||
"DELETE FROM fts.tag WHERE"
|
||||
" (id, tag) NOT IN (SELECT id, tag FROM tag)",
|
||||
" (id, index_id, tag) NOT IN (SELECT ((SELECT id FROM descriptor) << 32) | id, (SELECT id FROM descriptor), tag FROM tag)"
|
||||
" AND index_id = (SELECT id FROM descriptor)",
|
||||
NULL, NULL, NULL));
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||
db->db,
|
||||
"INSERT INTO fts.tag (id, tag) "
|
||||
" SELECT id, tag FROM tag "
|
||||
" WHERE (id, tag) NOT IN (SELECT * FROM fts.tag)",
|
||||
"INSERT INTO fts.tag (id, index_id, tag) "
|
||||
" SELECT (((SELECT id FROM descriptor) << 32) | id) as sid, (SELECT id FROM descriptor), tag FROM tag "
|
||||
" WHERE (sid, tag) NOT IN (SELECT id, tag FROM fts.tag)",
|
||||
NULL, NULL, NULL));
|
||||
}
|
||||
|
||||
cJSON *database_fts_get_document(database_t *db, char *doc_id) {
|
||||
sqlite3_bind_text(db->fts_get_document, 1, doc_id, -1, NULL);
|
||||
cJSON *database_fts_get_document(database_t *db, long sid) {
|
||||
sqlite3_bind_int64(db->fts_get_document, 1, sid);
|
||||
|
||||
int ret = sqlite3_step(db->fts_get_document);
|
||||
cJSON *json = NULL;
|
||||
@@ -844,3 +866,11 @@ cJSON *database_fts_get_tags(database_t *db) {
|
||||
|
||||
return json;
|
||||
}
|
||||
void database_fts_write_tag(database_t *db, long sid, char *tag) {
|
||||
sqlite3_bind_int64(db->fts_write_tag_stmt, 1, sid);
|
||||
sqlite3_bind_int(db->fts_write_tag_stmt, 2, (int) (sid >> 32));
|
||||
sqlite3_bind_text(db->fts_write_tag_stmt, 3, tag, -1, SQLITE_STATIC);
|
||||
|
||||
CRASH_IF_STMT_FAIL(sqlite3_step(db->fts_write_tag_stmt));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->fts_write_tag_stmt));
|
||||
}
|
||||
|
||||
@@ -1,57 +1,63 @@
|
||||
#ifdef SIST_DEBUG
|
||||
#define STRICT " STRICT"
|
||||
#else
|
||||
#define STRICT ""
|
||||
#endif
|
||||
|
||||
const char *FtsDatabaseSchema =
|
||||
"CREATE TABLE IF NOT EXISTS document_index ("
|
||||
" id TEXT NOT NULL,"
|
||||
" index_id TEXT NOT NULL,"
|
||||
" id INTEGER PRIMARY KEY,"
|
||||
" index_id INTEGER NOT NULL,"
|
||||
" size INTEGER NOT NULL,"
|
||||
" name TEXT NOT NULL,"
|
||||
" path TEXT NOT NULL,"
|
||||
" mtime INTEGER NOT NULL,"
|
||||
" mime TEXT,"
|
||||
" json_data TEXT NOT NULL,"
|
||||
" PRIMARY KEY (id, index_id)"
|
||||
");"
|
||||
" thumbnail_count INTEGER NOT NULL,"
|
||||
" json_data TEXT NOT NULL"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE IF NOT EXISTS stats ("
|
||||
" mtime_min INTEGER,"
|
||||
" mtime_max INTEGER"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE IF NOT EXISTS path_index ("
|
||||
" path TEXT,"
|
||||
" index_id TEXT,"
|
||||
" index_id INTEGER,"
|
||||
" count INTEGER NOT NULL,"
|
||||
" depth INTEGER NOT NULL,"
|
||||
" PRIMARY KEY (path, index_id)"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE IF NOT EXISTS mime_index ("
|
||||
" index_id TEXT,"
|
||||
" index_id INTEGER,"
|
||||
" mime TEXT,"
|
||||
" count INT,"
|
||||
" count INTEGER,"
|
||||
" PRIMARY KEY(index_id, mime)"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE IF NOT EXISTS tag ("
|
||||
" id TEXT NOT NULL,"
|
||||
" id INTEGER NOT NULL,"
|
||||
" index_id INTEGER NOT NULL,"
|
||||
" tag TEXT NOT NULL,"
|
||||
" PRIMARY KEY (id, tag)"
|
||||
");"
|
||||
")"STRICT";"
|
||||
"CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag);"
|
||||
"CREATE INDEX IF NOT EXISTS tag_id_idx ON tag(id);"
|
||||
""
|
||||
"CREATE TABLE IF NOT EXISTS embedding ("
|
||||
" id TEXT REFERENCES document(id),"
|
||||
" id INTEGER REFERENCES document_index(id),"
|
||||
" model_id INTEGER NOT NULL REFERENCES model(id),"
|
||||
" start INTEGER NOT NULL,"
|
||||
" end INTEGER,"
|
||||
" embedding BLOB NOT NULL,"
|
||||
" PRIMARY KEY (id, model_id, start)"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE IF NOT EXISTS model ("
|
||||
" id INTEGER PRIMARY KEY CHECK (id > 0 AND id < 1000),"
|
||||
" size INTEGER NOT NULL"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TRIGGER IF NOT EXISTS tag_write_trigger"
|
||||
" AFTER INSERT ON tag"
|
||||
@@ -71,7 +77,7 @@ const char *FtsDatabaseSchema =
|
||||
""
|
||||
"CREATE VIEW IF NOT EXISTS document_view (id, name, content, title)"
|
||||
" AS"
|
||||
" SELECT rowid,"
|
||||
" SELECT id,"
|
||||
" json_data->>'name',"
|
||||
" json_data->>'content',"
|
||||
" json_data->>'title'"
|
||||
@@ -94,18 +100,18 @@ const char *IpcDatabaseSchema =
|
||||
" filepath TEXT NOT NULL,"
|
||||
" mtime INTEGER NOT NULL,"
|
||||
" st_size INTEGER NOT NULL"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE index_job ("
|
||||
" id INTEGER PRIMARY KEY,"
|
||||
" doc_id TEXT NOT NULL CHECK ( length(doc_id) = 32 ),"
|
||||
" sid TEXT NOT NULL,"
|
||||
" type INTEGER NOT NULL,"
|
||||
" line TEXT"
|
||||
");";
|
||||
")"STRICT";";
|
||||
|
||||
const char *IndexDatabaseSchema =
|
||||
"CREATE TABLE thumbnail ("
|
||||
" id TEXT NOT NULL CHECK ( length(id) = 32 ),"
|
||||
" id INTEGER REFERENCES document(id),"
|
||||
" num INTEGER NOT NULL,"
|
||||
" data BLOB NOT NULL,"
|
||||
" PRIMARY KEY(id, num)"
|
||||
@@ -114,34 +120,46 @@ const char *IndexDatabaseSchema =
|
||||
"CREATE TABLE version ("
|
||||
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
|
||||
" date TEXT NOT NULL DEFAULT (CURRENT_TIMESTAMP)"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE mime("
|
||||
" id INTEGER PRIMARY KEY,"
|
||||
" name TEXT"
|
||||
")"STRICT";"
|
||||
"CREATE UNIQUE INDEX mime_name_idx ON mime(name);"
|
||||
""
|
||||
"CREATE TABLE document ("
|
||||
" id TEXT PRIMARY KEY CHECK ( length(id) = 32 ),"
|
||||
" marked INTEGER NOT NULL DEFAULT (1),"
|
||||
" id INTEGER PRIMARY KEY,"
|
||||
" parent INTEGER REFERENCES document(id),"
|
||||
" mime INTEGER REFERENCES mime(id),"
|
||||
" path TEXT NOT NULL,"
|
||||
" version INTEGER NOT NULL REFERENCES version(id),"
|
||||
" mtime INTEGER NOT NULL,"
|
||||
" size INTEGER NOT NULL,"
|
||||
" json_data TEXT NOT NULL CHECK ( json_valid(json_data) )"
|
||||
") WITHOUT ROWID;"
|
||||
" thumbnail_count INTEGER NOT NULL,"
|
||||
" json_data TEXT CHECK ( json_data IS NULL OR json_valid(json_data) )"
|
||||
")"STRICT";"
|
||||
"CREATE UNIQUE INDEX document_path_idx ON document(path);"
|
||||
"CREATE TABLE marked ("
|
||||
" id INTEGER PRIMARY KEY,"
|
||||
" marked INTEGER NOT NULL,"
|
||||
" mtime INTEGER NOT NULL"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE INDEX marked_marked ON marked(marked);"
|
||||
""
|
||||
"CREATE TABLE delete_list ("
|
||||
" id TEXT PRIMARY KEY CHECK ( length(id) = 32 )"
|
||||
") WITHOUT ROWID;"
|
||||
" id INTEGER PRIMARY KEY"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE tag ("
|
||||
" id TEXT NOT NULL,"
|
||||
" id INTEGER NOT NULL REFERENCES document(id),"
|
||||
" tag TEXT NOT NULL,"
|
||||
" PRIMARY KEY (id, tag)"
|
||||
");"
|
||||
""
|
||||
"CREATE TABLE document_sidecar ("
|
||||
" id TEXT PRIMARY KEY NOT NULL,"
|
||||
" json_data TEXT NOT NULL"
|
||||
") WITHOUT ROWID;"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE descriptor ("
|
||||
" id TEXT NOT NULL,"
|
||||
" id INTEGER PRIMARY KEY,"
|
||||
" version_major INTEGER NOT NULL,"
|
||||
" version_minor INTEGER NOT NULL,"
|
||||
" version_patch INTEGER NOT NULL,"
|
||||
@@ -149,37 +167,37 @@ const char *IndexDatabaseSchema =
|
||||
" name TEXT NOT NULL,"
|
||||
" rewrite_url TEXT,"
|
||||
" timestamp INTEGER NOT NULL"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE stats_treemap ("
|
||||
" path TEXT NOT NULL,"
|
||||
" size INTEGER NOT NULL"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE stats_size_agg ("
|
||||
" bucket INTEGER NOT NULL,"
|
||||
" count INTEGER NOT NULL"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE stats_date_agg ("
|
||||
" bucket INTEGER NOT NULL,"
|
||||
" count INTEGER NOT NULL"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE stats_mime_agg ("
|
||||
" mime TEXT NOT NULL,"
|
||||
" size INTEGER NOT NULL,"
|
||||
" count INTEGER NOT NULL"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE embedding ("
|
||||
" id TEXT REFERENCES document(id),"
|
||||
" id INTEGER REFERENCES document(id),"
|
||||
" model_id INTEGER NOT NULL references model(id),"
|
||||
" start INTEGER NOT NULL,"
|
||||
" end INTEGER,"
|
||||
" embedding BLOB NOT NULL,"
|
||||
" PRIMARY KEY (id, model_id, start)"
|
||||
");"
|
||||
")"STRICT";"
|
||||
""
|
||||
"CREATE TABLE model ("
|
||||
" id INTEGER PRIMARY KEY CHECK (id > 0 AND id < 1000),"
|
||||
@@ -188,5 +206,5 @@ const char *IndexDatabaseSchema =
|
||||
" path TEXT NOT NULL UNIQUE,"
|
||||
" size INTEGER NOT NULL,"
|
||||
" type TEXT NOT NULL CHECK ( type IN ('flat', 'nested') )"
|
||||
");";
|
||||
")"STRICT";";
|
||||
|
||||
|
||||
@@ -98,10 +98,10 @@ void database_generate_stats(database_t *db, double treemap_threshold) {
|
||||
// mime aggregation
|
||||
sqlite3_prepare_v2(db->db, "INSERT INTO stats_mime_agg"
|
||||
" SELECT"
|
||||
" (json_data->>'mime') as bucket,"
|
||||
" m.name as bucket,"
|
||||
" sum(size),"
|
||||
" count(*)"
|
||||
" FROM document"
|
||||
" FROM document INNER JOIN mime m ON m.id=document.mime"
|
||||
" WHERE bucket IS NOT NULL"
|
||||
" GROUP BY bucket", -1, &stmt, NULL);
|
||||
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
|
||||
@@ -117,8 +117,8 @@ void database_generate_stats(database_t *db, double treemap_threshold) {
|
||||
|
||||
// flat map
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
|
||||
"INSERT INTO tm (path, size) SELECT json_data->>'path' as path, sum(size)"
|
||||
" FROM document WHERE json_data->>'parent' IS NULL GROUP BY path;",
|
||||
"INSERT INTO tm (path, size) SELECT path, sum(size)"
|
||||
" FROM document WHERE parent IS NULL GROUP BY path;",
|
||||
NULL, NULL, NULL));
|
||||
|
||||
// Merge up
|
||||
|
||||
@@ -47,7 +47,7 @@ void elastic_cleanup() {
|
||||
destroy_indexer(Indexer);
|
||||
}
|
||||
|
||||
void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
|
||||
void print_json(cJSON *document, const char id_str[SIST_SID_LEN]) {
|
||||
|
||||
cJSON *line = cJSON_CreateObject();
|
||||
|
||||
@@ -64,12 +64,12 @@ void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
|
||||
cJSON_Delete(line);
|
||||
}
|
||||
|
||||
void delete_document(const char *document_id) {
|
||||
void delete_document(const char *sid) {
|
||||
es_bulk_line_t bulk_line;
|
||||
|
||||
bulk_line.type = ES_BULK_LINE_DELETE;
|
||||
bulk_line.next = NULL;
|
||||
strcpy(bulk_line.doc_id, document_id);
|
||||
strcpy(bulk_line.sid, sid);
|
||||
|
||||
tpool_add_work(IndexCtx.pool, &(job_t) {
|
||||
.type = JOB_BULK_LINE,
|
||||
@@ -78,14 +78,14 @@ void delete_document(const char *document_id) {
|
||||
}
|
||||
|
||||
|
||||
void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
|
||||
void index_json(cJSON *document, const char doc_id[SIST_SID_LEN]) {
|
||||
char *json = cJSON_PrintUnformatted(document);
|
||||
|
||||
size_t json_len = strlen(json);
|
||||
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
|
||||
bulk_line->type = ES_BULK_LINE_INDEX;
|
||||
memcpy(bulk_line->line, json, json_len);
|
||||
strcpy(bulk_line->doc_id, doc_id);
|
||||
strcpy(bulk_line->sid, doc_id);
|
||||
*(bulk_line->line + json_len) = '\n';
|
||||
*(bulk_line->line + json_len + 1) = '\0';
|
||||
bulk_line->next = NULL;
|
||||
@@ -124,13 +124,13 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
|
||||
snprintf(
|
||||
action_str, sizeof(action_str),
|
||||
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
|
||||
line->doc_id, Indexer->es_index
|
||||
line->sid, Indexer->es_index
|
||||
);
|
||||
} else {
|
||||
snprintf(
|
||||
action_str, sizeof(action_str),
|
||||
"{\"index\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
|
||||
line->doc_id, Indexer->es_index
|
||||
line->sid, Indexer->es_index
|
||||
);
|
||||
}
|
||||
|
||||
@@ -148,7 +148,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
|
||||
snprintf(
|
||||
action_str, sizeof(action_str),
|
||||
"{\"delete\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
|
||||
line->doc_id, Indexer->es_index
|
||||
line->sid, Indexer->es_index
|
||||
);
|
||||
|
||||
size_t action_str_len = strlen(action_str);
|
||||
@@ -236,7 +236,7 @@ void _elastic_flush(int max) {
|
||||
if (r->status_code == 413) {
|
||||
|
||||
if (max <= 1) {
|
||||
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id);
|
||||
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->sid);
|
||||
free_response(r);
|
||||
free(buf);
|
||||
free_queue(1);
|
||||
@@ -348,7 +348,7 @@ es_indexer_t *create_indexer(const char *url, const char *index) {
|
||||
return indexer;
|
||||
}
|
||||
|
||||
void finish_indexer(char *index_id) {
|
||||
void finish_indexer(int index_id) {
|
||||
|
||||
char url[4096];
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
typedef struct es_bulk_line {
|
||||
struct es_bulk_line *next;
|
||||
char doc_id[SIST_DOC_ID_LEN];
|
||||
char sid[SIST_SID_LEN];
|
||||
int type;
|
||||
char line[0];
|
||||
} es_bulk_line_t;
|
||||
@@ -44,16 +44,16 @@ typedef struct es_indexer es_indexer_t;
|
||||
|
||||
void elastic_index_line(es_bulk_line_t *line);
|
||||
|
||||
void print_json(cJSON *document, const char index_id_str[SIST_INDEX_ID_LEN]);
|
||||
void print_json(cJSON *document, const char doc_id[SIST_SID_LEN]);
|
||||
|
||||
void index_json(cJSON *document, const char doc_id[SIST_INDEX_ID_LEN]);
|
||||
void index_json(cJSON *document, const char doc_id[SIST_SID_LEN]);
|
||||
|
||||
void delete_document(const char *document_id);
|
||||
void delete_document(const char *sid);
|
||||
|
||||
es_indexer_t *create_indexer(const char *url, const char *index);
|
||||
|
||||
void elastic_cleanup();
|
||||
void finish_indexer(char *index_id);
|
||||
void finish_indexer(int index_id);
|
||||
|
||||
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);
|
||||
|
||||
|
||||
@@ -32,8 +32,6 @@ char *get_meta_key_text(enum metakey meta_key) {
|
||||
return "title";
|
||||
case MetaFontName:
|
||||
return "font_name";
|
||||
case MetaParent:
|
||||
return "parent";
|
||||
case MetaExifMake:
|
||||
return "exif_make";
|
||||
case MetaExifDescription:
|
||||
@@ -58,8 +56,6 @@ char *get_meta_key_text(enum metakey meta_key) {
|
||||
return "author";
|
||||
case MetaModifiedBy:
|
||||
return "modified_by";
|
||||
case MetaThumbnail:
|
||||
return "thumbnail";
|
||||
case MetaPages:
|
||||
return "pages";
|
||||
case MetaExifGpsLongitudeRef:
|
||||
@@ -81,21 +77,23 @@ char *get_meta_key_text(enum metakey meta_key) {
|
||||
}
|
||||
}
|
||||
|
||||
char *build_json_string(document_t *doc) {
|
||||
typedef struct {
|
||||
meta_line_t *meta_head;
|
||||
meta_line_t *meta_tail;
|
||||
} linked_list_t;
|
||||
|
||||
|
||||
void write_document(document_t *doc) {
|
||||
linked_list_t thumbnails_to_write = {.meta_head = NULL, .meta_tail = NULL};
|
||||
|
||||
cJSON *json = cJSON_CreateObject();
|
||||
int buffer_size_guess = 8192;
|
||||
|
||||
const char *mime_text = mime_get_mime_text(doc->mime);
|
||||
if (mime_text == NULL) {
|
||||
cJSON_AddNullToObject(json, "mime");
|
||||
} else {
|
||||
cJSON_AddStringToObject(json, "mime", mime_text);
|
||||
}
|
||||
|
||||
// Ignore root directory in the file path
|
||||
doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
|
||||
doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
|
||||
char *filepath = doc->filepath + ScanCtx.index.desc.root_len;
|
||||
char filepath[PATH_MAX * 3];
|
||||
strcpy(filepath, doc->filepath + ScanCtx.index.desc.root_len);
|
||||
|
||||
cJSON_AddStringToObject(json, "extension", filepath + doc->ext);
|
||||
|
||||
@@ -125,7 +123,6 @@ char *build_json_string(document_t *doc) {
|
||||
while (meta != NULL) {
|
||||
|
||||
switch (meta->key) {
|
||||
case MetaThumbnail:
|
||||
case MetaPages:
|
||||
case MetaWidth:
|
||||
case MetaHeight:
|
||||
@@ -143,7 +140,6 @@ char *build_json_string(document_t *doc) {
|
||||
case MetaAlbumArtist:
|
||||
case MetaGenre:
|
||||
case MetaFontName:
|
||||
case MetaParent:
|
||||
case MetaExifMake:
|
||||
case MetaExifDescription:
|
||||
case MetaExifSoftware:
|
||||
@@ -168,6 +164,11 @@ char *build_json_string(document_t *doc) {
|
||||
buffer_size_guess += (int) strlen(meta->str_val);
|
||||
break;
|
||||
}
|
||||
case MetaThumbnail: {
|
||||
// Keep a list of thumbnails to write after we know what the sid is
|
||||
APPEND_THUMBNAIL(&thumbnails_to_write, meta->str_val, meta->size);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key));
|
||||
}
|
||||
@@ -180,13 +181,19 @@ char *build_json_string(document_t *doc) {
|
||||
char *json_str = cJSON_PrintBuffered(json, buffer_size_guess, FALSE);
|
||||
cJSON_Delete(json);
|
||||
|
||||
return json_str;
|
||||
}
|
||||
|
||||
void write_document(document_t *doc) {
|
||||
char *json_str = build_json_string(doc);
|
||||
|
||||
database_write_document(ProcData.index_db, doc, json_str);
|
||||
int doc_id = database_write_document(ProcData.index_db, doc, json_str);
|
||||
free(doc);
|
||||
free(json_str);
|
||||
|
||||
// Write thumbnails
|
||||
meta = thumbnails_to_write.meta_head;
|
||||
int index_num = 0;
|
||||
while (meta != NULL) {
|
||||
database_write_thumbnail(ProcData.index_db, doc_id, index_num, meta->str_val, meta->size);
|
||||
|
||||
meta_line_t *tmp = meta;
|
||||
meta = meta->next;
|
||||
free(tmp);
|
||||
index_num += 1;
|
||||
}
|
||||
}
|
||||
39
src/main.c
39
src/main.c
@@ -39,7 +39,7 @@ void database_scan_begin(scan_args_t *args) {
|
||||
index_descriptor_t *original_desc = database_read_index_descriptor(db);
|
||||
|
||||
// copy original index id
|
||||
strcpy(desc->id, original_desc->id);
|
||||
desc->id = original_desc->id;
|
||||
|
||||
if (original_desc->version_major != VersionMajor) {
|
||||
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc->version, Version);
|
||||
@@ -67,7 +67,7 @@ void database_scan_begin(scan_args_t *args) {
|
||||
desc->version_patch = VersionPatch;
|
||||
|
||||
// generate new index id based on timestamp
|
||||
md5_hexdigest(&ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), ScanCtx.index.desc.id);
|
||||
desc->id = (int) ScanCtx.index.desc.timestamp;
|
||||
|
||||
database_initialize(db);
|
||||
database_open(db);
|
||||
@@ -75,14 +75,11 @@ void database_scan_begin(scan_args_t *args) {
|
||||
}
|
||||
|
||||
database_increment_version(db);
|
||||
database_sync_mime_table(db);
|
||||
|
||||
database_close(db, FALSE);
|
||||
}
|
||||
|
||||
void write_thumbnail_callback(char *key, int num, void *buf, size_t buf_len) {
|
||||
database_write_thumbnail(ProcData.index_db, key, num, buf, buf_len);
|
||||
}
|
||||
|
||||
void log_callback(const char *filepath, int level, char *str) {
|
||||
if (level == LEVEL_FATAL) {
|
||||
sist_log(filepath, level, str);
|
||||
@@ -140,7 +137,6 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
// Comic
|
||||
ScanCtx.comic_ctx.log = log_callback;
|
||||
ScanCtx.comic_ctx.logf = logf_callback;
|
||||
ScanCtx.comic_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.comic_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.comic_ctx.tn_qscale = args->tn_quality;
|
||||
@@ -157,7 +153,6 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
}
|
||||
ScanCtx.ebook_ctx.log = log_callback;
|
||||
ScanCtx.ebook_ctx.logf = logf_callback;
|
||||
ScanCtx.ebook_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
|
||||
ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
|
||||
|
||||
@@ -165,7 +160,6 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.font_ctx.log = log_callback;
|
||||
ScanCtx.font_ctx.logf = logf_callback;
|
||||
ScanCtx.font_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// Media
|
||||
ScanCtx.media_ctx.tn_qscale = args->tn_quality;
|
||||
@@ -173,7 +167,6 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.media_ctx.tn_count = args->tn_count;
|
||||
ScanCtx.media_ctx.log = log_callback;
|
||||
ScanCtx.media_ctx.logf = logf_callback;
|
||||
ScanCtx.media_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
|
||||
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
|
||||
ScanCtx.media_ctx.read_subtitles = args->tn_count;
|
||||
@@ -189,13 +182,11 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.ooxml_ctx.content_size = args->content_size;
|
||||
ScanCtx.ooxml_ctx.log = log_callback;
|
||||
ScanCtx.ooxml_ctx.logf = logf_callback;
|
||||
ScanCtx.ooxml_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// MOBI
|
||||
ScanCtx.mobi_ctx.content_size = args->content_size;
|
||||
ScanCtx.mobi_ctx.log = log_callback;
|
||||
ScanCtx.mobi_ctx.logf = logf_callback;
|
||||
ScanCtx.mobi_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.mobi_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.mobi_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.mobi_ctx.tn_qscale = args->tn_quality;
|
||||
@@ -209,7 +200,6 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.msdoc_ctx.content_size = args->content_size;
|
||||
ScanCtx.msdoc_ctx.log = log_callback;
|
||||
ScanCtx.msdoc_ctx.logf = logf_callback;
|
||||
ScanCtx.msdoc_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string("application/msword");
|
||||
|
||||
ScanCtx.threads = args->threads;
|
||||
@@ -228,7 +218,6 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.raw_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.raw_ctx.log = log_callback;
|
||||
ScanCtx.raw_ctx.logf = logf_callback;
|
||||
ScanCtx.raw_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// Wpd
|
||||
ScanCtx.wpd_ctx.content_size = args->content_size;
|
||||
@@ -316,16 +305,15 @@ void sist2_index(index_args_t *args) {
|
||||
database_open(db);
|
||||
database_iterator_t *iterator = database_create_document_iterator(db);
|
||||
database_document_iter_foreach(json, iterator) {
|
||||
char doc_id[SIST_DOC_ID_LEN];
|
||||
strcpy(doc_id, cJSON_GetObjectItem(json, "_id")->valuestring);
|
||||
char sid[SIST_SID_LEN];
|
||||
int doc_id = cJSON_GetObjectItem(json, "_id")->valueint;
|
||||
cJSON_DeleteItemFromObject(json, "_id");
|
||||
|
||||
// TODO: delete tag if empty
|
||||
format_sid(sid, desc->id, doc_id);
|
||||
|
||||
if (args->print) {
|
||||
print_json(json, doc_id);
|
||||
print_json(json, sid);
|
||||
} else {
|
||||
index_json(json, doc_id);
|
||||
index_json(json, sid);
|
||||
cnt += 1;
|
||||
}
|
||||
cJSON_Delete(json);
|
||||
@@ -334,10 +322,12 @@ void sist2_index(index_args_t *args) {
|
||||
free(iterator);
|
||||
|
||||
if (!args->print) {
|
||||
char sid[SIST_SID_LEN];
|
||||
|
||||
database_iterator_t *del_iter = database_create_delete_list_iterator(db);
|
||||
database_delete_list_iter_foreach(id, del_iter) {
|
||||
delete_document(id);
|
||||
free(id);
|
||||
database_delete_list_iter_foreach(doc_id, del_iter) {
|
||||
format_sid(sid, desc->id, doc_id);
|
||||
delete_document(sid);
|
||||
}
|
||||
free(del_iter);
|
||||
}
|
||||
@@ -533,7 +523,8 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."),
|
||||
|
||||
OPT_GROUP("sqlite-index options"),
|
||||
OPT_STRING(0, "search-index", &common_search_index, "Path to search index. Will be created if it does not exist yet."),
|
||||
OPT_STRING(0, "search-index", &common_search_index,
|
||||
"Path to search index. Will be created if it does not exist yet."),
|
||||
|
||||
OPT_GROUP("Web options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
|
||||
|
||||
@@ -61,4 +61,6 @@ unsigned int mime_get_mime_by_ext(const char *ext);
|
||||
|
||||
unsigned int mime_get_mime_by_string(const char *str);
|
||||
|
||||
unsigned int* get_mime_ids();
|
||||
|
||||
#endif
|
||||
|
||||
@@ -365,7 +365,6 @@ model_vnd_gdl=65893,
|
||||
model_vnd_gs_gdl=65894,
|
||||
model_vrml=65895,
|
||||
model_x_pov=65896,
|
||||
sist2_sidecar=2,
|
||||
text_PGP=590185,
|
||||
text_asp=590186,
|
||||
text_css=590187,
|
||||
@@ -909,7 +908,6 @@ case image_x_sony_arw: return "image/x-sony-arw";
|
||||
case image_x_sony_sr2: return "image/x-sony-sr2";
|
||||
case image_x_sony_srf: return "image/x-sony-srf";
|
||||
case image_x_epson_erf: return "image/x-epson-erf";
|
||||
case sist2_sidecar: return "sist2/sidecar";
|
||||
default: return NULL;}}
|
||||
unsigned int mime_extension_lookup(unsigned long extension_crc32) {switch (extension_crc32) {
|
||||
case 2495639202:return application_x_matlab_data;
|
||||
@@ -1293,7 +1291,6 @@ case 1698465774:return image_x_sony_arw;
|
||||
case 2083014127:return image_x_sony_sr2;
|
||||
case 271503362:return image_x_sony_srf;
|
||||
case 142938048:return image_x_epson_erf;
|
||||
case 287571459:return sist2_sidecar;
|
||||
default: return 0;}}
|
||||
unsigned int mime_name_lookup(unsigned long mime_crc32) {switch (mime_crc32) {
|
||||
case 3272851765: return application_x_matlab_data;
|
||||
@@ -1747,6 +1744,7 @@ case 3060720351: return image_x_sony_arw;
|
||||
case 2944016606: return image_x_sony_sr2;
|
||||
case 3279729971: return image_x_sony_srf;
|
||||
case 1665206815: return image_x_epson_erf;
|
||||
case 521139448: return sist2_sidecar;
|
||||
default: return 0;}}
|
||||
unsigned int mime_ids[] = {655530,655363,655364,655365,655366,655362,655361,655367,655368,655369,655370,655371,655372 | 0x40000000,655373,655374,655375,655376 | 0x08000000,655377,655378,655379,655380,655382,655381,655383,655384,655390,655385,655386,655387,655388,655389,655391,655392,655393,655394,655395 | 0x40000000,655396,655397,655398,655399,655400,655401,655402,655403,655404,655405,655406,655407,655408,655411,655412,655413,655414,655415,655416,655417,655418,655419 | 0x20000000,655421,655422,655423,655424,655425,655426,655427,655428,655429,655430,655431,655432 | 0x04000000,655433 | 0x04000000,655434 | 0x04000000,655435,655436,655437,655438,655439,655440,655441,655442,655443,655444,655445,655446 | 0x10000000,655447,655448,655449 | 0x10000000,655450,655451,655452,655453,655454,655455,655456,655457,655458,655459,655461 | 0x08000000,655460,655462,655463,655464,655465,655466,655467,655468,655469,655470,655471,655472,655473,655474,655475,655476,655477,655478,655479,655480,1,655481,655482,655483,655484,655485,655486,655487,655488,655489 | 0x20000000,655490,655491,655492,655493,655494,655495,655496,655497,655498,655499,655500,655501,655502,655503,655504,655505,655506,655507,655508,655509,655510,655511,655512,655513,655514,655515,655516,655517,655519,655518 | 0x08000000,655521,655520,655522 | 0x08000000,655523 | 0x08000000,655524 | 0x08000000,655525,655526,655527,655528,655529,655531,655532,655533,655534,655535,655599,655536 | 0x02000000,655409 | 0x02000000,655540,655537,655538,655539,655541,655542,655543,655544,655545,655546,655547,655548,655549,655550,655552,655551,655553,655554,655555,655556,655557,655558,655559,655560,655561,655562 | 0x10000000,655563,655564,655565,655566,655567,655569,655568,655570,655571,655572,655573,655574,655575,655576,655577,655578 | 0x10000000,655579,655580,655581,655583,655582,655584,655585,655586,655587,655588,655589,655590,655591,655592,655593,655594,655595 | 0x08000000,655596,655597 | 0x08000000,655600 | 0x10000000,655601,458994 | 
0x80000000,458995,458996,458998,458997,458999,459000,459001,459002,459003,459004,459005,459006,459007,459008,459009,459010,459011,459012,459013,459014,459015,459016,459017,459018,459030,459019,459020,459021,459022,459023,459025,459024,459026,459027,459029 | 0x80000000,459028 | 0x80000000,327959 | 0x20000000,327960 | 0x20000000,327962 | 0x20000000,327961 | 0x20000000,524571,524572,524573,524574,524575,524576,524577,524578,524579,524580,524581,524582,524583,524584 | 0x80000000,524585 | 0x80000000,524586,524587 | 0x80000000,524588 | 0x80000000,524589,524590,524591,524592,524593,524594,524595,524596,524597,524599,524602,524603,524605,524606,524608,524610,524611,524612 | 0x80000000,524613,524614,524619,524620,524624,524626,524627,524628,524629,524630,524631,524636,524637,524638,524639 | 0x80000000,524640 | 0x80000000,524641,196962,196963,65892,65893,65894,65895,65896,590186,590187,590189 | 0x01000000,590190,590191,590192,590185,590193,590231,590188,655410,590194,590195,590196,590197,590198,590199,590200,590201,590203,590202,590204,590205,590206,590207,590208,590209,590210,590211,590212,590213,590214,590215,590216,590217,590219,590220,590244 | 0x01000000,590218,590222,590221,590223,590224,590225,590226,590227,590228,590229,590230,590232,590233,590234,590235 | 0x01000000,590236,590237,590238,590239,590240,590241,590242,590243,393638,393639,393640,393637,393641,393642,393643,393644,393645,393646,393647,393648,393649,393650,393651,393652,393653,393654,393655,393656,393657 | 0x80000000,393658,393659,393660,393661,393662,393663,393664,393665,721346,655598,655420,524622 | 0x00800000,524621 | 0x00800000,524609 | 0x00800000,524623 | 0x00800000,524598 | 0x00800000,524600 | 0x00800000,524601 | 0x00800000,524604 | 0x00800000,524615 | 0x00800000,524616 | 0x00800000,524617 | 0x00800000,524618 | 0x00800000,524625 | 0x00800000,524632 | 0x00800000,524633 | 0x00800000,524634 | 0x00800000,524635 | 0x00800000,524607 | 0x00800000,0};
|
||||
unsigned int* get_mime_ids() { return mime_ids; }
|
||||
#endif
|
||||
|
||||
@@ -4,10 +4,8 @@
|
||||
#include "src/ctx.h"
|
||||
#include "mime.h"
|
||||
#include "src/io/serialize.h"
|
||||
#include "src/parsing/sidecar.h"
|
||||
#include "src/parsing/fs_util.h"
|
||||
#include "src/parsing/magic_util.h"
|
||||
#include <pthread.h>
|
||||
|
||||
|
||||
#define MIN_VIDEO_SIZE (1024 * 64)
|
||||
@@ -27,7 +25,6 @@ typedef enum {
|
||||
FILETYPE_OOXML,
|
||||
FILETYPE_COMIC,
|
||||
FILETYPE_MOBI,
|
||||
FILETYPE_SIST2_SIDECAR,
|
||||
FILETYPE_MSDOC,
|
||||
FILETYPE_JSON,
|
||||
FILETYPE_NDJSON,
|
||||
@@ -63,8 +60,6 @@ file_type_t get_file_type(unsigned int mime, size_t size, const char *filepath)
|
||||
return FILETYPE_COMIC;
|
||||
} else if (IS_MOBI(mime)) {
|
||||
return FILETYPE_MOBI;
|
||||
} else if (mime == MIME_SIST2_SIDECAR) {
|
||||
return FILETYPE_SIST2_SIDECAR;
|
||||
} else if (is_msdoc(&ScanCtx.msdoc_ctx, mime)) {
|
||||
return FILETYPE_MSDOC;
|
||||
} else if (is_json(&ScanCtx.json_ctx, mime)) {
|
||||
@@ -157,7 +152,8 @@ void parse(parse_job_t *job) {
|
||||
doc->size = job->vfile.st_size;
|
||||
doc->mtime = MAX(job->vfile.mtime, 0);
|
||||
doc->mime = get_mime(job);
|
||||
generate_doc_id(doc->filepath + ScanCtx.index.desc.root_len, doc->doc_id);
|
||||
doc->thumbnail_count = 0;
|
||||
strcpy(doc->parent, job->parent);
|
||||
|
||||
if (doc->mime == GET_MIME_ERROR_FATAL) {
|
||||
CLOSE_FILE(job->vfile)
|
||||
@@ -165,16 +161,12 @@ void parse(parse_job_t *job) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime)) {
|
||||
if (database_mark_document(ProcData.index_db, doc->filepath + ScanCtx.index.desc.root_len, doc->mtime)) {
|
||||
CLOSE_FILE(job->vfile)
|
||||
free(doc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (LogCtx.very_verbose) {
|
||||
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id);
|
||||
}
|
||||
|
||||
switch (get_file_type(doc->mime, doc->size, doc->filepath)) {
|
||||
case FILETYPE_RAW:
|
||||
parse_raw(&ScanCtx.raw_ctx, &job->vfile, doc);
|
||||
@@ -195,6 +187,10 @@ void parse(parse_job_t *job) {
|
||||
parse_font(&ScanCtx.font_ctx, &job->vfile, doc);
|
||||
break;
|
||||
case FILETYPE_ARCHIVE:
|
||||
|
||||
// Insert the document now so that the children documents can link to an existing ID
|
||||
database_write_document(ProcData.index_db, doc, NULL);
|
||||
|
||||
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra);
|
||||
break;
|
||||
case FILETYPE_OOXML:
|
||||
@@ -206,11 +202,6 @@ void parse(parse_job_t *job) {
|
||||
case FILETYPE_MOBI:
|
||||
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, doc);
|
||||
break;
|
||||
case FILETYPE_SIST2_SIDECAR:
|
||||
parse_sidecar(&job->vfile, doc);
|
||||
CLOSE_FILE(job->vfile)
|
||||
free(doc);
|
||||
return;
|
||||
case FILETYPE_MSDOC:
|
||||
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);
|
||||
break;
|
||||
@@ -225,14 +216,6 @@ void parse(parse_job_t *job) {
|
||||
break;
|
||||
}
|
||||
|
||||
//Parent meta
|
||||
if (job->parent[0] != '\0') {
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
|
||||
meta_parent->key = MetaParent;
|
||||
strcpy(meta_parent->str_val, job->parent);
|
||||
APPEND_META((doc), meta_parent);
|
||||
}
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
|
||||
if (job->vfile.has_checksum) {
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
#include "sidecar.h"
|
||||
|
||||
#include "src/ctx.h"
|
||||
|
||||
/**
 * Parse a JSON sidecar file and store its compacted JSON for the document it
 * is associated with.
 *
 * The file is read in full with read_all(), parsed with cJSON, serialized
 * again without formatting and passed to database_write_document_sidecar().
 * The associated document ID is generated from the part of vfile->filepath
 * that starts at the index root length and is
 * (doc->ext - 1 - root_len) bytes long.
 * NOTE(review): presumably doc->ext is the offset of the extension inside the
 * path, so this strips the trailing ".<ext>" - confirm.
 *
 * Every failure is logged and leaves the database untouched; all buffers
 * allocated here are released on every path.
 *
 * @param vfile sidecar file to read
 * @param doc   document entry created for the sidecar file itself
 */
void parse_sidecar(vfile_t *vfile, document_t *doc) {

    LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath);

    cJSON *json = NULL;
    char *json_str = NULL;

    size_t size;
    char *buf = read_all(vfile, &size);
    if (buf == NULL) {
        LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath);
        return;
    }

    // Grow by one byte for the terminator. The original pointer is only
    // replaced once realloc() succeeded, so it can still be freed on failure.
    char *terminated = realloc(buf, size + 1);
    if (terminated == NULL) {
        LOG_ERRORF("sidecar.c", "Out of memory while reading %s", vfile->filepath);
        goto cleanup;
    }
    buf = terminated;
    buf[size] = '\0';

    json = cJSON_Parse(buf);
    if (json == NULL) {
        LOG_ERRORF("sidecar.c", "Could not parse JSON sidecar %s", vfile->filepath);
        goto cleanup;
    }

    json_str = cJSON_PrintUnformatted(json);
    if (json_str == NULL) {
        LOG_ERRORF("sidecar.c", "Could not serialize JSON sidecar %s", vfile->filepath);
        goto cleanup;
    }

    char rel_path[PATH_MAX];
    size_t rel_path_len = doc->ext - 1 - ScanCtx.index.desc.root_len;

    // A negative difference wraps to a huge size_t and is rejected here as well
    if (rel_path_len >= sizeof(rel_path)) {
        LOG_ERRORF("sidecar.c", "Invalid sidecar path %s", vfile->filepath);
        goto cleanup;
    }

    memcpy(rel_path, vfile->filepath + ScanCtx.index.desc.root_len, rel_path_len);
    rel_path[rel_path_len] = '\0';

    char assoc_doc_id[SIST_DOC_ID_LEN];
    generate_doc_id(rel_path, assoc_doc_id);

    database_write_document_sidecar(ProcData.index_db, assoc_doc_id, json_str);

cleanup:
    // cJSON_Delete() and free() both accept NULL
    cJSON_Delete(json);
    free(json_str);
    free(buf);
}
|
||||
@@ -1,8 +0,0 @@
|
||||
#ifndef SIST2_SIDECAR_H
|
||||
#define SIST2_SIDECAR_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
void parse_sidecar(vfile_t *vfile, document_t *doc);
|
||||
|
||||
#endif
|
||||
20
src/sist.h
20
src/sist.h
@@ -3,19 +3,19 @@
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#ifndef FALSE
|
||||
#define FALSE (0)
|
||||
#ifndef FALSE
|
||||
#define FALSE (0)
|
||||
#define BOOL int
|
||||
#endif
|
||||
|
||||
#ifndef TRUE
|
||||
#define TRUE (!FALSE)
|
||||
#ifndef TRUE
|
||||
#define TRUE (!FALSE)
|
||||
#endif
|
||||
|
||||
#undef MAX
|
||||
#undef MAX
|
||||
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
|
||||
|
||||
#undef MIN
|
||||
#undef MIN
|
||||
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
#ifndef PATH_MAX
|
||||
@@ -23,7 +23,7 @@
|
||||
#endif
|
||||
|
||||
#undef ABS
|
||||
#define ABS(a) (((a) < 0) ? -(a) : (a))
|
||||
#define ABS(a) (((a) < 0) ? -(a) : (a))
|
||||
|
||||
#define UNUSED(x) __attribute__((__unused__)) x
|
||||
|
||||
@@ -51,11 +51,11 @@
|
||||
#include <ctype.h>
|
||||
#include "git_hash.h"
|
||||
|
||||
#define VERSION "3.2.1"
|
||||
#define VERSION "3.3.0"
|
||||
static const char *const Version = VERSION;
|
||||
static const int VersionMajor = 3;
|
||||
static const int VersionMinor = 2;
|
||||
static const int VersionPatch = 1;
|
||||
static const int VersionMinor = 3;
|
||||
static const int VersionPatch = 0;
|
||||
|
||||
#ifndef SIST_PLATFORM
|
||||
#define SIST_PLATFORM unknown
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
typedef struct database database_t;
|
||||
|
||||
typedef struct index_descriptor {
|
||||
char id[SIST_INDEX_ID_LEN];
|
||||
int id;
|
||||
char version[64];
|
||||
int version_major;
|
||||
int version_minor;
|
||||
@@ -24,4 +24,11 @@ typedef struct index_t {
|
||||
char path[PATH_MAX];
|
||||
} index_t;
|
||||
|
||||
typedef struct {
|
||||
int doc_id;
|
||||
int index_id;
|
||||
long sid_int64;
|
||||
char sid_str[SIST_SID_LEN];
|
||||
} sist_id_t;
|
||||
|
||||
#endif
|
||||
|
||||
47
src/util.h
47
src/util.h
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "third-party/utf8.h/utf8.h"
|
||||
#include "libscan/scan.h"
|
||||
#include "types.h"
|
||||
#include <openssl/evp.h>
|
||||
|
||||
|
||||
@@ -18,7 +19,8 @@ dyn_buffer_t url_escape(char *str);
|
||||
|
||||
extern int PrintingProgressBar;
|
||||
|
||||
void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t index_size, int waiting);
|
||||
void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t index_size, int waiting);
|
||||
|
||||
void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
|
||||
|
||||
const char *find_file_in_paths(const char **paths, const char *filename);
|
||||
@@ -87,24 +89,6 @@ static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
|
||||
*s = '\0';
|
||||
}
|
||||
|
||||
static void md5_hexdigest(const void *data, size_t size, char *output) {
|
||||
EVP_MD_CTX *md_ctx = EVP_MD_CTX_new();
|
||||
EVP_DigestInit_ex(md_ctx, EVP_md5(), NULL);
|
||||
|
||||
EVP_DigestUpdate(md_ctx, data, size);
|
||||
|
||||
unsigned char digest[MD5_DIGEST_LENGTH];
|
||||
EVP_DigestFinal_ex(md_ctx, digest, NULL);
|
||||
EVP_MD_CTX_free(md_ctx);
|
||||
|
||||
buf2hex(digest, MD5_DIGEST_LENGTH, output);
|
||||
}
|
||||
|
||||
/**
 * Derive a document ID from a path: the ID is the MD5 hex digest of the
 * NUL-terminated string rel_path (terminator excluded).
 *
 * @param rel_path NUL-terminated path to hash
 * @param doc_id   destination buffer for the hex digest string
 */
__always_inline
static void generate_doc_id(const char *rel_path, char *doc_id) {
    const size_t path_len = strlen(rel_path);

    md5_hexdigest(rel_path, path_len, doc_id);
}
|
||||
|
||||
#define MILLISECOND 1000
|
||||
|
||||
struct timespec timespec_add(struct timespec ts1, long usec);
|
||||
@@ -125,6 +109,29 @@ struct timespec timespec_add(struct timespec ts1, long usec);
|
||||
} while (0)
|
||||
|
||||
#define array_foreach(arr) \
|
||||
for (int i = 0; (arr)[i] != NULL; i++)
|
||||
for (int i = 0; (arr)[i] != 0; i++)
|
||||
|
||||
// Write the textual sist ID "<index_id>.<doc_id>" into out, each ID printed as
// 8 zero-padded lowercase hexadecimal digits. The IDs are converted to
// unsigned int, which is the argument type %x requires. With a 32-bit
// unsigned int the result is always 17 characters plus the terminating NUL.
#define format_sid(out, index_id, doc_id) \
    sprintf((out), "%08x.%08x", (unsigned int) (index_id), (unsigned int) (doc_id))
|
||||
|
||||
/**
 * Parse a textual sist ID of the form "xxxxxxxx.xxxxxxxx" (index ID, a dot,
 * document ID; each ID is 8 hexadecimal digits) into its components.
 *
 * On success all fields of sid are filled in: index_id, doc_id, a
 * NUL-terminated copy of the text in sid_str, and sid_int64 holding index_id
 * in the upper 32 bits and doc_id in the lower 32 bits.
 *
 * @param sid         output structure
 * @param doc_sid_str at least SIST_SID_LEN - 1 readable characters; it does
 *                    not have to be NUL-terminated
 * @return TRUE on success, FALSE when the character at offset 8 is not '.'
 */
static int parse_sid(sist_id_t *sid, const char doc_sid_str[SIST_SID_LEN]) {
    if (doc_sid_str[8] != '.') {
        return FALSE;
    }

    // Zero-filled so that each 8-character copy is NUL-terminated for strtol()
    char tmp[9] = {0};

    memcpy(tmp, doc_sid_str, 8);
    sid->index_id = (int) strtol(tmp, NULL, 16);
    memcpy(tmp, doc_sid_str + 9, 8);
    sid->doc_id = (int) strtol(tmp, NULL, 16);

    memcpy(sid->sid_str, doc_sid_str, SIST_SID_LEN - 1);
    sid->sid_str[SIST_SID_LEN - 1] = '\0';

    // Pack through unsigned types: a negative doc_id must not sign-extend into
    // the index half, and a negative index_id must not be left-shifted as a
    // signed value. As before, this relies on long being at least 64 bits wide.
    sid->sid_int64 = (long) (((unsigned long) (unsigned int) sid->index_id << 32)
                             | (unsigned int) sid->doc_id);

    return TRUE;
}
|
||||
|
||||
#endif
|
||||
|
||||
155
src/web/serve.c
155
src/web/serve.c
@@ -48,30 +48,24 @@ void get_embedding(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
WebCtx.es_version->major, WebCtx.es_version->minor, WebCtx.es_version->patch);
|
||||
}
|
||||
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
|
||||
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
|
||||
sist_id_t sid;
|
||||
|
||||
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.ptr + 3)) {
|
||||
LOG_DEBUGF("serve.c", "Invalid embedding path: %.*s", (int) hm->uri.len, hm->uri.ptr);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char doc_id[SIST_DOC_ID_LEN];
|
||||
char index_id[SIST_INDEX_ID_LEN];
|
||||
int model_id = (int) strtol(hm->uri.ptr + SIST_SID_LEN + 3, NULL, 10);
|
||||
|
||||
memcpy(index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
|
||||
*(index_id + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
memcpy(doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
|
||||
*(doc_id + SIST_DOC_ID_LEN - 1) = '\0';
|
||||
|
||||
int model_id = (int) strtol(hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 3, NULL, 10);
|
||||
|
||||
database_t *db = web_get_database(index_id);
|
||||
database_t *db = web_get_database(sid.index_id);
|
||||
if (db == NULL) {
|
||||
LOG_DEBUGF("serve.c", "Could not get database for index: %s", index_id);
|
||||
LOG_DEBUGF("serve.c", "Could not get database for index: %s", sid.index_id);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
cJSON *json = database_get_embedding(db, doc_id, model_id);
|
||||
cJSON *json = database_get_embedding(db, sid.doc_id, model_id);
|
||||
|
||||
if (json == NULL) {
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
@@ -84,17 +78,19 @@ void get_embedding(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + 7) {
|
||||
if (hm->uri.len != 17) {
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_index_id[SIST_INDEX_ID_LEN];
|
||||
char index_id_str[9];
|
||||
char arg_stat_type[5];
|
||||
|
||||
memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
|
||||
*(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
memcpy(arg_stat_type, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, 4);
|
||||
memcpy(index_id_str, hm->uri.ptr + 3, 8);
|
||||
*(index_id_str + 8) = '\0';
|
||||
int index_id = (int)strtol(index_id_str, NULL, 16);
|
||||
|
||||
memcpy(arg_stat_type, hm->uri.ptr + 3 + 9, 4);
|
||||
*(arg_stat_type + sizeof(arg_stat_type) - 1) = '\0';
|
||||
|
||||
database_stat_type_d stat_type = database_get_stat_type_by_mnemonic(arg_stat_type);
|
||||
@@ -103,9 +99,9 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
return;
|
||||
}
|
||||
|
||||
database_t *db = web_get_database(arg_index_id);
|
||||
database_t *db = web_get_database(index_id);
|
||||
if (db == NULL) {
|
||||
LOG_DEBUGF("serve.c", "Could not get database for index: %s", arg_index_id);
|
||||
LOG_DEBUGF("serve.c", "Could not get database for index: %d", index_id);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
@@ -152,19 +148,19 @@ void serve_chunk_vendors_css(struct mg_connection *nc, struct mg_http_message *h
|
||||
web_serve_asset_chunk_vendors_css(nc);
|
||||
}
|
||||
|
||||
void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, const char *arg_index,
|
||||
const char *arg_doc_id, int arg_num) {
|
||||
void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, int index_id,
|
||||
int doc_id, int arg_num) {
|
||||
|
||||
database_t *db = web_get_database(arg_index);
|
||||
database_t *db = web_get_database(index_id);
|
||||
if (db == NULL) {
|
||||
LOG_DEBUGF("serve.c", "Could not get database for index: %s", arg_index);
|
||||
LOG_DEBUGF("serve.c", "Could not get database for index: %d", index_id);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
size_t data_len = 0;
|
||||
|
||||
void *data = database_read_thumbnail(db, arg_doc_id, arg_num, &data_len);
|
||||
void *data = database_read_thumbnail(db, doc_id, arg_num, &data_len);
|
||||
|
||||
if (data_len != 0) {
|
||||
web_send_headers(
|
||||
@@ -181,44 +177,29 @@ void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, const
|
||||
}
|
||||
|
||||
void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 5) {
|
||||
sist_id_t sid;
|
||||
|
||||
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.ptr + 3)) {
|
||||
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_doc_id[SIST_DOC_ID_LEN];
|
||||
char arg_index[SIST_INDEX_ID_LEN];
|
||||
char arg_num[5] = {0};
|
||||
int num = (int) strtol(hm->uri.ptr + SIST_SID_LEN + 3, NULL, 10);
|
||||
|
||||
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
|
||||
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
|
||||
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
|
||||
memcpy(arg_num, hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 3, 4);
|
||||
|
||||
int num = (int) strtol(arg_num, NULL, 10);
|
||||
|
||||
serve_thumbnail(nc, hm, arg_index, arg_doc_id, num);
|
||||
serve_thumbnail(nc, hm, sid.index_id, sid.doc_id, num);
|
||||
}
|
||||
|
||||
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
sist_id_t sid;
|
||||
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
|
||||
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.ptr + 3)) {
|
||||
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_doc_id[SIST_DOC_ID_LEN];
|
||||
char arg_index[SIST_INDEX_ID_LEN];
|
||||
|
||||
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
|
||||
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
|
||||
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
|
||||
|
||||
serve_thumbnail(nc, hm, arg_index, arg_doc_id, 0);
|
||||
serve_thumbnail(nc, hm, sid.index_id, sid.doc_id, 0);
|
||||
}
|
||||
|
||||
void search(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
@@ -382,7 +363,7 @@ void index_info(struct mg_connection *nc) {
|
||||
cJSON *idx_json = cJSON_CreateObject();
|
||||
cJSON_AddStringToObject(idx_json, "name", idx->desc.name);
|
||||
cJSON_AddStringToObject(idx_json, "version", idx->desc.version);
|
||||
cJSON_AddStringToObject(idx_json, "id", idx->desc.id);
|
||||
cJSON_AddNumberToObject(idx_json, "id", idx->desc.id);
|
||||
cJSON_AddStringToObject(idx_json, "rewriteUrl", idx->desc.rewrite_url);
|
||||
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
|
||||
cJSON_AddItemToArray(arr, idx_json);
|
||||
@@ -405,15 +386,14 @@ void index_info(struct mg_connection *nc) {
|
||||
cJSON_Delete(json);
|
||||
}
|
||||
|
||||
cJSON *get_root_document_by_id(const char *index_id, const char *doc_id) {
|
||||
cJSON *get_root_document_by_id(int index_id, int doc_id) {
|
||||
|
||||
database_t *db = web_get_database(index_id);
|
||||
if (!db) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char next_id[SIST_DOC_ID_LEN];
|
||||
strcpy(next_id, doc_id);
|
||||
int next_id = doc_id;
|
||||
|
||||
while (TRUE) {
|
||||
cJSON *doc = database_get_document(db, next_id);
|
||||
@@ -423,38 +403,31 @@ cJSON *get_root_document_by_id(const char *index_id, const char *doc_id) {
|
||||
}
|
||||
|
||||
cJSON *parent = cJSON_GetObjectItem(doc, "parent");
|
||||
if (parent == NULL || cJSON_IsNull(parent)) {
|
||||
if (parent == NULL || !cJSON_IsNumber(parent)) {
|
||||
return doc;
|
||||
}
|
||||
|
||||
strcpy(next_id, parent->valuestring);
|
||||
cJSON_Delete(parent);
|
||||
next_id = parent->valueint;
|
||||
cJSON_Delete(doc);
|
||||
}
|
||||
}
|
||||
|
||||
void file(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
sist_id_t sid;
|
||||
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
|
||||
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.ptr + 3)) {
|
||||
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_doc_id[SIST_DOC_ID_LEN];
|
||||
char arg_index[SIST_INDEX_ID_LEN];
|
||||
|
||||
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
|
||||
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
|
||||
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
|
||||
|
||||
index_t *idx = web_get_index_by_id(arg_index);
|
||||
index_t *idx = web_get_index_by_id(sid.index_id);
|
||||
if (idx == NULL) {
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
cJSON *source = get_root_document_by_id(arg_index, arg_doc_id);
|
||||
cJSON *source = get_root_document_by_id(sid.index_id, sid.doc_id);
|
||||
|
||||
if (strlen(idx->desc.rewrite_url) == 0) {
|
||||
serve_file_from_disk(source, idx, nc, hm);
|
||||
@@ -478,7 +451,6 @@ void status(struct mg_connection *nc) {
|
||||
typedef struct {
|
||||
char *name;
|
||||
int delete;
|
||||
char *doc_id;
|
||||
} tag_req_t;
|
||||
|
||||
tag_req_t *parse_tag_request(cJSON *json) {
|
||||
@@ -501,20 +473,14 @@ tag_req_t *parse_tag_request(cJSON *json) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
|
||||
if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tag_req_t *req = malloc(sizeof(tag_req_t));
|
||||
req->delete = arg_delete->valueint;
|
||||
req->name = arg_name->valuestring;
|
||||
req->doc_id = arg_doc_id->valuestring;
|
||||
|
||||
return req;
|
||||
}
|
||||
|
||||
subreq_ctx_t *elastic_delete_tag(const tag_req_t *req) {
|
||||
subreq_ctx_t *elastic_delete_tag(const char* sid, const tag_req_t *req) {
|
||||
char *buf = malloc(sizeof(char) * 8192);
|
||||
snprintf(buf, 8192,
|
||||
"{"
|
||||
@@ -529,12 +495,12 @@ subreq_ctx_t *elastic_delete_tag(const tag_req_t *req) {
|
||||
);
|
||||
|
||||
char url[4096];
|
||||
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, req->doc_id);
|
||||
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, sid);
|
||||
|
||||
return web_post_async(url, buf, WebCtx.es_insecure_ssl);
|
||||
}
|
||||
|
||||
subreq_ctx_t *elastic_write_tag(const tag_req_t *req) {
|
||||
subreq_ctx_t *elastic_write_tag(const char* sid, const tag_req_t *req) {
|
||||
char *buf = malloc(sizeof(char) * 8192);
|
||||
snprintf(buf, 8192,
|
||||
"{"
|
||||
@@ -549,21 +515,18 @@ subreq_ctx_t *elastic_write_tag(const tag_req_t *req) {
|
||||
);
|
||||
|
||||
char url[4096];
|
||||
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, req->doc_id);
|
||||
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, sid);
|
||||
return web_post_async(url, buf, WebCtx.es_insecure_ssl);
|
||||
}
|
||||
|
||||
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
|
||||
sist_id_t sid;
|
||||
if (hm->uri.len != 22 || !parse_sid(&sid, hm->uri.ptr + 5)) {
|
||||
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_index[SIST_INDEX_ID_LEN];
|
||||
memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
|
||||
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
|
||||
char *body = malloc(hm->body.len + 1);
|
||||
memcpy(body, hm->body.ptr, hm->body.len);
|
||||
*(body + hm->body.len) = '\0';
|
||||
@@ -575,36 +538,36 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
return;
|
||||
}
|
||||
|
||||
database_t *db = web_get_database(arg_index);
|
||||
database_t *db = web_get_database(sid.index_id);
|
||||
if (db == NULL) {
|
||||
LOG_DEBUGF("serve.c", "Could not get database for index: %s", arg_index);
|
||||
LOG_DEBUGF("serve.c", "Could not get database for index: %d", sid.index_id);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
tag_req_t *req = parse_tag_request(json);
|
||||
if (req == NULL) {
|
||||
LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index);
|
||||
LOG_DEBUG("serve.c", "Could not parse tag request");
|
||||
cJSON_Delete(json);
|
||||
HTTP_REPLY_BAD_REQUEST
|
||||
return;
|
||||
}
|
||||
|
||||
if (req->delete) {
|
||||
database_delete_tag(db, req->doc_id, req->name);
|
||||
database_delete_tag(db, sid.doc_id, req->name);
|
||||
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {
|
||||
database_delete_tag(WebCtx.search_db, req->doc_id, req->name);
|
||||
database_delete_tag(WebCtx.search_db, sid.sid_int64, req->name);
|
||||
HTTP_REPLY_OK
|
||||
} else {
|
||||
nc->fn_data = elastic_delete_tag(req);
|
||||
nc->fn_data = elastic_delete_tag(sid.sid_str, req);
|
||||
}
|
||||
} else {
|
||||
database_write_tag(db, req->doc_id, req->name);
|
||||
database_write_tag(db, sid.doc_id, req->name);
|
||||
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {
|
||||
database_write_tag(WebCtx.search_db, req->doc_id, req->name);
|
||||
database_fts_write_tag(WebCtx.search_db, sid.sid_int64, req->name);
|
||||
HTTP_REPLY_OK
|
||||
} else {
|
||||
nc->fn_data = elastic_write_tag(req);
|
||||
nc->fn_data = elastic_write_tag(sid.sid_str, req);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -739,11 +702,11 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
|
||||
|
||||
if (mg_http_match_uri(hm, "/status")) {
|
||||
status(nc);
|
||||
} else if (mg_http_match_uri(hm, "/f/*/*")) {
|
||||
} else if (mg_http_match_uri(hm, "/f/*")) {
|
||||
file(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/t/*/*/*")) {
|
||||
thumbnail_with_num(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/t/*/*")) {
|
||||
thumbnail_with_num(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/t/*")) {
|
||||
thumbnail(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/s/*/*")) {
|
||||
stats_files(nc, hm);
|
||||
@@ -752,7 +715,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
|
||||
return;
|
||||
}
|
||||
tag(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/e/*/*/*")) {
|
||||
} else if (mg_http_match_uri(hm, "/e/*/*")) {
|
||||
get_embedding(nc, hm);
|
||||
return;
|
||||
} else {
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#include "src/web/web_util.h"
|
||||
|
||||
typedef struct {
|
||||
char *index_id;
|
||||
int index_id;
|
||||
char *prefix;
|
||||
int min_depth;
|
||||
int max_depth;
|
||||
@@ -23,7 +23,7 @@ typedef struct {
|
||||
double date_min;
|
||||
double date_max;
|
||||
int page_size;
|
||||
char **index_ids;
|
||||
int *index_ids;
|
||||
char **mime_types;
|
||||
char **tags;
|
||||
int sort_asc;
|
||||
@@ -108,7 +108,7 @@ static json_value get_json_bool(cJSON *object, const char *name) {
|
||||
return (json_value) {item, FALSE};
|
||||
}
|
||||
|
||||
static json_value get_json_float_array(cJSON *object, const char *name) {
|
||||
static json_value get_json_number_array(cJSON *object, const char *name) {
|
||||
cJSON *item = cJSON_GetObjectItem(object, name);
|
||||
if (item == NULL || cJSON_IsNull(item)) {
|
||||
return (json_value) {NULL, FALSE};
|
||||
@@ -147,7 +147,6 @@ static json_value get_json_array(cJSON *object, const char *name) {
|
||||
}
|
||||
|
||||
char **json_array_to_c_array(cJSON *json) {
|
||||
|
||||
cJSON *element;
|
||||
char **arr = calloc(cJSON_GetArraySize(json) + 1, sizeof(char *));
|
||||
int i = 0;
|
||||
@@ -158,6 +157,17 @@ char **json_array_to_c_array(cJSON *json) {
|
||||
return arr;
|
||||
}
|
||||
|
||||
/**
 * Convert a cJSON array of numbers into a newly allocated int array.
 *
 * One extra zero-initialized slot is allocated after the converted values, so
 * the result always ends with a 0 entry. Each element is taken from
 * valuedouble and cast to int.
 *
 * @param json cJSON array to convert
 * @return array allocated with calloc(); the caller releases it with free()
 */
int *json_number_array_to_c_array(cJSON *json) {
    const int count = cJSON_GetArraySize(json);
    int *values = calloc(count + 1, sizeof(int));
    int *cursor = values;
    cJSON *item;

    cJSON_ArrayForEach(item, json) {
        *cursor++ = (int) item->valuedouble;
    }

    return values;
}
|
||||
|
||||
#define DEFAULT_HIGHLIGHT_CONTEXT_SIZE 20
|
||||
|
||||
fts_search_req_t *get_search_req(struct mg_http_message *hm) {
|
||||
@@ -184,11 +194,11 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
|
||||
(req_seed = get_json_number(json, "seed")).invalid ||
|
||||
(req_fetch_aggregations = get_json_bool(json, "fetchAggregations")).invalid ||
|
||||
(req_sort_asc = get_json_bool(json, "sortAsc")).invalid ||
|
||||
(req_index_ids = get_json_array(json, "indexIds")).invalid ||
|
||||
(req_index_ids = get_json_number_array(json, "indexIds")).invalid ||
|
||||
(req_mime_types = get_json_array(json, "mimeTypes")).invalid ||
|
||||
(req_highlight = get_json_bool(json, "highlight")).invalid ||
|
||||
(req_highlight_context_size = get_json_number(json, "highlightContextSize")).invalid ||
|
||||
(req_embedding = get_json_float_array(json, "embedding")).invalid ||
|
||||
(req_embedding = get_json_number_array(json, "embedding")).invalid ||
|
||||
(req_model = get_json_number(json, "model")).invalid ||
|
||||
(req_tags = get_json_array(json, "tags")).invalid) {
|
||||
cJSON_Delete(json);
|
||||
@@ -251,7 +261,7 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
|
||||
req->date_max = req_date_max.val ? req_date_max.val->valuedouble : 0;
|
||||
req->page_size = (int) req_page_size.val->valuedouble;
|
||||
req->sort_asc = req_sort_asc.val ? req_sort_asc.val->valueint : TRUE;
|
||||
req->index_ids = req_index_ids.val ? json_array_to_c_array(req_index_ids.val) : NULL;
|
||||
req->index_ids = req_index_ids.val ? json_number_array_to_c_array(req_index_ids.val) : NULL;
|
||||
req->after = req_after.val ? json_array_to_c_array(req_after.val) : NULL;
|
||||
req->mime_types = req_mime_types.val ? json_array_to_c_array(req_mime_types.val) : NULL;
|
||||
req->tags = req_tags.val ? json_array_to_c_array(req_tags.val) : NULL;
|
||||
@@ -282,7 +292,9 @@ void destroy_search_req(fts_search_req_t *req) {
|
||||
free(req->query);
|
||||
free(req->path);
|
||||
|
||||
destroy_array(req->index_ids);
|
||||
if (req->index_ids) {
|
||||
free(req->index_ids);
|
||||
}
|
||||
destroy_array(req->mime_types);
|
||||
destroy_array(req->tags);
|
||||
|
||||
@@ -303,7 +315,7 @@ fts_search_paths_req_t *get_search_paths_req(struct mg_http_message *hm) {
|
||||
json_value req_index_id, req_min_depth, req_max_depth, req_prefix;
|
||||
|
||||
if (!cJSON_IsObject(json) ||
|
||||
(req_index_id = get_json_string(json, "indexId")).invalid ||
|
||||
(req_index_id = get_json_number(json, "indexId")).invalid ||
|
||||
(req_prefix = get_json_string(json, "prefix")).invalid ||
|
||||
(req_min_depth = get_json_number(json, "minDepth")).val == NULL ||
|
||||
(req_max_depth = get_json_number(json, "maxDepth")).val == NULL) {
|
||||
@@ -313,19 +325,16 @@ fts_search_paths_req_t *get_search_paths_req(struct mg_http_message *hm) {
|
||||
|
||||
fts_search_paths_req_t *req = malloc(sizeof(fts_search_paths_req_t));
|
||||
|
||||
req->index_id = req_index_id.val ? strdup(req_index_id.val->valuestring) : NULL;
|
||||
req->index_id = req_index_id.val ? req_index_id.val->valueint : 0;
|
||||
req->prefix = req_prefix.val ? strdup(req_prefix.val->valuestring) : NULL;
|
||||
req->min_depth = req_min_depth.val->valueint;
|
||||
req->max_depth = req_max_depth.val->valueint;
|
||||
req->prefix = req_prefix.val ? strdup(req_prefix.val->valuestring) : NULL;
|
||||
|
||||
cJSON_Delete(json);
|
||||
return req;
|
||||
}
|
||||
|
||||
void destroy_search_paths_req(fts_search_paths_req_t *req) {
|
||||
if (req->index_id) {
|
||||
free(req->index_id);
|
||||
}
|
||||
if (req->prefix) {
|
||||
free(req->prefix);
|
||||
}
|
||||
@@ -398,11 +407,15 @@ void fts_search(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
void fts_get_document(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
char doc_id[SIST_DOC_ID_LEN];
|
||||
memcpy(doc_id, hm->uri.ptr + 7, SIST_INDEX_ID_LEN);
|
||||
*(doc_id + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
sist_id_t sid;
|
||||
|
||||
cJSON *json = database_fts_get_document(WebCtx.search_db, doc_id);
|
||||
if (hm->uri.len != 24 || !parse_sid(&sid, hm->uri.ptr + 7)) {
|
||||
LOG_DEBUGF("serve.c", "Invalid /fts/d/ path: %.*s", (int) hm->uri.len, hm->uri.ptr);
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
cJSON *json = database_fts_get_document(WebCtx.search_db, sid.sid_int64);
|
||||
|
||||
if (!json) {
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
|
||||
@@ -32,16 +32,16 @@ void web_serve_asset_chunk_vendors_css(struct mg_connection *nc) {
|
||||
mg_send(nc, chunk_vendors_css, sizeof(chunk_vendors_css));
|
||||
}
|
||||
|
||||
index_t *web_get_index_by_id(const char *index_id) {
|
||||
index_t *web_get_index_by_id(int index_id) {
|
||||
for (int i = WebCtx.index_count; i >= 0; i--) {
|
||||
if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
|
||||
if (index_id == WebCtx.indices[i].desc.id) {
|
||||
return &WebCtx.indices[i];
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
database_t *web_get_database(const char *index_id) {
|
||||
database_t *web_get_database(int index_id) {
|
||||
index_t *idx = web_get_index_by_id(index_id);
|
||||
if (idx != NULL) {
|
||||
return idx->db;
|
||||
|
||||
@@ -10,9 +10,9 @@
|
||||
// See https://web.dev/coop-coep/
|
||||
#define HTTP_CROSS_ORIGIN_HEADERS "Cross-Origin-Embedder-Policy: require-corp\r\nCross-Origin-Opener-Policy: same-origin\r\n"
|
||||
|
||||
index_t *web_get_index_by_id(const char *index_id);
|
||||
index_t *web_get_index_by_id(int index_id);
|
||||
|
||||
database_t *web_get_database(const char *index_id);
|
||||
database_t *web_get_database(int index_id);
|
||||
|
||||
__always_inline
|
||||
static char *web_address_to_string(struct mg_addr *addr) {
|
||||
|
||||
Reference in New Issue
Block a user