mirror of
https://github.com/simon987/sist2.git
synced 2025-04-10 14:06:45 +00:00
wip
This commit is contained in:
parent
35cfd3b3b1
commit
1cfceba518
@ -58,7 +58,7 @@ add_executable(sist2
|
||||
|
||||
src/auth0/auth0_c_api.h src/auth0/auth0_c_api.cpp
|
||||
|
||||
src/database/database_stats.c src/database/database_schema.c)
|
||||
src/database/database_stats.c src/database/database_schema.c src/database/database_fts.c)
|
||||
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
||||
|
@ -185,7 +185,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
|
||||
3. Install vcpkg dependencies
|
||||
|
||||
```bash
|
||||
vcpkg install curl[core,openssl] sqlite3 cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample]
|
||||
vcpkg install curl[core,openssl] sqlite3[core,fts5] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample]
|
||||
```
|
||||
|
||||
4. Build
|
||||
|
32
src/cli.c
32
src/cli.c
@ -410,6 +410,33 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Validate the arguments of the `sqlite-index` sub-command.
 *
 * argv[1] is resolved with abspath() and stored in args->index_path.
 * args->search_index_path (the --search-index option) must already be set.
 *
 * @return 0 when validation passes, 1 when the positional PATH is missing.
 */
int sqlite_index_args_validate(sqlite_index_args_t *args, int argc, const char **argv) {

    LogCtx.verbose = 1;

    /* argv[0] is the sub-command name; the index path must follow it. */
    if (argc < 2) {
        fprintf(stderr, "Required positional argument: PATH.\n");
        return 1;
    }

    char *resolved_path = abspath(argv[1]);
    if (resolved_path != NULL) {
        args->index_path = resolved_path;
    } else {
        LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]);
    }

    if (args->search_index_path == NULL) {
        LOG_FATAL("cli.c", "Missing required argument --search-index");
    }

    LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path);
    LOG_DEBUGF("cli.c", "arg search_index_path=%s", args->search_index_path);
    LOG_DEBUGF("cli.c", "arg optimize_index=%d", args->optimize_database);

    return 0;
}
|
||||
|
||||
int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
|
||||
LogCtx.verbose = 1;
|
||||
@ -554,6 +581,11 @@ index_args_t *index_args_create() {
|
||||
return args;
|
||||
}
|
||||
|
||||
sqlite_index_args_t *sqlite_index_args_create() {
|
||||
sqlite_index_args_t *args = calloc(sizeof(sqlite_index_args_t), 1);
|
||||
return args;
|
||||
}
|
||||
|
||||
web_args_t *web_args_create() {
|
||||
web_args_t *args = calloc(sizeof(web_args_t), 1);
|
||||
return args;
|
||||
|
11
src/cli.h
11
src/cli.h
@ -66,6 +66,12 @@ typedef struct index_args {
|
||||
int incremental;
|
||||
} index_args_t;
|
||||
|
||||
/**
 * Arguments of the `sqlite-index` sub-command.
 *
 * The struct carries a tag, like the other *_args structs in this header,
 * so it can be forward-declared.
 */
typedef struct sqlite_index_args {
    /* Index path taken from the positional argument. */
    char *index_path;
    /* Value of the --search-index option. */
    char *search_index_path;
    /* Non-zero when --optimize-index was passed. */
    int optimize_database;
} sqlite_index_args_t;
|
||||
|
||||
typedef struct web_args {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
@ -102,6 +108,8 @@ typedef struct exec_args {
|
||||
|
||||
index_args_t *index_args_create();
|
||||
|
||||
sqlite_index_args_t *sqlite_index_args_create();
|
||||
|
||||
void index_args_destroy(index_args_t *args);
|
||||
|
||||
web_args_t *web_args_create();
|
||||
@ -110,6 +118,8 @@ void web_args_destroy(web_args_t *args);
|
||||
|
||||
int index_args_validate(index_args_t *args, int argc, const char **argv);
|
||||
|
||||
int sqlite_index_args_validate(sqlite_index_args_t *args, int argc, const char **argv);
|
||||
|
||||
int web_args_validate(web_args_t *args, int argc, const char **argv);
|
||||
|
||||
exec_args_t *exec_args_create();
|
||||
@ -118,4 +128,5 @@ void exec_args_destroy(exec_args_t *args);
|
||||
|
||||
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -74,6 +74,8 @@ void database_initialize(database_t *db) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, IndexDatabaseSchema, NULL, NULL, NULL));
|
||||
} else if (db->type == IPC_CONSUMER_DATABASE || db->type == IPC_PRODUCER_DATABASE) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, IpcDatabaseSchema, NULL, NULL, NULL));
|
||||
} else if (db->type == FTS_DATABASE) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, FtsDatabaseSchema, NULL, NULL, NULL));
|
||||
}
|
||||
|
||||
sqlite3_close(db->db);
|
||||
@ -479,28 +481,6 @@ void database_write_thumbnail(database_t *db, const char *id, int num, void *dat
|
||||
}
|
||||
|
||||
|
||||
//void database_create_fts_index(database_t *db, database_t *fts_db) {
|
||||
// // In a separate file,
|
||||
//
|
||||
// // use database_initialize() to create FTS schema
|
||||
// // if --force-reset, then truncate the tables first
|
||||
//
|
||||
// /*
|
||||
// * create/append fts table
|
||||
// *
|
||||
// * create/append scalar index table with
|
||||
// * id,index,size,mtime,mime
|
||||
// *
|
||||
// * create/append path index table with
|
||||
// * index,path,depth
|
||||
// *
|
||||
// * content table is a view with SELECT UNION for all attached tables
|
||||
// * random_seed column
|
||||
// */
|
||||
//
|
||||
// // INSERT INTO ft(ft) VALUES('optimize');
|
||||
//}
|
||||
|
||||
job_t *database_get_work(database_t *db, job_type_t job_type) {
|
||||
job_t *job;
|
||||
|
||||
|
@ -10,6 +10,7 @@ typedef struct index_descriptor index_descriptor_t;
|
||||
|
||||
extern const char *IpcDatabaseSchema;
|
||||
extern const char *IndexDatabaseSchema;
|
||||
extern const char *FtsDatabaseSchema;
|
||||
|
||||
typedef enum {
|
||||
INDEX_DATABASE,
|
||||
@ -86,8 +87,6 @@ typedef struct {
|
||||
long size;
|
||||
} treemap_row_t;
|
||||
|
||||
static treemap_row_t null_treemap_row = {0, 0, 0};
|
||||
|
||||
|
||||
database_t *database_create(const char *filename, database_type_t type);
|
||||
|
||||
@ -116,7 +115,7 @@ cJSON *database_document_iter(database_iterator_t *);
|
||||
|
||||
database_iterator_t *database_create_delete_list_iterator(database_t *db);
|
||||
|
||||
char * database_delete_list_iter(database_iterator_t *iter);
|
||||
char *database_delete_list_iter(database_iterator_t *iter);
|
||||
|
||||
#define database_delete_list_iter_foreach(element, iter) \
|
||||
for (char *(element) = database_delete_list_iter(iter); (element) != NULL; (element) = database_delete_list_iter(iter))
|
||||
@ -160,8 +159,14 @@ cJSON *database_get_stats(database_t *db, database_stat_type_d type);
|
||||
/*
 * Evaluate the SQLite call `x` once and report a fatal error through
 * LOG_FATALF when its result is not SQLITE_OK.
 *
 * The expansion reads `db->db` to fetch the error message, so a variable
 * named `db` with a `db` member must be in scope where the macro is used.
 * `x` is parenthesized so that any expression can be passed safely.
 */
#define CRASH_IF_NOT_SQLITE_OK(x) do { \
        int return_value = (x); \
        if (return_value != SQLITE_OK) { \
            LOG_FATALF("database.c", "Sqlite error @ %s:%d : (%d) %s", __BASE_FILE__, __LINE__, return_value, sqlite3_errmsg(db->db)); \
        } \
    } while (0)
|
||||
|
||||
void database_fts_attach(database_t *db, const char *fts_database_path);
|
||||
|
||||
void database_fts_index(database_t *db);
|
||||
|
||||
void database_fts_optimize(database_t *db);
|
||||
|
||||
#endif //SIST2_DATABASE_H
|
88
src/database/database_fts.c
Normal file
88
src/database/database_fts.c
Normal file
@ -0,0 +1,88 @@
|
||||
#include "database.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
/**
 * Attach the search database file to this connection under the schema
 * name `fts`.
 *
 * @param db                connection on which ATTACH DATABASE is executed
 * @param fts_database_path path of the database file to attach
 */
void database_fts_attach(database_t *db, const char *fts_database_path) {

    LOG_DEBUGF("database_fts.c", "Attaching to %s", fts_database_path);

    sqlite3_stmt *stmt = NULL;
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
            db->db, "ATTACH DATABASE ? AS fts", -1, &stmt, NULL));

    /*
     * A failed bind used to be ignored, letting the statement run with a
     * NULL path. SQLITE_STATIC is sufficient because the caller's string
     * stays alive until the statement is finalized below.
     */
    CRASH_IF_NOT_SQLITE_OK(sqlite3_bind_text(stmt, 1, fts_database_path, -1, SQLITE_STATIC));

    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
    sqlite3_finalize(stmt);
}
|
||||
|
||||
/**
 * Copy the documents of the index database into fts.document_index, then
 * remove the rows listed in delete_list.
 *
 * The `fts` schema must already be attached to `db` (the statements
 * reference fts.document_index).
 *
 * Step 1 builds one row per document: sidecar JSON (document_sidecar) is
 * merged with json_patch(), tags are collected with json_group_array(),
 * and _id/size/mtime are injected into the JSON. Rows are upserted on
 * (id, index_id).
 * Step 2 deletes rows of this index whose id is in delete_list.
 */
void database_fts_index(database_t *db) {

    LOG_INFO("database_fts.c", "Creating content table.");

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "WITH docs AS (SELECT document.id as id,\n"
            " (SELECT id FROM descriptor) as index_id,\n"
            " size,\n"
            " document.json_data ->> 'path' as path,\n"
            " length(document.json_data->>'path') - length(REPLACE(document.json_data->>'path', '/', '')) as path_depth,\n"
            " document.json_data ->> 'mime' as mime,\n"
            " mtime,\n"
            " CASE\n"
            " WHEN sc.json_data IS NULL THEN CASE\n"
            " WHEN t.tag IS NULL THEN json_set(\n"
            " document.json_data, '$._id',\n"
            " document.id, '$.size',\n"
            " document.size, '$.mtime',\n"
            " document.mtime)\n"
            " ELSE json_set(document.json_data, '$._id',\n"
            " document.id, '$.size',\n"
            " document.size, '$.mtime',\n"
            " document.mtime, '$.tag',\n"
            " json_group_array(t.tag)) END\n"
            " ELSE CASE\n"
            " WHEN t.tag IS NULL THEN json_patch(\n"
            " json_set(document.json_data, '$._id', document.id, '$.size',\n"
            " document.size, '$.mtime', document.mtime),\n"
            " sc.json_data)\n"
            " ELSE json_set(json_patch(document.json_data, sc.json_data), '$._id',\n"
            " document.id, '$.size', document.size, '$.mtime',\n"
            " document.mtime, '$.tag',\n"
            " json_group_array(t.tag)) END END as json_data\n"
            " FROM document\n"
            " LEFT JOIN document_sidecar sc ON document.id = sc.id\n"
            " LEFT JOIN tag t ON document.id = t.id\n"
            " GROUP BY document.id)\n"
            "INSERT\n"
            "INTO fts.document_index (id, index_id, size, path, path_depth, mtime, mime, json_data)\n"
            /*
             * INSERT ... SELECT maps columns by position, and the CTE
             * yields mime before mtime. `SELECT *` therefore stored the
             * mime string in mtime and the mtime in mime; name the columns
             * in the target order instead.
             */
            "SELECT id, index_id, size, path, path_depth, mtime, mime, json_data\n"
            "FROM docs\n"
            /* WHERE true keeps the parser from reading ON CONFLICT as a join constraint. */
            "WHERE true\n"
            "on conflict (id, index_id) do update set size=excluded.size,\n"
            " mtime=excluded.mtime,\n"
            " json_data=excluded.json_data;",
            NULL, NULL, NULL));

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
            db->db,
            "DELETE\n"
            "FROM fts.document_index\n"
            "WHERE id IN (SELECT id FROM delete_list)\n"
            " AND index_id = (SELECT id FROM descriptor);",
            NULL, NULL, NULL
    ));
}
|
||||
|
||||
/**
 * Run the two optimization statements on the search index: the FTS5
 * 'optimize' command on the `search` table, then `PRAGMA fts.optimize`.
 */
void database_fts_optimize(database_t *db) {
    const char *fts5_optimize_sql = "INSERT INTO search(search) VALUES('optimize');";
    const char *pragma_optimize_sql = "PRAGMA fts.optimize;";

    LOG_INFO("database_fts.c", "Optimizing search index.");

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, fts5_optimize_sql, NULL, NULL, NULL));
    LOG_DEBUG("database_fts.c", "Optimized fts5 table.");

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, pragma_optimize_sql, NULL, NULL, NULL));
    LOG_DEBUG("database_fts.c", "optimized indices.");
}
|
@ -1,3 +1,45 @@
|
||||
/*
 * Schema of the search (FTS) database.
 *
 * - document_index: one row per (id, index_id) with the scalar columns
 *   used for filtering and the full document JSON.
 * - document_view: exposes rowid, name and content extracted from json_data.
 * - search: FTS5 table using document_view as its external content.
 * - on_insert / on_delete / on_update: triggers that keep `search` in
 *   step with document_index.
 */
const char *FtsDatabaseSchema =
        "CREATE TABLE IF NOT EXISTS document_index ("
        " id TEXT NOT NULL,"
        " index_id TEXT NOT NULL,"
        " size INTEGER NOT NULL,"
        " path TEXT NOT NULL,"
        " path_depth INT NOT NULL,"
        " mtime INTEGER NOT NULL,"
        " mime TEXT NOT NULL,"
        " json_data TEXT NOT NULL,"
        " PRIMARY KEY (id, index_id)"
        ");"
        ""
        "CREATE VIEW IF NOT EXISTS document_view (rowid, name, content)"
        " AS"
        " SELECT rowid,"
        " json_data->>'name',"
        " json_data->>'content'"
        " FROM document_index;"
        ""
        "CREATE INDEX IF NOT EXISTS document_index_size_idx ON document_index (size);"
        "CREATE INDEX IF NOT EXISTS document_index_mtime_idx ON document_index (mtime);"
        "CREATE INDEX IF NOT EXISTS document_index_mime_idx ON document_index (mime);"
        "CREATE INDEX IF NOT EXISTS document_index_path_idx ON document_index (path);"
        "CREATE INDEX IF NOT EXISTS document_index_path_depth_idx ON document_index (path_depth);"
        ""
        "CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5 ("
        " name,"
        " content,"
        " content='document_view'"
        ");"
        ""
        "CREATE TRIGGER IF NOT EXISTS on_insert AFTER INSERT ON document_index BEGIN"
        " INSERT INTO search(rowid, name, content) VALUES (new.rowid, new.json_data->>'name', new.json_data->>'content');"
        "END;"
        /*
         * The FTS5 'delete' command needs the rowid of the entry being
         * removed; without it the index entry of the deleted row stays
         * behind. Pass old.rowid, as on_update already does.
         */
        "CREATE TRIGGER IF NOT EXISTS on_delete AFTER DELETE ON document_index BEGIN"
        " INSERT INTO search(search, rowid, name, content) VALUES('delete', old.rowid, old.json_data->>'name', old.json_data->>'content');"
        "END;"
        "CREATE TRIGGER IF NOT EXISTS on_update AFTER UPDATE ON document_index BEGIN"
        " INSERT INTO search(search, rowid, name, content) VALUES('delete', old.rowid, old.json_data->>'name', old.json_data->>'content');"
        " INSERT INTO search(rowid, name, content) VALUES (new.rowid, new.json_data->>'name', new.json_data->>'content');"
        "END;";
|
||||
|
||||
const char *IpcDatabaseSchema =
|
||||
"CREATE TABLE parse_job ("
|
||||
|
38
src/main.c
38
src/main.c
@ -22,6 +22,7 @@
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
"sist2 index [OPTION]... INDEX",
|
||||
"sist2 sqlite-index [OPTION]... INDEX",
|
||||
"sist2 web [OPTION]... INDEX...",
|
||||
"sist2 exec-script [OPTION]... INDEX",
|
||||
NULL,
|
||||
@ -351,6 +352,23 @@ void sist2_index(index_args_t *args) {
|
||||
free(desc);
|
||||
}
|
||||
|
||||
/**
 * Entry point of the `sqlite-index` sub-command: open the index database,
 * make sure the search database exists with the FTS schema, attach it,
 * copy the documents over and optionally optimize the result.
 */
void sist2_sqlite_index(sqlite_index_args_t *args) {
    database_t *index_db = database_create(args->index_path, INDEX_DATABASE);
    database_open(index_db);

    /* Create the search database schema before attaching the file. */
    /* NOTE(review): fts_db is not released anywhere in this function - confirm ownership. */
    database_t *fts_db = database_create(args->search_index_path, FTS_DATABASE);
    database_initialize(fts_db);

    /* All further work goes through the index connection with the search file attached. */
    database_fts_attach(index_db, args->search_index_path);
    database_fts_index(index_db);

    if (args->optimize_database) {
        database_fts_optimize(index_db);
    }

    database_close(index_db, FALSE);
}
|
||||
|
||||
void sist2_exec_script(exec_args_t *args) {
|
||||
LogCtx.verbose = TRUE;
|
||||
|
||||
@ -436,6 +454,7 @@ int main(int argc, const char *argv[]) {
|
||||
index_args_t *index_args = index_args_create();
|
||||
web_args_t *web_args = web_args_create();
|
||||
exec_args_t *exec_args = exec_args_create();
|
||||
sqlite_index_args_t *sqlite_index_args = sqlite_index_args_create();
|
||||
|
||||
int arg_version = 0;
|
||||
|
||||
@ -445,6 +464,7 @@ int main(int argc, const char *argv[]) {
|
||||
char *common_script_path = NULL;
|
||||
int common_async_script = 0;
|
||||
int common_threads = 0;
|
||||
int common_optimize_database = 0;
|
||||
|
||||
struct argparse_option options[] = {
|
||||
OPT_HELP(),
|
||||
@ -471,7 +491,7 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_STRING('o', "output", &scan_args->output, "Output index file path. DEFAULT: index.sist2"),
|
||||
OPT_BOOLEAN(0, "incremental", &scan_args->incremental,
|
||||
"If the output file path exists, only scan new or modified files."),
|
||||
OPT_BOOLEAN(0, "optimize-index", &scan_args->optimize_database,
|
||||
OPT_BOOLEAN(0, "optimize-index", &common_optimize_database,
|
||||
"Defragment index file after scan to reduce its file size."),
|
||||
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
|
||||
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: index"),
|
||||
@ -520,6 +540,11 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."),
|
||||
|
||||
OPT_GROUP("sqlite-index options"),
|
||||
OPT_STRING(0, "search-index", &sqlite_index_args->search_index_path, "Path to search index. Will be created if it does not exist yet."),
|
||||
OPT_BOOLEAN(0, "optimize-index", &common_optimize_database,
|
||||
"Optimize search index file for smaller size and faster queries."),
|
||||
|
||||
OPT_GROUP("Web options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||
@ -586,6 +611,9 @@ int main(int argc, const char *argv[]) {
|
||||
exec_args->async_script = common_async_script;
|
||||
index_args->async_script = common_async_script;
|
||||
|
||||
scan_args->optimize_database = common_optimize_database;
|
||||
sqlite_index_args->optimize_database = common_optimize_database;
|
||||
|
||||
if (argc == 0) {
|
||||
argparse_usage(&argparse);
|
||||
goto end;
|
||||
@ -605,6 +633,14 @@ int main(int argc, const char *argv[]) {
|
||||
}
|
||||
sist2_index(index_args);
|
||||
|
||||
} else if (strcmp(argv[0], "sqlite-index") == 0) {
|
||||
|
||||
int err = sqlite_index_args_validate(sqlite_index_args, argc, argv);
|
||||
if (err != 0) {
|
||||
goto end;
|
||||
}
|
||||
sist2_sqlite_index(sqlite_index_args);
|
||||
|
||||
} else if (strcmp(argv[0], "web") == 0) {
|
||||
|
||||
int err = web_args_validate(web_args, argc, argv);
|
||||
|
Loading…
x
Reference in New Issue
Block a user