use sqlite to save index, major thread pool refactor

This commit is contained in:
2023-04-03 21:39:50 -04:00
parent ca973d63a4
commit fc36f33d52
62 changed files with 3630 additions and 4673 deletions

View File

@@ -0,0 +1,159 @@
#include "database.h"
#include "src/sist.h"
#include "src/ctx.h"
#define TREEMAP_MINIMUM_MERGES_TO_CONTINUE (100)
#define SIZE_BUCKET (long)(5 * 1000 * 1000)
#define DATE_BUCKET (long)(2629800) // ~30 days
database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold) {
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db,
"SELECT path, path_parent(path), size FROM tm"
" WHERE path_parent(path) IN (SELECT path FROM tm)"
" AND size<?",
-1, &stmt, NULL);
sqlite3_bind_int64(stmt, 1, threshold);
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
iter->stmt = stmt;
iter->db = db;
return iter;
}
treemap_row_t database_treemap_iter(database_iterator_t *iter) {
if (iter->stmt == NULL) {
LOG_FATAL("database.c", "FIXME: database_treemap_iter() called after iteration stopped");
}
int ret = sqlite3_step(iter->stmt);
if (ret == SQLITE_ROW) {
treemap_row_t row = {
.path = (const char *) sqlite3_column_text(iter->stmt, 0),
.parent = (const char *) sqlite3_column_text(iter->stmt, 1),
.size = sqlite3_column_int64(iter->stmt, 2)
};
return row;
}
if (ret != SQLITE_DONE) {
LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db));
}
sqlite3_finalize(iter->stmt);
iter->stmt = NULL;
return (treemap_row_t) {NULL, NULL, 0};
}
void database_generate_stats(database_t *db, double treemap_threshold) {
LOG_INFO("database.c", "Generating stats");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_size_agg;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_date_agg;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_mime_agg;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_treemap;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(
sqlite3_exec(db->db, "CREATE TEMP TABLE tm(path TEXT PRIMARY KEY, size INT);", NULL, NULL, NULL));
sqlite3_prepare_v2(db->db, "UPDATE tm SET size=size+? WHERE path=?;", -1, &db->treemap_merge_up_update_stmt, NULL);
sqlite3_prepare_v2(db->db, "DELETE FROM tm WHERE path = ?;", -1, &db->treemap_merge_up_delete_stmt, NULL);
// size aggregation
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "INSERT INTO stats_size_agg"
" SELECT"
" cast(size / ?1 as int) * ?1 as bucket,"
" count(*) as count"
" FROM document"
" GROUP BY bucket", -1, &stmt, NULL);
sqlite3_bind_int(stmt, 1, SIZE_BUCKET);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
sqlite3_finalize(stmt);
// date aggregation
sqlite3_prepare_v2(db->db, "INSERT INTO stats_date_agg"
" SELECT"
" cast(mtime / ?1 as int) * ?1 as bucket,"
" count(*) as count"
" FROM document"
" GROUP BY bucket", -1, &stmt, NULL);
sqlite3_bind_int(stmt, 1, DATE_BUCKET);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
sqlite3_finalize(stmt);
// mime aggregation
sqlite3_prepare_v2(db->db, "INSERT INTO stats_mime_agg"
" SELECT"
" (json_data->>'mime') as bucket,"
" sum(size),"
" count(*)"
" FROM document"
" WHERE bucket IS NOT NULL"
" GROUP BY bucket", -1, &stmt, NULL);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
sqlite3_finalize(stmt);
// Treemap
sqlite3_prepare_v2(db->db, "SELECT SUM(size) FROM document;", -1, &stmt, NULL);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
long total_size = sqlite3_column_int64(stmt, 0);
long threshold = (long) ((double) total_size * treemap_threshold);
sqlite3_finalize(stmt);
// flat map
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
"INSERT INTO tm (path, size) SELECT json_data->>'path' as path, sum(size)"
" FROM document WHERE json_data->>'parent' IS NULL GROUP BY path;",
NULL, NULL, NULL));
// Merge up
int merged_rows = 0;
do {
if (merged_rows) {
LOG_INFOF("database.c", "Treemap merge iteration (%d rows changed)", merged_rows);
}
merged_rows = 0;
sqlite3_prepare_v2(db->db,
"INSERT INTO tm (path, size) SELECT path_parent(path) as parent, 0 "
" FROM tm WHERE parent not IN (SELECT path FROM tm) AND size<?"
" ON CONFLICT DO NOTHING;", -1, &stmt, NULL);
sqlite3_bind_int64(stmt, 1, threshold);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
database_iterator_t *iter = database_create_treemap_iterator(db, threshold);
database_treemap_iter_foreach(row, iter) {
sqlite3_bind_int64(db->treemap_merge_up_update_stmt, 1, row.size);
sqlite3_bind_text(db->treemap_merge_up_update_stmt, 2, row.parent, -1, SQLITE_STATIC);
CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_update_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_update_stmt));
sqlite3_bind_text(db->treemap_merge_up_delete_stmt, 1, row.path, -1, SQLITE_STATIC);
CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_delete_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_delete_stmt));
merged_rows += 1;
}
} while (merged_rows > TREEMAP_MINIMUM_MERGES_TO_CONTINUE);
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
"INSERT INTO stats_treemap (path, size) SELECT path,size FROM tm;",
NULL, NULL, NULL));
LOG_INFO("database.c", "Done!");
}