mirror of
https://github.com/simon987/sist2.git
synced 2025-12-14 15:59:03 +00:00
use sqlite to save index, major thread pool refactor
This commit is contained in:
159
src/database/database_stats.c
Normal file
159
src/database/database_stats.c
Normal file
@@ -0,0 +1,159 @@
|
||||
#include "database.h"
|
||||
#include "src/sist.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
// A treemap merge-up pass must merge more than this many rows for another pass to be attempted.
#define TREEMAP_MINIMUM_MERGES_TO_CONTINUE (100)
// Bucket width used by the size aggregation (same unit as the document.size column).
// The whole expansion is parenthesized so the macro behaves as a single operand wherever it is used.
#define SIZE_BUCKET ((long) (5 * 1000 * 1000))
// Bucket width used by the date aggregation (same unit as the document.mtime column).
#define DATE_BUCKET ((long) (2629800)) // ~30 days
|
||||
|
||||
/**
 * Create an iterator over the rows of the `tm` table that are candidates for
 * being merged into their parent: rows whose size is below `threshold` and
 * whose parent path (as computed by the SQL function path_parent()) is itself
 * present in `tm`.
 *
 * @param db        Database handle; db->db is the underlying sqlite3 connection.
 * @param threshold Upper bound (exclusive) on the size of the rows to select.
 * @return A heap-allocated iterator holding the prepared statement and `db`.
 *         Nothing in this function releases it; the statement is finalized by
 *         database_treemap_iter() once the iteration is exhausted.
 */
database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold) {

    sqlite3_stmt *stmt = NULL;

    // Fail loudly here if the query cannot be compiled: a NULL statement would
    // otherwise be reported later as "called after iteration stopped".
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db,
                                              "SELECT path, path_parent(path), size FROM tm"
                                              " WHERE path_parent(path) IN (SELECT path FROM tm)"
                                              " AND size<?",
                                              -1, &stmt, NULL));

    CRASH_IF_NOT_SQLITE_OK(sqlite3_bind_int64(stmt, 1, threshold));

    database_iterator_t *iter = malloc(sizeof(*iter));
    if (iter == NULL) {
        // NOTE(review): LOG_FATAL is assumed to terminate the process, as the
        // NULL-statement check in database_treemap_iter() also assumes.
        LOG_FATAL("database.c", "Could not allocate treemap iterator");
    }

    iter->stmt = stmt;
    iter->db = db;

    return iter;
}
|
||||
|
||||
/**
 * Advance a treemap iterator by one step and return the row it lands on.
 *
 * @param iter Iterator created by database_create_treemap_iterator().
 * @return The next row (path, parent path, size), or a row whose fields are
 *         all NULL/0 once the statement reports SQLITE_DONE. In that case the
 *         statement is finalized and iter->stmt is set to NULL, so calling
 *         this function again on the same iterator is reported as fatal.
 *
 * The `path` and `parent` strings come straight from sqlite3_column_text() and
 * are not copied here.
 */
treemap_row_t database_treemap_iter(database_iterator_t *iter) {

    if (iter->stmt == NULL) {
        LOG_FATAL("database.c", "FIXME: database_treemap_iter() called after iteration stopped");
    }

    const int rc = sqlite3_step(iter->stmt);

    if (rc != SQLITE_ROW) {
        // Anything other than a clean end of the result set is an error.
        if (rc != SQLITE_DONE) {
            LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db));
        }

        // End of iteration: release the statement and mark the iterator as spent.
        sqlite3_finalize(iter->stmt);
        iter->stmt = NULL;

        return (treemap_row_t) {NULL, NULL, 0};
    }

    return (treemap_row_t) {
            .path = (const char *) sqlite3_column_text(iter->stmt, 0),
            .parent = (const char *) sqlite3_column_text(iter->stmt, 1),
            .size = sqlite3_column_int64(iter->stmt, 2)
    };
}
|
||||
|
||||
/**
 * Rebuild the statistics tables (stats_size_agg, stats_date_agg,
 * stats_mime_agg and stats_treemap) from the contents of the document table.
 *
 * Steps, in order:
 *  1. Empty the four stats tables and create the temporary working table `tm`.
 *  2. Fill the size, date and mime aggregation tables.
 *  3. Compute the treemap threshold as SUM(document.size) * treemap_threshold.
 *  4. Seed `tm` with the summed size per path of documents that have no
 *     'parent' in their json_data, then repeatedly merge entries smaller than
 *     the threshold into their parent path.
 *  5. Copy `tm` into stats_treemap.
 *
 * @param db                Database handle. Its treemap_merge_up_update_stmt and
 *                          treemap_merge_up_delete_stmt members are (re)assigned
 *                          here and are not finalized by this function.
 * @param treemap_threshold Fraction of the total indexed size below which a
 *                          treemap entry is merged into its parent.
 */
void database_generate_stats(database_t *db, double treemap_threshold) {

    LOG_INFO("database.c", "Generating stats");

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_size_agg;", NULL, NULL, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_date_agg;", NULL, NULL, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_mime_agg;", NULL, NULL, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_treemap;", NULL, NULL, NULL));

    CRASH_IF_NOT_SQLITE_OK(
            sqlite3_exec(db->db, "CREATE TEMP TABLE tm(path TEXT PRIMARY KEY, size INT);", NULL, NULL, NULL));

    // These two statements are reused for every merged row in the merge-up loop below.
    CRASH_IF_NOT_SQLITE_OK(
            sqlite3_prepare_v2(db->db, "UPDATE tm SET size=size+? WHERE path=?;", -1,
                               &db->treemap_merge_up_update_stmt, NULL));
    CRASH_IF_NOT_SQLITE_OK(
            sqlite3_prepare_v2(db->db, "DELETE FROM tm WHERE path = ?;", -1,
                               &db->treemap_merge_up_delete_stmt, NULL));

    // size aggregation
    sqlite3_stmt *stmt = NULL;
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, "INSERT INTO stats_size_agg"
                                                      " SELECT"
                                                      " cast(size / ?1 as int) * ?1 as bucket,"
                                                      " count(*) as count"
                                                      " FROM document"
                                                      " GROUP BY bucket", -1, &stmt, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_bind_int(stmt, 1, SIZE_BUCKET));
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));

    sqlite3_finalize(stmt);

    // date aggregation
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, "INSERT INTO stats_date_agg"
                                                      " SELECT"
                                                      " cast(mtime / ?1 as int) * ?1 as bucket,"
                                                      " count(*) as count"
                                                      " FROM document"
                                                      " GROUP BY bucket", -1, &stmt, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_bind_int(stmt, 1, DATE_BUCKET));
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));

    sqlite3_finalize(stmt);

    // mime aggregation
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, "INSERT INTO stats_mime_agg"
                                                      " SELECT"
                                                      " (json_data->>'mime') as bucket,"
                                                      " sum(size),"
                                                      " count(*)"
                                                      " FROM document"
                                                      " WHERE bucket IS NOT NULL"
                                                      " GROUP BY bucket", -1, &stmt, NULL));
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));

    sqlite3_finalize(stmt);

    // Treemap
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, "SELECT SUM(size) FROM document;", -1, &stmt, NULL));
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
    long total_size = sqlite3_column_int64(stmt, 0);
    // Entries smaller than this are merged into their parent.
    long threshold = (long) ((double) total_size * treemap_threshold);
    sqlite3_finalize(stmt);

    // flat map
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
                                        "INSERT INTO tm (path, size) SELECT json_data->>'path' as path, sum(size)"
                                        " FROM document WHERE json_data->>'parent' IS NULL GROUP BY path;",
                                        NULL, NULL, NULL));

    // Merge up
    int merged_rows = 0;
    do {
        if (merged_rows) {
            LOG_INFOF("database.c", "Treemap merge iteration (%d rows changed)", merged_rows);
        }
        merged_rows = 0;

        // Make sure every under-threshold entry has a parent row (size 0) to be merged into.
        CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db,
                                                  "INSERT INTO tm (path, size) SELECT path_parent(path) as parent, 0 "
                                                  " FROM tm WHERE parent not IN (SELECT path FROM tm) AND size<?"
                                                  " ON CONFLICT DO NOTHING;", -1, &stmt, NULL));
        CRASH_IF_NOT_SQLITE_OK(sqlite3_bind_int64(stmt, 1, threshold));
        CRASH_IF_STMT_FAIL(sqlite3_step(stmt));

        // This statement is re-prepared on every pass; release it so passes do not
        // accumulate unfinalized statements on the connection.
        sqlite3_finalize(stmt);

        // NOTE(review): the iterator is heap-allocated by database_create_treemap_iterator()
        // and is not freed in this function; confirm whether database_treemap_iter_foreach
        // (defined elsewhere) or another owner releases it.
        database_iterator_t *iter = database_create_treemap_iterator(db, threshold);
        database_treemap_iter_foreach(row, iter) {
            // Add the entry's size to its parent...
            CRASH_IF_NOT_SQLITE_OK(sqlite3_bind_int64(db->treemap_merge_up_update_stmt, 1, row.size));
            CRASH_IF_NOT_SQLITE_OK(
                    sqlite3_bind_text(db->treemap_merge_up_update_stmt, 2, row.parent, -1, SQLITE_STATIC));
            CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_update_stmt));
            CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_update_stmt));

            // ...then drop the entry itself.
            CRASH_IF_NOT_SQLITE_OK(
                    sqlite3_bind_text(db->treemap_merge_up_delete_stmt, 1, row.path, -1, SQLITE_STATIC));
            CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_delete_stmt));
            CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_delete_stmt));

            merged_rows += 1;
        }
    } while (merged_rows > TREEMAP_MINIMUM_MERGES_TO_CONTINUE);

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
                                        "INSERT INTO stats_treemap (path, size) SELECT path,size FROM tm;",
                                        NULL, NULL, NULL));

    LOG_INFO("database.c", "Done!");
}
|
||||
|
||||
Reference in New Issue
Block a user