/*
 * Mirror of https://github.com/simon987/sist2.git
 * Synced 2025-12-13 15:29:04 +00:00
 * Original listing: 245 lines, 8.9 KiB, C
 */
#include "database.h"
|
|
#include "src/sist.h"
|
|
#include "src/ctx.h"
|
|
|
|
// The treemap merge loop in database_generate_stats() runs another pass only
// while the previous pass merged MORE rows than this.
#define TREEMAP_MINIMUM_MERGES_TO_CONTINUE (100)

// Bucket width used for the size histogram (stats_size_agg), expressed in the
// unit of document.size (presumably bytes - confirm against the schema).
// The whole expansion is parenthesized so the macro behaves as one operand.
#define SIZE_BUCKET ((long) (5 * 1000 * 1000))

// Bucket width used for the date histogram (stats_date_agg), expressed in the
// unit of document.mtime (presumably seconds - confirm against the schema).
// 2629800 = 365.25 * 86400 / 12, i.e. an average month (~30.44 days).
#define DATE_BUCKET ((long) (2629800))
|
|
|
|
|
|
/**
 * Creates an iterator over the rows of the temporary `tm` table that are
 * candidates for being merged into their parent: rows whose parent path
 * (path_parent(path)) is itself present in `tm` and whose size is strictly
 * below `threshold`.
 *
 * Every row produced by the iterator is (path, path_parent(path), size).
 *
 * @param db        Database handle; the statement is prepared on db->db.
 * @param threshold Only rows with size < threshold are selected.
 * @return Heap-allocated iterator. The caller releases it with free(); the
 *         wrapped statement is finalized by database_treemap_iter() once the
 *         iteration reaches its end.
 */
database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold) {

    sqlite3_stmt *stmt = NULL;

    // Fail here, with the SQLite error, rather than handing a NULL statement
    // to the iterator and failing later with a less useful message.
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
            db->db,
            "SELECT path, path_parent(path), size FROM tm"
            " WHERE path_parent(path) IN (SELECT path FROM tm)"
            " AND size<?",
            -1, &stmt, NULL
    ));

    CRASH_IF_NOT_SQLITE_OK(sqlite3_bind_int64(stmt, 1, threshold));

    database_iterator_t *iter = malloc(sizeof(*iter));
    if (iter == NULL) {
        // Do not leak the statement on the way out
        sqlite3_finalize(stmt);
        LOG_FATAL("database.c", "Could not allocate memory for treemap iterator");
    }

    iter->stmt = stmt;
    iter->db = db;

    return iter;
}
|
|
|
|
/**
 * Advances a treemap iterator by one row.
 *
 * @param iter Iterator created by database_create_treemap_iterator().
 * @return The next (path, parent, size) row. The string pointers come straight
 *         from sqlite3_column_text() and are not copied here. When there are no
 *         more rows, the statement is finalized, iter->stmt is set to NULL and
 *         a row of {NULL, NULL, 0} is returned. Calling this again after that
 *         point, or hitting any other step result, is reported as fatal.
 */
treemap_row_t database_treemap_iter(database_iterator_t *iter) {

    if (iter->stmt == NULL) {
        LOG_FATAL("database.c", "FIXME: database_treemap_iter() called after iteration stopped");
    }

    const int step_result = sqlite3_step(iter->stmt);

    if (step_result != SQLITE_ROW) {
        // Anything other than "no more rows" is an error
        if (step_result != SQLITE_DONE) {
            LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db));
        }

        // End of iteration: release the statement and mark the iterator as exhausted
        sqlite3_finalize(iter->stmt);
        iter->stmt = NULL;

        return (treemap_row_t) {NULL, NULL, 0};
    }

    treemap_row_t current = {
            .path = (const char *) sqlite3_column_text(iter->stmt, 0),
            .parent = (const char *) sqlite3_column_text(iter->stmt, 1),
            .size = sqlite3_column_int64(iter->stmt, 2)
    };

    return current;
}
|
|
|
|
/**
 * Rebuilds the four statistics tables (stats_size_agg, stats_date_agg,
 * stats_mime_agg, stats_treemap) from the `document` table.
 *
 * The treemap is built in a temporary table `tm`:
 *   1. "flat map": one row per path with the summed size of the documents
 *      that have no parent;
 *   2. "merge up": rows smaller than the threshold are folded into their parent
 *      directory (the parent row is created with size 0 when missing), and the
 *      pass is repeated while more than TREEMAP_MINIMUM_MERGES_TO_CONTINUE rows
 *      were merged;
 *   3. the result is copied to stats_treemap.
 *
 * Any SQLite failure is treated as fatal.
 *
 * NOTE(review): `tm` is created with CREATE TEMP TABLE and is not dropped here,
 * and db->treemap_merge_up_*_stmt are not finalized here - presumably this is
 * called once per connection and the statements are released on close; confirm.
 *
 * @param db                Database handle; db->db is the SQLite connection.
 * @param treemap_threshold Fraction of the total document size; treemap rows
 *                          smaller than total_size * treemap_threshold are
 *                          merged into their parent.
 */
void database_generate_stats(database_t *db, double treemap_threshold) {

    LOG_INFO("database.c", "Generating stats");

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_size_agg;", NULL, NULL, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_date_agg;", NULL, NULL, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_mime_agg;", NULL, NULL, NULL));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_treemap;", NULL, NULL, NULL));

    CRASH_IF_NOT_SQLITE_OK(
            sqlite3_exec(db->db, "CREATE TEMP TABLE tm(path TEXT PRIMARY KEY, size INT);", NULL, NULL, NULL));

    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
            db->db, "UPDATE tm SET size=size+? WHERE path=?;", -1,
            &db->treemap_merge_up_update_stmt, NULL
    ));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
            db->db, "DELETE FROM tm WHERE path = ?;", -1,
            &db->treemap_merge_up_delete_stmt, NULL
    ));

    // size aggregation
    sqlite3_stmt *stmt = NULL;
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
            db->db,
            "INSERT INTO stats_size_agg"
            " SELECT"
            " cast(size / ?1 as int) * ?1 as bucket,"
            " count(*) as count"
            " FROM document"
            " GROUP BY bucket", -1, &stmt, NULL
    ));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_bind_int(stmt, 1, SIZE_BUCKET));
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
    sqlite3_finalize(stmt);

    // date aggregation
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
            db->db,
            "INSERT INTO stats_date_agg"
            " SELECT"
            " cast(mtime / ?1 as int) * ?1 as bucket,"
            " count(*) as count"
            " FROM document"
            " GROUP BY bucket", -1, &stmt, NULL
    ));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_bind_int(stmt, 1, DATE_BUCKET));
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
    sqlite3_finalize(stmt);

    // mime aggregation
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
            db->db,
            "INSERT INTO stats_mime_agg"
            " SELECT"
            " m.name as bucket,"
            " sum(size),"
            " count(*)"
            " FROM document INNER JOIN mime m ON m.id=document.mime"
            " WHERE bucket IS NOT NULL"
            " GROUP BY bucket", -1, &stmt, NULL
    ));
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
    sqlite3_finalize(stmt);

    // Treemap
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, "SELECT SUM(size) FROM document;", -1, &stmt, NULL));
    CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
    // Keep the full 64-bit sum until after the multiplication, so the total is
    // not truncated on platforms where long is 32 bits wide.
    sqlite3_int64 total_size = sqlite3_column_int64(stmt, 0);
    long threshold = (long) ((double) total_size * treemap_threshold);
    sqlite3_finalize(stmt);

    // flat map
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
                                        "INSERT INTO tm (path, size) SELECT path, sum(size)"
                                        " FROM document WHERE parent IS NULL GROUP BY path;",
                                        NULL, NULL, NULL));

    // Merge up
    // Creates (with size 0) the parent row of every small row whose parent is
    // not in tm yet. Prepared once and re-run on each pass; it used to be
    // prepared on every pass and never finalized.
    sqlite3_stmt *insert_parents_stmt = NULL;
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
            db->db,
            "INSERT INTO tm (path, size) SELECT path_parent(path) as parent, 0 "
            " FROM tm WHERE parent not IN (SELECT path FROM tm) AND size<?"
            " ON CONFLICT DO NOTHING;", -1, &insert_parents_stmt, NULL
    ));
    CRASH_IF_NOT_SQLITE_OK(sqlite3_bind_int64(insert_parents_stmt, 1, threshold));

    int merged_rows = 0;
    do {
        if (merged_rows) {
            LOG_INFOF("database.c", "Treemap merge iteration (%d rows changed)", merged_rows);
        }
        merged_rows = 0;

        CRASH_IF_STMT_FAIL(sqlite3_step(insert_parents_stmt));
        CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(insert_parents_stmt));

        database_iterator_t *iter = database_create_treemap_iterator(db, threshold);
        database_treemap_iter_foreach(row, iter) {
            // Add the row's size to its parent...
            CRASH_IF_NOT_SQLITE_OK(sqlite3_bind_int64(db->treemap_merge_up_update_stmt, 1, row.size));
            CRASH_IF_NOT_SQLITE_OK(
                    sqlite3_bind_text(db->treemap_merge_up_update_stmt, 2, row.parent, -1, SQLITE_STATIC));
            CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_update_stmt));
            CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_update_stmt));

            // ...then remove the row itself
            CRASH_IF_NOT_SQLITE_OK(
                    sqlite3_bind_text(db->treemap_merge_up_delete_stmt, 1, row.path, -1, SQLITE_STATIC));
            CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_delete_stmt));
            CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_delete_stmt));

            merged_rows += 1;
        }
        free(iter);
    } while (merged_rows > TREEMAP_MINIMUM_MERGES_TO_CONTINUE);

    sqlite3_finalize(insert_parents_stmt);

    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
                                        "INSERT INTO stats_treemap (path, size) SELECT path,size FROM tm;",
                                        NULL, NULL, NULL));

    LOG_INFO("database.c", "Done!");
}
|
|
|
|
database_stat_type_d database_get_stat_type_by_mnemonic(const char *name) {
|
|
if (strcmp(name, "TMAP") == 0) {
|
|
return DATABASE_STAT_TREEMAP;
|
|
}
|
|
if (strcmp(name, "MAGG") == 0) {
|
|
return DATABASE_STAT_MIME_AGG;
|
|
}
|
|
if (strcmp(name, "SAGG") == 0) {
|
|
return DATABASE_STAT_SIZE_AGG;
|
|
}
|
|
if (strcmp(name, "DAGG") == 0) {
|
|
return DATABASE_STAT_DATE_AGG;
|
|
}
|
|
|
|
return DATABASE_STAT_INVALID;
|
|
}
|
|
|
|
/**
 * Reads one of the precomputed statistics tables and returns it as a JSON
 * array with one object per row.
 *
 * Object keys per stat type:
 *   - DATABASE_STAT_TREEMAP:  "path" (string), "size" (number)
 *   - DATABASE_STAT_DATE_AGG: "bucket" (number), "count" (number)
 *   - DATABASE_STAT_SIZE_AGG: "bucket" (number), "count" (number)
 *   - DATABASE_STAT_MIME_AGG: "mime" (string), "size" (number), "count" (number)
 *
 * Any other type, and any SQLite failure, is reported as fatal.
 *
 * @param db   Database handle; the query runs on db->db.
 * @param type Which statistics table to read.
 * @return Newly created cJSON array (presumably owned by the caller, to be
 *         released with cJSON_Delete - confirm against the callers).
 */
cJSON *database_get_stats(database_t *db, database_stat_type_d type) {

    const char *sql = NULL;

    // Pick the query first, then prepare it in a single place
    switch (type) {
        case DATABASE_STAT_TREEMAP:
            sql = "SELECT path,size FROM stats_treemap";
            break;
        case DATABASE_STAT_DATE_AGG:
            sql = "SELECT bucket,count FROM stats_date_agg";
            break;
        case DATABASE_STAT_SIZE_AGG:
            sql = "SELECT bucket,count FROM stats_size_agg";
            break;
        case DATABASE_STAT_MIME_AGG:
            sql = "SELECT mime,size,count FROM stats_mime_agg";
            break;
        case DATABASE_STAT_INVALID:
        default: {
            LOG_FATALF("database_stats.c", "Invalid stat type: %d", type);
        }
    }

    sqlite3_stmt *stmt;
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, sql, -1, &stmt, NULL));

    cJSON *result = cJSON_CreateArray();

    for (;;) {
        int step_result = sqlite3_step(stmt);
        CRASH_IF_STMT_FAIL(step_result);

        if (step_result == SQLITE_DONE) {
            break;
        }

        cJSON *entry = cJSON_CreateObject();

        if (type == DATABASE_STAT_TREEMAP) {
            cJSON_AddStringToObject(entry, "path", (const char *) sqlite3_column_text(stmt, 0));
            cJSON_AddNumberToObject(entry, "size", (double) sqlite3_column_int64(stmt, 1));
        } else if (type == DATABASE_STAT_DATE_AGG || type == DATABASE_STAT_SIZE_AGG) {
            // Both histograms share the same (bucket, count) layout
            cJSON_AddNumberToObject(entry, "bucket", (double) sqlite3_column_int64(stmt, 0));
            cJSON_AddNumberToObject(entry, "count", (double) sqlite3_column_int64(stmt, 1));
        } else if (type == DATABASE_STAT_MIME_AGG) {
            cJSON_AddStringToObject(entry, "mime", (const char *) sqlite3_column_text(stmt, 0));
            cJSON_AddNumberToObject(entry, "size", (double) sqlite3_column_int64(stmt, 1));
            cJSON_AddNumberToObject(entry, "count", (double) sqlite3_column_int64(stmt, 2));
        }

        cJSON_AddItemToArray(result, entry);
    }

    sqlite3_finalize(stmt);

    return result;
}