diff --git a/src/cli.c b/src/cli.c index fc2b762..665d109 100644 --- a/src/cli.c +++ b/src/cli.c @@ -19,6 +19,8 @@ #define DEFAULT_MAX_MEM_BUFFER 2000 +#define DEFAULT_THROTTLE_MEMORY_THRESHOLD 0 + const char *TESS_DATAPATHS[] = { "/usr/share/tessdata/", "/usr/share/tesseract-ocr/tessdata/", @@ -255,6 +257,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER; } + if (args->scan_mem_limit <= 0) { + args->scan_mem_limit = DEFAULT_THROTTLE_MEMORY_THRESHOLD; + } + if (args->list_path != NULL) { if (strcmp(args->list_path, "-") == 0) { args->list_file = stdin; diff --git a/src/cli.h b/src/cli.h index a32d40a..b74fe96 100644 --- a/src/cli.h +++ b/src/cli.h @@ -10,6 +10,7 @@ typedef struct scan_args { int size; int content_size; int threads; + int scan_mem_limit; char *incremental; char *output; char *rewrite_url; diff --git a/src/ctx.h b/src/ctx.h index fb55998..6d6b224 100644 --- a/src/ctx.h +++ b/src/ctx.h @@ -35,6 +35,7 @@ typedef struct { int threads; int depth; int calculate_checksums; + size_t mem_limit; size_t stat_tn_size; size_t stat_index_size; diff --git a/src/main.c b/src/main.c index a1b567f..b7f3134 100644 --- a/src/main.c +++ b/src/main.c @@ -253,6 +253,7 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.threads = args->threads; ScanCtx.depth = args->depth; + ScanCtx.mem_limit = args->scan_mem_limit * 1024 * 1024; strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path)); strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name)); @@ -382,10 +383,10 @@ void sist2_scan(scan_args_t *args) { load_incremental_index(args); } - ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE, TRUE); + ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE, TRUE, ScanCtx.mem_limit); tpool_start(ScanCtx.pool); - ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE); + ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE, 0); tpool_start(ScanCtx.writer_pool); if (args->list_path) { @@ -466,7 +467,7 @@ void sist2_index(index_args_t *args) { f = index_json; } - IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0); + IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0, 0); tpool_start(IndexCtx.pool); READ_INDICES(file_path, args->index_path, { @@ -586,6 +587,7 @@ int main(int argc, const char *argv[]) { OPT_GROUP("Scan options"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), + OPT_STRING(0, "mem-throttle", &scan_args->scan_mem_limit, "Total memory threshold in MB for scan throttling. DEFAULT=0"), OPT_FLOAT('q', "quality", &scan_args->quality, "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3"), OPT_INTEGER(0, "size", &scan_args->size, diff --git a/src/tpool.c b/src/tpool.c index e0de5e8..970d8c3 100644 --- a/src/tpool.c +++ b/src/tpool.c @@ -28,6 +28,9 @@ typedef struct tpool { int work_cnt; int done_cnt; int busy_cnt; + int throttle_stuck_cnt; + size_t mem_limit; + size_t page_size; int free_arg; int stop; @@ -114,11 +117,42 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) { return 1; } +/** + * see: https://github.com/htop-dev/htop/blob/f782f821f7f8081cb43bbad1c37f32830a260a81/linux/LinuxProcessList.c + */ +__always_inline +static size_t _get_total_mem(tpool_t* pool) { + FILE* statmfile = fopen("/proc/self/statm", "r"); + if (!statmfile) + return 0; + + long int dummy, dummy2, dummy3, dummy4, dummy5, dummy6; + long int m_resident; + + int r = fscanf(statmfile, "%ld %ld %ld %ld %ld %ld %ld", + &dummy, /* m_virt */ + &m_resident, + &dummy2, /* m_share */ + &dummy3, /* m_trs */ + &dummy4, /* unused since Linux 2.6; always 0 */ + &dummy5, /* m_drs */ + &dummy6); /* unused since Linux 2.6; always 0 */ + fclose(statmfile); + + if (r == 7) { + return m_resident * pool->page_size; + } else { + return 0; + } +} + /** * Thread worker function */ static void *tpool_worker(void *arg) { tpool_t *pool = arg; + int stuck_notified = 0; + int throttle_ms = 0; while (1) { pthread_mutex_lock(&pool->work_mutex); @@ -138,10 +172,35 @@ static void *tpool_worker(void *arg) { pthread_mutex_unlock(&(pool->work_mutex)); if (work != NULL) { + stuck_notified = 0; + throttle_ms = 0; + while(!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) { + if (!stuck_notified && throttle_ms >= 90000) { + // notify the pool that this thread is stuck. + pthread_mutex_lock(&(pool->work_mutex)); + pool->throttle_stuck_cnt += 1; + if (pool->throttle_stuck_cnt == pool->thread_cnt) { + LOG_ERROR("tpool.c", "Throttle memory limit too low, cannot proceed!"); + pool->stop = TRUE; + } + pthread_mutex_unlock(&(pool->work_mutex)); + stuck_notified = 1; + } + usleep(10000); + throttle_ms += 10; + } + if (pool->stop) { break; } + // we are not stuck anymore. cancel our notification. + if (stuck_notified) { + pthread_mutex_lock(&(pool->work_mutex)); + pool->throttle_stuck_cnt -= 1; + pthread_mutex_unlock(&(pool->work_mutex)); + } + work->func(work->arg); if (pool->free_arg) { free(work->arg); @@ -243,18 +302,21 @@ void tpool_destroy(tpool_t *pool) { * Create a thread pool * @param thread_cnt Worker threads count */ -tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress) { +tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress, size_t mem_limit) { tpool_t *pool = malloc(sizeof(tpool_t)); pool->thread_cnt = thread_cnt; pool->work_cnt = 0; pool->done_cnt = 0; pool->busy_cnt = 0; + pool->throttle_stuck_cnt = 0; + pool->mem_limit = mem_limit; pool->stop = FALSE; pool->free_arg = free_arg; pool->cleanup_func = cleanup_func; pool->threads = calloc(sizeof(pthread_t), thread_cnt); pool->print_progress = print_progress; + pool->page_size = getpagesize(); pthread_mutex_init(&(pool->work_mutex), NULL); diff --git a/src/tpool.h b/src/tpool.h index 6c84a93..71d202f 100644 --- a/src/tpool.h +++ b/src/tpool.h @@ -8,7 +8,7 @@ typedef struct tpool tpool_t; typedef void (*thread_func_t)(void *arg); -tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress); +tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress, size_t mem_limit); void tpool_start(tpool_t *pool); void tpool_destroy(tpool_t *pool);