Merge pull request #246 from yatli/mem_cap_dev

scan memory threshold
This commit is contained in:
simon987 2022-02-13 13:26:18 -05:00 committed by GitHub
commit d58fcbc788
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 77 additions and 5 deletions

View File

@ -19,6 +19,8 @@
#define DEFAULT_MAX_MEM_BUFFER 2000
#define DEFAULT_THROTTLE_MEMORY_THRESHOLD 0
const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
@ -255,6 +257,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
}
if (args->scan_mem_limit <= 0) {
args->scan_mem_limit = DEFAULT_THROTTLE_MEMORY_THRESHOLD;
}
if (args->list_path != NULL) {
if (strcmp(args->list_path, "-") == 0) {
args->list_file = stdin;

View File

@ -10,6 +10,7 @@ typedef struct scan_args {
int size;
int content_size;
int threads;
int scan_mem_limit;
char *incremental;
char *output;
char *rewrite_url;

View File

@ -35,6 +35,7 @@ typedef struct {
int threads;
int depth;
int calculate_checksums;
size_t mem_limit;
size_t stat_tn_size;
size_t stat_index_size;

View File

@ -253,6 +253,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
ScanCtx.mem_limit = args->scan_mem_limit * 1024 * 1024;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
@ -382,10 +383,10 @@ void sist2_scan(scan_args_t *args) {
load_incremental_index(args);
}
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE, TRUE);
ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE, TRUE, ScanCtx.mem_limit);
tpool_start(ScanCtx.pool);
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE);
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE, 0);
tpool_start(ScanCtx.writer_pool);
if (args->list_path) {
@ -466,7 +467,7 @@ void sist2_index(index_args_t *args) {
f = index_json;
}
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0);
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0, 0);
tpool_start(IndexCtx.pool);
READ_INDICES(file_path, args->index_path, {
@ -586,6 +587,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "mem-throttle", &scan_args->scan_mem_limit, "Total memory threshold in MB for scan throttling. DEFAULT=0"),
OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3"),
OPT_INTEGER(0, "size", &scan_args->size,

View File

@ -28,6 +28,9 @@ typedef struct tpool {
int work_cnt;
int done_cnt;
int busy_cnt;
int throttle_stuck_cnt;
size_t mem_limit;
size_t page_size;
int free_arg;
int stop;
@ -114,11 +117,42 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
return 1;
}
/**
* see: https://github.com/htop-dev/htop/blob/f782f821f7f8081cb43bbad1c37f32830a260a81/linux/LinuxProcessList.c
*/
__always_inline
static size_t _get_total_mem(tpool_t* pool) {
FILE* statmfile = fopen("/proc/self/statm", "r");
if (!statmfile)
return 0;
long int dummy, dummy2, dummy3, dummy4, dummy5, dummy6;
long int m_resident;
int r = fscanf(statmfile, "%ld %ld %ld %ld %ld %ld %ld",
&dummy, /* m_virt */
&m_resident,
&dummy2, /* m_share */
&dummy3, /* m_trs */
&dummy4, /* unused since Linux 2.6; always 0 */
&dummy5, /* m_drs */
&dummy6); /* unused since Linux 2.6; always 0 */
fclose(statmfile);
if (r == 7) {
return m_resident * pool->page_size;
} else {
return 0;
}
}
/**
* Thread worker function
*/
static void *tpool_worker(void *arg) {
tpool_t *pool = arg;
int stuck_notified = 0;
int throttle_ms = 0;
while (1) {
pthread_mutex_lock(&pool->work_mutex);
@ -138,10 +172,35 @@ static void *tpool_worker(void *arg) {
pthread_mutex_unlock(&(pool->work_mutex));
if (work != NULL) {
stuck_notified = 0;
throttle_ms = 0;
while(!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) {
if (!stuck_notified && throttle_ms >= 90000) {
// notify the pool that this thread is stuck.
pthread_mutex_lock(&(pool->work_mutex));
pool->throttle_stuck_cnt += 1;
if (pool->throttle_stuck_cnt == pool->thread_cnt) {
LOG_ERROR("tpool.c", "Throttle memory limit too low, cannot proceed!");
pool->stop = TRUE;
}
pthread_mutex_unlock(&(pool->work_mutex));
stuck_notified = 1;
}
usleep(10000);
throttle_ms += 10;
}
if (pool->stop) {
break;
}
// we are not stuck anymore. cancel our notification.
if (stuck_notified) {
pthread_mutex_lock(&(pool->work_mutex));
pool->throttle_stuck_cnt -= 1;
pthread_mutex_unlock(&(pool->work_mutex));
}
work->func(work->arg);
if (pool->free_arg) {
free(work->arg);
@ -243,18 +302,21 @@ void tpool_destroy(tpool_t *pool) {
* Create a thread pool
* @param thread_cnt Worker threads count
*/
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress) {
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress, size_t mem_limit) {
tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt;
pool->work_cnt = 0;
pool->done_cnt = 0;
pool->busy_cnt = 0;
pool->throttle_stuck_cnt = 0;
pool->mem_limit = mem_limit;
pool->stop = FALSE;
pool->free_arg = free_arg;
pool->cleanup_func = cleanup_func;
pool->threads = calloc(sizeof(pthread_t), thread_cnt);
pool->print_progress = print_progress;
pool->page_size = getpagesize();
pthread_mutex_init(&(pool->work_mutex), NULL);

View File

@ -8,7 +8,7 @@ typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress);
tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress, size_t mem_limit);
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *pool);