mirror of
https://github.com/simon987/sist2.git
synced 2025-04-24 12:45:56 +00:00
Compare commits
11 Commits
8fa34da02f
...
d58fcbc788
Author | SHA1 | Date | |
---|---|---|---|
d58fcbc788 | |||
b483447b1c | |||
|
0d68d5fc7f | ||
|
1813bf505c | ||
|
9a6e7c7c47 | ||
|
68252b4e80 | ||
|
d1f13f2c84 | ||
|
6075c21a3a | ||
|
f3674ffa02 | ||
|
de187eff1c | ||
|
8e96174e1f |
7
contrib/systemd/Makefile
Normal file
7
contrib/systemd/Makefile
Normal file
@ -0,0 +1,7 @@
|
||||
install:
|
||||
install sist2-update-all.sh /usr/bin/sist2-update-all.sh
|
||||
install sist2-update-files.sh /usr/bin/sist2-update-files.sh
|
||||
install sist2-update-nextcloud.sh /usr/bin/sist2-update-nextcloud.sh
|
||||
install sist2-update.service /etc/systemd/system/sist2-update.service
|
||||
install sist2-update.timer /etc/systemd/system/sist2-update.timer
|
||||
systemctl daemon-reload
|
31
contrib/systemd/README.md
Normal file
31
contrib/systemd/README.md
Normal file
@ -0,0 +1,31 @@
|
||||
# Systemd integration example
|
||||
|
||||
This example contains my (yatli) personal configuration for sist2 auto-updating.
|
||||
The following indices are involved in this configuration:
|
||||
|
||||
| Index | Path | Description |
|
||||
|-----------|------------------|--------------------------------------------|
|
||||
| files | /zpool/files | Main file repository |
|
||||
| nextcloud | /zpool/nextcloud | Externally synchronized to a cloud account |
|
||||
|
||||
The systemd integration achieves automatic sist2 scanning & indexing everyday at 3:00AM.
|
||||
|
||||
### Tailoring the configuration for yourself
|
||||
|
||||
`sist2-update-all.sh` calls update scripts for each sist2 index. Add or remove
|
||||
update scripts accordingly to suit your need. Each update script (e.g.
|
||||
`sist2-update-files.sh`) has important parameters laid down at the beginning so
|
||||
make sure to edit them to point to your files and index locations.
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# install the services and scripts
|
||||
sudo make install
|
||||
# enable & start the timer
|
||||
sudo systemctl enable sist2-update.timer
|
||||
sudo systemctl start sist2-update.timer
|
||||
# verify that the timer has been enabled
|
||||
systemctl list-timers --all
|
||||
```
|
||||
|
9
contrib/systemd/sist2-update-all.sh
Executable file
9
contrib/systemd/sist2-update-all.sh
Executable file
@ -0,0 +1,9 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
__dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
echo "Update index: Files"
|
||||
source ${__dir}/sist2-update-files.sh
|
||||
echo "Update index: Nextcloud"
|
||||
source ${__dir}/sist2-update-nextcloud.sh
|
||||
echo "Done. Restarting sist2."
|
||||
docker restart sist2-sist2-1
|
34
contrib/systemd/sist2-update-files.sh
Executable file
34
contrib/systemd/sist2-update-files.sh
Executable file
@ -0,0 +1,34 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
DATE=$(date +%Y_%m_%d)
|
||||
CONTENT=/zpool/files
|
||||
ORIG=/mnt/ssd/sist-index/files.idx
|
||||
NEW=/mnt/ssd/sist-index/files_$DATE.idx
|
||||
EXCLUDE='ZArchives|TorrentStore|TorrentDownload|624f0c59-1fef-44f6-95e9-7483296f2833|ubuntu-full-2021-12-07'
|
||||
NAME=Files
|
||||
#REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
|
||||
REWRITE_URL=""
|
||||
|
||||
sist2 scan \
|
||||
--threads 14 \
|
||||
--mem-throttle 32768 \
|
||||
--quality 1.0 \
|
||||
--name $NAME \
|
||||
--ocr-lang=eng+chi_sim \
|
||||
--ocr-ebooks \
|
||||
--ocr-images \
|
||||
--exclude=$EXCLUDE \
|
||||
--rewrite-url=$REWRITE_URL \
|
||||
--incremental=$ORIG \
|
||||
--output=$NEW \
|
||||
$CONTENT
|
||||
echo ">>> Scan complete"
|
||||
rm -rf $ORIG
|
||||
mv $NEW $ORIG
|
||||
|
||||
unset http_proxy
|
||||
unset https_proxy
|
||||
unset HTTP_PROXY
|
||||
unset HTTPS_PROXY
|
||||
sist2 index $ORIG --incremental-index
|
||||
echo ">>> Index complete"
|
33
contrib/systemd/sist2-update-nextcloud.sh
Executable file
33
contrib/systemd/sist2-update-nextcloud.sh
Executable file
@ -0,0 +1,33 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
DATE=$(date +%Y_%m_%d)
|
||||
CONTENT=/zpool/nextcloud/v-yadli
|
||||
ORIG=/mnt/ssd/sist-index/nextcloud.idx
|
||||
NEW=/mnt/ssd/sist-index/nextcloud_$DATE.idx
|
||||
EXCLUDE='Yatao|.*263418493\\/Image\\/.*'
|
||||
NAME=NextCloud
|
||||
# REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
|
||||
REWRITE_URL=""
|
||||
|
||||
sist2 scan \
|
||||
--threads 14 \
|
||||
--mem-throttle 32768 \
|
||||
--quality 1.0 \
|
||||
--name $NAME \
|
||||
--ocr-lang=eng+chi_sim \
|
||||
--ocr-ebooks \
|
||||
--ocr-images \
|
||||
--exclude=$EXCLUDE \
|
||||
--rewrite-url=$REWRITE_URL \
|
||||
--incremental=$ORIG \
|
||||
--output=$NEW \
|
||||
$CONTENT
|
||||
echo ">>> Scan complete"
|
||||
rm -rf $ORIG
|
||||
mv $NEW $ORIG
|
||||
|
||||
unset http_proxy
|
||||
unset https_proxy
|
||||
unset HTTP_PROXY
|
||||
unset HTTPS_PROXY
|
||||
sist2 index $ORIG --incremental-index
|
6
contrib/systemd/sist2-update.service
Normal file
6
contrib/systemd/sist2-update.service
Normal file
@ -0,0 +1,6 @@
|
||||
[Unit]
|
||||
Description=sist2-update
|
||||
|
||||
[Service]
|
||||
User=yatli
|
||||
ExecStart=/bin/bash /usr/bin/sist2-update-all.sh
|
10
contrib/systemd/sist2-update.timer
Normal file
10
contrib/systemd/sist2-update.timer
Normal file
@ -0,0 +1,10 @@
|
||||
[Unit]
|
||||
Description=sist2-update
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 3:00:00
|
||||
Persistent=true
|
||||
Unit=sist2-update.service
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
@ -19,6 +19,8 @@
|
||||
|
||||
#define DEFAULT_MAX_MEM_BUFFER 2000
|
||||
|
||||
#define DEFAULT_THROTTLE_MEMORY_THRESHOLD 0
|
||||
|
||||
const char *TESS_DATAPATHS[] = {
|
||||
"/usr/share/tessdata/",
|
||||
"/usr/share/tesseract-ocr/tessdata/",
|
||||
@ -255,6 +257,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
|
||||
}
|
||||
|
||||
if (args->scan_mem_limit <= 0) {
|
||||
args->scan_mem_limit = DEFAULT_THROTTLE_MEMORY_THRESHOLD;
|
||||
}
|
||||
|
||||
if (args->list_path != NULL) {
|
||||
if (strcmp(args->list_path, "-") == 0) {
|
||||
args->list_file = stdin;
|
||||
|
@ -10,6 +10,7 @@ typedef struct scan_args {
|
||||
int size;
|
||||
int content_size;
|
||||
int threads;
|
||||
int scan_mem_limit;
|
||||
char *incremental;
|
||||
char *output;
|
||||
char *rewrite_url;
|
||||
|
@ -35,6 +35,7 @@ typedef struct {
|
||||
int threads;
|
||||
int depth;
|
||||
int calculate_checksums;
|
||||
size_t mem_limit;
|
||||
|
||||
size_t stat_tn_size;
|
||||
size_t stat_index_size;
|
||||
|
@ -253,6 +253,7 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
|
||||
ScanCtx.threads = args->threads;
|
||||
ScanCtx.depth = args->depth;
|
||||
ScanCtx.mem_limit = args->scan_mem_limit * 1024 * 1024;
|
||||
|
||||
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
|
||||
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
|
||||
@ -382,10 +383,10 @@ void sist2_scan(scan_args_t *args) {
|
||||
load_incremental_index(args);
|
||||
}
|
||||
|
||||
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE, TRUE);
|
||||
ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE, TRUE, ScanCtx.mem_limit);
|
||||
tpool_start(ScanCtx.pool);
|
||||
|
||||
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE);
|
||||
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE, 0);
|
||||
tpool_start(ScanCtx.writer_pool);
|
||||
|
||||
if (args->list_path) {
|
||||
@ -466,7 +467,7 @@ void sist2_index(index_args_t *args) {
|
||||
f = index_json;
|
||||
}
|
||||
|
||||
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0);
|
||||
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0, 0);
|
||||
tpool_start(IndexCtx.pool);
|
||||
|
||||
READ_INDICES(file_path, args->index_path, {
|
||||
@ -586,6 +587,7 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
OPT_GROUP("Scan options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_STRING(0, "mem-throttle", &scan_args->scan_mem_limit, "Total memory threshold in MB for scan throttling. DEFAULT=0"),
|
||||
OPT_FLOAT('q', "quality", &scan_args->quality,
|
||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3"),
|
||||
OPT_INTEGER(0, "size", &scan_args->size,
|
||||
|
64
src/tpool.c
64
src/tpool.c
@ -28,6 +28,9 @@ typedef struct tpool {
|
||||
int work_cnt;
|
||||
int done_cnt;
|
||||
int busy_cnt;
|
||||
int throttle_stuck_cnt;
|
||||
size_t mem_limit;
|
||||
size_t page_size;
|
||||
|
||||
int free_arg;
|
||||
int stop;
|
||||
@ -114,11 +117,42 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* see: https://github.com/htop-dev/htop/blob/f782f821f7f8081cb43bbad1c37f32830a260a81/linux/LinuxProcessList.c
|
||||
*/
|
||||
__always_inline
|
||||
static size_t _get_total_mem(tpool_t* pool) {
|
||||
FILE* statmfile = fopen("/proc/self/statm", "r");
|
||||
if (!statmfile)
|
||||
return 0;
|
||||
|
||||
long int dummy, dummy2, dummy3, dummy4, dummy5, dummy6;
|
||||
long int m_resident;
|
||||
|
||||
int r = fscanf(statmfile, "%ld %ld %ld %ld %ld %ld %ld",
|
||||
&dummy, /* m_virt */
|
||||
&m_resident,
|
||||
&dummy2, /* m_share */
|
||||
&dummy3, /* m_trs */
|
||||
&dummy4, /* unused since Linux 2.6; always 0 */
|
||||
&dummy5, /* m_drs */
|
||||
&dummy6); /* unused since Linux 2.6; always 0 */
|
||||
fclose(statmfile);
|
||||
|
||||
if (r == 7) {
|
||||
return m_resident * pool->page_size;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Thread worker function
|
||||
*/
|
||||
static void *tpool_worker(void *arg) {
|
||||
tpool_t *pool = arg;
|
||||
int stuck_notified = 0;
|
||||
int throttle_ms = 0;
|
||||
|
||||
while (1) {
|
||||
pthread_mutex_lock(&pool->work_mutex);
|
||||
@ -138,10 +172,35 @@ static void *tpool_worker(void *arg) {
|
||||
pthread_mutex_unlock(&(pool->work_mutex));
|
||||
|
||||
if (work != NULL) {
|
||||
stuck_notified = 0;
|
||||
throttle_ms = 0;
|
||||
while(!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) {
|
||||
if (!stuck_notified && throttle_ms >= 90000) {
|
||||
// notify the pool that this thread is stuck.
|
||||
pthread_mutex_lock(&(pool->work_mutex));
|
||||
pool->throttle_stuck_cnt += 1;
|
||||
if (pool->throttle_stuck_cnt == pool->thread_cnt) {
|
||||
LOG_ERROR("tpool.c", "Throttle memory limit too low, cannot proceed!");
|
||||
pool->stop = TRUE;
|
||||
}
|
||||
pthread_mutex_unlock(&(pool->work_mutex));
|
||||
stuck_notified = 1;
|
||||
}
|
||||
usleep(10000);
|
||||
throttle_ms += 10;
|
||||
}
|
||||
|
||||
if (pool->stop) {
|
||||
break;
|
||||
}
|
||||
|
||||
// we are not stuck anymore. cancel our notification.
|
||||
if (stuck_notified) {
|
||||
pthread_mutex_lock(&(pool->work_mutex));
|
||||
pool->throttle_stuck_cnt -= 1;
|
||||
pthread_mutex_unlock(&(pool->work_mutex));
|
||||
}
|
||||
|
||||
work->func(work->arg);
|
||||
if (pool->free_arg) {
|
||||
free(work->arg);
|
||||
@ -243,18 +302,21 @@ void tpool_destroy(tpool_t *pool) {
|
||||
* Create a thread pool
|
||||
* @param thread_cnt Worker threads count
|
||||
*/
|
||||
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress) {
|
||||
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress, size_t mem_limit) {
|
||||
|
||||
tpool_t *pool = malloc(sizeof(tpool_t));
|
||||
pool->thread_cnt = thread_cnt;
|
||||
pool->work_cnt = 0;
|
||||
pool->done_cnt = 0;
|
||||
pool->busy_cnt = 0;
|
||||
pool->throttle_stuck_cnt = 0;
|
||||
pool->mem_limit = mem_limit;
|
||||
pool->stop = FALSE;
|
||||
pool->free_arg = free_arg;
|
||||
pool->cleanup_func = cleanup_func;
|
||||
pool->threads = calloc(sizeof(pthread_t), thread_cnt);
|
||||
pool->print_progress = print_progress;
|
||||
pool->page_size = getpagesize();
|
||||
|
||||
pthread_mutex_init(&(pool->work_mutex), NULL);
|
||||
|
||||
|
@ -8,7 +8,7 @@ typedef struct tpool tpool_t;
|
||||
|
||||
typedef void (*thread_func_t)(void *arg);
|
||||
|
||||
tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress);
|
||||
tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress, size_t mem_limit);
|
||||
void tpool_start(tpool_t *pool);
|
||||
void tpool_destroy(tpool_t *pool);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user