mirror of
https://github.com/simon987/sist2.git
synced 2025-04-24 12:45:56 +00:00
Compare commits
No commits in common. "d58fcbc788315fde0e4af9e3010620e36a57bcc2" and "8fa34da02fee912516265beba3a28efcd58e2104" have entirely different histories.
d58fcbc788
...
8fa34da02f
@ -1,7 +0,0 @@
|
|||||||
install:
|
|
||||||
install sist2-update-all.sh /usr/bin/sist2-update-all.sh
|
|
||||||
install sist2-update-files.sh /usr/bin/sist2-update-files.sh
|
|
||||||
install sist2-update-nextcloud.sh /usr/bin/sist2-update-nextcloud.sh
|
|
||||||
install sist2-update.service /etc/systemd/system/sist2-update.service
|
|
||||||
install sist2-update.timer /etc/systemd/system/sist2-update.timer
|
|
||||||
systemctl daemon-reload
|
|
@ -1,31 +0,0 @@
|
|||||||
# Systemd integration example
|
|
||||||
|
|
||||||
This example contains my (yatli) personal configuration for sist2 auto-updating.
|
|
||||||
The following indices are involved in this configuration:
|
|
||||||
|
|
||||||
| Index | Path | Description |
|
|
||||||
|-----------|------------------|--------------------------------------------|
|
|
||||||
| files | /zpool/files | Main file repository |
|
|
||||||
| nextcloud | /zpool/nextcloud | Externally synchronized to a cloud account |
|
|
||||||
|
|
||||||
The systemd integration achieves automatic sist2 scanning & indexing everyday at 3:00AM.
|
|
||||||
|
|
||||||
### Tailoring the configuration for yourself
|
|
||||||
|
|
||||||
`sist2-update-all.sh` calls update scripts for each sist2 index. Add or remove
|
|
||||||
update scripts accordingly to suit your need. Each update script (e.g.
|
|
||||||
`sist2-update-files.sh`) has important parameters laid down at the beginning so
|
|
||||||
make sure to edit them to point to your files and index locations.
|
|
||||||
|
|
||||||
### Installation
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# install the services and scripts
|
|
||||||
sudo make install
|
|
||||||
# enable & start the timer
|
|
||||||
sudo systemctl enable sist2-update.timer
|
|
||||||
sudo systemctl start sist2-update.timer
|
|
||||||
# verify that the timer has been enabled
|
|
||||||
systemctl list-timers --all
|
|
||||||
```
|
|
||||||
|
|
@ -1,9 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
__dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
||||||
echo "Update index: Files"
|
|
||||||
source ${__dir}/sist2-update-files.sh
|
|
||||||
echo "Update index: Nextcloud"
|
|
||||||
source ${__dir}/sist2-update-nextcloud.sh
|
|
||||||
echo "Done. Restarting sist2."
|
|
||||||
docker restart sist2-sist2-1
|
|
@ -1,34 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
DATE=$(date +%Y_%m_%d)
|
|
||||||
CONTENT=/zpool/files
|
|
||||||
ORIG=/mnt/ssd/sist-index/files.idx
|
|
||||||
NEW=/mnt/ssd/sist-index/files_$DATE.idx
|
|
||||||
EXCLUDE='ZArchives|TorrentStore|TorrentDownload|624f0c59-1fef-44f6-95e9-7483296f2833|ubuntu-full-2021-12-07'
|
|
||||||
NAME=Files
|
|
||||||
#REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
|
|
||||||
REWRITE_URL=""
|
|
||||||
|
|
||||||
sist2 scan \
|
|
||||||
--threads 14 \
|
|
||||||
--mem-throttle 32768 \
|
|
||||||
--quality 1.0 \
|
|
||||||
--name $NAME \
|
|
||||||
--ocr-lang=eng+chi_sim \
|
|
||||||
--ocr-ebooks \
|
|
||||||
--ocr-images \
|
|
||||||
--exclude=$EXCLUDE \
|
|
||||||
--rewrite-url=$REWRITE_URL \
|
|
||||||
--incremental=$ORIG \
|
|
||||||
--output=$NEW \
|
|
||||||
$CONTENT
|
|
||||||
echo ">>> Scan complete"
|
|
||||||
rm -rf $ORIG
|
|
||||||
mv $NEW $ORIG
|
|
||||||
|
|
||||||
unset http_proxy
|
|
||||||
unset https_proxy
|
|
||||||
unset HTTP_PROXY
|
|
||||||
unset HTTPS_PROXY
|
|
||||||
sist2 index $ORIG --incremental-index
|
|
||||||
echo ">>> Index complete"
|
|
@ -1,33 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
DATE=$(date +%Y_%m_%d)
|
|
||||||
CONTENT=/zpool/nextcloud/v-yadli
|
|
||||||
ORIG=/mnt/ssd/sist-index/nextcloud.idx
|
|
||||||
NEW=/mnt/ssd/sist-index/nextcloud_$DATE.idx
|
|
||||||
EXCLUDE='Yatao|.*263418493\\/Image\\/.*'
|
|
||||||
NAME=NextCloud
|
|
||||||
# REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
|
|
||||||
REWRITE_URL=""
|
|
||||||
|
|
||||||
sist2 scan \
|
|
||||||
--threads 14 \
|
|
||||||
--mem-throttle 32768 \
|
|
||||||
--quality 1.0 \
|
|
||||||
--name $NAME \
|
|
||||||
--ocr-lang=eng+chi_sim \
|
|
||||||
--ocr-ebooks \
|
|
||||||
--ocr-images \
|
|
||||||
--exclude=$EXCLUDE \
|
|
||||||
--rewrite-url=$REWRITE_URL \
|
|
||||||
--incremental=$ORIG \
|
|
||||||
--output=$NEW \
|
|
||||||
$CONTENT
|
|
||||||
echo ">>> Scan complete"
|
|
||||||
rm -rf $ORIG
|
|
||||||
mv $NEW $ORIG
|
|
||||||
|
|
||||||
unset http_proxy
|
|
||||||
unset https_proxy
|
|
||||||
unset HTTP_PROXY
|
|
||||||
unset HTTPS_PROXY
|
|
||||||
sist2 index $ORIG --incremental-index
|
|
@ -1,6 +0,0 @@
|
|||||||
[Unit]
|
|
||||||
Description=sist2-update
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
User=yatli
|
|
||||||
ExecStart=/bin/bash /usr/bin/sist2-update-all.sh
|
|
@ -1,10 +0,0 @@
|
|||||||
[Unit]
|
|
||||||
Description=sist2-update
|
|
||||||
|
|
||||||
[Timer]
|
|
||||||
OnCalendar=*-*-* 3:00:00
|
|
||||||
Persistent=true
|
|
||||||
Unit=sist2-update.service
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=timers.target
|
|
@ -19,8 +19,6 @@
|
|||||||
|
|
||||||
#define DEFAULT_MAX_MEM_BUFFER 2000
|
#define DEFAULT_MAX_MEM_BUFFER 2000
|
||||||
|
|
||||||
#define DEFAULT_THROTTLE_MEMORY_THRESHOLD 0
|
|
||||||
|
|
||||||
const char *TESS_DATAPATHS[] = {
|
const char *TESS_DATAPATHS[] = {
|
||||||
"/usr/share/tessdata/",
|
"/usr/share/tessdata/",
|
||||||
"/usr/share/tesseract-ocr/tessdata/",
|
"/usr/share/tesseract-ocr/tessdata/",
|
||||||
@ -257,10 +255,6 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
|||||||
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
|
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (args->scan_mem_limit <= 0) {
|
|
||||||
args->scan_mem_limit = DEFAULT_THROTTLE_MEMORY_THRESHOLD;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (args->list_path != NULL) {
|
if (args->list_path != NULL) {
|
||||||
if (strcmp(args->list_path, "-") == 0) {
|
if (strcmp(args->list_path, "-") == 0) {
|
||||||
args->list_file = stdin;
|
args->list_file = stdin;
|
||||||
|
@ -10,7 +10,6 @@ typedef struct scan_args {
|
|||||||
int size;
|
int size;
|
||||||
int content_size;
|
int content_size;
|
||||||
int threads;
|
int threads;
|
||||||
int scan_mem_limit;
|
|
||||||
char *incremental;
|
char *incremental;
|
||||||
char *output;
|
char *output;
|
||||||
char *rewrite_url;
|
char *rewrite_url;
|
||||||
|
@ -35,7 +35,6 @@ typedef struct {
|
|||||||
int threads;
|
int threads;
|
||||||
int depth;
|
int depth;
|
||||||
int calculate_checksums;
|
int calculate_checksums;
|
||||||
size_t mem_limit;
|
|
||||||
|
|
||||||
size_t stat_tn_size;
|
size_t stat_tn_size;
|
||||||
size_t stat_index_size;
|
size_t stat_index_size;
|
||||||
|
@ -253,7 +253,6 @@ void initialize_scan_context(scan_args_t *args) {
|
|||||||
|
|
||||||
ScanCtx.threads = args->threads;
|
ScanCtx.threads = args->threads;
|
||||||
ScanCtx.depth = args->depth;
|
ScanCtx.depth = args->depth;
|
||||||
ScanCtx.mem_limit = args->scan_mem_limit * 1024 * 1024;
|
|
||||||
|
|
||||||
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
|
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
|
||||||
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
|
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
|
||||||
@ -383,10 +382,10 @@ void sist2_scan(scan_args_t *args) {
|
|||||||
load_incremental_index(args);
|
load_incremental_index(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE, TRUE, ScanCtx.mem_limit);
|
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE, TRUE);
|
||||||
tpool_start(ScanCtx.pool);
|
tpool_start(ScanCtx.pool);
|
||||||
|
|
||||||
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE, 0);
|
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE);
|
||||||
tpool_start(ScanCtx.writer_pool);
|
tpool_start(ScanCtx.writer_pool);
|
||||||
|
|
||||||
if (args->list_path) {
|
if (args->list_path) {
|
||||||
@ -467,7 +466,7 @@ void sist2_index(index_args_t *args) {
|
|||||||
f = index_json;
|
f = index_json;
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0, 0);
|
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0);
|
||||||
tpool_start(IndexCtx.pool);
|
tpool_start(IndexCtx.pool);
|
||||||
|
|
||||||
READ_INDICES(file_path, args->index_path, {
|
READ_INDICES(file_path, args->index_path, {
|
||||||
@ -587,7 +586,6 @@ int main(int argc, const char *argv[]) {
|
|||||||
|
|
||||||
OPT_GROUP("Scan options"),
|
OPT_GROUP("Scan options"),
|
||||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||||
OPT_STRING(0, "mem-throttle", &scan_args->scan_mem_limit, "Total memory threshold in MB for scan throttling. DEFAULT=0"),
|
|
||||||
OPT_FLOAT('q', "quality", &scan_args->quality,
|
OPT_FLOAT('q', "quality", &scan_args->quality,
|
||||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3"),
|
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3"),
|
||||||
OPT_INTEGER(0, "size", &scan_args->size,
|
OPT_INTEGER(0, "size", &scan_args->size,
|
||||||
|
64
src/tpool.c
64
src/tpool.c
@ -28,9 +28,6 @@ typedef struct tpool {
|
|||||||
int work_cnt;
|
int work_cnt;
|
||||||
int done_cnt;
|
int done_cnt;
|
||||||
int busy_cnt;
|
int busy_cnt;
|
||||||
int throttle_stuck_cnt;
|
|
||||||
size_t mem_limit;
|
|
||||||
size_t page_size;
|
|
||||||
|
|
||||||
int free_arg;
|
int free_arg;
|
||||||
int stop;
|
int stop;
|
||||||
@ -117,42 +114,11 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* see: https://github.com/htop-dev/htop/blob/f782f821f7f8081cb43bbad1c37f32830a260a81/linux/LinuxProcessList.c
|
|
||||||
*/
|
|
||||||
__always_inline
|
|
||||||
static size_t _get_total_mem(tpool_t* pool) {
|
|
||||||
FILE* statmfile = fopen("/proc/self/statm", "r");
|
|
||||||
if (!statmfile)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
long int dummy, dummy2, dummy3, dummy4, dummy5, dummy6;
|
|
||||||
long int m_resident;
|
|
||||||
|
|
||||||
int r = fscanf(statmfile, "%ld %ld %ld %ld %ld %ld %ld",
|
|
||||||
&dummy, /* m_virt */
|
|
||||||
&m_resident,
|
|
||||||
&dummy2, /* m_share */
|
|
||||||
&dummy3, /* m_trs */
|
|
||||||
&dummy4, /* unused since Linux 2.6; always 0 */
|
|
||||||
&dummy5, /* m_drs */
|
|
||||||
&dummy6); /* unused since Linux 2.6; always 0 */
|
|
||||||
fclose(statmfile);
|
|
||||||
|
|
||||||
if (r == 7) {
|
|
||||||
return m_resident * pool->page_size;
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Thread worker function
|
* Thread worker function
|
||||||
*/
|
*/
|
||||||
static void *tpool_worker(void *arg) {
|
static void *tpool_worker(void *arg) {
|
||||||
tpool_t *pool = arg;
|
tpool_t *pool = arg;
|
||||||
int stuck_notified = 0;
|
|
||||||
int throttle_ms = 0;
|
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
pthread_mutex_lock(&pool->work_mutex);
|
pthread_mutex_lock(&pool->work_mutex);
|
||||||
@ -172,35 +138,10 @@ static void *tpool_worker(void *arg) {
|
|||||||
pthread_mutex_unlock(&(pool->work_mutex));
|
pthread_mutex_unlock(&(pool->work_mutex));
|
||||||
|
|
||||||
if (work != NULL) {
|
if (work != NULL) {
|
||||||
stuck_notified = 0;
|
|
||||||
throttle_ms = 0;
|
|
||||||
while(!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) {
|
|
||||||
if (!stuck_notified && throttle_ms >= 90000) {
|
|
||||||
// notify the pool that this thread is stuck.
|
|
||||||
pthread_mutex_lock(&(pool->work_mutex));
|
|
||||||
pool->throttle_stuck_cnt += 1;
|
|
||||||
if (pool->throttle_stuck_cnt == pool->thread_cnt) {
|
|
||||||
LOG_ERROR("tpool.c", "Throttle memory limit too low, cannot proceed!");
|
|
||||||
pool->stop = TRUE;
|
|
||||||
}
|
|
||||||
pthread_mutex_unlock(&(pool->work_mutex));
|
|
||||||
stuck_notified = 1;
|
|
||||||
}
|
|
||||||
usleep(10000);
|
|
||||||
throttle_ms += 10;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pool->stop) {
|
if (pool->stop) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// we are not stuck anymore. cancel our notification.
|
|
||||||
if (stuck_notified) {
|
|
||||||
pthread_mutex_lock(&(pool->work_mutex));
|
|
||||||
pool->throttle_stuck_cnt -= 1;
|
|
||||||
pthread_mutex_unlock(&(pool->work_mutex));
|
|
||||||
}
|
|
||||||
|
|
||||||
work->func(work->arg);
|
work->func(work->arg);
|
||||||
if (pool->free_arg) {
|
if (pool->free_arg) {
|
||||||
free(work->arg);
|
free(work->arg);
|
||||||
@ -302,21 +243,18 @@ void tpool_destroy(tpool_t *pool) {
|
|||||||
* Create a thread pool
|
* Create a thread pool
|
||||||
* @param thread_cnt Worker threads count
|
* @param thread_cnt Worker threads count
|
||||||
*/
|
*/
|
||||||
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress, size_t mem_limit) {
|
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress) {
|
||||||
|
|
||||||
tpool_t *pool = malloc(sizeof(tpool_t));
|
tpool_t *pool = malloc(sizeof(tpool_t));
|
||||||
pool->thread_cnt = thread_cnt;
|
pool->thread_cnt = thread_cnt;
|
||||||
pool->work_cnt = 0;
|
pool->work_cnt = 0;
|
||||||
pool->done_cnt = 0;
|
pool->done_cnt = 0;
|
||||||
pool->busy_cnt = 0;
|
pool->busy_cnt = 0;
|
||||||
pool->throttle_stuck_cnt = 0;
|
|
||||||
pool->mem_limit = mem_limit;
|
|
||||||
pool->stop = FALSE;
|
pool->stop = FALSE;
|
||||||
pool->free_arg = free_arg;
|
pool->free_arg = free_arg;
|
||||||
pool->cleanup_func = cleanup_func;
|
pool->cleanup_func = cleanup_func;
|
||||||
pool->threads = calloc(sizeof(pthread_t), thread_cnt);
|
pool->threads = calloc(sizeof(pthread_t), thread_cnt);
|
||||||
pool->print_progress = print_progress;
|
pool->print_progress = print_progress;
|
||||||
pool->page_size = getpagesize();
|
|
||||||
|
|
||||||
pthread_mutex_init(&(pool->work_mutex), NULL);
|
pthread_mutex_init(&(pool->work_mutex), NULL);
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@ typedef struct tpool tpool_t;
|
|||||||
|
|
||||||
typedef void (*thread_func_t)(void *arg);
|
typedef void (*thread_func_t)(void *arg);
|
||||||
|
|
||||||
tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress, size_t mem_limit);
|
tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress);
|
||||||
void tpool_start(tpool_t *pool);
|
void tpool_start(tpool_t *pool);
|
||||||
void tpool_destroy(tpool_t *pool);
|
void tpool_destroy(tpool_t *pool);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user