mirror of
https://github.com/simon987/sist2.git
synced 2025-04-10 14:06:45 +00:00
--exclude argument #26
This commit is contained in:
parent
018ac86640
commit
483a454c8d
@ -137,6 +137,8 @@ TARGET_LINK_LIBRARIES(
|
||||
${PROJECT_SOURCE_DIR}/lib/libcrypto.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libssl.a
|
||||
dl
|
||||
|
||||
pcre
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
|
21
src/cli.c
21
src/cli.c
@ -162,6 +162,26 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
args->tesseract_path = path;
|
||||
}
|
||||
|
||||
if (args->exclude_regex != NULL) {
|
||||
const char *error;
|
||||
int error_offset;
|
||||
|
||||
pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
|
||||
if (error != NULL) {
|
||||
LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
|
||||
}
|
||||
|
||||
pcre_extra *re_extra = pcre_study(re, 0, &error);
|
||||
if (error != NULL) {
|
||||
LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
|
||||
}
|
||||
|
||||
ScanCtx.exclude = re;
|
||||
ScanCtx.exclude_extra = re_extra;
|
||||
} else {
|
||||
ScanCtx.exclude = NULL;
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
|
||||
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
|
||||
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
|
||||
@ -175,6 +195,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
|
||||
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
|
||||
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
|
||||
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -18,10 +18,13 @@ typedef struct scan_args {
|
||||
archive_mode_t archive_mode;
|
||||
char *tesseract_lang;
|
||||
const char *tesseract_path;
|
||||
char *exclude_regex;
|
||||
} scan_args_t;
|
||||
|
||||
scan_args_t *scan_args_create();
|
||||
|
||||
void scan_args_destroy(scan_args_t *args);
|
||||
|
||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv);
|
||||
|
||||
typedef struct index_args {
|
||||
@ -45,12 +48,15 @@ typedef struct web_args {
|
||||
} web_args_t;
|
||||
|
||||
index_args_t *index_args_create();
|
||||
|
||||
void index_args_destroy(index_args_t *args);
|
||||
|
||||
web_args_t *web_args_create();
|
||||
|
||||
void web_args_destroy(web_args_t *args);
|
||||
|
||||
int index_args_validate(index_args_t *args, int argc, const char **argv);
|
||||
|
||||
int web_args_validate(web_args_t *args, int argc, const char **argv);
|
||||
|
||||
#endif
|
||||
|
@ -29,6 +29,8 @@ struct {
|
||||
pthread_mutex_t mupdf_mu;
|
||||
char * tesseract_lang;
|
||||
const char * tesseract_path;
|
||||
pcre *exclude;
|
||||
pcre_extra *exclude_extra;
|
||||
} ScanCtx;
|
||||
|
||||
struct {
|
||||
|
@ -28,8 +28,18 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
|
||||
return job;
|
||||
}
|
||||
|
||||
/*
 * Output vector handed to pcre_exec() by EXCLUDED(). The match offsets are
 * never read back; the array only exists because pcre_exec() needs somewhere
 * to store them.
 */
int sub_strings[30];

/*
 * Evaluates to non-zero when `str` matches the compiled exclude pattern held
 * in ScanCtx.exclude / ScanCtx.exclude_extra (pcre_exec() returns a value
 * >= 0 on a match and a negative code otherwise).
 *
 * `str` is expanded twice (once as the subject, once for strlen), so pass an
 * expression without side effects. The caller must check that
 * ScanCtx.exclude is not NULL before using this macro.
 *
 * The last argument of pcre_exec() is the number of int elements in the
 * output vector, not its size in bytes, hence the sizeof division.
 */
#define EXCLUDED(str) \
    (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, (str), (int) strlen(str), 0, 0, \
               sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
|
||||
|
||||
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
|
||||
if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
|
||||
|
||||
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
|
||||
return 0;
|
||||
}
|
||||
|
||||
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
|
||||
tpool_add_work(ScanCtx.pool, parse, job);
|
||||
}
|
||||
|
@ -240,6 +240,7 @@ int main(int argc, const char *argv[]) {
|
||||
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
|
||||
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
|
||||
"which are installed on your machine)"),
|
||||
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
|
||||
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
||||
@ -286,9 +287,7 @@ int main(int argc, const char *argv[]) {
|
||||
}
|
||||
sist2_scan(scan_args);
|
||||
|
||||
}
|
||||
|
||||
else if (strcmp(argv[0], "index") == 0) {
|
||||
} else if (strcmp(argv[0], "index") == 0) {
|
||||
|
||||
int err = index_args_validate(index_args, argc, argv);
|
||||
if (err != 0) {
|
||||
@ -304,8 +303,7 @@ int main(int argc, const char *argv[]) {
|
||||
}
|
||||
sist2_web(web_args);
|
||||
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
|
||||
argparse_usage(&argparse);
|
||||
return 1;
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <libxml/xmlstring.h>
|
||||
#define BOOL int
|
||||
#include <tesseract/capi.h>
|
||||
#include <pcre.h>
|
||||
|
||||
#include <onion/onion.h>
|
||||
#include <onion/handler.h>
|
||||
|
Loading…
x
Reference in New Issue
Block a user