mirror of
https://github.com/simon987/sist2.git
synced 2025-09-07 07:06:54 +00:00
Fix #504, version bump
This commit is contained in:
parent
d221e08d67
commit
cb0a587fe9
@ -62,7 +62,9 @@ add_executable(
|
||||
src/database/database_schema.c
|
||||
src/database/database_fts.c
|
||||
src/web/web_fts.c
|
||||
src/database/database_embeddings.c)
|
||||
src/database/database_embeddings.c
|
||||
src/ignorelist.c
|
||||
src/ignorelist.h)
|
||||
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
||||
@ -76,6 +78,7 @@ find_package(CURL CONFIG REQUIRED)
|
||||
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
|
||||
find_package(unofficial-sqlite3 CONFIG REQUIRED)
|
||||
find_package(OpenBLAS CONFIG REQUIRED)
|
||||
find_package(libgit2 CONFIG REQUIRED)
|
||||
|
||||
|
||||
target_include_directories(
|
||||
@ -149,6 +152,7 @@ target_link_libraries(
|
||||
|
||||
# m
|
||||
z
|
||||
libgit2::libgit2package
|
||||
argparse
|
||||
unofficial::mongoose::mongoose
|
||||
CURL::libcurl
|
||||
|
@ -108,6 +108,27 @@ sist scan ~/Documents -o ./documents.sist2 --incremental
|
||||
sist scan ~/Documents -o ./documents.sist2 --incremental
|
||||
```
|
||||
|
||||
### Excluding files
|
||||
|
||||
You can use the `--exclude` option to specify exclude patterns. For more complex setups, you can create a
|
||||
`.sist2ignore` file at the root of the scan path (for example, `~/Documents/.sist2ignore` for the example above).
|
||||
|
||||
The syntax for sist2ignore is the same as .gitignore for Git (reference [here](https://git-scm.com/docs/gitignore)).
|
||||
|
||||
Example:
|
||||
|
||||
**.sist2ignore**
|
||||
```gitignore
|
||||
# Ignore all PDF files
|
||||
*.pdf
|
||||
|
||||
# But don't ignore them for the /important_files/ directory
|
||||
!/important_files/*.pdf
|
||||
|
||||
# Ignore all files in _staging/ directories
|
||||
_staging/
|
||||
```
|
||||
|
||||
### Index documents to Elasticsearch search backend
|
||||
|
||||
```bash
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "src/database/database.h"
|
||||
#include "src/index/elastic.h"
|
||||
#include "sqlite3.h"
|
||||
#include "ignorelist.h"
|
||||
|
||||
#include <pcre.h>
|
||||
|
||||
@ -34,6 +35,7 @@ typedef struct {
|
||||
pcre *exclude;
|
||||
pcre_extra *exclude_extra;
|
||||
int fast;
|
||||
ignorelist_t *ignorelist;
|
||||
|
||||
scan_arc_ctx_t arc_ctx;
|
||||
scan_comic_ctx_t comic_ctx;
|
||||
|
106
src/ignorelist.c
Normal file
106
src/ignorelist.c
Normal file
@ -0,0 +1,106 @@
|
||||
#include "ignorelist.h"
|
||||
#include "ctx.h"
|
||||
#include <git2.h>
|
||||
|
||||
typedef struct ignorelist {
|
||||
git_repository *repo;
|
||||
char repo_path[PATH_MAX];
|
||||
int has_rules;
|
||||
} ignorelist_t;
|
||||
|
||||
/**
 * Returns the directory in which temporary files should be created.
 *
 * The value of the TMPDIR environment variable is used when it is set to a
 * non-empty string; otherwise "/tmp/" is returned. An empty TMPDIR is treated
 * as unset so that callers never receive a zero-length path (which they would
 * otherwise index at position strlen() - 1).
 *
 * The returned pointer refers either to the environment's own storage or to a
 * string literal: callers must not modify or free it.
 */
char *get_tempdir() {
    char *tempdir_env = getenv("TMPDIR");

    if (tempdir_env != NULL && tempdir_env[0] != '\0') {
        return tempdir_env;
    }

    return "/tmp/";
}
|
||||
|
||||
/**
 * Releases an ignore list obtained from ignorelist_create().
 *
 * The repository handle (if any) is freed first and libgit2 is shut down
 * afterwards: libgit2 objects must not be used once the library's global
 * state has been torn down.
 *
 * NOTE(review): the repository created on disk at repo_path is not removed
 * here, so it stays in the temporary directory after the scan.
 *
 * @param ignorelist list to destroy; NULL is accepted and ignored.
 */
void ignorelist_destroy(ignorelist_t* ignorelist) {
    if (ignorelist == NULL) {
        return;
    }

    if (ignorelist->repo != NULL) {
        git_repository_free(ignorelist->repo);
        ignorelist->repo = NULL;
    }

    // Balances the git_libgit2_init() call made in ignorelist_create().
    git_libgit2_shutdown();

    free(ignorelist);
}
|
||||
|
||||
ignorelist_t *ignorelist_create() {
|
||||
git_libgit2_init();
|
||||
|
||||
ignorelist_t *ignorelist = malloc(sizeof(ignorelist_t));
|
||||
|
||||
ignorelist->repo = NULL;
|
||||
ignorelist->has_rules = FALSE;
|
||||
|
||||
char *tempdir = get_tempdir();
|
||||
|
||||
if (tempdir[strlen(tempdir) - 1] == '/') {
|
||||
sprintf(ignorelist->repo_path, "%ssist2-ignorelist-%d", tempdir, getpid());
|
||||
} else {
|
||||
sprintf(ignorelist->repo_path, "%s/sist2-ignorelist-%d", tempdir, getpid());
|
||||
}
|
||||
|
||||
return ignorelist;
|
||||
}
|
||||
|
||||
/**
 * Loads gitignore-style rules from a file into the ignore list.
 *
 * If the file cannot be opened the list is left untouched (no rules). Otherwise
 * a git repository is initialized at ignorelist->repo_path, libgit2's internal
 * default rules are cleared, and every line of the file is added as a rule.
 * has_rules is set as soon as one rule has been accepted.
 *
 * @param ignorelist list created by ignorelist_create()
 * @param filepath   path of the ignore file to read
 */
void ignorelist_load_ignore_file(ignorelist_t *ignorelist, const char *filepath) {

    FILE *file = fopen(filepath, "r");

    if (file == NULL) {
        // No ignore list
        return;
    }

    LOG_DEBUGF("ignorelist.c", "Opening temporary git repository %s", ignorelist->repo_path);
    int init_result = git_repository_init(&ignorelist->repo, ignorelist->repo_path, TRUE);

    if (init_result != 0) {
        fclose(file);
        LOG_FATALF("ignorelist.c", "Got error code from git_repository_init(): %d", init_result);
        // Only reached if the fatal log returns: never continue without a repository.
        return;
    }

    git_ignore_clear_internal_rules(ignorelist->repo);

    char line[PATH_MAX * 2];

    while (fgets(line, sizeof(line), file) != NULL) {

        // Strip the line terminator only if there is one. The last line of a
        // file may lack a newline, and CRLF files carry a '\r' that must not
        // end up in the pattern.
        line[strcspn(line, "\r\n")] = '\0';

        int result = git_ignore_add_rule(ignorelist->repo, line);

        if (result == 0) {
            LOG_DEBUGF("ignorelist.c", "Load ignore rule: %s", line);
            ignorelist->has_rules = TRUE;
        } else {
            LOG_FATALF("ignorelist.c", "Invalid ignore rule: %s", line);
        }
    }

    fclose(file);
}
|
||||
|
||||
/**
 * Tells whether a scanned path matches the loaded ignore rules.
 *
 * The first ScanCtx.index.desc.root_len bytes of filepath are skipped before
 * the path is handed to libgit2.
 *
 * @param ignorelist list to query
 * @param filepath   path of the entry being visited
 * @return FALSE when no rules were loaded, otherwise the "ignored" flag
 *         reported by git_ignore_path_is_ignored()
 */
int ignorelist_is_ignored(ignorelist_t *ignorelist, const char *filepath) {
    int is_ignored = -1;

    if (ignorelist->has_rules) {
        const char *relative = &filepath[ScanCtx.index.desc.root_len];
        int rc = git_ignore_path_is_ignored(&is_ignored, ignorelist->repo, relative);

        if (rc != 0) {
            LOG_FATALF("ignorelist.c", "git_ignore_path_is_ignored returned error code: %d", rc);
        }

        return is_ignored;
    }

    return FALSE;
}
|
16
src/ignorelist.h
Normal file
16
src/ignorelist.h
Normal file
@ -0,0 +1,16 @@
|
||||
#ifndef SIST2_IGNORELIST_H
|
||||
#define SIST2_IGNORELIST_H
|
||||
|
||||
#include "src/sist.h"
|
||||
|
||||
/** Opaque ignore-list handle. */
typedef struct ignorelist ignorelist_t;

/** Creates an empty ignore list; release it with ignorelist_destroy(). */
ignorelist_t *ignorelist_create();

/** Frees an ignore list and the resources it holds. */
void ignorelist_destroy(ignorelist_t *ignorelist);

/**
 * Reads gitignore-style rules from filepath into the list.
 * Nothing is loaded when the file cannot be opened.
 */
void ignorelist_load_ignore_file(ignorelist_t *ignorelist, const char *filepath);

/**
 * Returns non-zero when filepath matches the loaded rules,
 * and FALSE when no rules were loaded.
 */
int ignorelist_is_ignored(ignorelist_t *ignorelist, const char *filepath);
|
||||
|
||||
#endif //SIST2_IGNORELIST_H
|
@ -23,8 +23,17 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
|
||||
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
} else if (typeflag == FTW_D) {
|
||||
if (typeflag == FTW_D) {
|
||||
return FTW_SKIP_SUBTREE;
|
||||
}
|
||||
|
||||
return FTW_CONTINUE;
|
||||
}
|
||||
|
||||
if (ignorelist_is_ignored(ScanCtx.ignorelist, filepath)) {
|
||||
LOG_DEBUGF("walk.c", "Ignored: %s", filepath);
|
||||
|
||||
if (typeflag == FTW_D) {
|
||||
return FTW_SKIP_SUBTREE;
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "web/serve.h"
|
||||
#include "parsing/mime.h"
|
||||
#include "parsing/parse.h"
|
||||
#include "ignorelist.h"
|
||||
|
||||
#include <signal.h>
|
||||
#include <pthread.h>
|
||||
@ -239,6 +240,13 @@ void sist2_scan(scan_args_t *args) {
|
||||
|
||||
LOG_INFOF("main.c", "sist2 v%s", Version);
|
||||
|
||||
ScanCtx.ignorelist = ignorelist_create();
|
||||
|
||||
char ignore_filepath[PATH_MAX];
|
||||
sprintf(ignore_filepath, "%s.sist2ignore", args->path);
|
||||
|
||||
ignorelist_load_ignore_file(ScanCtx.ignorelist, ignore_filepath);
|
||||
|
||||
ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE);
|
||||
tpool_start(ScanCtx.pool);
|
||||
|
||||
@ -268,6 +276,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
|
||||
database_generate_stats(db, args->treemap_threshold);
|
||||
database_close(db, args->optimize_database);
|
||||
ignorelist_destroy(ScanCtx.ignorelist);
|
||||
}
|
||||
|
||||
void sist2_index(index_args_t *args) {
|
||||
|
@ -51,11 +51,11 @@
|
||||
#include <ctype.h>
|
||||
#include "git_hash.h"
|
||||
|
||||
#define VERSION "3.4.7"
|
||||
#define VERSION "3.5.0"
|
||||
static const char *const Version = VERSION;
|
||||
static const int VersionMajor = 3;
|
||||
static const int VersionMinor = 4;
|
||||
static const int VersionPatch = 7;
|
||||
static const int VersionMinor = 5;
|
||||
static const int VersionPatch = 0;
|
||||
|
||||
#ifndef SIST_PLATFORM
|
||||
#define SIST_PLATFORM unknown
|
||||
|
Loading…
x
Reference in New Issue
Block a user