Fix #504, version bump

This commit is contained in:
Shy 2025-07-05 19:01:00 -04:00
parent d221e08d67
commit cb0a587fe9
8 changed files with 173 additions and 6 deletions

View File

@ -62,7 +62,9 @@ add_executable(
src/database/database_schema.c
src/database/database_fts.c
src/web/web_fts.c
src/database/database_embeddings.c)
src/database/database_embeddings.c
src/ignorelist.c
src/ignorelist.h)
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
@ -76,6 +78,7 @@ find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
find_package(unofficial-sqlite3 CONFIG REQUIRED)
find_package(OpenBLAS CONFIG REQUIRED)
find_package(libgit2 CONFIG REQUIRED)
target_include_directories(
@ -149,6 +152,7 @@ target_link_libraries(
# m
z
libgit2::libgit2package
argparse
unofficial::mongoose::mongoose
CURL::libcurl

View File

@ -108,6 +108,27 @@ sist scan ~/Documents -o ./documents.sist2 --incremental
sist scan ~/Documents -o ./documents.sist2 --incremental
```
### Excluding files
You can use the `--exclude` option to specify exclude patterns. For more complex setups, you can create a
`.sist2ignore` file at the root of the scan path (for example, `~/Documents/.sist2ignore` for the scan shown above).
The syntax of `.sist2ignore` is the same as that of Git's `.gitignore` (reference [here](https://git-scm.com/docs/gitignore)).
Example:
**.sist2ignore**
```gitignore
# Ignore all PDF files
*.pdf
# But don't ignore them in the /important_files/ directory
!/important_files/*.pdf
# Ignore all files in _staging/ directories
_staging/
```
### Index documents to Elasticsearch search backend
```bash

View File

@ -19,6 +19,7 @@
#include "src/database/database.h"
#include "src/index/elastic.h"
#include "sqlite3.h"
#include "ignorelist.h"
#include <pcre.h>
@ -34,6 +35,7 @@ typedef struct {
pcre *exclude;
pcre_extra *exclude_extra;
int fast;
ignorelist_t *ignorelist;
scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx;

106
src/ignorelist.c Normal file
View File

@ -0,0 +1,106 @@
#include "ignorelist.h"
#include "ctx.h"
#include <git2.h>
/* State behind an ignorelist_t handle: a libgit2 repository used to evaluate ignore rules. */
typedef struct ignorelist {
/* Repository handle; NULL until ignorelist_load_ignore_file() initialises it. */
git_repository *repo;
/* Path of the repository, built in ignorelist_create() as <tempdir>/sist2-ignorelist-<pid>. */
char repo_path[PATH_MAX];
/* Set to TRUE once at least one rule has been added successfully. */
int has_rules;
} ignorelist_t;
/**
 * Returns the directory in which temporary files should be created.
 *
 * The TMPDIR environment variable is used when it is set to a non-empty
 * value; otherwise "/tmp/" is returned. An empty TMPDIR is treated as unset
 * because ignorelist_create() inspects the last character of the result and
 * an empty string would make that index underflow.
 *
 * @return a string owned by the environment or a string literal; the caller
 *         must neither modify nor free it
 */
char *get_tempdir(void) {
    char *tempdir_env = getenv("TMPDIR");
    if (tempdir_env != NULL && tempdir_env[0] != '\0') {
        return tempdir_env;
    }
    return "/tmp/";
}
/**
 * Releases an ignore list obtained from ignorelist_create().
 *
 * The repository handle is freed BEFORE git_libgit2_shutdown() is called:
 * libgit2 objects must not be touched once the library's global state has
 * been torn down.
 *
 * NOTE(review): the temporary repository directory at repo_path is not
 * removed from disk here.
 *
 * @param ignorelist ignore list to destroy; passing NULL is a no-op
 */
void ignorelist_destroy(ignorelist_t *ignorelist) {
    if (ignorelist == NULL) {
        return;
    }

    if (ignorelist->repo != NULL) {
        git_repository_free(ignorelist->repo);
        ignorelist->repo = NULL;
    }

    // Balances the git_libgit2_init() call made in ignorelist_create()
    git_libgit2_shutdown();

    free(ignorelist);
}
/**
 * Allocates an empty ignore list and initialises libgit2.
 *
 * The path of the temporary repository is computed here as
 * "<tempdir>/sist2-ignorelist-<pid>"; the repository itself is only created
 * by ignorelist_load_ignore_file().
 *
 * @return a new ignore list with no rules, or NULL when the allocation fails,
 *         the repository path does not fit in PATH_MAX, or libgit2 cannot be
 *         initialised. Release it with ignorelist_destroy().
 */
ignorelist_t *ignorelist_create(void) {
    ignorelist_t *ignorelist = calloc(1, sizeof *ignorelist);
    if (ignorelist == NULL) {
        return NULL;
    }
    ignorelist->repo = NULL;
    ignorelist->has_rules = FALSE;

    const char *tempdir = get_tempdir();
    size_t tempdir_len = strlen(tempdir);
    // Only index the last character when there is one (an empty string would underflow)
    const char *separator = (tempdir_len > 0 && tempdir[tempdir_len - 1] == '/') ? "" : "/";

    // Bounded formatting: TMPDIR is user-controlled and may not fit in repo_path
    int written = snprintf(ignorelist->repo_path, sizeof ignorelist->repo_path,
                           "%s%ssist2-ignorelist-%d", tempdir, separator, (int) getpid());
    if (written < 0 || (size_t) written >= sizeof ignorelist->repo_path) {
        free(ignorelist);
        return NULL;
    }

    // Initialise the library last so that the early exits above need no matching shutdown
    if (git_libgit2_init() < 0) {
        free(ignorelist);
        return NULL;
    }

    return ignorelist;
}
/**
 * Loads ignore rules from a .gitignore-style file into the ignore list.
 *
 * When the file cannot be opened the function returns without touching the
 * ignore list (a missing ignore file is not an error). Otherwise a bare
 * repository is initialised at ignorelist->repo_path and every line of the
 * file is registered as a rule through git_ignore_add_rule().
 *
 * @param ignorelist ignore list created by ignorelist_create(); NULL is a no-op
 * @param filepath   path of the ignore file to read
 */
void ignorelist_load_ignore_file(ignorelist_t *ignorelist, const char *filepath) {
    char line[PATH_MAX * 2];

    if (ignorelist == NULL) {
        return;
    }

    FILE *file = fopen(filepath, "r");
    if (file == NULL) {
        // No ignore list
        return;
    }

    LOG_DEBUGF("ignorelist.c", "Opening temporary git repository %s", ignorelist->repo_path);

    // Release a handle left over from a previous call so it is not leaked
    if (ignorelist->repo != NULL) {
        git_repository_free(ignorelist->repo);
        ignorelist->repo = NULL;
    }

    int init_result = git_repository_init(&ignorelist->repo, ignorelist->repo_path, TRUE);
    if (init_result != 0) {
        fclose(file);
        LOG_FATALF("ignorelist.c", "Got error code from git_repository_init(): %d", init_result);
        return;
    }

    git_ignore_clear_internal_rules(ignorelist->repo);

    while (fgets(line, sizeof line, file) != NULL) {
        // Strip the line terminator only when one is present: the last line of
        // the file may have no newline, and its final character must be kept.
        size_t len = strlen(line);
        if (len > 0 && line[len - 1] == '\n') {
            line[--len] = '\0';
        }
        if (len > 0 && line[len - 1] == '\r') {
            line[--len] = '\0';
        }

        int result = git_ignore_add_rule(ignorelist->repo, line);

        if (result == 0) {
            LOG_DEBUGF("ignorelist.c", "Load ignore rule: %s", line);
            ignorelist->has_rules = TRUE;
        } else {
            LOG_FATALF("ignorelist.c", "Invalid ignore rule: %s", line);
        }
    }

    fclose(file);
}
/**
 * Tells whether a scanned path matches the loaded ignore rules.
 *
 * The path handed to libgit2 is filepath with the first
 * ScanCtx.index.desc.root_len characters skipped.
 *
 * @param ignorelist ignore list to query
 * @param filepath   path of the entry being scanned
 * @return FALSE when no rule has been loaded, otherwise the value reported by
 *         git_ignore_path_is_ignored()
 */
int ignorelist_is_ignored(ignorelist_t *ignorelist, const char *filepath) {
    int verdict = -1;

    if (ignorelist->has_rules) {
        const char *relative = &filepath[ScanCtx.index.desc.root_len];
        int rc = git_ignore_path_is_ignored(&verdict, ignorelist->repo, relative);

        if (rc != 0) {
            LOG_FATALF("ignorelist.c", "git_ignore_path_is_ignored returned error code: %d", rc);
        }

        return verdict;
    }

    // Nothing was loaded: no path can be ignored
    return FALSE;
}

16
src/ignorelist.h Normal file
View File

@ -0,0 +1,16 @@
#ifndef SIST2_IGNORELIST_H
#define SIST2_IGNORELIST_H

#include "src/sist.h"

/** Opaque handle to a set of .gitignore-style rules; defined in ignorelist.c. */
typedef struct ignorelist ignorelist_t;

/**
 * Creates an empty ignore list (no rules loaded) and initialises libgit2.
 * Declared with (void): before C23 an empty parameter list is not a prototype.
 */
ignorelist_t *ignorelist_create(void);

/** Frees the ignore list and its repository handle, and shuts libgit2 down. */
void ignorelist_destroy(ignorelist_t *ignorelist);

/**
 * Adds every line of the file at filepath as an ignore rule.
 * Does nothing when the file cannot be opened.
 */
void ignorelist_load_ignore_file(ignorelist_t *ignorelist, const char *filepath);

/**
 * Returns FALSE when no rule has been loaded; otherwise returns the result of
 * the libgit2 ignore check for filepath taken relative to the scan root.
 */
int ignorelist_is_ignored(ignorelist_t *ignorelist, const char *filepath);

#endif //SIST2_IGNORELIST_H

View File

@ -23,8 +23,17 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
} else if (typeflag == FTW_D) {
if (typeflag == FTW_D) {
return FTW_SKIP_SUBTREE;
}
return FTW_CONTINUE;
}
if (ignorelist_is_ignored(ScanCtx.ignorelist, filepath)) {
LOG_DEBUGF("walk.c", "Ignored: %s", filepath);
if (typeflag == FTW_D) {
return FTW_SKIP_SUBTREE;
}

View File

@ -11,6 +11,7 @@
#include "web/serve.h"
#include "parsing/mime.h"
#include "parsing/parse.h"
#include "ignorelist.h"
#include <signal.h>
#include <pthread.h>
@ -239,6 +240,13 @@ void sist2_scan(scan_args_t *args) {
LOG_INFOF("main.c", "sist2 v%s", Version);
ScanCtx.ignorelist = ignorelist_create();
char ignore_filepath[PATH_MAX];
sprintf(ignore_filepath, "%s.sist2ignore", args->path);
ignorelist_load_ignore_file(ScanCtx.ignorelist, ignore_filepath);
ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE);
tpool_start(ScanCtx.pool);
@ -268,6 +276,7 @@ void sist2_scan(scan_args_t *args) {
database_generate_stats(db, args->treemap_threshold);
database_close(db, args->optimize_database);
ignorelist_destroy(ScanCtx.ignorelist);
}
void sist2_index(index_args_t *args) {

View File

@ -51,11 +51,11 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "3.4.7"
#define VERSION "3.5.0"
static const char *const Version = VERSION;
static const int VersionMajor = 3;
static const int VersionMinor = 4;
static const int VersionPatch = 7;
static const int VersionMinor = 5;
static const int VersionPatch = 0;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown