Compare commits

..

No commits in common. "master" and "3.4.7" have entirely different histories.

9 changed files with 7 additions and 174 deletions

View File

@ -62,9 +62,7 @@ add_executable(
src/database/database_schema.c src/database/database_schema.c
src/database/database_fts.c src/database/database_fts.c
src/web/web_fts.c src/web/web_fts.c
src/database/database_embeddings.c src/database/database_embeddings.c)
src/ignorelist.c
src/ignorelist.h)
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C) set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
@ -78,7 +76,6 @@ find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED) find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
find_package(unofficial-sqlite3 CONFIG REQUIRED) find_package(unofficial-sqlite3 CONFIG REQUIRED)
find_package(OpenBLAS CONFIG REQUIRED) find_package(OpenBLAS CONFIG REQUIRED)
find_package(libgit2 CONFIG REQUIRED)
target_include_directories( target_include_directories(
@ -152,7 +149,6 @@ target_link_libraries(
# m # m
z z
libgit2::libgit2package
argparse argparse
unofficial::mongoose::mongoose unofficial::mongoose::mongoose
CURL::libcurl CURL::libcurl

View File

@ -216,7 +216,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
3. Install vcpkg dependencies 3. Install vcpkg dependencies
```bash ```bash
vcpkg install openblas curl[core,openssl] sqlite3[core,fts5,json1] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib] libgit2[core,pcre] vcpkg install openblas curl[core,openssl] sqlite3[core,fts5,json1] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib]
``` ```
4. Build 4. Build

View File

@ -108,27 +108,6 @@ sist scan ~/Documents -o ./documents.sist2 --incremental
sist scan ~/Documents -o ./documents.sist2 --incremental sist scan ~/Documents -o ./documents.sist2 --incremental
``` ```
### Excluding files
You can use the `--exclude` option to specify exclude patterns. For more complex setups, you can create a
`.sist2ignore` file at the root of the scan path (For example, `~/Documents/.sist2ignore` for the example above).
The syntax of `.sist2ignore` is the same as that of Git's `.gitignore` (see the [gitignore reference](https://git-scm.com/docs/gitignore)).
Example:
**.sist2ignore**
```gitignore
# Ignore all PDF files
*.pdf
# But don't ignore them for the /important_files/ directory
!/important_files/*.pdf
# Ignore all files in _staging/ directories
_staging/
```
### Index documents to Elasticsearch search backend ### Index documents to Elasticsearch search backend
```bash ```bash

View File

@ -19,7 +19,6 @@
#include "src/database/database.h" #include "src/database/database.h"
#include "src/index/elastic.h" #include "src/index/elastic.h"
#include "sqlite3.h" #include "sqlite3.h"
#include "ignorelist.h"
#include <pcre.h> #include <pcre.h>
@ -35,7 +34,6 @@ typedef struct {
pcre *exclude; pcre *exclude;
pcre_extra *exclude_extra; pcre_extra *exclude_extra;
int fast; int fast;
ignorelist_t *ignorelist;
scan_arc_ctx_t arc_ctx; scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx; scan_comic_ctx_t comic_ctx;

View File

@ -1,106 +0,0 @@
#include "ignorelist.h"
#include "ctx.h"
#include <git2.h>
// State backing a .sist2ignore rule set, evaluated through libgit2.
typedef struct ignorelist {
// libgit2 repository handle; NULL until an ignore file has been opened
// by ignorelist_load_ignore_file().
git_repository *repo;
// Path of the temporary repository, built in ignorelist_create() as
// "<tempdir>/sist2-ignorelist-<pid>".
char repo_path[PATH_MAX];
// Set to TRUE once at least one rule has been added; while FALSE,
// ignorelist_is_ignored() returns FALSE without consulting libgit2.
int has_rules;
} ignorelist_t;
/**
 * Returns the directory to use for temporary files.
 *
 * The TMPDIR environment variable is used when it is set to a non-empty
 * value; otherwise "/tmp/" is returned. The result points either at the
 * environment string or at a string literal, so the caller must neither
 * modify nor free it.
 *
 * @return a non-empty, NUL-terminated directory path
 */
char *get_tempdir() {
    char *tempdir_env = getenv("TMPDIR");

    // An empty TMPDIR is treated like an unset one, so callers that look at
    // the last character of the result never index before the string.
    if (tempdir_env != NULL && tempdir_env[0] != '\0') {
        return tempdir_env;
    }

    return "/tmp/";
}
/**
 * Releases an ignore list obtained from ignorelist_create().
 *
 * Frees the libgit2 repository handle (if one was opened), frees the
 * structure itself, and then drops the libgit2 initialization that
 * ignorelist_create() took.
 *
 * @param ignorelist ignore list to release; NULL is accepted and ignored
 */
void ignorelist_destroy(ignorelist_t* ignorelist) {
    if (ignorelist == NULL) {
        return;
    }

    // libgit2 objects must be released while the library is still
    // initialized, so the repository is freed before the shutdown call.
    if (ignorelist->repo != NULL) {
        git_repository_free(ignorelist->repo);
        ignorelist->repo = NULL;
    }

    free(ignorelist);
    git_libgit2_shutdown();
}
/**
 * Allocates an ignore list with no repository attached and no rules loaded.
 *
 * The path of the temporary repository is precomputed as
 * "<tempdir>/sist2-ignorelist-<pid>"; nothing is created on disk here.
 * libgit2 is initialized only after the structure has been set up
 * successfully, so a failed call leaves nothing to undo.
 *
 * @return the new ignore list (release it with ignorelist_destroy()), or
 *         NULL if the allocation fails or the repository path does not fit
 *         in PATH_MAX bytes
 */
ignorelist_t *ignorelist_create() {
    ignorelist_t *ignorelist = malloc(sizeof *ignorelist);
    if (ignorelist == NULL) {
        return NULL;
    }
    ignorelist->repo = NULL;
    ignorelist->has_rules = FALSE;

    const char *tempdir = get_tempdir();
    size_t tempdir_len = strlen(tempdir);
    if (tempdir_len == 0) {
        // Never index tempdir[-1]: fall back to the default location when
        // the temporary directory string is empty.
        tempdir = "/tmp/";
        tempdir_len = strlen(tempdir);
    }

    // Insert a separator only when the directory does not already end in one.
    const char *separator = (tempdir[tempdir_len - 1] == '/') ? "" : "/";

    int written = snprintf(ignorelist->repo_path, sizeof ignorelist->repo_path,
                           "%s%ssist2-ignorelist-%d", tempdir, separator, (int) getpid());
    if (written < 0 || (size_t) written >= sizeof ignorelist->repo_path) {
        // A truncated path would point at the wrong directory; refuse it.
        free(ignorelist);
        return NULL;
    }

    git_libgit2_init();

    return ignorelist;
}
/**
 * Loads ignore rules from a file into the ignore list.
 *
 * If the file cannot be opened the function returns silently and the ignore
 * list is left untouched (no repository is created). Otherwise a temporary
 * repository is initialized at ignorelist->repo_path, libgit2's internal
 * rules are cleared, and every line of the file is added as a rule. Errors
 * from libgit2 are reported through LOG_FATALF.
 *
 * @param ignorelist ignore list created by ignorelist_create()
 * @param filepath   path of the ignore file to read
 */
void ignorelist_load_ignore_file(ignorelist_t *ignorelist, const char *filepath) {
    char line[PATH_MAX * 2];

    FILE *file = fopen(filepath, "r");
    if (file == NULL) {
        // No ignore list
        return;
    }

    LOG_DEBUGF("ignorelist.c", "Opening temporary git repository %s", ignorelist->repo_path);
    int init_result = git_repository_init(&ignorelist->repo, ignorelist->repo_path, TRUE);
    if (init_result != 0) {
        LOG_FATALF("ignorelist.c", "Got error code from git_repository_init(): %d", init_result);
    }

    git_ignore_clear_internal_rules(ignorelist->repo);

    while (fgets(line, sizeof line, file) != NULL) {
        // Strip the trailing newline only when there is one: the last line
        // of a file may lack it, and blindly dropping the final character
        // would then corrupt the rule (or write before the buffer when the
        // string is empty).
        size_t line_len = strlen(line);
        if (line_len > 0 && line[line_len - 1] == '\n') {
            line[line_len - 1] = '\0';
        }

        const char *rules = line;
        int result = git_ignore_add_rule(ignorelist->repo, rules);
        if (result == 0) {
            LOG_DEBUGF("ignorelist.c", "Load ignore rule: %s", line);
            ignorelist->has_rules = TRUE;
        } else {
            LOG_FATALF("ignorelist.c", "Invalid ignore rule: %s", line);
        }
    }

    fclose(file);
}
/**
 * Tells whether a path matches the loaded ignore rules.
 *
 * When no rule has been loaded the answer is FALSE and libgit2 is not
 * consulted. Otherwise the first ScanCtx.index.desc.root_len bytes of
 * filepath are skipped and the remainder is handed to
 * git_ignore_path_is_ignored(); a non-zero return from libgit2 is reported
 * through LOG_FATALF.
 *
 * @param ignorelist ignore list to query
 * @param filepath   path to test (presumably prefixed by the scan root --
 *                   verify against the caller)
 * @return the "ignored" flag written by libgit2, or FALSE if there are no rules
 */
int ignorelist_is_ignored(ignorelist_t *ignorelist, const char *filepath) {
    int is_ignored = -1;

    if (ignorelist->has_rules) {
        int status = git_ignore_path_is_ignored(
                &is_ignored,
                ignorelist->repo,
                filepath + ScanCtx.index.desc.root_len
        );

        if (status != 0) {
            LOG_FATALF("ignorelist.c", "git_ignore_path_is_ignored returned error code: %d", status);
        }

        return is_ignored;
    }

    return FALSE;
}

View File

@ -1,16 +0,0 @@
// Public interface of the .sist2ignore rule list (implemented in ignorelist.c).
#ifndef SIST2_IGNORELIST_H
#define SIST2_IGNORELIST_H
#include "src/sist.h"
// Opaque handle; the structure is defined in ignorelist.c.
typedef struct ignorelist ignorelist_t;
// Initializes libgit2 and allocates an ignore list with no rules loaded.
ignorelist_t *ignorelist_create();
// Frees the ignore list and its repository handle and shuts libgit2 down.
void ignorelist_destroy(ignorelist_t* ignorelist);
// Reads ignore rules, one per line, from filepath. Does nothing if the file
// cannot be opened.
void ignorelist_load_ignore_file(ignorelist_t* ignorelist, const char* filepath);
// Returns FALSE when no rules are loaded; otherwise returns the "ignored"
// flag reported by libgit2 for filepath.
int ignorelist_is_ignored(ignorelist_t* ignorelist, const char* filepath);
#endif //SIST2_IGNORELIST_H

View File

@ -23,17 +23,8 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) { if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
LOG_DEBUGF("walk.c", "Excluded: %s", filepath); LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
if (typeflag == FTW_D) { if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
return FTW_SKIP_SUBTREE; } else if (typeflag == FTW_D) {
}
return FTW_CONTINUE;
}
if (ignorelist_is_ignored(ScanCtx.ignorelist, filepath)) {
LOG_DEBUGF("walk.c", "Ignored: %s", filepath);
if (typeflag == FTW_D) {
return FTW_SKIP_SUBTREE; return FTW_SKIP_SUBTREE;
} }

View File

@ -11,7 +11,6 @@
#include "web/serve.h" #include "web/serve.h"
#include "parsing/mime.h" #include "parsing/mime.h"
#include "parsing/parse.h" #include "parsing/parse.h"
#include "ignorelist.h"
#include <signal.h> #include <signal.h>
#include <pthread.h> #include <pthread.h>
@ -240,13 +239,6 @@ void sist2_scan(scan_args_t *args) {
LOG_INFOF("main.c", "sist2 v%s", Version); LOG_INFOF("main.c", "sist2 v%s", Version);
ScanCtx.ignorelist = ignorelist_create();
char ignore_filepath[PATH_MAX];
sprintf(ignore_filepath, "%s.sist2ignore", args->path);
ignorelist_load_ignore_file(ScanCtx.ignorelist, ignore_filepath);
ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE); ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE);
tpool_start(ScanCtx.pool); tpool_start(ScanCtx.pool);
@ -276,7 +268,6 @@ void sist2_scan(scan_args_t *args) {
database_generate_stats(db, args->treemap_threshold); database_generate_stats(db, args->treemap_threshold);
database_close(db, args->optimize_database); database_close(db, args->optimize_database);
ignorelist_destroy(ScanCtx.ignorelist);
} }
void sist2_index(index_args_t *args) { void sist2_index(index_args_t *args) {

View File

@ -51,11 +51,11 @@
#include <ctype.h> #include <ctype.h>
#include "git_hash.h" #include "git_hash.h"
#define VERSION "3.5.0" #define VERSION "3.4.7"
static const char *const Version = VERSION; static const char *const Version = VERSION;
static const int VersionMajor = 3; static const int VersionMajor = 3;
static const int VersionMinor = 5; static const int VersionMinor = 4;
static const int VersionPatch = 0; static const int VersionPatch = 7;
#ifndef SIST_PLATFORM #ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown #define SIST_PLATFORM unknown