Compare commits

...

3 Commits

Author SHA1 Message Date
Shy
40136b74b4 Update readme 2025-07-05 19:40:20 -04:00
Shy
cb0a587fe9 Fix #504, version bump 2025-07-05 19:01:00 -04:00
Shy
d221e08d67 Fix #534, version bump 2025-07-05 09:56:50 -04:00
14 changed files with 194 additions and 11 deletions

View File

@ -62,7 +62,9 @@ add_executable(
src/database/database_schema.c
src/database/database_fts.c
src/web/web_fts.c
src/database/database_embeddings.c)
src/database/database_embeddings.c
src/ignorelist.c
src/ignorelist.h)
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
@ -76,6 +78,7 @@ find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
find_package(unofficial-sqlite3 CONFIG REQUIRED)
find_package(OpenBLAS CONFIG REQUIRED)
find_package(libgit2 CONFIG REQUIRED)
target_include_directories(
@ -149,6 +152,7 @@ target_link_libraries(
# m
z
libgit2::libgit2package
argparse
unofficial::mongoose::mongoose
CURL::libcurl

View File

@ -216,7 +216,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
3. Install vcpkg dependencies
```bash
vcpkg install openblas curl[core,openssl] sqlite3[core,fts5,json1] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib]
vcpkg install openblas curl[core,openssl] sqlite3[core,fts5,json1] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib] libgit2[core,pcre]
```
4. Build

View File

@ -108,6 +108,27 @@ sist scan ~/Documents -o ./documents.sist2 --incremental
sist scan ~/Documents -o ./documents.sist2 --incremental
```
### Excluding files
You can use the `--exclude` option to specify exclude patterns. For more complex setups, you can create a
`.sist2ignore` file at the root of the scan path (for example, `~/Documents/.sist2ignore` for the example above).
The syntax of `.sist2ignore` is the same as that of Git's `.gitignore` (reference [here](https://git-scm.com/docs/gitignore)).
Example:
**.sist2ignore**
```gitignore
# Ignore all PDF files
*.pdf
# But don't ignore them for the /important_files/ directory
!/important_files/*.pdf
# Ignore all files in _staging/ directories
_staging/
```
### Index documents to Elasticsearch search backend
```bash

View File

@ -238,7 +238,7 @@ class Sist2ElasticsearchQuery {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
fragment_size: getters.optFragmentSize,
number_of_fragments: 1,
number_of_fragments: getters.optFragmentCount,
order: "score",
fields: {
content: {},

View File

@ -3,6 +3,8 @@
</template>
<script>
const FRAGMENT_SEPARATOR = "<br /><i style='line-height: 2.4'>[…]</i><br/>";
export default {
name: "ContentDiv",
props: ["doc"],
@ -13,10 +15,10 @@ export default {
}
if (this.doc.highlight["content.nGram"]) {
return this.doc.highlight["content.nGram"][0];
return this.doc.highlight["content.nGram"].join(FRAGMENT_SEPARATOR);
}
if (this.doc.highlight.content) {
return this.doc.highlight.content[0];
return this.doc.highlight.content.join(FRAGMENT_SEPARATOR);
}
}
}

View File

@ -59,6 +59,7 @@ export default {
searchInPath: "Enable matching query against document path",
suggestPath: "Enable auto-complete in path filter bar",
fragmentSize: "Highlight context size",
fragmentCount: "Number of highlight snippets",
queryMode: "Search mode",
displayMode: "Display",
columns: "Column count",
@ -242,6 +243,7 @@ export default {
searchInPath: "Abgleich der Abfrage mit dem Dokumentpfad aktivieren",
suggestPath: "Aktiviere Auto-Vervollständigung in Pfadfilter-Leiste",
fragmentSize: "Kontextgröße",
fragmentCount: "Anzahl der hervorgehobenen Snippets",
queryMode: "Such-Modus",
displayMode: "Ansicht",
columns: "Anzahl Spalten",
@ -417,6 +419,7 @@ export default {
searchInPath: "Activer la recherche dans le chemin des documents",
suggestPath: "Activer l'autocomplétion dans la barre de filtre de chemin",
fragmentSize: "Longueur du contexte de surlignage",
fragmentCount: "Nombre d'extraits surlignés",
queryMode: "Mode de recherche",
displayMode: "Affichage",
columns: "Nombre de colonnes",
@ -592,6 +595,7 @@ export default {
searchInPath: "匹配文档路径",
suggestPath: "搜索框启用自动补全",
fragmentSize: "高亮上下文大小",
fragmentCount: "突出显示的项目数",
queryMode: "搜索模式",
displayMode: "显示",
columns: "列数",
@ -767,6 +771,7 @@ export default {
searchInPath: "Włącz szukanie również w ścieżce dokumentu",
suggestPath: "Włącz auto-uzupełnianie w filtrze ścieżek",
fragmentSize: "Podświetl wielkość kontekstu w znakach",
fragmentCount: "Liczba wyróżnionych fragmentów",
queryMode: "Tryb szukania",
displayMode: "Wyświetlanie",
columns: "Liczba kolumn",

View File

@ -3,7 +3,7 @@ import Vuex from "vuex"
import {deserializeMimes, randomSeed, serializeMimes} from "@/util";
import {getInstance} from "@/plugins/auth0.js";
const CONF_VERSION = 3;
const CONF_VERSION = 4;
Vue.use(Vuex);
@ -41,6 +41,7 @@ export default new Vuex.Store({
optTagOrOperator: false,
optFuzzy: true,
optFragmentSize: 200,
optFragmentCount: 1,
optQueryMode: "simple",
optSearchInPath: false,
optColumns: "auto",
@ -170,6 +171,7 @@ export default new Vuex.Store({
setOptSearchInPath: (state, val) => state.optSearchInPath = val,
setOptSuggestPath: (state, val) => state.optSuggestPath = val,
setOptFragmentSize: (state, val) => state.optFragmentSize = val,
setOptFragmentCount: (state, val) => state.optFragmentCount = val,
setOptQueryMode: (state, val) => state.optQueryMode = val,
setOptResultSize: (state, val) => state.optSize = val,
setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
@ -430,6 +432,7 @@ export default new Vuex.Store({
optSearchInPath: state => state.optSearchInPath,
optSuggestPath: state => state.optSuggestPath,
optFragmentSize: state => state.optFragmentSize,
optFragmentCount: state => state.optFragmentCount,
optQueryMode: state => state.optQueryMode,
optTreemapType: state => state.optTreemapType,
optTreemapTiling: state => state.optTreemapTiling,

View File

@ -151,6 +151,10 @@
<b-form-input :value="optFragmentSize" step="10" type="number" min="0"
@input="setOptFragmentSize"></b-form-input>
<label :class="{'text-muted': uiSqliteMode}">{{ $t("opt.fragmentCount") }}</label>
<b-form-input :value="optFragmentCount" :disabled="uiSqliteMode" step="1" type="number" min="1"
@input="setOptFragmentCount"></b-form-input>
<label>{{ $t("opt.resultSize") }}</label>
<b-form-input :value="optResultSize" type="number" min="10"
@input="setOptResultSize"></b-form-input>
@ -314,6 +318,7 @@ export default {
"optSearchInPath",
"optSuggestPath",
"optFragmentSize",
"optFragmentCount",
"optQueryMode",
"optTreemapType",
"optTreemapTiling",
@ -360,6 +365,7 @@ export default {
"setOptSearchInPath",
"setOptSuggestPath",
"setOptFragmentSize",
"setOptFragmentCount",
"setOptQueryMode",
"setOptTreemapType",
"setOptTreemapTiling",

View File

@ -19,6 +19,7 @@
#include "src/database/database.h"
#include "src/index/elastic.h"
#include "sqlite3.h"
#include "ignorelist.h"
#include <pcre.h>
@ -34,6 +35,7 @@ typedef struct {
pcre *exclude;
pcre_extra *exclude_extra;
int fast;
ignorelist_t *ignorelist;
scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx;

106
src/ignorelist.c Normal file
View File

@ -0,0 +1,106 @@
#include "ignorelist.h"
#include "ctx.h"
#include <git2.h>
/**
 * State of an ignore list backed by a temporary libgit2 repository.
 */
typedef struct ignorelist {
/* Repository handle; NULL until ignorelist_load_ignore_file() initialises it. */
git_repository *repo;
/* Path of the temporary repository, built by ignorelist_create() from the
 * temp directory and the process id. */
char repo_path[PATH_MAX];
/* Set to TRUE once at least one rule was accepted; ignorelist_is_ignored()
 * returns FALSE without consulting libgit2 while this is still FALSE. */
int has_rules;
} ignorelist_t;
/**
 * Return the directory to use for temporary files.
 *
 * Reads the TMPDIR environment variable and falls back to "/tmp/" when it is
 * unset or empty. An empty value is rejected because the result is used to
 * build a path and its last character is inspected to decide whether a
 * separator must be appended.
 *
 * @return Pointer to the environment's value or to a string literal; the
 *         caller must neither modify nor free it.
 */
char *get_tempdir() {
    char *tempdir_env = getenv("TMPDIR");

    if (tempdir_env != NULL && tempdir_env[0] != '\0') {
        return tempdir_env;
    }

    return "/tmp/";
}
/**
 * Release an ignorelist created by ignorelist_create().
 *
 * The repository handle is freed first so that no libgit2 object is released
 * after the library's global state has been shut down; only then is
 * git_libgit2_shutdown() called to balance the git_libgit2_init() performed
 * by ignorelist_create().
 *
 * @param ignorelist Ignorelist to destroy; NULL is accepted and ignored.
 */
void ignorelist_destroy(ignorelist_t* ignorelist) {
    if (ignorelist == NULL) {
        return;
    }

    if (ignorelist->repo != NULL) {
        git_repository_free(ignorelist->repo);
        ignorelist->repo = NULL;
    }

    free(ignorelist);

    git_libgit2_shutdown();
}
/**
 * Allocate an empty ignorelist and initialise libgit2.
 *
 * The returned object has no repository and no rules yet; only the path of
 * the temporary repository ("<tempdir>/sist2-ignorelist-<pid>") is prepared.
 * Release it with ignorelist_destroy().
 *
 * @return Newly allocated ignorelist. Allocation failure and an over-long
 *         temporary path are reported through LOG_FATALF (presumably fatal —
 *         confirm against the logging macros); NULL is returned should that
 *         macro ever return.
 */
ignorelist_t *ignorelist_create() {
    git_libgit2_init();

    ignorelist_t *ignorelist = malloc(sizeof *ignorelist);
    if (ignorelist == NULL) {
        LOG_FATALF("ignorelist.c", "Could not allocate %zu bytes for the ignorelist", sizeof *ignorelist);
        return NULL;
    }

    ignorelist->repo = NULL;
    ignorelist->has_rules = FALSE;

    const char *tempdir = get_tempdir();
    if (tempdir == NULL || tempdir[0] == '\0') {
        // Never index tempdir[-1]: treat an empty temp dir like an unset one
        tempdir = "/tmp/";
    }

    // Only insert a separator when the temp dir does not already end with one
    size_t tempdir_len = strlen(tempdir);
    const char *separator = (tempdir[tempdir_len - 1] == '/') ? "" : "/";

    int written = snprintf(ignorelist->repo_path, sizeof ignorelist->repo_path,
                           "%s%ssist2-ignorelist-%d", tempdir, separator, (int) getpid());
    if (written < 0 || (size_t) written >= sizeof ignorelist->repo_path) {
        LOG_FATALF("ignorelist.c", "Temporary directory path is too long: %s", tempdir);
        free(ignorelist);
        return NULL;
    }

    return ignorelist;
}
/**
 * Load ignore rules from a gitignore-style file.
 *
 * If the file cannot be opened the function returns silently and the
 * ignorelist stays empty. Otherwise a bare git repository is initialised at
 * ignorelist->repo_path, its internal ignore rules are reset, and every line
 * of the file is handed to git_ignore_add_rule().
 *
 * @param ignorelist Ignorelist obtained from ignorelist_create().
 * @param filepath   Path of the ignore file to read.
 *
 * A failing git_repository_init() or a rejected rule is reported through
 * LOG_FATALF.
 */
void ignorelist_load_ignore_file(ignorelist_t *ignorelist, const char *filepath) {
    FILE *file = fopen(filepath, "r");
    if (file == NULL) {
        // No ignore list
        return;
    }

    LOG_DEBUGF("ignorelist.c", "Opening temporary git repository %s", ignorelist->repo_path);
    int init_result = git_repository_init(&ignorelist->repo, ignorelist->repo_path, TRUE);
    if (init_result != 0) {
        fclose(file);
        LOG_FATALF("ignorelist.c", "Got error code from git_repository_init(): %d", init_result);
    }

    git_ignore_clear_internal_rules(ignorelist->repo);

    char line[PATH_MAX * 2];
    while (fgets(line, sizeof line, file) != NULL) {
        // Strip the line terminator only if there is one: the last line of a
        // file may lack a trailing newline and must keep its final character.
        size_t len = strcspn(line, "\n");
        line[len] = '\0';
        if (len > 0 && line[len - 1] == '\r') {
            line[len - 1] = '\0'; // Tolerate CRLF line endings
        }

        int result = git_ignore_add_rule(ignorelist->repo, line);
        if (result == 0) {
            LOG_DEBUGF("ignorelist.c", "Load ignore rule: %s", line);
            ignorelist->has_rules = TRUE;
        } else {
            LOG_FATALF("ignorelist.c", "Invalid ignore rule: %s", line);
        }
    }

    fclose(file);
}
/**
 * Tell whether a path is excluded by the loaded ignore rules.
 *
 * When no rule has been loaded the answer is FALSE and libgit2 is not
 * consulted. Otherwise the first ScanCtx.index.desc.root_len characters of
 * filepath are skipped (presumably the scan root prefix — confirm against the
 * caller) and the remainder is passed to git_ignore_path_is_ignored().
 *
 * @param ignorelist Ignorelist to query.
 * @param filepath   Path of the entry being examined.
 * @return FALSE when there are no rules, otherwise the value libgit2 stored
 *         in its "ignored" out-parameter. A libgit2 error is reported through
 *         LOG_FATALF.
 */
int ignorelist_is_ignored(ignorelist_t *ignorelist, const char *filepath) {
    int verdict = -1;

    if (ignorelist->has_rules) {
        int status = git_ignore_path_is_ignored(&verdict, ignorelist->repo,
                                                filepath + ScanCtx.index.desc.root_len);
        if (status != 0) {
            LOG_FATALF("ignorelist.c", "git_ignore_path_is_ignored returned error code: %d", status);
        }
        return verdict;
    }

    return FALSE;
}

16
src/ignorelist.h Normal file
View File

@ -0,0 +1,16 @@
/*
 * Ignore list for scans: gitignore-style rules evaluated through libgit2.
 */
#ifndef SIST2_IGNORELIST_H
#define SIST2_IGNORELIST_H
#include "src/sist.h"

/** Opaque ignore-list handle; the structure is defined in ignorelist.c. */
typedef struct ignorelist ignorelist_t;

/**
 * Initialise libgit2 and allocate an ignorelist with no rules.
 * Only the path of the temporary repository is prepared here; the repository
 * itself is initialised by ignorelist_load_ignore_file().
 *
 * @return Newly allocated ignorelist, to be released with ignorelist_destroy().
 */
ignorelist_t *ignorelist_create();

/**
 * Free the repository handle (if any) and the ignorelist itself, and call
 * git_libgit2_shutdown().
 *
 * @param ignorelist Ignorelist returned by ignorelist_create().
 */
void ignorelist_destroy(ignorelist_t* ignorelist);

/**
 * Read gitignore-style rules from filepath, one rule per line.
 * Does nothing when the file cannot be opened.
 *
 * @param ignorelist Ignorelist that receives the rules.
 * @param filepath   Path of the ignore file.
 */
void ignorelist_load_ignore_file(ignorelist_t* ignorelist, const char* filepath);

/**
 * Check a path against the loaded rules.
 *
 * @param ignorelist Ignorelist to query.
 * @param filepath   Path to test; the implementation skips its first
 *                   ScanCtx.index.desc.root_len characters before matching.
 * @return FALSE when no rule has been loaded, otherwise the result reported
 *         by git_ignore_path_is_ignored().
 */
int ignorelist_is_ignored(ignorelist_t* ignorelist, const char* filepath);
#endif //SIST2_IGNORELIST_H

View File

@ -23,8 +23,17 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
} else if (typeflag == FTW_D) {
if (typeflag == FTW_D) {
return FTW_SKIP_SUBTREE;
}
return FTW_CONTINUE;
}
if (ignorelist_is_ignored(ScanCtx.ignorelist, filepath)) {
LOG_DEBUGF("walk.c", "Ignored: %s", filepath);
if (typeflag == FTW_D) {
return FTW_SKIP_SUBTREE;
}

View File

@ -11,6 +11,7 @@
#include "web/serve.h"
#include "parsing/mime.h"
#include "parsing/parse.h"
#include "ignorelist.h"
#include <signal.h>
#include <pthread.h>
@ -239,6 +240,13 @@ void sist2_scan(scan_args_t *args) {
LOG_INFOF("main.c", "sist2 v%s", Version);
ScanCtx.ignorelist = ignorelist_create();
char ignore_filepath[PATH_MAX];
sprintf(ignore_filepath, "%s.sist2ignore", args->path);
ignorelist_load_ignore_file(ScanCtx.ignorelist, ignore_filepath);
ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE);
tpool_start(ScanCtx.pool);
@ -268,6 +276,7 @@ void sist2_scan(scan_args_t *args) {
database_generate_stats(db, args->treemap_threshold);
database_close(db, args->optimize_database);
ignorelist_destroy(ScanCtx.ignorelist);
}
void sist2_index(index_args_t *args) {

View File

@ -51,11 +51,11 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "3.4.6"
#define VERSION "3.5.0"
static const char *const Version = VERSION;
static const int VersionMajor = 3;
static const int VersionMinor = 4;
static const int VersionPatch = 6;
static const int VersionMinor = 5;
static const int VersionPatch = 0;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown