mirror of
https://github.com/simon987/sist2.git
synced 2025-09-08 15:46:57 +00:00
Compare commits
3 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
40136b74b4 | ||
|
cb0a587fe9 | ||
|
d221e08d67 |
@ -62,7 +62,9 @@ add_executable(
|
||||
src/database/database_schema.c
|
||||
src/database/database_fts.c
|
||||
src/web/web_fts.c
|
||||
src/database/database_embeddings.c)
|
||||
src/database/database_embeddings.c
|
||||
src/ignorelist.c
|
||||
src/ignorelist.h)
|
||||
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
||||
@ -76,6 +78,7 @@ find_package(CURL CONFIG REQUIRED)
|
||||
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
|
||||
find_package(unofficial-sqlite3 CONFIG REQUIRED)
|
||||
find_package(OpenBLAS CONFIG REQUIRED)
|
||||
find_package(libgit2 CONFIG REQUIRED)
|
||||
|
||||
|
||||
target_include_directories(
|
||||
@ -149,6 +152,7 @@ target_link_libraries(
|
||||
|
||||
# m
|
||||
z
|
||||
libgit2::libgit2package
|
||||
argparse
|
||||
unofficial::mongoose::mongoose
|
||||
CURL::libcurl
|
||||
|
@ -216,7 +216,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
|
||||
3. Install vcpkg dependencies
|
||||
|
||||
```bash
|
||||
vcpkg install openblas curl[core,openssl] sqlite3[core,fts5,json1] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib]
|
||||
vcpkg install openblas curl[core,openssl] sqlite3[core,fts5,json1] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib] libgit2[core,pcre]
|
||||
```
|
||||
|
||||
4. Build
|
||||
|
@ -108,6 +108,27 @@ sist scan ~/Documents -o ./documents.sist2 --incremental
|
||||
sist scan ~/Documents -o ./documents.sist2 --incremental
|
||||
```
|
||||
|
||||
### Excluding files
|
||||
|
||||
You can use the `--exclude` option to specify exclude patterns. For more complex setups, you can create a
|
||||
`.sist2ignore` file at the root of the scan path (for example, `~/Documents/.sist2ignore` for the scan command above).
|
||||
|
||||
The syntax of the `.sist2ignore` file is the same as Git's `.gitignore` (reference [here](https://git-scm.com/docs/gitignore)).
|
||||
|
||||
Example:
|
||||
|
||||
**.sist2ignore**
|
||||
```gitignore
|
||||
# Ignore all PDF files
|
||||
*.pdf
|
||||
|
||||
# But don't ignore them for the /important_files/ directory
|
||||
!/important_files/*.pdf
|
||||
|
||||
# Ignore all files in _staging/ directories
|
||||
_staging/
|
||||
```
|
||||
|
||||
### Index documents to Elasticsearch search backend
|
||||
|
||||
```bash
|
||||
|
@ -238,7 +238,7 @@ class Sist2ElasticsearchQuery {
|
||||
pre_tags: ["<mark>"],
|
||||
post_tags: ["</mark>"],
|
||||
fragment_size: getters.optFragmentSize,
|
||||
number_of_fragments: 1,
|
||||
number_of_fragments: getters.optFragmentCount,
|
||||
order: "score",
|
||||
fields: {
|
||||
content: {},
|
||||
|
@ -3,6 +3,8 @@
|
||||
</template>
|
||||
|
||||
<script>
|
||||
const FRAGMENT_SEPARATOR = "<br /><i style='line-height: 2.4'>[…]</i><br/>";
|
||||
|
||||
export default {
|
||||
name: "ContentDiv",
|
||||
props: ["doc"],
|
||||
@ -13,10 +15,10 @@ export default {
|
||||
}
|
||||
|
||||
if (this.doc.highlight["content.nGram"]) {
|
||||
return this.doc.highlight["content.nGram"][0];
|
||||
return this.doc.highlight["content.nGram"].join(FRAGMENT_SEPARATOR);
|
||||
}
|
||||
if (this.doc.highlight.content) {
|
||||
return this.doc.highlight.content[0];
|
||||
return this.doc.highlight.content.join(FRAGMENT_SEPARATOR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -59,6 +59,7 @@ export default {
|
||||
searchInPath: "Enable matching query against document path",
|
||||
suggestPath: "Enable auto-complete in path filter bar",
|
||||
fragmentSize: "Highlight context size",
|
||||
fragmentCount: "Number of highlight snippets",
|
||||
queryMode: "Search mode",
|
||||
displayMode: "Display",
|
||||
columns: "Column count",
|
||||
@ -242,6 +243,7 @@ export default {
|
||||
searchInPath: "Abgleich der Abfrage mit dem Dokumentpfad aktivieren",
|
||||
suggestPath: "Aktiviere Auto-Vervollständigung in Pfadfilter-Leiste",
|
||||
fragmentSize: "Kontextgröße",
|
||||
fragmentCount: "Anzahl der hervorgehobenen Snippets",
|
||||
queryMode: "Such-Modus",
|
||||
displayMode: "Ansicht",
|
||||
columns: "Anzahl Spalten",
|
||||
@ -417,6 +419,7 @@ export default {
|
||||
searchInPath: "Activer la recherche dans le chemin des documents",
|
||||
suggestPath: "Activer l'autocomplétion dans la barre de filtre de chemin",
|
||||
fragmentSize: "Longueur du contexte de surlignage",
|
||||
fragmentCount: "Nombre d'extraits surlignés",
|
||||
queryMode: "Mode de recherche",
|
||||
displayMode: "Affichage",
|
||||
columns: "Nombre de colonnes",
|
||||
@ -592,6 +595,7 @@ export default {
|
||||
searchInPath: "匹配文档路径",
|
||||
suggestPath: "搜索框启用自动补全",
|
||||
fragmentSize: "高亮上下文大小",
|
||||
fragmentCount: "突出显示的项目数",
|
||||
queryMode: "搜索模式",
|
||||
displayMode: "显示",
|
||||
columns: "列数",
|
||||
@ -767,6 +771,7 @@ export default {
|
||||
searchInPath: "Włącz szukanie również w ścieżce dokumentu",
|
||||
suggestPath: "Włącz auto-uzupełnianie w filtrze ścieżek",
|
||||
fragmentSize: "Podświetl wielkość kontekstu w znakach",
|
||||
fragmentCount: "Liczba wyróżnionych fragmentów",
|
||||
queryMode: "Tryb szukania",
|
||||
displayMode: "Wyświetlanie",
|
||||
columns: "Liczba kolumn",
|
||||
|
@ -3,7 +3,7 @@ import Vuex from "vuex"
|
||||
import {deserializeMimes, randomSeed, serializeMimes} from "@/util";
|
||||
import {getInstance} from "@/plugins/auth0.js";
|
||||
|
||||
const CONF_VERSION = 3;
|
||||
const CONF_VERSION = 4;
|
||||
|
||||
Vue.use(Vuex);
|
||||
|
||||
@ -41,6 +41,7 @@ export default new Vuex.Store({
|
||||
optTagOrOperator: false,
|
||||
optFuzzy: true,
|
||||
optFragmentSize: 200,
|
||||
optFragmentCount: 1,
|
||||
optQueryMode: "simple",
|
||||
optSearchInPath: false,
|
||||
optColumns: "auto",
|
||||
@ -170,6 +171,7 @@ export default new Vuex.Store({
|
||||
setOptSearchInPath: (state, val) => state.optSearchInPath = val,
|
||||
setOptSuggestPath: (state, val) => state.optSuggestPath = val,
|
||||
setOptFragmentSize: (state, val) => state.optFragmentSize = val,
|
||||
setOptFragmentCount: (state, val) => state.optFragmentCount = val,
|
||||
setOptQueryMode: (state, val) => state.optQueryMode = val,
|
||||
setOptResultSize: (state, val) => state.optSize = val,
|
||||
setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
|
||||
@ -430,6 +432,7 @@ export default new Vuex.Store({
|
||||
optSearchInPath: state => state.optSearchInPath,
|
||||
optSuggestPath: state => state.optSuggestPath,
|
||||
optFragmentSize: state => state.optFragmentSize,
|
||||
optFragmentCount: state => state.optFragmentCount,
|
||||
optQueryMode: state => state.optQueryMode,
|
||||
optTreemapType: state => state.optTreemapType,
|
||||
optTreemapTiling: state => state.optTreemapTiling,
|
||||
|
@ -151,6 +151,10 @@
|
||||
<b-form-input :value="optFragmentSize" step="10" type="number" min="0"
|
||||
@input="setOptFragmentSize"></b-form-input>
|
||||
|
||||
<label :class="{'text-muted': uiSqliteMode}">{{ $t("opt.fragmentCount") }}</label>
|
||||
<b-form-input :value="optFragmentCount" :disabled="uiSqliteMode" step="1" type="number" min="1"
|
||||
@input="setOptFragmentCount"></b-form-input>
|
||||
|
||||
<label>{{ $t("opt.resultSize") }}</label>
|
||||
<b-form-input :value="optResultSize" type="number" min="10"
|
||||
@input="setOptResultSize"></b-form-input>
|
||||
@ -314,6 +318,7 @@ export default {
|
||||
"optSearchInPath",
|
||||
"optSuggestPath",
|
||||
"optFragmentSize",
|
||||
"optFragmentCount",
|
||||
"optQueryMode",
|
||||
"optTreemapType",
|
||||
"optTreemapTiling",
|
||||
@ -360,6 +365,7 @@ export default {
|
||||
"setOptSearchInPath",
|
||||
"setOptSuggestPath",
|
||||
"setOptFragmentSize",
|
||||
"setOptFragmentCount",
|
||||
"setOptQueryMode",
|
||||
"setOptTreemapType",
|
||||
"setOptTreemapTiling",
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "src/database/database.h"
|
||||
#include "src/index/elastic.h"
|
||||
#include "sqlite3.h"
|
||||
#include "ignorelist.h"
|
||||
|
||||
#include <pcre.h>
|
||||
|
||||
@ -34,6 +35,7 @@ typedef struct {
|
||||
pcre *exclude;
|
||||
pcre_extra *exclude_extra;
|
||||
int fast;
|
||||
ignorelist_t *ignorelist;
|
||||
|
||||
scan_arc_ctx_t arc_ctx;
|
||||
scan_comic_ctx_t comic_ctx;
|
||||
|
106
src/ignorelist.c
Normal file
106
src/ignorelist.c
Normal file
@ -0,0 +1,106 @@
|
||||
#include "ignorelist.h"
|
||||
#include "ctx.h"
|
||||
#include <git2.h>
|
||||
|
||||
/* State backing one ignore list; the type is opaque outside this file. */
typedef struct ignorelist {
|
||||
/* libgit2 repository handle used to evaluate ignore rules; NULL until an ignore file is loaded. */
git_repository *repo;
|
||||
/* Path of the temporary repository: "<tempdir>/sist2-ignorelist-<pid>". */
char repo_path[PATH_MAX];
|
||||
/* TRUE once at least one rule was added; lets lookups return early when FALSE. */
int has_rules;
|
||||
} ignorelist_t;
|
||||
|
||||
/**
 * Return the directory in which temporary files should be created.
 *
 * The TMPDIR environment variable is used when it is set to a non-empty
 * value; otherwise "/tmp/" is returned. The result may or may not end with
 * a '/'. The returned pointer refers either to the environment or to a
 * string literal, so callers must not modify or free it.
 */
char *get_tempdir(void) {
    char *tempdir_env = getenv("TMPDIR");

    // Treat an empty TMPDIR as unset: the caller inspects the last character
    // of the result, which does not exist for "".
    if (tempdir_env != NULL && tempdir_env[0] != '\0') {
        return tempdir_env;
    }

    return "/tmp/";
}
|
||||
|
||||
/**
 * Release an ignore list created by ignorelist_create().
 *
 * Frees the libgit2 repository handle (if one was opened) and the list
 * itself, then shuts libgit2 down. Passing NULL only performs the shutdown.
 *
 * NOTE(review): the temporary repository directory at repo_path is not
 * removed from disk here — confirm whether that is intended.
 */
void ignorelist_destroy(ignorelist_t* ignorelist) {
    if (ignorelist != NULL) {
        // The repository must be released while libgit2 is still
        // initialised, i.e. before git_libgit2_shutdown().
        if (ignorelist->repo != NULL) {
            git_repository_free(ignorelist->repo);
            ignorelist->repo = NULL;
        }

        free(ignorelist);
    }

    // Balances the git_libgit2_init() call made in ignorelist_create().
    git_libgit2_shutdown();
}
|
||||
|
||||
ignorelist_t *ignorelist_create() {
|
||||
git_libgit2_init();
|
||||
|
||||
ignorelist_t *ignorelist = malloc(sizeof(ignorelist_t));
|
||||
|
||||
ignorelist->repo = NULL;
|
||||
ignorelist->has_rules = FALSE;
|
||||
|
||||
char *tempdir = get_tempdir();
|
||||
|
||||
if (tempdir[strlen(tempdir) - 1] == '/') {
|
||||
sprintf(ignorelist->repo_path, "%ssist2-ignorelist-%d", tempdir, getpid());
|
||||
} else {
|
||||
sprintf(ignorelist->repo_path, "%s/sist2-ignorelist-%d", tempdir, getpid());
|
||||
}
|
||||
|
||||
return ignorelist;
|
||||
}
|
||||
|
||||
/**
 * Load ignore rules from a file into the ignore list.
 *
 * If the file cannot be opened, the function returns without doing anything
 * (no ignore file is not an error). Otherwise a temporary bare git
 * repository is initialised at ignorelist->repo_path, libgit2's internal
 * default rules are cleared, and every line of the file is added as an
 * ignore rule. has_rules is set once at least one rule was accepted.
 *
 * Failures are reported through LOG_FATALF, which presumably terminates the
 * process (TODO confirm).
 *
 * @param ignorelist list created by ignorelist_create()
 * @param filepath   path of the ignore file to read
 */
void ignorelist_load_ignore_file(ignorelist_t *ignorelist, const char *filepath) {
    FILE *file = fopen(filepath, "r");

    if (file == NULL) {
        // No ignore list
        return;
    }

    LOG_DEBUGF("ignorelist.c", "Opening temporary git repository %s", ignorelist->repo_path);
    int init_result = git_repository_init(&ignorelist->repo, ignorelist->repo_path, TRUE);

    if (init_result != 0) {
        // Do not leak the file handle, and never continue with a NULL repository.
        fclose(file);
        LOG_FATALF("ignorelist.c", "Got error code from git_repository_init(): %d", init_result);
        return;
    }

    git_ignore_clear_internal_rules(ignorelist->repo);

    char line[PATH_MAX * 2];

    while (fgets(line, sizeof(line), file) != NULL) {
        // Strip the line terminator only when it is really there: the last
        // line of a file may lack '\n', and files may use CRLF endings.
        size_t len = strlen(line);
        if (len > 0 && line[len - 1] == '\n') {
            line[--len] = '\0';
        }
        if (len > 0 && line[len - 1] == '\r') {
            line[--len] = '\0';
        }

        int result = git_ignore_add_rule(ignorelist->repo, line);

        if (result == 0) {
            LOG_DEBUGF("ignorelist.c", "Load ignore rule: %s", line);
            ignorelist->has_rules = TRUE;
        } else {
            LOG_FATALF("ignorelist.c", "Invalid ignore rule: %s", line);
        }
    }

    fclose(file);
}
|
||||
|
||||
/**
 * Tell whether a scanned path matches the loaded ignore rules.
 *
 * Returns FALSE straight away when no rule was loaded. Otherwise the first
 * ScanCtx.index.desc.root_len characters of filepath are skipped and the
 * remainder is handed to git_ignore_path_is_ignored(); its verdict is
 * returned. A libgit2 error is reported through LOG_FATALF.
 */
int ignorelist_is_ignored(ignorelist_t *ignorelist, const char *filepath) {
    int verdict = -1;

    if (ignorelist->has_rules) {
        // Rules are evaluated against the path relative to the scan root.
        const char *relative = &filepath[ScanCtx.index.desc.root_len];

        int rc = git_ignore_path_is_ignored(&verdict, ignorelist->repo, relative);
        if (rc != 0) {
            LOG_FATALF("ignorelist.c", "git_ignore_path_is_ignored returned error code: %d", rc);
        }

        return verdict;
    }

    return FALSE;
}
|
16
src/ignorelist.h
Normal file
16
src/ignorelist.h
Normal file
@ -0,0 +1,16 @@
|
||||
#ifndef SIST2_IGNORELIST_H
#define SIST2_IGNORELIST_H

#include "src/sist.h"

/** Opaque handle to a set of gitignore-style exclusion rules. */
typedef struct ignorelist ignorelist_t;

/**
 * Allocate an empty ignore list (no rules loaded).
 * The result must be released with ignorelist_destroy().
 */
ignorelist_t *ignorelist_create(void);

/** Release an ignore list obtained from ignorelist_create(). */
void ignorelist_destroy(ignorelist_t *ignorelist);

/**
 * Read filepath line by line and add each line as an ignore rule.
 * Does nothing when the file cannot be opened.
 */
void ignorelist_load_ignore_file(ignorelist_t *ignorelist, const char *filepath);

/**
 * Return non-zero when filepath matches the loaded rules;
 * always returns FALSE when no rule was loaded.
 */
int ignorelist_is_ignored(ignorelist_t *ignorelist, const char *filepath);

#endif //SIST2_IGNORELIST_H
|
@ -23,8 +23,17 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
|
||||
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
} else if (typeflag == FTW_D) {
|
||||
if (typeflag == FTW_D) {
|
||||
return FTW_SKIP_SUBTREE;
|
||||
}
|
||||
|
||||
return FTW_CONTINUE;
|
||||
}
|
||||
|
||||
if (ignorelist_is_ignored(ScanCtx.ignorelist, filepath)) {
|
||||
LOG_DEBUGF("walk.c", "Ignored: %s", filepath);
|
||||
|
||||
if (typeflag == FTW_D) {
|
||||
return FTW_SKIP_SUBTREE;
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "web/serve.h"
|
||||
#include "parsing/mime.h"
|
||||
#include "parsing/parse.h"
|
||||
#include "ignorelist.h"
|
||||
|
||||
#include <signal.h>
|
||||
#include <pthread.h>
|
||||
@ -239,6 +240,13 @@ void sist2_scan(scan_args_t *args) {
|
||||
|
||||
LOG_INFOF("main.c", "sist2 v%s", Version);
|
||||
|
||||
ScanCtx.ignorelist = ignorelist_create();
|
||||
|
||||
char ignore_filepath[PATH_MAX];
|
||||
sprintf(ignore_filepath, "%s.sist2ignore", args->path);
|
||||
|
||||
ignorelist_load_ignore_file(ScanCtx.ignorelist, ignore_filepath);
|
||||
|
||||
ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE);
|
||||
tpool_start(ScanCtx.pool);
|
||||
|
||||
@ -268,6 +276,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
|
||||
database_generate_stats(db, args->treemap_threshold);
|
||||
database_close(db, args->optimize_database);
|
||||
ignorelist_destroy(ScanCtx.ignorelist);
|
||||
}
|
||||
|
||||
void sist2_index(index_args_t *args) {
|
||||
|
@ -51,11 +51,11 @@
|
||||
#include <ctype.h>
|
||||
#include "git_hash.h"
|
||||
|
||||
#define VERSION "3.4.6"
|
||||
#define VERSION "3.5.0"
|
||||
static const char *const Version = VERSION;
|
||||
static const int VersionMajor = 3;
|
||||
static const int VersionMinor = 4;
|
||||
static const int VersionPatch = 6;
|
||||
static const int VersionMinor = 5;
|
||||
static const int VersionPatch = 0;
|
||||
|
||||
#ifndef SIST_PLATFORM
|
||||
#define SIST_PLATFORM unknown
|
||||
|
Loading…
x
Reference in New Issue
Block a user