Mirror of https://github.com/simon987/sist2.git (synced 2025-12-20 02:26:08 +00:00)
Compare commits
16 Commits
8fdb832c85 ... 3.3.6
| Author | SHA1 | Date |
|---|---|---|
| | 49a21a5a25 | |
| | 560aa82ce7 | |
| | b8c905bd64 | |
| | 8299237ea0 | |
| | 31646a2747 | |
| | d9d77de47f | |
| | 5f0957d029 | |
| | 1cc48f7f33 | |
| | e1e22fd79a | |
| | 786bbc3859 | |
| | 9698ea0c37 | |
| | f345fc1a9a | |
| | 660fbf75d8 | |
| | 33ae585879 | |
| | 5729cbd6b4 | |
| | a19ec3305a | |
1
.gitignore
vendored
@@ -3,6 +3,7 @@ thumbs
*.cbp
CMakeCache.txt
CMakeFiles
cmake-build-default-event-trace
cmake-build-debug
cmake_install.cmake
Makefile
@@ -4,6 +4,8 @@

**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)

**Community URL:** [Discord](https://discord.gg/2PEjDy3Rfs)

# sist2

sist2 (Simple incremental search tool)
@@ -46,7 +48,7 @@ services:
- "discovery.type=single-node"
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
sist2-admin:
image: simon987/sist2:3.1.4-x64-linux
image: simon987/sist2:3.3.4-x64-linux
restart: unless-stopped
volumes:
- ./sist2-admin-data/:/sist2-admin/
@@ -157,6 +159,7 @@ indices, but it uses much less memory and is easier to set up.
| Manual tagging | ✓ | ✓ |
| User scripts | ✓ | ✓ |
| Media Type breakdown for search results | | ✓ |
| Embeddings search | ✓ *O(n)* | ✓ *O(logn)* |

### NER

@@ -175,6 +175,32 @@ Using a version >=7.14.0 is recommended to enable the following features:
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
If you don't care about the features above, you can ignore it or disable it in the configuration page.

# Embeddings search

Since v3.2.0, User scripts can be used to generate _embeddings_ (vectors of float32 numbers) which are stored in the .sist2 index file
(see [scripting](scripting.md)). Embeddings can be used for:

* Nearest-neighbor queries (e.g. "return the documents most similar to this one")
* Semantic searches (e.g. "return the documents that are most closely related to the given topic")

In theory, embeddings can be created for any type of document (image, text, audio, etc.).

For example, the [clip](https://github.com/simon987/sist2-script-clip) User Script generates 512-d embeddings of images
(videos are also supported using the thumbnails generated by sist2). When the user enters a query in the "Embeddings Search"
textbox, the query's embedding is generated in their browser, leveraging the ONNX web runtime.
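Purely as an illustration, here is a minimal sketch of what a script that writes embeddings into a `.sist2` index could look like. The table and column names (`document`, `model`, `embedding`) and the little-endian float32 blob layout are assumptions inferred from the index schema rather than a documented API, and `fake_embedding()` is a placeholder for a real model such as CLIP; the actual scripting interface is described in [scripting](scripting.md).

```python
#!/usr/bin/env python3
"""Hypothetical sketch of a User Script that writes embeddings.

Assumptions (NOT the official sist2 scripting API): the .sist2 index is a
SQLite database with document(id, ...), model(id, path, ...) and
embedding(id, model_id, embedding) tables, and embeddings are stored as
little-endian float32 blobs. See scripting.md for the real interface.
"""
import sqlite3
import struct
import sys


def fake_embedding(key: str, dim: int = 512) -> list:
    # Placeholder: a deterministic pseudo-embedding derived from the document id.
    # A real script would run an actual model (e.g. CLIP) on the file or its thumbnail.
    seed = sum(key.encode())
    return [((seed * (i + 1)) % 1000) / 1000.0 for i in range(dim)]


def main(index_path: str) -> None:
    conn = sqlite3.connect(index_path)

    # Register the (hypothetical) model that produced these embeddings.
    conn.execute("INSERT OR IGNORE INTO model (id, path) VALUES (?, ?)",
                 (1, "example-model"))

    for (doc_id,) in conn.execute("SELECT id FROM document").fetchall():
        vector = fake_embedding(str(doc_id))
        blob = struct.pack(f"<{len(vector)}f", *vector)  # float32, little-endian
        conn.execute(
            "INSERT OR REPLACE INTO embedding (id, model_id, embedding) VALUES (?, ?, ?)",
            (doc_id, 1, blob),
        )

    conn.commit()
    conn.close()


if __name__ == "__main__":
    main(sys.argv[1])
```

The clip script linked above follows the same general idea: compute one vector per document and store it alongside the document id and a model identifier.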
<details>
<summary>Screenshots</summary>



1. Embeddings search bar. You can select the model using the dropdown on the left.
2. This icon appears for indices with embeddings search enabled.
3. Documents with this icon have embeddings. Click on the icon to perform KNN search.
</details>

# Tagging

### Manual tagging
@@ -200,42 +226,3 @@ See [Automatic tagging](#automatic-tagging) for information about tag
### Automatic tagging

See the [scripting](scripting.md) documentation.

# Sidecar files

When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
original document's indexed metadata (the actual file is not modified). Sidecar metadata files also work inside archives.
Sidecar files themselves are not saved in the index.

This feature is useful for leveraging third-party applications such as speech-to-text or
OCR tools to add additional metadata to a file (see the illustrative sketch at the end of this section).

**Example**

```
~/Documents/
├── Video.mp4
└── Video.mp4.s2meta
```

The sidecar file must have exactly the same file path as the original document, plus the `.s2meta` suffix.

`Video.mp4.s2meta`:
```json
{
  "content": "This sidecar file will overwrite some metadata fields of Video.mp4",
  "author": "Some author",
  "duration": 12345,
  "bitrate": 67890,
  "some_arbitrary_field": [1, 2, 3]
}
```

```
sist2 scan ~/Documents -o ./docs.sist2
sist2 index ./docs.sist2
```

*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files; however, it is not
currently possible to restore both manual tags and sidecar tags without user scripts while reindexing.

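As an illustration of the sidecar workflow described above, the short sketch below (not part of sist2) walks a directory and writes a `.s2meta` file next to every `.mp4` it finds; `transcribe()` is a hypothetical placeholder for a real speech-to-text or OCR call.

```python
#!/usr/bin/env python3
"""Sketch: generate .s2meta sidecar files for every .mp4 under a directory.

transcribe() is a placeholder; a real tool would call a speech-to-text or OCR
engine and put its output in the "content" field.
"""
import json
import sys
from pathlib import Path


def transcribe(path: Path) -> str:
    # Placeholder for a real speech-to-text call.
    return f"Transcript of {path.name} would go here."


def main(root: str) -> None:
    for video in Path(root).rglob("*.mp4"):
        # Sidecar path = original file path + ".s2meta" suffix (e.g. Video.mp4.s2meta)
        sidecar = video.with_name(video.name + ".s2meta")
        metadata = {
            "content": transcribe(video),
            "some_arbitrary_field": [1, 2, 3],
        }
        sidecar.write_text(json.dumps(metadata, indent=2))
        print(f"Wrote {sidecar}")


if __name__ == "__main__":
    main(sys.argv[1])
```

Running `sist2 scan` and `sist2 index` afterwards (as in the commands above) picks up the generated sidecar files.
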
BIN
docs/embeddings-1.png
Normal file
Binary file not shown.
After: Size: 90 KiB
BIN
docs/embeddings-2.png
Normal file
Binary file not shown.
After: Size: 996 KiB
BIN
docs/sist2-admin-scripts.png
Normal file
Binary file not shown.
After: Size: 78 KiB
@@ -81,7 +81,7 @@ function humanDuration(sec_num) {
return `${seconds}s`;
}

return "<0s";
return "<1s";
}

export default {
@@ -134,7 +134,7 @@ export default {
duration: this.taskDuration(row),
time: moment.utc(row.started).local().format("dd, MMM Do YYYY, HH:mm:ss"),
logs: null,
status: [0,1].includes(row.return_code) ? "ok" : "failed",
status: row.return_code === 0 ? "ok" : "failed",
_row: row
}));
});

@@ -120,6 +120,10 @@ class Sist2Task:

logger.info(f"Started task {self.display_name}")

def set_pid(self, pid):
self.pid = pid

class Sist2ScanTask(Sist2Task):

@@ -133,13 +137,10 @@ class Sist2ScanTask(Sist2Task):
else:
self.job.scan_options.output = None

def set_pid(pid):
self.pid = pid

return_code = sist2.scan(self.job.scan_options, logs_cb=self.log_callback, set_pid_cb=set_pid)
return_code = sist2.scan(self.job.scan_options, logs_cb=self.log_callback, set_pid_cb=self.set_pid)
self.ended = datetime.utcnow()

is_ok = return_code in (0, 1)
is_ok = (return_code in (0, 1)) if "debug" in sist2.bin_path else (return_code == 0)

if not is_ok:
self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
@@ -165,6 +166,9 @@ class Sist2ScanTask(Sist2Task):
self.job.previous_index_path = self.job.index_path
db["jobs"][self.job.name] = self.job

if is_ok:
return 0

return return_code

@@ -185,7 +189,7 @@ class Sist2IndexTask(Sist2Task):

logger.debug(f"Fetched search backend options for {self.job.index_options.search_backend}")

return_code = sist2.index(self.job.index_options, search_backend, logs_cb=self.log_callback)
return_code = sist2.index(self.job.index_options, search_backend, logs_cb=self.log_callback, set_pid_cb=self.set_pid)
self.ended = datetime.utcnow()

duration = self.ended - self.started
@@ -249,7 +253,7 @@ class Sist2UserScriptTask(Sist2Task):
super().run(sist2, db)

try:
self.user_script.setup(self.log_callback)
self.user_script.setup(self.log_callback, self.set_pid)
except Exception as e:
logger.error(f"Setup for {self.user_script.name} failed: ")
logger.exception(e)
@@ -269,7 +273,7 @@ class Sist2UserScriptTask(Sist2Task):
self.log_callback({"sist2-admin": f"Starting user script with {executable=}, {index_path=}, {extra_args=}"})

proc = Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=self.user_script.script_dir())
self.pid = proc.pid
self.set_pid(proc.pid)

t_stderr = Thread(target=self._consume_logs, args=(self.log_callback, proc, "stderr", False))
t_stderr.start()
@@ -316,7 +320,7 @@ class TaskQueue:
def _tasks_failed(self):
done = set()

for row in self._db["task_done"].sql("WHERE return_code NOT IN (0,1)"):
for row in self._db["task_done"].sql("WHERE return_code != 0"):
done.add(uuid.UUID(row["id"]))

return done

@@ -20,7 +20,7 @@ def set_executable(file):
os.chmod(file, os.stat(file).st_mode | stat.S_IEXEC)

def _initialize_git_repository(url, path, log_cb, force_clone):
def _initialize_git_repository(url, path, log_cb, force_clone, set_pid_cb):
log_cb({"sist2-admin": f"Cloning {url}"})

if force_clone or not os.path.exists(os.path.join(path, ".git")):
@@ -36,14 +36,18 @@ def _initialize_git_repository(url, path, log_cb, force_clone):
log_cb({"sist2-admin": f"Executing setup script {setup_script}"})

set_executable(setup_script)
result = subprocess.run([setup_script], cwd=path, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
for line in result.stdout.split(b"\n"):
proc = subprocess.Popen([setup_script], cwd=path, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
set_pid_cb(proc.pid)
proc.wait()
stdout = proc.stdout.read()

for line in stdout.split(b"\n"):
if line:
log_cb({"stdout": line.decode()})

log_cb({"stdout": f"Executed setup script {setup_script}, return code = {result.returncode}"})
log_cb({"stdout": f"Executed setup script {setup_script}, return code = {proc.returncode}"})

if result.returncode != 0:
if proc.returncode != 0:
raise Exception("Error when running setup script!")

log_cb({"sist2-admin": f"Initialized git repository in {path}"})
@@ -60,11 +64,11 @@ class UserScript(BaseModel):
def script_dir(self):
return os.path.join(SCRIPT_FOLDER, self.name)

def setup(self, log_cb):
def setup(self, log_cb, set_pid_cb):
os.makedirs(self.script_dir(), exist_ok=True)

if self.type == ScriptType.GIT:
_initialize_git_repository(self.git_repository, self.script_dir(), log_cb, self.force_clone)
_initialize_git_repository(self.git_repository, self.script_dir(), log_cb, self.force_clone, set_pid_cb)
self.force_clone = False
elif self.type == ScriptType.SIMPLE:
self._setup_simple()

@@ -243,7 +243,7 @@ class Sist2:
self.bin_path = bin_path
self._data_dir = data_directory

def index(self, options: IndexOptions, search_backend: Sist2SearchBackend, logs_cb):
def index(self, options: IndexOptions, search_backend: Sist2SearchBackend, logs_cb, set_pid_cb):

args = [
self.bin_path,
@@ -255,6 +255,8 @@ class Sist2:
logs_cb({"sist2-admin": f"Starting sist2 command with args {args}"})
proc = Popen(args, stdout=PIPE, stderr=PIPE)

set_pid_cb(proc.pid)

t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start()

@@ -33,18 +33,6 @@ class Sist2Api {

getSist2Info() {
return axios.get(`${this.baseUrl}i`).then(resp => {
const indices = resp.data.indices;

resp.data.indices = indices.map(idx => {
return {
id: idx.id,
name: idx.name,
timestamp: idx.timestamp,
version: idx.version,
models: idx.models,
};
});

this.sist2Info = resp.data;

return resp.data;
@@ -155,6 +143,12 @@ class Sist2Api {
}
}

_getIndexRoot(indexId) {
console.log(indexId)
console.log(this.sist2Info.indices.find(idx => idx.id === indexId))
return this.sist2Info.indices.find(idx => idx.id === indexId).root;
}

esQuery(query) {
return axios.post(`${this.baseUrl}es`, query).then(resp => {
const res = resp.data;
@@ -163,6 +157,7 @@ class Sist2Api {
res.hits.hits.forEach((hit) => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
hit["_source"]["indexRoot"] = this._getIndexRoot(hit["_source"]["index"]);

this.setHitProps(hit);
this.setHitTags(hit);
@@ -421,7 +416,9 @@ class Sist2Api {
return axios.get(`${this.baseUrl}fts/dateRange`)
.then(resp => ({
min: resp.data.dateMin,
max: resp.data.dateMax,
max: (resp.data.dateMax === resp.data.dateMin)
? resp.data.dateMax + 1
: resp.data.dateMax,
}));
}

@@ -585,7 +582,7 @@ class Sist2Api {
}
}

getTagSuggestions(prefix) {
getTagSuggestions(prefix) {
if (this.backend() === "sqlite") {
return this.getTagSuggestionsSqlite(prefix);
} else {

24
src/cli.c
@@ -74,6 +74,21 @@ void sqlite_index_args_destroy(sqlite_index_args_t *args) {
free(args);
}

char *add_trailing_slash(char *abs_path) {
if (strcmp(abs_path, "/") == 0) {
// Special case: don't add trailing slash for "/"
return abs_path;
}

char *new_abs_path = realloc(abs_path, strlen(abs_path) + 2);
if (new_abs_path == NULL) {
LOG_FATALF("cli.c", "FIXME: realloc() failed for abs_path=%s", abs_path);
}
strcat(new_abs_path, "/");

return new_abs_path;
}

int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n");
@@ -83,15 +98,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char *abs_path = abspath(argv[1]);
if (abs_path == NULL) {
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]);
} else {
char *new_abs_path = realloc(abs_path, strlen(abs_path) + 2);
if (new_abs_path == NULL) {
LOG_FATALF("cli.c", "FIXME: realloc() failed for argv[1]=%s, abs_path=%s", argv[1], abs_path);
}
strcat(new_abs_path, "/");
args->path = new_abs_path;
}

args->path = add_trailing_slash(abs_path);

if (args->tn_quality == OPTION_VALUE_UNSPECIFIED) {
args->tn_quality = DEFAULT_QUALITY;
} else if (args->tn_quality < 0 || args->tn_quality > 100) {

@@ -1,8 +1,6 @@
#include "ctx.h"

ScanCtx_t ScanCtx = {
.stat_index_size = 0,
.stat_tn_size = 0,
.pool = NULL,
.index.path = {0,},
};

@@ -31,9 +31,6 @@ typedef struct {
int depth;
int calculate_checksums;

size_t stat_tn_size;
size_t stat_index_size;

pcre *exclude;
pcre_extra *exclude_extra;
int fast;

@@ -149,7 +149,7 @@ void database_open(database_t *db) {
}

#ifdef SIST_DEBUG
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA foreign_keys = ON;", NULL, NULL, NULL));
// CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA foreign_keys = ON;", NULL, NULL, NULL));
#else
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA ignore_check_constraints = ON;", NULL, NULL, NULL));
#endif
@@ -373,7 +373,7 @@ void database_open(database_t *db) {
}

void database_close(database_t *db, int optimize) {
LOG_DEBUGF("database.c", "Closing database %s", db->filename);
LOG_DEBUGF("database.c", "Closing database %s (%p)", db->filename, db->db);

if (optimize) {
LOG_DEBUG("database.c", "Optimizing database");
@@ -516,32 +516,31 @@ database_iterator_t *database_create_document_iterator(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(
sqlite3_prepare_v2(
db->db,
"WITH doc (j) AS (SELECT CASE"
" WHEN emb.embedding IS NULL THEN"
" json_set(document.json_data, "
" '$._id', document.id, "
" '$.size', document.size, "
" '$.mtime', document.mtime, "
" '$.mime', mim.name,"
" '$.thumbnail', document.thumbnail_count, "
" '$.tag', json_group_array((SELECT tag FROM tag WHERE document.id = tag.id)))"
" ELSE"
" json_set(document.json_data,"
" '$._id', document.id,"
" '$.size', document.size,"
" '$.mtime', document.mtime,"
" '$.mime', mim.name,"
" '$.thumbnail', document.thumbnail_count, "
" '$.tag', json_group_array((SELECT tag FROM tag WHERE document.id = tag.id)),"
" '$.emb', json_group_object(m.path, json(emb_to_json(emb.embedding))),"
" '$.embedding', 1)"
" END"
"WITH doc (id, j) AS ("
"SELECT"
" document.id,"
" json_set(document.json_data,"
" '$._id', document.id,"
" '$.index', (SELECT id FROM descriptor),"
" '$.size', document.size,"
" '$.mtime', document.mtime,"
" '$.mime', mim.name,"
" '$.thumbnail', document.thumbnail_count,"
" '$.tag', json_group_array(t.tag))"
" FROM document"
" LEFT JOIN embedding emb ON document.id = emb.id"
" LEFT JOIN model m ON emb.model_id = m.id"
" LEFT JOIN mime mim ON mim.id = document.mime"
" LEFT JOIN mime mim ON mim.id = document.mime"
" LEFT JOIN tag t ON t.id = document.id"
" GROUP BY document.id)"
" SELECT json_set(j, '$.index', (SELECT id FROM descriptor)) FROM doc",
"SELECT CASE"
" WHEN emb.embedding IS NULL THEN j"
" ELSE json_set(j,"
" '$.emb', json_group_object(m.path, json(emb_to_json(emb.embedding))),"
" '$.embedding', 1"
" ) END"
" FROM doc"
" LEFT JOIN embedding emb ON doc.id = emb.id"
" LEFT JOIN model m ON emb.model_id = m.id"
" GROUP BY doc.id",
-1, &stmt, NULL));

database_iterator_t *iter = malloc(sizeof(database_iterator_t));

@@ -594,8 +593,9 @@ cJSON *database_document_iter(database_iterator_t *iter) {
cJSON *database_incremental_scan_begin(database_t *db) {
LOG_DEBUG("database.c", "Preparing database for incremental scan");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM marked;", NULL, NULL, NULL));
LOG_DEBUG("database.c", "Preparing database for incremental scan (create marked table)");
CRASH_IF_NOT_SQLITE_OK(
sqlite3_exec(db->db, "INSERT INTO marked SELECT ROWID, 0, mtime FROM document;", NULL, NULL, NULL));
sqlite3_exec(db->db, "INSERT INTO marked SELECT id, 0, mtime FROM document;", NULL, NULL, NULL));
}

cJSON *database_incremental_scan_end(database_t *db) {

@@ -105,7 +105,6 @@ typedef struct database {
sqlite3_stmt *fts_write_tag_stmt;
sqlite3_stmt *fts_model_size;

char **tag_array;

database_ipc_ctx_t *ipc_ctx;

@@ -90,6 +90,7 @@ subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
}

curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, req->curl_err_buffer);
@@ -123,6 +124,7 @@ response_t *web_get(const char *url, int timeout, int insecure) {
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
}

struct curl_slist *headers = NULL;
@@ -162,6 +164,7 @@ response_t *web_post(const char *url, const char *data, int insecure) {
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
}

char err_buffer[CURL_ERROR_SIZE + 1] = {};
@@ -207,6 +210,7 @@ response_t *web_put(const char *url, const char *data, int insecure) {
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
}

struct curl_slist *headers = NULL;
@@ -241,6 +245,7 @@ response_t *web_delete(const char *url, int insecure) {
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
}

curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");

@@ -260,9 +260,6 @@ void sist2_scan(scan_args_t *args) {
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);

LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size);
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size);

database_t *db = database_create(args->output, INDEX_DATABASE);
database_open(db);

@@ -356,7 +353,6 @@ void sist2_sqlite_index(sqlite_index_args_t *args) {
database_fts_optimize(db);

database_close(db, FALSE);
database_close(search_db, FALSE);
}

void sist2_web(web_args_t *args) {

@@ -51,11 +51,11 @@
#include <ctype.h>
#include "git_hash.h"

#define VERSION "3.3.0"
#define VERSION "3.3.6"
static const char *const Version = VERSION;
static const int VersionMajor = 3;
static const int VersionMinor = 3;
static const int VersionPatch = 0;
static const int VersionPatch = 6;

#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown

26
src/tpool.c
@@ -77,14 +77,14 @@ static void worker_thread_loop(tpool_t *pool) {
job_t *job = database_get_work(ProcData.ipc_db, pool->shm->job_type);

if (job != NULL) {
pthread_mutex_lock(&(pool->shm->data_mutex));
pool->shm->busy_count += 1;
pthread_mutex_unlock(&(pool->shm->data_mutex));

if (pool->shm->stop) {
break;
}

pthread_mutex_lock(&(pool->shm->data_mutex));
pool->shm->busy_count += 1;
pthread_mutex_unlock(&(pool->shm->data_mutex));

if (job->type == JOB_PARSE_JOB) {
parse(job->parse_job);
} else if (job->type == JOB_BULK_LINE) {
@@ -110,11 +110,11 @@ static void worker_thread_loop(tpool_t *pool) {
if (LogCtx.json_logs) {
progress_bar_print_json(done,
count,
ScanCtx.stat_tn_size,
ScanCtx.stat_index_size, pool->shm->waiting);
0,
0, pool->shm->waiting);
} else {
progress_bar_print((double) done / count,
ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
0, 0);
}
}

@@ -200,11 +200,11 @@ static void *tpool_worker(void *arg) {
pool->shm->ipc_ctx.completed_job_count += 1;
pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex));

pthread_mutex_lock(&(pool->shm->data_mutex));
pool->shm->busy_count -= 1;
pthread_mutex_unlock(&(pool->shm->data_mutex));

if (WIFSIGNALED(status)) {
pthread_mutex_lock(&(pool->shm->data_mutex));
pool->shm->busy_count -= 1;
pthread_mutex_unlock(&(pool->shm->data_mutex));

int crashed_thread_id = -1;
for (int i = 0; i < MAX_THREADS; i++) {
if (pool->shm->thread_id_to_pid_mapping[i] == pid) {
@@ -265,14 +265,14 @@ void tpool_wait(tpool_t *pool) {
if (pool->shm->ipc_ctx.job_count > 0) {
pthread_cond_wait(&(pool->shm->done_working_cond), &pool->shm->mutex);
} else {
if (pool->shm->ipc_ctx.job_count == 0 && pool->shm->busy_count == 0) {
if (pool->shm->ipc_ctx.job_count == 0 && pool->shm->busy_count <= 0) {
pool->shm->stop = TRUE;
break;
}
}
}
if (pool->print_progress && !LogCtx.json_logs) {
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
progress_bar_print(1.0, 0, 0);
}
pthread_mutex_unlock(&pool->shm->mutex);

@@ -88,7 +88,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {

memcpy(index_id_str, hm->uri.ptr + 3, 8);
*(index_id_str + 8) = '\0';
int index_id = (int)strtol(index_id_str, NULL, 16);
int index_id = (int) strtol(index_id_str, NULL, 16);

memcpy(arg_stat_type, hm->uri.ptr + 3 + 9, 4);
*(arg_stat_type + sizeof(arg_stat_type) - 1) = '\0';
@@ -368,6 +368,10 @@ void index_info(struct mg_connection *nc) {
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
cJSON_AddItemToArray(arr, idx_json);

#ifdef SIST_DEBUG_INFO
cJSON_AddStringToObject(idx_json, "root", idx->desc.root);
#endif

cJSON *models = database_get_models(idx->db);
cJSON_AddItemToObject(idx_json, "models", models);
}
@@ -480,7 +484,7 @@ tag_req_t *parse_tag_request(cJSON *json) {
return req;
}

subreq_ctx_t *elastic_delete_tag(const char* sid, const tag_req_t *req) {
subreq_ctx_t *elastic_delete_tag(const char *sid, const tag_req_t *req) {
char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192,
"{"
@@ -500,7 +504,7 @@ subreq_ctx_t *elastic_delete_tag(const char* sid, const tag_req_t *req) {
return web_post_async(url, buf, WebCtx.es_insecure_ssl);
}

subreq_ctx_t *elastic_write_tag(const char* sid, const tag_req_t *req) {
subreq_ctx_t *elastic_write_tag(const char *sid, const tag_req_t *req) {
char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192,
"{"