mirror of
https://github.com/simon987/sist2.git
synced 2025-04-21 11:16:46 +00:00
Compare commits
No commits in common. "31646a2747ea7c31336da2f9eb12aa4299df1220" and "e1e22fd79a226fb569d3ef619767da2a22187029" have entirely different histories.
31646a2747
...
e1e22fd79a
@ -46,7 +46,7 @@ services:
|
|||||||
- "discovery.type=single-node"
|
- "discovery.type=single-node"
|
||||||
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
|
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
|
||||||
sist2-admin:
|
sist2-admin:
|
||||||
image: simon987/sist2:3.3.3-x64-linux
|
image: simon987/sist2:3.1.4-x64-linux
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
volumes:
|
volumes:
|
||||||
- ./sist2-admin-data/:/sist2-admin/
|
- ./sist2-admin-data/:/sist2-admin/
|
||||||
@ -157,7 +157,6 @@ indices, but it uses much less memory and is easier to set up.
|
|||||||
| Manual tagging | ✓ | ✓ |
|
| Manual tagging | ✓ | ✓ |
|
||||||
| User scripts | ✓ | ✓ |
|
| User scripts | ✓ | ✓ |
|
||||||
| Media Type breakdown for search results | | ✓ |
|
| Media Type breakdown for search results | | ✓ |
|
||||||
| Embeddings search | ✓ *O(n)* | ✓ *O(log n)* |
|
|
||||||
|
|
||||||
### NER
|
### NER
|
||||||
|
|
||||||
|
@ -175,32 +175,6 @@ Using a version >=7.14.0 is recommended to enable the following features:
|
|||||||
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
|
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
|
||||||
If you don't care about the features above, you can ignore it or disable it in the configuration page.
|
If you don't care about the features above, you can ignore it or disable it in the configuration page.
|
||||||
|
|
||||||
# Embeddings search
|
|
||||||
|
|
||||||
Since v3.2.0, User scripts can be used to generate _embeddings_ (vectors of float32 numbers), which are stored in the .sist2 index file
|
|
||||||
(see [scripting](scripting.md)). Embeddings can be used for:
|
|
||||||
|
|
||||||
* Nearest-neighbor queries (e.g. "return the documents most similar to this one")
|
|
||||||
* Semantic searches (e.g. "return the documents that are most closely related to the given topic")
|
|
||||||
|
|
||||||
In theory, embeddings can be created for any type of document (image, text, audio etc.).
|
|
||||||
|
|
||||||
For example, the [clip](https://github.com/simon987/sist2-script-clip) User Script generates 512-d embeddings of images
|
|
||||||
(videos are also supported using the thumbnails generated by sist2). When the user enters a query in the "Embeddings Search"
|
|
||||||
textbox, the query's embedding is generated in their browser, leveraging the ONNX web runtime.
|
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary>Screenshots</summary>
|
|
||||||
|
|
||||||

|
|
||||||

|
|
||||||
|
|
||||||
1. Embeddings search bar. You can select the model using the dropdown on the left.
|
|
||||||
2. This icon appears for indices with embeddings search enabled.
|
|
||||||
3. Documents with this icon have embeddings. Click on the icon to perform KNN search.
|
|
||||||
</details>
|
|
||||||
|
|
||||||
|
|
||||||
# Tagging
|
# Tagging
|
||||||
|
|
||||||
### Manual tagging
|
### Manual tagging
|
||||||
@ -226,3 +200,42 @@ See [Automatic tagging](#automatic-tagging) for information about tag
|
|||||||
### Automatic tagging
|
### Automatic tagging
|
||||||
|
|
||||||
See [scripting](scripting.md) documentation.
|
See [scripting](scripting.md) documentation.
|
||||||
|
|
||||||
|
# Sidecar files
|
||||||
|
|
||||||
|
When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the
|
||||||
|
original document's indexed metadata (does not modify the actual file). Sidecar metadata files will also work inside archives.
|
||||||
|
Sidecar files themselves are not saved in the index.
|
||||||
|
|
||||||
|
This feature is useful to leverage third-party applications such as speech-to-text or
|
||||||
|
OCR to add additional metadata to a file.
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
|
||||||
|
```
|
||||||
|
~/Documents/
|
||||||
|
├── Video.mp4
|
||||||
|
└── Video.mp4.s2meta
|
||||||
|
```
|
||||||
|
|
||||||
|
The sidecar file must have exactly the same file path as the original file, with the `.s2meta` suffix appended.
|
||||||
|
|
||||||
|
`Video.mp4.s2meta`:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"content": "This sidecar file will overwrite some metadata fields of Video.mp4",
|
||||||
|
"author": "Some author",
|
||||||
|
"duration": 12345,
|
||||||
|
"bitrate": 67890,
|
||||||
|
"some_arbitrary_field": [1,2,3]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
sist2 scan ~/Documents -o ./docs.sist2
|
||||||
|
sist2 index ./docs.sist2
|
||||||
|
```
|
||||||
|
|
||||||
|
*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files; however,
|
||||||
|
it is not currently possible to restore both manual tags and sidecar tags without user scripts
|
||||||
|
while reindexing.
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 90 KiB |
Binary file not shown.
Before Width: | Height: | Size: 996 KiB |
@ -90,7 +90,6 @@ subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
|
|||||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||||
if (insecure) {
|
if (insecure) {
|
||||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
||||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, req->curl_err_buffer);
|
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, req->curl_err_buffer);
|
||||||
@ -124,7 +123,6 @@ response_t *web_get(const char *url, int timeout, int insecure) {
|
|||||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
|
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
|
||||||
if (insecure) {
|
if (insecure) {
|
||||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
||||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct curl_slist *headers = NULL;
|
struct curl_slist *headers = NULL;
|
||||||
@ -164,7 +162,6 @@ response_t *web_post(const char *url, const char *data, int insecure) {
|
|||||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||||
if (insecure) {
|
if (insecure) {
|
||||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
||||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
char err_buffer[CURL_ERROR_SIZE + 1] = {};
|
char err_buffer[CURL_ERROR_SIZE + 1] = {};
|
||||||
@ -210,7 +207,6 @@ response_t *web_put(const char *url, const char *data, int insecure) {
|
|||||||
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
|
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
|
||||||
if (insecure) {
|
if (insecure) {
|
||||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
||||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct curl_slist *headers = NULL;
|
struct curl_slist *headers = NULL;
|
||||||
@ -245,7 +241,6 @@ response_t *web_delete(const char *url, int insecure) {
|
|||||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||||
if (insecure) {
|
if (insecure) {
|
||||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
||||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
|
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
|
||||||
|
@ -51,11 +51,11 @@
|
|||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include "git_hash.h"
|
#include "git_hash.h"
|
||||||
|
|
||||||
#define VERSION "3.3.3"
|
#define VERSION "3.3.2"
|
||||||
static const char *const Version = VERSION;
|
static const char *const Version = VERSION;
|
||||||
static const int VersionMajor = 3;
|
static const int VersionMajor = 3;
|
||||||
static const int VersionMinor = 3;
|
static const int VersionMinor = 3;
|
||||||
static const int VersionPatch = 3;
|
static const int VersionPatch = 2;
|
||||||
|
|
||||||
#ifndef SIST_PLATFORM
|
#ifndef SIST_PLATFORM
|
||||||
#define SIST_PLATFORM unknown
|
#define SIST_PLATFORM unknown
|
||||||
|
Loading…
x
Reference in New Issue
Block a user