mirror of
https://github.com/simon987/sist2.git
synced 2025-04-21 19:26:45 +00:00
Compare commits
No commits in common. "69438464bf02b81d7b4edf6a1c7c7b815b243474" and "5771693b1ad92e268737a598c9d901448e6e8632" have entirely different histories.
69438464bf
...
5771693b1a
@ -63,7 +63,7 @@ add_executable(
|
|||||||
src/database/database_schema.c
|
src/database/database_schema.c
|
||||||
src/database/database_fts.c
|
src/database/database_fts.c
|
||||||
src/web/web_fts.c
|
src/web/web_fts.c
|
||||||
src/database/database_embeddings.c)
|
)
|
||||||
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
|
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
|
||||||
|
|
||||||
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
|
||||||
@ -76,7 +76,6 @@ find_package(unofficial-mongoose CONFIG REQUIRED)
|
|||||||
find_package(CURL CONFIG REQUIRED)
|
find_package(CURL CONFIG REQUIRED)
|
||||||
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
|
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
|
||||||
find_package(unofficial-sqlite3 CONFIG REQUIRED)
|
find_package(unofficial-sqlite3 CONFIG REQUIRED)
|
||||||
find_package(OpenBLAS CONFIG REQUIRED)
|
|
||||||
|
|
||||||
|
|
||||||
target_include_directories(
|
target_include_directories(
|
||||||
@ -159,7 +158,6 @@ target_link_libraries(
|
|||||||
|
|
||||||
${MAGIC_LIB}
|
${MAGIC_LIB}
|
||||||
unofficial::sqlite3::sqlite3
|
unofficial::sqlite3::sqlite3
|
||||||
OpenBLAS::OpenBLAS
|
|
||||||
)
|
)
|
||||||
|
|
||||||
add_custom_target(
|
add_custom_target(
|
||||||
|
@ -48,6 +48,5 @@ COPY --from=build /build/build/sist2 /root/sist2
|
|||||||
# sist2-admin
|
# sist2-admin
|
||||||
WORKDIR /root/sist2-admin
|
WORKDIR /root/sist2-admin
|
||||||
COPY sist2-admin/requirements.txt /root/sist2-admin/
|
COPY sist2-admin/requirements.txt /root/sist2-admin/
|
||||||
RUN ln /usr/bin/python3 /usr/bin/python
|
RUN python3 -m pip install --no-cache -r /root/sist2-admin/requirements.txt
|
||||||
RUN python -m pip install --no-cache -r /root/sist2-admin/requirements.txt
|
|
||||||
COPY --from=build /build/sist2-admin/ /root/sist2-admin/
|
COPY --from=build /build/sist2-admin/ /root/sist2-admin/
|
||||||
|
@ -147,15 +147,15 @@ fewer features and generally comparable query performance for medium-size
|
|||||||
indices, but it uses much less memory and is easier to set up.
|
indices, but it uses much less memory and is easier to set up.
|
||||||
|
|
||||||
| | SQLite | Elasticsearch |
|
| | SQLite | Elasticsearch |
|
||||||
|----------------------------------------------|:---------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------:|
|
|----------------------------------------------|:----------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------:|
|
||||||
| Requires separate search engine installation | | ✓ |
|
| Requires separate search engine installation | | ✓ |
|
||||||
| Memory footprint | ~20MB | >500MB |
|
| Memory footprint | ~20MB | >500MB |
|
||||||
| Query syntax | [fts5](https://www.sqlite.org/fts5.html) | [query_string](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax) |
|
| Query syntax | [fts5](https://www.sqlite.org/fts5.html) | [query_string](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax) |
|
||||||
| Fuzzy search | | ✓ |
|
| Fuzzy search | | ✓ |
|
||||||
| Media Types tree real-time updating | | ✓ |
|
| Media Types tree real-time updating | | ✓ |
|
||||||
| Search in file `path` | [WIP](https://github.com/simon987/sist2/issues/402) | ✓ |
|
| Search in file `path` | | ✓ |
|
||||||
| Manual tagging | ✓ | ✓ |
|
| Manual tagging | ✓ | ✓ |
|
||||||
| User scripts | ✓ | ✓ |
|
| User scripts | | ✓ |
|
||||||
| Media Type breakdown for search results | | ✓ |
|
| Media Type breakdown for search results | | ✓ |
|
||||||
|
|
||||||
### NER
|
### NER
|
||||||
@ -206,7 +206,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
|
|||||||
3. Install vcpkg dependencies
|
3. Install vcpkg dependencies
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
vcpkg install openblas curl[core,openssl] sqlite3[core,fts5] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib]
|
vcpkg install curl[core,openssl] sqlite3[core,fts5] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,ffprobe,zlib]
|
||||||
```
|
```
|
||||||
|
|
||||||
4. Build
|
4. Build
|
||||||
|
@ -5,6 +5,7 @@ Usage: sist2 scan [OPTION]... PATH
|
|||||||
or: sist2 index [OPTION]... INDEX
|
or: sist2 index [OPTION]... INDEX
|
||||||
or: sist2 sqlite-index [OPTION]... INDEX
|
or: sist2 sqlite-index [OPTION]... INDEX
|
||||||
or: sist2 web [OPTION]... INDEX...
|
or: sist2 web [OPTION]... INDEX...
|
||||||
|
or: sist2 exec-script [OPTION]... INDEX
|
||||||
|
|
||||||
Lightning-fast file system indexer and search tool.
|
Lightning-fast file system indexer and search tool.
|
||||||
|
|
||||||
@ -73,6 +74,13 @@ Web options
|
|||||||
--dev Serve html & js files from disk (for development)
|
--dev Serve html & js files from disk (for development)
|
||||||
--lang=<str> Default UI language. Can be changed by the user
|
--lang=<str> Default UI language. Can be changed by the user
|
||||||
|
|
||||||
|
Exec-script options
|
||||||
|
--es-url=<str> Elasticsearch url. DEFAULT: http://localhost:9200
|
||||||
|
--es-insecure-ssl Do not verify SSL connections to Elasticsearch.
|
||||||
|
--es-index=<str> Elasticsearch index name. DEFAULT: sist2
|
||||||
|
--script-file=<str> Path to user script.
|
||||||
|
--async-script Execute user script asynchronously.
|
||||||
|
|
||||||
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -175,6 +183,11 @@ Using a version >=7.14.0 is recommended to enable the following features:
|
|||||||
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
|
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
|
||||||
If you don't care about the features above, you can ignore it or disable it in the configuration page.
|
If you don't care about the features above, you can ignore it or disable it in the configuration page.
|
||||||
|
|
||||||
|
## exec-script
|
||||||
|
|
||||||
|
The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that, unlike the `index` command, `exec-script` does not reset the documents to their default state before each execution: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
|
||||||
|
|
||||||
|
|
||||||
# Tagging
|
# Tagging
|
||||||
|
|
||||||
### Manual tagging
|
### Manual tagging
|
||||||
|
@ -1,34 +1,6 @@
|
|||||||
## User scripts
|
## User scripts
|
||||||
|
|
||||||
User scripts are used to augment your sist2 index with additional metadata, neural network embeddings, tags, etc.
|
*This document is under construction; a more in-depth guide is coming soon.*
|
||||||
|
|
||||||
|
|
||||||
Since version 3.2.0, user scripts are written in Python, and are run against the sist2 index file. User scripts do not
|
|
||||||
need a connection to the search backend.
|
|
||||||
|
|
||||||
You can create a user script based on a template from the sist2-admin interface:
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
User scripts leverage the [sist2-python](https://github.com/simon987/sist2-python) library to interface with the
|
|
||||||
index file*. You can find sist2-python documentation and examples
|
|
||||||
here: [sist2-python.readthedocs.io](https://sist2-python.readthedocs.io/).
|
|
||||||
|
|
||||||
If you are not using the sist2-admin interface, you can run user scripts manually from the command line:
|
|
||||||
|
|
||||||
```
|
|
||||||
pip install git+https://github.com/simon987/sist2-python.git
|
|
||||||
|
|
||||||
python my_script.py /path/to/my_index.sist2
|
|
||||||
```
|
|
||||||
|
|
||||||
\* It is possible to manually update the index using raw SQL queries, but the database schema is not stable and
|
|
||||||
can change at any time; it is recommended to use the more stable sist2-python wrapper instead.
|
|
||||||
|
|
||||||
<hr>
|
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary>Legacy user scripts (sist2 version < 3.2.0)</summary>
|
|
||||||
|
|
||||||
During the `index` step, you can use the `--script-file <script>` option to
|
During the `index` step, you can use the `--script-file <script>` option to
|
||||||
modify documents or add user tags. This option is mainly used to
|
modify documents or add user tags. This option is mainly used to
|
||||||
@ -41,7 +13,6 @@ without programming experience at all if you're somewhat familiar with
|
|||||||
regex.
|
regex.
|
||||||
|
|
||||||
This is the base structure of the documents we're working with:
|
This is the base structure of the documents we're working with:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"_id": "e171405c-fdb5-4feb-bb32-82637bc32084",
|
"_id": "e171405c-fdb5-4feb-bb32-82637bc32084",
|
||||||
@ -64,7 +35,6 @@ This is the base structure of the documents we're working with:
|
|||||||
|
|
||||||
This script checks if the `genre` attribute exists, if it does
|
This script checks if the `genre` attribute exists, if it does
|
||||||
it adds the `genre.<genre>` tag.
|
it adds the `genre.<genre>` tag.
|
||||||
|
|
||||||
```Java
|
```Java
|
||||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||||
|
|
||||||
@ -77,12 +47,11 @@ You can use `.` to create a hierarchical tag tree:
|
|||||||
|
|
||||||

|

|
||||||
|
|
||||||
To use regular expressions, you need to add this line in `/etc/elasticsearch/elasticsearch.yml`
|
|
||||||
|
|
||||||
|
To use regular expressions, you need to add this line in `/etc/elasticsearch/elasticsearch.yml`
|
||||||
```yaml
|
```yaml
|
||||||
script.painless.regex.enabled: true
|
script.painless.regex.enabled: true
|
||||||
```
|
```
|
||||||
|
|
||||||
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
|
Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
|
||||||
|
|
||||||
**Tag color**
|
**Tag color**
|
||||||
@ -93,7 +62,6 @@ hexadecimal color code (`#RRGGBBAA`) to the tag name.
|
|||||||
### Examples
|
### Examples
|
||||||
|
|
||||||
If `(20XX)` is in the file name, add the `year.<year>` tag:
|
If `(20XX)` is in the file name, add the `year.<year>` tag:
|
||||||
|
|
||||||
```Java
|
```Java
|
||||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||||
|
|
||||||
@ -104,7 +72,6 @@ if (m.find()) {
|
|||||||
```
|
```
|
||||||
|
|
||||||
Use default *Calibre* folder structure to infer author.
|
Use default *Calibre* folder structure to infer author.
|
||||||
|
|
||||||
```Java
|
```Java
|
||||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||||
|
|
||||||
@ -119,7 +86,6 @@ if (ctx._source.name.contains("-") && ctx._source.extension == "pdf") {
|
|||||||
|
|
||||||
If the file matches a specific pattern `AAAA-000 fName1 lName1, <fName2 lName2>...`, add the `actress.<actress>` and
|
If the file matches a specific pattern `AAAA-000 fName1 lName1, <fName2 lName2>...`, add the `actress.<actress>` and
|
||||||
`studio.<studio>` tag:
|
`studio.<studio>` tag:
|
||||||
|
|
||||||
```Java
|
```Java
|
||||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||||
|
|
||||||
@ -136,18 +102,16 @@ if (m.find()) {
|
|||||||
```
|
```
|
||||||
|
|
||||||
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
|
Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
|
||||||
|
|
||||||
```Java
|
```Java
|
||||||
ArrayList tags = ctx._source.tag = new ArrayList();
|
ArrayList tags = ctx._source.tag = new ArrayList();
|
||||||
|
|
||||||
if (ctx._source.path != "") {
|
if (ctx._source.path != "") {
|
||||||
String[] names = ctx._source.path.splitOnToken('/');
|
String[] names = ctx._source.path.splitOnToken('/');
|
||||||
tags.add("studio." + names[names.length-1]);
|
tags.add("studio." + names[names.length-1]);
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Parse `EXIF:F Number` tag
|
Parse `EXIF:F Number` tag
|
||||||
|
|
||||||
```Java
|
```Java
|
||||||
if (ctx._source?.exif_fnumber != null) {
|
if (ctx._source?.exif_fnumber != null) {
|
||||||
String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
|
String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
|
||||||
@ -160,7 +124,6 @@ if (ctx._source?.exif_fnumber != null) {
|
|||||||
```
|
```
|
||||||
|
|
||||||
Display year and months from `EXIF:DateTime` tag
|
Display year and months from `EXIF:DateTime` tag
|
||||||
|
|
||||||
```Java
|
```Java
|
||||||
if (ctx._source?.exif_datetime != null) {
|
if (ctx._source?.exif_datetime != null) {
|
||||||
SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
|
SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
|
||||||
@ -177,6 +140,3 @@ if (ctx._source?.exif_datetime != null) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
|
@ -202,46 +202,6 @@
|
|||||||
},
|
},
|
||||||
"modified_by": {
|
"modified_by": {
|
||||||
"type": "text"
|
"type": "text"
|
||||||
},
|
|
||||||
"emb.384.*": {
|
|
||||||
"type": "dense_vector",
|
|
||||||
"dims": 384
|
|
||||||
},
|
|
||||||
"emb.idx_384.*": {
|
|
||||||
"type": "dense_vector",
|
|
||||||
"dims": 384,
|
|
||||||
"index": true,
|
|
||||||
"similarity": "cosine"
|
|
||||||
},
|
|
||||||
"emb.idx_512.clip": {
|
|
||||||
"type": "dense_vector",
|
|
||||||
"dims": 512,
|
|
||||||
"index": true,
|
|
||||||
"similarity": "cosine"
|
|
||||||
},
|
|
||||||
"emb.512.*": {
|
|
||||||
"type": "dense_vector",
|
|
||||||
"dims": 512
|
|
||||||
},
|
|
||||||
"emb.idx_768.*": {
|
|
||||||
"type": "dense_vector",
|
|
||||||
"dims": 768,
|
|
||||||
"index": true,
|
|
||||||
"similarity": "cosine"
|
|
||||||
},
|
|
||||||
"emb.768.*": {
|
|
||||||
"type": "dense_vector",
|
|
||||||
"dims": 768
|
|
||||||
},
|
|
||||||
"emb.idx_1024.*": {
|
|
||||||
"type": "dense_vector",
|
|
||||||
"dims": 1024,
|
|
||||||
"index": true,
|
|
||||||
"similarity": "cosine"
|
|
||||||
},
|
|
||||||
"emb.1024.*": {
|
|
||||||
"type": "dense_vector",
|
|
||||||
"dims": 1024
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,131 +0,0 @@
|
|||||||
import sqlite3
|
|
||||||
import orjson as json
|
|
||||||
import os
|
|
||||||
import string
|
|
||||||
from hashlib import md5
|
|
||||||
import random
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
schema = """
|
|
||||||
CREATE TABLE thumbnail (
|
|
||||||
id TEXT NOT NULL CHECK (
|
|
||||||
length(id) = 32
|
|
||||||
),
|
|
||||||
num INTEGER NOT NULL,
|
|
||||||
data BLOB NOT NULL,
|
|
||||||
PRIMARY KEY(id, num)
|
|
||||||
) WITHOUT ROWID;
|
|
||||||
CREATE TABLE version (
|
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
||||||
date TEXT NOT NULL DEFAULT (CURRENT_TIMESTAMP)
|
|
||||||
);
|
|
||||||
CREATE TABLE document (
|
|
||||||
id TEXT PRIMARY KEY NOT NULL CHECK (
|
|
||||||
length(id) = 32
|
|
||||||
),
|
|
||||||
marked INTEGER NOT NULL DEFAULT (1),
|
|
||||||
version INTEGER NOT NULL REFERENCES version(id),
|
|
||||||
mtime INTEGER NOT NULL,
|
|
||||||
size INTEGER NOT NULL,
|
|
||||||
json_data TEXT NOT NULL CHECK (
|
|
||||||
json_valid(json_data)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
CREATE TABLE delete_list (
|
|
||||||
id TEXT PRIMARY KEY CHECK (
|
|
||||||
length(id) = 32
|
|
||||||
)
|
|
||||||
) WITHOUT ROWID;
|
|
||||||
CREATE TABLE tag (
|
|
||||||
id TEXT NOT NULL,
|
|
||||||
tag TEXT NOT NULL,
|
|
||||||
PRIMARY KEY (id, tag)
|
|
||||||
);
|
|
||||||
CREATE TABLE document_sidecar (
|
|
||||||
id TEXT PRIMARY KEY NOT NULL, json_data TEXT NOT NULL
|
|
||||||
) WITHOUT ROWID;
|
|
||||||
CREATE TABLE descriptor (
|
|
||||||
id TEXT NOT NULL, version_major INTEGER NOT NULL,
|
|
||||||
version_minor INTEGER NOT NULL, version_patch INTEGER NOT NULL,
|
|
||||||
root TEXT NOT NULL, name TEXT NOT NULL,
|
|
||||||
rewrite_url TEXT, timestamp INTEGER NOT NULL
|
|
||||||
);
|
|
||||||
CREATE TABLE stats_treemap (
|
|
||||||
path TEXT NOT NULL, size INTEGER NOT NULL
|
|
||||||
);
|
|
||||||
CREATE TABLE stats_size_agg (
|
|
||||||
bucket INTEGER NOT NULL, count INTEGER NOT NULL
|
|
||||||
);
|
|
||||||
CREATE TABLE stats_date_agg (
|
|
||||||
bucket INTEGER NOT NULL, count INTEGER NOT NULL
|
|
||||||
);
|
|
||||||
CREATE TABLE stats_mime_agg (
|
|
||||||
mime TEXT NOT NULL, size INTEGER NOT NULL,
|
|
||||||
count INTEGER NOT NULL
|
|
||||||
);
|
|
||||||
CREATE TABLE embedding (
|
|
||||||
id TEXT REFERENCES document(id),
|
|
||||||
model_id INTEGER NOT NULL references model(id),
|
|
||||||
start INTEGER NOT NULL,
|
|
||||||
end INTEGER,
|
|
||||||
embedding BLOB NOT NULL,
|
|
||||||
PRIMARY KEY (id, model_id, start)
|
|
||||||
);
|
|
||||||
CREATE TABLE model (
|
|
||||||
id INTEGER PRIMARY KEY,
|
|
||||||
name TEXT NOT NULL UNIQUE CHECK (
|
|
||||||
length(name) < 16
|
|
||||||
),
|
|
||||||
url TEXT,
|
|
||||||
path TEXT NOT NULL UNIQUE,
|
|
||||||
size INTEGER NOT NULL,
|
|
||||||
type TEXT NOT NULL CHECK (
|
|
||||||
type IN ('flat', 'nested')
|
|
||||||
)
|
|
||||||
);
|
|
||||||
"""
|
|
||||||
|
|
||||||
content = "".join(random.choices(string.ascii_letters, k=500))
|
|
||||||
|
|
||||||
|
|
||||||
def gen_document():
|
|
||||||
return [
|
|
||||||
md5(random.randbytes(8)).hexdigest(),
|
|
||||||
json.dumps({
|
|
||||||
"content": content,
|
|
||||||
"mime": "image/jpeg",
|
|
||||||
"extension": "jpeg",
|
|
||||||
"name": "test",
|
|
||||||
"path": "",
|
|
||||||
})
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
DB_NAME = "big_index.sist2"
|
|
||||||
SIZE = 30_000_000
|
|
||||||
|
|
||||||
os.remove(DB_NAME)
|
|
||||||
db = sqlite3.connect(DB_NAME)
|
|
||||||
db.executescript(schema)
|
|
||||||
|
|
||||||
db.executescript("""
|
|
||||||
PRAGMA journal_mode = OFF;
|
|
||||||
PRAGMA synchronous = 0;
|
|
||||||
""")
|
|
||||||
|
|
||||||
for _ in tqdm(range(SIZE), total=SIZE):
|
|
||||||
db.execute(
|
|
||||||
"INSERT INTO document (id, version, mtime, size, json_data) VALUES (?, 1, 1000000, 10000, ?)",
|
|
||||||
gen_document()
|
|
||||||
)
|
|
||||||
|
|
||||||
# 1. Enable rowid from document
|
|
||||||
# 2. CREATE TABLE marked (
|
|
||||||
# id INTEGER PRIMARY KEY,
|
|
||||||
# marked int
|
|
||||||
# );
|
|
||||||
# 3. Set FK for document_sidecar, embedding, tag, thumbnail
|
|
||||||
# 4. Toggle FK if debug
|
|
||||||
|
|
||||||
db.commit()
|
|
@ -1,3 +1,3 @@
|
|||||||
docker run --rm -it --name "sist2-dev-es3"\
|
docker run --rm -it --name "sist2-dev-es"\
|
||||||
-p 9200:9200 -e "discovery.type=single-node" \
|
-p 9200:9200 -e "discovery.type=single-node" \
|
||||||
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.17.9
|
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.17.9
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
docker run --rm -it --name "sist2-dev-es3"\
|
docker run --rm -it --name "sist2-dev-es"\
|
||||||
-p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
|
-p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
|
||||||
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.7.0
|
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.7.0
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
to the <a href="https://github.com/simon987/sist2/issues/new/choose" target="_blank">issue tracker on
|
to the <a href="https://github.com/simon987/sist2/issues/new/choose" target="_blank">issue tracker on
|
||||||
Github</a>. Thank you!
|
Github</a>. Thank you!
|
||||||
</b-alert>
|
</b-alert>
|
||||||
<router-view v-if="$store.state.sist2AdminInfo"/>
|
<router-view/>
|
||||||
</b-container>
|
</b-container>
|
||||||
</div>
|
</div>
|
||||||
</template>
|
</template>
|
||||||
@ -71,12 +71,10 @@ html, body {
|
|||||||
|
|
||||||
.info-icon {
|
.info-icon {
|
||||||
width: 1rem;
|
width: 1rem;
|
||||||
min-width: 1rem;
|
|
||||||
margin-right: 0.2rem;
|
margin-right: 0.2rem;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
line-height: 1rem;
|
line-height: 1rem;
|
||||||
height: 1rem;
|
height: 1rem;
|
||||||
min-height: 1rem;
|
|
||||||
background-image: url();
|
background-image: url();
|
||||||
filter: brightness(45%);
|
filter: brightness(45%);
|
||||||
display: block;
|
display: block;
|
||||||
|
@ -139,38 +139,6 @@ class Sist2AdminApi {
|
|||||||
deleteTaskLogs(taskId) {
|
deleteTaskLogs(taskId) {
|
||||||
return axios.post(`${this.baseUrl}/api/task/${taskId}/delete_logs`);
|
return axios.post(`${this.baseUrl}/api/task/${taskId}/delete_logs`);
|
||||||
}
|
}
|
||||||
|
|
||||||
getUserScripts() {
|
|
||||||
return axios.get(`${this.baseUrl}/api/user_script`);
|
|
||||||
}
|
|
||||||
|
|
||||||
getUserScript(name) {
|
|
||||||
return axios.get(`${this.baseUrl}/api/user_script/${name}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
createUserScript(name, template) {
|
|
||||||
return axios.post(`${this.baseUrl}/api/user_script/${name}`, null, {
|
|
||||||
params: {
|
|
||||||
template: template
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
updateUserScript(name, data) {
|
|
||||||
return axios.put(`${this.baseUrl}/api/user_script/${name}`, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
deleteUserScript(name) {
|
|
||||||
return axios.delete(`${this.baseUrl}/api/user_script/${name}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
testUserScript(name, job) {
|
|
||||||
return axios.get(`${this.baseUrl}/api/user_script/${name}/run`, {
|
|
||||||
params: {
|
|
||||||
job: job
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export default new Sist2AdminApi()
|
export default new Sist2AdminApi()
|
@ -1,34 +0,0 @@
|
|||||||
<template>
|
|
||||||
<b-progress v-if="loading" striped animated value="100"></b-progress>
|
|
||||||
<span v-else-if="jobs.length === 0"></span>
|
|
||||||
<b-form-select v-else :options="jobs" text-field="name" value-field="name"
|
|
||||||
@change="$emit('change', $event)" :value="$t('selectJob')"></b-form-select>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
import Sist2AdminApi from "@/Sist2AdminApi";
|
|
||||||
|
|
||||||
export default {
|
|
||||||
name: "JobSelect",
|
|
||||||
mounted() {
|
|
||||||
Sist2AdminApi.getJobs().then(resp => {
|
|
||||||
this._jobs = resp.data;
|
|
||||||
this.loading = false;
|
|
||||||
});
|
|
||||||
},
|
|
||||||
computed: {
|
|
||||||
jobs() {
|
|
||||||
return [
|
|
||||||
{name: this.$t("selectJob"), disabled: true},
|
|
||||||
...this._jobs.filter(job => job.index_path)
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
data() {
|
|
||||||
return {
|
|
||||||
loading: true,
|
|
||||||
_jobs: null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
</script>
|
|
@ -1,18 +0,0 @@
|
|||||||
<template>
|
|
||||||
<b-list-group-item action :to="`/userScript/${script.name}`">
|
|
||||||
|
|
||||||
<div class="d-flex w-100 justify-content-between">
|
|
||||||
<h5 class="mb-1">
|
|
||||||
{{ script.name }}
|
|
||||||
</h5>
|
|
||||||
</div>
|
|
||||||
</b-list-group-item>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
|
|
||||||
export default {
|
|
||||||
name: "UserScriptListItem",
|
|
||||||
props: ["script"],
|
|
||||||
}
|
|
||||||
</script>
|
|
@ -1,88 +0,0 @@
|
|||||||
<template>
|
|
||||||
<b-progress v-if="loading" striped animated value="100"></b-progress>
|
|
||||||
|
|
||||||
<b-row v-else>
|
|
||||||
<b-col cols="6">
|
|
||||||
<h5>Selected scripts</h5>
|
|
||||||
<b-list-group>
|
|
||||||
<b-list-group-item v-for="script in selectedScripts" :key="script"
|
|
||||||
button
|
|
||||||
@click="onRemoveScript(script)"
|
|
||||||
class="d-flex justify-content-between align-items-center">
|
|
||||||
{{ script }}
|
|
||||||
<b-button-group>
|
|
||||||
<b-button variant="light" @click.stop="moveUpScript(script)">↑</b-button>
|
|
||||||
<b-button variant="light" @click.stop="moveDownScript(script)">↓</b-button>
|
|
||||||
</b-button-group>
|
|
||||||
</b-list-group-item>
|
|
||||||
</b-list-group>
|
|
||||||
</b-col>
|
|
||||||
<b-col cols="6">
|
|
||||||
<h5>Available scripts</h5>
|
|
||||||
<b-list-group>
|
|
||||||
<b-list-group-item v-for="script in availableScripts" :key="script" button
|
|
||||||
@click="onSelectScript(script)">
|
|
||||||
{{ script }}
|
|
||||||
</b-list-group-item>
|
|
||||||
</b-list-group>
|
|
||||||
</b-col>
|
|
||||||
</b-row>
|
|
||||||
|
|
||||||
<!-- <b-checkbox-group v-else :options="scripts" stacked :checked="selectedScripts"-->
|
|
||||||
<!-- @input="$emit('change', $event)"></b-checkbox-group>-->
|
|
||||||
</template>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
import Sist2AdminApi from "@/Sist2AdminApi";
|
|
||||||
|
|
||||||
export default {
|
|
||||||
name: "UserScriptPicker",
|
|
||||||
props: ["selectedScripts"],
|
|
||||||
data() {
|
|
||||||
return {
|
|
||||||
loading: true,
|
|
||||||
scripts: []
|
|
||||||
}
|
|
||||||
},
|
|
||||||
computed: {
|
|
||||||
availableScripts() {
|
|
||||||
return this.scripts.filter(script => !this.selectedScripts.includes(script))
|
|
||||||
}
|
|
||||||
},
|
|
||||||
mounted() {
|
|
||||||
Sist2AdminApi.getUserScripts().then(resp => {
|
|
||||||
this.scripts = resp.data.map(script => script.name);
|
|
||||||
this.loading = false;
|
|
||||||
});
|
|
||||||
},
|
|
||||||
methods: {
|
|
||||||
onSelectScript(name) {
|
|
||||||
this.selectedScripts.push(name);
|
|
||||||
this.$emit("change", this.selectedScripts)
|
|
||||||
},
|
|
||||||
onRemoveScript(name) {
|
|
||||||
this.selectedScripts.splice(this.selectedScripts.indexOf(name), 1);
|
|
||||||
this.$emit("change", this.selectedScripts);
|
|
||||||
},
|
|
||||||
moveUpScript(name) {
|
|
||||||
const index = this.selectedScripts.indexOf(name);
|
|
||||||
if (index > 0) {
|
|
||||||
this.selectedScripts.splice(index, 1);
|
|
||||||
this.selectedScripts.splice(index - 1, 0, name);
|
|
||||||
}
|
|
||||||
this.$emit("change", this.selectedScripts);
|
|
||||||
},
|
|
||||||
moveDownScript(name) {
|
|
||||||
const index = this.selectedScripts.indexOf(name);
|
|
||||||
if (index < this.selectedScripts.length - 1) {
|
|
||||||
this.selectedScripts.splice(index, 1);
|
|
||||||
this.selectedScripts.splice(index + 1, 0, name);
|
|
||||||
}
|
|
||||||
this.$emit("change", this.selectedScripts);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
</script>
|
|
||||||
|
|
||||||
<style scoped>
|
|
||||||
</style>
|
|
@ -54,18 +54,8 @@ export default {
|
|||||||
|
|
||||||
frontendTab: "Frontend",
|
frontendTab: "Frontend",
|
||||||
backendTab: "Backend",
|
backendTab: "Backend",
|
||||||
scripts: "User Scripts",
|
|
||||||
script: "User Script",
|
|
||||||
testScript: "Test/debug User Script",
|
|
||||||
newScriptName: "New script name",
|
|
||||||
scriptType: "Script type",
|
|
||||||
scriptCode: "Script code (Python)",
|
|
||||||
scriptOptions: "User scripts",
|
|
||||||
gitRepository: "Git repository URL",
|
|
||||||
extraArgs: "Extra command line arguments",
|
|
||||||
|
|
||||||
selectJobs: "Available jobs",
|
selectJobs: "Available jobs",
|
||||||
selectJob: "Select a job",
|
|
||||||
webOptions: {
|
webOptions: {
|
||||||
title: "Web options",
|
title: "Web options",
|
||||||
lang: "UI Language",
|
lang: "UI Language",
|
||||||
|
@ -6,18 +6,12 @@ import Tasks from "@/views/Tasks";
|
|||||||
import Frontend from "@/views/Frontend";
|
import Frontend from "@/views/Frontend";
|
||||||
import Tail from "@/views/Tail";
|
import Tail from "@/views/Tail";
|
||||||
import SearchBackend from "@/views/SearchBackend.vue";
|
import SearchBackend from "@/views/SearchBackend.vue";
|
||||||
import UserScript from "@/views/UserScript.vue";
|
|
||||||
|
|
||||||
Vue.use(VueRouter);
|
Vue.use(VueRouter);
|
||||||
|
|
||||||
const routes = [
|
const routes = [
|
||||||
{
|
{
|
||||||
path: "/task",
|
path: "/",
|
||||||
name: "Tasks",
|
|
||||||
component: Tasks
|
|
||||||
},
|
|
||||||
{
|
|
||||||
path: "/:tab?",
|
|
||||||
name: "Home",
|
name: "Home",
|
||||||
component: Home
|
component: Home
|
||||||
},
|
},
|
||||||
@ -26,6 +20,11 @@ const routes = [
|
|||||||
name: "Job",
|
name: "Job",
|
||||||
component: Job
|
component: Job
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
path: "/task/",
|
||||||
|
name: "Tasks",
|
||||||
|
component: Tasks
|
||||||
|
},
|
||||||
{
|
{
|
||||||
path: "/frontend/:name",
|
path: "/frontend/:name",
|
||||||
name: "Frontend",
|
name: "Frontend",
|
||||||
@ -36,11 +35,6 @@ const routes = [
|
|||||||
name: "SearchBackend",
|
name: "SearchBackend",
|
||||||
component: SearchBackend
|
component: SearchBackend
|
||||||
},
|
},
|
||||||
{
|
|
||||||
path: "/userScript/:name",
|
|
||||||
name: "UserScript",
|
|
||||||
component: UserScript
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
path: "/log/:taskId",
|
path: "/log/:taskId",
|
||||||
name: "Tail",
|
name: "Tail",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
<template>
|
<template>
|
||||||
<div>
|
<div>
|
||||||
<b-tabs content-class="mt-3" v-model="tab" @input="onTabChange($event)">
|
<b-tabs content-class="mt-3">
|
||||||
<b-tab :title="$t('backendTab')">
|
<b-tab :title="$t('backendTab')">
|
||||||
|
|
||||||
<b-card>
|
<b-card>
|
||||||
@ -25,6 +25,7 @@
|
|||||||
<SearchBackendListItem v-for="backend in backends"
|
<SearchBackendListItem v-for="backend in backends"
|
||||||
:key="backend.name" :backend="backend"></SearchBackendListItem>
|
:key="backend.name" :backend="backend"></SearchBackendListItem>
|
||||||
</b-list-group>
|
</b-list-group>
|
||||||
|
|
||||||
</b-card>
|
</b-card>
|
||||||
|
|
||||||
<br/>
|
<br/>
|
||||||
@ -58,37 +59,6 @@
|
|||||||
</b-list-group>
|
</b-list-group>
|
||||||
</b-card>
|
</b-card>
|
||||||
</b-tab>
|
</b-tab>
|
||||||
<b-tab :title="$t('scripts')">
|
|
||||||
|
|
||||||
<b-progress v-if="scriptsLoading" striped animated value="100"></b-progress>
|
|
||||||
<b-card v-else>
|
|
||||||
<b-card-title>{{ $t("scripts") }}</b-card-title>
|
|
||||||
|
|
||||||
<label>Select template</label>
|
|
||||||
<b-form-radio-group stacked :options="scriptTemplates" v-model="scriptTemplate"></b-form-radio-group>
|
|
||||||
<br>
|
|
||||||
|
|
||||||
<b-row>
|
|
||||||
<b-col>
|
|
||||||
<b-form-input v-model="newScriptName" :disabled="!scriptTemplate" :placeholder="$t('newScriptName')"></b-form-input>
|
|
||||||
</b-col>
|
|
||||||
<b-col>
|
|
||||||
<b-button variant="primary" @click="createScript()"
|
|
||||||
:disabled="!scriptNameValid(newScriptName)">
|
|
||||||
{{ $t("create") }}
|
|
||||||
</b-button>
|
|
||||||
</b-col>
|
|
||||||
</b-row>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<b-list-group>
|
|
||||||
<UserScriptListItem v-for="script in scripts"
|
|
||||||
:key="script.name" :script="script"></UserScriptListItem>
|
|
||||||
</b-list-group>
|
|
||||||
|
|
||||||
</b-card>
|
|
||||||
</b-tab>
|
|
||||||
<b-tab :title="$t('frontendTab')">
|
<b-tab :title="$t('frontendTab')">
|
||||||
<b-card>
|
<b-card>
|
||||||
|
|
||||||
@ -126,11 +96,10 @@ import {formatBindAddress} from "@/util";
|
|||||||
import Sist2AdminApi from "@/Sist2AdminApi";
|
import Sist2AdminApi from "@/Sist2AdminApi";
|
||||||
import FrontendListItem from "@/components/FrontendListItem";
|
import FrontendListItem from "@/components/FrontendListItem";
|
||||||
import SearchBackendListItem from "@/components/SearchBackendListItem.vue";
|
import SearchBackendListItem from "@/components/SearchBackendListItem.vue";
|
||||||
import UserScriptListItem from "@/components/UserScriptListItem.vue";
|
|
||||||
|
|
||||||
export default {
|
export default {
|
||||||
name: "Jobs",
|
name: "Jobs",
|
||||||
components: {UserScriptListItem, SearchBackendListItem, JobListItem, FrontendListItem},
|
components: {SearchBackendListItem, JobListItem, FrontendListItem},
|
||||||
data() {
|
data() {
|
||||||
return {
|
return {
|
||||||
jobsLoading: true,
|
jobsLoading: true,
|
||||||
@ -146,24 +115,11 @@ export default {
|
|||||||
backendsLoading: true,
|
backendsLoading: true,
|
||||||
newBackendName: "",
|
newBackendName: "",
|
||||||
|
|
||||||
scripts: [],
|
showHelp: false
|
||||||
scriptTemplates: [],
|
|
||||||
newScriptName: "",
|
|
||||||
scriptTemplate: null,
|
|
||||||
scriptsLoading: true,
|
|
||||||
|
|
||||||
showHelp: false,
|
|
||||||
tab: 0
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
mounted() {
|
mounted() {
|
||||||
this.loading = true;
|
this.loading = true;
|
||||||
if (this.$route.params.tab) {
|
|
||||||
console.log("mounted " + this.$route.params.tab)
|
|
||||||
window.setTimeout(() => {
|
|
||||||
this.tab = Math.round(Number(this.$route.params.tab));
|
|
||||||
}, 1)
|
|
||||||
}
|
|
||||||
this.reload();
|
this.reload();
|
||||||
},
|
},
|
||||||
methods: {
|
methods: {
|
||||||
@ -188,20 +144,11 @@ export default {
|
|||||||
|
|
||||||
return /^[a-zA-Z0-9-_,.; ]+$/.test(name);
|
return /^[a-zA-Z0-9-_,.; ]+$/.test(name);
|
||||||
},
|
},
|
||||||
scriptNameValid(name) {
|
|
||||||
if (this.scripts.some(script => script.name === name)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (name.length > 16) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return /^[a-zA-Z0-9-_,.; ]+$/.test(name);
|
|
||||||
},
|
|
||||||
reload() {
|
reload() {
|
||||||
Sist2AdminApi.getJobs().then(resp => {
|
Sist2AdminApi.getJobs().then(resp => {
|
||||||
this.jobs = resp.data;
|
this.jobs = resp.data;
|
||||||
this.jobsLoading = false;
|
this.jobsLoading = false;
|
||||||
|
|
||||||
this.showHelp = this.jobs.length === 0;
|
this.showHelp = this.jobs.length === 0;
|
||||||
});
|
});
|
||||||
Sist2AdminApi.getFrontends().then(resp => {
|
Sist2AdminApi.getFrontends().then(resp => {
|
||||||
@ -212,11 +159,6 @@ export default {
|
|||||||
this.backends = resp.data;
|
this.backends = resp.data;
|
||||||
this.backendsLoading = false;
|
this.backendsLoading = false;
|
||||||
})
|
})
|
||||||
Sist2AdminApi.getUserScripts().then(resp => {
|
|
||||||
this.scripts = resp.data;
|
|
||||||
this.scriptTemplates = this.$store.state.sist2AdminInfo.user_script_templates;
|
|
||||||
this.scriptsLoading = false;
|
|
||||||
})
|
|
||||||
},
|
},
|
||||||
createJob() {
|
createJob() {
|
||||||
Sist2AdminApi.createJob(this.newJobName).then(this.reload);
|
Sist2AdminApi.createJob(this.newJobName).then(this.reload);
|
||||||
@ -226,14 +168,6 @@ export default {
|
|||||||
},
|
},
|
||||||
createBackend() {
|
createBackend() {
|
||||||
Sist2AdminApi.createBackend(this.newBackendName).then(this.reload);
|
Sist2AdminApi.createBackend(this.newBackendName).then(this.reload);
|
||||||
},
|
|
||||||
createScript() {
|
|
||||||
Sist2AdminApi.createUserScript(this.newScriptName, this.scriptTemplate).then(this.reload)
|
|
||||||
},
|
|
||||||
onTabChange(tab) {
|
|
||||||
if (this.$route.params.tab != tab) {
|
|
||||||
this.$router.push({params: {tab: tab}})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -30,13 +30,6 @@
|
|||||||
<SearchBackendSelect :value="job.index_options.search_backend"
|
<SearchBackendSelect :value="job.index_options.search_backend"
|
||||||
@change="onBackendSelect($event)"></SearchBackendSelect>
|
@change="onBackendSelect($event)"></SearchBackendSelect>
|
||||||
</b-card>
|
</b-card>
|
||||||
<br/>
|
|
||||||
|
|
||||||
<h4>{{ $t("scriptOptions") }}</h4>
|
|
||||||
<b-card>
|
|
||||||
<UserScriptPicker :selected-scripts="job.user_scripts"
|
|
||||||
@change="onScriptChange($event)"></UserScriptPicker>
|
|
||||||
</b-card>
|
|
||||||
|
|
||||||
<br/>
|
<br/>
|
||||||
|
|
||||||
@ -55,12 +48,10 @@ import ScanOptions from "@/components/ScanOptions";
|
|||||||
import Sist2AdminApi from "@/Sist2AdminApi";
|
import Sist2AdminApi from "@/Sist2AdminApi";
|
||||||
import JobOptions from "@/components/JobOptions";
|
import JobOptions from "@/components/JobOptions";
|
||||||
import SearchBackendSelect from "@/components/SearchBackendSelect.vue";
|
import SearchBackendSelect from "@/components/SearchBackendSelect.vue";
|
||||||
import UserScriptPicker from "@/components/UserScriptPicker.vue";
|
|
||||||
|
|
||||||
export default {
|
export default {
|
||||||
name: "Job",
|
name: "Job",
|
||||||
components: {
|
components: {
|
||||||
UserScriptPicker,
|
|
||||||
SearchBackendSelect,
|
SearchBackendSelect,
|
||||||
ScanOptions,
|
ScanOptions,
|
||||||
JobOptions
|
JobOptions
|
||||||
@ -104,10 +95,6 @@ export default {
|
|||||||
onBackendSelect(backend) {
|
onBackendSelect(backend) {
|
||||||
this.job.index_options.search_backend = backend;
|
this.job.index_options.search_backend = backend;
|
||||||
this.update();
|
this.update();
|
||||||
},
|
|
||||||
onScriptChange(scripts) {
|
|
||||||
this.job.user_scripts = scripts;
|
|
||||||
this.update();
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
mounted() {
|
mounted() {
|
||||||
|
@ -44,6 +44,9 @@
|
|||||||
|
|
||||||
<label>{{ $t("backendOptions.batchSize") }}</label>
|
<label>{{ $t("backendOptions.batchSize") }}</label>
|
||||||
<b-form-input v-model="backend.batch_size" type="number" min="1" @change="update()"></b-form-input>
|
<b-form-input v-model="backend.batch_size" type="number" min="1" @change="update()"></b-form-input>
|
||||||
|
|
||||||
|
<label>{{ $t("backendOptions.script") }}</label>
|
||||||
|
<b-form-textarea v-model="backend.script" rows="6" @change="update()"></b-form-textarea>
|
||||||
</template>
|
</template>
|
||||||
<template v-else>
|
<template v-else>
|
||||||
<label>{{ $t("backendOptions.searchIndex") }}</label>
|
<label>{{ $t("backendOptions.searchIndex") }}</label>
|
||||||
|
@ -92,9 +92,6 @@ export default {
|
|||||||
if ("stderr" in message) {
|
if ("stderr" in message) {
|
||||||
message.level = "ERROR";
|
message.level = "ERROR";
|
||||||
message.message = message["stderr"];
|
message.message = message["stderr"];
|
||||||
} else if ("stdout" in message) {
|
|
||||||
message.level = "INFO";
|
|
||||||
message.message = message["stdout"];
|
|
||||||
} else {
|
} else {
|
||||||
message.level = "ADMIN";
|
message.level = "ADMIN";
|
||||||
message.message = message["sist2-admin"];
|
message.message = message["sist2-admin"];
|
||||||
|
@ -1,117 +0,0 @@
|
|||||||
<template>
|
|
||||||
<b-progress v-if="loading" striped animated value="100"></b-progress>
|
|
||||||
<b-card v-else>
|
|
||||||
<b-card-title>
|
|
||||||
{{ $route.params.name }}
|
|
||||||
{{ $t("script") }}
|
|
||||||
</b-card-title>
|
|
||||||
|
|
||||||
<div class="mb-3">
|
|
||||||
<b-button variant="danger" @click="deleteScript()">{{ $t("delete") }}</b-button>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<b-card>
|
|
||||||
<h5>{{ $t("testScript") }}</h5>
|
|
||||||
|
|
||||||
<b-row>
|
|
||||||
<b-col cols="11">
|
|
||||||
<JobSelect @change="onJobSelect($event)"></JobSelect>
|
|
||||||
</b-col>
|
|
||||||
<b-col cols="1">
|
|
||||||
<b-button :disabled="!selectedTestJob" variant="primary" @click="testScript()">{{ $t("test") }}
|
|
||||||
</b-button>
|
|
||||||
</b-col>
|
|
||||||
</b-row>
|
|
||||||
|
|
||||||
</b-card>
|
|
||||||
<br/>
|
|
||||||
|
|
||||||
<label>{{ $t("scriptType") }}</label>
|
|
||||||
<b-form-select :options="['git', 'simple']" v-model="script.type" @change="update()"></b-form-select>
|
|
||||||
|
|
||||||
<template v-if="script.type === 'git'">
|
|
||||||
<label>{{ $t("gitRepository") }}</label>
|
|
||||||
<b-form-input v-model="script.git_repository" placeholder="https://github.com/example/example.git"
|
|
||||||
@change="update()"></b-form-input>
|
|
||||||
|
|
||||||
<label>{{ $t("extraArgs") }}</label>
|
|
||||||
<b-form-input v-model="script.extra_args" @change="update()" class="text-monospace"></b-form-input>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
<template v-if="script.type === 'simple'">
|
|
||||||
|
|
||||||
<label>{{ $t("scriptCode") }}</label>
|
|
||||||
<p>Find sist2-python documentation <a href="https://sist2-python.readthedocs.io/" target="_blank">here</a></p>
|
|
||||||
<b-textarea rows="15" class="text-monospace" v-model="script.script" @change="update()" spellcheck="false"></b-textarea>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
<template v-if="script.type === 'local'">
|
|
||||||
<!-- TODO-->
|
|
||||||
</template>
|
|
||||||
|
|
||||||
|
|
||||||
</b-card>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
|
|
||||||
import Sist2AdminApi from "@/Sist2AdminApi";
|
|
||||||
import JobOptions from "@/components/JobOptions.vue";
|
|
||||||
import JobCheckboxGroup from "@/components/JobCheckboxGroup.vue";
|
|
||||||
import JobSelect from "@/components/JobSelect.vue";
|
|
||||||
|
|
||||||
export default {
|
|
||||||
name: "UserScript",
|
|
||||||
components: {JobSelect, JobCheckboxGroup, JobOptions},
|
|
||||||
data() {
|
|
||||||
return {
|
|
||||||
loading: true,
|
|
||||||
script: null,
|
|
||||||
selectedTestJob: null
|
|
||||||
}
|
|
||||||
},
|
|
||||||
methods: {
|
|
||||||
update() {
|
|
||||||
Sist2AdminApi.updateUserScript(this.name, this.script);
|
|
||||||
},
|
|
||||||
onJobSelect(job) {
|
|
||||||
this.selectedTestJob = job;
|
|
||||||
},
|
|
||||||
deleteScript() {
|
|
||||||
Sist2AdminApi.deleteUserScript(this.name)
|
|
||||||
.then(() => {
|
|
||||||
this.$router.push("/");
|
|
||||||
})
|
|
||||||
.catch(err => {
|
|
||||||
this.$bvToast.toast("Cannot delete user script " +
|
|
||||||
"because it is referenced by a job", {
|
|
||||||
title: "Error",
|
|
||||||
variant: "danger",
|
|
||||||
toaster: "b-toaster-bottom-right"
|
|
||||||
});
|
|
||||||
})
|
|
||||||
},
|
|
||||||
testScript() {
|
|
||||||
Sist2AdminApi.testUserScript(this.name, this.selectedTestJob)
|
|
||||||
.then(() => {
|
|
||||||
this.$bvToast.toast(this.$t("runJobConfirmation"), {
|
|
||||||
title: this.$t("runJobConfirmationTitle"),
|
|
||||||
variant: "success",
|
|
||||||
toaster: "b-toaster-bottom-right"
|
|
||||||
});
|
|
||||||
})
|
|
||||||
}
|
|
||||||
},
|
|
||||||
mounted() {
|
|
||||||
Sist2AdminApi.getUserScript(this.name).then(resp => {
|
|
||||||
this.script = resp.data;
|
|
||||||
this.loading = false;
|
|
||||||
});
|
|
||||||
},
|
|
||||||
computed: {
|
|
||||||
name() {
|
|
||||||
return this.$route.params.name;
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
</script>
|
|
@ -3,5 +3,3 @@ git+https://github.com/simon987/hexlib.git
|
|||||||
uvicorn
|
uvicorn
|
||||||
websockets
|
websockets
|
||||||
pycron
|
pycron
|
||||||
GitPython
|
|
||||||
git+https://github.com/simon987/sist2-python.git
|
|
@ -18,13 +18,12 @@ from websockets.exceptions import ConnectionClosed
|
|||||||
|
|
||||||
import cron
|
import cron
|
||||||
from config import LOG_FOLDER, logger, WEBSERVER_PORT, DATA_FOLDER, SIST2_BINARY
|
from config import LOG_FOLDER, logger, WEBSERVER_PORT, DATA_FOLDER, SIST2_BINARY
|
||||||
from jobs import Sist2Job, Sist2ScanTask, TaskQueue, Sist2IndexTask, JobStatus, Sist2UserScriptTask
|
from jobs import Sist2Job, Sist2ScanTask, TaskQueue, Sist2IndexTask, JobStatus
|
||||||
from notifications import Subscribe, Notifications
|
from notifications import Subscribe, Notifications
|
||||||
from sist2 import Sist2, Sist2SearchBackend
|
from sist2 import Sist2, Sist2SearchBackend
|
||||||
from state import migrate_v1_to_v2, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEMA_VERSION, migrate_v3_to_v4, \
|
from state import migrate_v1_to_v2, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEMA_VERSION, migrate_v3_to_v4, \
|
||||||
get_log_files_to_remove, delete_log_file, create_default_search_backends
|
get_log_files_to_remove, delete_log_file, create_default_search_backends
|
||||||
from web import Sist2Frontend
|
from web import Sist2Frontend
|
||||||
from script import UserScript, SCRIPT_TEMPLATES
|
|
||||||
|
|
||||||
sist2 = Sist2(SIST2_BINARY, DATA_FOLDER)
|
sist2 = Sist2(SIST2_BINARY, DATA_FOLDER)
|
||||||
db = PersistentState(dbfile=os.path.join(DATA_FOLDER, "state.db"))
|
db = PersistentState(dbfile=os.path.join(DATA_FOLDER, "state.db"))
|
||||||
@ -53,8 +52,7 @@ async def home():
|
|||||||
async def api():
|
async def api():
|
||||||
return {
|
return {
|
||||||
"tesseract_langs": TESSERACT_LANGS,
|
"tesseract_langs": TESSERACT_LANGS,
|
||||||
"logs_folder": LOG_FOLDER,
|
"logs_folder": LOG_FOLDER
|
||||||
"user_script_templates": list(SCRIPT_TEMPLATES.keys())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -115,6 +113,8 @@ async def update_job(name: str, new_job: Sist2Job):
|
|||||||
async def update_frontend(name: str, frontend: Sist2Frontend):
|
async def update_frontend(name: str, frontend: Sist2Frontend):
|
||||||
db["frontends"][name] = frontend
|
db["frontends"][name] = frontend
|
||||||
|
|
||||||
|
# TODO: Check etag
|
||||||
|
|
||||||
return "ok"
|
return "ok"
|
||||||
|
|
||||||
|
|
||||||
@ -150,21 +150,9 @@ def _run_job(job: Sist2Job):
|
|||||||
db["jobs"][job.name] = job
|
db["jobs"][job.name] = job
|
||||||
|
|
||||||
scan_task = Sist2ScanTask(job, f"Scan [{job.name}]")
|
scan_task = Sist2ScanTask(job, f"Scan [{job.name}]")
|
||||||
|
index_task = Sist2IndexTask(job, f"Index [{job.name}]", depends_on=scan_task)
|
||||||
index_depends_on = scan_task
|
|
||||||
script_tasks = []
|
|
||||||
for script_name in job.user_scripts:
|
|
||||||
script = db["user_scripts"][script_name]
|
|
||||||
|
|
||||||
task = Sist2UserScriptTask(script, job, f"Script <{script_name}> [{job.name}]", depends_on=scan_task)
|
|
||||||
script_tasks.append(task)
|
|
||||||
index_depends_on = task
|
|
||||||
|
|
||||||
index_task = Sist2IndexTask(job, f"Index [{job.name}]", depends_on=index_depends_on)
|
|
||||||
|
|
||||||
task_queue.submit(scan_task)
|
task_queue.submit(scan_task)
|
||||||
for task in script_tasks:
|
|
||||||
task_queue.submit(task)
|
|
||||||
task_queue.submit(index_task)
|
task_queue.submit(index_task)
|
||||||
|
|
||||||
|
|
||||||
@ -179,22 +167,6 @@ async def run_job(name: str):
|
|||||||
return "ok"
|
return "ok"
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/user_script/{name:str}/run")
|
|
||||||
def run_user_script(name: str, job: str):
|
|
||||||
script = db["user_scripts"][name]
|
|
||||||
if not script:
|
|
||||||
raise HTTPException(status_code=404)
|
|
||||||
job = db["jobs"][job]
|
|
||||||
if not job:
|
|
||||||
raise HTTPException(status_code=404)
|
|
||||||
|
|
||||||
script_task = Sist2UserScriptTask(script, job, f"Script <{name}> [{job.name}]")
|
|
||||||
|
|
||||||
task_queue.submit(script_task)
|
|
||||||
|
|
||||||
return "ok"
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/job/{name:str}/logs_to_delete")
|
@app.get("/api/job/{name:str}/logs_to_delete")
|
||||||
async def task_history(n: int, name: str):
|
async def task_history(n: int, name: str):
|
||||||
return get_log_files_to_remove(db, name, n)
|
return get_log_files_to_remove(db, name, n)
|
||||||
@ -267,7 +239,7 @@ def check_es_version(es_url: str, insecure: bool):
|
|||||||
es_url = f"{url.scheme}://{url.hostname}:{url.port}"
|
es_url = f"{url.scheme}://{url.hostname}:{url.port}"
|
||||||
else:
|
else:
|
||||||
auth = None
|
auth = None
|
||||||
r = requests.get(es_url, verify=not insecure, auth=auth)
|
r = requests.get(es_url, verify=insecure, auth=auth)
|
||||||
except SSLError:
|
except SSLError:
|
||||||
return {
|
return {
|
||||||
"ok": False,
|
"ok": False,
|
||||||
@ -403,59 +375,6 @@ def create_search_backend(name: str):
|
|||||||
return backend
|
return backend
|
||||||
|
|
||||||
|
|
||||||
@app.delete("/api/user_script/{name:str}")
|
|
||||||
def delete_user_script(name: str):
|
|
||||||
if db["user_scripts"][name] is None:
|
|
||||||
return HTTPException(status_code=404)
|
|
||||||
|
|
||||||
if any(name in job.user_scripts for job in db["jobs"]):
|
|
||||||
raise HTTPException(status_code=400, detail="in use (job)")
|
|
||||||
|
|
||||||
script: UserScript = db["user_scripts"][name]
|
|
||||||
script.delete_dir()
|
|
||||||
|
|
||||||
del db["user_scripts"][name]
|
|
||||||
|
|
||||||
return "ok"
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/user_script/{name:str}")
|
|
||||||
def create_user_script(name: str, template: str):
|
|
||||||
if db["user_scripts"][name] is not None:
|
|
||||||
return HTTPException(status_code=400, detail="already exists")
|
|
||||||
|
|
||||||
script = SCRIPT_TEMPLATES[template](name)
|
|
||||||
db["user_scripts"][name] = script
|
|
||||||
|
|
||||||
return script
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/user_script")
|
|
||||||
async def get_user_scripts():
|
|
||||||
return list(db["user_scripts"])
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/user_script/{name:str}")
|
|
||||||
async def get_user_script(name: str):
|
|
||||||
backend = db["user_scripts"][name]
|
|
||||||
if not backend:
|
|
||||||
raise HTTPException(status_code=404)
|
|
||||||
|
|
||||||
return backend
|
|
||||||
|
|
||||||
|
|
||||||
@app.put("/api/user_script/{name:str}")
|
|
||||||
async def update_user_script(name: str, script: UserScript):
|
|
||||||
previous_version: UserScript = db["user_scripts"][name]
|
|
||||||
|
|
||||||
if previous_version and previous_version.git_repository != script.git_repository:
|
|
||||||
script.force_clone = True
|
|
||||||
|
|
||||||
db["user_scripts"][name] = script
|
|
||||||
|
|
||||||
return "ok"
|
|
||||||
|
|
||||||
|
|
||||||
def tail(filepath: str, n: int):
|
def tail(filepath: str, n: int):
|
||||||
with open(filepath) as file:
|
with open(filepath) as file:
|
||||||
|
|
||||||
@ -560,8 +479,7 @@ if __name__ == '__main__':
|
|||||||
migrate_v3_to_v4(db)
|
migrate_v3_to_v4(db)
|
||||||
|
|
||||||
if db["sist2_admin"]["info"]["version"] != DB_SCHEMA_VERSION:
|
if db["sist2_admin"]["info"]["version"] != DB_SCHEMA_VERSION:
|
||||||
raise Exception(f"Incompatible database {db.dbfile}. "
|
raise Exception(f"Incompatible database version for {db.dbfile}")
|
||||||
f"Automatic migration is not available, please delete the database file to continue.")
|
|
||||||
|
|
||||||
start_frontends()
|
start_frontends()
|
||||||
cron.initialize(db, _run_job)
|
cron.initialize(db, _run_job)
|
||||||
|
@ -9,11 +9,9 @@ MAX_LOG_SIZE = 1 * 1024 * 1024
|
|||||||
SIST2_BINARY = os.environ.get("SIST2_BINARY", "/root/sist2")
|
SIST2_BINARY = os.environ.get("SIST2_BINARY", "/root/sist2")
|
||||||
DATA_FOLDER = os.environ.get("DATA_FOLDER", "/sist2-admin/")
|
DATA_FOLDER = os.environ.get("DATA_FOLDER", "/sist2-admin/")
|
||||||
LOG_FOLDER = os.path.join(DATA_FOLDER, "logs")
|
LOG_FOLDER = os.path.join(DATA_FOLDER, "logs")
|
||||||
SCRIPT_FOLDER = os.path.join(DATA_FOLDER, "scripts")
|
|
||||||
WEBSERVER_PORT = 8080
|
WEBSERVER_PORT = 8080
|
||||||
|
|
||||||
os.makedirs(LOG_FOLDER, exist_ok=True)
|
os.makedirs(LOG_FOLDER, exist_ok=True)
|
||||||
os.makedirs(SCRIPT_FOLDER, exist_ok=True)
|
|
||||||
os.makedirs(DATA_FOLDER, exist_ok=True)
|
os.makedirs(DATA_FOLDER, exist_ok=True)
|
||||||
|
|
||||||
logger = logging.Logger("sist2-admin")
|
logger = logging.Logger("sist2-admin")
|
||||||
|
@ -1,18 +1,13 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os.path
|
import os.path
|
||||||
import shlex
|
|
||||||
import signal
|
import signal
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from io import TextIOWrapper
|
|
||||||
from logging import FileHandler
|
from logging import FileHandler
|
||||||
from subprocess import Popen
|
|
||||||
import subprocess
|
|
||||||
from threading import Lock, Thread
|
from threading import Lock, Thread
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from typing import List
|
|
||||||
from uuid import uuid4, UUID
|
from uuid import uuid4, UUID
|
||||||
|
|
||||||
from hexlib.db import PersistentState
|
from hexlib.db import PersistentState
|
||||||
@ -23,7 +18,6 @@ from notifications import Notifications
|
|||||||
from sist2 import ScanOptions, IndexOptions, Sist2
|
from sist2 import ScanOptions, IndexOptions, Sist2
|
||||||
from state import RUNNING_FRONTENDS, get_log_files_to_remove, delete_log_file
|
from state import RUNNING_FRONTENDS, get_log_files_to_remove, delete_log_file
|
||||||
from web import Sist2Frontend
|
from web import Sist2Frontend
|
||||||
from script import UserScript
|
|
||||||
|
|
||||||
|
|
||||||
class JobStatus(Enum):
|
class JobStatus(Enum):
|
||||||
@ -38,8 +32,6 @@ class Sist2Job(BaseModel):
|
|||||||
scan_options: ScanOptions
|
scan_options: ScanOptions
|
||||||
index_options: IndexOptions
|
index_options: IndexOptions
|
||||||
|
|
||||||
user_scripts: List[str] = []
|
|
||||||
|
|
||||||
cron_expression: str
|
cron_expression: str
|
||||||
schedule_enabled: bool = False
|
schedule_enabled: bool = False
|
||||||
|
|
||||||
@ -190,7 +182,7 @@ class Sist2IndexTask(Sist2Task):
|
|||||||
|
|
||||||
duration = self.ended - self.started
|
duration = self.ended - self.started
|
||||||
|
|
||||||
ok = return_code in (0, 1)
|
ok = return_code == 0
|
||||||
|
|
||||||
if ok:
|
if ok:
|
||||||
self.restart_running_frontends(db, sist2)
|
self.restart_running_frontends(db, sist2)
|
||||||
@ -239,65 +231,6 @@ class Sist2IndexTask(Sist2Task):
|
|||||||
self._logger.info(json.dumps({"sist2-admin": f"Restart frontend {pid=} {frontend_name=}"}))
|
self._logger.info(json.dumps({"sist2-admin": f"Restart frontend {pid=} {frontend_name=}"}))
|
||||||
|
|
||||||
|
|
||||||
class Sist2UserScriptTask(Sist2Task):
|
|
||||||
|
|
||||||
def __init__(self, user_script: UserScript, job: Sist2Job, display_name: str, depends_on: Sist2Task = None):
|
|
||||||
super().__init__(job, display_name, depends_on=depends_on.id if depends_on else None)
|
|
||||||
self.user_script = user_script
|
|
||||||
|
|
||||||
def run(self, sist2: Sist2, db: PersistentState):
|
|
||||||
super().run(sist2, db)
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.user_script.setup(self.log_callback)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Setup for {self.user_script.name} failed: ")
|
|
||||||
logger.exception(e)
|
|
||||||
self.log_callback({"sist2-admin": f"Setup for {self.user_script.name} failed: {e}"})
|
|
||||||
return -1
|
|
||||||
|
|
||||||
executable = self.user_script.get_executable()
|
|
||||||
index_path = os.path.join(DATA_FOLDER, self.job.index_path)
|
|
||||||
extra_args = self.user_script.extra_args
|
|
||||||
|
|
||||||
args = [
|
|
||||||
executable,
|
|
||||||
index_path,
|
|
||||||
*shlex.split(extra_args)
|
|
||||||
]
|
|
||||||
|
|
||||||
self.log_callback({"sist2-admin": f"Starting user script with {executable=}, {index_path=}, {extra_args=}"})
|
|
||||||
|
|
||||||
proc = Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=self.user_script.script_dir())
|
|
||||||
self.pid = proc.pid
|
|
||||||
|
|
||||||
t_stderr = Thread(target=self._consume_logs, args=(self.log_callback, proc, "stderr", False))
|
|
||||||
t_stderr.start()
|
|
||||||
|
|
||||||
self._consume_logs(self.log_callback, proc, "stdout", True)
|
|
||||||
|
|
||||||
self.ended = datetime.utcnow()
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _consume_logs(logs_cb, proc, stream, wait):
|
|
||||||
pipe_wrapper = TextIOWrapper(getattr(proc, stream), encoding="utf8", errors="ignore")
|
|
||||||
try:
|
|
||||||
for line in pipe_wrapper:
|
|
||||||
if line.strip() == "":
|
|
||||||
continue
|
|
||||||
if line.startswith("$PROGRESS"):
|
|
||||||
progress = json.loads(line[len("$PROGRESS "):])
|
|
||||||
logs_cb({"progress": progress})
|
|
||||||
continue
|
|
||||||
logs_cb({stream: line})
|
|
||||||
finally:
|
|
||||||
if wait:
|
|
||||||
proc.wait()
|
|
||||||
pipe_wrapper.close()
|
|
||||||
|
|
||||||
|
|
||||||
class TaskQueue:
|
class TaskQueue:
|
||||||
def __init__(self, sist2: Sist2, db: PersistentState, notifications: Notifications):
|
def __init__(self, sist2: Sist2, db: PersistentState, notifications: Notifications):
|
||||||
self._lock = Lock()
|
self._lock = Lock()
|
||||||
|
@ -1,126 +0,0 @@
|
|||||||
import os
|
|
||||||
import shutil
|
|
||||||
import stat
|
|
||||||
import subprocess
|
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
from git import Repo
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
from config import SCRIPT_FOLDER
|
|
||||||
|
|
||||||
|
|
||||||
class ScriptType(Enum):
|
|
||||||
LOCAL = "local"
|
|
||||||
SIMPLE = "simple"
|
|
||||||
GIT = "git"
|
|
||||||
|
|
||||||
|
|
||||||
def set_executable(file):
|
|
||||||
os.chmod(file, os.stat(file).st_mode | stat.S_IEXEC)
|
|
||||||
|
|
||||||
|
|
||||||
def _initialize_git_repository(url, path, log_cb, force_clone):
|
|
||||||
log_cb({"sist2-admin": f"Cloning {url}"})
|
|
||||||
|
|
||||||
if force_clone or not os.path.exists(os.path.join(path, ".git")):
|
|
||||||
if force_clone:
|
|
||||||
shutil.rmtree(path, ignore_errors=True)
|
|
||||||
Repo.clone_from(url, path)
|
|
||||||
else:
|
|
||||||
repo = Repo(path)
|
|
||||||
repo.remote("origin").pull()
|
|
||||||
|
|
||||||
setup_script = os.path.join(path, "setup.sh")
|
|
||||||
if setup_script:
|
|
||||||
log_cb({"sist2-admin": f"Executing setup script {setup_script}"})
|
|
||||||
|
|
||||||
set_executable(setup_script)
|
|
||||||
result = subprocess.run([setup_script], cwd=path, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
|
||||||
for line in result.stdout.split(b"\n"):
|
|
||||||
if line:
|
|
||||||
log_cb({"stdout": line.decode()})
|
|
||||||
|
|
||||||
log_cb({"stdout": f"Executed setup script {setup_script}, return code = {result.returncode}"})
|
|
||||||
|
|
||||||
if result.returncode != 0:
|
|
||||||
raise Exception("Error when running setup script!")
|
|
||||||
|
|
||||||
log_cb({"sist2-admin": f"Initialized git repository in {path}"})
|
|
||||||
|
|
||||||
|
|
||||||
class UserScript(BaseModel):
|
|
||||||
name: str
|
|
||||||
type: ScriptType
|
|
||||||
git_repository: str = None
|
|
||||||
force_clone: bool = False
|
|
||||||
script: str = None
|
|
||||||
extra_args: str = ""
|
|
||||||
|
|
||||||
def script_dir(self):
|
|
||||||
return os.path.join(SCRIPT_FOLDER, self.name)
|
|
||||||
|
|
||||||
def setup(self, log_cb):
|
|
||||||
os.makedirs(self.script_dir(), exist_ok=True)
|
|
||||||
|
|
||||||
if self.type == ScriptType.GIT:
|
|
||||||
_initialize_git_repository(self.git_repository, self.script_dir(), log_cb, self.force_clone)
|
|
||||||
self.force_clone = False
|
|
||||||
elif self.type == ScriptType.SIMPLE:
|
|
||||||
self._setup_simple()
|
|
||||||
|
|
||||||
set_executable(self.get_executable())
|
|
||||||
|
|
||||||
def _setup_simple(self):
|
|
||||||
with open(self.get_executable(), "w") as f:
|
|
||||||
f.write(
|
|
||||||
"#!/bin/bash\n"
|
|
||||||
"python run.py \"$@\""
|
|
||||||
)
|
|
||||||
|
|
||||||
with open(os.path.join(self.script_dir(), "run.py"), "w") as f:
|
|
||||||
f.write(self.script)
|
|
||||||
|
|
||||||
def get_executable(self):
|
|
||||||
return os.path.join(self.script_dir(), "run.sh")
|
|
||||||
|
|
||||||
def delete_dir(self):
|
|
||||||
shutil.rmtree(self.script_dir(), ignore_errors=True)
|
|
||||||
|
|
||||||
|
|
||||||
SCRIPT_TEMPLATES = {
|
|
||||||
"CLIP - Generate embeddings to predict the most relevant image based on the text prompt": lambda name: UserScript(
|
|
||||||
name=name,
|
|
||||||
type=ScriptType.GIT,
|
|
||||||
git_repository="https://github.com/simon987/sist2-script-clip",
|
|
||||||
extra_args="--num-tags=1 --tags-file=general.txt --color=#dcd7ff"
|
|
||||||
),
|
|
||||||
"Whisper - Speech to text with OpenAI Whisper": lambda name: UserScript(
|
|
||||||
name=name,
|
|
||||||
type=ScriptType.GIT,
|
|
||||||
git_repository="https://github.com/simon987/sist2-script-whisper",
|
|
||||||
extra_args="--model=base --num-threads=4 --color=#51da4c --tag"
|
|
||||||
),
|
|
||||||
"Hamburger - Simple script example": lambda name: UserScript(
|
|
||||||
name=name,
|
|
||||||
type=ScriptType.SIMPLE,
|
|
||||||
script=
|
|
||||||
'from sist2 import Sist2Index\n'
|
|
||||||
'import sys\n'
|
|
||||||
'\n'
|
|
||||||
'index = Sist2Index(sys.argv[1])\n'
|
|
||||||
'for doc in index.document_iter():\n'
|
|
||||||
' doc.json_data["tag"] = ["hamburger.#00FF00"]\n'
|
|
||||||
' index.update_document(doc)\n'
|
|
||||||
'\n'
|
|
||||||
'index.sync_tag_table()\n'
|
|
||||||
'index.commit()\n'
|
|
||||||
'\n'
|
|
||||||
'print("Done!")\n'
|
|
||||||
),
|
|
||||||
"(Blank)": lambda name: UserScript(
|
|
||||||
name=name,
|
|
||||||
type=ScriptType.SIMPLE,
|
|
||||||
script=""
|
|
||||||
)
|
|
||||||
}
|
|
@ -41,6 +41,8 @@ class Sist2SearchBackend(BaseModel):
|
|||||||
es_insecure_ssl: bool = False
|
es_insecure_ssl: bool = False
|
||||||
es_index: str = "sist2"
|
es_index: str = "sist2"
|
||||||
threads: int = 1
|
threads: int = 1
|
||||||
|
script: str = ""
|
||||||
|
script_file: str = None
|
||||||
batch_size: int = 70
|
batch_size: int = 70
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -72,6 +74,8 @@ class IndexOptions(BaseModel):
|
|||||||
f"--es-index={search_backend.es_index}",
|
f"--es-index={search_backend.es_index}",
|
||||||
f"--batch-size={search_backend.batch_size}"]
|
f"--batch-size={search_backend.batch_size}"]
|
||||||
|
|
||||||
|
if search_backend.script_file:
|
||||||
|
args.append(f"--script-file={search_backend.script_file}")
|
||||||
if search_backend.es_insecure_ssl:
|
if search_backend.es_insecure_ssl:
|
||||||
args.append(f"--es-insecure-ssl")
|
args.append(f"--es-insecure-ssl")
|
||||||
if self.incremental_index:
|
if self.incremental_index:
|
||||||
@ -245,6 +249,13 @@ class Sist2:
|
|||||||
|
|
||||||
def index(self, options: IndexOptions, search_backend: Sist2SearchBackend, logs_cb):
|
def index(self, options: IndexOptions, search_backend: Sist2SearchBackend, logs_cb):
|
||||||
|
|
||||||
|
if search_backend.script and search_backend.backend_type == SearchBackendType("elasticsearch"):
|
||||||
|
with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".painless", delete=False) as f:
|
||||||
|
f.write(search_backend.script)
|
||||||
|
search_backend.script_file = f.name
|
||||||
|
else:
|
||||||
|
search_backend.script_file = None
|
||||||
|
|
||||||
args = [
|
args = [
|
||||||
self.bin_path,
|
self.bin_path,
|
||||||
*options.args(search_backend),
|
*options.args(search_backend),
|
||||||
|
@ -14,7 +14,7 @@ RUNNING_FRONTENDS: Dict[str, int] = {}
|
|||||||
|
|
||||||
TESSERACT_LANGS = get_tesseract_langs()
|
TESSERACT_LANGS = get_tesseract_langs()
|
||||||
|
|
||||||
DB_SCHEMA_VERSION = "5"
|
DB_SCHEMA_VERSION = "4"
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
854
sist2-vue/package-lock.json
generated
854
sist2-vue/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "sist2",
|
"name": "sist2",
|
||||||
"version": "1.0.0",
|
"version": "2.11.0",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"serve": "vue-cli-service serve",
|
"serve": "vue-cli-service serve",
|
||||||
@ -9,6 +9,7 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@auth0/auth0-spa-js": "^2.0.2",
|
"@auth0/auth0-spa-js": "^2.0.2",
|
||||||
"@egjs/vue-infinitegrid": "3.3.0",
|
"@egjs/vue-infinitegrid": "3.3.0",
|
||||||
|
"@tensorflow/tfjs": "^4.4.0",
|
||||||
"axios": "^0.25.0",
|
"axios": "^0.25.0",
|
||||||
"bootstrap-vue": "^2.21.2",
|
"bootstrap-vue": "^2.21.2",
|
||||||
"core-js": "^3.6.5",
|
"core-js": "^3.6.5",
|
||||||
@ -17,7 +18,6 @@
|
|||||||
"dom-to-image": "^2.6.0",
|
"dom-to-image": "^2.6.0",
|
||||||
"fslightbox-vue": "fslightbox-vue.tgz",
|
"fslightbox-vue": "fslightbox-vue.tgz",
|
||||||
"nouislider": "^15.2.0",
|
"nouislider": "^15.2.0",
|
||||||
"onnxruntime-web": "^1.15.1",
|
|
||||||
"underscore": "^1.13.1",
|
"underscore": "^1.13.1",
|
||||||
"vue": "^2.6.12",
|
"vue": "^2.6.12",
|
||||||
"vue-color": "^2.8.1",
|
"vue-color": "^2.8.1",
|
||||||
@ -29,7 +29,6 @@
|
|||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@babel/polyfill": "^7.12.1",
|
"@babel/polyfill": "^7.12.1",
|
||||||
"@types/underscore": "^1.11.6",
|
|
||||||
"@vue/cli-plugin-babel": "~5.0.8",
|
"@vue/cli-plugin-babel": "~5.0.8",
|
||||||
"@vue/cli-plugin-router": "~5.0.8",
|
"@vue/cli-plugin-router": "~5.0.8",
|
||||||
"@vue/cli-plugin-typescript": "^5.0.8",
|
"@vue/cli-plugin-typescript": "^5.0.8",
|
||||||
@ -44,8 +43,8 @@
|
|||||||
"portal-vue": "^2.1.7",
|
"portal-vue": "^2.1.7",
|
||||||
"sass": "^1.26.11",
|
"sass": "^1.26.11",
|
||||||
"sass-loader": "^10.0.2",
|
"sass-loader": "^10.0.2",
|
||||||
"typescript": "^4.9.5",
|
"typescript": "~4.1.5",
|
||||||
"vue-cli-plugin-bootstrap-vue": "~0.8.2",
|
"vue-cli-plugin-bootstrap-vue": "~0.7.0",
|
||||||
"vue-template-compiler": "^2.6.11"
|
"vue-template-compiler": "^2.6.11"
|
||||||
},
|
},
|
||||||
"browserslist": [
|
"browserslist": [
|
||||||
|
@ -308,21 +308,15 @@ html, body {
|
|||||||
|
|
||||||
.info-icon {
|
.info-icon {
|
||||||
width: 1rem;
|
width: 1rem;
|
||||||
min-width: 1rem;
|
|
||||||
margin-right: 0.2rem;
|
margin-right: 0.2rem;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
line-height: 1rem;
|
line-height: 1rem;
|
||||||
height: 1rem;
|
height: 1rem;
|
||||||
min-height: 1rem;
|
|
||||||
background-image: url();
|
background-image: url();
|
||||||
filter: brightness(45%);
|
filter: brightness(45%);
|
||||||
display: block;
|
display: block;
|
||||||
}
|
}
|
||||||
|
|
||||||
.theme-black .info-icon {
|
|
||||||
filter: brightness(80%);
|
|
||||||
}
|
|
||||||
|
|
||||||
.tabs {
|
.tabs {
|
||||||
margin-top: 10px;
|
margin-top: 10px;
|
||||||
}
|
}
|
||||||
|
@ -25,7 +25,6 @@ export interface Index {
|
|||||||
id: string
|
id: string
|
||||||
idPrefix: string
|
idPrefix: string
|
||||||
timestamp: number
|
timestamp: number
|
||||||
models: []
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface EsHit {
|
export interface EsHit {
|
||||||
@ -118,15 +117,6 @@ class Sist2Api {
|
|||||||
return this.sist2Info.searchBackend;
|
return this.sist2Info.searchBackend;
|
||||||
}
|
}
|
||||||
|
|
||||||
models() {
|
|
||||||
const allModels = this.sist2Info.indices
|
|
||||||
.map(idx => idx.models)
|
|
||||||
.flat();
|
|
||||||
|
|
||||||
return allModels
|
|
||||||
.filter((v, i, a) => a.findIndex(v2 => (v2.id === v.id)) === i)
|
|
||||||
}
|
|
||||||
|
|
||||||
getSist2Info(): Promise<any> {
|
getSist2Info(): Promise<any> {
|
||||||
return axios.get(`${this.baseUrl}i`).then(resp => {
|
return axios.get(`${this.baseUrl}i`).then(resp => {
|
||||||
const indices = resp.data.indices as Index[];
|
const indices = resp.data.indices as Index[];
|
||||||
@ -137,8 +127,7 @@ class Sist2Api {
|
|||||||
name: idx.name,
|
name: idx.name,
|
||||||
timestamp: idx.timestamp,
|
timestamp: idx.timestamp,
|
||||||
version: idx.version,
|
version: idx.version,
|
||||||
models: idx.models,
|
idPrefix: getIdPrefix(indices, idx.id)
|
||||||
idPrefix: getIdPrefix(indices, idx.id),
|
|
||||||
} as Index;
|
} as Index;
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -629,15 +618,6 @@ class Sist2Api {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ("knn" in query) {
|
|
||||||
query.query = {
|
|
||||||
bool: {
|
|
||||||
must: []
|
|
||||||
}
|
|
||||||
};
|
|
||||||
delete query.knn;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ("function_score" in query.query) {
|
if ("function_score" in query.query) {
|
||||||
query.query = query.query.function_score.query;
|
query.query = query.query.function_score.query;
|
||||||
}
|
}
|
||||||
@ -722,11 +702,6 @@ class Sist2Api {
|
|||||||
return result;
|
return result;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
getEmbeddings(indexId, docId, modelId) {
|
|
||||||
return axios.post(`${this.baseUrl}/e/${indexId}/${docId}/${modelId.toString().padStart(3, '0')}`)
|
|
||||||
.then(resp => (resp.data));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export default new Sist2Api("");
|
export default new Sist2Api("");
|
@ -1,5 +1,5 @@
|
|||||||
import store from "./store";
|
import store from "./store";
|
||||||
import sist2Api, {EsHit, Index} from "@/Sist2Api";
|
import {EsHit, Index} from "@/Sist2Api";
|
||||||
|
|
||||||
const SORT_MODES = {
|
const SORT_MODES = {
|
||||||
score: {
|
score: {
|
||||||
@ -79,10 +79,8 @@ class Sist2ElasticsearchQuery {
|
|||||||
const selectedIndexIds = getters.selectedIndices.map((idx: Index) => idx.id)
|
const selectedIndexIds = getters.selectedIndices.map((idx: Index) => idx.id)
|
||||||
const selectedMimeTypes = getters.selectedMimeTypes;
|
const selectedMimeTypes = getters.selectedMimeTypes;
|
||||||
const selectedTags = getters.selectedTags;
|
const selectedTags = getters.selectedTags;
|
||||||
const sortMode = getters.embedding ? "score" : getters.sortMode;
|
|
||||||
|
|
||||||
const legacyES = store.state.sist2Info.esVersionLegacy;
|
const legacyES = store.state.sist2Info.esVersionLegacy;
|
||||||
const hasKnn = store.state.sist2Info.esVersionHasKnn;
|
|
||||||
|
|
||||||
const filters = [
|
const filters = [
|
||||||
{terms: {index: selectedIndexIds}}
|
{terms: {index: selectedIndexIds}}
|
||||||
@ -164,14 +162,14 @@ class Sist2ElasticsearchQuery {
|
|||||||
|
|
||||||
const q = {
|
const q = {
|
||||||
_source: {
|
_source: {
|
||||||
excludes: ["content", "_tie", "emb.*"]
|
excludes: ["content", "_tie"]
|
||||||
},
|
},
|
||||||
query: {
|
query: {
|
||||||
bool: {
|
bool: {
|
||||||
filter: filters,
|
filter: filters,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
sort: SORT_MODES[sortMode].mode,
|
sort: SORT_MODES[getters.sortMode].mode,
|
||||||
size: size,
|
size: size,
|
||||||
} as any;
|
} as any;
|
||||||
|
|
||||||
@ -183,57 +181,14 @@ class Sist2ElasticsearchQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!empty && !blankSearch) {
|
if (!empty && !blankSearch) {
|
||||||
if (getters.embedding) {
|
|
||||||
filters.push(query)
|
|
||||||
} else {
|
|
||||||
q.query.bool.must = query;
|
q.query.bool.must = query;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (getters.embedding) {
|
|
||||||
delete q.query;
|
|
||||||
|
|
||||||
const field = "emb." + sist2Api.models().find(m => m.id == getters.embeddingsModel).path;
|
|
||||||
|
|
||||||
if (hasKnn) {
|
|
||||||
// Use knn (8.8+)
|
|
||||||
q.knn = {
|
|
||||||
field: field,
|
|
||||||
query_vector: getters.embedding,
|
|
||||||
|
|
||||||
k: 600,
|
|
||||||
num_candidates: 600,
|
|
||||||
|
|
||||||
filter: filters
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Use brute-force as a fallback
|
|
||||||
|
|
||||||
filters.push({exists: {field: field}});
|
|
||||||
|
|
||||||
q.query = {
|
|
||||||
function_score: {
|
|
||||||
query: {
|
|
||||||
bool: {
|
|
||||||
must: filters,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
script_score: {
|
|
||||||
script: {
|
|
||||||
source: `cosineSimilarity(params.query_vector, "${field}") + 1.0`,
|
|
||||||
params: {query_vector: getters.embedding}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (after) {
|
if (after) {
|
||||||
q.search_after = [SORT_MODES[sortMode].key(after), after["_id"]];
|
q.search_after = [SORT_MODES[getters.sortMode].key(after), after["_id"]];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (getters.optHighlight && !getters.embedding) {
|
if (getters.optHighlight) {
|
||||||
q.highlight = {
|
q.highlight = {
|
||||||
pre_tags: ["<mark>"],
|
pre_tags: ["<mark>"],
|
||||||
post_tags: ["</mark>"],
|
post_tags: ["</mark>"],
|
||||||
@ -259,7 +214,7 @@ class Sist2ElasticsearchQuery {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sortMode === "random") {
|
if (getters.sortMode === "random") {
|
||||||
q.query = {
|
q.query = {
|
||||||
function_score: {
|
function_score: {
|
||||||
query: {
|
query: {
|
||||||
|
@ -103,16 +103,6 @@ class Sist2ElasticsearchQuery {
|
|||||||
q["highlightContextSize"] = Number(getters.optFragmentSize);
|
q["highlightContextSize"] = Number(getters.optFragmentSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (getters.embedding) {
|
|
||||||
q["model"] = getters.embeddingsModel;
|
|
||||||
q["embedding"] = getters.embedding;
|
|
||||||
q["sort"] = "embedding";
|
|
||||||
q["sortAsc"] = false;
|
|
||||||
} else if (getters.sortMode == "embedding") {
|
|
||||||
q["sort"] = "sort"
|
|
||||||
q["sortAsc"] = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return q;
|
return q;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -12,7 +12,7 @@ export default {
|
|||||||
props: ["span", "text"],
|
props: ["span", "text"],
|
||||||
methods: {
|
methods: {
|
||||||
getStyle() {
|
getStyle() {
|
||||||
return ModelsRepo.data[this.$store.getters.nerModel.name].labelStyles[this.span.label];
|
return ModelsRepo.data[this.$store.getters.mlModel.name].labelStyles[this.span.label];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,7 @@ export default {
|
|||||||
props: ["spans", "text"],
|
props: ["spans", "text"],
|
||||||
computed: {
|
computed: {
|
||||||
legend() {
|
legend() {
|
||||||
return Object.entries(ModelsRepo.data[this.$store.state.nerModel.name].legend)
|
return Object.entries(ModelsRepo.data[this.$store.state.mlModel.name].legend)
|
||||||
.map(([label, name]) => ({
|
.map(([label, name]) => ({
|
||||||
text: name,
|
text: name,
|
||||||
id: label,
|
id: label,
|
||||||
|
@ -45,8 +45,7 @@ export default {
|
|||||||
items.push(
|
items.push(
|
||||||
{key: "esVersion", value: this.$store.state.sist2Info.esVersion},
|
{key: "esVersion", value: this.$store.state.sist2Info.esVersion},
|
||||||
{key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported},
|
{key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported},
|
||||||
{key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy},
|
{key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy}
|
||||||
{key: "esVersionHasKnn", value: this.$store.state.sist2Info.esVersionHasKnn},
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,7 +24,6 @@
|
|||||||
<!-- Title line -->
|
<!-- Title line -->
|
||||||
<div style="display: flex">
|
<div style="display: flex">
|
||||||
<span class="info-icon" @click="onInfoClick()"></span>
|
<span class="info-icon" @click="onInfoClick()"></span>
|
||||||
<MLIcon v-if="doc._source.embedding" clickable @click="onEmbeddingClick()"></MLIcon>
|
|
||||||
<DocFileTitle :doc="doc"></DocFileTitle>
|
<DocFileTitle :doc="doc"></DocFileTitle>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -50,12 +49,10 @@ import DocInfoModal from "@/components/DocInfoModal.vue";
|
|||||||
import ContentDiv from "@/components/ContentDiv.vue";
|
import ContentDiv from "@/components/ContentDiv.vue";
|
||||||
import FullThumbnail from "@/components/FullThumbnail";
|
import FullThumbnail from "@/components/FullThumbnail";
|
||||||
import FeaturedFieldsLine from "@/components/FeaturedFieldsLine";
|
import FeaturedFieldsLine from "@/components/FeaturedFieldsLine";
|
||||||
import MLIcon from "@/components/icons/MlIcon.vue";
|
|
||||||
import Sist2Api from "@/Sist2Api";
|
|
||||||
|
|
||||||
|
|
||||||
export default {
|
export default {
|
||||||
components: {MLIcon, FeaturedFieldsLine, FullThumbnail, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
|
components: {FeaturedFieldsLine, FullThumbnail, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
|
||||||
props: ["doc", "width"],
|
props: ["doc", "width"],
|
||||||
data() {
|
data() {
|
||||||
return {
|
return {
|
||||||
@ -74,13 +71,6 @@ export default {
|
|||||||
onInfoClick() {
|
onInfoClick() {
|
||||||
this.showInfo = true;
|
this.showInfo = true;
|
||||||
},
|
},
|
||||||
onEmbeddingClick() {
|
|
||||||
Sist2Api.getEmbeddings(this.doc._source.index, this.doc._id, this.$store.state.embeddingsModel).then(embeddings => {
|
|
||||||
this.$store.commit("setEmbeddingText", "");
|
|
||||||
this.$store.commit("setEmbedding", embeddings);
|
|
||||||
this.$store.commit("setEmbeddingDoc", this.doc);
|
|
||||||
})
|
|
||||||
},
|
|
||||||
async onThumbnailClick() {
|
async onThumbnailClick() {
|
||||||
this.$store.commit("setUiLightboxSlide", this.doc._seq);
|
this.$store.commit("setUiLightboxSlide", this.doc._seq);
|
||||||
await this.$store.dispatch("showLightbox");
|
await this.$store.dispatch("showLightbox");
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
<template>
|
<template>
|
||||||
<a :href="`f/${doc._source.index}/${doc._id}`"
|
<a :href="`f/${doc._source.index}/${doc._id}`" class="file-title-anchor" target="_blank">
|
||||||
:class="doc._source.embedding ? 'file-title-anchor-with-embedding' : 'file-title-anchor'" target="_blank">
|
|
||||||
<div class="file-title" :title="doc._source.path + '/' + doc._source.name + ext(doc)"
|
<div class="file-title" :title="doc._source.path + '/' + doc._source.name + ext(doc)"
|
||||||
v-html="fileName() + ext(doc)"></div>
|
v-html="fileName() + ext(doc)"></div>
|
||||||
</a>
|
</a>
|
||||||
@ -35,13 +34,8 @@ export default {
|
|||||||
max-width: calc(100% - 1.2rem);
|
max-width: calc(100% - 1.2rem);
|
||||||
}
|
}
|
||||||
|
|
||||||
.file-title-anchor-with-embedding {
|
|
||||||
max-width: calc(100% - 2.2rem);
|
|
||||||
}
|
|
||||||
|
|
||||||
.file-title {
|
.file-title {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
max-width: 100%;
|
|
||||||
line-height: 1rem;
|
line-height: 1rem;
|
||||||
height: 1.1rem;
|
height: 1.1rem;
|
||||||
white-space: nowrap;
|
white-space: nowrap;
|
||||||
@ -55,7 +49,6 @@ export default {
|
|||||||
.theme-black .file-title {
|
.theme-black .file-title {
|
||||||
color: #ddd;
|
color: #ddd;
|
||||||
}
|
}
|
||||||
|
|
||||||
.theme-black .file-title:hover {
|
.theme-black .file-title:hover {
|
||||||
color: #fff;
|
color: #fff;
|
||||||
}
|
}
|
||||||
|
@ -32,7 +32,6 @@
|
|||||||
<div class="doc-line ml-3">
|
<div class="doc-line ml-3">
|
||||||
<div style="display: flex">
|
<div style="display: flex">
|
||||||
<span class="info-icon" @click="showInfo = true"></span>
|
<span class="info-icon" @click="showInfo = true"></span>
|
||||||
<MLIcon v-if="doc._source.embedding" clickable @click="onEmbeddingClick()"></MLIcon>
|
|
||||||
<DocFileTitle :doc="doc"></DocFileTitle>
|
<DocFileTitle :doc="doc"></DocFileTitle>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -68,12 +67,10 @@ import DocInfoModal from "@/components/DocInfoModal";
|
|||||||
import ContentDiv from "@/components/ContentDiv";
|
import ContentDiv from "@/components/ContentDiv";
|
||||||
import FileIcon from "@/components/icons/FileIcon";
|
import FileIcon from "@/components/icons/FileIcon";
|
||||||
import FeaturedFieldsLine from "@/components/FeaturedFieldsLine";
|
import FeaturedFieldsLine from "@/components/FeaturedFieldsLine";
|
||||||
import MLIcon from "@/components/icons/MlIcon.vue";
|
|
||||||
import Sist2Api from "@/Sist2Api";
|
|
||||||
|
|
||||||
export default {
|
export default {
|
||||||
name: "DocListItem",
|
name: "DocListItem",
|
||||||
components: {MLIcon, FileIcon, ContentDiv, DocInfoModal, DocFileTitle, TagContainer, FeaturedFieldsLine},
|
components: {FileIcon, ContentDiv, DocInfoModal, DocFileTitle, TagContainer, FeaturedFieldsLine},
|
||||||
props: ["doc"],
|
props: ["doc"],
|
||||||
data() {
|
data() {
|
||||||
return {
|
return {
|
||||||
@ -86,13 +83,6 @@ export default {
|
|||||||
this.$store.commit("setUiLightboxSlide", this.doc._seq);
|
this.$store.commit("setUiLightboxSlide", this.doc._seq);
|
||||||
await this.$store.dispatch("showLightbox");
|
await this.$store.dispatch("showLightbox");
|
||||||
},
|
},
|
||||||
onEmbeddingClick() {
|
|
||||||
Sist2Api.getEmbeddings(this.doc._source.index, this.doc._id, this.$store.state.embeddingsModel).then(embeddings => {
|
|
||||||
this.$store.commit("setEmbeddingText", "");
|
|
||||||
this.$store.commit("setEmbedding", embeddings);
|
|
||||||
this.$store.commit("setEmbeddingDoc", this.doc);
|
|
||||||
})
|
|
||||||
},
|
|
||||||
path() {
|
path() {
|
||||||
if (!this.doc.highlight) {
|
if (!this.doc.highlight) {
|
||||||
return this.doc._source.path + "/"
|
return this.doc._source.path + "/"
|
||||||
|
@ -1,155 +0,0 @@
|
|||||||
<template>
|
|
||||||
<div>
|
|
||||||
<b-progress v-if="modelLoading && [0, 1].includes(modelLoadingProgress)" max="1" class="mb-1" variant="primary"
|
|
||||||
striped animated :value="1">
|
|
||||||
</b-progress>
|
|
||||||
<b-progress v-else-if="modelLoading" :value="modelLoadingProgress" max="1" class="mb-1" variant="warning"
|
|
||||||
show-progress>
|
|
||||||
</b-progress>
|
|
||||||
<div style="display: flex">
|
|
||||||
<b-select :options="modelOptions()" class="mr-2 input-prepend" :value="modelName"
|
|
||||||
@change="onModelChange($event)"></b-select>
|
|
||||||
|
|
||||||
<b-input-group>
|
|
||||||
<b-form-input :value="embeddingText"
|
|
||||||
:placeholder="$store.state.embeddingDoc ? ' ' : $t('embeddingsSearchPlaceholder')"
|
|
||||||
@input="onInput($event)"
|
|
||||||
:disabled="modelLoading"
|
|
||||||
:style="{'pointer-events': $store.state.embeddingDoc ? 'none' : undefined}"
|
|
||||||
></b-form-input>
|
|
||||||
<b-badge v-if="$store.state.embeddingDoc" pill variant="primary" class="overlay-badge" href="#"
|
|
||||||
@click="onBadgeClick()">{{ docName }}
|
|
||||||
</b-badge>
|
|
||||||
|
|
||||||
<template #prepend>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
<template #append>
|
|
||||||
<b-input-group-text>
|
|
||||||
<MLIcon class="ml-append" big></MLIcon>
|
|
||||||
</b-input-group-text>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
</b-input-group>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
import {mapGetters, mapMutations} from "vuex";
|
|
||||||
import {CLIPTransformerModel} from "@/ml/CLIPTransformerModel"
|
|
||||||
import _debounce from "lodash/debounce";
|
|
||||||
import MLIcon from "@/components/icons/MlIcon.vue";
|
|
||||||
import Sist2AdminApi from "@/Sist2Api";
|
|
||||||
|
|
||||||
export default {
|
|
||||||
components: {MLIcon},
|
|
||||||
data() {
|
|
||||||
return {
|
|
||||||
modelLoading: false,
|
|
||||||
modelLoadingProgress: 0,
|
|
||||||
modelLoaded: false,
|
|
||||||
model: null,
|
|
||||||
modelName: null
|
|
||||||
}
|
|
||||||
},
|
|
||||||
computed: {
|
|
||||||
...mapGetters({
|
|
||||||
optQueryMode: "optQueryMode",
|
|
||||||
embeddingText: "embeddingText",
|
|
||||||
fuzzy: "fuzzy",
|
|
||||||
}),
|
|
||||||
docName() {
|
|
||||||
const ext = this.$store.state.embeddingDoc._source.extension;
|
|
||||||
return this.$store.state.embeddingDoc._source.name +
|
|
||||||
(ext ? "." + ext : "")
|
|
||||||
}
|
|
||||||
},
|
|
||||||
mounted() {
|
|
||||||
// Set default model
|
|
||||||
this.modelName = Sist2AdminApi.models()[0].name;
|
|
||||||
this.onModelChange(this.modelName);
|
|
||||||
|
|
||||||
this.onInput = _debounce(this._onInput, 450, {leading: false});
|
|
||||||
},
|
|
||||||
methods: {
|
|
||||||
...mapMutations({
|
|
||||||
setEmbeddingText: "setEmbeddingText",
|
|
||||||
setEmbedding: "setEmbedding",
|
|
||||||
setEmbeddingModel: "setEmbeddingsModel",
|
|
||||||
}),
|
|
||||||
async loadModel() {
|
|
||||||
this.modelLoading = true;
|
|
||||||
|
|
||||||
await this.model.init(async progress => {
|
|
||||||
this.modelLoadingProgress = progress;
|
|
||||||
});
|
|
||||||
this.modelLoading = false;
|
|
||||||
this.modelLoaded = true;
|
|
||||||
},
|
|
||||||
async _onInput(text) {
|
|
||||||
try {
|
|
||||||
|
|
||||||
if (!this.modelLoaded) {
|
|
||||||
await this.loadModel();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (text.length === 0) {
|
|
||||||
this.setEmbeddingText("");
|
|
||||||
this.setEmbedding(null);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const embeddings = await this.model.predict(text);
|
|
||||||
|
|
||||||
this.setEmbeddingText(text);
|
|
||||||
this.setEmbedding(embeddings);
|
|
||||||
} catch (e) {
|
|
||||||
alert(e)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
modelOptions() {
|
|
||||||
return Sist2AdminApi.models().map(model => model.name);
|
|
||||||
},
|
|
||||||
onModelChange(name) {
|
|
||||||
this.modelLoaded = false;
|
|
||||||
this.modelLoadingProgress = 0;
|
|
||||||
|
|
||||||
const modelInfo = Sist2AdminApi.models().find(m => m.name === name);
|
|
||||||
|
|
||||||
if (modelInfo.name === "CLIP") {
|
|
||||||
const tokenizerUrl = new URL("./tokenizer.json", modelInfo.url).href;
|
|
||||||
this.model = new CLIPTransformerModel(modelInfo.url, tokenizerUrl)
|
|
||||||
this.setEmbeddingModel(modelInfo.id);
|
|
||||||
} else {
|
|
||||||
throw new Error("Unknown model: " + name);
|
|
||||||
}
|
|
||||||
},
|
|
||||||
onBadgeClick() {
|
|
||||||
this.$store.commit("setEmbedding", null);
|
|
||||||
this.$store.commit("setEmbeddingDoc", null);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
</script>
|
|
||||||
<style>
|
|
||||||
.overlay-badge {
|
|
||||||
position: absolute;
|
|
||||||
z-index: 1;
|
|
||||||
left: 0.375rem;
|
|
||||||
top: 8px;
|
|
||||||
line-height: 1.1rem;
|
|
||||||
overflow: hidden;
|
|
||||||
max-width: 200px;
|
|
||||||
text-overflow: ellipsis;
|
|
||||||
}
|
|
||||||
|
|
||||||
.input-prepend {
|
|
||||||
max-width: 100px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.theme-black .ml-append {
|
|
||||||
filter: brightness(0.95) !important;
|
|
||||||
}
|
|
||||||
</style>
|
|
@ -31,11 +31,7 @@
|
|||||||
<div class="d-flex">
|
<div class="d-flex">
|
||||||
<b-checkbox style="pointer-events: none" :checked="isSelected(idx)"></b-checkbox>
|
<b-checkbox style="pointer-events: none" :checked="isSelected(idx)"></b-checkbox>
|
||||||
{{ idx.name }}
|
{{ idx.name }}
|
||||||
<div style="vertical-align: center; margin-left: 5px">
|
<span class="text-muted timestamp-text ml-2">{{ formatIdxDate(idx.timestamp) }}</span>
|
||||||
<MLIcon small style="top: -1px; position: relative"></MLIcon>
|
|
||||||
</div>
|
|
||||||
<span class="text-muted timestamp-text ml-2"
|
|
||||||
style="top: 1px; position: relative">{{ formatIdxDate(idx.timestamp) }}</span>
|
|
||||||
</div>
|
</div>
|
||||||
<b-badge class="version-badge">v{{ idx.version }}</b-badge>
|
<b-badge class="version-badge">v{{ idx.version }}</b-badge>
|
||||||
</b-list-group-item>
|
</b-list-group-item>
|
||||||
@ -48,11 +44,9 @@ import SmallBadge from "./SmallBadge.vue"
|
|||||||
import {mapActions, mapGetters} from "vuex";
|
import {mapActions, mapGetters} from "vuex";
|
||||||
import Vue from "vue";
|
import Vue from "vue";
|
||||||
import {format} from "date-fns";
|
import {format} from "date-fns";
|
||||||
import MLIcon from "@/components/icons/MlIcon.vue";
|
|
||||||
|
|
||||||
export default Vue.extend({
|
export default Vue.extend({
|
||||||
components: {
|
components: {
|
||||||
MLIcon,
|
|
||||||
SmallBadge
|
SmallBadge
|
||||||
},
|
},
|
||||||
data() {
|
data() {
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
<b-button :disabled="mlPredictionsLoading || mlLoading" @click="mlAnalyze" variant="primary"
|
<b-button :disabled="mlPredictionsLoading || mlLoading" @click="mlAnalyze" variant="primary"
|
||||||
>{{ $t("ml.analyzeText") }}
|
>{{ $t("ml.analyzeText") }}
|
||||||
</b-button>
|
</b-button>
|
||||||
<b-select :disabled="mlPredictionsLoading || mlLoading" class="ml-2" v-model="nerModel">
|
<b-select :disabled="mlPredictionsLoading || mlLoading" class="ml-2" v-model="mlModel">
|
||||||
<b-select-option :value="opt.value" v-for="opt of ModelsRepo.getOptions()">{{ opt.text }}
|
<b-select-option :value="opt.value" v-for="opt of ModelsRepo.getOptions()">{{ opt.text }}
|
||||||
</b-select-option>
|
</b-select-option>
|
||||||
</b-select>
|
</b-select>
|
||||||
@ -57,16 +57,16 @@ export default {
|
|||||||
modelPredictionProgress: 0,
|
modelPredictionProgress: 0,
|
||||||
mlPredictionsLoading: false,
|
mlPredictionsLoading: false,
|
||||||
mlLoading: false,
|
mlLoading: false,
|
||||||
nerModel: null,
|
mlModel: null,
|
||||||
analyzedContentSpans: []
|
analyzedContentSpans: []
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
mounted() {
|
mounted() {
|
||||||
|
|
||||||
if (this.$store.getters.optMlDefaultModel) {
|
if (this.$store.getters.optMlDefaultModel) {
|
||||||
this.nerModel = this.$store.getters.optMlDefaultModel
|
this.mlModel = this.$store.getters.optMlDefaultModel
|
||||||
} else {
|
} else {
|
||||||
this.nerModel = ModelsRepo.getDefaultModel();
|
this.mlModel = ModelsRepo.getDefaultModel();
|
||||||
}
|
}
|
||||||
|
|
||||||
Sist2Api
|
Sist2Api
|
||||||
@ -86,7 +86,7 @@ export default {
|
|||||||
computed: {
|
computed: {
|
||||||
...mapGetters(["optAutoAnalyze"]),
|
...mapGetters(["optAutoAnalyze"]),
|
||||||
modelSize() {
|
modelSize() {
|
||||||
const modelData = ModelsRepo.data[this.nerModel];
|
const modelData = ModelsRepo.data[this.mlModel];
|
||||||
if (!modelData) {
|
if (!modelData) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -110,10 +110,10 @@ export default {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
async getMlModel() {
|
async getMlModel() {
|
||||||
if (this.$store.getters.nerModel.name !== this.nerModel) {
|
if (this.$store.getters.mlModel.name !== this.mlModel) {
|
||||||
this.mlLoading = true;
|
this.mlLoading = true;
|
||||||
this.modelLoadingProgress = 0;
|
this.modelLoadingProgress = 0;
|
||||||
const modelInfo = ModelsRepo.data[this.nerModel];
|
const modelInfo = ModelsRepo.data[this.mlModel];
|
||||||
|
|
||||||
const model = new BertNerModel(
|
const model = new BertNerModel(
|
||||||
modelInfo.vocabUrl,
|
modelInfo.vocabUrl,
|
||||||
@ -122,25 +122,25 @@ export default {
|
|||||||
)
|
)
|
||||||
|
|
||||||
await model.init(progress => this.modelLoadingProgress = progress);
|
await model.init(progress => this.modelLoadingProgress = progress);
|
||||||
this.$store.commit("setNerModel", {model, name: this.nerModel});
|
this.$store.commit("setMlModel", {model, name: this.mlModel});
|
||||||
|
|
||||||
this.mlLoading = false;
|
this.mlLoading = false;
|
||||||
return model
|
return model
|
||||||
}
|
}
|
||||||
|
|
||||||
return this.$store.getters.nerModel.model;
|
return this.$store.getters.mlModel.model;
|
||||||
},
|
},
|
||||||
async mlAnalyze() {
|
async mlAnalyze() {
|
||||||
if (!this.content) {
|
if (!this.content) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const modelInfo = ModelsRepo.data[this.nerModel];
|
const modelInfo = ModelsRepo.data[this.mlModel];
|
||||||
if (modelInfo === undefined) {
|
if (modelInfo === undefined) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.$store.commit("setOptMlDefaultModel", this.nerModel);
|
this.$store.commit("setOptMlDefaultModel", this.mlModel);
|
||||||
await this.$store.dispatch("updateConfiguration");
|
await this.$store.dispatch("updateConfiguration");
|
||||||
|
|
||||||
const model = await this.getMlModel();
|
const model = await this.getMlModel();
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
<template>
|
<template>
|
||||||
<b-dropdown variant="primary" :disabled="$store.getters.embedding !== null">
|
<b-dropdown variant="primary">
|
||||||
<b-dropdown-item :class="{'dropdown-active': sort === 'score'}" @click="onSelect('score')">{{
|
<b-dropdown-item :class="{'dropdown-active': sort === 'score'}" @click="onSelect('score')">{{
|
||||||
$t("sort.relevance")
|
$t("sort.relevance")
|
||||||
}}
|
}}
|
||||||
|
@ -210,8 +210,4 @@ export default {
|
|||||||
.theme-black .inspire-tree .matched > .wholerow {
|
.theme-black .inspire-tree .matched > .wholerow {
|
||||||
background: rgba(251, 191, 41, 0.25);
|
background: rgba(251, 191, 41, 0.25);
|
||||||
}
|
}
|
||||||
#tagTree {
|
|
||||||
max-height: 350px;
|
|
||||||
overflow: auto;
|
|
||||||
}
|
|
||||||
</style>
|
</style>
|
@ -1,76 +0,0 @@
|
|||||||
<template>
|
|
||||||
<svg class="ml-icon" :class="{'m-icon': 1, 'ml-icon-big': big, 'ml-icon-clickable': clickable}" xmlns="http://www.w3.org/2000/svg"
|
|
||||||
viewBox="0 0 512 512" xml:space="preserve" fill="currentColor" stroke="currentColor" @click="$emit('click')">
|
|
||||||
<g>
|
|
||||||
<path class="st0" d="M167.314,14.993C167.314,6.712,160.602,0,152.332,0h-5.514c-8.27,0-14.982,6.712-14.982,14.993v41.466h35.478
|
|
||||||
V14.993z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M238.26,14.993C238.26,6.712,231.549,0,223.278,0h-5.504c-8.271,0-14.982,6.712-14.982,14.993v41.466h35.468 V14.993z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M309.207,14.993C309.207,6.712,302.496,0,294.225,0h-5.504c-8.271,0-14.982,6.712-14.982,14.993v41.466h35.468 V14.993z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M380.164,14.993C380.164,6.712,373.453,0,365.182,0h-5.514c-8.27,0-14.982,6.712-14.982,14.993v41.466h35.478 V14.993z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M131.836,497.007c0,8.282,6.712,14.993,14.982,14.993h5.514c8.27,0,14.982-6.711,14.982-14.993V455.55h-35.478 V497.007z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M202.792,497.007c0,8.282,6.712,14.993,14.982,14.993h5.504c8.27,0,14.982-6.711,14.982-14.993V455.55h-35.468 V497.007z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M273.739,497.007c0,8.282,6.712,14.993,14.982,14.993h5.504c8.271,0,14.982-6.711,14.982-14.993V455.55 h-35.468V497.007z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M344.686,497.007c0,8.282,6.712,14.993,14.982,14.993h5.514c8.271,0,14.982-6.711,14.982-14.993V455.55 h-35.478V497.007z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M497.018,131.836H455.55v35.479h41.468c8.27,0,14.982-6.712,14.982-14.993v-5.493 C512,138.548,505.288,131.836,497.018,131.836z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M497.018,202.793H455.55v35.468h41.468c8.27,0,14.982-6.712,14.982-14.982v-5.494 C512,209.504,505.288,202.793,497.018,202.793z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M497.018,273.739H455.55v35.468h41.468c8.27,0,14.982-6.711,14.982-14.992v-5.494 C512,280.451,505.288,273.739,497.018,273.739z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M497.018,344.686H455.55v35.479h41.468c8.27,0,14.982-6.712,14.982-14.993v-5.493 C512,351.398,505.288,344.686,497.018,344.686z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M0,146.828v5.493c0,8.281,6.711,14.993,14.982,14.993H56.46v-35.479H14.982C6.711,131.836,0,138.548,0,146.828 z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M0,217.785v5.494c0,8.27,6.711,14.982,14.982,14.982H56.46v-35.468H14.982C6.711,202.793,0,209.504,0,217.785z "/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M0,288.721v5.494c0,8.281,6.711,14.992,14.982,14.992H56.46v-35.468H14.982C6.711,273.739,0,280.451,0,288.721 z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M0,359.679v5.493c0,8.281,6.711,14.993,14.982,14.993H56.46v-35.479H14.982C6.711,344.686,0,351.398,0,359.679 z"/>
|
|
||||||
<path class="st0"
|
|
||||||
d="M78.628,433.382h354.753V78.628H78.628V433.382z M376.56,120.2c9.18,0,16.635,7.445,16.635,16.634 c0,9.18-7.455,16.624-16.635,16.624c-9.179,0-16.624-7.445-16.624-16.624C359.936,127.644,367.381,120.2,376.56,120.2z M376.56,361.32c9.18,0,16.635,7.445,16.635,16.635c0,9.179-7.455,16.623-16.635,16.623c-9.179,0-16.624-7.444-16.624-16.623 C359.936,368.764,367.381,361.32,376.56,361.32z M184.362,184.362h143.287v143.287H184.362V184.362z M135.439,120.2 c9.19,0,16.635,7.445,16.635,16.634c0,9.169-7.445,16.624-16.635,16.624c-9.178,0-16.623-7.455-16.623-16.624 C118.816,127.644,126.26,120.2,135.439,120.2z M135.439,361.32c9.19,0,16.635,7.445,16.635,16.635 c0,9.169-7.445,16.623-16.635,16.623c-9.178,0-16.623-7.454-16.623-16.623C118.816,368.764,126.26,361.32,135.439,361.32z"/>
|
|
||||||
</g>
|
|
||||||
</svg>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
export default {
|
|
||||||
name: "MLIcon",
|
|
||||||
props: {
|
|
||||||
"big": Boolean,
|
|
||||||
"clickable": Boolean
|
|
||||||
}
|
|
||||||
}
|
|
||||||
</script>
|
|
||||||
|
|
||||||
<style scoped>
|
|
||||||
.ml-icon-clickable {
|
|
||||||
cursor: pointer;
|
|
||||||
}
|
|
||||||
|
|
||||||
.ml-icon-big {
|
|
||||||
width: 24px !important;
|
|
||||||
height: 24px !important;
|
|
||||||
}
|
|
||||||
|
|
||||||
.ml-icon {
|
|
||||||
width: 1rem;
|
|
||||||
min-width: 1rem;
|
|
||||||
margin-right: 0.2rem;
|
|
||||||
line-height: 1rem;
|
|
||||||
height: 1rem;
|
|
||||||
min-height: 1rem;
|
|
||||||
filter: brightness(45%);
|
|
||||||
}
|
|
||||||
|
|
||||||
.theme-black .ml-icon {
|
|
||||||
filter: brightness(80%);
|
|
||||||
}
|
|
||||||
</style>
|
|
@ -18,7 +18,6 @@ export default {
|
|||||||
tags: "Tags",
|
tags: "Tags",
|
||||||
tagFilter: "Filter tags",
|
tagFilter: "Filter tags",
|
||||||
forExample: "For example:",
|
forExample: "For example:",
|
||||||
embeddingsSearchPlaceholder: "Embeddings search",
|
|
||||||
help: {
|
help: {
|
||||||
simpleSearch: "Simple search",
|
simpleSearch: "Simple search",
|
||||||
advancedSearch: "Advanced search",
|
advancedSearch: "Advanced search",
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import '@babel/polyfill'
|
||||||
import 'mutationobserver-shim'
|
import 'mutationobserver-shim'
|
||||||
import Vue from 'vue'
|
import Vue from 'vue'
|
||||||
import './plugins/bootstrap-vue'
|
import './plugins/bootstrap-vue'
|
||||||
|
@ -1,118 +0,0 @@
|
|||||||
const inf = Number.POSITIVE_INFINITY;
|
|
||||||
const START_TOK = 49406;
|
|
||||||
const END_TOK = 49407;
|
|
||||||
|
|
||||||
function min(array, key) {
|
|
||||||
return array
|
|
||||||
.reduce((a, b) => (key(a, b) ? b : a))
|
|
||||||
}
|
|
||||||
|
|
||||||
class TupleSet extends Set {
|
|
||||||
add(elem) {
|
|
||||||
return super.add(elem.join("`"));
|
|
||||||
}
|
|
||||||
|
|
||||||
has(elem) {
|
|
||||||
return super.has(elem.join("`"));
|
|
||||||
}
|
|
||||||
|
|
||||||
toList() {
|
|
||||||
return [...this].map(x => x.split("`"))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
export class BPETokenizer {
|
|
||||||
|
|
||||||
_encoder = null;
|
|
||||||
_bpeRanks = null;
|
|
||||||
|
|
||||||
constructor(encoder, bpeRanks) {
|
|
||||||
this._encoder = encoder;
|
|
||||||
this._bpeRanks = bpeRanks;
|
|
||||||
}
|
|
||||||
|
|
||||||
getPairs(word) {
|
|
||||||
const pairs = new TupleSet();
|
|
||||||
|
|
||||||
let prevChar = word[0];
|
|
||||||
for (let i = 1; i < word.length; i++) {
|
|
||||||
pairs.add([prevChar, word[i]])
|
|
||||||
prevChar = word[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
return pairs.toList();
|
|
||||||
}
|
|
||||||
|
|
||||||
bpe(token) {
|
|
||||||
let word = [...token];
|
|
||||||
word[word.length - 1] += "</w>";
|
|
||||||
let pairs = this.getPairs(word)
|
|
||||||
|
|
||||||
if (pairs.length === 0) {
|
|
||||||
return token + "</w>"
|
|
||||||
}
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
const bigram = min(pairs, (a, b) => {
|
|
||||||
return (this._bpeRanks[a.join("`")] ?? inf) > (this._bpeRanks[b.join("`") ?? inf])
|
|
||||||
});
|
|
||||||
|
|
||||||
if (this._bpeRanks[bigram.join("`")] === undefined) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
const [first, second] = bigram;
|
|
||||||
let newWord = [];
|
|
||||||
let i = 0;
|
|
||||||
|
|
||||||
while (i < word.length) {
|
|
||||||
const j = word.indexOf(first, i);
|
|
||||||
if (j === -1) {
|
|
||||||
newWord.push(...word.slice(i));
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
newWord.push(...word.slice(i, j));
|
|
||||||
i = j;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (word[i] === first && i < word.length - 1 && word[i + 1] === second) {
|
|
||||||
newWord.push(first + second);
|
|
||||||
i += 2;
|
|
||||||
} else {
|
|
||||||
newWord.push(word[i]);
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
word = [...newWord]
|
|
||||||
if (word.length === 1) {
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
pairs = this.getPairs(word);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return word.join(" ");
|
|
||||||
}
|
|
||||||
|
|
||||||
encode(text) {
|
|
||||||
let bpeTokens = [];
|
|
||||||
text = text.trim();
|
|
||||||
text = text.replaceAll(/\s+/g, " ");
|
|
||||||
|
|
||||||
text
|
|
||||||
.match(/<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[a-zA-Z0-9]+/ig)
|
|
||||||
.forEach(token => {
|
|
||||||
bpeTokens.push(...this.bpe(token).split(" ").map(t => this._encoder[t]));
|
|
||||||
});
|
|
||||||
|
|
||||||
bpeTokens.unshift(START_TOK);
|
|
||||||
bpeTokens = bpeTokens.slice(0, 76);
|
|
||||||
bpeTokens.push(END_TOK);
|
|
||||||
while (bpeTokens.length < 77) {
|
|
||||||
bpeTokens.push(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return bpeTokens;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,8 +1,6 @@
|
|||||||
import BertTokenizer from "@/ml/BertTokenizer";
|
import BertTokenizer from "@/ml/BertTokenizer";
|
||||||
|
import * as tf from "@tensorflow/tfjs";
|
||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import {chunk as _chunk} from "underscore";
|
|
||||||
import * as ort from "onnxruntime-web";
|
|
||||||
import {argMax, downloadToBuffer, ORT_WASM_PATHS} from "@/ml/mlUtils";
|
|
||||||
|
|
||||||
export default class BertNerModel {
|
export default class BertNerModel {
|
||||||
vocabUrl;
|
vocabUrl;
|
||||||
@ -31,10 +29,7 @@ export default class BertNerModel {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async loadModel(onProgress) {
|
async loadModel(onProgress) {
|
||||||
ort.env.wasm.wasmPaths = ORT_WASM_PATHS;
|
this._model = await tf.loadGraphModel(this.modelUrl, {onProgress});
|
||||||
const buf = await downloadToBuffer(this.modelUrl, onProgress);
|
|
||||||
|
|
||||||
this._model = await ort.InferenceSession.create(buf.buffer, {executionProviders: ["wasm"]});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
alignLabels(labels, wordIds, words) {
|
alignLabels(labels, wordIds, words) {
|
||||||
@ -62,28 +57,21 @@ export default class BertNerModel {
|
|||||||
|
|
||||||
async predict(text, callback) {
|
async predict(text, callback) {
|
||||||
this._previousWordId = null;
|
this._previousWordId = null;
|
||||||
const encoded = this._tokenizer.encodeText(text, this.inputSize);
|
const encoded = this._tokenizer.encodeText(text, this.inputSize)
|
||||||
|
|
||||||
let i = 0;
|
|
||||||
for (let chunk of encoded.inputChunks) {
|
for (let chunk of encoded.inputChunks) {
|
||||||
|
const rawResult = tf.tidy(() => this._model.execute({
|
||||||
|
input_ids: tf.tensor2d(chunk.inputIds, [1, this.inputSize], "int32"),
|
||||||
|
token_type_ids: tf.tensor2d(chunk.segmentIds, [1, this.inputSize], "int32"),
|
||||||
|
attention_mask: tf.tensor2d(chunk.inputMask, [1, this.inputSize], "int32"),
|
||||||
|
}));
|
||||||
|
|
||||||
const results = await this._model.run({
|
const labelIds = await tf.argMax(rawResult, -1);
|
||||||
input_ids: new ort.Tensor("int32", chunk.inputIds, [1, this.inputSize]),
|
const labelIdsArray = await labelIds.array();
|
||||||
token_type_ids: new ort.Tensor("int32", chunk.segmentIds, [1, this.inputSize]),
|
const labels = labelIdsArray[0].map(id => this.id2label[id]);
|
||||||
attention_mask: new ort.Tensor("int32", chunk.inputMask, [1, this.inputSize]),
|
rawResult.dispose()
|
||||||
});
|
|
||||||
|
|
||||||
const labelIds = _chunk(results["output"].data, this.id2label.length).map(argMax);
|
callback(this.alignLabels(labels, chunk.wordIds, encoded.words))
|
||||||
const labels = labelIds.map(id => this.id2label[id]);
|
|
||||||
|
|
||||||
callback(this.alignLabels(labels, chunk.wordIds, encoded.words));
|
|
||||||
|
|
||||||
i += 1;
|
|
||||||
|
|
||||||
// give browser some time to repaint
|
|
||||||
if (i % 2 === 0) {
|
|
||||||
await new Promise(resolve => setTimeout(resolve, 0));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,5 +1,4 @@
|
|||||||
import {zip, chunk} from "underscore";
|
import {zip, chunk} from "underscore";
|
||||||
import {toInt64} from "@/ml/mlUtils";
|
|
||||||
|
|
||||||
const UNK_INDEX = 100;
|
const UNK_INDEX = 100;
|
||||||
const CLS_INDEX = 101;
|
const CLS_INDEX = 101;
|
||||||
|
@ -1,57 +0,0 @@
|
|||||||
import * as ort from "onnxruntime-web";
|
|
||||||
import {BPETokenizer} from "@/ml/BPETokenizer";
|
|
||||||
import axios from "axios";
|
|
||||||
import {downloadToBuffer, ORT_WASM_PATHS} from "@/ml/mlUtils";
|
|
||||||
import ModelStore from "@/ml/ModelStore";
|
|
||||||
|
|
||||||
export class CLIPTransformerModel {
|
|
||||||
|
|
||||||
_modelUrl = null;
|
|
||||||
_tokenizerUrl = null;
|
|
||||||
_model = null;
|
|
||||||
_tokenizer = null;
|
|
||||||
|
|
||||||
constructor(modelUrl, tokenizerUrl) {
|
|
||||||
this._modelUrl = modelUrl;
|
|
||||||
this._tokenizerUrl = tokenizerUrl;
|
|
||||||
}
|
|
||||||
|
|
||||||
async init(onProgress) {
|
|
||||||
await Promise.all([this.loadTokenizer(), this.loadModel(onProgress)]);
|
|
||||||
}
|
|
||||||
|
|
||||||
async loadModel(onProgress) {
|
|
||||||
ort.env.wasm.wasmPaths = ORT_WASM_PATHS;
|
|
||||||
ort.env.wasm.numThreads = 2;
|
|
||||||
|
|
||||||
let buf = await ModelStore.get(this._modelUrl);
|
|
||||||
if (!buf) {
|
|
||||||
buf = await downloadToBuffer(this._modelUrl, onProgress);
|
|
||||||
await ModelStore.set(this._modelUrl, buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
this._model = await ort.InferenceSession.create(buf.buffer, {
|
|
||||||
executionProviders: ["wasm"],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
async loadTokenizer() {
|
|
||||||
const resp = await axios.get(this._tokenizerUrl);
|
|
||||||
this._tokenizer = new BPETokenizer(resp.data.encoder, resp.data.bpe_ranks)
|
|
||||||
}
|
|
||||||
|
|
||||||
async predict(text) {
|
|
||||||
const tokenized = this._tokenizer.encode(text);
|
|
||||||
|
|
||||||
const inputs = {
|
|
||||||
input_ids: new ort.Tensor("int32", tokenized, [1, 77])
|
|
||||||
};
|
|
||||||
|
|
||||||
const results = await this._model.run(inputs);
|
|
||||||
|
|
||||||
return Array.from(
|
|
||||||
Object.values(results)
|
|
||||||
.find(result => result.size === 512).data
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,67 +0,0 @@
|
|||||||
class ModelStore {
|
|
||||||
|
|
||||||
_ok;
|
|
||||||
_db;
|
|
||||||
_resolve;
|
|
||||||
_loadingPromise;
|
|
||||||
|
|
||||||
constructor() {
|
|
||||||
const request = window.indexedDB.open("ModelStore", 1);
|
|
||||||
|
|
||||||
request.onerror = () => {
|
|
||||||
this._ok = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
request.onupgradeneeded = event => {
|
|
||||||
const db = event.target.result;
|
|
||||||
db.createObjectStore("models");
|
|
||||||
}
|
|
||||||
|
|
||||||
request.onsuccess = () => {
|
|
||||||
this._ok = true;
|
|
||||||
this._db = request.result;
|
|
||||||
|
|
||||||
this._resolve();
|
|
||||||
}
|
|
||||||
|
|
||||||
this._loadingPromise = new Promise(resolve => this._resolve = resolve);
|
|
||||||
}
|
|
||||||
|
|
||||||
async get(key) {
|
|
||||||
await this._loadingPromise;
|
|
||||||
|
|
||||||
const req = this._db.transaction(["models"], "readwrite")
|
|
||||||
.objectStore("models")
|
|
||||||
.get(key);
|
|
||||||
|
|
||||||
return new Promise(resolve => {
|
|
||||||
req.onsuccess = event => {
|
|
||||||
resolve(event.target.result);
|
|
||||||
};
|
|
||||||
req.onerror = event => {
|
|
||||||
console.log("ERROR:");
|
|
||||||
console.log(event);
|
|
||||||
resolve(null);
|
|
||||||
};
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
async set(key, val) {
|
|
||||||
await this._loadingPromise;
|
|
||||||
|
|
||||||
const req = this._db.transaction(["models"], "readwrite")
|
|
||||||
.objectStore("models")
|
|
||||||
.put(val, key);
|
|
||||||
|
|
||||||
return new Promise(resolve => {
|
|
||||||
req.onsuccess = () => {
|
|
||||||
resolve(true);
|
|
||||||
};
|
|
||||||
req.onerror = () => {
|
|
||||||
resolve(false);
|
|
||||||
};
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
export default new ModelStore();
|
|
@ -1,46 +0,0 @@
|
|||||||
export async function downloadToBuffer(url, onProgress) {
|
|
||||||
const resp = await fetch(url);
|
|
||||||
|
|
||||||
const contentLength = +resp.headers.get("Content-Length");
|
|
||||||
const buf = new Uint8ClampedArray(contentLength);
|
|
||||||
const reader = resp.body.getReader();
|
|
||||||
let cursor = 0;
|
|
||||||
|
|
||||||
if (onProgress) {
|
|
||||||
onProgress(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
const {done, value} = await reader.read();
|
|
||||||
|
|
||||||
if (done) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
buf.set(value, cursor);
|
|
||||||
cursor += value.length;
|
|
||||||
|
|
||||||
if (onProgress) {
|
|
||||||
onProgress(cursor / contentLength);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
export function argMax(array) {
|
|
||||||
return array
|
|
||||||
.map((x, i) => [x, i])
|
|
||||||
.reduce((r, a) => (a[0] > r[0] ? a : r))[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
export function toInt64(array) {
|
|
||||||
return new BigInt64Array(array.map(BigInt));
|
|
||||||
}
|
|
||||||
|
|
||||||
export const ORT_WASM_PATHS = {
|
|
||||||
"ort-wasm-simd.wasm": "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.15.1/dist/ort-wasm-simd.wasm",
|
|
||||||
"ort-wasm.wasm": "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.15.1/dist/ort-wasm.wasm",
|
|
||||||
"ort-wasm-simd-threaded.wasm": "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.15.1/dist/ort-wasm-simd-threaded.wasm",
|
|
||||||
"ort-wasm-threaded.wasm": "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.15.1/dist/ort-wasm-threaded.wasm",
|
|
||||||
}
|
|
@ -23,9 +23,6 @@ export default new Vuex.Store({
|
|||||||
dateMin: undefined,
|
dateMin: undefined,
|
||||||
dateMax: undefined,
|
dateMax: undefined,
|
||||||
searchText: "",
|
searchText: "",
|
||||||
embeddingText: "",
|
|
||||||
embedding: null,
|
|
||||||
embeddingDoc: null,
|
|
||||||
pathText: "",
|
pathText: "",
|
||||||
sortMode: "score",
|
sortMode: "score",
|
||||||
|
|
||||||
@ -94,11 +91,10 @@ export default new Vuex.Store({
|
|||||||
uiMimeMap: [] as any[],
|
uiMimeMap: [] as any[],
|
||||||
|
|
||||||
auth0Token: null,
|
auth0Token: null,
|
||||||
nerModel: {
|
mlModel: {
|
||||||
model: null,
|
model: null,
|
||||||
name: null
|
name: null
|
||||||
},
|
},
|
||||||
embeddingsModel: null
|
|
||||||
},
|
},
|
||||||
mutations: {
|
mutations: {
|
||||||
setUiShowDetails: (state, val) => state.uiShowDetails = val,
|
setUiShowDetails: (state, val) => state.uiShowDetails = val,
|
||||||
@ -133,9 +129,6 @@ export default new Vuex.Store({
|
|||||||
setDateBoundsMin: (state, val) => state.dateBoundsMin = val,
|
setDateBoundsMin: (state, val) => state.dateBoundsMin = val,
|
||||||
setDateBoundsMax: (state, val) => state.dateBoundsMax = val,
|
setDateBoundsMax: (state, val) => state.dateBoundsMax = val,
|
||||||
setSearchText: (state, val) => state.searchText = val,
|
setSearchText: (state, val) => state.searchText = val,
|
||||||
setEmbeddingText: (state, val) => state.embeddingText = val,
|
|
||||||
setEmbedding: (state, val) => state.embedding = val,
|
|
||||||
setEmbeddingDoc: (state, val) => state.embeddingDoc = val,
|
|
||||||
setFuzzy: (state, val) => state.fuzzy = val,
|
setFuzzy: (state, val) => state.fuzzy = val,
|
||||||
setLastQueryResult: (state, val) => state.lastQueryResults = val,
|
setLastQueryResult: (state, val) => state.lastQueryResults = val,
|
||||||
setFirstQueryResult: (state, val) => state.firstQueryResults = val,
|
setFirstQueryResult: (state, val) => state.firstQueryResults = val,
|
||||||
@ -219,8 +212,7 @@ export default new Vuex.Store({
|
|||||||
// noop
|
// noop
|
||||||
},
|
},
|
||||||
setAuth0Token: (state, val) => state.auth0Token = val,
|
setAuth0Token: (state, val) => state.auth0Token = val,
|
||||||
setNerModel: (state, val) => state.nerModel = val,
|
setMlModel: (state, val) => state.mlModel = val,
|
||||||
setEmbeddingsModel: (state, val) => state.embeddingsModel = val,
|
|
||||||
},
|
},
|
||||||
actions: {
|
actions: {
|
||||||
setSist2Info: (store, val) => {
|
setSist2Info: (store, val) => {
|
||||||
@ -378,9 +370,7 @@ export default new Vuex.Store({
|
|||||||
},
|
},
|
||||||
modules: {},
|
modules: {},
|
||||||
getters: {
|
getters: {
|
||||||
nerModel: (state) => state.nerModel,
|
mlModel: (state) => state.mlModel,
|
||||||
embeddingsModel: (state) => state.embeddingsModel,
|
|
||||||
embedding: (state) => state.embedding,
|
|
||||||
seed: (state) => state.seed,
|
seed: (state) => state.seed,
|
||||||
getPathText: (state) => state.pathText,
|
getPathText: (state) => state.pathText,
|
||||||
indices: state => state.indices,
|
indices: state => state.indices,
|
||||||
@ -399,7 +389,6 @@ export default new Vuex.Store({
|
|||||||
sizeMin: state => state.sizeMin,
|
sizeMin: state => state.sizeMin,
|
||||||
sizeMax: state => state.sizeMax,
|
sizeMax: state => state.sizeMax,
|
||||||
searchText: state => state.searchText,
|
searchText: state => state.searchText,
|
||||||
embeddingText: state => state.embeddingText,
|
|
||||||
pathText: state => state.pathText,
|
pathText: state => state.pathText,
|
||||||
fuzzy: state => state.fuzzy,
|
fuzzy: state => state.fuzzy,
|
||||||
size: state => state.optSize,
|
size: state => state.optSize,
|
||||||
|
@ -13,7 +13,6 @@
|
|||||||
|
|
||||||
<b-card v-show="!uiLoading && !showEsConnectionError" id="search-panel">
|
<b-card v-show="!uiLoading && !showEsConnectionError" id="search-panel">
|
||||||
<SearchBar @show-help="showHelp=true"></SearchBar>
|
<SearchBar @show-help="showHelp=true"></SearchBar>
|
||||||
<EmbeddingsSearchBar v-if="hasEmbeddings" class="mt-3"></EmbeddingsSearchBar>
|
|
||||||
<b-row>
|
<b-row>
|
||||||
<b-col style="height: 70px;" sm="6">
|
<b-col style="height: 70px;" sm="6">
|
||||||
<SizeSlider></SizeSlider>
|
<SizeSlider></SizeSlider>
|
||||||
@ -59,14 +58,16 @@
|
|||||||
</div>
|
</div>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script>
|
<script lang="ts">
|
||||||
import Preloader from "@/components/Preloader.vue";
|
import Preloader from "@/components/Preloader.vue";
|
||||||
import {mapActions, mapGetters, mapMutations} from "vuex";
|
import {mapActions, mapGetters, mapMutations} from "vuex";
|
||||||
|
import sist2 from "../Sist2Api";
|
||||||
|
import Sist2Api, {EsHit, EsResult} from "../Sist2Api";
|
||||||
import SearchBar from "@/components/SearchBar.vue";
|
import SearchBar from "@/components/SearchBar.vue";
|
||||||
import IndexPicker from "@/components/IndexPicker.vue";
|
import IndexPicker from "@/components/IndexPicker.vue";
|
||||||
import Vue from "vue";
|
import Vue from "vue";
|
||||||
import Sist2Query from "@/Sist2ElasticsearchQuery";
|
import Sist2Query from "@/Sist2ElasticsearchQuery";
|
||||||
import {debounce as _debounce} from "underscore";
|
import _debounce from "lodash/debounce";
|
||||||
import DocCardWall from "@/components/DocCardWall.vue";
|
import DocCardWall from "@/components/DocCardWall.vue";
|
||||||
import Lightbox from "@/components/Lightbox.vue";
|
import Lightbox from "@/components/Lightbox.vue";
|
||||||
import LightboxCaption from "@/components/LightboxCaption.vue";
|
import LightboxCaption from "@/components/LightboxCaption.vue";
|
||||||
@ -78,13 +79,11 @@ import DateSlider from "@/components/DateSlider.vue";
|
|||||||
import TagPicker from "@/components/TagPicker.vue";
|
import TagPicker from "@/components/TagPicker.vue";
|
||||||
import DocList from "@/components/DocList.vue";
|
import DocList from "@/components/DocList.vue";
|
||||||
import HelpDialog from "@/components/HelpDialog.vue";
|
import HelpDialog from "@/components/HelpDialog.vue";
|
||||||
import EmbeddingsSearchBar from "@/components/EmbeddingsSearchBar.vue";
|
import Sist2SqliteQuery from "@/Sist2SqliteQuery";
|
||||||
import Sist2Api from "@/Sist2Api";
|
|
||||||
|
|
||||||
|
|
||||||
export default Vue.extend({
|
export default Vue.extend({
|
||||||
components: {
|
components: {
|
||||||
EmbeddingsSearchBar,
|
|
||||||
HelpDialog,
|
HelpDialog,
|
||||||
DocList,
|
DocList,
|
||||||
TagPicker,
|
TagPicker,
|
||||||
@ -94,8 +93,8 @@ export default Vue.extend({
|
|||||||
data: () => ({
|
data: () => ({
|
||||||
loading: false,
|
loading: false,
|
||||||
uiLoading: true,
|
uiLoading: true,
|
||||||
search: undefined,
|
search: undefined as any,
|
||||||
docs: [],
|
docs: [] as EsHit[],
|
||||||
docIds: new Set(),
|
docIds: new Set(),
|
||||||
docChecksums: new Set(),
|
docChecksums: new Set(),
|
||||||
searchBusy: false,
|
searchBusy: false,
|
||||||
@ -109,16 +108,16 @@ export default Vue.extend({
|
|||||||
mounted() {
|
mounted() {
|
||||||
// Handle touch events
|
// Handle touch events
|
||||||
window.ontouchend = () => this.$store.commit("busTouchEnd");
|
window.ontouchend = () => this.$store.commit("busTouchEnd");
|
||||||
window.ontouchcancel = () => this.$store.commit("busTouchEnd");
|
window.ontouchcancel = this.$store.commit("busTouchEnd");
|
||||||
|
|
||||||
this.search = _debounce(async (clear) => {
|
this.search = _debounce(async (clear: boolean) => {
|
||||||
if (clear) {
|
if (clear) {
|
||||||
await this.clearResults();
|
await this.clearResults();
|
||||||
}
|
}
|
||||||
|
|
||||||
await this.searchNow();
|
await this.searchNow();
|
||||||
|
|
||||||
}, 350, false);
|
}, 350, {leading: false});
|
||||||
|
|
||||||
this.$store.dispatch("loadFromArgs", this.$route).then(() => {
|
this.$store.dispatch("loadFromArgs", this.$route).then(() => {
|
||||||
this.$store.subscribe(() => this.$store.dispatch("updateArgs", this.$router));
|
this.$store.subscribe(() => this.$store.dispatch("updateArgs", this.$router));
|
||||||
@ -127,7 +126,6 @@ export default Vue.extend({
|
|||||||
"setSizeMin", "setSizeMax", "setDateMin", "setDateMax", "setSearchText", "setPathText",
|
"setSizeMin", "setSizeMax", "setDateMin", "setDateMax", "setSearchText", "setPathText",
|
||||||
"setSortMode", "setOptHighlight", "setOptFragmentSize", "setFuzzy", "setSize", "setSelectedIndices",
|
"setSortMode", "setOptHighlight", "setOptFragmentSize", "setFuzzy", "setSize", "setSelectedIndices",
|
||||||
"setSelectedMimeTypes", "setSelectedTags", "setOptQueryMode", "setOptSearchInPath",
|
"setSelectedMimeTypes", "setSelectedTags", "setOptQueryMode", "setOptSearchInPath",
|
||||||
"setEmbedding"
|
|
||||||
].includes(mutation.type)) {
|
].includes(mutation.type)) {
|
||||||
if (this.searchBusy) {
|
if (this.searchBusy) {
|
||||||
return;
|
return;
|
||||||
@ -154,7 +152,7 @@ export default Vue.extend({
|
|||||||
}).catch(error => {
|
}).catch(error => {
|
||||||
console.log(error);
|
console.log(error);
|
||||||
|
|
||||||
if (error.response.status === 503 || error.response.status === 500) {
|
if (error.response.status == 503 || error.response.status == 500) {
|
||||||
this.showEsConnectionError = true;
|
this.showEsConnectionError = true;
|
||||||
this.uiLoading = false;
|
this.uiLoading = false;
|
||||||
} else {
|
} else {
|
||||||
@ -172,12 +170,6 @@ export default Vue.extend({
|
|||||||
setDateBoundsMax: "setDateBoundsMax",
|
setDateBoundsMax: "setDateBoundsMax",
|
||||||
setTags: "setTags",
|
setTags: "setTags",
|
||||||
}),
|
}),
|
||||||
hasEmbeddings() {
|
|
||||||
if (!this.loading) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return Sist2Api.models().some();
|
|
||||||
},
|
|
||||||
showErrorToast() {
|
showErrorToast() {
|
||||||
this.$bvToast.toast(
|
this.$bvToast.toast(
|
||||||
this.$t("toast.esConnErr"),
|
this.$t("toast.esConnErr"),
|
||||||
@ -189,7 +181,7 @@ export default Vue.extend({
|
|||||||
bodyClass: "toast-body-error",
|
bodyClass: "toast-body-error",
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
showSyntaxErrorToast: function () {
|
showSyntaxErrorToast: function (): void {
|
||||||
this.$bvToast.toast(
|
this.$bvToast.toast(
|
||||||
this.$t("toast.esQueryErr"),
|
this.$t("toast.esQueryErr"),
|
||||||
{
|
{
|
||||||
@ -205,11 +197,10 @@ export default Vue.extend({
|
|||||||
await this.$store.dispatch("incrementQuerySequence");
|
await this.$store.dispatch("incrementQuerySequence");
|
||||||
this.$store.commit("busSearch");
|
this.$store.commit("busSearch");
|
||||||
|
|
||||||
Sist2Api.search().then(async (resp) => {
|
Sist2Api.search().then(async (resp: EsResult) => {
|
||||||
await this.handleSearch(resp);
|
await this.handleSearch(resp);
|
||||||
this.searchBusy = false;
|
this.searchBusy = false;
|
||||||
}).catch(err => {
|
}).catch(err => {
|
||||||
console.log(err)
|
|
||||||
if (err.response.status === 500 && this.$store.state.optQueryMode === "advanced") {
|
if (err.response.status === 500 && this.$store.state.optQueryMode === "advanced") {
|
||||||
this.showSyntaxErrorToast();
|
this.showSyntaxErrorToast();
|
||||||
} else {
|
} else {
|
||||||
@ -224,8 +215,8 @@ export default Vue.extend({
|
|||||||
await this.$store.dispatch("clearResults");
|
await this.$store.dispatch("clearResults");
|
||||||
this.$store.commit("setUiReachedScrollEnd", false);
|
this.$store.commit("setUiReachedScrollEnd", false);
|
||||||
},
|
},
|
||||||
async handleSearch(resp) {
|
async handleSearch(resp: EsResult) {
|
||||||
if (resp.hits.hits.length === 0 || resp.hits.hits.length < this.$store.state.optSize) {
|
if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
|
||||||
this.$store.commit("setUiReachedScrollEnd", true);
|
this.$store.commit("setUiReachedScrollEnd", true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
const TerserPlugin = require("terser-webpack-plugin");
|
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
filenameHashing: false,
|
filenameHashing: false,
|
||||||
productionSourceMap: false,
|
productionSourceMap: false,
|
||||||
@ -8,19 +6,5 @@ module.exports = {
|
|||||||
index: {
|
index: {
|
||||||
entry: "src/main.js"
|
entry: "src/main.js"
|
||||||
}
|
}
|
||||||
},
|
|
||||||
configureWebpack: config => {
|
|
||||||
config.optimization.minimizer = [new TerserPlugin({
|
|
||||||
terserOptions: {
|
|
||||||
compress: {
|
|
||||||
passes: 2,
|
|
||||||
module: true,
|
|
||||||
hoist_funs: true,
|
|
||||||
// https://github.com/microsoft/onnxruntime/issues/16984
|
|
||||||
unused: false,
|
|
||||||
},
|
|
||||||
mangle: true,
|
|
||||||
}
|
|
||||||
})]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
55
src/cli.c
55
src/cli.c
@ -38,6 +38,11 @@ scan_args_t *scan_args_create() {
|
|||||||
return args;
|
return args;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
exec_args_t *exec_args_create() {
|
||||||
|
exec_args_t *args = calloc(sizeof(exec_args_t), 1);
|
||||||
|
return args;
|
||||||
|
}
|
||||||
|
|
||||||
void scan_args_destroy(scan_args_t *args) {
|
void scan_args_destroy(scan_args_t *args) {
|
||||||
if (args->name != NULL) {
|
if (args->name != NULL) {
|
||||||
free(args->name);
|
free(args->name);
|
||||||
@ -69,11 +74,19 @@ void web_args_destroy(web_args_t *args) {
|
|||||||
free(args);
|
free(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sqlite_index_args_destroy(sqlite_index_args_t *args) {
|
void exec_args_destroy(exec_args_t *args) {
|
||||||
|
|
||||||
|
if (args->index_path != NULL) {
|
||||||
free(args->index_path);
|
free(args->index_path);
|
||||||
|
}
|
||||||
|
|
||||||
free(args);
|
free(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void sqlite_index_args_destroy(sqlite_index_args_t *args) {
|
||||||
|
// TODO
|
||||||
|
}
|
||||||
|
|
||||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
fprintf(stderr, "Required positional argument: PATH.\n");
|
fprintf(stderr, "Required positional argument: PATH.\n");
|
||||||
@ -613,3 +626,43 @@ web_args_t *web_args_create() {
|
|||||||
web_args_t *args = calloc(sizeof(web_args_t), 1);
|
web_args_t *args = calloc(sizeof(web_args_t), 1);
|
||||||
return args;
|
return args;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
|
||||||
|
|
||||||
|
if (argc < 2) {
|
||||||
|
fprintf(stderr, "Required positional argument: PATH.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *index_path = abspath(argv[1]);
|
||||||
|
if (index_path == NULL) {
|
||||||
|
LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1]);
|
||||||
|
} else {
|
||||||
|
args->index_path = index_path;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->es_url == NULL) {
|
||||||
|
args->es_url = DEFAULT_ES_URL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->es_index == NULL) {
|
||||||
|
args->es_index = DEFAULT_ES_INDEX;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->script_path == NULL) {
|
||||||
|
LOG_FATAL("cli.c", "--script-file argument is required");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (load_external_file(args->script_path, &args->script) != 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path);
|
||||||
|
|
||||||
|
char log_buf[5000];
|
||||||
|
strncpy(log_buf, args->script, sizeof(log_buf));
|
||||||
|
*(log_buf + sizeof(log_buf) - 1) = '\0';
|
||||||
|
LOG_DEBUGF("cli.c", "arg script=%s", log_buf);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
16
src/cli.h
16
src/cli.h
@ -102,6 +102,16 @@ typedef struct web_args {
|
|||||||
search_backend_t search_backend;
|
search_backend_t search_backend;
|
||||||
} web_args_t;
|
} web_args_t;
|
||||||
|
|
||||||
|
typedef struct exec_args {
|
||||||
|
char *es_url;
|
||||||
|
char *es_index;
|
||||||
|
int es_insecure_ssl;
|
||||||
|
char *index_path;
|
||||||
|
const char *script_path;
|
||||||
|
int async_script;
|
||||||
|
char *script;
|
||||||
|
} exec_args_t;
|
||||||
|
|
||||||
index_args_t *index_args_create();
|
index_args_t *index_args_create();
|
||||||
|
|
||||||
sqlite_index_args_t *sqlite_index_args_create();
|
sqlite_index_args_t *sqlite_index_args_create();
|
||||||
@ -118,6 +128,12 @@ int sqlite_index_args_validate(sqlite_index_args_t *args, int argc, const char *
|
|||||||
|
|
||||||
int web_args_validate(web_args_t *args, int argc, const char **argv);
|
int web_args_validate(web_args_t *args, int argc, const char **argv);
|
||||||
|
|
||||||
|
exec_args_t *exec_args_create();
|
||||||
|
|
||||||
|
void exec_args_destroy(exec_args_t *args);
|
||||||
|
|
||||||
|
int exec_args_validate(exec_args_t *args, int argc, const char **argv);
|
||||||
|
|
||||||
void sqlite_index_args_destroy(sqlite_index_args_t *args);
|
void sqlite_index_args_destroy(sqlite_index_args_t *args);
|
||||||
|
|
||||||
|
|
||||||
|
@ -163,8 +163,7 @@ void database_open(database_t *db) {
|
|||||||
&db->write_document_sidecar_stmt, NULL));
|
&db->write_document_sidecar_stmt, NULL));
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||||
db->db,
|
db->db,
|
||||||
"REPLACE INTO document (id, mtime, size, json_data, version) VALUES (?, ?, ?, ?, (SELECT max(id) FROM version));",
|
"REPLACE INTO document (id, mtime, size, json_data, version) VALUES (?, ?, ?, ?, (SELECT max(id) FROM version));", -1,
|
||||||
-1,
|
|
||||||
&db->write_document_stmt, NULL));
|
&db->write_document_stmt, NULL));
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||||
db->db,
|
db->db,
|
||||||
@ -176,14 +175,6 @@ void database_open(database_t *db) {
|
|||||||
db->db, "SELECT json_data FROM document WHERE id=?", -1,
|
db->db, "SELECT json_data FROM document WHERE id=?", -1,
|
||||||
&db->get_document, NULL));
|
&db->get_document, NULL));
|
||||||
|
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
|
||||||
db->db, "SELECT * FROM model", -1,
|
|
||||||
&db->get_models, NULL));
|
|
||||||
|
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
|
||||||
db->db, "SELECT embedding FROM embedding WHERE id=? AND model_id=? AND start=0", -1,
|
|
||||||
&db->get_embedding, NULL));
|
|
||||||
|
|
||||||
// Create functions
|
// Create functions
|
||||||
sqlite3_create_function(
|
sqlite3_create_function(
|
||||||
db->db,
|
db->db,
|
||||||
@ -195,17 +186,6 @@ void database_open(database_t *db) {
|
|||||||
NULL,
|
NULL,
|
||||||
NULL
|
NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
sqlite3_create_function(
|
|
||||||
db->db,
|
|
||||||
"emb_to_json",
|
|
||||||
1,
|
|
||||||
SQLITE_UTF8,
|
|
||||||
NULL,
|
|
||||||
emb_to_json_func,
|
|
||||||
NULL,
|
|
||||||
NULL
|
|
||||||
);
|
|
||||||
} else if (db->type == IPC_CONSUMER_DATABASE) {
|
} else if (db->type == IPC_CONSUMER_DATABASE) {
|
||||||
|
|
||||||
sqlite3_create_function(
|
sqlite3_create_function(
|
||||||
@ -268,10 +248,6 @@ void database_open(database_t *db) {
|
|||||||
db->db, "SELECT tag, count(*) FROM tag GROUP BY tag", -1,
|
db->db, "SELECT tag, count(*) FROM tag GROUP BY tag", -1,
|
||||||
&db->fts_get_tags, NULL));
|
&db->fts_get_tags, NULL));
|
||||||
|
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
|
||||||
db->db, "SELECT size FROM model WHERE id=?", -1,
|
|
||||||
&db->fts_model_size, NULL));
|
|
||||||
|
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||||
db->db, "SELECT path, count FROM path_index"
|
db->db, "SELECT path, count FROM path_index"
|
||||||
" WHERE (index_id=?1 OR ?1 IS NULL) AND depth BETWEEN ? AND ?"
|
" WHERE (index_id=?1 OR ?1 IS NULL) AND depth BETWEEN ? AND ?"
|
||||||
@ -326,17 +302,6 @@ void database_open(database_t *db) {
|
|||||||
NULL,
|
NULL,
|
||||||
NULL
|
NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
sqlite3_create_function(
|
|
||||||
db->db,
|
|
||||||
"cosine_sim",
|
|
||||||
3,
|
|
||||||
SQLITE_UTF8,
|
|
||||||
NULL,
|
|
||||||
cosine_sim_func,
|
|
||||||
NULL,
|
|
||||||
NULL
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (db->type == FTS_DATABASE || db->type == INDEX_DATABASE) {
|
if (db->type == FTS_DATABASE || db->type == INDEX_DATABASE) {
|
||||||
@ -498,31 +463,31 @@ database_iterator_t *database_create_document_iterator(database_t *db) {
|
|||||||
|
|
||||||
sqlite3_stmt *stmt;
|
sqlite3_stmt *stmt;
|
||||||
|
|
||||||
CRASH_IF_NOT_SQLITE_OK(
|
// TODO optimization: remove mtime, size, _id from json_data
|
||||||
sqlite3_prepare_v2(
|
|
||||||
db->db,
|
sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE"
|
||||||
"WITH doc (j) AS (SELECT CASE"
|
" WHEN sc.json_data IS NULL THEN"
|
||||||
" WHEN emb.embedding IS NULL THEN"
|
" CASE"
|
||||||
" json_set(document.json_data, "
|
" WHEN t.tag IS NULL THEN"
|
||||||
" '$._id', document.id, "
|
" json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime)"
|
||||||
" '$.size', document.size, "
|
|
||||||
" '$.mtime', document.mtime, "
|
|
||||||
" '$.tag', json_group_array((SELECT tag FROM tag WHERE document.id = tag.id)))"
|
|
||||||
" ELSE"
|
" ELSE"
|
||||||
" json_set(document.json_data,"
|
" json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime, '$.tag', json_group_array(t.tag))"
|
||||||
" '$._id', document.id,"
|
" END"
|
||||||
" '$.size', document.size,"
|
" ELSE"
|
||||||
" '$.mtime', document.mtime,"
|
" CASE"
|
||||||
" '$.tag', json_group_array((SELECT tag FROM tag WHERE document.id = tag.id)),"
|
" WHEN t.tag IS NULL THEN"
|
||||||
" '$.emb', json_group_object(m.path, json(emb_to_json(emb.embedding))),"
|
" json_patch(json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime), sc.json_data)"
|
||||||
" '$.embedding', 1)"
|
" ELSE"
|
||||||
|
// This will overwrite any tags specified in the sidecar file!
|
||||||
|
// TODO: concatenate the two arrays?
|
||||||
|
" json_set(json_patch(document.json_data, sc.json_data), '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime, '$.tag', json_group_array(t.tag))"
|
||||||
|
" END"
|
||||||
" END"
|
" END"
|
||||||
" FROM document"
|
" FROM document"
|
||||||
" LEFT JOIN embedding emb ON document.id = emb.id"
|
" LEFT JOIN document_sidecar sc ON document.id = sc.id"
|
||||||
" LEFT JOIN model m ON emb.model_id = m.id"
|
" LEFT JOIN tag t ON document.id = t.id"
|
||||||
" GROUP BY document.id)"
|
" GROUP BY document.id)"
|
||||||
" SELECT json_set(j, '$.index', (SELECT id FROM descriptor)) FROM doc",
|
" SELECT json_set(j, '$.index', (SELECT id FROM descriptor)) FROM doc", -1, &stmt, NULL);
|
||||||
-1, &stmt, NULL));
|
|
||||||
|
|
||||||
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
|
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
|
||||||
|
|
||||||
@ -532,13 +497,6 @@ database_iterator_t *database_create_document_iterator(database_t *db) {
|
|||||||
return iter;
|
return iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Remove the "tag" member from a document when the first element of that
 * member is a JSON null.
 *
 * Documents that have no "tag" member, or whose first tag element is not
 * null, are left untouched.
 *
 * @param doc Parsed document object to clean up in place.
 */
void remove_tag_if_null(cJSON *doc) {
    cJSON *tag_list = cJSON_GetObjectItem(doc, "tag");
    if (tag_list == NULL) {
        return;
    }

    const cJSON *first_tag = cJSON_GetArrayItem(tag_list, 0);
    if (!cJSON_IsNull(first_tag)) {
        return;
    }

    cJSON_DeleteItemFromObject(doc, "tag");
}
|
|
||||||
|
|
||||||
cJSON *database_document_iter(database_iterator_t *iter) {
|
cJSON *database_document_iter(database_iterator_t *iter) {
|
||||||
|
|
||||||
if (iter->stmt == NULL) {
|
if (iter->stmt == NULL) {
|
||||||
@ -550,12 +508,7 @@ cJSON *database_document_iter(database_iterator_t *iter) {
|
|||||||
|
|
||||||
if (ret == SQLITE_ROW) {
|
if (ret == SQLITE_ROW) {
|
||||||
const char *json_string = (const char *) sqlite3_column_text(iter->stmt, 0);
|
const char *json_string = (const char *) sqlite3_column_text(iter->stmt, 0);
|
||||||
|
return cJSON_Parse(json_string);
|
||||||
cJSON *doc = cJSON_Parse(json_string);
|
|
||||||
|
|
||||||
remove_tag_if_null(doc);
|
|
||||||
|
|
||||||
return doc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret != SQLITE_DONE) {
|
if (ret != SQLITE_DONE) {
|
||||||
|
@ -41,7 +41,6 @@ typedef enum {
|
|||||||
FTS_SORT_RANDOM,
|
FTS_SORT_RANDOM,
|
||||||
FTS_SORT_NAME,
|
FTS_SORT_NAME,
|
||||||
FTS_SORT_ID,
|
FTS_SORT_ID,
|
||||||
FTS_SORT_EMBEDDING
|
|
||||||
} fts_sort_t;
|
} fts_sort_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -84,8 +83,6 @@ typedef struct database {
|
|||||||
sqlite3_stmt *write_document_sidecar_stmt;
|
sqlite3_stmt *write_document_sidecar_stmt;
|
||||||
sqlite3_stmt *write_thumbnail_stmt;
|
sqlite3_stmt *write_thumbnail_stmt;
|
||||||
sqlite3_stmt *get_document;
|
sqlite3_stmt *get_document;
|
||||||
sqlite3_stmt *get_models;
|
|
||||||
sqlite3_stmt *get_embedding;
|
|
||||||
|
|
||||||
sqlite3_stmt *delete_tag_stmt;
|
sqlite3_stmt *delete_tag_stmt;
|
||||||
sqlite3_stmt *write_tag_stmt;
|
sqlite3_stmt *write_tag_stmt;
|
||||||
@ -103,8 +100,6 @@ typedef struct database {
|
|||||||
sqlite3_stmt *fts_get_document;
|
sqlite3_stmt *fts_get_document;
|
||||||
sqlite3_stmt *fts_suggest_tag;
|
sqlite3_stmt *fts_suggest_tag;
|
||||||
sqlite3_stmt *fts_get_tags;
|
sqlite3_stmt *fts_get_tags;
|
||||||
sqlite3_stmt *fts_model_size;
|
|
||||||
|
|
||||||
|
|
||||||
char **tag_array;
|
char **tag_array;
|
||||||
|
|
||||||
@ -145,8 +140,6 @@ void database_write_document(database_t *db, document_t *doc, const char *json_d
|
|||||||
|
|
||||||
database_iterator_t *database_create_document_iterator(database_t *db);
|
database_iterator_t *database_create_document_iterator(database_t *db);
|
||||||
|
|
||||||
void emb_to_json_func(sqlite3_context *ctx, int argc, sqlite3_value **argv);
|
|
||||||
|
|
||||||
cJSON *database_document_iter(database_iterator_t *);
|
cJSON *database_document_iter(database_iterator_t *);
|
||||||
|
|
||||||
#define database_document_iter_foreach(element, iter) \
|
#define database_document_iter_foreach(element, iter) \
|
||||||
@ -217,8 +210,7 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
|||||||
long size_max, long date_min, long date_max, int page_size,
|
long size_max, long date_min, long date_max, int page_size,
|
||||||
char **index_ids, char **mime_types, char **tags, int sort_asc,
|
char **index_ids, char **mime_types, char **tags, int sort_asc,
|
||||||
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
|
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
|
||||||
int highlight, int highlight_context_size, int model,
|
int highlight, int highlight_context_size);
|
||||||
const float *embedding, int embedding_size);
|
|
||||||
|
|
||||||
void database_write_tag(database_t *db, char *doc_id, char *tag);
|
void database_write_tag(database_t *db, char *doc_id, char *tag);
|
||||||
|
|
||||||
@ -236,12 +228,4 @@ cJSON *database_fts_get_tags(database_t *db);
|
|||||||
|
|
||||||
cJSON *database_get_document(database_t *db, char *doc_id);
|
cJSON *database_get_document(database_t *db, char *doc_id);
|
||||||
|
|
||||||
void cosine_sim_func(sqlite3_context *ctx, int argc, sqlite3_value **argv);
|
|
||||||
|
|
||||||
cJSON *database_get_models(database_t *db);
|
|
||||||
|
|
||||||
int database_fts_get_model_size(database_t *db, int model_id);
|
|
||||||
|
|
||||||
cJSON *database_get_embedding(database_t *db, char *doc_id, int model_id);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
@ -1,107 +0,0 @@
|
|||||||
#include <openblas/cblas.h>
|
|
||||||
#include "database.h"
|
|
||||||
#include "src/ctx.h"
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Cosine similarity of two n-element float vectors: the BLAS dot product
 * divided by the product of the two BLAS Euclidean norms.
 *
 * No guard against a zero norm is applied here, so the quotient is returned
 * as computed (the SQL wrapper checks the result for NaN).
 *
 * @param n Number of floats in each vector.
 * @param a First vector.
 * @param b Second vector.
 */
static float cosine_sim(int n, const float *a, const float *b) {
    const float inner = cblas_sdot(n, a, 1, b, 1);
    const float magnitude_a = cblas_snrm2(n, a, 1);
    const float magnitude_b = cblas_snrm2(n, b, 1);

    const float magnitude_product = magnitude_a * magnitude_b;
    return inner / magnitude_product;
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * SQL function cosine_sim(n, a, b): cosine similarity of two float blobs.
 *
 * Arguments:
 *   argv[0]  number of floats to compare
 *   argv[1]  first embedding, blob of packed floats
 *   argv[2]  second embedding, blob of packed floats
 *
 * The result is a double. -1 is returned as a sentinel when either blob is
 * NULL, when n is not positive, when a blob holds fewer than n floats, or
 * when the similarity is NaN (e.g. a zero-length vector).
 */
void cosine_sim_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    if (argc != 3) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
        // Must stop here: argv does not have the expected three entries.
        return;
    }

    int n = sqlite3_value_int(argv[0]);
    const float *a = sqlite3_value_blob(argv[1]);
    const float *b = sqlite3_value_blob(argv[2]);

    if (a == NULL || b == NULL) {
        sqlite3_result_double(ctx, -1);
        return;
    }

    // n comes from the query, not from the blobs themselves: make sure both
    // blobs really contain n floats before handing them to BLAS.
    int len_a = sqlite3_value_bytes(argv[1]) / (int) sizeof(float);
    int len_b = sqlite3_value_bytes(argv[2]) / (int) sizeof(float);

    if (n <= 0 || len_a < n || len_b < n) {
        sqlite3_result_double(ctx, -1);
        return;
    }

    float result = cosine_sim(n, a, b);
    if (result != result) {
        // NaN (division by a zero norm) is mapped to the sentinel value
        result = -1;
    }

    sqlite3_result_double(ctx, result);
}
|
|
||||||
|
|
||||||
/**
 * Fetch every row of the model table as a JSON array of objects with the
 * members "id", "name", "url", "path", "size" and "type".
 *
 * Rows are read until the statement stops returning SQLITE_ROW. If the
 * database is busy or the step fails, a warning is logged and the rows
 * collected so far (possibly none) are returned.
 *
 * @param db Database whose get_models statement has been prepared.
 * @return Newly created cJSON array; the caller owns it.
 */
cJSON *database_get_models(database_t *db) {
    cJSON *json = cJSON_CreateArray();
    sqlite3_stmt *stmt = db->get_models;

    int ret;
    // Only SQLITE_ROW carries column data; anything else ends the iteration.
    while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) {
        cJSON *row = cJSON_CreateObject();

        cJSON_AddNumberToObject(row, "id", sqlite3_column_int(stmt, 0));
        cJSON_AddStringToObject(row, "name", (const char *) sqlite3_column_text(stmt, 1));
        cJSON_AddStringToObject(row, "url", (const char *) sqlite3_column_text(stmt, 2));
        cJSON_AddStringToObject(row, "path", (const char *) sqlite3_column_text(stmt, 3));
        cJSON_AddNumberToObject(row, "size", sqlite3_column_int(stmt, 4));
        cJSON_AddStringToObject(row, "type", (const char *) sqlite3_column_text(stmt, 5));

        cJSON_AddItemToArray(json, row);
    }

    if (ret == SQLITE_BUSY) {
        // Database is busy (probably scanning)
        LOG_WARNING("database_embeddings.c",
                    "Database is busy, could not fetch list of models");
    } else if (ret != SQLITE_DONE) {
        // Any other code is an error: stop instead of stepping again forever.
        LOG_WARNING("database_embeddings.c",
                    "Could not fetch list of models");
    }

    sqlite3_reset(stmt);

    return json;
}
|
|
||||||
|
|
||||||
/**
 * Look up the embedding stored for a document and model and return it as a
 * JSON array of numbers.
 *
 * @param db       Database whose get_embedding statement has been prepared.
 * @param doc_id   Document id bound to the first statement parameter.
 * @param model_id Model id bound to the second statement parameter.
 * @return Newly created cJSON array, or NULL when no row matches.
 */
cJSON *database_get_embedding(database_t *db, char *doc_id, int model_id) {
    sqlite3_stmt *stmt = db->get_embedding;

    sqlite3_bind_text(stmt, 1, doc_id, -1, SQLITE_STATIC);
    sqlite3_bind_int(stmt, 2, model_id);

    int ret = sqlite3_step(stmt);
    CRASH_IF_STMT_FAIL(ret);

    cJSON *json = NULL;
    if (ret != SQLITE_DONE) {
        // The blob is a packed float array; derive the element count from its byte size
        const float *values = sqlite3_column_blob(stmt, 0);
        size_t count = sqlite3_column_bytes(stmt, 0) / sizeof(float);

        json = cJSON_CreateFloatArray(values, (int) count);
    }

    // The statement is reused by later lookups, so always rewind it
    sqlite3_reset(stmt);

    return json;
}
|
|
||||||
|
|
||||||
/**
 * SQL function emb_to_json(blob): render a blob of packed floats as the text
 * of a JSON array of numbers.
 *
 * The result is SQL NULL when the blob is NULL/empty or when the JSON text
 * cannot be produced.
 */
void emb_to_json_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    if (argc != 1) {
        sqlite3_result_error(ctx, "Invalid parameters", -1);
        // Must stop here: argv[0] is not the expected single argument.
        return;
    }

    const float *embedding = sqlite3_value_blob(argv[0]);
    int size = sqlite3_value_bytes(argv[0]) / (int) sizeof(float);

    cJSON *json = cJSON_CreateFloatArray(embedding, size);
    char *json_str = cJSON_PrintUnformatted(json);

    if (json_str == NULL) {
        sqlite3_result_null(ctx);
    } else {
        // SQLITE_TRANSIENT makes SQLite copy the text, so it can be released right away
        sqlite3_result_text(ctx, json_str, -1, SQLITE_TRANSIENT);
    }

    // Strings returned by cJSON_Print* must be released with cJSON's allocator
    cJSON_free(json_str);
    cJSON_Delete(json);
}
|
|
@ -37,7 +37,7 @@ int database_fts_get_max_path_depth(database_t *db) {
|
|||||||
|
|
||||||
void database_fts_index(database_t *db) {
|
void database_fts_index(database_t *db) {
|
||||||
|
|
||||||
LOG_INFO("database_fts.c", "Creating content table");
|
LOG_INFO("database_fts.c", "Creating content table.");
|
||||||
|
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||||
db->db,
|
db->db,
|
||||||
@ -47,12 +47,21 @@ void database_fts_index(database_t *db) {
|
|||||||
" document.json_data ->> 'path' as path,"
|
" document.json_data ->> 'path' as path,"
|
||||||
" mtime,"
|
" mtime,"
|
||||||
" document.json_data ->> 'mime' as mime,"
|
" document.json_data ->> 'mime' as mime,"
|
||||||
|
" CASE"
|
||||||
|
" WHEN sc.json_data IS NULL THEN"
|
||||||
" json_set(document.json_data, "
|
" json_set(document.json_data, "
|
||||||
" '$._id',document.id,"
|
" '$._id',document.id,"
|
||||||
" '$.size',document.size, "
|
" '$.size',document.size, "
|
||||||
" '$.mtime',document.mtime)"
|
" '$.mtime',document.mtime)"
|
||||||
|
" ELSE json_patch("
|
||||||
|
" json_set(document.json_data,"
|
||||||
|
" '$._id',document.id,"
|
||||||
|
" '$.size',document.size,"
|
||||||
|
" '$.mtime', document.mtime),"
|
||||||
|
" sc.json_data) END"
|
||||||
" FROM document"
|
" FROM document"
|
||||||
" )"
|
" LEFT JOIN document_sidecar sc ON document.id = sc.id"
|
||||||
|
" GROUP BY document.id)"
|
||||||
" INSERT"
|
" INSERT"
|
||||||
" INTO fts.document_index (id, index_id, size, name, path, mtime, mime, json_data)"
|
" INTO fts.document_index (id, index_id, size, name, path, mtime, mime, json_data)"
|
||||||
" SELECT * FROM docs WHERE true"
|
" SELECT * FROM docs WHERE true"
|
||||||
@ -60,21 +69,7 @@ void database_fts_index(database_t *db) {
|
|||||||
" size=excluded.size, mtime=excluded.mtime, mime=excluded.mime, json_data=excluded.json_data;",
|
" size=excluded.size, mtime=excluded.mtime, mime=excluded.mime, json_data=excluded.json_data;",
|
||||||
NULL, NULL, NULL));
|
NULL, NULL, NULL));
|
||||||
|
|
||||||
LOG_DEBUG("database_fts.c", "Copying embeddings");
|
LOG_DEBUG("database_fts.c", "Deleting old documents.");
|
||||||
|
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
|
||||||
db->db,
|
|
||||||
"REPLACE INTO fts.embedding (id, model_id, start, end, embedding)"
|
|
||||||
" SELECT id, model_id, start, end, embedding FROM embedding", NULL, NULL, NULL));
|
|
||||||
|
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
|
||||||
db->db,
|
|
||||||
"INSERT INTO fts.model (id, size)"
|
|
||||||
" SELECT id, size FROM model WHERE TRUE ON CONFLICT (id) DO NOTHING", NULL, NULL, NULL));
|
|
||||||
|
|
||||||
// TODO: delete old embeddings
|
|
||||||
|
|
||||||
LOG_DEBUG("database_fts.c", "Deleting old documents");
|
|
||||||
|
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||||
db->db,
|
db->db,
|
||||||
@ -149,7 +144,7 @@ void database_fts_index(database_t *db) {
|
|||||||
"INSERT INTO path_index (path, index_id, count, depth) SELECT path, index_id, total, depth FROM path_tmp",
|
"INSERT INTO path_index (path, index_id, count, depth) SELECT path, index_id, total, depth FROM path_tmp",
|
||||||
NULL, NULL, NULL));
|
NULL, NULL, NULL));
|
||||||
|
|
||||||
LOG_DEBUG("database_fts.c", "Generating search index");
|
LOG_DEBUG("database_fts.c", "Generating search index.");
|
||||||
|
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||||
db->db, "INSERT INTO search(search) VALUES ('delete-all')",
|
db->db, "INSERT INTO search(search) VALUES ('delete-all')",
|
||||||
@ -162,7 +157,7 @@ void database_fts_index(database_t *db) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void database_fts_optimize(database_t *db) {
|
void database_fts_optimize(database_t *db) {
|
||||||
LOG_INFO("database_fts.c", "Optimizing search index");
|
LOG_INFO("database_fts.c", "Optimizing search index.");
|
||||||
|
|
||||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
|
||||||
db->db,
|
db->db,
|
||||||
@ -413,8 +408,6 @@ const char *get_sort_var(fts_sort_t sort) {
|
|||||||
return "doc.name";
|
return "doc.name";
|
||||||
case FTS_SORT_ID:
|
case FTS_SORT_ID:
|
||||||
return "doc.id";
|
return "doc.id";
|
||||||
case FTS_SORT_EMBEDDING:
|
|
||||||
return "cosine_sim(?7, ?8, emb.embedding)";
|
|
||||||
default:
|
default:
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -466,36 +459,11 @@ char *get_after_where(char **after, fts_sort_t sort, int sort_asc) {
|
|||||||
return "(sort_var, doc.ROWID) < (?3, ?4)";
|
return "(sort_var, doc.ROWID) < (?3, ?4)";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Look up the embedding size recorded for a model.
 *
 * @param db       Database whose fts_model_size statement has been prepared.
 * @param model_id Model id bound to the statement parameter.
 * @return The value of the size column, or -1 when no such model exists.
 */
int database_fts_get_model_size(database_t *db, int model_id) {
    sqlite3_bind_int(db->fts_model_size, 1, model_id);
    int ret = sqlite3_step(db->fts_model_size);
    CRASH_IF_STMT_FAIL(ret);

    int size = -1;
    if (ret != SQLITE_DONE) {
        size = sqlite3_column_int(db->fts_model_size, 0);
    }

    // Reset on every path: binding to a statement that has been stepped but
    // not reset fails, which would make the next lookup use a stale model id.
    sqlite3_reset(db->fts_model_size);

    return size;
}
|
|
||||||
|
|
||||||
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
|
cJSON *database_fts_search(database_t *db, const char *query, const char *path, long size_min,
|
||||||
long size_max, long date_min, long date_max, int page_size,
|
long size_max, long date_min, long date_max, int page_size,
|
||||||
char **index_ids, char **mime_types, char **tags, int sort_asc,
|
char **index_ids, char **mime_types, char **tags, int sort_asc,
|
||||||
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
|
fts_sort_t sort, int seed, char **after, int fetch_aggregations,
|
||||||
int highlight, int highlight_context_size, int model,
|
int highlight, int highlight_context_size) {
|
||||||
const float *embedding, int embedding_size) {
|
|
||||||
|
|
||||||
if (embedding) {
|
|
||||||
int model_embedding_size = database_fts_get_model_size(db, model);
|
|
||||||
if (model_embedding_size != embedding_size) {
|
|
||||||
LOG_WARNINGF("database_fts.c", "Received invalid embedding size for model %s: %d, expected %d",
|
|
||||||
model, embedding_size, model_embedding_size);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
char path_glob[PATH_MAX * 2];
|
char path_glob[PATH_MAX * 2];
|
||||||
snprintf(path_glob, sizeof(path_glob), "%s/*", path);
|
snprintf(path_glob, sizeof(path_glob), "%s/*", path);
|
||||||
@ -523,15 +491,15 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
|||||||
|
|
||||||
const char *json_object_sql;
|
const char *json_object_sql;
|
||||||
if (highlight && query_where != NULL) {
|
if (highlight && query_where != NULL) {
|
||||||
json_object_sql = "json_set(json_remove(doc.json_data, '$.content'),"
|
json_object_sql = "json_remove(json_set(doc.json_data,"
|
||||||
"'$.index', doc.index_id,"
|
"'$.index', doc.index_id,"
|
||||||
"'$.embedding', (CASE WHEN emb.id IS NOT NULL THEN 1 ELSE 0 END),"
|
|
||||||
"'$._highlight.name', snippet(search, 0, '<mark>', '</mark>', '', ?6),"
|
"'$._highlight.name', snippet(search, 0, '<mark>', '</mark>', '', ?6),"
|
||||||
"'$._highlight.content', snippet(search, 1, '<mark>', '</mark>', '', ?6))";
|
"'$._highlight.content', snippet(search, 1, '<mark>', '</mark>', '', ?6)),"
|
||||||
|
"'$.content')";
|
||||||
} else {
|
} else {
|
||||||
json_object_sql = "json_set(json_remove(doc.json_data, '$.content'),"
|
json_object_sql = "json_remove(json_set(doc.json_data,"
|
||||||
"'$.index', doc.index_id,"
|
"'$.index', doc.index_id),"
|
||||||
"'$.embedding', (CASE WHEN emb.id IS NOT NULL THEN 1 ELSE 0 END))";
|
"'$.content')";
|
||||||
}
|
}
|
||||||
|
|
||||||
char *sql;
|
char *sql;
|
||||||
@ -544,7 +512,6 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
|||||||
" %s, %s as sort_var, doc.ROWID"
|
" %s, %s as sort_var, doc.ROWID"
|
||||||
" FROM search"
|
" FROM search"
|
||||||
" INNER JOIN document_index doc on doc.ROWID = search.ROWID"
|
" INNER JOIN document_index doc on doc.ROWID = search.ROWID"
|
||||||
" LEFT JOIN embedding emb on emb.id = doc.id"
|
|
||||||
" WHERE %s"
|
" WHERE %s"
|
||||||
" ORDER BY sort_var%s, doc.ROWID"
|
" ORDER BY sort_var%s, doc.ROWID"
|
||||||
" LIMIT ?2",
|
" LIMIT ?2",
|
||||||
@ -566,7 +533,6 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
|||||||
"SELECT"
|
"SELECT"
|
||||||
" %s, %s as sort_var, doc.ROWID"
|
" %s, %s as sort_var, doc.ROWID"
|
||||||
" FROM document_index doc"
|
" FROM document_index doc"
|
||||||
" LEFT JOIN embedding emb on emb.id = doc.id"
|
|
||||||
" WHERE %s"
|
" WHERE %s"
|
||||||
" ORDER BY sort_var%s,doc.ROWID"
|
" ORDER BY sort_var%s,doc.ROWID"
|
||||||
" LIMIT ?2",
|
" LIMIT ?2",
|
||||||
@ -603,6 +569,7 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
|||||||
if (tags) {
|
if (tags) {
|
||||||
db->tag_array = tags;
|
db->tag_array = tags;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size_min > 0) {
|
if (size_min > 0) {
|
||||||
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@size_min"), size_min);
|
sqlite3_bind_int64(stmt, sqlite3_bind_parameter_index(stmt, "@size_min"), size_min);
|
||||||
}
|
}
|
||||||
@ -622,7 +589,7 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
|||||||
if (after_where) {
|
if (after_where) {
|
||||||
if (sort == FTS_SORT_NAME || sort == FTS_SORT_ID) {
|
if (sort == FTS_SORT_NAME || sort == FTS_SORT_ID) {
|
||||||
sqlite3_bind_text(stmt, 3, after[0], -1, SQLITE_STATIC);
|
sqlite3_bind_text(stmt, 3, after[0], -1, SQLITE_STATIC);
|
||||||
} else if (sort == FTS_SORT_SCORE || sort == FTS_SORT_EMBEDDING) {
|
} else if (sort == FTS_SORT_SCORE) {
|
||||||
sqlite3_bind_double(stmt, 3, strtod(after[0], NULL));
|
sqlite3_bind_double(stmt, 3, strtod(after[0], NULL));
|
||||||
} else {
|
} else {
|
||||||
sqlite3_bind_int64(stmt, 3, strtol(after[0], NULL, 10));
|
sqlite3_bind_int64(stmt, 3, strtol(after[0], NULL, 10));
|
||||||
@ -635,11 +602,6 @@ cJSON *database_fts_search(database_t *db, const char *query, const char *path,
|
|||||||
if (highlight) {
|
if (highlight) {
|
||||||
sqlite3_bind_int(stmt, 6, highlight_context_size);
|
sqlite3_bind_int(stmt, 6, highlight_context_size);
|
||||||
}
|
}
|
||||||
if (embedding) {
|
|
||||||
sqlite3_bind_int(stmt, 7, embedding_size);
|
|
||||||
sqlite3_bind_blob(stmt, 8, embedding, (int) sizeof(float) * embedding_size, SQLITE_STATIC);
|
|
||||||
sqlite3_bind_int(stmt, 9, model);
|
|
||||||
}
|
|
||||||
|
|
||||||
cJSON *json = cJSON_CreateObject();
|
cJSON *json = cJSON_CreateObject();
|
||||||
cJSON *hits_hits = cJSON_CreateArray();
|
cJSON *hits_hits = cJSON_CreateArray();
|
||||||
|
@ -38,21 +38,6 @@ const char *FtsDatabaseSchema =
|
|||||||
");"
|
");"
|
||||||
"CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag);"
|
"CREATE INDEX IF NOT EXISTS tag_tag_idx ON tag(tag);"
|
||||||
"CREATE INDEX IF NOT EXISTS tag_id_idx ON tag(id);"
|
"CREATE INDEX IF NOT EXISTS tag_id_idx ON tag(id);"
|
||||||
""
|
|
||||||
"CREATE TABLE IF NOT EXISTS embedding ("
|
|
||||||
" id TEXT REFERENCES document(id),"
|
|
||||||
" model_id INTEGER NOT NULL REFERENCES model(id),"
|
|
||||||
" start INTEGER NOT NULL,"
|
|
||||||
" end INTEGER,"
|
|
||||||
" embedding BLOB NOT NULL,"
|
|
||||||
" PRIMARY KEY (id, model_id, start)"
|
|
||||||
");"
|
|
||||||
""
|
|
||||||
"CREATE TABLE IF NOT EXISTS model ("
|
|
||||||
" id INTEGER PRIMARY KEY CHECK (id > 0 AND id < 1000),"
|
|
||||||
" size INTEGER NOT NULL"
|
|
||||||
");"
|
|
||||||
""
|
|
||||||
"CREATE TRIGGER IF NOT EXISTS tag_write_trigger"
|
"CREATE TRIGGER IF NOT EXISTS tag_write_trigger"
|
||||||
" AFTER INSERT ON tag"
|
" AFTER INSERT ON tag"
|
||||||
" BEGIN"
|
" BEGIN"
|
||||||
@ -170,23 +155,5 @@ const char *IndexDatabaseSchema =
|
|||||||
" mime TEXT NOT NULL,"
|
" mime TEXT NOT NULL,"
|
||||||
" size INTEGER NOT NULL,"
|
" size INTEGER NOT NULL,"
|
||||||
" count INTEGER NOT NULL"
|
" count INTEGER NOT NULL"
|
||||||
");"
|
|
||||||
""
|
|
||||||
"CREATE TABLE embedding ("
|
|
||||||
" id TEXT REFERENCES document(id),"
|
|
||||||
" model_id INTEGER NOT NULL references model(id),"
|
|
||||||
" start INTEGER NOT NULL,"
|
|
||||||
" end INTEGER,"
|
|
||||||
" embedding BLOB NOT NULL,"
|
|
||||||
" PRIMARY KEY (id, model_id, start)"
|
|
||||||
");"
|
|
||||||
""
|
|
||||||
"CREATE TABLE model ("
|
|
||||||
" id INTEGER PRIMARY KEY CHECK (id > 0 AND id < 1000),"
|
|
||||||
" name TEXT NOT NULL UNIQUE CHECK ( length(name) < 16 ),"
|
|
||||||
" url TEXT,"
|
|
||||||
" path TEXT NOT NULL UNIQUE,"
|
|
||||||
" size INTEGER NOT NULL,"
|
|
||||||
" type TEXT NOT NULL CHECK ( type IN ('flat', 'nested') )"
|
|
||||||
");";
|
");";
|
||||||
|
|
||||||
|
@ -98,6 +98,61 @@ void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
|
|||||||
free(bulk_line);
|
free(bulk_line);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {
|
||||||
|
|
||||||
|
if (Indexer == NULL) {
|
||||||
|
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
cJSON *body = cJSON_CreateObject();
|
||||||
|
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
|
||||||
|
cJSON_AddStringToObject(script_obj, "lang", "painless");
|
||||||
|
cJSON_AddStringToObject(script_obj, "source", script);
|
||||||
|
|
||||||
|
cJSON *query = cJSON_AddObjectToObject(body, "query");
|
||||||
|
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
|
||||||
|
cJSON_AddStringToObject(term_obj, "index", index_id);
|
||||||
|
|
||||||
|
char *str = cJSON_PrintUnformatted(body);
|
||||||
|
|
||||||
|
char url[4096];
|
||||||
|
if (async) {
|
||||||
|
snprintf(url, sizeof(url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
|
||||||
|
Indexer->es_index);
|
||||||
|
} else {
|
||||||
|
snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
|
||||||
|
}
|
||||||
|
response_t *r = web_post(url, str, IndexCtx.es_insecure_ssl);
|
||||||
|
if (!async) {
|
||||||
|
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
|
||||||
|
}
|
||||||
|
cJSON *resp = cJSON_Parse(r->body);
|
||||||
|
|
||||||
|
cJSON_free(str);
|
||||||
|
cJSON_Delete(body);
|
||||||
|
free_response(r);
|
||||||
|
|
||||||
|
cJSON *error = cJSON_GetObjectItem(resp, "error");
|
||||||
|
if (error != NULL) {
|
||||||
|
char *error_str = cJSON_Print(error);
|
||||||
|
|
||||||
|
LOG_ERRORF("elastic.c", "User script error: \n%s", error_str);
|
||||||
|
cJSON_free(error_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (async) {
|
||||||
|
cJSON *task = cJSON_GetObjectItem(resp, "task");
|
||||||
|
|
||||||
|
if (task == NULL) {
|
||||||
|
LOG_FATALF("elastic.c", "FIXME: Could not get task id: %s", r->body);
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
|
||||||
|
}
|
||||||
|
|
||||||
|
cJSON_Delete(resp);
|
||||||
|
}
|
||||||
|
|
||||||
void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
|
void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
|
||||||
es_bulk_line_t *line = Indexer->line_head;
|
es_bulk_line_t *line = Indexer->line_head;
|
||||||
*count = 0;
|
*count = 0;
|
||||||
@ -348,7 +403,7 @@ es_indexer_t *create_indexer(const char *url, const char *index) {
|
|||||||
return indexer;
|
return indexer;
|
||||||
}
|
}
|
||||||
|
|
||||||
void finish_indexer(char *index_id) {
|
void finish_indexer(char *script, int async_script, char *index_id) {
|
||||||
|
|
||||||
char url[4096];
|
char url[4096];
|
||||||
|
|
||||||
@ -357,6 +412,16 @@ void finish_indexer(char *index_id) {
|
|||||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||||
free_response(r);
|
free_response(r);
|
||||||
|
|
||||||
|
if (script != NULL) {
|
||||||
|
execute_update_script(script, async_script, index_id);
|
||||||
|
free(script);
|
||||||
|
|
||||||
|
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
|
r = web_post(url, "", IndexCtx.es_insecure_ssl);
|
||||||
|
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||||
|
free_response(r);
|
||||||
|
}
|
||||||
|
|
||||||
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
|
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
|
||||||
r = web_post(url, "", IndexCtx.es_insecure_ssl);
|
r = web_post(url, "", IndexCtx.es_insecure_ssl);
|
||||||
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
|
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
|
||||||
|
@ -24,8 +24,6 @@ typedef struct {
|
|||||||
|
|
||||||
#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8) && VERSION_LT((es_version), 9, 0))
|
#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8) && VERSION_LT((es_version), 9, 0))
|
||||||
#define IS_LEGACY_VERSION(es_version) ((es_version) != NULL && VERSION_LT((es_version), 7, 14))
|
#define IS_LEGACY_VERSION(es_version) ((es_version) != NULL && VERSION_LT((es_version), 7, 14))
|
||||||
#define HAS_KNN(es_version) ((es_version) != NULL && VERSION_GE((es_version), 8, 0))
|
|
||||||
|
|
||||||
|
|
||||||
__always_inline
|
__always_inline
|
||||||
static const char *format_es_version(es_version_t *version) {
|
static const char *format_es_version(es_version_t *version) {
|
||||||
@ -53,7 +51,7 @@ void delete_document(const char *document_id);
|
|||||||
es_indexer_t *create_indexer(const char *url, const char *index);
|
es_indexer_t *create_indexer(const char *url, const char *index);
|
||||||
|
|
||||||
void elastic_cleanup();
|
void elastic_cleanup();
|
||||||
void finish_indexer(char *index_id);
|
void finish_indexer(char *script, int async_script, char *index_id);
|
||||||
|
|
||||||
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);
|
void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);
|
||||||
|
|
||||||
@ -63,4 +61,6 @@ char *elastic_get_status();
|
|||||||
|
|
||||||
es_version_t *elastic_get_version(const char *es_url, int insecure);
|
es_version_t *elastic_get_version(const char *es_url, int insecure);
|
||||||
|
|
||||||
|
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
#ifndef WALK_H
|
#ifndef WALK_H
|
||||||
#define WALK_H
|
#define WALK_H
|
||||||
|
|
||||||
#undef _XOPEN_SOURCE
|
|
||||||
#define _XOPEN_SOURCE 500
|
#define _XOPEN_SOURCE 500
|
||||||
|
|
||||||
int walk_directory_tree(const char *);
|
int walk_directory_tree(const char *);
|
||||||
|
51
src/main.c
51
src/main.c
@ -24,6 +24,7 @@ static const char *const usage[] = {
|
|||||||
"sist2 index [OPTION]... INDEX",
|
"sist2 index [OPTION]... INDEX",
|
||||||
"sist2 sqlite-index [OPTION]... INDEX",
|
"sist2 sqlite-index [OPTION]... INDEX",
|
||||||
"sist2 web [OPTION]... INDEX...",
|
"sist2 web [OPTION]... INDEX...",
|
||||||
|
"sist2 exec-script [OPTION]... INDEX",
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -320,8 +321,6 @@ void sist2_index(index_args_t *args) {
|
|||||||
strcpy(doc_id, cJSON_GetObjectItem(json, "_id")->valuestring);
|
strcpy(doc_id, cJSON_GetObjectItem(json, "_id")->valuestring);
|
||||||
cJSON_DeleteItemFromObject(json, "_id");
|
cJSON_DeleteItemFromObject(json, "_id");
|
||||||
|
|
||||||
// TODO: delete tag if empty
|
|
||||||
|
|
||||||
if (args->print) {
|
if (args->print) {
|
||||||
print_json(json, doc_id);
|
print_json(json, doc_id);
|
||||||
} else {
|
} else {
|
||||||
@ -348,7 +347,7 @@ void sist2_index(index_args_t *args) {
|
|||||||
tpool_destroy(IndexCtx.pool);
|
tpool_destroy(IndexCtx.pool);
|
||||||
|
|
||||||
if (IndexCtx.needs_es_connection) {
|
if (IndexCtx.needs_es_connection) {
|
||||||
finish_indexer(desc->id);
|
finish_indexer(args->script, args->async_script, desc->id);
|
||||||
}
|
}
|
||||||
free(desc);
|
free(desc);
|
||||||
}
|
}
|
||||||
@ -369,6 +368,25 @@ void sist2_sqlite_index(sqlite_index_args_t *args) {
|
|||||||
database_close(search_db, FALSE);
|
database_close(search_db, FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void sist2_exec_script(exec_args_t *args) {
|
||||||
|
LogCtx.verbose = TRUE;
|
||||||
|
|
||||||
|
IndexCtx.es_url = args->es_url;
|
||||||
|
IndexCtx.es_index = args->es_index;
|
||||||
|
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
|
||||||
|
IndexCtx.needs_es_connection = TRUE;
|
||||||
|
|
||||||
|
database_t *db = database_create(args->index_path, INDEX_DATABASE);
|
||||||
|
database_open(db);
|
||||||
|
|
||||||
|
index_descriptor_t *desc = database_read_index_descriptor(db);
|
||||||
|
LOG_DEBUGF("main.c", "Index version %s", desc->version);
|
||||||
|
|
||||||
|
execute_update_script(args->script, args->async_script, desc->id);
|
||||||
|
free(args->script);
|
||||||
|
database_close(db, FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
void sist2_web(web_args_t *args) {
|
void sist2_web(web_args_t *args) {
|
||||||
|
|
||||||
WebCtx.es_url = args->es_url;
|
WebCtx.es_url = args->es_url;
|
||||||
@ -447,6 +465,7 @@ int main(int argc, const char *argv[]) {
|
|||||||
scan_args_t *scan_args = scan_args_create();
|
scan_args_t *scan_args = scan_args_create();
|
||||||
index_args_t *index_args = index_args_create();
|
index_args_t *index_args = index_args_create();
|
||||||
web_args_t *web_args = web_args_create();
|
web_args_t *web_args = web_args_create();
|
||||||
|
exec_args_t *exec_args = exec_args_create();
|
||||||
sqlite_index_args_t *sqlite_index_args = sqlite_index_args_create();
|
sqlite_index_args_t *sqlite_index_args = sqlite_index_args_create();
|
||||||
|
|
||||||
int arg_version = 0;
|
int arg_version = 0;
|
||||||
@ -455,6 +474,7 @@ int main(int argc, const char *argv[]) {
|
|||||||
int common_es_insecure_ssl = 0;
|
int common_es_insecure_ssl = 0;
|
||||||
char *common_es_index = NULL;
|
char *common_es_index = NULL;
|
||||||
char *common_script_path = NULL;
|
char *common_script_path = NULL;
|
||||||
|
int common_async_script = 0;
|
||||||
int common_threads = 0;
|
int common_threads = 0;
|
||||||
int common_optimize_database = 0;
|
int common_optimize_database = 0;
|
||||||
char *common_search_index = NULL;
|
char *common_search_index = NULL;
|
||||||
@ -529,6 +549,7 @@ int main(int argc, const char *argv[]) {
|
|||||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||||
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
|
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
|
||||||
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
||||||
|
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"),
|
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"),
|
||||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."),
|
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."),
|
||||||
|
|
||||||
@ -539,6 +560,7 @@ int main(int argc, const char *argv[]) {
|
|||||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
|
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
|
||||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||||
"Do not verify SSL connections to Elasticsearch."),
|
"Do not verify SSL connections to Elasticsearch."),
|
||||||
|
// TODO: change arg name (?)
|
||||||
OPT_STRING(0, "search-index", &common_search_index, "Path to SQLite search index."),
|
OPT_STRING(0, "search-index", &common_search_index, "Path to SQLite search index."),
|
||||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
|
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
|
||||||
OPT_STRING(0, "bind", &web_args->listen_address,
|
OPT_STRING(0, "bind", &web_args->listen_address,
|
||||||
@ -554,6 +576,14 @@ int main(int argc, const char *argv[]) {
|
|||||||
OPT_BOOLEAN(0, "dev", &web_args->dev, "Serve html & js files from disk (for development)"),
|
OPT_BOOLEAN(0, "dev", &web_args->dev, "Serve html & js files from disk (for development)"),
|
||||||
OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"),
|
OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"),
|
||||||
|
|
||||||
|
OPT_GROUP("Exec-script options"),
|
||||||
|
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
|
||||||
|
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||||
|
"Do not verify SSL connections to Elasticsearch."),
|
||||||
|
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
|
||||||
|
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||||
|
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||||
|
|
||||||
OPT_END(),
|
OPT_END(),
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -577,16 +607,22 @@ int main(int argc, const char *argv[]) {
|
|||||||
|
|
||||||
web_args->es_url = common_es_url;
|
web_args->es_url = common_es_url;
|
||||||
index_args->es_url = common_es_url;
|
index_args->es_url = common_es_url;
|
||||||
|
exec_args->es_url = common_es_url;
|
||||||
|
|
||||||
web_args->es_index = common_es_index;
|
web_args->es_index = common_es_index;
|
||||||
index_args->es_index = common_es_index;
|
index_args->es_index = common_es_index;
|
||||||
|
exec_args->es_index = common_es_index;
|
||||||
|
|
||||||
web_args->es_insecure_ssl = common_es_insecure_ssl;
|
web_args->es_insecure_ssl = common_es_insecure_ssl;
|
||||||
index_args->es_insecure_ssl = common_es_insecure_ssl;
|
index_args->es_insecure_ssl = common_es_insecure_ssl;
|
||||||
|
exec_args->es_insecure_ssl = common_es_insecure_ssl;
|
||||||
|
|
||||||
index_args->script_path = common_script_path;
|
index_args->script_path = common_script_path;
|
||||||
|
exec_args->script_path = common_script_path;
|
||||||
index_args->threads = common_threads;
|
index_args->threads = common_threads;
|
||||||
scan_args->threads = common_threads;
|
scan_args->threads = common_threads;
|
||||||
|
exec_args->async_script = common_async_script;
|
||||||
|
index_args->async_script = common_async_script;
|
||||||
|
|
||||||
scan_args->optimize_database = common_optimize_database;
|
scan_args->optimize_database = common_optimize_database;
|
||||||
|
|
||||||
@ -628,6 +664,14 @@ int main(int argc, const char *argv[]) {
|
|||||||
}
|
}
|
||||||
sist2_web(web_args);
|
sist2_web(web_args);
|
||||||
|
|
||||||
|
} else if (strcmp(argv[0], "exec-script") == 0) {
|
||||||
|
|
||||||
|
int err = exec_args_validate(exec_args, argc, argv);
|
||||||
|
if (err != 0) {
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
sist2_exec_script(exec_args);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
argparse_usage(&argparse);
|
argparse_usage(&argparse);
|
||||||
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0]);
|
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0]);
|
||||||
@ -638,6 +682,7 @@ int main(int argc, const char *argv[]) {
|
|||||||
scan_args_destroy(scan_args);
|
scan_args_destroy(scan_args);
|
||||||
index_args_destroy(index_args);
|
index_args_destroy(index_args);
|
||||||
web_args_destroy(web_args);
|
web_args_destroy(web_args);
|
||||||
|
exec_args_destroy(exec_args);
|
||||||
sqlite_index_args_destroy(sqlite_index_args);
|
sqlite_index_args_destroy(sqlite_index_args);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -16,7 +16,6 @@ typedef struct {
|
|||||||
|
|
||||||
typedef struct tpool {
|
typedef struct tpool {
|
||||||
pthread_t threads[256];
|
pthread_t threads[256];
|
||||||
void *start_thread_args[256];
|
|
||||||
int num_threads;
|
int num_threads;
|
||||||
|
|
||||||
int print_progress;
|
int print_progress;
|
||||||
@ -294,8 +293,6 @@ void tpool_destroy(tpool_t *pool) {
|
|||||||
void *_;
|
void *_;
|
||||||
pthread_join(thread, &_);
|
pthread_join(thread, &_);
|
||||||
}
|
}
|
||||||
|
|
||||||
free(pool->start_thread_args[i]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_destroy(&pool->shm->ipc_ctx.mutex);
|
pthread_mutex_destroy(&pool->shm->ipc_ctx.mutex);
|
||||||
@ -323,7 +320,6 @@ tpool_t *tpool_create(int thread_cnt, int print_progress) {
|
|||||||
pool->shm->waiting = FALSE;
|
pool->shm->waiting = FALSE;
|
||||||
pool->shm->job_type = JOB_UNDEFINED;
|
pool->shm->job_type = JOB_UNDEFINED;
|
||||||
memset(pool->threads, 0, sizeof(pool->threads));
|
memset(pool->threads, 0, sizeof(pool->threads));
|
||||||
memset(pool->start_thread_args, 0, sizeof(pool->start_thread_args));
|
|
||||||
pool->print_progress = print_progress;
|
pool->print_progress = print_progress;
|
||||||
sprintf(pool->shm->ipc_database_filepath, "/dev/shm/sist2-ipc-%d.sqlite", getpid());
|
sprintf(pool->shm->ipc_database_filepath, "/dev/shm/sist2-ipc-%d.sqlite", getpid());
|
||||||
|
|
||||||
@ -365,7 +361,6 @@ void tpool_start(tpool_t *pool) {
|
|||||||
arg->pool = pool;
|
arg->pool = pool;
|
||||||
|
|
||||||
pthread_create(&pool->threads[i], NULL, tpool_worker, arg);
|
pthread_create(&pool->threads[i], NULL, tpool_worker, arg);
|
||||||
pool->start_thread_args[i] = arg;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only open the database when all workers are done initializing
|
// Only open the database when all workers are done initializing
|
||||||
|
@ -87,7 +87,7 @@ static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
|
|||||||
*s = '\0';
|
*s = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
static void md5_hexdigest(const void *data, size_t size, char *output) {
|
static void md5_hexdigest(void *data, size_t size, char *output) {
|
||||||
EVP_MD_CTX *md_ctx = EVP_MD_CTX_new();
|
EVP_MD_CTX *md_ctx = EVP_MD_CTX_new();
|
||||||
EVP_DigestInit_ex(md_ctx, EVP_md5(), NULL);
|
EVP_DigestInit_ex(md_ctx, EVP_md5(), NULL);
|
||||||
|
|
||||||
@ -120,7 +120,7 @@ struct timespec timespec_add(struct timespec ts1, long usec);
|
|||||||
#define pthread_cond_timedwait_ms(cond, mutex, delay_ms) do {\
|
#define pthread_cond_timedwait_ms(cond, mutex, delay_ms) do {\
|
||||||
struct timespec now; \
|
struct timespec now; \
|
||||||
clock_gettime(CLOCK_REALTIME, &now); \
|
clock_gettime(CLOCK_REALTIME, &now); \
|
||||||
struct timespec end_time = timespec_add(now, MILLISECOND * (delay_ms)); \
|
struct timespec end_time = timespec_add(now, MILLISECOND * delay_ms); \
|
||||||
pthread_cond_timedwait(cond, mutex, &end_time); \
|
pthread_cond_timedwait(cond, mutex, &end_time); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
@ -28,60 +28,9 @@ static struct mg_http_serve_opts DefaultServeOpts = {
|
|||||||
.fs = NULL,
|
.fs = NULL,
|
||||||
.ssi_pattern = NULL,
|
.ssi_pattern = NULL,
|
||||||
.root_dir = NULL,
|
.root_dir = NULL,
|
||||||
.mime_types = HTTP_SERVER_HEADER
|
.mime_types = ""
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct mg_http_serve_opts IndexServeOpts = {
|
|
||||||
.fs = NULL,
|
|
||||||
.ssi_pattern = NULL,
|
|
||||||
.root_dir = NULL,
|
|
||||||
.mime_types = "",
|
|
||||||
.extra_headers = HTTP_SERVER_HEADER HTTP_CROSS_ORIGIN_HEADERS
|
|
||||||
};
|
|
||||||
|
|
||||||
void get_embedding(struct mg_connection *nc, struct mg_http_message *hm) {
|
|
||||||
|
|
||||||
if (WebCtx.search_backend == ES_SEARCH_BACKEND && WebCtx.es_version != NULL && !HAS_KNN(WebCtx.es_version)) {
|
|
||||||
LOG_WARNINGF("serve.c",
|
|
||||||
"Your Elasticsearch version (%d.%d.%d) does not support approximate kNN search and will"
|
|
||||||
" fallback to a brute-force search. Please install ES 8.x.x+ for better search performance.",
|
|
||||||
WebCtx.es_version->major, WebCtx.es_version->minor, WebCtx.es_version->patch);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
|
|
||||||
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
|
|
||||||
HTTP_REPLY_NOT_FOUND
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
char doc_id[SIST_DOC_ID_LEN];
|
|
||||||
char index_id[SIST_INDEX_ID_LEN];
|
|
||||||
|
|
||||||
memcpy(index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
|
|
||||||
*(index_id + SIST_INDEX_ID_LEN - 1) = '\0';
|
|
||||||
memcpy(doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
|
|
||||||
*(doc_id + SIST_DOC_ID_LEN - 1) = '\0';
|
|
||||||
|
|
||||||
int model_id = (int) strtol(hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 3, NULL, 10);
|
|
||||||
|
|
||||||
database_t *db = web_get_database(index_id);
|
|
||||||
if (db == NULL) {
|
|
||||||
LOG_DEBUGF("serve.c", "Could not get database for index: %s", index_id);
|
|
||||||
HTTP_REPLY_NOT_FOUND
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
cJSON *json = database_get_embedding(db, doc_id, model_id);
|
|
||||||
|
|
||||||
if (json == NULL) {
|
|
||||||
HTTP_REPLY_NOT_FOUND
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
mg_send_json(nc, json);
|
|
||||||
cJSON_Delete(json);
|
|
||||||
}
|
|
||||||
|
|
||||||
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
|
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||||
|
|
||||||
if (hm->uri.len != SIST_INDEX_ID_LEN + 7) {
|
if (hm->uri.len != SIST_INDEX_ID_LEN + 7) {
|
||||||
@ -118,7 +67,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
|
|||||||
|
|
||||||
void serve_index_html(struct mg_connection *nc, struct mg_http_message *hm) {
|
void serve_index_html(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||||
if (WebCtx.dev) {
|
if (WebCtx.dev) {
|
||||||
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &IndexServeOpts);
|
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts);
|
||||||
} else {
|
} else {
|
||||||
web_serve_asset_index_html(nc);
|
web_serve_asset_index_html(nc);
|
||||||
}
|
}
|
||||||
@ -359,7 +308,6 @@ void index_info(struct mg_connection *nc) {
|
|||||||
|
|
||||||
cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
|
cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
|
||||||
cJSON_AddBoolToObject(json, "esVersionLegacy", IS_LEGACY_VERSION(WebCtx.es_version));
|
cJSON_AddBoolToObject(json, "esVersionLegacy", IS_LEGACY_VERSION(WebCtx.es_version));
|
||||||
cJSON_AddBoolToObject(json, "esVersionHasKnn", HAS_KNN(WebCtx.es_version));
|
|
||||||
cJSON_AddStringToObject(json, "lang", WebCtx.lang);
|
cJSON_AddStringToObject(json, "lang", WebCtx.lang);
|
||||||
|
|
||||||
cJSON_AddBoolToObject(json, "auth0Enabled", WebCtx.auth0_enabled);
|
cJSON_AddBoolToObject(json, "auth0Enabled", WebCtx.auth0_enabled);
|
||||||
@ -386,9 +334,6 @@ void index_info(struct mg_connection *nc) {
|
|||||||
cJSON_AddStringToObject(idx_json, "rewriteUrl", idx->desc.rewrite_url);
|
cJSON_AddStringToObject(idx_json, "rewriteUrl", idx->desc.rewrite_url);
|
||||||
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
|
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
|
||||||
cJSON_AddItemToArray(arr, idx_json);
|
cJSON_AddItemToArray(arr, idx_json);
|
||||||
|
|
||||||
cJSON *models = database_get_models(idx->db);
|
|
||||||
cJSON_AddItemToObject(idx_json, "models", models);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {
|
if (WebCtx.search_backend == SQLITE_SEARCH_BACKEND) {
|
||||||
@ -752,9 +697,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
tag(nc, hm);
|
tag(nc, hm);
|
||||||
} else if (mg_http_match_uri(hm, "/e/*/*/*")) {
|
|
||||||
get_embedding(nc, hm);
|
|
||||||
return;
|
|
||||||
} else {
|
} else {
|
||||||
HTTP_REPLY_NOT_FOUND
|
HTTP_REPLY_NOT_FOUND
|
||||||
}
|
}
|
||||||
|
@ -32,9 +32,6 @@ typedef struct {
|
|||||||
int fetch_aggregations;
|
int fetch_aggregations;
|
||||||
int highlight;
|
int highlight;
|
||||||
int highlight_context_size;
|
int highlight_context_size;
|
||||||
int model;
|
|
||||||
float *embedding;
|
|
||||||
int embedding_size;
|
|
||||||
} fts_search_req_t;
|
} fts_search_req_t;
|
||||||
|
|
||||||
fts_sort_t get_sort_mode(const cJSON *req_sort) {
|
fts_sort_t get_sort_mode(const cJSON *req_sort) {
|
||||||
@ -48,27 +45,11 @@ fts_sort_t get_sort_mode(const cJSON *req_sort) {
|
|||||||
return FTS_SORT_RANDOM;
|
return FTS_SORT_RANDOM;
|
||||||
} else if (strcmp(req_sort->valuestring, "name") == 0) {
|
} else if (strcmp(req_sort->valuestring, "name") == 0) {
|
||||||
return FTS_SORT_NAME;
|
return FTS_SORT_NAME;
|
||||||
} else if (strcmp(req_sort->valuestring, "embedding") == 0) {
|
|
||||||
return FTS_SORT_EMBEDDING;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return FTS_SORT_INVALID;
|
return FTS_SORT_INVALID;
|
||||||
}
|
}
|
||||||
|
|
||||||
float *get_float_buffer(cJSON *arr, int *size) {
|
|
||||||
*size = cJSON_GetArraySize(arr);
|
|
||||||
|
|
||||||
float *floats = malloc(sizeof(float) * *size);
|
|
||||||
|
|
||||||
cJSON *elem;
|
|
||||||
int i = 0;
|
|
||||||
cJSON_ArrayForEach(elem, arr) {
|
|
||||||
floats[i] = (float) elem->valuedouble;
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return floats;
|
|
||||||
}
|
|
||||||
|
|
||||||
static json_value get_json_string(cJSON *object, const char *name) {
|
static json_value get_json_string(cJSON *object, const char *name) {
|
||||||
|
|
||||||
@ -108,25 +89,6 @@ static json_value get_json_bool(cJSON *object, const char *name) {
|
|||||||
return (json_value) {item, FALSE};
|
return (json_value) {item, FALSE};
|
||||||
}
|
}
|
||||||
|
|
||||||
static json_value get_json_float_array(cJSON *object, const char *name) {
|
|
||||||
cJSON *item = cJSON_GetObjectItem(object, name);
|
|
||||||
if (item == NULL || cJSON_IsNull(item)) {
|
|
||||||
return (json_value) {NULL, FALSE};
|
|
||||||
}
|
|
||||||
if (!cJSON_IsArray(item) || cJSON_GetArraySize(item) == 0) {
|
|
||||||
return (json_value) {NULL, TRUE};
|
|
||||||
}
|
|
||||||
|
|
||||||
cJSON *elem;
|
|
||||||
cJSON_ArrayForEach(elem, item) {
|
|
||||||
if (!cJSON_IsNumber(elem)) {
|
|
||||||
return (json_value) {NULL, TRUE};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (json_value) {item, FALSE};
|
|
||||||
}
|
|
||||||
|
|
||||||
static json_value get_json_array(cJSON *object, const char *name) {
|
static json_value get_json_array(cJSON *object, const char *name) {
|
||||||
cJSON *item = cJSON_GetObjectItem(object, name);
|
cJSON *item = cJSON_GetObjectItem(object, name);
|
||||||
if (item == NULL || cJSON_IsNull(item)) {
|
if (item == NULL || cJSON_IsNull(item)) {
|
||||||
@ -169,7 +131,7 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
|
|||||||
|
|
||||||
json_value req_query, req_path, req_size_min, req_size_max, req_date_min, req_date_max, req_page_size,
|
json_value req_query, req_path, req_size_min, req_size_max, req_date_min, req_date_max, req_page_size,
|
||||||
req_index_ids, req_mime_types, req_tags, req_sort_asc, req_sort, req_seed, req_after,
|
req_index_ids, req_mime_types, req_tags, req_sort_asc, req_sort, req_seed, req_after,
|
||||||
req_fetch_aggregations, req_highlight, req_highlight_context_size, req_embedding, req_model;
|
req_fetch_aggregations, req_highlight, req_highlight_context_size;
|
||||||
|
|
||||||
if (!cJSON_IsObject(json) ||
|
if (!cJSON_IsObject(json) ||
|
||||||
(req_query = get_json_string(json, "query")).invalid ||
|
(req_query = get_json_string(json, "query")).invalid ||
|
||||||
@ -188,8 +150,6 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
|
|||||||
(req_mime_types = get_json_array(json, "mimeTypes")).invalid ||
|
(req_mime_types = get_json_array(json, "mimeTypes")).invalid ||
|
||||||
(req_highlight = get_json_bool(json, "highlight")).invalid ||
|
(req_highlight = get_json_bool(json, "highlight")).invalid ||
|
||||||
(req_highlight_context_size = get_json_number(json, "highlightContextSize")).invalid ||
|
(req_highlight_context_size = get_json_number(json, "highlightContextSize")).invalid ||
|
||||||
(req_embedding = get_json_float_array(json, "embedding")).invalid ||
|
|
||||||
(req_model = get_json_number(json, "model")).invalid ||
|
|
||||||
(req_tags = get_json_array(json, "tags")).invalid) {
|
(req_tags = get_json_array(json, "tags")).invalid) {
|
||||||
cJSON_Delete(json);
|
cJSON_Delete(json);
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -230,11 +190,7 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
|
|||||||
cJSON_Delete(json);
|
cJSON_Delete(json);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (req_highlight_context_size.val && req_highlight_context_size.val->valueint < 0) {
|
if (req_highlight_context_size.val->valueint < 0) {
|
||||||
cJSON_Delete(json);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
if (req_model.val && !req_embedding.val || !req_model.val && req_embedding.val) {
|
|
||||||
cJSON_Delete(json);
|
cJSON_Delete(json);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -260,10 +216,6 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
|
|||||||
req->highlight_context_size = req_highlight_context_size.val
|
req->highlight_context_size = req_highlight_context_size.val
|
||||||
? req_highlight_context_size.val->valueint
|
? req_highlight_context_size.val->valueint
|
||||||
: DEFAULT_HIGHLIGHT_CONTEXT_SIZE;
|
: DEFAULT_HIGHLIGHT_CONTEXT_SIZE;
|
||||||
req->model = req_model.val ? req_model.val->valueint : 0;
|
|
||||||
req->embedding = req_model.val
|
|
||||||
? get_float_buffer(req_embedding.val, &req->embedding_size)
|
|
||||||
: NULL;
|
|
||||||
|
|
||||||
cJSON_Delete(json);
|
cJSON_Delete(json);
|
||||||
|
|
||||||
@ -286,10 +238,6 @@ void destroy_search_req(fts_search_req_t *req) {
|
|||||||
destroy_array(req->mime_types);
|
destroy_array(req->mime_types);
|
||||||
destroy_array(req->tags);
|
destroy_array(req->tags);
|
||||||
|
|
||||||
if (req->embedding) {
|
|
||||||
free(req->embedding);
|
|
||||||
}
|
|
||||||
|
|
||||||
free(req);
|
free(req);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,13 +331,7 @@ void fts_search(struct mg_connection *nc, struct mg_http_message *hm) {
|
|||||||
req->page_size, req->index_ids, req->mime_types,
|
req->page_size, req->index_ids, req->mime_types,
|
||||||
req->tags, req->sort_asc, req->sort, req->seed,
|
req->tags, req->sort_asc, req->sort, req->seed,
|
||||||
req->after, req->fetch_aggregations, req->highlight,
|
req->after, req->fetch_aggregations, req->highlight,
|
||||||
req->highlight_context_size, req->model,
|
req->highlight_context_size);
|
||||||
req->embedding, req->embedding_size);
|
|
||||||
|
|
||||||
if (json == NULL) {
|
|
||||||
HTTP_REPLY_BAD_REQUEST
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
destroy_search_req(req);
|
destroy_search_req(req);
|
||||||
mg_send_json(nc, json);
|
mg_send_json(nc, json);
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
|
|
||||||
void web_serve_asset_index_html(struct mg_connection *nc) {
|
void web_serve_asset_index_html(struct mg_connection *nc) {
|
||||||
web_send_headers(nc, 200, sizeof(index_html), HTTP_CROSS_ORIGIN_HEADERS "Content-Type: text/html");
|
web_send_headers(nc, 200, sizeof(index_html), "Content-Type: text/html");
|
||||||
mg_send(nc, index_html, sizeof(index_html));
|
mg_send(nc, index_html, sizeof(index_html));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,8 +7,6 @@
|
|||||||
#include <mongoose.h>
|
#include <mongoose.h>
|
||||||
|
|
||||||
#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n"
|
#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n"
|
||||||
// See https://web.dev/coop-coep/
|
|
||||||
#define HTTP_CROSS_ORIGIN_HEADERS "Cross-Origin-Embedder-Policy: require-corp\r\nCross-Origin-Opener-Policy: same-origin\r\n"
|
|
||||||
|
|
||||||
index_t *web_get_index_by_id(const char *index_id);
|
index_t *web_get_index_by_id(const char *index_id);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user