mirror of
https://github.com/simon987/sist2.git
synced 2025-12-12 15:08:53 +00:00
Compare commits
10 Commits
d44679131b
...
fix/index-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e57fdc1fe8 | ||
|
|
8bb12f8ae2 | ||
|
|
670dad185e | ||
|
|
bbbd727e6a | ||
|
|
d800effad9 | ||
|
|
371e9c408e | ||
|
|
ee1b1d8bb4 | ||
|
|
63a097a463 | ||
|
|
7a03a2202e | ||
|
|
050fc500ce |
@@ -89,7 +89,7 @@ target_include_directories(
|
|||||||
target_compile_options(
|
target_compile_options(
|
||||||
sist2
|
sist2
|
||||||
PRIVATE
|
PRIVATE
|
||||||
-fPIC
|
# -fPIC
|
||||||
)
|
)
|
||||||
|
|
||||||
if (SIST_DEBUG)
|
if (SIST_DEBUG)
|
||||||
@@ -147,7 +147,7 @@ add_dependencies(
|
|||||||
target_link_libraries(
|
target_link_libraries(
|
||||||
sist2
|
sist2
|
||||||
|
|
||||||
m
|
# m
|
||||||
z
|
z
|
||||||
argparse
|
argparse
|
||||||
unofficial::mongoose::mongoose
|
unofficial::mongoose::mongoose
|
||||||
|
|||||||
34
README.md
34
README.md
@@ -1,5 +1,5 @@
|
|||||||

|

|
||||||
[](https://www.codefactor.io/repository/github/simon987/sist2)
|
[](https://www.codefactor.io/repository/github/sist2app/sist2)
|
||||||
[](https://files.simon987.net/.gate/sist2/simon987_sist2/)
|
[](https://files.simon987.net/.gate/sist2/simon987_sist2/)
|
||||||
|
|
||||||
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
|
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
|
||||||
@@ -38,8 +38,6 @@ sist2 (Simple incremental search tool)
|
|||||||
### Using Docker Compose *(Windows/Linux/Mac)*
|
### Using Docker Compose *(Windows/Linux/Mac)*
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
version: "3"
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
elasticsearch:
|
elasticsearch:
|
||||||
image: elasticsearch:7.17.9
|
image: elasticsearch:7.17.9
|
||||||
@@ -53,7 +51,7 @@ services:
|
|||||||
- "PUID=1000"
|
- "PUID=1000"
|
||||||
- "PGID=1000"
|
- "PGID=1000"
|
||||||
sist2-admin:
|
sist2-admin:
|
||||||
image: simon987/sist2:3.4.2-x64-linux
|
image: sist2app/sist2:x64-linux
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
volumes:
|
volumes:
|
||||||
- /data/sist2-admin-data/:/sist2-admin/
|
- /data/sist2-admin-data/:/sist2-admin/
|
||||||
@@ -81,7 +79,7 @@ Navigate to http://localhost:8080/ to configure sist2-admin.
|
|||||||
```
|
```
|
||||||
* **SQLite**: No installation required
|
* **SQLite**: No installation required
|
||||||
|
|
||||||
2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
|
2. Download the [latest sist2 release](https://github.com/sist2app/sist2/releases).
|
||||||
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
|
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
|
||||||
3. See [usage guide](docs/USAGE.md) for command line usage.
|
3. See [usage guide](docs/USAGE.md) for command line usage.
|
||||||
|
|
||||||
@@ -100,20 +98,20 @@ Example usage:
|
|||||||
| File type | Library | Content | Thumbnail | Metadata |
|
| File type | Library | Content | Thumbnail | Metadata |
|
||||||
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
|
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
|
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
|
||||||
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
|
| cbz,cbr | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | - | yes | - |
|
||||||
| `audio/*` | ffmpeg | - | yes | ID3 tags |
|
| `audio/*` | ffmpeg | - | yes | ID3 tags |
|
||||||
| `video/*` | ffmpeg | - | yes | title, comment, artist |
|
| `video/*` | ffmpeg | - | yes | title, comment, artist |
|
||||||
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
|
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/sist2app/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
|
||||||
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
|
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
|
||||||
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
||||||
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
|
| `text/plain` | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
|
||||||
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
|
| html, xml | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
|
||||||
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
||||||
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
|
| docx, xlsx, pptx | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
|
||||||
| doc (MS Word 97-2003) | antiword | yes | no | author, title |
|
| doc (MS Word 97-2003) | antiword | yes | no | author, title |
|
||||||
| mobi, azw, azw3 | libmobi | yes | yes | author, title |
|
| mobi, azw, azw3 | libmobi | yes | yes | author, title |
|
||||||
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
|
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
|
||||||
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
|
| json, jsonl, ndjson | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | - | - |
|
||||||
|
|
||||||
\* *See [Archive files](#archive-files)*
|
\* *See [Archive files](#archive-files)*
|
||||||
|
|
||||||
@@ -137,7 +135,7 @@ You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with
|
|||||||
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
|
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
|
||||||
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
|
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
|
||||||
|
|
||||||
The `simon987/sist2` image comes with common languages
|
The `sist2app/sist2` image comes with common languages
|
||||||
(hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed.
|
(hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed.
|
||||||
|
|
||||||
You can use the `+` separator to specify multiple languages. The language
|
You can use the `+` separator to specify multiple languages. The language
|
||||||
@@ -177,13 +175,13 @@ sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported re
|
|||||||
to enable it.
|
to enable it.
|
||||||
|
|
||||||
The text processing is done in your browser, no data is sent to any third-party services.
|
The text processing is done in your browser, no data is sent to any third-party services.
|
||||||
See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
|
See [sist2app/sist2-ner-models](https://github.com/sist2app/sist2-ner-models) for more details.
|
||||||
|
|
||||||
#### List of available repositories:
|
#### List of available repositories:
|
||||||
|
|
||||||
| URL | Maintainer | Purpose |
|
| URL | Maintainer | Purpose |
|
||||||
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
|
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
|
||||||
| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
|
| [sist2app/sist2-ner-models](https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json) | [sist2app](https://github.com/sist2app) | General |
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary>Screenshot</summary>
|
<summary>Screenshot</summary>
|
||||||
@@ -199,7 +197,7 @@ You can compile **sist2** by yourself if you don't want to use the pre-compiled
|
|||||||
### Using docker
|
### Using docker
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone --recursive https://github.com/simon987/sist2/
|
git clone --recursive https://github.com/sist2app/sist2/
|
||||||
cd sist2
|
cd sist2
|
||||||
docker build . -t my-sist2-image
|
docker build . -t my-sist2-image
|
||||||
# Copy sist2 executable from docker image
|
# Copy sist2 executable from docker image
|
||||||
@@ -214,7 +212,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
|
|||||||
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
|
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
|
2. Install vcpkg using my fork: https://github.com/sist2app/vcpkg
|
||||||
3. Install vcpkg dependencies
|
3. Install vcpkg dependencies
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -223,7 +221,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
|
|||||||
|
|
||||||
4. Build
|
4. Build
|
||||||
```bash
|
```bash
|
||||||
git clone --recursive https://github.com/simon987/sist2/
|
git clone --recursive https://github.com/sist2app/sist2/
|
||||||
(cd sist2-vue; npm install; npm run build)
|
(cd sist2-vue; npm install; npm run build)
|
||||||
(cd sist2-admin/frontend; npm install; npm run build)
|
(cd sist2-admin/frontend; npm install; npm run build)
|
||||||
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
|
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
|
||||||
|
|||||||
@@ -4,4 +4,4 @@ uvicorn
|
|||||||
websockets
|
websockets
|
||||||
pycron
|
pycron
|
||||||
GitPython
|
GitPython
|
||||||
git+https://github.com/sist2app/sist2-python.git
|
git+https://github.com/sist2app/sist2-python.git@2.1
|
||||||
@@ -220,7 +220,7 @@ class Sist2IndexTask(Sist2Task):
|
|||||||
except ProcessLookupError:
|
except ProcessLookupError:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
os.wait()
|
os.waitpid(pid, 0)
|
||||||
except ChildProcessError:
|
except ChildProcessError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -309,7 +309,7 @@ class Sist2Api {
|
|||||||
}
|
}
|
||||||
|
|
||||||
getTagsSqlite() {
|
getTagsSqlite() {
|
||||||
return axios.get(`${this.baseUrl}/fts/tags`)
|
return axios.get(`${this.baseUrl}fts/tags`)
|
||||||
.then(resp => {
|
.then(resp => {
|
||||||
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
|
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
|
||||||
});
|
});
|
||||||
@@ -566,7 +566,7 @@ class Sist2Api {
|
|||||||
}
|
}
|
||||||
|
|
||||||
getDocumentSqlite(sid) {
|
getDocumentSqlite(sid) {
|
||||||
return axios.get(`${this.baseUrl}/fts/d/${sid}`)
|
return axios.get(`${this.baseUrl}fts/d/${sid}`)
|
||||||
.then(resp => ({
|
.then(resp => ({
|
||||||
_source: resp.data
|
_source: resp.data
|
||||||
}));
|
}));
|
||||||
@@ -589,7 +589,7 @@ class Sist2Api {
|
|||||||
}
|
}
|
||||||
|
|
||||||
getTagSuggestionsSqlite(prefix) {
|
getTagSuggestionsSqlite(prefix) {
|
||||||
return axios.post(`${this.baseUrl}/fts/suggestTags`, prefix)
|
return axios.post(`${this.baseUrl}fts/suggestTags`, prefix)
|
||||||
.then(resp => (resp.data));
|
.then(resp => (resp.data));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -620,7 +620,7 @@ class Sist2Api {
|
|||||||
}
|
}
|
||||||
|
|
||||||
getEmbeddings(sid, modelId) {
|
getEmbeddings(sid, modelId) {
|
||||||
return axios.post(`${this.baseUrl}/e/${sid}/${modelId.toString().padStart(3, '0')}`)
|
return axios.post(`${this.baseUrl}e/${sid}/${modelId.toString().padStart(3, '0')}`)
|
||||||
.then(resp => (resp.data));
|
.then(resp => (resp.data));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -117,11 +117,11 @@ class Sist2ElasticsearchQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (dateMin && dateMax) {
|
if (dateMin && dateMax) {
|
||||||
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
|
filters.push({range: {mtime: {gte: dateMin, lte: dateMax, format: "epoch_second"}}})
|
||||||
} else if (dateMin) {
|
} else if (dateMin) {
|
||||||
filters.push({range: {mtime: {gte: dateMin}}})
|
filters.push({range: {mtime: {gte: dateMin, format: "epoch_second"}}})
|
||||||
} else if (dateMax) {
|
} else if (dateMax) {
|
||||||
filters.push({range: {mtime: {lte: dateMax}}})
|
filters.push({range: {mtime: {lte: dateMax, format: "epoch_second"}}})
|
||||||
}
|
}
|
||||||
|
|
||||||
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
|
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
|
||||||
|
|||||||
@@ -114,7 +114,7 @@ void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv)
|
|||||||
char buf[PATH_MAX];
|
char buf[PATH_MAX];
|
||||||
strcpy(buf, current_job);
|
strcpy(buf, current_job);
|
||||||
|
|
||||||
strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job);
|
SET_CURRENT_JOB(ipc_ctx, current_job);
|
||||||
|
|
||||||
sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
|
sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -64,6 +64,8 @@ typedef struct {
|
|||||||
char current_job[MAX_THREADS][PATH_MAX * 2];
|
char current_job[MAX_THREADS][PATH_MAX * 2];
|
||||||
} database_ipc_ctx_t;
|
} database_ipc_ctx_t;
|
||||||
|
|
||||||
|
#define SET_CURRENT_JOB(ctx, job) (strcpy((ctx)->current_job[ProcData.thread_id], job))
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
double date_min;
|
double date_min;
|
||||||
double date_max;
|
double date_max;
|
||||||
|
|||||||
@@ -142,6 +142,10 @@ void parse(parse_job_t *job) {
|
|||||||
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
|
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (IS_SUB_JOB(job)) {
|
||||||
|
SET_CURRENT_JOB(ProcData.ipc_db->ipc_ctx, job->filepath);
|
||||||
|
}
|
||||||
|
|
||||||
document_t *doc = malloc(sizeof(document_t));
|
document_t *doc = malloc(sizeof(document_t));
|
||||||
|
|
||||||
strcpy(doc->filepath, job->filepath);
|
strcpy(doc->filepath, job->filepath);
|
||||||
|
|||||||
@@ -55,7 +55,7 @@
|
|||||||
static const char *const Version = VERSION;
|
static const char *const Version = VERSION;
|
||||||
static const int VersionMajor = 3;
|
static const int VersionMajor = 3;
|
||||||
static const int VersionMinor = 4;
|
static const int VersionMinor = 4;
|
||||||
static const int VersionPatch = 3;
|
static const int VersionPatch = 4;
|
||||||
|
|
||||||
#ifndef SIST_PLATFORM
|
#ifndef SIST_PLATFORM
|
||||||
#define SIST_PLATFORM unknown
|
#define SIST_PLATFORM unknown
|
||||||
|
|||||||
2
third-party/libscan/libscan/scan.h
vendored
2
third-party/libscan/libscan/scan.h
vendored
@@ -172,6 +172,8 @@ typedef struct {
|
|||||||
char filepath[PATH_MAX * 2 + 1];
|
char filepath[PATH_MAX * 2 + 1];
|
||||||
} parse_job_t;
|
} parse_job_t;
|
||||||
|
|
||||||
|
#define IS_SUB_JOB(job) ((job)->parent[0] != '\0')
|
||||||
|
|
||||||
|
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user