Compare commits

...

10 Commits

Author SHA1 Message Date
Andrew
3b54e089f7
Merge 4ec6a14252a5bf93a8e0eaeaa83e9317a1211990 into 7a03a2202ec52db2f4ba35413259799fcc6cdcbc 2025-02-02 14:08:06 +01:00
Shy
7a03a2202e Fix #481 2025-01-24 19:40:08 -05:00
Shy
050fc500ce Fix #462 2025-01-24 19:22:01 -05:00
Shy
d44679131b Update compose file to avoid confusion. Fixes #490 2025-01-23 21:45:01 -05:00
Shy
4dd5e70406 Fix #492 2025-01-23 21:40:37 -05:00
Shy
5a82581992 Fix magic database problem 2025-01-23 21:40:27 -05:00
Shy
0dc18a56c0 Fix #509 2025-01-23 19:10:17 -05:00
Shy
258b2e31e6 Version bump 2025-01-23 19:10:02 -05:00
Shy
c726074029 Update tessdata paths 2025-01-23 19:09:54 -05:00
Andrew
4ec6a14252
Add Version in sist2-admin
Added the version badge-pill to the sist2-admin page so you can check the version without going to the main search frontend.
2023-12-20 09:45:15 -06:00
11 changed files with 63 additions and 23 deletions

View File

@ -57,7 +57,7 @@ services:
restart: unless-stopped restart: unless-stopped
volumes: volumes:
- /data/sist2-admin-data/:/sist2-admin/ - /data/sist2-admin-data/:/sist2-admin/
- /:/host - /<path to index>/:/host
ports: ports:
- 4090:4090 - 4090:4090
# NOTE: Don't expose this port publicly! # NOTE: Don't expose this port publicly!

View File

@ -18,7 +18,7 @@ services:
container_name: sist2-admin container_name: sist2-admin
volumes: volumes:
- /data/sist2-admin-data/:/sist2-admin/ - /data/sist2-admin-data/:/sist2-admin/
- /:/host - /<path to index>/:/host
ports: ports:
- 4090:4090 - 4090:4090
# NOTE: Don't expose this port publicly! # NOTE: Don't expose this port publicly!

View File

@ -1,5 +1,16 @@
with open("/usr/lib/file/magic.mgc", "rb") as f: MAGIC_PATHS = [
data = f.read() "/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/work/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/usr/lib/file/magic.mgc"
]
# Try each candidate location in order and keep the contents of the first
# magic database that can be opened and read.
for path in MAGIC_PATHS:
    try:
        with open(path, "rb") as f:
            data = f.read()
        break
    except OSError:
        # Missing or unreadable at this location: fall through to the next one.
        continue
else:
    # No candidate could be read. Stop here with an explicit message instead
    # of failing later with a NameError on the unbound `data`.
    raise SystemExit("magic.mgc not found in any of: %s" % ", ".join(MAGIC_PATHS))
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data))) print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

View File

@ -3,7 +3,11 @@
<b-navbar-brand to="/"> <b-navbar-brand to="/">
<Sist2Icon></Sist2Icon> <Sist2Icon></Sist2Icon>
</b-navbar-brand> </b-navbar-brand>
<span class="badge badge-pill version" v-if="$store && $store.state.sist2Info">
v{{ sist2Version() }}
</span>
<b-button class="ml-auto" to="/task" variant="link">{{ $t("tasks") }}</b-button> <b-button class="ml-auto" to="/task" variant="link">{{ $t("tasks") }}</b-button>
</b-navbar> </b-navbar>
</template> </template>
@ -66,4 +70,4 @@ export default {
.btn-link { .btn-link {
color: #222; color: #222;
} }
</style> </style>

View File

@ -309,7 +309,7 @@ class Sist2Api {
} }
getTagsSqlite() { getTagsSqlite() {
return axios.get(`${this.baseUrl}/fts/tags`) return axios.get(`${this.baseUrl}fts/tags`)
.then(resp => { .then(resp => {
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count)) return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
}); });
@ -566,7 +566,7 @@ class Sist2Api {
} }
getDocumentSqlite(sid) { getDocumentSqlite(sid) {
return axios.get(`${this.baseUrl}/fts/d/${sid}`) return axios.get(`${this.baseUrl}fts/d/${sid}`)
.then(resp => ({ .then(resp => ({
_source: resp.data _source: resp.data
})); }));
@ -589,7 +589,7 @@ class Sist2Api {
} }
getTagSuggestionsSqlite(prefix) { getTagSuggestionsSqlite(prefix) {
return axios.post(`${this.baseUrl}/fts/suggestTags`, prefix) return axios.post(`${this.baseUrl}fts/suggestTags`, prefix)
.then(resp => (resp.data)); .then(resp => (resp.data));
} }
@ -620,7 +620,7 @@ class Sist2Api {
} }
getEmbeddings(sid, modelId) { getEmbeddings(sid, modelId) {
return axios.post(`${this.baseUrl}/e/${sid}/${modelId.toString().padStart(3, '0')}`) return axios.post(`${this.baseUrl}e/${sid}/${modelId.toString().padStart(3, '0')}`)
.then(resp => (resp.data)); .then(resp => (resp.data));
} }
} }

View File

@ -117,11 +117,11 @@ class Sist2ElasticsearchQuery {
} }
if (dateMin && dateMax) { if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}}) filters.push({range: {mtime: {gte: dateMin, lte: dateMax, format: "epoch_second"}}})
} else if (dateMin) { } else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}}) filters.push({range: {mtime: {gte: dateMin, format: "epoch_second"}}})
} else if (dateMax) { } else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}}) filters.push({range: {mtime: {lte: dateMax, format: "epoch_second"}}})
} }
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes

View File

@ -25,6 +25,7 @@ const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/", "/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/", "/usr/share/tesseract-ocr/tessdata/",
"/usr/share/tesseract-ocr/4.00/tessdata/", "/usr/share/tesseract-ocr/4.00/tessdata/",
"/usr/share/tesseract-ocr/5/tessdata/",
"./", "./",
NULL NULL
}; };

View File

@ -55,7 +55,7 @@
static const char *const Version = VERSION; static const char *const Version = VERSION;
static const int VersionMajor = 3; static const int VersionMajor = 3;
static const int VersionMinor = 4; static const int VersionMinor = 4;
static const int VersionPatch = 2; static const int VersionPatch = 3;
#ifndef SIST_PLATFORM #ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown #define SIST_PLATFORM unknown

View File

@ -175,9 +175,19 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
return TRUE; return TRUE;
} }
/*
 * Evaluates to non-zero when a diagnostic message (as received by
 * fz_err_callback / fz_warn_callback) should be dropped instead of logged,
 * i.e. when it contains "invalid glyph index" or "... repeated".
 *
 * `message` is expanded twice, so pass an expression without side effects.
 * The final line deliberately has no line continuation: a trailing backslash
 * would splice whatever follows the macro into its replacement text.
 */
#define IS_IGNORED_MESSAGE(message) \
    ( \
        strstr((message), "invalid glyph index") != NULL \
        || strstr((message), "... repeated") != NULL \
    )
void fz_err_callback(void *user, const char *message) { void fz_err_callback(void *user, const char *message) {
document_t *doc = (document_t *) user; document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx; const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message); CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message);
} }
@ -185,6 +195,10 @@ void fz_err_callback(void *user, const char *message) {
void fz_warn_callback(void *user, const char *message) { void fz_warn_callback(void *user, const char *message) {
document_t *doc = (document_t *) user; document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx; const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message); CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message);
} }

View File

@ -223,14 +223,10 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) { void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {
meta_line_t *meta = doc->meta_head; if (meta_contains_key(doc->meta_head, key)) {
while (meta != NULL) { CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s'",
if (meta->key == key) { key, tag->value);
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'", return;
key, meta->str_val, key, tag->value);
return;
}
meta = meta->next;
} }
text_buffer_t tex = text_buffer_create(-1); text_buffer_t tex = text_buffer_create(-1);
@ -445,7 +441,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
return SAVE_THUMBNAIL_FAILED; return SAVE_THUMBNAIL_FAILED;
} }
if (ctx->tesseract_lang != NULL && thumbnail_index == 0) { if (ctx->tesseract_lang != NULL && thumbnail_index == 0 && !meta_contains_key(doc->meta_head, MetaContent)) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame); ocr_image(ctx, doc, decoder, frame_and_packet->frame);
} }

View File

@ -392,4 +392,18 @@ static parse_job_t *create_parse_job(const char *filepath, int mtime, size_t st_
return job; return job;
} }
/**
 * Check whether a metadata list has at least one entry with the given key.
 *
 * Walks the singly linked list starting at meta_head by following `next`
 * until an entry whose `key` equals `key` is found or the list ends.
 *
 * @param meta_head first entry of the list; NULL is treated as an empty list
 * @param key       metadata key to look for
 * @return TRUE if a matching entry exists, FALSE otherwise
 */
static int meta_contains_key (meta_line_t *meta_head, enum metakey key) {
    for (meta_line_t *entry = meta_head; entry != NULL; entry = entry->next) {
        if (entry->key == key) {
            return TRUE;
        }
    }

    return FALSE;
}
#endif #endif