OCR fixes

This commit is contained in:
simon987 2023-01-13 20:13:20 -05:00
parent a074d8cf10
commit b9f008603a
5 changed files with 16 additions and 6 deletions

View File

@ -8,6 +8,7 @@ Testing/
**/cmake_install.cmake
**/CMakeCache.txt
**/CMakeFiles/
.cmake
LICENSE
Makefile
**/*.md

View File

@ -14,7 +14,7 @@ RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_T
RUN make -j$(nproc)
RUN strip sist2 || mv sist2_debug sist2
FROM --platform="linux/amd64" ubuntu@sha256:b25ef49a40b7797937d0d23eca3b0a41701af6757afca23d504d50826f0b37ce
FROM --platform="linux/amd64" ubuntu@sha256:965fbcae990b0467ed5657caceaec165018ef44a4d2d46c7cdea80a9dff0d1ea
WORKDIR /root
@ -33,7 +33,9 @@ RUN mkdir -p /usr/share/tessdata && \
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
curl -o /usr/share/tessdata/osd.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/osd.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata &&\
curl -o /usr/share/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata
# sist2
COPY --from=build /build/sist2 /root/sist2

View File

@ -209,8 +209,10 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.ebook_ctx.content_size = args->content_size;
ScanCtx.ebook_ctx.enable_tn = args->tn_count > 0;
ScanCtx.ebook_ctx.tn_size = args->tn_size;
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
if (args->ocr_ebooks) {
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
}
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store;

File diff suppressed because one or more lines are too long

View File

@ -446,7 +446,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
return SAVE_THUMBNAIL_FAILED;
}
if (ctx->tesseract_lang != NULL && IS_VIDEO(pFormatCtx) && thumbnail_index == 0) {
if (ctx->tesseract_lang != NULL && thumbnail_index == 0) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}