Merge pull request #227 from yatli/dev

refactor: split ocr_extract_text from ebook
This commit is contained in:
simon987 2022-01-08 10:25:41 -05:00 committed by GitHub
commit fe1aa6dd4c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 49 additions and 23 deletions

View File

@ -5,9 +5,7 @@
#include "../media/media.h"
#include "../arc/arc.h"
#define MIN_OCR_SIZE 350
#define MIN_OCR_LEN 10
#include "../ocr/ocr.h"
/* fill_image callback doesn't let us pass opaque pointers unless I create my own device */
__thread text_buffer_t thread_buffer;
@ -225,7 +223,9 @@ static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
return 0;
}
#define IS_VALID_BPP(d) ((d)==1 || (d)==2 || (d)==4 || (d)==8 || (d)==16 || (d)==24 || (d)==32)
/**
 * OCR result callback handed to ocr_extract_text(): appends the recognized
 * text to the thread-local text buffer (thread_buffer).
 *
 * @param text recognized text
 * @param len  length of text in bytes
 *
 * The last byte of the text is not appended (len - 1 bytes are copied);
 * presumably this drops the trailing newline emitted by the OCR engine --
 * TODO confirm.
 */
static void fill_image_ocr_cb(const char* text, size_t len) {
    /* len is unsigned: without this guard, len - 1 would wrap to SIZE_MAX */
    if (text == NULL || len == 0) {
        return;
    }
    text_buffer_append_string(&thread_buffer, text, len - 1);
}
void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
@ -233,26 +233,9 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
int l2factor = 0;
if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) {
if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && OCR_IS_VALID_BPP(img->n)) {
fz_pixmap *pix = img->get_pixmap(fzctx, img, NULL, img->w, img->h, &l2factor);
if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
TessBaseAPI *api = TessBaseAPICreate();
TessBaseAPIInit3(api, thread_ctx.tesseract_path, thread_ctx.tesseract_lang);
TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
TessBaseAPISetSourceResolution(api, pix->xres);
char *text = TessBaseAPIGetUTF8Text(api);
size_t len = strlen(text);
if (len >= MIN_OCR_LEN) {
text_buffer_append_string(&thread_buffer, text, len - 1);
}
TessBaseAPIEnd(api);
TessBaseAPIDelete(api);
}
ocr_extract_text(thread_ctx.tesseract_path, thread_ctx.tesseract_lang, pix->samples, pix->w, pix->h, pix->n, pix->stride, pix->xres, fill_image_ocr_cb);
fz_drop_pixmap(fzctx, pix);
}
}

43
third-party/libscan/libscan/ocr/ocr.h vendored Normal file
View File

@ -0,0 +1,43 @@
#ifndef OCR_H
#define OCR_H
#include "../scan.h"
#include <tesseract/capi.h>
/* ocr_extract_text() skips any image whose width or height is not strictly
 * greater than this value. */
#define MIN_OCR_SIZE 350
/* Minimum length (as measured by strlen) the recognized text must have for
 * ocr_extract_text() to invoke the callback. */
#define MIN_OCR_LEN 10
/* Evaluates to non-zero when d is one of 1, 2, 4, 8, 16, 24 or 32.
 * Note: d is expanded several times, so it must not have side effects. */
#define OCR_IS_VALID_BPP(d) \
((d) == 1 || (d) == 2 || (d) == 4 || (d) == 8 || (d) == 16 || (d) == 24 || \
(d) == 32)
/* Callback invoked by ocr_extract_text() with the recognized text and its
 * length (strlen of the text, terminator excluded). */
typedef void (*ocr_extract_callback_t)(const char *, size_t);
/**
 * Run Tesseract on a raw image buffer and hand the recognized text to a
 * callback.
 *
 * Nothing is done when the image is too small (width or height not greater
 * than MIN_OCR_SIZE), when img_xres is not positive, or when img_bpp is
 * rejected by OCR_IS_VALID_BPP. The callback is only invoked when the
 * recognized text is at least MIN_OCR_LEN bytes long.
 *
 * @param tesseract_path data path passed to TessBaseAPIInit3
 * @param tesseract_lang language string passed to TessBaseAPIInit3
 * @param img_buf        image samples, passed to TessBaseAPISetImage
 * @param img_w          image width
 * @param img_h          image height
 * @param img_bpp        passed to TessBaseAPISetImage as its
 *                       bytes-per-pixel argument
 * @param img_stride     passed to TessBaseAPISetImage as bytes per line
 * @param img_xres       source resolution passed to
 *                       TessBaseAPISetSourceResolution
 * @param cb             receives the text and its strlen(); the text is
 *                       released right after the callback returns, so the
 *                       callback must copy whatever it wants to keep
 */
__always_inline static void
ocr_extract_text(const char *tesseract_path, const char *tesseract_lang,
                 const unsigned char *img_buf, const int img_w, const int img_h,
                 const int img_bpp, const int img_stride, const int img_xres,
                 const ocr_extract_callback_t cb) {
    if (img_w <= MIN_OCR_SIZE || img_h <= MIN_OCR_SIZE || img_xres <= 0 ||
        !OCR_IS_VALID_BPP(img_bpp)) {
        return;
    }

    TessBaseAPI *api = TessBaseAPICreate();
    if (api == NULL) {
        return;
    }

    /* Init3 returns non-zero on failure (e.g. missing language data);
     * running recognition on an uninitialized engine is not valid. */
    if (TessBaseAPIInit3(api, tesseract_path, tesseract_lang) != 0) {
        TessBaseAPIDelete(api);
        return;
    }

    TessBaseAPISetImage(api, img_buf, img_w, img_h, img_bpp, img_stride);
    TessBaseAPISetSourceResolution(api, img_xres);

    /* GetUTF8Text may return NULL when recognition fails */
    char *text = TessBaseAPIGetUTF8Text(api);
    if (text != NULL) {
        size_t len = strlen(text);
        if (len >= MIN_OCR_LEN && cb != NULL) {
            cb(text, len);
        }
        /* The text buffer is owned by the caller and must be released
         * through the Tesseract API, otherwise it leaks on every image. */
        TessDeleteText(text);
    }

    TessBaseAPIEnd(api);
    TessBaseAPIDelete(api);
}
#endif