mirror of
https://github.com/simon987/sist2.git
synced 2025-04-17 17:26:48 +00:00
tesseract ocr path fix
This commit is contained in:
parent
d22f83c797
commit
bbee8aa721
22
src/cli.c
22
src/cli.c
@ -15,6 +15,13 @@
|
|||||||
#define DEFAULT_BIND_ADDR "localhost"
|
#define DEFAULT_BIND_ADDR "localhost"
|
||||||
#define DEFAULT_PORT "4090"
|
#define DEFAULT_PORT "4090"
|
||||||
|
|
||||||
|
/*
 * Candidate tessdata directories, tried in the order listed when looking
 * for the "<lang>.traineddata" file. The list is NULL-terminated so that
 * it can be walked without a separate length.
 */
const char *TESS_DATAPATHS[] = {
        "/usr/share/tessdata/",
        "/usr/share/tesseract-ocr/tessdata/",
        "./",
        NULL,
};
|
||||||
|
|
||||||
|
|
||||||
scan_args_t *scan_args_create() {
|
scan_args_t *scan_args_create() {
|
||||||
scan_args_t *args = calloc(sizeof(scan_args_t), 1);
|
scan_args_t *args = calloc(sizeof(scan_args_t), 1);
|
||||||
@ -136,13 +143,23 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
|||||||
|
|
||||||
if (args->tesseract_lang != NULL) {
|
if (args->tesseract_lang != NULL) {
|
||||||
TessBaseAPI *api = TessBaseAPICreate();
|
TessBaseAPI *api = TessBaseAPICreate();
|
||||||
ret = TessBaseAPIInit3(api, TESS_DATAPATH, args->tesseract_lang);
|
|
||||||
|
char filename[128];
|
||||||
|
sprintf(filename, "%s.traineddata", args->tesseract_lang);
|
||||||
|
const char * path = find_file_in_paths(TESS_DATAPATHS, filename);
|
||||||
|
if (path == NULL) {
|
||||||
|
LOG_FATAL("cli.c", "Could not find tesseract language file!");
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
|
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
TessBaseAPIEnd(api);
|
TessBaseAPIEnd(api);
|
||||||
TessBaseAPIDelete(api);
|
TessBaseAPIDelete(api);
|
||||||
|
|
||||||
|
args->tesseract_path = path;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
|
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
|
||||||
@ -156,7 +173,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
|||||||
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
|
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
|
||||||
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
|
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
|
||||||
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
|
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
|
||||||
LOG_DEBUGF("cli.c", "arg ocr=%s", args->tesseract_lang)
|
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
|
||||||
|
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,7 @@ typedef struct scan_args {
|
|||||||
char *archive;
|
char *archive;
|
||||||
archive_mode_t archive_mode;
|
archive_mode_t archive_mode;
|
||||||
char *tesseract_lang;
|
char *tesseract_lang;
|
||||||
|
const char *tesseract_path;
|
||||||
} scan_args_t;
|
} scan_args_t;
|
||||||
|
|
||||||
scan_args_t *scan_args_create();
|
scan_args_t *scan_args_create();
|
||||||
|
@ -28,6 +28,7 @@ struct {
|
|||||||
|
|
||||||
pthread_mutex_t mupdf_mu;
|
pthread_mutex_t mupdf_mu;
|
||||||
char * tesseract_lang;
|
char * tesseract_lang;
|
||||||
|
char * tesseract_path;
|
||||||
} ScanCtx;
|
} ScanCtx;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
File diff suppressed because one or more lines are too long
@ -51,6 +51,7 @@ void sist2_scan(scan_args_t *args) {
|
|||||||
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
|
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
|
||||||
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
|
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
|
||||||
ScanCtx.tesseract_lang = args->tesseract_lang;
|
ScanCtx.tesseract_lang = args->tesseract_lang;
|
||||||
|
ScanCtx.tesseract_path = args->tesseract_path;
|
||||||
|
|
||||||
init_dir(ScanCtx.index.path);
|
init_dir(ScanCtx.index.path);
|
||||||
|
|
||||||
|
@ -141,7 +141,7 @@ void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
|
|||||||
|
|
||||||
if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
|
if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
|
||||||
TessBaseAPI *api = TessBaseAPICreate();
|
TessBaseAPI *api = TessBaseAPICreate();
|
||||||
TessBaseAPIInit3(api, TESS_DATAPATH, ScanCtx.tesseract_lang);
|
TessBaseAPIInit3(api, ScanCtx.tesseract_path, ScanCtx.tesseract_lang);
|
||||||
|
|
||||||
TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
|
TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
|
||||||
TessBaseAPISetSourceResolution(api, pix->xres);
|
TessBaseAPISetSourceResolution(api, pix->xres);
|
||||||
@ -157,8 +157,8 @@ void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
|
|||||||
|
|
||||||
TessBaseAPIEnd(api);
|
TessBaseAPIEnd(api);
|
||||||
TessBaseAPIDelete(api);
|
TessBaseAPIDelete(api);
|
||||||
fz_drop_pixmap(ctx, pix);
|
|
||||||
}
|
}
|
||||||
|
fz_drop_pixmap(ctx, pix);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
|
|
||||||
#define UUID_STR_LEN 37
|
#define UUID_STR_LEN 37
|
||||||
#define UNUSED(x) __attribute__((__unused__)) x
|
#define UNUSED(x) __attribute__((__unused__)) x
|
||||||
#define TESS_DATAPATH "/usr/share/tessdata/"
|
|
||||||
|
|
||||||
#include <glib-2.0/glib.h>
|
#include <glib-2.0/glib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
26
src/util.c
26
src/util.c
@ -1,4 +1,5 @@
|
|||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
#include "src/ctx.h"
|
||||||
|
|
||||||
dyn_buffer_t dyn_buffer_create() {
|
dyn_buffer_t dyn_buffer_create() {
|
||||||
dyn_buffer_t buf;
|
dyn_buffer_t buf;
|
||||||
@ -317,4 +318,29 @@ GHashTable *incremental_get_table() {
|
|||||||
return file_table;
|
return file_table;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char *find_file_in_paths(const char *paths[], const char *filename) {
|
||||||
|
|
||||||
|
for (int i = 0; paths[i] != NULL; i++) {
|
||||||
|
|
||||||
|
char *apath = abspath(paths[i]);
|
||||||
|
if (apath == NULL) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
char path[PATH_MAX];
|
||||||
|
snprintf(path, sizeof(path), "%s%s", apath, filename);
|
||||||
|
|
||||||
|
LOG_DEBUGF("util.c", "Looking for '%s' in folder '%s'", filename, apath)
|
||||||
|
free(apath);
|
||||||
|
|
||||||
|
struct stat info;
|
||||||
|
int ret = stat(path, &info);
|
||||||
|
if (ret != -1) {
|
||||||
|
return paths[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -74,5 +74,6 @@ int incremental_get(GHashTable *table, unsigned long inode_no);
|
|||||||
|
|
||||||
int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no);
|
int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no);
|
||||||
|
|
||||||
|
const char *find_file_in_paths(const char **paths, const char *filename);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user