mirror of
https://github.com/simon987/sist2.git
synced 2025-04-19 18:26:43 +00:00
fix #228: Handle multiple languages for tesseract OCR
This commit is contained in:
parent
fe1aa6dd4c
commit
d10c13d46d
29
src/cli.c
29
src/cli.c
@ -77,6 +77,15 @@ void exec_args_destroy(exec_args_t *args) {
|
|||||||
free(args);
|
free(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__always_inline
|
||||||
|
static const char* find_tessdata(const char* p_lang, size_t len) {
|
||||||
|
char filename[128];
|
||||||
|
memcpy(filename, p_lang, len);
|
||||||
|
filename[len] = '\0';
|
||||||
|
strcat(filename, ".traineddata");
|
||||||
|
return find_file_in_paths(TESS_DATAPATHS, filename);
|
||||||
|
}
|
||||||
|
|
||||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
fprintf(stderr, "Required positional argument: PATH.\n");
|
fprintf(stderr, "Required positional argument: PATH.\n");
|
||||||
@ -171,16 +180,28 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
|||||||
if (args->tesseract_lang != NULL) {
|
if (args->tesseract_lang != NULL) {
|
||||||
TessBaseAPI *api = TessBaseAPICreate();
|
TessBaseAPI *api = TessBaseAPICreate();
|
||||||
|
|
||||||
char filename[128];
|
const char *p_lang = args->tesseract_lang;
|
||||||
sprintf(filename, "%s.traineddata", args->tesseract_lang);
|
const char *e_lang = p_lang + strlen(p_lang);
|
||||||
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
|
const char *path = NULL;
|
||||||
if (path == NULL) {
|
while (p_lang < e_lang) {
|
||||||
|
const char *p_plus = strstr(p_lang, "+");
|
||||||
|
if (p_plus == NULL) {
|
||||||
|
p_plus = e_lang;
|
||||||
|
}
|
||||||
|
const char* _path = find_tessdata(p_lang, p_plus - p_lang);
|
||||||
|
if (_path == NULL) {
|
||||||
LOG_FATAL("cli.c", "Could not find tesseract language file!");
|
LOG_FATAL("cli.c", "Could not find tesseract language file!");
|
||||||
|
} else if (path != NULL && path != _path) {
|
||||||
|
LOG_FATAL("cli.c", "Multiple tesseract language files are not in the same directory!");
|
||||||
|
}
|
||||||
|
path = _path;
|
||||||
|
p_lang = p_plus + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
|
ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
|
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
|
||||||
|
TessBaseAPIDelete(api);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
TessBaseAPIEnd(api);
|
TessBaseAPIEnd(api);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user