diff --git a/src/parsing/pdf.c b/src/parsing/pdf.c
index e685abf..41f2ed0 100644
--- a/src/parsing/pdf.c
+++ b/src/parsing/pdf.c
@@ -2,6 +2,7 @@
 #include "src/ctx.h"
 
 #define MIN_OCR_SIZE 350
+#define MIN_OCR_LEN 10
 __thread text_buffer_t thread_buffer;
 
 
@@ -128,6 +129,7 @@ int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
     return 0;
 }
 
+#define IS_VALID_BPP(d) ((d) == 1 || (d) == 2 || (d) == 4 || (d) == 8 || (d) == 16 || (d) == 24 || (d) == 32) /* d is evaluated repeatedly: pass a side-effect-free expression */
 
 void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
                 fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
@@ -135,7 +137,7 @@ void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
 
     int l2factor = 0;
 
-    if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE) {
+    if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) {
 
         fz_pixmap *pix = img->get_pixmap(ctx, img, NULL, img->w, img->h, &l2factor);
 
@@ -148,12 +150,14 @@ void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
 
             char *text = TessBaseAPIGetUTF8Text(api);
             size_t len = strlen(text);
-            text_buffer_append_string(&thread_buffer, text, len - 1);
-            LOG_DEBUGF(
-                    "pdf.c",
-                    "(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
-                    pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
-            )
+            if (len >= MIN_OCR_LEN) {
+                text_buffer_append_string(&thread_buffer, text, len - 1);
+                LOG_DEBUGF(
+                        "pdf.c",
+                        "(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
+                        pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
+                )
+            }
 
             TessBaseAPIEnd(api);
             TessBaseAPIDelete(api);