Skip to content

Commit 8c2e4ca

Browse files
committed
recognize: skip tiny or bin-empty lines, too
1 parent 395e43c commit 8c2e4ca

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

ocrd_calamari/recognize.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -97,15 +97,23 @@ def process(self):
9797
log.debug("Recognizing line '%s' in region '%s'", line.id, region.id)
9898

9999
line_image, line_coords = self.workspace.image_from_segment(line, region_image, region_coords, feature_selector=self.features)
100-
if ('binarized' not in line_coords['features'] and 'grayscale_normalized' not in line_coords['features'] and self.network_input_channels == 1):
100+
if ('binarized' not in line_coords['features'] and
101+
'grayscale_normalized' not in line_coords['features'] and
102+
self.network_input_channels == 1):
101103
# We cannot use a feature selector for this since we don't
102104
# know whether the model expects (has been trained on)
103105
# binarized or grayscale images; but raw images are likely
104106
# always inadequate:
105107
log.warning("Using raw image for line '%s' in region '%s'", line.id, region.id)
106108

107-
line_image = line_image if all(line_image.size) else [[0]]
108-
line_image_np = np.array(line_image, dtype=np.uint8)
109+
if (not all(line_image.size) or
110+
line_image.height <= 8 or line_image.width <= 8 or
111+
'binarized' in line_coords['features'] and line_image.convert('1').getextrema()[0] == 255):
112+
# empty size or too tiny or no foreground at all: skip
113+
log.warning("Skipping empty line '%s' in region '%s'", line.id, region.id)
114+
line_image_np = np.array([[0]], dtype=np.uint8)
115+
else:
116+
line_image_np = np.array(line_image, dtype=np.uint8)
109117
line_images_np.append(line_image_np)
110118
line_coordss.append(line_coords)
111119
raw_results_all = self.predictor.predict_raw(line_images_np, progress_bar=False)

0 commit comments

Comments
 (0)