From 82fd08567356ec94caccfd00d222a66140f02744 Mon Sep 17 00:00:00 2001 From: David Magdy <137376090+David-Magdy@users.noreply.github.com> Date: Wed, 3 Sep 2025 16:11:35 +0300 Subject: [PATCH 1/2] Update recognition.py Feat: added temperature scaling to recognition and replaced custom confidence with average probability - Introduced `temperature` parameter in `recognizer_predict` and `get_text` to calibrate model confidence output. - Applied temperature scaling to logits before softmax to soften or sharpen confidence. - Swapped `custom_mean` (geometric-inspired) for a simple mean of max probabilities, to yield more interpretable confidence scores. --- easyocr/recognition.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/easyocr/recognition.py b/easyocr/recognition.py index 530ef9517e2..0a5e1f21b41 100644 --- a/easyocr/recognition.py +++ b/easyocr/recognition.py @@ -97,7 +97,7 @@ def __call__(self, batch): return image_tensors def recognizer_predict(model, converter, test_loader, batch_max_length,\ - ignore_idx, char_group_idx, decoder = 'greedy', beamWidth= 5, device = 'cpu'): + ignore_idx, char_group_idx, decoder = 'greedy', beamWidth= 5, device = 'cpu', temperature=1.0): model.eval() result = [] with torch.no_grad(): @@ -110,6 +110,9 @@ def recognizer_predict(model, converter, test_loader, batch_max_length,\ preds = model(image, text_for_pred) + # Apply Temperature Scaling + preds = preds / temperature + # Select max probabilty (greedy decoding) then decode index to character preds_size = torch.IntTensor([preds.size(1)] * batch_size) @@ -145,7 +148,11 @@ def recognizer_predict(model, converter, test_loader, batch_max_length,\ preds_max_prob.append(np.array([0])) for pred, pred_max_prob in zip(preds_str, preds_max_prob): - confidence_score = custom_mean(pred_max_prob) + # Replaced custom_mean with a standard average of probabilities + if len(pred_max_prob) == 0: + confidence_score = 0.0 + else: + confidence_score = 
pred_max_prob.mean() result.append([pred, confidence_score]) return result @@ -185,7 +192,7 @@ def get_recognizer(recog_network, network_params, character,\ def get_text(character, imgH, imgW, recognizer, converter, image_list,\ ignore_char = '',decoder = 'greedy', beamWidth =5, batch_size=1, contrast_ths=0.1,\ - adjust_contrast=0.5, filter_ths = 0.003, workers = 1, device = 'cpu'): + adjust_contrast=0.5, filter_ths = 0.003, workers = 1, device = 'cpu', temperature=1.0): batch_max_length = int(imgW/10) char_group_idx = {} @@ -204,7 +211,7 @@ def get_text(character, imgH, imgW, recognizer, converter, image_list,\ # predict first round result1 = recognizer_predict(recognizer, converter, test_loader,batch_max_length,\ - ignore_idx, char_group_idx, decoder, beamWidth, device = device) + ignore_idx, char_group_idx, decoder, beamWidth, device=device, temperature=temperature) # predict second round low_confident_idx = [i for i,item in enumerate(result1) if (item[1] < contrast_ths)] @@ -216,7 +223,7 @@ def get_text(character, imgH, imgW, recognizer, converter, image_list,\ test_data, batch_size=batch_size, shuffle=False, num_workers=int(workers), collate_fn=AlignCollate_contrast, pin_memory=True) result2 = recognizer_predict(recognizer, converter, test_loader, batch_max_length,\ - ignore_idx, char_group_idx, decoder, beamWidth, device = device) + ignore_idx, char_group_idx, decoder, beamWidth, device=device, temperature=temperature) result = [] for i, zipped in enumerate(zip(coord, result1)): From 496288e221138aefcaf1cd7cb3a96f2e4d09eb15 Mon Sep 17 00:00:00 2001 From: David Magdy <137376090+David-Magdy@users.noreply.github.com> Date: Wed, 3 Sep 2025 16:17:24 +0300 Subject: [PATCH 2/2] Update easyocr.py Aligned API function definitions with the new temperature scaling feature. 
--- easyocr/easyocr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py index c08fe0388dd..2ec5a2de91d 100644 --- a/easyocr/easyocr.py +++ b/easyocr/easyocr.py @@ -355,7 +355,7 @@ def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,\ workers = 0, allowlist = None, blocklist = None, detail = 1,\ rotation_info = None,paragraph = False,\ contrast_ths = 0.1,adjust_contrast = 0.5, filter_ths = 0.003,\ - y_ths = 0.5, x_ths = 1.0, reformat=True, output_format='standard'): + y_ths = 0.5, x_ths = 1.0, reformat=True, output_format='standard',temperature=1.0): if reformat: img, img_cv_grey = reformat_input(img_cv_grey) @@ -383,7 +383,7 @@ def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,\ image_list, max_width = get_image_list(h_list, f_list, img_cv_grey, model_height = imgH) result0 = get_text(self.character, imgH, int(max_width), self.recognizer, self.converter, image_list,\ ignore_char, decoder, beamWidth, batch_size, contrast_ths, adjust_contrast, filter_ths,\ - workers, self.device) + workers, self.device,temperature) result += result0 for bbox in free_list: h_list = [] @@ -391,7 +391,7 @@ def recognize(self, img_cv_grey, horizontal_list=None, free_list=None,\ image_list, max_width = get_image_list(h_list, f_list, img_cv_grey, model_height = imgH) result0 = get_text(self.character, imgH, int(max_width), self.recognizer, self.converter, image_list,\ ignore_char, decoder, beamWidth, batch_size, contrast_ths, adjust_contrast, filter_ths,\ - workers, self.device) + workers, self.device,temperature) result += result0 # default mode will try to process multiple boxes at the same time else: