From 56eaca7116dd5b21a2ebd456cd1b0237b8c09dc3 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 10 Oct 2024 19:09:41 +0200 Subject: [PATCH 1/2] fix: levenshtein import --- ocrd_cis/align/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_cis/align/cli.py b/ocrd_cis/align/cli.py index ffe53fd8..7747622e 100644 --- a/ocrd_cis/align/cli.py +++ b/ocrd_cis/align/cli.py @@ -2,7 +2,7 @@ import click import json import os -import Levenshtein +from rapidfuzz.distance import Levenshtein from ocrd import Processor from ocrd.decorators import ocrd_cli_options from ocrd.decorators import ocrd_cli_wrap_processor From ca08c1af462769df84cf5e83aadf118da0d96865 Mon Sep 17 00:00:00 2001 From: kba Date: Fri, 11 Oct 2024 11:12:20 +0200 Subject: [PATCH 2/2] eval/stats: Levenshtein -> rapidfuzz.distance.Levenshtein --- ocrd_cis/div/eval.py | 2 +- ocrd_cis/div/stats.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ocrd_cis/div/eval.py b/ocrd_cis/div/eval.py index 6efe90c6..f47682ff 100644 --- a/ocrd_cis/div/eval.py +++ b/ocrd_cis/div/eval.py @@ -1,6 +1,6 @@ import os from PIL import Image -from Levenshtein import distance +from rapidfuzz.distance.Levenshtein import distance path = '/mnt/c/Users/chris/Documents/projects/OCR-D/daten/gt/lines/' diff --git a/ocrd_cis/div/stats.py b/ocrd_cis/div/stats.py index ea385d98..6f9c9816 100644 --- a/ocrd_cis/div/stats.py +++ b/ocrd_cis/div/stats.py @@ -4,7 +4,7 @@ from ocrd import Processor from ocrd_cis import get_ocrd_tool from ocrd_models.ocrd_page_generateds import parse -from Levenshtein import distance +from rapidfuzz.distance import Levenshtein class Stats(Processor): @@ -81,7 +81,7 @@ def process(self): # print(line.get_TextEquiv()[2].dataType) unicodeline = line.get_TextEquiv()[i].Unicode - d[i] += distance(gtline, unicodeline) + d[i] += Levenshtein.distance(gtline, unicodeline) # words = line.get_Word() # for word in words: