diff --git a/bpm_ai_inference/translation/easy_nmt/easy_nmt.py b/bpm_ai_inference/translation/easy_nmt/easy_nmt.py index 473955b..808a28d 100644 --- a/bpm_ai_inference/translation/easy_nmt/easy_nmt.py +++ b/bpm_ai_inference/translation/easy_nmt/easy_nmt.py @@ -407,9 +407,9 @@ def sentence_splitting(self, text: str, lang: str = None): sentences = list(re.findall(u'[^!?。\.]+[!?。\.]*', text, flags=re.U)) else: try: - nltk.data.find('tokenizers/punkt') + nltk.data.find('tokenizers/punkt_tab') except LookupError: - nltk.download('punkt') + nltk.download('punkt_tab') sentences = nltk.sent_tokenize(text)