From 5754deaaca44eba2a52fa9c60f036ab95db8781e Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Thu, 5 Sep 2024 16:19:16 +0000 Subject: [PATCH] Auto-format by https://ultralytics.com/actions --- clip/simple_tokenizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clip/simple_tokenizer.py b/clip/simple_tokenizer.py index 5c8387474..62304dc2b 100644 --- a/clip/simple_tokenizer.py +++ b/clip/simple_tokenizer.py @@ -7,13 +7,13 @@ import regex as re -@lru_cache() +@lru_cache def default_bpe(): """Returns the file path to the default BPE vocabulary file 'bpe_simple_vocab_16e6.txt.gz'.""" return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz") -@lru_cache() +@lru_cache def bytes_to_unicode(): """ Returns list of utf-8 byte and a corresponding list of unicode strings. @@ -65,7 +65,7 @@ def whitespace_clean(text): return text -class SimpleTokenizer(object): +class SimpleTokenizer: """Tokenizes text using byte pair encoding (BPE) and predefined tokenization rules for efficient text processing.""" def __init__(self, bpe_path: str = default_bpe()):