Skip to content

Commit aa1c3c3

Browse files
steegecsMindFlow
and committed
Added try-except blocks to handle tiktoken import and modified token count functions to handle import failure. Also updated tiktoken requirement to only install for python version >= 3.8.
Co-authored-by: MindFlow <mf@mindflo.ai>
1 parent 192721b commit aa1c3c3

File tree

3 files changed

+30
-8
lines changed

3 files changed

+30
-8
lines changed

mindflow/db/objects/model.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,23 @@
44
import numpy as np
55
from traitlets import Callable
66

7-
import tiktoken
7+
try:
8+
import tiktoken
9+
except ImportError:
10+
print(
11+
"tiktoken is not available in python < v3.8. Estimation of tokens will be less precise, which may impact performance and quality of responses."
12+
)
13+
print("Upgrade to python v3.8 or higher for better results.")
14+
pass
15+
816

917
from mindflow.db.db.database import Collection
1018
from mindflow.db.objects.base import BaseObject
1119
from mindflow.db.objects.base import StaticObject
1220
from mindflow.db.objects.service import ServiceConfig
1321
from mindflow.db.objects.static_definition.model import ModelID
1422
from mindflow.db.objects.static_definition.service import ServiceID
15-
from mindflow.utils.errors import ModelError, EmbeddingModelError
23+
from mindflow.utils.errors import ModelError
1624

1725

1826
class Model(StaticObject):
@@ -47,7 +55,12 @@ class ConfiguredModel(Callable):
4755
name: str
4856
service: str
4957
model_type: str
50-
tokenizer: tiktoken.Encoding
58+
59+
try:
60+
tokenizer: tiktoken.Encoding
61+
except NameError:
62+
pass
63+
5164
hard_token_limit: int
5265
token_cost: int
5366
token_cost_unit: str
@@ -69,8 +82,11 @@ def __init__(self, model_id: str):
6982
if value not in [None, ""]:
7083
setattr(self, key, value)
7184

72-
if self.service == ServiceID.OPENAI.value:
73-
self.tokenizer = tiktoken.encoding_for_model(self.id)
85+
try:
86+
if self.service == ServiceID.OPENAI.value:
87+
self.tokenizer = tiktoken.encoding_for_model(self.id)
88+
except NameError:
89+
pass
7490

7591
service_config = ServiceConfig.load(f"{self.service}_config")
7692
self.api_key = service_config.api_key

mindflow/utils/token.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,17 @@ def get_token_count(model: ConfiguredModel, text: str) -> int:
66
"""
77
This function is used to get the token count of a string.
88
"""
9-
return len(model.tokenizer.encode(text))
9+
try:
10+
return len(model.tokenizer.encode(text))
11+
except Exception:
12+
return len(text) // 3
1013

1114

1215
def get_batch_token_count(model: ConfiguredModel, texts: List[str]) -> int:
1316
"""
1417
This function is used to get the token count of a list of strings.
1518
"""
16-
return sum([len(encoding) for encoding in model.tokenizer.encode_batch(texts)])
19+
try:
20+
return sum([len(encoding) for encoding in model.tokenizer.encode_batch(texts)])
21+
except Exception:
22+
return sum([len(text) // 3 for text in texts])

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ alive-progress
22
click
33
numpy
44
openai==0.27.0
5-
tiktoken==0.3.0
5+
tiktoken==0.3.0; python_version >= '3.8'
66
pytest
77
scikit-learn
88
tqdm

0 commit comments

Comments
 (0)