Skip to content

Commit aa1c3c3

Browse files
steegecsMindFlow
and committed
Added try-except blocks to handle tiktoken import and modified token count functions to handle import failure. Also updated tiktoken requirement to only install for python version >= 3.8.
Co-authored-by: MindFlow <mf@mindflo.ai>
1 parent 192721b commit aa1c3c3

File tree

3 files changed

+30
-8
lines changed

3 files changed

+30
-8
lines changed

mindflow/db/objects/model.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,23 @@
44
import numpy as np
55
from traitlets import Callable
66

7-
import tiktoken
7+
try:
8+
import tiktoken
9+
except ImportError:
10+
print(
11+
"tiktoken is not available in python < v3.8. Estimation of tokens will be less precise, which may impact performance and quality of responses."
12+
)
13+
print("Upgrade to python v3.8 or higher for better results.")
14+
pass
15+
816

917
from mindflow.db.db.database import Collection
1018
from mindflow.db.objects.base import BaseObject
1119
from mindflow.db.objects.base import StaticObject
1220
from mindflow.db.objects.service import ServiceConfig
1321
from mindflow.db.objects.static_definition.model import ModelID
1422
from mindflow.db.objects.static_definition.service import ServiceID
15-
from mindflow.utils.errors import ModelError, EmbeddingModelError
23+
from mindflow.utils.errors import ModelError
1624

1725

1826
class Model(StaticObject):
@@ -47,7 +55,12 @@ class ConfiguredModel(Callable):
4755
name: str
4856
service: str
4957
model_type: str
50-
tokenizer: tiktoken.Encoding
58+
59+
try:
60+
tokenizer: tiktoken.Encoding
61+
except NameError:
62+
pass
63+
5164
hard_token_limit: int
5265
token_cost: int
5366
token_cost_unit: str
@@ -69,8 +82,11 @@ def __init__(self, model_id: str):
6982
if value not in [None, ""]:
7083
setattr(self, key, value)
7184

72-
if self.service == ServiceID.OPENAI.value:
73-
self.tokenizer = tiktoken.encoding_for_model(self.id)
85+
try:
86+
if self.service == ServiceID.OPENAI.value:
87+
self.tokenizer = tiktoken.encoding_for_model(self.id)
88+
except NameError:
89+
pass
7490

7591
service_config = ServiceConfig.load(f"{self.service}_config")
7692
self.api_key = service_config.api_key

mindflow/utils/token.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,17 @@ def get_token_count(model: ConfiguredModel, text: str) -> int:
66
"""
77
This function is used to get the token count of a string.
88
"""
9-
return len(model.tokenizer.encode(text))
9+
try:
10+
return len(model.tokenizer.encode(text))
11+
except Exception:
12+
return len(text) // 3
1013

1114

1215
def get_batch_token_count(model: ConfiguredModel, texts: List[str]) -> int:
1316
"""
1417
This function is used to get the token count of a list of strings.
1518
"""
16-
return sum([len(encoding) for encoding in model.tokenizer.encode_batch(texts)])
19+
try:
20+
return sum([len(encoding) for encoding in model.tokenizer.encode_batch(texts)])
21+
except Exception:
22+
return sum([len(text) // 3 for text in texts])

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ alive-progress
22
click
33
numpy
44
openai==0.27.0
5-
tiktoken==0.3.0
5+
tiktoken==0.3.0; python_version >= '3.8'
66
pytest
77
scikit-learn
88
tqdm

0 commit comments

Comments
 (0)