Skip to content

Commit

Permalink
Update 0.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
hieuminh65 committed Mar 16, 2024
1 parent 2ac2adb commit d7d5a60
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 14 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ All notable changes to this project will be documented in this file.
### Added
- Refined the README file.
- Officially released the package.
- Google AI models

### Fixed
- Fixed the Llama2-70B model API name string for Groq.

## [0.2.8] - 2024-03-15

Expand Down
23 changes: 18 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Easy-to-use LLM API from a state-of-the-art provider and comparison.
- **Comparison**: Compare the cost and performance of different providers and models.
- **Log**: Log the response and cost of the request in a log file.
- **Providers**: Support for all providers, both open-source and closed-source.
- **Result**: See the actual time taken by the request, especially when you don't trust the benchmark.

## Installation
1. Install the package
Expand Down Expand Up @@ -34,6 +35,12 @@ MISTRAL_API_KEY=xxx
ANTHROPIC_API_KEY=xxx
```

or set the environment variable directly.
```bash
export TOGETHER_API_KEY=xxx
export OPENAI_API_KEY=xxx
```

2. Run the code
```python
from api4all import EngineFactory
Expand Down Expand Up @@ -68,11 +75,11 @@ Request ID - fa8cebd0-265a-44b2-95d7-6ff1588d2c87
I am not able to provide information about the current status of the economy, as I do not have access to real-time information. Therefore, I recommend checking a reliable source for the latest economic news and data.

Cost: $0.0000154 # Cost of this provider for this request
Provider: together # Provider used for this request
Execution-time: Execution time not provided by the provider
Actual-time: 0.9448428153991699 # Actual time taken by the request
Input-token: 33 # Number of tokens used for the input
Output-token: 44 # Number of tokens used for the output
Provider: together # Provider used for this request
Execution-time: Execution time not provided by the provider
Actual-time: 0.9448428153991699 # Actual time taken by the request
Input-token: 33 # Number of tokens used for the input
Output-token: 44 # Number of tokens used for the output
```

## Providers and Models
Expand Down Expand Up @@ -142,5 +149,11 @@ Request ID - fa8cebd0-265a-44b2-95d7-6ff1588d2c87
| Claude 2.0 | $0.8 | $2.4 | 100,000 | "anthropic/claude-instant-1.2" |


#### 4. Google
| Model | Input Pricing ($/1M Tokens) | Output Pricing ($/1M Tokens) | Context Length | API string name |
|:------:|:------:|:------:|:------:|:------:|
| Google Gemini 1.0 Pro | $0 | $0 | 32,768 | "google/gemini-1.0-pro" |


## Contributing
Welcome to contribute to the project. If you see any updated pricing, new models, new providers, or any other changes, feel free to open an issue or a pull request.
16 changes: 14 additions & 2 deletions api4all/data/constant_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@
}
},
"groq": {
"name": "LLaMA2-70b-chat",
"name": "llama2-70b-4096",
"price": {
"input": 0,
"output": 0
Expand Down Expand Up @@ -326,5 +326,17 @@
}
},
"context-length": 4096
}
},
"google/gemini-1.0-pro": {
"provider": {
"google": {
"name": "gemini-1.0-pro",
"price": {
"input": 0,
"output": 0
}
}
},
"context-length": 32768
},
}
122 changes: 117 additions & 5 deletions api4all/engines/engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import replicate
import anthropic
from mistralai.client import MistralClient

import google.generativeai as genai

__all__ = ["GroqEngine", "AnyscaleEngine", "TogetherEngine", "FireworksEngine", "ReplicateEngine", "DeepinfraEngine", "OpenaiEngine", "AnthropicEngine", "MistralEngine"]

Expand Down Expand Up @@ -387,7 +387,7 @@ def generate_response(self,
"temperature": self.temperature,
"max_new_tokens": self.max_tokens,
"top_p": self.top_p,
"stop_sequences": self.stop
"stop_sequences": self.stop if self.stop else ""
}
)
except Exception as e:
Expand Down Expand Up @@ -638,9 +638,9 @@ def generate_response(self,

actual_time = time.time() - start_time

content = completion.choices[0].message.content
input_tokens = completion.usage.prompt_tokens
output_tokens = completion.usage.completion_tokens
content = completion.content[0].text
input_tokens = completion.usage.input_tokens
output_tokens = completion.usage.output_tokens
execution_time = None
cost = dataEngine.calculate_cost(self.provider, self.model, input_tokens, output_tokens)

Expand Down Expand Up @@ -732,3 +732,115 @@ def generate_response(self,

return response


#-----------------------------------------Google Gemini AI-----------------------------------------#
@EngineFactory.register_engine('google')
class GoogleEngine(TextEngine):
    """
    Text-generation engine backed by Google's Gemini models via the
    ``google-generativeai`` SDK.

    The Gemini API does not report token usage or execution time, so the
    resulting ``TextResponse`` carries ``None`` for those fields and a cost
    of 0 (the configured Gemini pricing is $0/$0 per million tokens).
    """

    def __init__(self,
                model: str,
                provider: str = "google",
                temperature: Optional[float] = ModelConfig.DEFAULT_TEMPERATURE,
                max_tokens: Optional[int] = ModelConfig.DEFAULT_MAX_TOKENS,
                top_p: Optional[float] = ModelConfig.DEFAULT_TOP_P,
                stop: Union[str, List[str], None] = ModelConfig.DEFAULT_STOP,
                messages: Optional[List[Dict[str, str]]] = ModelConfig.MESSAGES_EXAMPLE
                ) -> None:
        """
        Configure the Gemini client and request parameters.

        Args:
            model: Generic model identifier, e.g. ``"google/gemini-1.0-pro"``.
            provider: Provider key; always ``"google"`` for this engine.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Maximum number of output tokens.
            top_p: Nucleus-sampling probability mass.
            stop: Stop sequence(s). NOTE(review): currently accepted but not
                forwarded to Gemini — consider adding ``"stop_sequences"`` to
                ``generation_config``; confirm against the SDK.
            messages: Chat history as a list of ``{"role", "content"}`` dicts.

        Raises:
            ValueError: If the ``GOOGLE_API_KEY`` environment key is missing.
        """
        super().__init__(model, provider, temperature, max_tokens, top_p, stop, messages)

        # Fail fast when the API key is absent rather than at request time.
        self._api_key = self._keys.get_api_keys("GOOGLE_API_KEY")
        if self._api_key is None:
            self.logger.error(f"API key not found for {self.provider}")
            raise ValueError(f"API key not found for {self.provider}")

        # Set up the client
        self._set_up_client()

        # Map the generic model string to the provider-specific API name
        # (e.g. "google/gemini-1.0-pro" -> "gemini-1.0-pro").
        self._api_name = dataEngine.getAPIname(self.model, self.provider)

        # Sampling parameters passed verbatim to the Gemini API.
        self.generation_config = {
            "temperature": self.temperature,
            "top_p": self.top_p,
            "max_output_tokens": self.max_tokens,
        }

        # Only block content the API rates as high-risk, keeping behaviour
        # close to the other providers (which apply no extra filtering).
        self.safety_settings = [
            {
                "category": "HARM_CATEGORY_HARASSMENT",
                "threshold": "BLOCK_ONLY_HIGH"
            },
            {
                "category": "HARM_CATEGORY_HATE_SPEECH",
                "threshold": "BLOCK_ONLY_HIGH"
            },
            {
                "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "threshold": "BLOCK_ONLY_HIGH"
            },
            {
                "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                "threshold": "BLOCK_ONLY_HIGH"
            }
        ]


    def _set_up_client(self):
        """Authenticate the google-generativeai module with the stored key."""
        genai.configure(api_key=self._api_key)


    def construct_prompt_parts(self):
        """
        Flatten the chat ``messages`` into Gemini's list-of-strings prompt
        format: system content is passed through as-is, user turns are
        prefixed with ``Input:`` and all other roles with ``Output:``.
        """
        prompt_parts = []
        for message in self.messages:
            if message["role"] == "system":
                prompt_parts.append(f"{message['content']}")
            elif message["role"] == "user":
                prompt_parts.append(f"Input: {message['content']}")
            else:
                prompt_parts.append(f"Output: {message['content']}")

        return prompt_parts


    def generate_response(self,
                          **kwargs: Any
                          ) -> Optional["TextResponse"]:
        """
        Generate a response from the Gemini model.

        Returns:
            A ``TextResponse`` on success (token counts and execution time are
            ``None`` because the Gemini API does not report them; cost is 0),
            or ``None`` when the request or response extraction fails.
        """
        start_time = time.time()

        model = genai.GenerativeModel(model_name = self._api_name,
                                      generation_config = self.generation_config,
                                      safety_settings = self.safety_settings)

        prompt_parts = self.construct_prompt_parts()

        try:
            completion = model.generate_content(prompt_parts)
        except Exception as e:
            print(f"Error generating response: {e}")
            self.logger.error(f"Error generating response of provider {self.provider}: {e}")
            return None

        actual_time = time.time() - start_time

        # ``completion.text`` raises when the response was blocked (e.g. by a
        # safety filter); handle it the same way as a failed API call instead
        # of letting the exception escape.
        try:
            content = completion.text
        except Exception as e:
            print(f"Error generating response: {e}")
            self.logger.error(f"Error generating response of provider {self.provider}: {e}")
            return None

        input_tokens = None
        output_tokens = None
        execution_time = None
        cost = 0

        response = TextResponse(
            content=content,
            cost=cost,
            execution_time=execution_time,
            actual_time=actual_time,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            provider=self.provider
        )

        log_response(self.logger, "SUCCESS", response)

        return response

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ dependencies = [
"python-dotenv == 1.0.1",
"replicate == 0.24.0",
"together == 0.2.11",
"google-generativeai == 0.4.1",
]
name = "api4all"
version = "0.2.9"
version = "0.3.0"
requires-python = ">=3.8"
authors = [
{name = "Hieu Nguyen", email = "hieung.tech@gmail.com"}
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ openai==1.13.3
python-dotenv==1.0.1
replicate==0.24.0
requests==2.31.0
together==0.2.11
together==0.2.11
google-generativeai==0.4.1

0 comments on commit d7d5a60

Please sign in to comment.