diff --git a/CHANGELOG.md b/CHANGELOG.md index 82b2044..a401854 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,10 @@ All notable changes to this project will be documented in this file. ### Added - Refined the README file. - Officially released the package. +- Google AI models +### Fixed +- fix the llama70B model api name string of groq. ## [0.2.8] - 2024-03-15 diff --git a/README.md b/README.md index fc778bc..3f52ec2 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ Easy-to-use LLM API from a state-of-the-art provider and comparison. - **Comparison**: Compare the cost and performance of different providers and models. - **Log**: Log the response and cost of the request in a log file. - **Providers**: Support for all of providers both open-source and closed-source. +- **Result**: See the actual time taken by the request, especially when you don't trust the benchmark. ## Installation 1. Install the package @@ -34,6 +35,12 @@ MISTRAL_API_KEY=xxx ANTHROPIC_API_KEY=xxx ``` +or set the environment variable directly. +```bash +export TOGETHER_API_KEY=xxx +export OPENAI_API_KEY=xxx +``` + 2. Run the code ```python from api4all import EngineFactory @@ -68,11 +75,11 @@ Request ID - fa8cebd0-265a-44b2-95d7-6ff1588d2c87 I am not able to provide information about the current status of the economy, as I do not have access to real-time information. Therefore, I recommend checking a reliable source for the latest economic news and data. 
Cost: $0.0000154 # Cost of this provider for this request - Provider: together # Provider used for this request - Execution-time: Execution time not provided by the provider - Actual-time: 0.9448428153991699 # Actual time taken by the request - Input-token: 33 # Number of tokens used for the input - Output-token: 44 # Number of tokens used for the output + Provider: together # Provider used for this request + Execution-time: Execution time not provided by the provider + Actual-time: 0.9448428153991699 # Actual time taken by the request + Input-token: 33 # Number of tokens used for the input + Output-token: 44 # Number of tokens used for the output ``` ## Providers and Models @@ -142,5 +149,11 @@ Request ID - fa8cebd0-265a-44b2-95d7-6ff1588d2c87 | Claude 2.0 | $0.8 | $2.4 | 100,000 | "anthropic/claude-instant-1.2" | +#### 4. Google +| Model | Input Pricing ($/1M Tokens) | Output Pricing ($/1M Tokens) | Context Length | API string name | +|:------:|:------:|:------:|:------:|:------:| +| Google Gemini 1.0 Pro | $0 | $0 | 32,768 | "google/gemini-1.0-pro" | + + ## Contributing Welcome to contribute to the project. If you see any updated pricing, new models, new providers, or any other changes, feel free to open an issue or a pull request. 
\ No newline at end of file diff --git a/api4all/data/constant_data.py b/api4all/data/constant_data.py index e7cb87a..6075943 100644 --- a/api4all/data/constant_data.py +++ b/api4all/data/constant_data.py @@ -290,7 +290,7 @@ } }, "groq": { - "name": "LLaMA2-70b-chat", + "name": "llama2-70b-4096", "price": { "input": 0, "output": 0 @@ -326,5 +326,17 @@ } }, "context-length": 4096 - } + }, + "google/gemini-1.0-pro": { + "provider": { + "google": { + "name": "gemini-1.0-pro", + "price": { + "input": 0, + "output": 0 + } + } + }, + "context-length": 32768 + }, } \ No newline at end of file diff --git a/api4all/engines/engines.py b/api4all/engines/engines.py index cce342f..330ea4a 100644 --- a/api4all/engines/engines.py +++ b/api4all/engines/engines.py @@ -14,7 +14,7 @@ import replicate import anthropic from mistralai.client import MistralClient - +import google.generativeai as genai __all__ = ["GroqEngine", "AnyscaleEngine", "TogetherEngine", "FireworksEngine", "ReplicateEngine", "DeepinfraEngine", "OpenaiEngine", "AnthropicEngine", "MistralEngine"] @@ -387,7 +387,7 @@ def generate_response(self, "temperature": self.temperature, "max_new_tokens": self.max_tokens, "top_p": self.top_p, - "stop_sequences": self.stop + "stop_sequences": self.stop if self.stop else "" } ) except Exception as e: @@ -638,9 +638,9 @@ def generate_response(self, actual_time = time.time() - start_time - content = completion.choices[0].message.content - input_tokens = completion.usage.prompt_tokens - output_tokens = completion.usage.completion_tokens + content = completion.content[0].text + input_tokens = completion.usage.input_tokens + output_tokens = completion.usage.output_tokens execution_time = None cost = dataEngine.calculate_cost(self.provider, self.model, input_tokens, output_tokens) @@ -732,3 +732,115 @@ def generate_response(self, return response + +#-----------------------------------------Google Gemini AI-----------------------------------------# 
+@EngineFactory.register_engine('google')
+class GoogleEngine(TextEngine):
+    """
+    Text engine for Google Gemini models, called through ``google.generativeai``.
+
+    The chat history in ``self.messages`` is flattened into a list of prompt
+    strings and sent with a single ``generate_content`` call.
+    """
+
+    def __init__(self,
+                 model: str,
+                 provider: str = "google",
+                 temperature: Optional[float] = ModelConfig.DEFAULT_TEMPERATURE,
+                 max_tokens: Optional[int] = ModelConfig.DEFAULT_MAX_TOKENS,
+                 top_p: Optional[float] = ModelConfig.DEFAULT_TOP_P,
+                 stop: Union[str, List[str], None] = ModelConfig.DEFAULT_STOP,
+                 messages: Optional[List[Dict[str, str]]] = ModelConfig.MESSAGES_EXAMPLE
+                 ) -> None:
+        """
+        Read the API key, configure the client and build the request settings.
+
+        Raises:
+            ValueError: If no ``GOOGLE_API_KEY`` value is returned by the key store.
+        """
+        super().__init__(model, provider, temperature, max_tokens, top_p, stop, messages)
+
+        self._api_key = self._keys.get_api_keys("GOOGLE_API_KEY")
+        if self._api_key is None:
+            self.logger.error(f"API key not found for {self.provider}")
+            raise ValueError(f"API key not found for {self.provider}")
+
+        # Set up the client
+        self._set_up_client()
+
+        self._api_name = dataEngine.getAPIname(self.model, self.provider)
+
+        self.generation_config = {
+            "temperature": self.temperature,
+            "top_p": self.top_p,
+            "max_output_tokens": self.max_tokens,
+        }
+
+        # Forward the caller's stop sequence(s); previously they were accepted
+        # but never sent. The config expects a list, so a bare string is wrapped.
+        # NOTE(review): assumes the base class stores the argument as self.stop,
+        # as the Replicate engine in this file reads it - confirm.
+        if self.stop:
+            self.generation_config["stop_sequences"] = (
+                [self.stop] if isinstance(self.stop, str) else list(self.stop)
+            )
+
+        # Same threshold for every harm category.
+        self.safety_settings = [
+            {"category": category, "threshold": "BLOCK_ONLY_HIGH"}
+            for category in (
+                "HARM_CATEGORY_HARASSMENT",
+                "HARM_CATEGORY_HATE_SPEECH",
+                "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                "HARM_CATEGORY_DANGEROUS_CONTENT",
+            )
+        ]
+
+
+    def _set_up_client(self):
+        """Configure the ``google.generativeai`` module with this engine's API key."""
+        genai.configure(api_key=self._api_key)
+
+
+    def construct_prompt_parts(self) -> List[str]:
+        """
+        Flatten ``self.messages`` into the list of strings sent to the model.
+
+        System messages are passed through unchanged, user messages are
+        prefixed with ``"Input: "`` and every other role with ``"Output: "``.
+        """
+        prefixes = {"system": "", "user": "Input: "}
+        return [
+            f"{prefixes.get(message['role'], 'Output: ')}{message['content']}"
+            for message in self.messages
+        ]
+
+
+    def generate_response(self,
+                          **kwargs: Any
+                          ) -> Union[TextResponse, None]:
+        """
+        Send the prompt to the model and wrap the reply in a ``TextResponse``.
+
+        Returns:
+            The ``TextResponse`` on success, or ``None`` when the request fails
+            or the reply contains no usable text (the error is logged).
+        """
+        start_time = time.time()
+
+        model = genai.GenerativeModel(model_name=self._api_name,
+                                      generation_config=self.generation_config,
+                                      safety_settings=self.safety_settings)
+
+        prompt_parts = self.construct_prompt_parts()
+
+        try:
+            completion = model.generate_content(prompt_parts)
+            # Read the text inside the try: per the google-generativeai docs the
+            # ``text`` accessor raises ValueError when the reply has no valid
+            # text part (for example when it was blocked by the safety settings).
+            content = completion.text
+        except Exception as e:
+            print(f"Error generating response: {e}")
+            self.logger.error(f"Error generating response of provider {self.provider}: {e}")
+            return None
+
+        actual_time = time.time() - start_time
+
+        # Token counts and execution time are not read from the reply here, and
+        # the cost is fixed at 0 rather than derived from token counts.
+        input_tokens = None
+        output_tokens = None
+        execution_time = None
+        cost = 0
+
+        response = TextResponse(
+            content=content,
+            cost=cost,
+            execution_time=execution_time,
+            actual_time=actual_time,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            provider=self.provider
+        )
+
+        log_response(self.logger, "SUCCESS", response)
+
+        return response
+
diff --git a/pyproject.toml b/pyproject.toml
index 9d70d96..720f10f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,9 +12,10 @@ dependencies = [
     "python-dotenv == 1.0.1",
     "replicate == 0.24.0",
     "together == 0.2.11",
+    "google-generativeai == 0.4.1",
 ]
 name = "api4all"
-version = "0.2.9"
+version = "0.3.0"
 requires-python = ">=3.8"
 authors = [
     {name = "Hieu Nguyen", email = "hieung.tech@gmail.com"}
diff --git a/requirements.txt b/requirements.txt
index 215627e..0138b2a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,5 @@ openai==1.13.3
 python-dotenv==1.0.1
 replicate==0.24.0
 requests==2.31.0
-together==0.2.11
\ No newline at end of file
+together==0.2.11
+google-generativeai==0.4.1
\ No newline at end of file