Rename the neon-llm-chatgpt service to neon-llm-palm2 and replace the OpenAI
chat/embedding calls with the Vertex AI `ChatModel` / `TextEmbeddingModel`.

Review fixes applied to the added lines of neon_llm_palm2/palm2.py:
  * `_call_model` starts the chat through the lazy `model` property instead of
    reading `self._model` directly.
  * `_embeddings` requests embeddings through the lazy `embedding` property
    instead of reading `self._embedding` directly; `__init__` sets that
    attribute to None and only the property assigns the loaded model.

NOTE(review): `api_key_path` is stored and `os` is imported, but nothing in the
visible hunks uses either one - confirm how credentials reach Vertex AI.

diff --git a/.github/workflows/license_tests.yml b/.github/workflows/license_tests.yml
index 1404a33..9fd433c 100644
--- a/.github/workflows/license_tests.yml
+++ b/.github/workflows/license_tests.yml
@@ -9,4 +9,4 @@ jobs:
   license_tests:
     uses: neongeckocom/.github/.github/workflows/license_tests.yml@master
     with:
-      packages-exclude: '^(neon-llm-chatgpt|tqdm).*'
\ No newline at end of file
+      packages-exclude: '^(neon-llm-palm2|tqdm).*'
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 822d628..7f2f65f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,7 @@
 FROM python:3.9-slim
 
 LABEL vendor=neon.ai \
-    ai.neon.name="neon-llm-chatgpt"
+    ai.neon.name="neon-llm-palm2"
 
 ENV OVOS_CONFIG_BASE_FOLDER neon
 ENV OVOS_CONFIG_FILENAME diana.yaml
@@ -12,4 +12,4 @@ WORKDIR /app
 COPY . /app
 RUN pip install /app
 
-CMD [ "neon-llm-chatgpt" ]
\ No newline at end of file
+CMD [ "neon-llm-palm2" ]
\ No newline at end of file
diff --git a/README.md b/README.md
index ce20d97..950c434 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-# NeonAI LLM ChatGPT
-Proxies API calls to ChatGPT.
+# NeonAI LLM Palm2
+Proxies API calls to Google Palm2.
 
 ## Request Format
 API requests should include `history`, a list of tuples of strings, and the current
@@ -25,12 +25,11 @@ MQ:
   port:
   server:
   users:
-    neon_llm_chat_gpt:
-      password:
-      user: neon_chatgpt
-LLM_CHAT_GPT:
-  key: ""
-  model: "gpt-3.5-turbo"
+    neon_llm_palm2:
+      password:
+      user: neon_palm2
+LLM_PALM2:
+  key_path: ""
   role: "You are trying to give a short answer in less than 40 words."
   context_depth: 3
   max_tokens: 100
@@ -40,6 +39,6 @@ LLM_CHAT_GPT:
 For example, if your configuration resides in `~/.config`:
 ```shell
 export CONFIG_PATH="/home/${USER}/.config"
-docker run -v ${CONFIG_PATH}:/config neon_llm_chatgpt
+docker run -v ${CONFIG_PATH}:/config neon_llm_palm2
 ```
 > Note: If connecting to a local MQ server, you may need to specify `--network host`
\ No newline at end of file
diff --git a/docker_overlay/etc/neon/diana.yaml b/docker_overlay/etc/neon/diana.yaml
index 3c0fa01..739256f 100644
--- a/docker_overlay/etc/neon/diana.yaml
+++ b/docker_overlay/etc/neon/diana.yaml
@@ -14,8 +14,7 @@ MQ:
     mq_handler:
       user: neon_api_utils
       password: Klatchat2021
-LLM_CHAT_GPT:
-  model: "gpt-3.5-turbo"
+LLM_PALM2:
   role: "You are trying to give a short answer in less than 40 words."
   context_depth: 3
   max_tokens: 100
diff --git a/neon_llm_chatgpt/default_config.json b/neon_llm_chatgpt/default_config.json
deleted file mode 100644
index f8887c7..0000000
--- a/neon_llm_chatgpt/default_config.json
+++ /dev/null
@@ -1,19 +0,0 @@
-{
-  "MQ": {
-    "server": "localhost",
-    "port": "5672",
-    "users": {
-      "mq-chatgpt-api": {
-        "user": "",
-        "password": ""
-      }
-    }
-  },
-  "ChatGPT": {
-    "key": "",
-    "model": "gpt-3.5-turbo",
-    "role": "You are trying to give a short answer in less than 40 words.",
-    "context_depth": 3,
-    "max_tokens": 100
-  }
-}
\ No newline at end of file
diff --git a/neon_llm_chatgpt/__init__.py b/neon_llm_palm2/__init__.py
similarity index 100%
rename from neon_llm_chatgpt/__init__.py
rename to neon_llm_palm2/__init__.py
diff --git a/neon_llm_chatgpt/__main__.py b/neon_llm_palm2/__main__.py
similarity index 91%
rename from neon_llm_chatgpt/__main__.py
rename to neon_llm_palm2/__main__.py
index e98ff45..d11acd5 100644
--- a/neon_llm_chatgpt/__main__.py
+++ b/neon_llm_palm2/__main__.py
@@ -24,15 +24,15 @@
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from neon_llm_chatgpt.rmq import ChatgptMQ
+from neon_llm_palm2.rmq import Palm2MQ
 
 
 def main():
     # Run RabbitMQ
-    chatgptMQ = ChatgptMQ()
-    chatgptMQ.run(run_sync=False, run_consumers=True,
+    palm2MQ = Palm2MQ()
+    palm2MQ.run(run_sync=False, run_consumers=True,
                   daemonize_consumers=True)
-    chatgptMQ.observer_thread.join()
+    palm2MQ.observer_thread.join()
 
 
 if __name__ == "__main__":
diff --git a/neon_llm_chatgpt/chatgpt.py b/neon_llm_palm2/palm2.py
similarity index 77%
rename from neon_llm_chatgpt/chatgpt.py
rename to neon_llm_palm2/palm2.py
index fe378bf..846b83c 100644
--- a/neon_llm_chatgpt/chatgpt.py
+++ b/neon_llm_palm2/palm2.py
@@ -24,27 +24,28 @@
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import openai
-from openai.embeddings_utils import get_embeddings, distances_from_embeddings
+import os
+from vertexai.language_models import ChatModel, ChatMessage, TextEmbeddingModel
+from openai.embeddings_utils import distances_from_embeddings
 
 from typing import List, Dict
 from neon_llm_core.llm import NeonLLM
 
 
-class ChatGPT(NeonLLM):
+class Palm2(NeonLLM):
 
     mq_to_llm_role = {
         "user": "user",
-        "llm": "assistant"
+        "llm": "bot"
     }
 
     def __init__(self, config):
         super().__init__(config)
-        self.model_name = config["model"]
+        self._embedding = None
         self.role = config["role"]
         self.context_depth = config["context_depth"]
         self.max_tokens = config["max_tokens"]
-        self.api_key = config["key"]
+        self.api_key_path = config["key_path"]
         self.warmup()
 
     @property
@@ -56,11 +57,16 @@ def tokenizer_model_name(self) -> str:
         return ""
 
     @property
-    def model(self) -> openai:
+    def model(self) -> ChatModel:
         if self._model is None:
-            openai.api_key = self.api_key
-            self._model = openai
+            self._model = ChatModel.from_pretrained("chat-bison@001")
         return self._model
+
+    @property
+    def embedding(self) -> TextEmbeddingModel:
+        if self._embedding is None:
+            self._embedding = TextEmbeddingModel.from_pretrained("textembedding-gecko@001")
+        return self._embedding
 
     @property
     def llm_model_name(self) -> str:
@@ -88,20 +94,23 @@ def get_sorted_answer_indexes(self, question: str, answers: List[str], persona:
         sorted_items_indexes = [x[0] for x in sorted_items]
         return sorted_items_indexes
 
-    def _call_model(self, prompt: List[Dict[str, str]]) -> str:
+    def _call_model(self, prompt: Dict) -> str:
         """
-            Wrapper for ChatGPT Model generation logic
+            Wrapper for Palm2 Model generation logic
             :param prompt: Input messages sequence
             :returns: Output text sequence generated by model
         """
 
-        response = openai.ChatCompletion.create(
-            model=self.llm_model_name,
-            messages=prompt,
+        chat = self.model.start_chat(
+            context=prompt["system_prompt"],
+            message_history=prompt["chat_history"],
+            max_output_tokens=self.max_tokens,
             temperature=0,
-            max_tokens=self.max_tokens,
         )
-        text = response.choices[0].message['content']
+        response = chat.send_message(
+            prompt["message"],
+        )
+        text = response.text
 
         return text
 
@@ -109,22 +118,25 @@ def _assemble_prompt(self, message: str, chat_history: List[List[str]], persona:
         """
             Assembles prompt engineering logic
             Setup Guidance:
-            https://platform.openai.com/docs/guides/gpt/chat-completions-api
+            https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/overview
 
             :param message: Incoming prompt
             :param chat_history: History of preceding conversation
             :returns: assembled prompt
         """
         system_prompt = persona.get("description", self._system_prompt)
-        messages = [
-            {"role": "system", "content": system_prompt},
-        ]
         # Context N messages
+        messages = []
         for role, content in chat_history[-self.context_depth:]:
-            role_chatgpt = self.convert_role(role)
-            messages.append({"role": role_chatgpt, "content": content})
-        messages.append({"role": "user", "content": message})
-        return messages
+            role_palm2 = self.convert_role(role)
+            messages.append(ChatMessage(content, role_palm2))
+        prompt = {
+            "system_prompt": system_prompt,
+            "chat_history": messages,
+            "message": message
+        }
+
+        return prompt
 
     def _score(self, prompt: str, targets: List[str], persona: dict) -> List[float]:
         """
@@ -150,7 +162,8 @@ def _embeddings(self, question: str, answers: List[str], persona: dict) -> (List
         """
         response = self.ask(question, [], persona=persona)
         texts = [response] + answers
-        embeddings = get_embeddings(texts, engine="text-embedding-ada-002")
+        embeddings_obj = self.embedding.get_embeddings(texts)
+        embeddings = [embedding.values for embedding in embeddings_obj]
         question_embeddings = embeddings[0]
         answers_embeddings = embeddings[1:]
         return question_embeddings, answers_embeddings
\ No newline at end of file
diff --git a/neon_llm_chatgpt/rmq.py b/neon_llm_palm2/rmq.py
similarity index 91%
rename from neon_llm_chatgpt/rmq.py
rename to neon_llm_palm2/rmq.py
index 3409a95..39a5e91 100644
--- a/neon_llm_chatgpt/rmq.py
+++ b/neon_llm_palm2/rmq.py
@@ -25,12 +25,12 @@
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from neon_llm_core.rmq import NeonLLMMQConnector
-from neon_llm_chatgpt.chatgpt import ChatGPT
+from neon_llm_palm2.palm2 import Palm2
 
 
-class ChatgptMQ(NeonLLMMQConnector):
+class Palm2MQ(NeonLLMMQConnector):
     """
-        Module for processing MQ requests to ChatGPT
+        Module for processing MQ requests to Palm2
     """
 
     def __init__(self):
@@ -39,12 +39,12 @@ def __init__(self):
 
     @property
     def name(self):
-        return "chat_gpt"
+        return "palm2"
 
     @property
     def model(self):
         if self._model is None:
-            self._model = ChatGPT(self.model_config)
+            self._model = Palm2(self.model_config)
         return self._model
 
     def warmup(self):
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index e08541f..e5c40fa 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,4 +1,5 @@
 # model
+google-cloud-aiplatform
 openai[embeddings]~=0.27
 # networking
 neon_llm_core~=0.1.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 64747b5..8bd7cd3 100644
--- a/setup.py
+++ b/setup.py
@@ -67,12 +67,12 @@ def get_requirements(requirements_filename: str):
             version = line.split("'")[1]
 
 setup(
-    name='neon-llm-chatgpt',
+    name='neon-llm-palm2',
     version=version,
-    description='LLM service for Chat GPT',
+    description='LLM service for Palm2',
     long_description=long_description,
     long_description_content_type="text/markdown",
-    url='https://github.com/NeonGeckoCom/neon-llm-chatgpt',
+    url='https://github.com/NeonGeckoCom/neon-llm-palm2',
     author='Neongecko',
     author_email='developers@neon.ai',
     license='BSD-3.0',
@@ -85,7 +85,7 @@ def get_requirements(requirements_filename: str):
     ],
     entry_points={
         'console_scripts': [
-            'neon-llm-chatgpt=neon_llm_chatgpt.__main__:main'
+            'neon-llm-palm2=neon_llm_palm2.__main__:main'
         ]
     }
 )