Migrate the Completion API to the Chat Completion API (#419)
Port the Completion API to the ChatCompletion API.
srbalakr authored Jul 17, 2023
1 parent 1a8a1ce commit 6bfb2cc
Showing 5 changed files with 172 additions and 115 deletions.
4 changes: 2 additions & 2 deletions app/backend/app.py
@@ -56,13 +56,13 @@
 # Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
 # or some derivative, here we include several for exploration purposes
 ask_approaches = {
-    "rtr": RetrieveThenReadApproach(search_client, AZURE_OPENAI_GPT_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT),
+    "rtr": RetrieveThenReadApproach(search_client, AZURE_OPENAI_CHATGPT_DEPLOYMENT, AZURE_OPENAI_CHATGPT_MODEL, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT),
     "rrr": ReadRetrieveReadApproach(search_client, AZURE_OPENAI_GPT_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT),
     "rda": ReadDecomposeAsk(search_client, AZURE_OPENAI_GPT_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT)
 }

 chat_approaches = {
-    "rrr": ChatReadRetrieveReadApproach(search_client, AZURE_OPENAI_CHATGPT_DEPLOYMENT, AZURE_OPENAI_CHATGPT_MODEL, AZURE_OPENAI_GPT_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT)
+    "rrr": ChatReadRetrieveReadApproach(search_client, AZURE_OPENAI_CHATGPT_DEPLOYMENT, AZURE_OPENAI_CHATGPT_MODEL, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT)
 }

 app = Flask(__name__)
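Note that both approaches now receive the Azure deployment name and the underlying model name: the ChatCompletion call is addressed to a deployment_id, while token counting with tiktoken needs the OpenAI model id. A minimal sketch of the corresponding configuration, assuming environment variables named as above; the fallback values are illustrative, not taken from this diff:

import os

# The deployment name is whatever the Azure OpenAI resource calls it; the model
# name must be an id tiktoken can resolve (e.g. "gpt-35-turbo", "gpt-4").
AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_CHATGPT_DEPLOYMENT") or "chat"
AZURE_OPENAI_CHATGPT_MODEL = os.environ.get("AZURE_OPENAI_CHATGPT_MODEL") or "gpt-35-turbo"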
156 changes: 68 additions & 88 deletions app/backend/approaches/chatreadretrieveread.py
@@ -7,6 +7,9 @@
 from approaches.approach import Approach
 from text import nonewlines

+from core.messagebuilder import MessageBuilder
+from core.modelhelper import get_token_limit
+
 class ChatReadRetrieveReadApproach(Approach):
     # Chat roles
     SYSTEM = "system"
@@ -34,41 +37,53 @@ class ChatReadRetrieveReadApproach(Approach):
 Generate a search query based on the conversation and the new question.
 Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms.
 Do not include any text inside [] or <<>> in the search query terms.
+Do not include any special characters like '+'.
 If the question is not in English, translate the question to English before generating the search query.
-Chat History:
-{chat_history}
-Question:
-{question}
-Search query:
+Search Query:
 """

-    def __init__(self, search_client: SearchClient, chatgpt_deployment: str, chatgpt_model: str, gpt_deployment: str, sourcepage_field: str, content_field: str):
+    query_prompt_few_shots = [
+        {'role' : USER, 'content' : 'What are my health plans?' },
+        {'role' : ASSISTANT, 'content' : 'Show available health plans' },
+        {'role' : USER, 'content' : 'does my plan cover cardio?' },
+        {'role' : ASSISTANT, 'content' : 'Health plan cardio coverage' }
+    ]
+
+    def __init__(self, search_client: SearchClient, chatgpt_deployment: str, chatgpt_model: str, sourcepage_field: str, content_field: str):
         self.search_client = search_client
         self.chatgpt_deployment = chatgpt_deployment
         self.chatgpt_model = chatgpt_model
-        self.gpt_deployment = gpt_deployment
         self.sourcepage_field = sourcepage_field
         self.content_field = content_field
+        self.chatgpt_token_limit = get_token_limit(chatgpt_model)

     def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any:
         use_semantic_captions = True if overrides.get("semantic_captions") else False
         top = overrides.get("top") or 3
         exclude_category = overrides.get("exclude_category") or None
         filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None

+        user_q = 'Generate search query for: ' + history[-1]["user"]
+
         # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
-        prompt = self.query_prompt_template.format(chat_history=self.get_chat_history_as_text(history, include_last_turn=False), question=history[-1]["user"])
-        completion = openai.Completion.create(
-            engine=self.gpt_deployment,
-            prompt=prompt,
+        messages = self.get_messages_from_history(
+            self.query_prompt_template,
+            self.chatgpt_model,
+            history,
+            user_q,
+            self.query_prompt_few_shots,
+            self.chatgpt_token_limit - len(user_q))
+
+        chat_completion = openai.ChatCompletion.create(
+            deployment_id=self.chatgpt_deployment,
+            model=self.chatgpt_model,
+            messages=messages,
             temperature=0.0,
             max_tokens=32,
-            n=1,
-            stop=["\n"])
-        q = completion.choices[0].text
+            n=1)
+
+        q = chat_completion.choices[0].message.content

         # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
         if overrides.get("semantic_ranker"):
@@ -90,94 +105,59 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any:

         follow_up_questions_prompt = self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else ""

-        # STEP 3: Generate a contextual and content specific answer using the search results and chat history
-
         # Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
-        prompt_override = overrides.get("prompt_template")
-        messages = self.get_messages_from_history(prompt_override=prompt_override, follow_up_questions_prompt=follow_up_questions_prompt, history=history, sources=content)
+        prompt_override = overrides.get("prompt_override")
+        if prompt_override is None:
+            system_message = self.system_message_chat_conversation.format(injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt)
+        elif prompt_override.startswith(">>>"):
+            system_message = self.system_message_chat_conversation.format(injected_prompt=prompt_override[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt)
+        else:
+            system_message = prompt_override.format(follow_up_questions_prompt=follow_up_questions_prompt)
+
+        # latest conversation
+        user_content = history[-1]["user"] + " \nSources:" + content
+
+        messages = self.get_messages_from_history(
+            system_message,
+            self.chatgpt_model,
+            history,
+            user_content,
+            max_tokens=self.chatgpt_token_limit)
+
+        # STEP 3: Generate a contextual and content specific answer using the search results and chat history
         chat_completion = openai.ChatCompletion.create(
             deployment_id=self.chatgpt_deployment,
             model=self.chatgpt_model,
             messages=messages,
             temperature=overrides.get("temperature") or 0.7,
             max_tokens=1024,
             n=1)

         chat_content = chat_completion.choices[0].message.content

         msg_to_display = '\n\n'.join([str(message) for message in messages])

         return {"data_points": results, "answer": chat_content, "thoughts": f"Searched for:<br>{q}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>')}

-    def get_chat_history_as_text(self, history: Sequence[dict[str, str]], include_last_turn: bool=True, approx_max_tokens: int=1000) -> str:
-        history_text = ""
-        for h in reversed(history if include_last_turn else history[:-1]):
-            history_text = """<|im_start|>user""" + "\n" + h["user"] + "\n" + """<|im_end|>""" + "\n" + """<|im_start|>assistant""" + "\n" + (h.get("bot", "") + """<|im_end|>""" if h.get("bot") else "") + "\n" + history_text
-            if len(history_text) > approx_max_tokens*4:
-                break
-        return history_text
-
-    def get_messages_from_history(self, prompt_override, follow_up_questions_prompt, history: Sequence[dict[str, str]], sources: str, approx_max_tokens: int = 1000) -> []:
-        '''
-        Generate messages needed for chat Completion api
-        '''
-        messages = []
-        token_count = 0
-        if prompt_override is None:
-            system_message = self.system_message_chat_conversation.format(injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt)
-        elif prompt_override.startswith(">>>"):
-            system_message = self.system_message_chat_conversation.format(injected_prompt=prompt_override[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt)
-        else:
-            system_message = prompt_override.format(follow_up_questions_prompt=follow_up_questions_prompt)
-
-        messages.append({"role": self.SYSTEM, "content": system_message})
-        token_count += self.num_tokens_from_messages(messages[-1], self.chatgpt_model)
-
-        # latest conversation
-        user_content = history[-1]["user"] + " \nSources:" + sources
-        messages.append({"role": self.USER, "content": user_content})
-        token_count += token_count + self.num_tokens_from_messages(messages[-1], self.chatgpt_model)
-
-        '''
-        Enqueue in reverse order
-        if limit exceeds truncate old messages
-        leaving system message behind
-        Keep track of token count for each conversation
-        If token count exceeds limit, break
-        '''
+    def get_messages_from_history(self, system_prompt: str, model_id: str, history: Sequence[dict[str, str]], user_conv: str, few_shots = [], max_tokens: int = 4096) -> list:
+        message_builder = MessageBuilder(system_prompt, model_id)
+
+        # Add examples to show the chat what responses we want. It will try to mimic any responses and make sure they match the rules laid out in the system message.
+        for shot in few_shots:
+            message_builder.append_message(shot.get('role'), shot.get('content'))
+
+        user_content = user_conv
+        append_index = len(few_shots) + 1
+
+        message_builder.append_message(self.USER, user_content, index=append_index)
+
         for h in reversed(history[:-1]):
             if h.get("bot"):
-                messages.insert(1, {"role": self.ASSISTANT, "content": h.get("bot")})
-                token_count += self.num_tokens_from_messages(messages[1], self.chatgpt_model)
-            messages.insert(1, {"role": self.USER, "content": h.get("user")})
-            token_count += self.num_tokens_from_messages(messages[1], self.chatgpt_model)
-            if token_count > approx_max_tokens*4:
+                message_builder.append_message(self.ASSISTANT, h.get('bot'), index=append_index)
+            message_builder.append_message(self.USER, h.get('user'), index=append_index)
+            if message_builder.token_length > max_tokens:
                 break
-        return messages
-
-    def num_tokens_from_messages(self, message: dict[str, str], model: str) -> int:
-        """
-        Calculate the number of tokens required to encode a message.
-        Args:
-            message (dict): The message to encode, represented as a dictionary.
-            model (str): The name of the model to use for encoding.
-        Returns:
-            int: The total number of tokens required to encode the message.
-        Example:
-            message = {'role': 'user', 'content': 'Hello, how are you?'}
-            model = 'gpt-3.5-turbo'
-            num_tokens_from_messages(message, model)
-            output: 11
-        """
-        encoding = tiktoken.encoding_for_model(self.get_oai_chatmodel_tiktok(model))
-        num_tokens = 2  # For "role" and "content" keys
-        for key, value in message.items():
-            num_tokens += len(encoding.encode(value))
-        return num_tokens
-
-    def get_oai_chatmodel_tiktok(self, aoaimodel: str):
-        if aoaimodel == "" or aoaimodel is None:
-            raise Exception("Expected AOAI chatGPT model name")
-
-        return "gpt-3.5-turbo" if aoaimodel == "gpt-35-turbo" else aoaimodel
+
+        messages = message_builder.messages
+        return messages
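The same substitution recurs throughout this commit, so a minimal before/after sketch may help readers porting similar code. It assumes the openai 0.x Python SDK used here, pointed at Azure OpenAI; the deployment names and prompt strings are illustrative, not from this repo:

import openai

# Before: Completion API - a single flat prompt string; the answer comes back
# in choices[0].text.
completion = openai.Completion.create(
    engine="davinci-deployment",  # illustrative Azure deployment name
    prompt="Generate a search query for: does my plan cover cardio?",
    temperature=0.0,
    max_tokens=32,
    n=1,
    stop=["\n"])
query = completion.choices[0].text

# After: ChatCompletion API - a list of role-tagged messages; the answer comes
# back in choices[0].message.content.
chat_completion = openai.ChatCompletion.create(
    deployment_id="chat-deployment",  # illustrative Azure deployment name
    model="gpt-35-turbo",
    messages=[
        {"role": "system", "content": "Generate a search query for the user question."},
        {"role": "user", "content": "does my plan cover cardio?"}],
    temperature=0.0,
    max_tokens=32,
    n=1)
query = chat_completion.choices[0].message.content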
53 changes: 28 additions & 25 deletions app/backend/approaches/retrievethenread.py
@@ -1,10 +1,12 @@
import openai

from approaches.approach import Approach
from azure.search.documents import SearchClient
from azure.search.documents.models import QueryType
from text import nonewlines
from typing import Any

+from core.messagebuilder import MessageBuilder

class RetrieveThenReadApproach(Approach):
"""
@@ -13,39 +15,30 @@ class RetrieveThenReadApproach(Approach):
     (answer) with that prompt.
     """

-    template = \
+    system_chat_template = \
         "You are an intelligent assistant helping Contoso Inc employees with their healthcare plan questions and employee handbook questions. " + \
         "Use 'you' to refer to the individual asking the questions even if they ask with 'I'. " + \
         "Answer the following question using only the data provided in the sources below. " + \
         "For tabular information return it as an html table. Do not return markdown format. " + \
         "Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. " + \
-        "If you cannot answer using the sources below, say you don't know. " + \
-        """
-###
-Question: 'What is the deductible for the employee plan for a visit to Overlake in Bellevue?'
+        "If you cannot answer using the sources below, say you don't know. Use below example to answer"
+
+    #shots/sample conversation
+    question = """
+'What is the deductible for the employee plan for a visit to Overlake in Bellevue?'

 Sources:
 info1.txt: deductibles depend on whether you are in-network or out-of-network. In-network deductibles are $500 for employee and $1000 for family. Out-of-network deductibles are $1000 for employee and $2000 for family.
 info2.pdf: Overlake is in-network for the employee plan.
 info3.pdf: Overlake is the name of the area that includes a park and ride near Bellevue.
 info4.pdf: In-network institutions include Overlake, Swedish and others in the region

-Answer:
-In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf].
-###
-Question: '{q}'?
-Sources:
-{retrieved}
-Answer:
 """
+
+    answer = "In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf]."

-    def __init__(self, search_client: SearchClient, openai_deployment: str, sourcepage_field: str, content_field: str):
+    def __init__(self, search_client: SearchClient, openai_deployment: str, chatgpt_model: str, sourcepage_field: str, content_field: str):
         self.search_client = search_client
         self.openai_deployment = openai_deployment
+        self.chatgpt_model = chatgpt_model
         self.sourcepage_field = sourcepage_field
         self.content_field = content_field

Expand All @@ -72,13 +65,23 @@ def run(self, q: str, overrides: dict[str, Any]) -> Any:
results = [doc[self.sourcepage_field] + ": " + nonewlines(doc[self.content_field]) for doc in r]
content = "\n".join(results)

prompt = (overrides.get("prompt_template") or self.template).format(q=q, retrieved=content)
completion = openai.Completion.create(
engine=self.openai_deployment,
prompt=prompt,
message_builder = MessageBuilder(overrides.get("prompt_template") or self.system_chat_template, self.chatgpt_model);

# add user question
user_content = q + "\n" + "Sources:\n {content}".format(content=content)
message_builder.append_message('user', user_content)

# Add shots/samples. This helps model to mimic response and make sure they match rules laid out in system message.
message_builder.append_message('assistant', self.answer)
message_builder.append_message('user', self.question)

messages = message_builder.messages
chat_completion = openai.ChatCompletion.create(
deployment_id=self.openai_deployment,
model=self.chatgpt_model,
messages=messages,
temperature=overrides.get("temperature") or 0.3,
max_tokens=1024,
n=1,
stop=["\n"])

return {"data_points": results, "answer": completion.choices[0].text, "thoughts": f"Question:<br>{q}<br><br>Prompt:<br>" + prompt.replace('\n', '<br>')}
n=1)

return {"data_points": results, "answer": chat_completion.choices[0].message.content, "thoughts": f"Question:<br>{q}<br><br>Prompt:<br>" + '\n\n'.join([str(message) for message in messages])}
25 changes: 25 additions & 0 deletions app/backend/core/messagebuilder.py
@@ -0,0 +1,25 @@
from core.modelhelper import num_tokens_from_messages


class MessageBuilder:
    """
    A class for building and managing messages in a chat conversation.
    Attributes:
        messages (list): A list of dictionaries representing chat messages.
        model (str): The name of the ChatGPT model.
        token_length (int): The total number of tokens in the conversation.
    Methods:
        __init__(self, system_content: str, chatgpt_model: str): Initializes the MessageBuilder instance.
        append_message(self, role: str, content: str, index: int = 1): Appends a new message to the conversation.
    """

    def __init__(self, system_content: str, chatgpt_model: str):
        self.messages = [{'role': 'system', 'content': system_content}]
        self.model = chatgpt_model
        self.token_length = num_tokens_from_messages(self.messages[-1], self.model)

    def append_message(self, role: str, content: str, index: int = 1):
        # Insert after the system message by default; each insert pushes earlier
        # inserts toward the end of the list.
        self.messages.insert(index, {'role': role, 'content': content})
        self.token_length += num_tokens_from_messages(self.messages[index], self.model)
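A short usage sketch of the truncation pattern the chat approach builds on this class (requires tiktoken; the history contents and token budget are illustrative): older turns are inserted at a fixed index until the running token count crosses the budget, which keeps the system message first and drops the oldest turns.

from core.messagebuilder import MessageBuilder

history = [
    {"user": "What are my health plans?", "bot": "You have Northwind Standard and Northwind Plus."},
    {"user": "does my plan cover cardio?"}]

mb = MessageBuilder("You are a helpful assistant.", "gpt-35-turbo")
mb.append_message("user", history[-1]["user"])  # newest turn sits just after the system message

for h in reversed(history[:-1]):  # walk older turns newest-first
    if h.get("bot"):
        mb.append_message("assistant", h["bot"])
    mb.append_message("user", h["user"])
    if mb.token_length > 1024:  # illustrative budget; oldest turns are dropped first
        break

print([m["role"] for m in mb.messages])  # ['system', 'user', 'assistant', 'user']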
49 changes: 49 additions & 0 deletions app/backend/core/modelhelper.py
@@ -0,0 +1,49 @@
import tiktoken

MODELS_2_TOKEN_LIMITS = {
    "gpt-35-turbo": 4000,
    "gpt-3.5-turbo": 4000,
    "gpt-35-turbo-16k": 16000,
    "gpt-3.5-turbo-16k": 16000,
    "gpt-4": 8100,
    "gpt-4-32k": 32000
}

AOAI_2_OAI = {
    "gpt-35-turbo": "gpt-3.5-turbo",
    "gpt-35-turbo-16k": "gpt-3.5-turbo-16k"
}


def get_token_limit(model_id: str) -> int:
    if model_id not in MODELS_2_TOKEN_LIMITS:
        raise ValueError("Expected model gpt-35-turbo or above")
    return MODELS_2_TOKEN_LIMITS.get(model_id)


def num_tokens_from_messages(message: dict[str, str], model: str) -> int:
    """
    Calculate the number of tokens required to encode a message.
    Args:
        message (dict): The message to encode, represented as a dictionary.
        model (str): The name of the model to use for encoding.
    Returns:
        int: The total number of tokens required to encode the message.
    Example:
        message = {'role': 'user', 'content': 'Hello, how are you?'}
        model = 'gpt-3.5-turbo'
        num_tokens_from_messages(message, model)
        output: 11
    """
    encoding = tiktoken.encoding_for_model(get_oai_chatmodel_tiktok(model))
    num_tokens = 2  # For "role" and "content" keys
    for key, value in message.items():
        num_tokens += len(encoding.encode(value))
    return num_tokens


def get_oai_chatmodel_tiktok(aoaimodel: str) -> str:
    if aoaimodel == "" or aoaimodel is None:
        raise ValueError("Expected AOAI chatGPT model name")
    # Fall back to the name itself for ids tiktoken already knows (e.g. "gpt-4");
    # a plain .get(aoaimodel) would return None for those and break encoding lookup.
    return AOAI_2_OAI.get(aoaimodel, aoaimodel)
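A quick check of the helpers (requires tiktoken; the exact token count can vary slightly with the encoding version, 11 being the docstring's example):

from core.modelhelper import get_token_limit, num_tokens_from_messages

print(get_token_limit("gpt-35-turbo"))  # 4000
print(num_tokens_from_messages(
    {'role': 'user', 'content': 'Hello, how are you?'}, "gpt-35-turbo"))  # 11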
