Feat/langfuse (#55)
* Remove GPT call method.

* Remove GPT call method.

* Add sections in prompt.

* Add universal gpt call method and implement in get_response method.

* Remove OpenAI client, using it from langfuse.

* Switch to gpt-4o for chat.

* Switch to langfuse openai client for tracking, add embedding generation to track.

* Remove OpenAI client, using it from langfuse.
milistu authored Jun 4, 2024
1 parent baa449d commit cd4f9c2
Showing 7 changed files with 84 additions and 98 deletions.
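The thread running through all seven files: `langfuse.openai` is a drop-in wrapper around the OpenAI SDK, so swapping the import is enough to get every chat and embedding call traced. A minimal sketch of the pattern (not part of the diff; the model name and prompt are illustrative, and it assumes `OPENAI_API_KEY` plus Langfuse credentials are set in the environment):

```python
# Drop-in swap: same call signatures as the plain OpenAI SDK, but every
# request is recorded as a Langfuse generation.
from langfuse.openai import openai  # instead of: from openai import OpenAI

response = openai.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```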
3 changes: 1 addition & 2 deletions app.py
@@ -23,7 +23,7 @@
 st.divider()
 
 # Initialize API clients for OpenAI and Qdrant and load configuration settings.
-openai_client, qdrant_client = initialize_clients()
+qdrant_client = initialize_clients()
 config = load_config()
 
 # Display the logo and set up the sidebar with useful information and links.
@@ -63,7 +63,6 @@
     # Generate a response using the LLM and display it as a stream.
     stream = generate_response(
         query=prompt,
-        openai_client=openai_client,
         qdrant_client=qdrant_client,
         config=config,
     )
2 changes: 1 addition & 1 deletion config.yaml
@@ -3,7 +3,7 @@ openai:
     model: "text-embedding-3-small"
     dimensions: 1536
   chat:
-    model: "gpt-4-turbo-preview"
+    model: "gpt-4o"
     temperature: 0
     max_conversation: 100
   router:
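For reference, `utils.py` (further down) reads these keys through a pydantic `Config` object (`config.openai.chat.model`, `config.openai.router.temperature`, and so on). The `Config` class itself is not part of this diff, so the nested model below is a hypothetical sketch inferred from those access paths:

```python
# Hypothetical reconstruction of the Config model implied by config.yaml
# and the config.openai.* lookups in utils.py; field names match the YAML.
import yaml
from pydantic import BaseModel

class EmbeddingsConfig(BaseModel):
    model: str
    dimensions: int

class ChatConfig(BaseModel):
    model: str
    temperature: float
    max_conversation: int

class RouterConfig(BaseModel):
    model: str
    temperature: float

class OpenAIConfig(BaseModel):
    embeddings: EmbeddingsConfig
    chat: ChatConfig
    router: RouterConfig

class Config(BaseModel):
    openai: OpenAIConfig

with open("./config.yaml") as f:
    config = Config(**yaml.safe_load(f))

print(config.openai.chat.model)  # "gpt-4o" after this change
```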
10 changes: 5 additions & 5 deletions database/utils.py
@@ -5,8 +5,9 @@
 
 import numpy as np
 import tiktoken
+from langfuse.decorators import observe
+from langfuse.openai import openai
 from loguru import logger
-from openai import OpenAI
 from openai.types import CreateEmbeddingResponse
 from qdrant_client import QdrantClient
 from qdrant_client.http.models import (
@@ -86,13 +87,12 @@ def search(
     )
 
 
-def embed_text(
-    client: OpenAI, text: Union[str, list], model: str
-) -> CreateEmbeddingResponse:
+@observe()
+def embed_text(text: Union[str, list], model: str) -> CreateEmbeddingResponse:
     """
     Create embeddings using OpenAI API.
     """
-    response = client.embeddings.create(input=text, model=model)
+    response = openai.embeddings.create(input=text, model=model)
     return response


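After this change `embed_text` no longer takes a client argument; the module-level `langfuse.openai` client is used, and `@observe()` records each call as a span. A usage sketch (assumes `OPENAI_API_KEY` and Langfuse credentials in the environment; the sample text is illustrative):

```python
from database.utils import embed_text

# Same call utils.py makes in generate_response, shown standalone.
response = embed_text(text="probni rad", model="text-embedding-3-small")
vector = response.data[0].embedding  # 1536 floats for this model
print(len(vector))
```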
34 changes: 3 additions & 31 deletions llm/utils.py
@@ -1,9 +1,5 @@
 from typing import Dict, List
 
-from langfuse.decorators import observe
-from openai import OpenAI
-from openai.types.chat import ChatCompletion
-
 from llm.prompts import (
     CONTEXT_PROMPT,
     CONVERSATION_PROMPT,
@@ -12,33 +8,9 @@
 )
 
 
-@observe()
-def get_answer(
-    client: OpenAI,
-    model: str,
-    temperature: float,
-    messages: List[Dict],
-    stream: bool = False,
-) -> ChatCompletion:
-    """
-    Get an answer from the OpenAI chat model.
-    Args:
-        client (OpenAI): The OpenAI client instance.
-        model (str): The model name to use.
-        temperature (float): The temperature setting for the model.
-        messages (List[Dict]): The list of messages to send to the model.
-        stream (bool, optional): Whether to stream the response. Defaults to False.
-    Returns:
-        ChatCompletion: The chat completion response from OpenAI.
-    """
-    return client.chat.completions.create(
-        model=model, temperature=temperature, messages=messages, stream=stream
-    )
-
-
-def get_messages(context: str, query: str, conversation: List[str]) -> List[Dict]:
+def formate_messages_chat(
+    context: str, query: str, conversation: List[str]
+) -> List[Dict]:
     """
     Prepare the list of messages for the chat model.
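`formate_messages_chat` is now a pure formatting helper: it only builds the message list, while the actual API call lives in `utils.py`'s new `call_llm`. Its body is truncated in this diff, so the sketch below assumes only the visible signature and the OpenAI-style `List[Dict]` return shape; the argument values are illustrative:

```python
from llm.utils import formate_messages_chat

messages = formate_messages_chat(
    context="Clan 35: ...",            # retrieved legal context
    query="Koliko traje probni rad?",  # current user question
    conversation=[],                   # prior turns, if any
)
# messages is ready to hand to call_llm(..., messages=messages, stream=True)
```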
39 changes: 11 additions & 28 deletions router/query_router.py
@@ -1,38 +1,21 @@
 import json
-from typing import List
+from typing import Dict, List
 
-from langfuse.decorators import observe
-from openai import OpenAI
+from router.router_prompt import ROUTER_PROMPT, USER_QUERY
 
 
-@observe()
-def semantic_query_router(
-    client: OpenAI,
+def formate_messages_router(
     query: str,
-    prompt: str,
-    temperature: float,
-    model: str = "gpt-3.5-turbo",
-) -> List[str]:
+) -> List[Dict]:
     """
-    Routes a semantic query to the appropriate collections using OpenAI's API.
+    Prepare the list of messages for the llm model.
     Args:
-        client (OpenAI): The OpenAI client instance.
-        query (str): The query string to be routed.
-        prompt (str): The prompt template to be used for the query.
-        temperature (float): The temperature setting for the model's response.
-        model (str, optional): The model to be used. Defaults to "gpt-3.5-turbo".
+        query (str): The user's query.
     Returns:
-        List[str]: A list of collections that are relevant to the query.
+        List[Dict]: The list of messages formatted for the llm model.
     """
-    # Create the completion request to the OpenAI API
-    response = client.chat.completions.create(
-        model=model,
-        response_format={"type": "json_object"},
-        messages=[{"role": "system", "content": prompt.format(query=query)}],
-        temperature=temperature,
-    )
-    # Parse the response to extract the collections
-    collections = json.loads(response.choices[0].message.content)["response"]
-    return collections
+    return [
+        {"role": "system", "content": ROUTER_PROMPT},
+        {"role": "user", "content": USER_QUERY.format(query=query)},
+    ]
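Routing is now split in two: this helper formats the messages, and `generate_response` in `utils.py` (below) makes the traced call and parses the JSON. Stitched together, the flow looks like this (the model name is a placeholder for `config.openai.router.model`, whose value is not shown in this diff):

```python
import json

from router.query_router import formate_messages_router
from utils import call_llm  # the new universal, @observe-traced call

messages = formate_messages_router(query="Da li imam pravo na otpremninu?")
response = call_llm(
    model="gpt-3.5-turbo",  # placeholder for config.openai.router.model
    temperature=0.0,
    messages=messages,
    json_response=True,     # the router must answer with a JSON object
)
collections = json.loads(response.choices[0].message.content)["response"]
# e.g. ["zakon_o_radu"], or ["nema_zakona"] when no law applies
```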
5 changes: 4 additions & 1 deletion router/router_prompt.py
@@ -1,4 +1,5 @@
 ROUTER_PROMPT = """
+**INSTRUKCIJE:**
 Tvoj zadatak je da na osnovu datog pitanja korisnika odlucis koji zakon ili zakoni su potrebni da bi se odgovorilo na korisnikovo pitanje.
 Ponudjeni zakoni i njihova objasnjenja su sledeci:
 - zakon_o_radu
@@ -20,12 +21,14 @@
 - Jedno pitanje korisnika moze da se odnosi na vise zakona.
 - Vrati zakone koji mogu da pomognu prilikom generisanja odgovora.
 - Ukoliko korisnikovo pitanje ne odgovara ni jednom zakonu vrati listu sa generickim stringom: ["nema_zakona"].
-- Primer JSON odgovora:
+**PRIMER ODGOVORA:**
 {{
     response: ["ime_zakona"]
 }}
 """
 
 USER_QUERY = """
+**PITANJE KORISINKA:**
 {query}
 """
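The new `**SECTION:**` headers give the router prompt an explicit structure. How the two templates are combined downstream (this mirrors what `formate_messages_router` above does; the query is illustrative):

```python
from router.router_prompt import ROUTER_PROMPT, USER_QUERY

system_msg = ROUTER_PROMPT  # instructions, law list, response example
user_msg = USER_QUERY.format(query="Koliko dana godisnjeg odmora imam?")
# The model is instructed to reply with JSON of the form:
#   {"response": ["zakon_o_radu"]}
```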
89 changes: 59 additions & 30 deletions utils.py
@@ -1,19 +1,21 @@
 import json
 import os
-from typing import Generator, List, Tuple
+from typing import Dict, Generator, List
 
 import streamlit as st
 import yaml
-from langfuse.decorators import observe
+from langfuse.decorators import langfuse_context, observe
+from langfuse.openai import openai
 from loguru import logger
-from openai import OpenAI
+from openai.types.chat import ChatCompletion
 from pydantic import BaseModel
 from qdrant_client import QdrantClient
 
 from database.utils import embed_text, get_context, search
 from llm.prompts import DEFAULT_CONTEXT
-from llm.utils import get_answer, get_messages
-from router.query_router import semantic_query_router
-from router.router_prompt import DEFAULT_ROUTER_RESPONSE, ROUTER_PROMPT
+from llm.utils import formate_messages_chat
+from router.query_router import formate_messages_router
+from router.router_prompt import DEFAULT_ROUTER_RESPONSE
 
 LOGO_URL = "assets/Legabot-Logomark.svg"
 LOGO_TEXT_URL = "assets/Legabot-Light-Horizontal.svg"
@@ -71,7 +73,7 @@ def load_config(yaml_file_path: str = "./config.yaml") -> Config:
 
 
 @st.cache_resource
-def initialize_clients() -> Tuple[OpenAI, QdrantClient]:
+def initialize_clients() -> QdrantClient:
     """
     Initializes and returns the clients for OpenAI and Qdrant services.
@@ -87,27 +89,51 @@
         qdrant_api_key = os.environ["QDRANT_API_KEY"]
         qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)
 
-        # Retrieve OpenAI client configuration from environment variables
-        openai_api_key = os.environ["OPENAI_API_KEY"]
-        openai_client = OpenAI(api_key=openai_api_key)
-
-        return openai_client, qdrant_client
+        return qdrant_client
     except KeyError as e:
         error_msg = f"Missing environment variable: {str(e)}"
         logger.error(error_msg)
         raise EnvironmentError(error_msg)
 
 
+@observe(as_type="generation")
+def call_llm(
+    model: str,
+    temperature: float,
+    messages: List[Dict],
+    json_response: bool = False,
+    stream: bool = False,
+) -> ChatCompletion:
+    """
+    Get an answer from the OpenAI chat model.
+    Args:
+        model (str): The model name to use.
+        temperature (float): The temperature setting for the model.
+        messages (List[Dict]): The list of messages to send to the model.
+        stream (bool, optional): Whether to stream the response. Defaults to False.
+    Returns:
+        ChatCompletion: The chat completion response from OpenAI.
+    """
+    return openai.chat.completions.create(
+        model=model,
+        response_format={"type": "json_object"} if json_response else None,
+        temperature=temperature,
+        messages=messages,
+        stream=stream,
+    )
 
 
 @observe()
 def generate_response(
-    query: str, openai_client: OpenAI, qdrant_client: QdrantClient, config: Config
+    query: str, qdrant_client: QdrantClient, config: Config
 ) -> Generator[str, None, None]:
     """
     Generates a response for a given user query using a combination of semantic search and a chat model.
     Args:
     - query (str): The user's query string.
-    - openai_client (OpenAI): Client to interact with OpenAI's API.
     - qdrant_client (QdrantClient): Client to interact with Qdrant's API.
     - config (Config): Configuration settings for API interaction and response handling.
@@ -120,35 +146,36 @@ def generate_response(
             -config.openai.chat.max_conversation :
         ]
 
+        # Determine the relevant collections to route the query to
+        messages = formate_messages_router(query)
+        response = call_llm(
+            model=config.openai.router.model,
+            temperature=config.openai.router.temperature,
+            messages=messages,
+            json_response=True,
+        )
+        collections = json.loads(response.choices[0].message.content)["response"]
+        logger.info(f"Query routed to collections: {collections}")
+        langfuse_context.update_current_trace(tags=collections)
+
         # Embed the user query using the specified model in the configuration
         embedding_response = embed_text(
-            client=openai_client,
             text=query,
             model=config.openai.embeddings.model,
        )
         embedding = embedding_response.data[0].embedding
 
-        # Determine the relevant collections to route the query to
-        collections = semantic_query_router(
-            client=openai_client,
-            model=config.openai.router.model,
-            query=query,
-            prompt=ROUTER_PROMPT,
-            temperature=config.openai.router.temperature,
-        )
-        logger.info(f"Query routed to collections: {collections}")
-
         # Determine the context for the chat model based on the routed collections
         context = determine_context(collections, embedding, qdrant_client)
 
         # Generate the response stream from the chat model
-        stream = get_answer(
-            client=openai_client,
+        messages = formate_messages_chat(
+            context=context, query=query, conversation=st.session_state.messages
+        )
+        stream = call_llm(
             model=config.openai.chat.model,
             temperature=config.openai.chat.temperature,
-            messages=get_messages(
-                context=context, query=query, conversation=st.session_state.messages
-            ),
+            messages=messages,
             stream=True,
         )

@@ -158,6 +185,8 @@
             if part is not None:
                 yield part
 
+        langfuse_context.flush()
+
     except Exception as e:
         logger.error(f"An error occurred while generating the response: {str(e)}")
         yield "Sorry, an error occurred while processing your request."
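For completeness: the `langfuse.openai` client and the `@observe` decorators used throughout this commit are configured purely via environment variables, and `langfuse_context.flush()` ensures buffered traces are sent before a short-lived Streamlit worker moves on. A minimal standalone sketch, assuming a standard Langfuse v2 setup (the variable values are placeholders):

```python
# Expected environment (values are placeholders):
#   LANGFUSE_PUBLIC_KEY=pk-lf-...
#   LANGFUSE_SECRET_KEY=sk-lf-...
#   LANGFUSE_HOST=https://cloud.langfuse.com   # or a self-hosted URL
#   OPENAI_API_KEY=sk-...
from langfuse.decorators import langfuse_context, observe

@observe()  # parent trace; nested @observe calls become child spans
def answer(query: str) -> str:
    langfuse_context.update_current_trace(tags=["example"])
    return f"answered: {query}"

answer("test")
langfuse_context.flush()  # send buffered events before the process exits
```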
