Skip to content

Commit

Permalink
fix errors and add option to use azure openai with key
Browse files Browse the repository at this point in the history
  • Loading branch information
dayesouza committed Jul 8, 2024
1 parent bfa879d commit a6436fb
Show file tree
Hide file tree
Showing 11 changed files with 83 additions and 29 deletions.
14 changes: 11 additions & 3 deletions app/pages/Settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from util.openai_wrapper import (
UIOpenAIConfiguration,
key,
openai_azure_auth_type,
openai_azure_model_key,
openai_endpoint_key,
openai_type_key,
Expand All @@ -33,7 +34,6 @@ def main():
st.set_page_config(layout="wide", initial_sidebar_state="collapsed", page_icon="app/myapp.ico", page_title='Intelligence Toolkit | Settings')
load_multipage_app()
openai_config = UIOpenAIConfiguration().get_configuration()
print('openai_config', openai_config.api_type)
st.header("Settings")
secrets_handler = SecretsHandler()

Expand All @@ -51,6 +51,14 @@ def main():
st.rerun()

if type_input == "Azure OpenAI":
types_az = ["Managed Identity", "Azure Key"]
index_az = types_az.index(openai_config.az_auth_type) if openai_config.az_auth_type in types_az else 0
type_input_az = st.radio("Azure OpenAI Auth Type", types_az, index=index_az, disabled=is_mode_cloud)
# When the selected auth type differs from the stored one, invoke the callable
# returned by on_change(...) for the auth-type key and rerun the page.
# NOTE(review): presumably on_change writes the value through secrets_handler — confirm.
if type_input_az != openai_config.az_auth_type:
    on_change(secrets_handler, openai_azure_auth_type, type_input_az)()
    st.rerun()
col1, col2, col3 = st.columns(3)
with col1:
endpoint = st.text_input("Azure OpenAI Endpoint", disabled=is_mode_cloud, type="password", value=openai_config.api_base)
Expand All @@ -69,9 +77,9 @@ def main():
if version != openai_config.api_version:
on_change(secrets_handler, openai_version_key, version)()
st.rerun()
else:
if type_input == "OpenAI" or type_input_az != "Managed Identity":
placeholder = "Enter key here..."
secret_input = st.text_input('Enter your OpenAI key', type="password", disabled=is_mode_cloud, placeholder=placeholder, value=secret)
secret_input = st.text_input('Enter your key', type="password", disabled=is_mode_cloud, placeholder=placeholder, value=secret)

if secret and len(secret) > 0:
st.info("Your key is saved securely.")
Expand Down
5 changes: 4 additions & 1 deletion app/util/openai_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
openai_version_key = 'openai_version'
openai_endpoint_key = 'openai_endpoint'
openai_azure_model_key = 'openai_azure_model'
openai_azure_auth_type = 'openai_azure_auth_type'

class UIOpenAIConfiguration():
def __init__(
Expand All @@ -23,13 +24,15 @@ def get_configuration(self):
endpoint = self._secrets.get_secret(openai_endpoint_key) or None
secret_key = self._secrets.get_secret(key) or None
model = self._secrets.get_secret(openai_azure_model_key) or None
az_auth_type = self._secrets.get_secret(openai_azure_auth_type) or None

config = {
'api_type': type,
'api_version': version,
'api_base': endpoint,
'api_key': secret_key,
'model': model
'model': model,
'az_auth_type': az_auth_type
}
values = {k: v for k, v in config.items() if v is not None}
return OpenAIConfiguration(values)
14 changes: 10 additions & 4 deletions app/workflows/question_answering/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,15 @@
from python.AI.text_splitter import TextSplitter

sv_home = SessionVariables('home')
ai_configuration = UIOpenAIConfiguration().get_configuration()

embedder = Embedder(ai_configuration, config.cache_dir)

def embedder():
    """Create an Embedder from the current UI OpenAI configuration.

    The configuration is read on every call rather than once at import time.
    If anything fails while reading it or constructing the Embedder, the error
    is reported with st.error and st.stop() is called.
    """
    try:
        ui_configuration = UIOpenAIConfiguration()
        return Embedder(ui_configuration.get_configuration(), config.cache_dir)
    except Exception as err:
        st.error(f'Error creating connection: {err}')
        st.stop()

def chunk_files(sv, files):
pb = st.progress(0, 'Chunking files...')
Expand Down Expand Up @@ -52,11 +58,11 @@ def chunk_files(sv, files):
for chunk in chunks:
file_chunks.append((file, chunk))
file.set_text(doc_text)

functions_embedder = embedder()
for cx, (file, chunk) in enumerate(file_chunks):
pb.progress((cx+1) / len(file_chunks), f'Embedding chunk {cx+1} of {len(file_chunks)}...')
formatted_chunk = chunk.replace("\n", " ")
chunk_vec = embedder.embed_store_one(formatted_chunk, sv_home.save_cache.value)
chunk_vec = functions_embedder.embed_store_one(formatted_chunk, sv_home.save_cache.value)
file.add_chunk(chunk, np.array(chunk_vec), cx+1)
pb.empty()

Expand Down
12 changes: 7 additions & 5 deletions app/workflows/question_answering/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import scipy.spatial.distance
import streamlit as st
import workflows.question_answering.classes as classes
import workflows.question_answering.config as config
import workflows.question_answering.functions as functions
import workflows.question_answering.prompts as prompts
from util import ui_components
Expand Down Expand Up @@ -110,8 +109,10 @@ def create(sv: SessionVariables, workflow = None):
iteration = 0
source_counts = Counter()
used_chunks = set()
functions_embedder = functions.embedder()

while True:
qe = np.array(functions.embedder.embed_store_one(question, sv_home.save_cache.value))
qe = np.array(functions_embedder.embed_store_one(question, sv_home.save_cache.value))
iteration += 1
cosine_distances = sorted([(t, c, scipy.spatial.distance.cosine(qe, v)) for (t, c, v) in all_units], key=lambda x:x[2], reverse=False)
chunk_index = sv.answering_target_matches.value
Expand Down Expand Up @@ -185,15 +186,16 @@ def create(sv: SessionVariables, workflow = None):
qas_raw = ui_components.generate_text(messages, callbacks=[on_callback])
status_history += qas_raw + '<br/><br/>'
try:
functions_embedder = functions.embedder()
qas = json.loads(qas_raw)
for qa in qas:
q = qa['question']
a = qa['answer']
raw_refs = qa['source']
file_page_refs = [tuple([int(x[1:]) for x in r.split(';')]) for r in raw_refs]

q_vec = np.array(functions.embedder.embed_store_one(q, sv_home.save_cache.value))
a_vec = np.array(functions.embedder.embed_store_one(a, sv_home.save_cache.value))
# functions_embedder is already the object returned by functions.embedder(),
# so embed_store_one is called on it directly for both question and answer.
q_vec = np.array(functions_embedder.embed_store_one(q, sv_home.save_cache.value))
a_vec = np.array(functions_embedder.embed_store_one(a, sv_home.save_cache.value))

qid = sv.answering_next_q_id.value
sv.answering_next_q_id.value += 1
Expand All @@ -209,7 +211,7 @@ def create(sv: SessionVariables, workflow = None):
if t == 'chunk' and c[0].id == f.id and c[1] == cx:
all_units.remove((t, c, v))

status_history += f'Augmenting user question with partial answers:<br/>'
status_history += 'Augmenting user question with partial answers:<br/>'
new_question = functions.update_question(sv, sv.answering_question_history.value, new_questions, lazy_answering_placeholder, status_history)
status_history += new_question + '<br/><br/>'
sv.answering_question_history.value.append(new_question)
Expand Down
11 changes: 9 additions & 2 deletions app/workflows/record_matching/functions.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project.
#
import streamlit as st
from util.openai_wrapper import UIOpenAIConfiguration
from workflows.record_matching import config

from python.AI.embedder import Embedder

ai_configuration = UIOpenAIConfiguration().get_configuration()
embedder = Embedder(ai_configuration, config.cache_dir)

def embedder():
    """Create an Embedder from the current UI OpenAI configuration.

    The configuration is read on every call rather than once at import time.
    If anything fails while reading it or constructing the Embedder, the error
    is reported with st.error and st.stop() is called.
    """
    try:
        ui_configuration = UIOpenAIConfiguration()
        return Embedder(ui_configuration.get_configuration(), config.cache_dir)
    except Exception as err:
        st.error(f'Error creating connection: {err}')
        st.stop()

def convert_to_sentences(df, skip):
sentences = []
Expand Down
4 changes: 3 additions & 1 deletion app/workflows/record_matching/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,9 @@ def on_embedding_batch_change(current, total):

callback = classes.BatchEmbeddingCallback()
callback.on_embedding_batch_change = on_embedding_batch_change
embeddings = functions.embedder.embed_store_many(all_sentences,[callback], sv_home.save_cache.value)
functions_embedder = functions.embedder()

embeddings = functions_embedder.embed_store_many(all_sentences,[callback], sv_home.save_cache.value)
pb.empty()

nbrs = NearestNeighbors(n_neighbors=50, n_jobs=1, algorithm='auto', leaf_size=20, metric='cosine').fit(embeddings)
Expand Down
11 changes: 9 additions & 2 deletions app/workflows/risk_networks/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,21 @@

import networkx as nx
import pandas as pd
import streamlit as st
import workflows.risk_networks.config as config
from streamlit_agraph import Config, Edge, Node
from util.openai_wrapper import UIOpenAIConfiguration

from python.AI.embedder import Embedder

ai_configuration = UIOpenAIConfiguration().get_configuration()
embedder = Embedder(ai_configuration, config.cache_dir)

def embedder():
    """Create an Embedder from the current UI OpenAI configuration.

    The configuration is read on every call rather than once at import time.
    If anything fails while reading it or constructing the Embedder, the error
    is reported with st.error and st.stop() is called.
    """
    try:
        ui_configuration = UIOpenAIConfiguration()
        return Embedder(ui_configuration.get_configuration(), config.cache_dir)
    except Exception as err:
        st.error(f'Error creating connection: {err}')
        st.stop()

def hsl_to_hex(h, s, l):
rgb = colorsys.hls_to_rgb(h / 360, l / 100, s / 100)
Expand Down
3 changes: 2 additions & 1 deletion app/workflows/risk_networks/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ def on_embedding_batch_change(current, total):

callback = classes.BatchEmbeddingCallback()
callback.on_embedding_batch_change = on_embedding_batch_change
embeddings = functions.embedder.embed_store_many(texts,[callback], sv_home.save_cache.value)
functions_embedder = functions.embedder()
embeddings = functions_embedder.embed_store_many(texts,[callback], sv_home.save_cache.value)
pb.empty()

vals = [(n, t, e) for (n, t), e in zip(text_types, embeddings)]
Expand Down
29 changes: 19 additions & 10 deletions python/AI/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import logging
from typing import List

from openai import AzureOpenAI, OpenAI
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from openai import AzureOpenAI, OpenAI

from .classes import LLMCallback
from .defaults import API_BASE_REQUIRED_FOR_AZURE, DEFAULT_EMBEDDING_MODEL
from .openai_configuration import OpenAIConfiguration
Expand All @@ -31,16 +32,24 @@ def create_openai_client(self) -> None:
api_base,
)

token_provider = get_bearer_token_provider(
DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
)
if self.configuration.az_auth_type == 'Managed Identity':
token_provider = get_bearer_token_provider(
DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
)

self._client = AzureOpenAI(
api_version=self.configuration.api_version,
# Azure-Specifics
azure_ad_token_provider=token_provider,
azure_endpoint=api_base,
)
self._client = AzureOpenAI(
api_version=self.configuration.api_version,
# Azure-Specifics
azure_ad_token_provider=token_provider,
azure_endpoint=api_base,
)
else:
self._client = AzureOpenAI(
api_version=self.configuration.api_version,
# Azure-Specifics
azure_endpoint=api_base,
api_key=self.configuration.api_key,
)
else:
log.info("Creating OpenAI client")
self._client = OpenAI(
Expand Down
1 change: 1 addition & 0 deletions python/AI/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
DEFAULT_LLM_MODEL = "gpt-4o"
DEFAULT_AZURE_LLM_MODEL = "gpt-4o"
DEFAULT_LLM_MAX_TOKENS = 4000
DEFAULT_AZ_AUTH_TYPE = "Managed Identity"
#
# Text Embedding Parameters
DEFAULT_EMBEDDING_MODEL = "text-embedding-ada-002"
Expand Down
8 changes: 8 additions & 0 deletions python/AI/openai_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os

from .defaults import (
DEFAULT_AZ_AUTH_TYPE,
DEFAULT_AZURE_LLM_MODEL,
DEFAULT_LLM_MAX_TOKENS,
DEFAULT_LLM_MODEL,
Expand Down Expand Up @@ -31,6 +32,7 @@ class OpenAIConfiguration():
_temperature: float | None
_max_tokens: int | None
_api_type: str
_az_auth_type: str

def __init__(
self,
Expand All @@ -44,6 +46,7 @@ def __init__(
self._api_version = config.get("api_version", self._get_azure_openai_version())
self._temperature = config.get("temperature", DEFAULT_TEMPERATURE)
self._max_tokens = config.get("max_tokens", DEFAULT_LLM_MAX_TOKENS)
self._az_auth_type = config.get("az_auth_type", DEFAULT_AZ_AUTH_TYPE)
self._api_type = config.get("api_type", oai_type)


Expand Down Expand Up @@ -98,3 +101,8 @@ def max_tokens(self) -> int | None:
def api_type(self) -> str | None:
"""Type of the AI connection."""
return self._api_type

@property
def az_auth_type(self) -> str:
"""Authentication type used for the Azure OpenAI connection (e.g. "Managed Identity")."""
return self._az_auth_type

0 comments on commit a6436fb

Please sign in to comment.