Skip to content

Commit

Permalink
Add statistics, handle 429 error more specifically
Browse files: browse the repository at this point in the history
  • Loading branch information
simonkurtz-MSFT committed May 23, 2024
1 parent dcf3391 commit 1d98031
Showing 1 changed file with 70 additions and 5 deletions.
75 changes: 70 additions & 5 deletions aoai.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,6 +48,8 @@ def __init__(self):
def send_request(num_of_requests: int, azure_endpoint: str):
"""Function to send standard requests to the Azure OpenAI API."""

global counter, success_counter

try:
client = AzureOpenAI(
azure_endpoint = azure_endpoint,
Expand All @@ -66,6 +68,8 @@ def send_request(num_of_requests: int, azure_endpoint: str):
]
)

success_counter += 1
counter += 1
print(f"{datetime.now()}:\n{response}\n\n\n")

except NotFoundError as e:
Expand All @@ -79,6 +83,8 @@ def send_request(num_of_requests: int, azure_endpoint: str):
def send_loadbalancer_request(num_of_requests: int):
"""Function to send load-balanced requests to the Azure OpenAI API."""

global counter, failure_counter, success_counter

try:
# Instantiate the LoadBalancer class and create a new https client with the load balancer as the injected transport.
lb = LoadBalancer(backends)
Expand All @@ -102,10 +108,20 @@ def send_loadbalancer_request(num_of_requests: int):
]
)

success_counter += 1
print(f"{datetime.now()}:\n{response}\n\n\n")
except APIError as e:
if e.code == 429:
print(f"{datetime.now()}: Rate limit exceeded. Python OpenAI Library has exhausted all of its retries.")
else:
print(f"{datetime.now()}: Python OpenAI Library request failure.")

failure_counter += 1
except Exception:
print(f"{datetime.now()}: Request failure. Python OpenAI Library has exhausted all of its retries.")
traceback.print_exc()
failure_counter += 1

counter += 1

except NotFoundError as e:
print("openai.NotFoundError:", vars(e))
Expand All @@ -117,6 +133,8 @@ def send_loadbalancer_request(num_of_requests: int):
async def send_async_loadbalancer_request(num_of_requests: int):
"""Function to send load-balanced requests to the Azure OpenAI API."""

global counter, failure_counter, success_counter

try:
# Instantiate the LoadBalancer class and create a new https client with the load balancer as the injected transport.
lb = AsyncLoadBalancer(backends)
Expand All @@ -140,10 +158,20 @@ async def send_async_loadbalancer_request(num_of_requests: int):
]
)

success_counter += 1
print(f"{datetime.now()}:\n{response}\n\n\n")
except APIError as e:
if e.code == 429:
print(f"{datetime.now()}: Rate limit exceeded. Python OpenAI Library has exhausted all of its retries.")
else:
print(f"{datetime.now()}: Python OpenAI Library request failure.")

failure_counter += 1
except Exception:
print(f"{datetime.now()}: Request failure. Python OpenAI Library has exhausted all of its retries.")
traceback.print_exc()
failure_counter += 1

counter += 1

except NotFoundError as e:
print("openai.NotFoundError:", vars(e))
Expand All @@ -156,6 +184,8 @@ async def send_async_loadbalancer_request(num_of_requests: int):
def send_stream_loadbalancer_request(num_of_requests: int):
"""Function to send load-balanced streaming requests to the Azure OpenAI API."""

global counter, failure_counter, success_counter

try:
# Instantiate the LoadBalancer class and create a new https client with the load balancer as the injected transport.
lb = LoadBalancer(backends)
Expand Down Expand Up @@ -201,10 +231,20 @@ def send_stream_loadbalancer_request(num_of_requests: int):
collected_messages = [m for m in collected_messages if m is not None] # Clean None in collected_messages
full_reply_content = ''.join(collected_messages)
print(f"\nFull conversation received: {full_reply_content}\n\n")
success_counter += 1

except APIError as e:
if e.code == 429:
print(f"{datetime.now()}: Rate limit exceeded. Python OpenAI Library has exhausted all of its retries.")
else:
print(f"{datetime.now()}: Python OpenAI Library request failure.")

failure_counter += 1
except Exception:
print(f"{datetime.now()}: Request failure. Python OpenAI Library has exhausted all of its retries.")
traceback.print_exc()
failure_counter += 1

counter += 1

except NotFoundError as e:
print("openai.NotFoundError:", vars(e))
Expand All @@ -217,6 +257,8 @@ def send_stream_loadbalancer_request(num_of_requests: int):
async def send_async_stream_loadbalancer_request(num_of_requests: int):
"""Function to send load-balanced streaming requests to the Azure OpenAI API."""

global counter, failure_counter, success_counter

try:
# Instantiate the LoadBalancer class and create a new https client with the load balancer as the injected transport.
lb = AsyncLoadBalancer(backends)
Expand Down Expand Up @@ -263,11 +305,22 @@ async def send_async_stream_loadbalancer_request(num_of_requests: int):
collected_messages = [m for m in collected_messages if m is not None] # Clean None in collected_messages
full_reply_content = ''.join(collected_messages)
print(f"\nFull conversation received: {full_reply_content}\n\n")
success_counter += 1

except APIError as e:
if e.code == 429:
print(f"{datetime.now()}: Rate limit exceeded. Python OpenAI Library has exhausted all of its retries.")
else:
print(f"{datetime.now()}: Python OpenAI Library request failure.")

failure_counter += 1
except Exception:
print(f"{datetime.now()}: Request failure. Python OpenAI Library has exhausted all of its retries.")
traceback.print_exc()

failure_counter += 1

counter += 1

except NotFoundError as e:
print("openai.NotFoundError:", vars(e))
traceback.print_exc()
Expand All @@ -279,6 +332,8 @@ async def send_async_stream_loadbalancer_request(num_of_requests: int):

# >>> TEST HARNESS <<<

success_counter = failure_counter = counter = 0

# Set up the logger: https://www.machinelearningplus.com/python/python-logging-guide/
logging.basicConfig(
format = '%(asctime)s %(levelname)-8s %(module)-30s %(message)s',
Expand Down Expand Up @@ -336,8 +391,18 @@ async def send_async_stream_loadbalancer_request(num_of_requests: int):
async_stream_lb_end_time = time.time()

# Statistics
width = 7

print(f"\n{'*' * 100}\n")
print(f"Number of requests : {NUM_OF_REQUESTS}\n")
print(f"Number of requests : {str(NUM_OF_REQUESTS).rjust(width)}\n")

print(f"Total requests : {str(counter).rjust(width)}")
print(f"Successful requests : {str(success_counter).rjust(width)}")
print(f"Failed requests : {str(failure_counter).rjust(width)}\n")

print(f"Successful requests percentage : {('{:.2%}'.format(success_counter / counter)).rjust(width)}")
print(f"Failed requests percentage : {('{:.2%}'.format(failure_counter / counter)).rjust(width)}\n")


if test_executions.standard:
print(f"Single instance operation duration : {end_time - start_time:.2f} seconds")
Expand Down

0 comments on commit 1d98031

Please sign in to comment.