Commit

fix exception mapping for streaming
krrishdholakia committed Sep 23, 2023
1 parent f984e5f commit 889679a
Showing 8 changed files with 760 additions and 94 deletions.
Binary file modified litellm/__pycache__/main.cpython-311.pyc
Binary file modified litellm/__pycache__/utils.cpython-311.pyc
6 changes: 4 additions & 2 deletions litellm/llms/replicate.py
@@ -77,14 +77,16 @@ def handle_prediction_response_streaming(prediction_url, api_token, print_verbos
     }
     status = ""
     while True and (status not in ["succeeded", "failed", "canceled"]):
-        time.sleep(0.0001)
+        time.sleep(0.0001) # prevent being rate limited by replicate
         response = requests.get(prediction_url, headers=headers)
         if response.status_code == 200:
             response_data = response.json()
+            status = response_data['status']
+            print(f"response data: {response_data}")
             if "output" in response_data:
                 output_string = "".join(response_data['output'])
                 new_output = output_string[len(previous_output):]
-                yield new_output
+                yield {"output": new_output, "status": status}
                 previous_output = output_string
             status = response_data['status']
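
With the loop now yielding {"output": ..., "status": ...} dicts instead of bare strings, a caller can react to a failed prediction instead of silently streaming partial text. A minimal, hypothetical consumer sketch (collect_stream and the stubbed chunks are illustrative, not part of the commit):

def collect_stream(chunks):
    text = ""
    for chunk in chunks:
        if chunk["status"] == "failed":
            # surface the failure so the exception-mapping layer can translate it
            raise RuntimeError("Replicate prediction failed")
        text += chunk["output"]
    return text

# usage with stubbed chunks in the new format
print(collect_stream([
    {"output": "Hello", "status": "processing"},
    {"output": " world", "status": "succeeded"},
]))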

12 changes: 6 additions & 6 deletions litellm/main.py
@@ -485,11 +485,11 @@ def completion(
         # Setting the relevant API KEY for replicate, replicate defaults to using os.environ.get("REPLICATE_API_TOKEN")
         replicate_key = None
         replicate_key = (
-            get_secret("REPLICATE_API_KEY")
-            or get_secret("REPLICATE_API_TOKEN")
-            or api_key
+            api_key
             or litellm.replicate_key
-            or litellm.api_key
+            or litellm.api_key
+            or get_secret("REPLICATE_API_KEY")
+            or get_secret("REPLICATE_API_TOKEN")
         )
 
         model_response = replicate.completion(
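
The reordering changes which credential wins: Python's or-chain returns the first truthy value, so an api_key passed by the caller now takes precedence over module-level settings and environment secrets, rather than the other way around. A standalone sketch of the same pattern (resolve_key is an illustrative name, not litellm code):

import os

def resolve_key(api_key=None, module_key=None):
    # first truthy value wins, mirroring the reordered chain above
    return (
        api_key
        or module_key
        or os.environ.get("REPLICATE_API_KEY")
        or os.environ.get("REPLICATE_API_TOKEN")
    )

assert resolve_key(api_key="explicit-key") == "explicit-key"  # caller-supplied key beats env vars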
@@ -575,7 +575,7 @@ def completion(
 
         if "stream" in optional_params and optional_params["stream"] == True:
             # don't try to access stream object,
-            response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph-alpha", logging_obj=logging)
+            response = CustomStreamWrapper(model_response, model, custom_llm_provider="aleph_alpha", logging_obj=logging)
             return response
         response = model_response
     elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
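
The "aleph-alpha" to "aleph_alpha" rename matters because the stream wrapper appears to dispatch on an exact provider string, so a hyphenated name would skip the provider-specific chunk handling. A self-contained sketch of that kind of dispatch (route_chunk and its branches are illustrative, not litellm internals):

def route_chunk(chunk, custom_llm_provider):
    # exact string comparison: "aleph-alpha" would not match this branch
    if custom_llm_provider == "aleph_alpha":
        return f"[aleph_alpha] {chunk}"
    if custom_llm_provider == "vertex_ai":
        return f"[vertex_ai] {chunk}"
    return chunk  # generic fallthrough

print(route_chunk("hello", "aleph_alpha"))  # provider-specific handling
print(route_chunk("hello", "aleph-alpha"))  # falls through -- the mismatch this commit fixes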
@@ -769,7 +769,7 @@ def completion(
         if stream:
             model_response = chat.send_message_streaming(prompt, **optional_params)
             response = CustomStreamWrapper(
-                model_response, model, custom_llm_provider="vertexai", logging_obj=logging
+                model_response, model, custom_llm_provider="vertex_ai", logging_obj=logging
             )
             return response
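
The same underscore fix applies to the Vertex AI streaming path. A hedged end-to-end usage sketch (the model id and prompt are placeholders, not taken from this commit): with the provider strings corrected and the Replicate generator yielding status, a streaming failure should surface as a raised exception rather than as malformed chunks.

from litellm import completion

try:
    response = completion(
        model="replicate/llama-2-70b-chat",  # placeholder model id
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )
    for chunk in response:
        print(chunk)
except Exception as e:
    print(f"streaming error surfaced as exception: {e}")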

18 changes: 0 additions & 18 deletions litellm/tests/test_completion.py
@@ -643,24 +643,6 @@ def test_completion_sagemaker():
 
 # test_completion_sagemaker()
 
-def test_completion_sagemaker_stream():
-    litellm.set_verbose = False
-    try:
-        response = completion(
-            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
-            messages=messages,
-            temperature=0.2,
-            max_tokens=80,
-            stream=True,
-        )
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk)
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_sagemaker_stream()
 
 def test_completion_bedrock_titan():
     try:
         response = completion(