
Commit 705f10e

Authored by claramoreirag, actions-user, MiNeves00, and diogoncalves
LLMstudio tracker 1.0.5, LLMstudio proxy 1.0.4, LLMstudio core 1.0.2 (#192)
* feat: allow for url to be used instead of just host and port
* feat: allow url instead of just host and port on proxy and tracker config
* style: lint
* [fix] bump prerelease version in pyproject.toml
* [fix] bump prerelease version in pyproject.toml
* fix: llmstudio-tracker pyproject.toml

Signed-off-by: Clara Moreira Gadelho <56916880+claramoreirag@users.noreply.github.com>

* feat: added support for o1-preview and o1-mini (#193)
* feat: added support for o1-preview and o1-mini
* chore: added o1 preview and mini for azure to config
* chore: bump core

Signed-off-by: Diogo Goncalves <diogoncalves@users.noreply.github.com>

* fix: proxy version

Signed-off-by: Diogo Goncalves <diogoncalves@users.noreply.github.com>

* chore: isort fix
* chore: pyproject.toml
* chore: update pyproject.toml

---------

Signed-off-by: Clara Moreira Gadelho <56916880+claramoreirag@users.noreply.github.com>
Signed-off-by: Diogo Goncalves <diogoncalves@users.noreply.github.com>
Co-authored-by: GitHub Actions <actions@github.com>
Co-authored-by: Miguel Neves <61327611+MiNeves00@users.noreply.github.com>
Co-authored-by: Diogo Goncalves <diogoncalves@users.noreply.github.com>
Co-authored-by: Diogo Goncalves <diogo.goncalves@tensorops.ai>
1 parent 8f26ce1 commit 705f10e

File tree

10 files changed: +138 -103 lines changed


examples/core.py

Lines changed: 72 additions & 58 deletions
@@ -11,20 +11,7 @@ def run_provider(provider, model, api_key, **kwargs):
     llm = LLMCore(provider=provider, api_key=api_key, **kwargs)
 
     latencies = {}
-    chat_request = {
-        "chat_input": "Hello, my name is Json",
-        "model": model,
-        "is_stream": False,
-        "retries": 0,
-        "parameters": {
-            "temperature": 0,
-            "max_tokens": 100,
-            "response_format": {"type": "json_object"},
-            "functions": None,
-        }
-    }
-
-
+    chat_request = build_chat_request(model, chat_input="Hello, my name is Jason Json", is_stream=False)
 
     import asyncio
     response_async = asyncio.run(llm.achat(**chat_request))
@@ -34,18 +21,7 @@ def run_provider(provider, model, api_key, **kwargs):
     # stream
     print("\nasync stream")
     async def async_stream():
-        chat_request = {
-            "chat_input": "Hello, my name is Json",
-            "model": model,
-            "is_stream": True,
-            "retries": 0,
-            "parameters": {
-                "temperature": 0,
-                "max_tokens": 100,
-                "response_format": {"type": "json_object"},
-                "functions": None,
-            }
-        }
+        chat_request = build_chat_request(model, chat_input="Hello, my name is Tom Json", is_stream=True)
 
         response_async = await llm.achat(**chat_request)
         async for p in response_async:
@@ -61,36 +37,14 @@ async def async_stream():
 
 
     print("# Now sync calls")
-    chat_request = {
-        "chat_input": "Hello, my name is Json",
-        "model": model,
-        "is_stream": False,
-        "retries": 0,
-        "parameters": {
-            "temperature": 0,
-            "max_tokens": 100,
-            "response_format": {"type": "json_object"},
-            "functions": None,
-        }
-    }
+    chat_request = build_chat_request(model, chat_input="Hello, my name is Alice Json", is_stream=False)
 
     response_sync = llm.chat(**chat_request)
     pprint(response_sync)
     latencies["sync (ms)"]= response_sync.metrics["latency_s"]*1000
 
     print("# Now sync calls streaming")
-    chat_request = {
-        "chat_input": "Hello, my name is Json",
-        "model": model,
-        "is_stream": True,
-        "retries": 0,
-        "parameters": {
-            "temperature": 0,
-            "max_tokens": 100,
-            "response_format": {"type": "json_object"},
-            "functions": None,
-        }
-    }
+    chat_request = build_chat_request(model, chat_input="Hello, my name is Mary Json", is_stream=True)
 
     response_sync_stream = llm.chat(**chat_request)
     for p in response_sync_stream:
@@ -101,16 +55,58 @@ async def async_stream():
         pprint(p)
     latencies["sync stream (ms)"]= p.metrics["latency_s"]*1000
 
-    print(f"\n\n###EPORT for {provider}, {model} ###")
+    print(f"\n\n###REPORT for <{provider}>, <{model}> ###")
     return latencies
 
+def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens: int=1000):
+    if model == "o1-preview" or model == "o1-mini":
+        chat_request = {
+            "chat_input": chat_input,
+            "model": model,
+            "is_stream": is_stream,
+            "retries": 0,
+            "parameters": {
+                "max_completion_tokens": max_tokens
+            }
+        }
+    else:
+        chat_request = {
+            "chat_input": chat_input,
+            "model": model,
+            "is_stream": is_stream,
+            "retries": 0,
+            "parameters": {
+                "temperature": 0,
+                "max_tokens": max_tokens,
+                "response_format": {"type": "json_object"},
+                "functions": None,
+            }
+        }
+    return chat_request
+
+
+
+
 
 provider = "openai"
 model = "gpt-4o-mini"
 for _ in range(1):
     latencies = run_provider(provider=provider, model=model, api_key=os.environ["OPENAI_API_KEY"])
     pprint(latencies)
 
+
+provider = "openai"
+model = "o1-preview"
+for _ in range(1):
+    latencies = run_provider(provider=provider, model=model, api_key=os.environ["OPENAI_API_KEY"])
+    pprint(latencies)
+
+provider = "openai"
+model = "o1-mini"
+for _ in range(1):
+    latencies = run_provider(provider=provider, model=model, api_key=os.environ["OPENAI_API_KEY"])
+    pprint(latencies)
+
 # provider = "anthropic"
 # model = "claude-3-opus-20240229"
 # for _ in range(1):
@@ -126,6 +122,24 @@ async def async_stream():
                              api_version=os.environ["AZURE_API_VERSION"],
                              api_endpoint=os.environ["AZURE_API_ENDPOINT"])
     pprint(latencies)
+
+provider = "azure"
+model = "o1-preview"
+for _ in range(1):
+    latencies = run_provider(provider=provider, model=model,
+                             api_key=os.environ["AZURE_API_KEY"],
+                             api_version=os.environ["AZURE_API_VERSION"],
+                             api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+    pprint(latencies)
+
+provider = "azure"
+model = "o1-mini"
+for _ in range(1):
+    latencies = run_provider(provider=provider, model=model,
+                             api_key=os.environ["AZURE_API_KEY"],
+                             api_version=os.environ["AZURE_API_VERSION"],
+                             api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+    pprint(latencies)
 
 # provider = "azure"
 # model = "gpt-4o"
@@ -137,10 +151,10 @@ async def async_stream():
 # pprint(latencies)
 
 
-provider = "vertexai"
-model = "gemini-1.5-pro-latest"
-for _ in range(1):
-    latencies = run_provider(provider=provider, model=model,
-                             api_key=os.environ["GOOGLE_API_KEY"],
-                             )
-    pprint(latencies)
+# provider = "vertexai"
+# model = "gemini-1.5-pro-latest"
+# for _ in range(1):
+#     latencies = run_provider(provider=provider, model=model,
+#                              api_key=os.environ["GOOGLE_API_KEY"],
+#                              )
+# pprint(latencies)
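
For orientation, the example script now funnels every request through the new build_chat_request helper: o1-preview and o1-mini receive only a max_completion_tokens parameter, while every other model keeps the previous temperature, max_tokens, JSON response_format and functions settings. A condensed, self-contained sketch of that branching (same logic as the helper in the diff above, reshaped slightly for brevity; the __main__ demo calls are illustrative only):

from pprint import pprint

def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens: int = 1000):
    # o1-family models accept only "max_completion_tokens"; other models keep
    # the JSON-mode parameters the example used before this commit.
    if model in ("o1-preview", "o1-mini"):
        parameters = {"max_completion_tokens": max_tokens}
    else:
        parameters = {
            "temperature": 0,
            "max_tokens": max_tokens,
            "response_format": {"type": "json_object"},
            "functions": None,
        }
    return {
        "chat_input": chat_input,
        "model": model,
        "is_stream": is_stream,
        "retries": 0,
        "parameters": parameters,
    }

if __name__ == "__main__":
    pprint(build_chat_request("o1-mini", "Hello, my name is Json", is_stream=False))
    pprint(build_chat_request("gpt-4o-mini", "Hello, my name is Json", is_stream=False))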

libs/core/llmstudio_core/config.yaml

Lines changed: 20 additions & 0 deletions
@@ -204,6 +204,16 @@ providers:
     keys:
       - OPENAI_API_KEY
     models:
+      o1-preview:
+        mode: chat
+        max_completion_tokens: 128000
+        input_token_cost: 0.000015
+        output_token_cost: 0.000060
+      o1-mini:
+        mode: chat
+        max_completion_tokens: 128000
+        input_token_cost: 0.000003
+        output_token_cost: 0.000012
       gpt-4o-mini:
         mode: chat
         max_tokens: 128000
@@ -280,6 +290,16 @@ providers:
       - AZURE_API_ENDPOINT
       - AZURE_API_VERSION
     models:
+      o1-preview:
+        mode: chat
+        max_completion_tokens: 128000
+        input_token_cost: 0.0000165
+        output_token_cost: 0.000066
+      o1-mini:
+        mode: chat
+        max_completion_tokens: 128000
+        input_token_cost: 0.0000033
+        output_token_cost: 0.0000132
       gpt-4o-mini:
         mode: chat
         max_tokens: 128000
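
The new entries also carry pricing, expressed in USD per token, so a request's cost is simply input_tokens * input_token_cost + output_tokens * output_token_cost. A small illustrative calculation using the rates added above (the estimate_cost helper is hypothetical, not part of LLMstudio):

# Per-token USD rates copied from the config.yaml entries added above.
RATES = {
    "openai/o1-preview": (0.000015, 0.000060),
    "openai/o1-mini": (0.000003, 0.000012),
    "azure/o1-preview": (0.0000165, 0.000066),
    "azure/o1-mini": (0.0000033, 0.0000132),
}

def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    # cost = prompt tokens * input rate + completion tokens * output rate
    input_rate, output_rate = RATES[model]
    return input_tokens * input_rate + output_tokens * output_rate

# 1,000 prompt tokens and 500 completion tokens on openai/o1-mini:
# 1000 * 0.000003 + 500 * 0.000012 = 0.009 USD
print(f"{estimate_cost('openai/o1-mini', 1000, 500):.6f}")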

libs/core/llmstudio_core/utils.py

Lines changed: 5 additions & 4 deletions
@@ -3,7 +3,7 @@
 from typing import Any, Dict, List, Optional, Union
 
 import yaml
-from pydantic import BaseModel, ValidationError
+from pydantic import BaseModel, Field, ValidationError
 
 
 class OpenAIToolParameters(BaseModel):
@@ -30,9 +30,10 @@ class CostRange(BaseModel):
 
 class ModelConfig(BaseModel):
     mode: str
-    max_tokens: int
-    input_token_cost: Union[float, List[CostRange]]
-    output_token_cost: Union[float, List[CostRange]]
+    max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens")
+    max_completion_tokens: Optional[int] = None
+    input_token_cost: Union[float, List["CostRange"]]
+    output_token_cost: Union[float, List["CostRange"]]
 
 
 class ProviderConfig(BaseModel):
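
The ModelConfig change makes both limits optional, so a config entry can declare either max_tokens (existing models) or max_completion_tokens (the new o1 entries) without failing validation. Purely as an illustration of how a caller might pick whichever limit is set, and assuming nothing about LLMstudio's internals, a hypothetical helper:

from typing import Optional

def effective_token_limit(max_tokens: Optional[int],
                          max_completion_tokens: Optional[int]) -> Optional[int]:
    # Hypothetical: prefer the o1-style completion limit when present,
    # otherwise fall back to the classic max_tokens value.
    if max_completion_tokens is not None:
        return max_completion_tokens
    return max_tokens

print(effective_token_limit(None, 128000))   # o1-style entry -> 128000
print(effective_token_limit(128000, None))   # classic entry  -> 128000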

libs/core/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "llmstudio-core"
-version = "1.0.1"
+version = "1.0.2"
 description = "LLMStudio core capabilities for routing llm calls for any vendor. No proxy server required. For that use llmstudio[proxy]"
 authors = ["Cláudio Lemos <claudio.lemos@tensorops.ai>"]
 license = "MIT"

libs/proxy/llmstudio_proxy/provider.py

Lines changed: 14 additions & 16 deletions
@@ -18,26 +18,24 @@ class ProxyConfig(BaseModel):
 
     def __init__(self, **data):
         super().__init__(**data)
-        if (self.host is None and self.port is None) and self.url is None:
-            raise ValueError(
-                "Either both 'host' and 'port' must be provided, or 'url' must be specified."
-            )
+        if self.url is None:
+            if self.host is not None and self.port is not None:
+                self.url = f"http://{self.host}:{self.port}"
+            else:
+                raise ValueError(
+                    "You must provide either both 'host' and 'port', or 'url'."
+                )
 
 
 class LLMProxyProvider(Provider):
     def __init__(self, provider: str, proxy_config: ProxyConfig):
         self.provider = provider
+        self.engine_url = proxy_config.url
 
-        self.engine_host = proxy_config.host
-        self.engine_port = proxy_config.port
-        if is_server_running(host=self.engine_host, port=self.engine_port):
-            print(
-                f"Connected to LLMStudio Proxy @ {self.engine_host}:{self.engine_port}"
-            )
+        if is_server_running(url=self.engine_url):
+            print(f"Connected to LLMStudio Proxy @ {self.engine_url}")
         else:
-            raise Exception(
-                f"LLMStudio Proxy is not running @ {self.engine_host}:{self.engine_port}"
-            )
+            raise Exception(f"LLMStudio Proxy is not running @ {self.engine_url}")
 
     @staticmethod
     def _provider_config_name():
@@ -53,7 +51,7 @@ def chat(
         **kwargs,
     ) -> Union[ChatCompletion]:
         response = requests.post(
-            f"http://{self.engine_host}:{self.engine_port}/api/engine/chat/{self.provider}",
+            f"{self.engine_url}/api/engine/chat/{self.provider}",
             json={
                 "chat_input": chat_input,
                 "model": model,
@@ -109,7 +107,7 @@ async def async_non_stream(
     ):
         response = await asyncio.to_thread(
             requests.post,
-            f"http://{self.engine_host}:{self.engine_port}/api/engine/chat/{self.provider}",
+            f"{self.engine_url}/api/engine/chat/{self.provider}",
             json={
                 "chat_input": chat_input,
                 "model": model,
@@ -131,7 +129,7 @@ async def async_stream(
    ):
        response = await asyncio.to_thread(
            requests.post,
-            f"http://{self.engine_host}:{self.engine_port}/api/engine/chat/{self.provider}",
+            f"{self.engine_url}/api/engine/chat/{self.provider}",
            json={
                "chat_input": chat_input,
                "model": model,

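The ProxyConfig change reduces to one resolution rule: an explicit url is kept as-is, otherwise host and port are combined into http://{host}:{port}, and providing neither is an error. A standalone sketch of that rule (a plain function rather than the pydantic model above; the example host, port, and URL values are arbitrary):

from typing import Optional

def resolve_proxy_url(url: Optional[str] = None,
                      host: Optional[str] = None,
                      port: Optional[str] = None) -> str:
    # Mirror of the rule ProxyConfig.__init__ now applies.
    if url is not None:
        return url
    if host is not None and port is not None:
        return f"http://{host}:{port}"
    raise ValueError("You must provide either both 'host' and 'port', or 'url'.")

print(resolve_proxy_url(url="https://proxy.internal.example.com"))  # explicit URL wins
print(resolve_proxy_url(host="localhost", port="8001"))             # -> http://localhost:8001

Every request path in LLMProxyProvider then builds on the single engine_url attribute instead of separate host and port attributes.
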
libs/proxy/llmstudio_proxy/server.py

Lines changed: 3 additions & 3 deletions
@@ -165,9 +165,9 @@ def run_proxy_app(started_event: Event):
         print(f"Error running LLMstudio Proxy: {e}")
 
 
-def is_server_running(host, port, path="/health"):
+def is_server_running(url, path="/health"):
     try:
-        response = requests.get(f"http://{host}:{port}{path}")
+        response = requests.get(f"{url}{path}")
         if response.status_code == 200 and response.json().get("status") == "healthy":
             return True
     except requests.ConnectionError:
@@ -176,7 +176,7 @@ def is_server_running(host, port, path="/health"):
 
 
 def start_server_component(host, port, run_func, server_name):
-    if not is_server_running(host, port):
+    if not is_server_running(url=f"http://{host}:{port}"):
         started_event = Event()
         thread = Thread(target=run_func, daemon=True, args=(started_event,))
         thread.start()
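
After the signature change, callers hand is_server_running a full base URL and the health path is appended to it; start_server_component keeps its host/port arguments and simply builds the URL at the call site. A compact standalone version of the updated check (same logic as the diff; the example URL and the trailing return False are illustrative):

import requests

def is_server_running(url, path="/health"):
    # Accept only a 200 response whose JSON body reports status == "healthy".
    try:
        response = requests.get(f"{url}{path}")
        if response.status_code == 200 and response.json().get("status") == "healthy":
            return True
    except requests.ConnectionError:
        pass
    return False

# Host/port callers build the URL first, as start_server_component now does:
print(is_server_running(url="http://localhost:50001"))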

libs/proxy/pyproject.toml

Lines changed: 3 additions & 3 deletions
@@ -1,8 +1,8 @@
 [tool.poetry]
 name = "llmstudio-proxy"
-version = "1.0.3a0"
-description = ""
-authors = ["Diogo Goncalves <diogo.goncalves@tensorops.ai>"]
+version = "1.0.4"
+description = "LLMstudio Proxy is the module of LLMstudio that allows calling any LLM as a Service Provider in proxy server. By leveraging LLMstudio Proxy, you can have a centralized point for connecting to any provider running independently from your application."
+authors = ["Claudio Lemos <claudio.lemos@tensorops.ai>"]
 readme = "README.md"
 
 [tool.poetry.dependencies]

libs/tracker/llmstudio_tracker/server.py

Lines changed: 3 additions & 3 deletions
@@ -66,9 +66,9 @@ def run_tracker_app(started_event: Event):
         print(f"Error running LLMstudio Tracking: {e}")
 
 
-def is_server_running(host, port, path="/health"):
+def is_server_running(url, path="/health"):
     try:
-        response = requests.get(f"http://{host}:{port}{path}")
+        response = requests.get(f"{url}{path}")
         if response.status_code == 200 and response.json().get("status") == "healthy":
             return True
     except requests.ConnectionError:
@@ -77,7 +77,7 @@ def is_server_running(host, port, path="/health"):
 
 
 def start_server_component(host, port, run_func, server_name):
-    if not is_server_running(host, port):
+    if not is_server_running(url=f"http://{host}:{port}"):
         started_event = Event()
         thread = Thread(target=run_func, daemon=True, args=(started_event,))
         thread.start()
