Commit 218c1e7

Merge branch 'main' into runners2
2 parents: 023ddc2 + b5cbcbc

39 files changed: +3025 −288 lines

.gitignore (+2)

```diff
@@ -171,3 +171,5 @@ cython_debug/
 
 **/*.xcodeproj/*
 .aider*
+
+exo/tinychat/images/*.png
```

README.md (+8 −6)

````diff
@@ -18,6 +18,8 @@ exo: Run your own AI cluster at home with everyday devices. Maintained by [exo l
 [![Tests](https://dl.circleci.com/status-badge/img/circleci/TrkofJDoGzdQAeL6yVHKsg/4i5hJuafuwZYZQxbRAWS71/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/circleci/TrkofJDoGzdQAeL6yVHKsg/4i5hJuafuwZYZQxbRAWS71/tree/main)
 [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
 
+<a href="https://trendshift.io/repositories/11849" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11849" alt="exo-explore%2Fexo | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+
 </div>
 
 ---
@@ -38,7 +40,7 @@ We also welcome contributions from the community. We have a list of bounties in
 
 ### Wide Model Support
 
-exo supports different models including LLaMA ([MLX](exo/inference/mlx/models/llama.py) and [tinygrad](exo/inference/tinygrad/models/llama.py)), Mistral, LlaVA, Qwen and Deepseek.
+exo supports different models including LLaMA ([MLX](exo/inference/mlx/models/llama.py) and [tinygrad](exo/inference/tinygrad/models/llama.py)), Mistral, LlaVA, Qwen, and Deepseek.
 
 ### Dynamic Model Partitioning
 
@@ -100,13 +102,13 @@ source install.sh
 
 - There are a number of things users have empirically found to improve performance on Apple Silicon Macs:
 
-1. Upgrade to the latest version of MacOS 15.
+1. Upgrade to the latest version of macOS Sequoia.
 2. Run `./configure_mlx.sh`. This runs commands to optimize GPU memory allocation on Apple Silicon Macs.
 
 
 ## Documentation
 
-### Example Usage on Multiple MacOS Devices
+### Example Usage on Multiple macOS Devices
 
 #### Device 1:
 
@@ -177,9 +179,9 @@ curl http://localhost:52415/v1/chat/completions \
 }'
 ```
 
-### Example Usage on Multiple Heterogenous Devices (MacOS + Linux)
+### Example Usage on Multiple Heterogeneous Devices (macOS + Linux)
 
-#### Device 1 (MacOS):
+#### Device 1 (macOS):
 
 ```sh
 exo
@@ -244,7 +246,7 @@ python3 format.py ./exo
 
 ## Known Issues
 
-- On some versions of MacOS/Python, certificates are not installed properly which can lead to SSL errors (e.g. SSL error with huggingface.co). To fix this, run the Install Certificates command, usually:
+- On certain versions of Python on macOS, certificates may not be installed correctly, potentially causing SSL errors (e.g., when accessing huggingface.co). To resolve this, run the `Install Certificates` command, typically as follows:
 
 ```sh
 /Applications/Python 3.x/Install Certificates.command
````
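The fourth hunk above shows only the tail of the README's curl example. For orientation, a request to exo's ChatGPT-compatible endpoint looks roughly like this (a sketch; the model name and prompt are illustrative, not part of this diff):

```sh
curl http://localhost:52415/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama-3.2-3b",
    "messages": [{"role": "user", "content": "What is the meaning of exo?"}],
    "temperature": 0.7
  }'
```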

configure_mlx.sh (+1 −1)

```diff
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # Get the total memory in MB
 TOTAL_MEM_MB=$(($(sysctl -n hw.memsize) / 1024 / 1024))
```
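The diff only touches the shebang; the rest of the script is truncated here. As a rough sketch of how a GPU-memory tuning script like this typically continues — the `iogpu` sysctl keys and the percentage thresholds below are assumptions for illustration, not taken from this commit:

```bash
#!/usr/bin/env bash

# Get the total memory in MB
TOTAL_MEM_MB=$(($(sysctl -n hw.memsize) / 1024 / 1024))

# Assumed policy: let the GPU wire up to ~80% of RAM, with a ~70% low-water mark.
WIRED_LIMIT_MB=$((TOTAL_MEM_MB * 80 / 100))
WIRED_LWM_MB=$((TOTAL_MEM_MB * 70 / 100))

# On Apple Silicon (macOS 14+), these sysctl keys control GPU wired-memory limits (values in MB).
sudo sysctl -w iogpu.wired_limit_mb=$WIRED_LIMIT_MB
sudo sysctl -w iogpu.wired_lwm_mb=$WIRED_LWM_MB
```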

examples/function_calling.py (new file, +111)

```python
import json
import re
import requests

def get_current_weather(location: str, unit: str = "celsius"):
  """Mock weather data function"""
  # Hardcoded response for demo purposes
  return {
    "location": location,
    "temperature": 22 if unit == "celsius" else 72,
    "unit": unit,
    "forecast": "Sunny with light clouds"
  }

def try_parse_tool_calls(content: str):
  """Try parse the tool calls."""
  tool_calls = []
  offset = 0
  for i, m in enumerate(re.finditer(r"<tool_call>\n(.+)?\n</tool_call>", content)):
    if i == 0:
      offset = m.start()
    try:
      func = json.loads(m.group(1))
      tool_calls.append({"type": "function", "function": func})
      if isinstance(func["arguments"], str):
        func["arguments"] = json.loads(func["arguments"])
    except json.JSONDecodeError as e:
      print(f"Failed to parse tool calls: the content is {m.group(1)} and {e}")
      pass
  if tool_calls:
    if offset > 0 and content[:offset].strip():
      c = content[:offset]
    else:
      c = ""
    return {"role": "assistant", "content": c, "tool_calls": tool_calls}
  return {"role": "assistant", "content": re.sub(r"<\|im_end\|>$", "", content)}

def chat_completion(messages):
  """Send chat completion request to local server"""
  response = requests.post(
    "http://localhost:52415/v1/chat/completions",
    json={
      "model": "qwen-2.5-1.5b",
      "messages": messages,
      "tools": [{
        "type": "function",
        "function": {
          "name": "get_current_weather",
          "description": "Get the current weather in a given location",
          "parameters": {
            "type": "object",
            "properties": {
              "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA"
              },
              "unit": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"]
              }
            },
            "required": ["location"]
          }
        }
      }],
      "tool_choice": "auto"
    }
  )
  return response.json()

def main():
  # Initial conversation
  messages = [{
    "role": "user",
    "content": "Hi there, what's the weather in Boston?"
  }]

  # Get initial response
  response = chat_completion(messages)
  print(f"First response: {response}")
  assistant_message = try_parse_tool_calls(response["choices"][0]["message"]["content"])
  messages.append(assistant_message)

  # If there are tool calls, execute them and continue conversation
  if "tool_calls" in assistant_message:
    for tool_call in assistant_message["tool_calls"]:
      if tool_call["function"]["name"] == "get_current_weather":
        args = tool_call["function"]["arguments"]
        weather_data = get_current_weather(**args)

        # Add tool response to messages
        messages.append({
          "role": "tool",
          "content": json.dumps(weather_data),
          "name": tool_call["function"]["name"]
        })

    # Get final response with weather data
    response = chat_completion(messages)
    print(f"Final response: {response}")
    messages.append({
      "role": "assistant",
      "content": response["choices"][0]["message"]["content"]
    })

  # Print full conversation
  for msg in messages:
    print(f"\n{msg['role'].upper()}: {msg['content']}")

if __name__ == "__main__":
  main()
```
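To make the parsing step concrete, here is a small usage sketch (not part of the commit) showing the `<tool_call>` format that `try_parse_tool_calls` expects from the model; the sample string and the import path are hypothetical:

```python
# Hypothetical usage; assumes examples/ is on sys.path so the module imports.
from function_calling import try_parse_tool_calls

sample = (
    "Let me check that for you.\n"
    "<tool_call>\n"
    '{"name": "get_current_weather", "arguments": {"location": "Boston, MA"}}\n'
    "</tool_call>"
)

msg = try_parse_tool_calls(sample)
# Prose before the first <tool_call> tag is kept as the assistant's content;
# the JSON payload becomes a structured tool call.
print(msg["content"])     # Let me check that for you.
print(msg["tool_calls"])  # [{'type': 'function', 'function': {'name': 'get_current_weather', ...}}]
```

Running `python3 examples/function_calling.py` against a local exo instance exercises the full round trip: the model's tool call is parsed, executed against the mock weather function, and the result is sent back for a final answer.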
