Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
**.wav

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
Expand Down
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/Homework6-Submission.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

77 changes: 77 additions & 0 deletions .virtual_documents/Class 6 Homework.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@









# Function tool stubs (starter implementations)
def search_arxiv(query: str) -> str:
    """
    Simulate an arXiv search by producing a dummy passage for *query*.

    A real implementation would hit the arXiv API and summarize a paper;
    the canned snippet keeps the demo free of network calls and rate limits.
    """
    snippet = f"[arXiv snippet related to '{query}']"
    return snippet

def calculate(expression: str) -> str:
    """
    Evaluate a mathematical expression and return the result as a string.

    Uses sympy when it is installed; if sympy is missing, falls back to a
    restricted AST evaluator so plain arithmetic (e.g. "1 + 1") still works
    instead of returning an import error. Any failure yields "Error: <details>"
    rather than raising.
    """
    try:
        from sympy import sympify
    except ImportError:
        # sympy not installed: degrade gracefully for basic arithmetic.
        return _calculate_ast(expression)
    try:
        return str(sympify(expression))
    except Exception as e:
        return f"Error: {e}"


def _calculate_ast(expression: str) -> str:
    """Safely evaluate basic arithmetic (+, -, *, /, %, **, unary +/-)."""
    import ast
    import operator as op

    ops = {
        ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
        ast.Div: op.truediv, ast.Mod: op.mod, ast.Pow: op.pow,
        ast.USub: op.neg, ast.UAdd: op.pos,
    }

    def _eval(node):
        # Only numeric literals and whitelisted operators are allowed;
        # anything else (names, calls, strings) is rejected.
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.operand))
        raise ValueError(f"unsupported expression: {expression!r}")

    try:
        # Mirror sympy's convert_xor behaviour: '^' means exponentiation.
        tree = ast.parse(expression.replace("^", "**"), mode="eval")
        return str(_eval(tree.body))
    except Exception as e:
        return f"Error: {e}"


# Dialogue engine: function-routing logic
import json

def route_llm_output(llm_output: str) -> str:
    """
    Route an LLM response to the correct tool if it is a function call,
    otherwise return the text unchanged.

    A function call is a JSON object of the form
    {"function": ..., "arguments": {...}}; any other output (plain prose,
    or JSON that is not such an object) is treated as the final answer.
    """
    try:
        output = json.loads(llm_output)
    except (json.JSONDecodeError, TypeError):
        # Not JSON at all; return the text directly.
        return llm_output

    # json.loads also accepts scalars and arrays ("42", "[1, 2]", "true");
    # calling .get() on those raised an uncaught AttributeError before.
    if not isinstance(output, dict) or "function" not in output:
        return llm_output

    func_name = output["function"]
    args = output.get("arguments", {})
    if not isinstance(args, dict):
        # Malformed call (e.g. "arguments": "2+2"); treat as no arguments.
        args = {}

    if func_name == "search_arxiv":
        return search_arxiv(args.get("query", ""))
    elif func_name == "calculate":
        return calculate(args.get("expression", ""))
    else:
        return f"Error: Unknown function '{func_name}'"



# Example FastAPI endpoint (sketch)
from fastapi import FastAPI
app = FastAPI()

@app.post("/api/voice-query/")
async def voice_query_endpoint(request: dict):
    """Sketch of the voice-query endpoint: pass the user's text to the LLM,
    route any tool calls, and return the reply.

    NOTE(review): `llama3_chat_model` is not defined anywhere in this
    sketch; it must be supplied before this endpoint can run.
    """
    # Assume request has 'text': the user's query string
    user_text = request.get("text", "")
    # Call Llama 3 model (instructed to output function calls when needed)
    llm_response = llama3_chat_model(user_text)
    # Process LLM output and possibly call tools
    reply_text = route_llm_output(llm_response)
    # TODO: convert reply_text to speech (TTS) — currently plain JSON text
    # is returned, no audio is produced here.
    return {"response": reply_text}






26 changes: 20 additions & 6 deletions Class 6 Homework.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -71,7 +71,7 @@
" Simulate an arXiv search or return a dummy passage for the given query.\n",
" In a real system, this might query the arXiv API and extract a summary.\n",
" \"\"\"\n",
" # Example placeholder implementation:\n",
" # returning dummy text to avoid rate limits\n",
" return f\"[arXiv snippet related to '{query}']\"\n",
"\n",
"def calculate(expression: str) -> str:\n",
Expand All @@ -80,15 +80,15 @@
" \"\"\"\n",
" try:\n",
" from sympy import sympify\n",
" result = sympify(expression) # use sympy for safe evaluation\n",
" result = sympify(expression) \n",
" return str(result)\n",
" except Exception as e:\n",
" return f\"Error: {e}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -179,10 +179,24 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
How to run:
From the repository root (so that the `server.src.main` module path resolves), run
```bash
uvicorn server.src.main:app --reload
```

In the `web` directory, run
```bash
npm i
npm run start
```

Example test logs are in `example_test_logs.txt`.

Changes made:
* Updated arxiv response
* Add logging for test logs
* Add frontend UI
* Change to use ollama cloud model instead
* Added prompts for LLM to use tools
* Perform TTS instead of just using text queries
* Changed tool usage format to fit with ollama's tool_calls
20 changes: 20 additions & 0 deletions example_test_logs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
User: Tell me about lead's binding energy.
function=Function(name='search_arxiv', arguments={'arguments': {'query': 'lead binding energy nuclear binding energy per nucleon'}, 'f
unction': 'search_arxiv'})
Function call made to: search_arxiv
Function output: [arXiv snippet related to 'lead binding energy nuclear binding energy per nucleon']
Output: [arXiv snippet related to 'lead binding energy nuclear binding energy per nucleon']

User: What's 2 to the power of 10?
function=Function(name='calculate', arguments={'arguments': {'expression': '2^10'}, 'function': 'calculate'})
Function call made to: calculate
Function output: 1024
Output: 1024

User: How are scissors made?
LLM output: Scissors are made by forging or casting two steel blades that are heated and shaped, then hardened and tempered, have a pi
vot hole drilled and a metal pin inserted, the blades are polished and fitted together, and rubber or plastic handles are molded or gl
ued onto the pin for comfortable use.
Output: Scissors are made by forging or casting two steel blades that are heated and shaped, then hardened and tempered, have a pivot
hole drilled and a metal pin inserted, the blades are polished and fitted together, and rubber or plastic handles are molded or glued
onto the pin for comfortable use.
9 changes: 9 additions & 0 deletions server/src/audio/transcribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import whisper
import os
import tempfile

# Load the ASR model once at import time so every request reuses it.
# "small" trades some accuracy for speed.
asr_model = whisper.load_model("small")


def transcribe_audio(audio_bytes):
    """
    Transcribe raw audio bytes to text with Whisper.

    The bytes are written to a uniquely named temporary file because
    transcribe() is called with a file path here.
    """
    # A unique temp file (instead of a fixed "temp.wav" in the CWD) prevents
    # concurrent requests from clobbering each other's audio, and the file
    # is removed afterwards instead of being left behind.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    try:
        tmp.write(audio_bytes)
        tmp.close()
        result = asr_model.transcribe(tmp.name)
    finally:
        tmp.close()
        os.unlink(tmp.name)
    return result["text"]
12 changes: 12 additions & 0 deletions server/src/audio/tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import pyttsx3
import os
import shutil
import tempfile


def text_to_speech(text: str, path: str):
    """
    Synthesize *text* to a WAV file at *path* using pyttsx3.

    Audio is rendered to a uniquely named temporary file and then moved into
    place, so a reader of *path* never observes a half-written file.
    """
    engine = pyttsx3.init()

    # tempfile.mktemp() (used previously) is deprecated and racy: another
    # process could claim the same name between generation and use.
    # NamedTemporaryFile reserves the name safely.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()  # pyttsx3 writes to the path itself
    try:
        engine.save_to_file(text, tmp.name)
        engine.runAndWait()
        shutil.move(tmp.name, path)
    finally:
        # Clean up if synthesis failed before the move.
        if os.path.exists(tmp.name):
            os.unlink(tmp.name)
70 changes: 70 additions & 0 deletions server/src/llm/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from ollama import Client
import json
from server.src.tools.tools import calculate, search_arxiv

# Client for the local Ollama daemon on its default port.
client = Client(host='http://localhost:11434')

# NOTE(review): the "-cloud" suffix suggests this model is proxied to
# Ollama's cloud backend rather than run locally — confirm in deployment.
model_name = "gpt-oss:20b-cloud"

# System prompts prepended to every user turn: keep replies to one short
# sentence (they are spoken via TTS) and describe the JSON format for the
# two available tools (calculate, search_arxiv).
prompts = [
    {"role": "system", "content": "You are a helpful assistant. Your responses will be used for TTS as a live conversation, so keep your responses short. The user will not be able to see any visuals or read any latex/math. Respond in one sentence."},
    {"role": "system", "content": """You have access to 2 tools. When appropriate, invoke these tools in a JSON format.

The first tool is calculate, and it is used to calculate mathematical expressions. Use this when any math is required, even if it is trivial, like 1 + 1. The format is: {"function": "calculate", "arguments": { "expression": string }}

Your second tool is searching arXiv for relevant passages from scientific papers. Use this whenever the user asks a question related to science. The format is:

{"function": "search_arxiv", "arguments": { "query": string }}"""}
]


def route_llm_output(llm_output: str) -> str:
    """
    Route an LLM response to the correct tool if it is a function call,
    otherwise return the text unchanged.

    A function call is a JSON object of the form
    {"function": ..., "arguments": {...}}; anything else (plain prose, or
    JSON that is not such an object) is treated as the final answer.
    """
    try:
        parsed = json.loads(llm_output)
    except (json.JSONDecodeError, TypeError):
        # Not JSON at all; return the text directly.
        return llm_output

    # json.loads also accepts scalars and arrays ("42", "[1, 2]", "true");
    # calling .get() on those raised an uncaught AttributeError before.
    if not isinstance(parsed, dict) or "function" not in parsed:
        return llm_output

    func_name = parsed["function"]
    args = parsed.get("arguments", {})
    if not isinstance(args, dict):
        # Malformed call (e.g. "arguments": "2+2"); treat as no arguments.
        args = {}

    print("Function call made to: ", func_name)
    if func_name == "search_arxiv":
        output = search_arxiv(args.get("query", ""))
    elif func_name == "calculate":
        output = calculate(args.get("expression", ""))
    else:
        return f"Error: Unknown function '{func_name}'"
    print("Function output: ", output)
    return output

def _tool_args(call):
    """Return the argument dict for an Ollama tool call, unwrapping the
    nested {"function": ..., "arguments": {...}} shape the model sometimes
    emits (see example_test_logs.txt) while also accepting the flat form."""
    args = call.function.arguments or {}
    # Nested form: {"arguments": {...}, "function": ...}; flat form: {...}.
    inner = args.get("arguments", args)
    return inner if isinstance(inner, dict) else {}


def generate_response(user_text):
    """
    Send *user_text* to the LLM (with the system prompts) and return the
    reply text, executing the first requested tool call if there is one.
    """
    conversation = [*prompts, {"role": "user", "content": user_text}]

    response = client.chat(model=model_name, messages=conversation, options={
    })

    tool_calls = response.message.tool_calls
    if tool_calls:
        # Only the first tool call is honored; extra calls are ignored.
        call = tool_calls[0]
        print(call)
        name = call.function.name
        print("Function call made to: ", name)
        args = _tool_args(call)
        # Previously indexed arguments["arguments"][...] directly, which
        # raised KeyError when the model used the flat argument format.
        if name == "calculate":
            output = calculate(args.get("expression", ""))
        elif name == "search_arxiv":
            output = search_arxiv(args.get("query", ""))
        else:
            return f"Error: Unknown function '{name}'"
        print("Function output:", output)
        return output

    generated_text = response["message"]["content"]
    print("LLM output:", generated_text)

    return generated_text
35 changes: 35 additions & 0 deletions server/src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import tempfile

from fastapi import FastAPI, UploadFile, File
from fastapi.responses import FileResponse
from fastapi.middleware.cors import CORSMiddleware

from server.src.audio.transcribe import transcribe_audio
from server.src.audio.tts import text_to_speech
from server.src.llm.generate import generate_response

# Browser origins allowed to call this API (CORS); the React frontend
# in web/ is served from these during development.
origins = [
    "http://localhost:3000", # React dev server
    "http://127.0.0.1:3000" # sometimes React uses this instead
]

app = FastAPI()

# Allow the dev frontend to send cross-origin requests (any method/header,
# with credentials) to this server.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.post("/chat/")
async def chat_endpoint(file: UploadFile = File(...)):
    """
    Voice chat endpoint: accept an uploaded audio file, transcribe it,
    generate a reply with the LLM (possibly via tools), and return the
    reply as synthesized WAV audio.
    """
    audio_bytes = await file.read()

    user_text = transcribe_audio(audio_bytes)
    print("User:", user_text)
    bot_text = generate_response(user_text)
    print("Output: ", bot_text)

    # Unique file per request: the previous fixed "response.wav" let
    # concurrent requests overwrite each other's audio before it was sent.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    path = tmp.name
    text_to_speech(bot_text, path)

    # NOTE(review): the file is not deleted after the response is sent;
    # consider FileResponse(..., background=BackgroundTask(os.unlink, path)).
    return FileResponse(path, media_type="audio/wav")
18 changes: 18 additions & 0 deletions server/src/tools/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
def search_arxiv(query: str) -> str:
    """
    Simulate an arXiv search by producing a dummy passage for *query*.

    A real implementation would query the arXiv API and extract a summary;
    the canned snippet keeps the demo offline and free of rate limits.
    """
    snippet = f"[arXiv snippet related to '{query}']"
    return snippet

def calculate(expression: str) -> str:
    """
    Evaluate a mathematical expression and return the result as a string.

    Uses sympy when it is installed; if sympy is missing, falls back to a
    restricted AST evaluator so plain arithmetic (e.g. "1 + 1") still works
    instead of returning an import error. Any failure yields "Error: <details>"
    rather than raising.
    """
    try:
        from sympy import sympify
    except ImportError:
        # sympy not installed: degrade gracefully for basic arithmetic.
        return _calculate_ast(expression)
    try:
        return str(sympify(expression))
    except Exception as e:
        return f"Error: {e}"


def _calculate_ast(expression: str) -> str:
    """Safely evaluate basic arithmetic (+, -, *, /, %, **, unary +/-)."""
    import ast
    import operator as op

    ops = {
        ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
        ast.Div: op.truediv, ast.Mod: op.mod, ast.Pow: op.pow,
        ast.USub: op.neg, ast.UAdd: op.pos,
    }

    def _eval(node):
        # Only numeric literals and whitelisted operators are allowed;
        # anything else (names, calls, strings) is rejected.
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.operand))
        raise ValueError(f"unsupported expression: {expression!r}")

    try:
        # Mirror sympy's convert_xor behaviour: '^' means exponentiation.
        tree = ast.parse(expression.replace("^", "**"), mode="eval")
        return str(_eval(tree.body))
    except Exception as e:
        return f"Error: {e}"
4 changes: 4 additions & 0 deletions web/.eslintignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
node_modules/
build/
dist/
public/
Loading