Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
**.wav

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
Expand Down
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/Homework6-Submission.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

77 changes: 77 additions & 0 deletions .virtual_documents/Class 6 Homework.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@









# Function tool stubs (starter implementations)
def search_arxiv(query: str) -> str:
    """
    Simulate an arXiv search by producing a dummy passage for *query*.

    A real implementation would hit the arXiv API and summarize a paper;
    the canned snippet keeps the demo free of network calls and rate limits.
    """
    snippet = f"[arXiv snippet related to '{query}']"
    return snippet

def calculate(expression: str) -> str:
    """
    Evaluate a mathematical expression and return the result as a string.

    Uses sympy when it is installed; if sympy is missing, falls back to a
    restricted AST evaluator so plain arithmetic (e.g. "1 + 1") still works
    instead of returning an import error. Any failure yields "Error: <details>"
    rather than raising.
    """
    try:
        from sympy import sympify
    except ImportError:
        # sympy not installed: degrade gracefully for basic arithmetic.
        return _calculate_ast(expression)
    try:
        return str(sympify(expression))
    except Exception as e:
        return f"Error: {e}"


def _calculate_ast(expression: str) -> str:
    """Safely evaluate basic arithmetic (+, -, *, /, %, **, unary +/-)."""
    import ast
    import operator as op

    ops = {
        ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
        ast.Div: op.truediv, ast.Mod: op.mod, ast.Pow: op.pow,
        ast.USub: op.neg, ast.UAdd: op.pos,
    }

    def _eval(node):
        # Only numeric literals and whitelisted operators are allowed;
        # anything else (names, calls, strings) is rejected.
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.operand))
        raise ValueError(f"unsupported expression: {expression!r}")

    try:
        # Mirror sympy's convert_xor behaviour: '^' means exponentiation.
        tree = ast.parse(expression.replace("^", "**"), mode="eval")
        return str(_eval(tree.body))
    except Exception as e:
        return f"Error: {e}"


# Dialogue engine: function-routing logic
import json

def route_llm_output(llm_output: str) -> str:
    """
    Route an LLM response to the correct tool if it is a function call,
    otherwise return the text unchanged.

    A function call is a JSON object of the form
    {"function": ..., "arguments": {...}}; any other output (plain prose,
    or JSON that is not such an object) is treated as the final answer.
    """
    try:
        output = json.loads(llm_output)
    except (json.JSONDecodeError, TypeError):
        # Not JSON at all; return the text directly.
        return llm_output

    # json.loads also accepts scalars and arrays ("42", "[1, 2]", "true");
    # calling .get() on those raised an uncaught AttributeError before.
    if not isinstance(output, dict) or "function" not in output:
        return llm_output

    func_name = output["function"]
    args = output.get("arguments", {})
    if not isinstance(args, dict):
        # Malformed call (e.g. "arguments": "2+2"); treat as no arguments.
        args = {}

    if func_name == "search_arxiv":
        return search_arxiv(args.get("query", ""))
    elif func_name == "calculate":
        return calculate(args.get("expression", ""))
    else:
        return f"Error: Unknown function '{func_name}'"



# Example FastAPI endpoint (sketch)
from fastapi import FastAPI
app = FastAPI()

@app.post("/api/voice-query/")
async def voice_query_endpoint(request: dict):
    """Sketch of the voice-query endpoint: pass the user's text to the LLM,
    route any tool calls, and return the reply.

    NOTE(review): `llama3_chat_model` is not defined anywhere in this
    sketch; it must be supplied before this endpoint can run.
    """
    # Assume request has 'text': the user's query string
    user_text = request.get("text", "")
    # Call Llama 3 model (instructed to output function calls when needed)
    llm_response = llama3_chat_model(user_text)
    # Process LLM output and possibly call tools
    reply_text = route_llm_output(llm_response)
    # TODO: convert reply_text to speech (TTS) — currently plain JSON text
    # is returned, no audio is produced here.
    return {"response": reply_text}






26 changes: 20 additions & 6 deletions Class 6 Homework.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -71,7 +71,7 @@
" Simulate an arXiv search or return a dummy passage for the given query.\n",
" In a real system, this might query the arXiv API and extract a summary.\n",
" \"\"\"\n",
" # Example placeholder implementation:\n",
" # returning dummy text to avoid rate limits\n",
" return f\"[arXiv snippet related to '{query}']\"\n",
"\n",
"def calculate(expression: str) -> str:\n",
Expand All @@ -80,15 +80,15 @@
" \"\"\"\n",
" try:\n",
" from sympy import sympify\n",
" result = sympify(expression) # use sympy for safe evaluation\n",
" result = sympify(expression) \n",
" return str(result)\n",
" except Exception as e:\n",
" return f\"Error: {e}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -179,10 +179,24 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
How to run:
From the repository root (so that the `server.src.main` module path resolves), run
```bash
uvicorn server.src.main:app --reload
```

In the `web` directory, run
```bash
npm i
npm run start
```

Example test logs are in `example_test_logs.txt`.

Changes made:
* Updated arxiv response
* Add logging for test logs
* Add frontend UI
* Change to use ollama cloud model instead
* Added prompts for LLM to use tools
* Perform TTS instead of just using text queries
* Changed tool usage format to fit with ollama's tool_calls
20 changes: 20 additions & 0 deletions example_test_logs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
User: Tell me about lead's binding energy.
function=Function(name='search_arxiv', arguments={'arguments': {'query': 'lead binding energy nuclear binding energy per nucleon'}, 'f
unction': 'search_arxiv'})
Function call made to: search_arxiv
Function output: [arXiv snippet related to 'lead binding energy nuclear binding energy per nucleon']
Output: [arXiv snippet related to 'lead binding energy nuclear binding energy per nucleon']

User: What's 2 to the power of 10?
function=Function(name='calculate', arguments={'arguments': {'expression': '2^10'}, 'function': 'calculate'})
Function call made to: calculate
Function output: 1024
Output: 1024

User: How are scissors made?
LLM output: Scissors are made by forging or casting two steel blades that are heated and shaped, then hardened and tempered, have a pi
vot hole drilled and a metal pin inserted, the blades are polished and fitted together, and rubber or plastic handles are molded or gl
ued onto the pin for comfortable use.
Output: Scissors are made by forging or casting two steel blades that are heated and shaped, then hardened and tempered, have a pivot
hole drilled and a metal pin inserted, the blades are polished and fitted together, and rubber or plastic handles are molded or glued
onto the pin for comfortable use.
9 changes: 9 additions & 0 deletions server/src/audio/transcribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import whisper
import os
import tempfile

# Load the ASR model once at import time so every request reuses it.
# "small" trades some accuracy for speed.
asr_model = whisper.load_model("small")


def transcribe_audio(audio_bytes):
    """
    Transcribe raw audio bytes to text with Whisper.

    The bytes are written to a uniquely named temporary file because
    transcribe() is called with a file path here.
    """
    # A unique temp file (instead of a fixed "temp.wav" in the CWD) prevents
    # concurrent requests from clobbering each other's audio, and the file
    # is removed afterwards instead of being left behind.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    try:
        tmp.write(audio_bytes)
        tmp.close()
        result = asr_model.transcribe(tmp.name)
    finally:
        tmp.close()
        os.unlink(tmp.name)
    return result["text"]
12 changes: 12 additions & 0 deletions server/src/audio/tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import pyttsx3
import os
import shutil
import tempfile


def text_to_speech(text: str, path: str):
    """
    Synthesize *text* to a WAV file at *path* using pyttsx3.

    Audio is rendered to a uniquely named temporary file and then moved into
    place, so a reader of *path* never observes a half-written file.
    """
    engine = pyttsx3.init()

    # tempfile.mktemp() (used previously) is deprecated and racy: another
    # process could claim the same name between generation and use.
    # NamedTemporaryFile reserves the name safely.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()  # pyttsx3 writes to the path itself
    try:
        engine.save_to_file(text, tmp.name)
        engine.runAndWait()
        shutil.move(tmp.name, path)
    finally:
        # Clean up if synthesis failed before the move.
        if os.path.exists(tmp.name):
            os.unlink(tmp.name)
70 changes: 70 additions & 0 deletions server/src/llm/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from ollama import Client
import json
from server.src.tools.tools import calculate, search_arxiv

# Client for the local Ollama daemon on its default port.
client = Client(host='http://localhost:11434')

# NOTE(review): the "-cloud" suffix suggests this model is proxied to
# Ollama's cloud backend rather than run locally — confirm in deployment.
model_name = "gpt-oss:20b-cloud"

# System prompts prepended to every user turn: keep replies to one short
# sentence (they are spoken via TTS) and describe the JSON format for the
# two available tools (calculate, search_arxiv).
prompts = [
    {"role": "system", "content": "You are a helpful assistant. Your responses will be used for TTS as a live conversation, so keep your responses short. The user will not be able to see any visuals or read any latex/math. Respond in one sentence."},
    {"role": "system", "content": """You have access to 2 tools. When appropriate, invoke these tools in a JSON format.

The first tool is calculate, and it is used to calculate mathematical expressions. Use this when any math is required, even if it is trivial, like 1 + 1. The format is: {"function": "calculate", "arguments": { "expression": string }}

Your second tool is searching arXiv for relevant passages from scientific papers. Use this whenever the user asks a question related to science. The format is:

{"function": "search_arxiv", "arguments": { "query": string }}"""}
]


def route_llm_output(llm_output: str) -> str:
    """
    Route an LLM response to the correct tool if it is a function call,
    otherwise return the text unchanged.

    A function call is a JSON object of the form
    {"function": ..., "arguments": {...}}; anything else (plain prose, or
    JSON that is not such an object) is treated as the final answer.
    """
    try:
        parsed = json.loads(llm_output)
    except (json.JSONDecodeError, TypeError):
        # Not JSON at all; return the text directly.
        return llm_output

    # json.loads also accepts scalars and arrays ("42", "[1, 2]", "true");
    # calling .get() on those raised an uncaught AttributeError before.
    if not isinstance(parsed, dict) or "function" not in parsed:
        return llm_output

    func_name = parsed["function"]
    args = parsed.get("arguments", {})
    if not isinstance(args, dict):
        # Malformed call (e.g. "arguments": "2+2"); treat as no arguments.
        args = {}

    print("Function call made to: ", func_name)
    if func_name == "search_arxiv":
        output = search_arxiv(args.get("query", ""))
    elif func_name == "calculate":
        output = calculate(args.get("expression", ""))
    else:
        return f"Error: Unknown function '{func_name}'"
    print("Function output: ", output)
    return output

def _tool_args(call):
    """Return the argument dict for an Ollama tool call, unwrapping the
    nested {"function": ..., "arguments": {...}} shape the model sometimes
    emits (see example_test_logs.txt) while also accepting the flat form."""
    args = call.function.arguments or {}
    # Nested form: {"arguments": {...}, "function": ...}; flat form: {...}.
    inner = args.get("arguments", args)
    return inner if isinstance(inner, dict) else {}


def generate_response(user_text):
    """
    Send *user_text* to the LLM (with the system prompts) and return the
    reply text, executing the first requested tool call if there is one.
    """
    conversation = [*prompts, {"role": "user", "content": user_text}]

    response = client.chat(model=model_name, messages=conversation, options={
    })

    tool_calls = response.message.tool_calls
    if tool_calls:
        # Only the first tool call is honored; extra calls are ignored.
        call = tool_calls[0]
        print(call)
        name = call.function.name
        print("Function call made to: ", name)
        args = _tool_args(call)
        # Previously indexed arguments["arguments"][...] directly, which
        # raised KeyError when the model used the flat argument format.
        if name == "calculate":
            output = calculate(args.get("expression", ""))
        elif name == "search_arxiv":
            output = search_arxiv(args.get("query", ""))
        else:
            return f"Error: Unknown function '{name}'"
        print("Function output:", output)
        return output

    generated_text = response["message"]["content"]
    print("LLM output:", generated_text)

    return generated_text
35 changes: 35 additions & 0 deletions server/src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import tempfile

from fastapi import FastAPI, UploadFile, File
from fastapi.responses import FileResponse
from fastapi.middleware.cors import CORSMiddleware

from server.src.audio.transcribe import transcribe_audio
from server.src.audio.tts import text_to_speech
from server.src.llm.generate import generate_response

# Browser origins allowed to call this API (CORS); the React frontend
# in web/ is served from these during development.
origins = [
    "http://localhost:3000", # React dev server
    "http://127.0.0.1:3000" # sometimes React uses this instead
]

app = FastAPI()

# Allow the dev frontend to send cross-origin requests (any method/header,
# with credentials) to this server.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.post("/chat/")
async def chat_endpoint(file: UploadFile = File(...)):
    """
    Voice chat endpoint: accept an uploaded audio file, transcribe it,
    generate a reply with the LLM (possibly via tools), and return the
    reply as synthesized WAV audio.
    """
    audio_bytes = await file.read()

    user_text = transcribe_audio(audio_bytes)
    print("User:", user_text)
    bot_text = generate_response(user_text)
    print("Output: ", bot_text)

    # Unique file per request: the previous fixed "response.wav" let
    # concurrent requests overwrite each other's audio before it was sent.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    path = tmp.name
    text_to_speech(bot_text, path)

    # NOTE(review): the file is not deleted after the response is sent;
    # consider FileResponse(..., background=BackgroundTask(os.unlink, path)).
    return FileResponse(path, media_type="audio/wav")
18 changes: 18 additions & 0 deletions server/src/tools/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
def search_arxiv(query: str) -> str:
    """
    Simulate an arXiv search by producing a dummy passage for *query*.

    A real implementation would query the arXiv API and extract a summary;
    the canned snippet keeps the demo offline and free of rate limits.
    """
    snippet = f"[arXiv snippet related to '{query}']"
    return snippet

def calculate(expression: str) -> str:
    """
    Evaluate a mathematical expression and return the result as a string.

    Uses sympy when it is installed; if sympy is missing, falls back to a
    restricted AST evaluator so plain arithmetic (e.g. "1 + 1") still works
    instead of returning an import error. Any failure yields "Error: <details>"
    rather than raising.
    """
    try:
        from sympy import sympify
    except ImportError:
        # sympy not installed: degrade gracefully for basic arithmetic.
        return _calculate_ast(expression)
    try:
        return str(sympify(expression))
    except Exception as e:
        return f"Error: {e}"


def _calculate_ast(expression: str) -> str:
    """Safely evaluate basic arithmetic (+, -, *, /, %, **, unary +/-)."""
    import ast
    import operator as op

    ops = {
        ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
        ast.Div: op.truediv, ast.Mod: op.mod, ast.Pow: op.pow,
        ast.USub: op.neg, ast.UAdd: op.pos,
    }

    def _eval(node):
        # Only numeric literals and whitelisted operators are allowed;
        # anything else (names, calls, strings) is rejected.
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.operand))
        raise ValueError(f"unsupported expression: {expression!r}")

    try:
        # Mirror sympy's convert_xor behaviour: '^' means exponentiation.
        tree = ast.parse(expression.replace("^", "**"), mode="eval")
        return str(_eval(tree.body))
    except Exception as e:
        return f"Error: {e}"
4 changes: 4 additions & 0 deletions web/.eslintignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
node_modules/
build/
dist/
public/
Loading