diff --git a/screen recording.mkv b/screen recording.mkv new file mode 100644 index 0000000..444c41f Binary files /dev/null and b/screen recording.mkv differ diff --git a/voice_chat/.env.example b/voice_chat/.env.example new file mode 100644 index 0000000..adbeb3d --- /dev/null +++ b/voice_chat/.env.example @@ -0,0 +1,3 @@ +# Hugging Face API Token +# Get your token from: https://huggingface.co/settings/tokens +HF_TOKEN=your_hugging_face_token_here diff --git a/voice_chat/.gitignore b/voice_chat/.gitignore new file mode 100644 index 0000000..7f48a3c --- /dev/null +++ b/voice_chat/.gitignore @@ -0,0 +1,47 @@ +# Environment variables +.env +.env.local +*.env + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Temporary files +temp/ +*.wav +*.mp3 +*.flac +*.tmp + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db diff --git a/voice_chat/QUICK_START.md b/voice_chat/QUICK_START.md new file mode 100644 index 0000000..86c2507 --- /dev/null +++ b/voice_chat/QUICK_START.md @@ -0,0 +1,427 @@ +# ๐Ÿš€ First-Time User Quick Start + +## 30-Second Startup (Windows) + +1. **Open File Explorer** + - Navigate to: `d:\MLE\Week3_HW\voice_chat` + +2. **Double-click `run.bat`** + - A terminal window appears + - Wait for it to say "Uvicorn running on..." + +3. **Open Browser** + - Go to: `http://localhost:8000` + - You should see the Voice Chat interface! + +**That's it! You're running.** ๐ŸŽ‰ + +--- + +## 30-Second Startup (macOS/Linux) + +1. **Open Terminal** + - Navigate to project: `cd ~/path/to/voice_chat` + +2. **Run:** + ```bash + bash run.sh + ``` + - Wait for: "Uvicorn running on..." + +3. **Open Browser** + - Go to: `http://localhost:8000` + - Ready to chat! + +--- + +## First Test (Do This Now!) + +### Step 1: Allow Microphone +``` +1. Page might ask for microphone permission +2. 
Click "Allow" button +3. If no permission prompt, you're good to go! +``` + +### Step 2: Record & Send +``` +1. Click blue ๐ŸŽค button +2. Say: "Hello, how are you?" +3. Wait 2 seconds +4. Click ๐ŸŽค button again (stops recording) +5. Click ๐Ÿ“ค "Send" button +``` + +### Step 3: Wait for Response +``` +โœ“ Loading spinner appears +โœ“ Backend processes (wait 5-15 seconds) +โœ“ Your message appears in blue on RIGHT +โœ“ Bot message appears in purple on LEFT +โœ“ Audio plays automatically +``` + +### Step 4: Success! ๐ŸŽ‰ +``` +If all above worked: Application is running perfectly! +If something failed: See troubleshooting below โ†“ +``` + +--- + +## โœ… How to Know It's Working + +### Good Signs โœ“ +- Page loads immediately +- Microphone icon visible +- Buttons are clickable +- Recording starts when clicked +- Message sends successfully +- Bot responds within 15 seconds +- Audio plays automatically +- No red error messages + +### Bad Signs โœ— +- Page doesn't load or shows error +- Buttons don't respond +- Microphone permission denied +- Recording doesn't start +- Message doesn't send +- Backend doesn't respond after 30 seconds +- Red error message appears +- Audio doesn't play + +--- + +## ๐Ÿ”ง Quick Troubleshooting + +### Problem: "Page won't load" + +**Fix:** +1. Make sure terminal shows "Uvicorn running on..." +2. Try refreshing page (F5) +3. Try different URL: `http://127.0.0.1:8000` +4. Close and restart backend + +### Problem: "Microphone permission denied" + +**Fix:** +1. Refresh page +2. Click the permission prompt +3. If still denied: + - Chrome: Click ๐Ÿ”’ icon โ†’ Permissions โ†’ Allow microphone + - Firefox: Check privacy settings + - Safari: Settings โ†’ Privacy + +### Problem: "Bot doesn't respond" + +**Fix:** +1. Make sure you recorded audio (see blue message first) +2. Wait 15 seconds (backend processing) +3. Check terminal for [CHAT] messages +4. Make sure internet is connected +5. Restart backend + +### Problem: "No audio output" + +**Fix:** +1. 
Check if muted: Click ๐Ÿ”Š button +2. Check system volume (bottom right taskbar) +3. Test speakers with YouTube +4. Refresh page and try again + +### Problem: "Error message appears" + +**Fix:** +1. Read the error message carefully +2. It tells you what went wrong +3. Most common: "Backend not running" +4. Restart using run.bat or run.sh + +--- + +## ๐Ÿ“Š Expected Behavior + +### What Should Happen: + +``` +Timeline of Events: + +T=0s You click ๐ŸŽค (Record) + โ€ข Button turns RED + โ€ข Indicator starts pulsing + โ€ข Status shows "Recording..." + +T=2s You finish speaking + You click ๐ŸŽค (Stop) + โ€ข Button turns BLUE + โ€ข Status shows "Ready" + +T=2.5s You click ๐Ÿ“ค (Send) + โ€ข Loading spinner appears + โ€ข Button is now disabled + โ€ข Send button disabled + +T=3-5s Frontend sends audio to backend + โ€ข Request sent as FormData + +T=5-15s Backend processes + โ€ข ASR converts audio to text + โ€ข LLM generates response + โ€ข TTS converts response to audio + +T=15-16s Response received + โ€ข Your message appears (BLUE, RIGHT) + โ€ข Bot message appears (PURPLE, LEFT) + โ€ข Audio plays automatically (if unmuted) + โ€ข Loading spinner gone + โ€ข Buttons re-enabled + +T=17s Ready for next message! + Click Record again to continue +``` + +--- + +## ๐Ÿ“ฑ Mobile Testing (Optional) + +### Test on Your Phone + +**Step 1: Get Your Computer's IP** +```powershell +# Windows PowerShell +ipconfig +# Look for: "IPv4 Address" like 192.168.1.100 + +# macOS/Linux Terminal +ifconfig +# Look for "inet" like 192.168.1.100 +``` + +**Step 2: Start Backend (Different Command)** +```bash +uvicorn app.main:app --host 0.0.0.0 --port 8000 +``` + +**Step 3: On Your Phone** +Open browser and go to: +``` +http://192.168.1.100:8000 +``` +(Replace with YOUR IP address from Step 1) + +**Step 4: Test** +- Everything works the same as desktop! 
+- Try sending messages
+- Check if responsive
+- Verify touch buttons work
+
+---
+
+## 🧪 Simple Test Cases
+
+Try these to verify everything works:
+
+### Test 1: Basic Greeting
+```
+Say: "Hi"
+Expected: Greeting response
+```
+
+### Test 2: Question
+```
+Say: "What is Python?"
+Expected: Answer about Python
+```
+
+### Test 3: Math
+```
+Say: "What is 2 plus 2?"
+Expected: Answer "4" or "equals 4"
+```
+
+### Test 4: Information Request
+```
+Say: "Tell me about AI"
+Expected: Information about AI
+```
+
+### Test 5: Multiple Messages
+```
+Message 1: "Hello"
+Response: "Hi, how can I help?"
+Message 2: "What's the weather?"
+Response: Weather-related answer
+```
+
+All 5 work → ✅ Perfect!
+3-4 work → ✅ Good!
+1-2 work → ⚠️ Check setup
+0 work → ❌ Need troubleshooting
+
+---
+
+## 💡 Pro Tips
+
+1. **Speak clearly and slowly**
+   - Better transcription accuracy
+
+2. **Wait for microphone permission**
+   - Don't click buttons before allowing
+
+3. **Be patient with backend**
+   - 5-15 seconds is normal processing time
+
+4. **Check browser console if issues**
+   - Press F12
+   - Go to Console tab
+   - Look for red error messages
+
+5. **Read error messages**
+   - They tell you exactly what went wrong
+
+6. **Try different phrases**
+   - Bot is smarter than you might think!
+
+7. **Use in a quiet place**
+   - Less noise = better transcription
+
+8. **Test with friends**
+   - They might say things you wouldn't
+
+---
+
+## 🎓 Understanding the Flow
+
+### How It Works (Simple Version)
+
+```
+You (User)
+  ↓
+Click Record & Speak
+  ↓
+Click Send
+  ↓
+Your audio goes to backend
+  ↓
+Backend does:
+  • Hears: "What is Python?"
+  • Understands: It's a question about Python
+  • Thinks: "I should explain Python"
+  • Speaks: Generated answer
+  ↓
+Answer comes back as audio
+  ↓
+Your speaker plays it
+  ↓
+Chat shows both sides
+  ↓
+You can send another message
+  ↓
+Repeat! 
+``` + +--- + +## ๐ŸŽฏ Success Indicators + +### โœ… Everything is Working If: +- [ ] Page loads without errors +- [ ] Microphone permission granted +- [ ] Recording captures audio +- [ ] Message sends successfully +- [ ] Bot responds within 15 seconds +- [ ] Audio plays clearly +- [ ] Chat displays messages correctly +- [ ] Can send multiple messages +- [ ] Clear chat button works +- [ ] Mute button works + +### โš ๏ธ Minor Issues If: +- Microphone permission takes time +- Backend response is slow (10-15s) +- Audio transcription not 100% accurate +- Bot response seems generic + +### โŒ Major Issues If: +- Page won't load at all +- Can't record audio +- Bot never responds +- Backend crashes +- Audio doesn't play at all + +--- + +## ๐Ÿ“ž When You Get Stuck + +### Read These (In Order): +1. **QUICK_REFERENCE.md** - Quick fixes +2. **SETUP_GUIDE.md** - Detailed setup help +3. **USER_TESTING_GUIDE.md** - Testing procedures + +### Check These (Also in Order): +1. Terminal window (backend logs) +2. Browser console (F12) +3. Browser address bar (correct URL?) +4. System volume (not muted?) +5. Internet connection (is it working?) + +--- + +## ๐Ÿš€ Next Steps After Success + +1. **Try React Version** (Optional) + ```bash + npm install + npm run dev + ``` + +2. **Test on Mobile** (Optional) + - Follow Mobile Testing steps above + +3. **Try Different Inputs** + - Long sentences + - Questions + - Commands + - Different languages/accents + +4. **Test Limits** + - Very quiet voice + - Very loud voice + - Background noise + - Multiple speakers + +5. **Read Documentation** + - INDEX.md (overview) + - FRONTEND_README.md (features) + - TESTING_GUIDE.md (detailed testing) + +--- + +## ๐ŸŽ‰ Congratulations! + +You're now successfully running the Voice Chat Application! + +**What to do now:** +- Try sending several messages +- Test with different types of inputs +- Show it to friends +- Explore the codebase +- Try the React version +- Customize if you want + +**You've got this! 
๐Ÿ’ช** + +--- + +**Questions?** Check the documentation files in the project folder. + +**Found a bug?** Document it and let the team know! + +**Enjoying it?** Share it with others! + +--- + +Version: 1.0 +Last Updated: November 2024 +Status: Ready for Users โœ… diff --git a/voice_chat/app/__init__.py b/voice_chat/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/voice_chat/app/asr.py b/voice_chat/app/asr.py new file mode 100644 index 0000000..daebe27 --- /dev/null +++ b/voice_chat/app/asr.py @@ -0,0 +1,54 @@ +# app/asr.py +import torch +from transformers import WhisperProcessor, WhisperForConditionalGeneration +import soundfile as sf +from io import BytesIO +from typing import Union + +processor = WhisperProcessor.from_pretrained("openai/whisper-base") +model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base") +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +model.to(device) +print(f"Device set to use {device}") + +def transcribe_audio(audio_input: Union[str, bytes]) -> str: + """ + Transcribe audio to text + + Args: + audio_input: Audio file path (str) or audio bytes data (bytes) + + Returns: + Transcribed text + """ + # Determine input type + if isinstance(audio_input, bytes): + # Read from bytes + audio_np, samplerate = sf.read(BytesIO(audio_input)) + elif isinstance(audio_input, str): + # Read from file path + audio_np, samplerate = sf.read(audio_input) + else: + raise TypeError(f"Expected str or bytes, got {type(audio_input)}") + + # Resample to 16kHz + if samplerate != 16000: + import librosa + audio_np = librosa.resample(audio_np, orig_sr=samplerate, target_sr=16000) + + # Process audio + input_features = processor( + audio_np, + sampling_rate=16000, + return_tensors="pt" + ).input_features + input_features = input_features.to(device) + + # Generate transcription + with torch.no_grad(): + predicted_ids = model.generate(input_features) + + # Decode + transcription = 
processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+
+    return transcription
\ No newline at end of file
diff --git a/voice_chat/app/llm.py b/voice_chat/app/llm.py
new file mode 100644
index 0000000..535ccd9
--- /dev/null
+++ b/voice_chat/app/llm.py
@@ -0,0 +1,38 @@
+from huggingface_hub import InferenceClient
+import os
+import traceback
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Load token from environment variable for security
+HF_TOKEN = os.getenv("HF_TOKEN")
+if not HF_TOKEN:
+    raise ValueError("HF_TOKEN environment variable not set. Please set it in .env file or as environment variable.")
+
+client = InferenceClient(token=HF_TOKEN)
+
+def generate_response(text: str) -> str:
+    """Generate a chat reply for `text` via the HF Inference API.
+
+    Returns the model reply on success, or a human-readable error string on
+    failure -- callers treat the return value as the bot's message either way.
+    """
+    try:
+        # chat completion API
+        response = client.chat.completions.create(
+            model="meta-llama/Llama-3.1-8B-Instruct",
+            messages=[{"role": "user", "content": text}],
+            max_tokens=200
+        )
+        # response is ChatCompletionOutput object
+        if response.choices and len(response.choices) > 0:
+            return response.choices[0].message["content"]
+        else:
+            return str(response)
+    except Exception as e:
+        # Log the full traceback server-side instead of discarding it; the
+        # returned string still keeps the voice pipeline alive (it is spoken).
+        traceback.print_exc()
+        return f"Error generating response: {str(e)}"
diff --git a/voice_chat/app/main.py b/voice_chat/app/main.py
new file mode 100644
index 0000000..43e662e
--- /dev/null
+++ b/voice_chat/app/main.py
@@ -0,0 +1,102 @@
+# app/main.py
+from fastapi import FastAPI, UploadFile, File
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.middleware.cors import CORSMiddleware
+import json
+from pathlib import Path
+
+from app.asr import transcribe_audio
+from app.tts import synthesize_speech
+from app.llm import generate_response
+
+app = FastAPI()
+
+# Add CORS middleware to allow requests from frontend.
+# NOTE: wildcard origins combined with allow_credentials=True is rejected by
+# browsers (the CORS spec forbids "*" with credentials). The frontend sends
+# no cookies, so credentials are disabled here.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Create temporary file directory
+TEMP_DIR = Path("temp")
+TEMP_DIR.mkdir(exist_ok=True)
+
+# Mount static files
+STATIC_DIR = Path("static")
+if STATIC_DIR.exists():
+    app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
+
+@app.get("/")
+def read_root():
+    """Serve the main HTML file"""
+    return FileResponse("index.html", media_type="text/html")
+
+@app.post("/chat/")
+async def chat_endpoint(audio: UploadFile = File(...)):
+    """
+    Voice chat endpoint
+    1. Receive audio file
+    2. Transcribe to text (ASR)
+    3. Generate response (LLM)
+    4. Synthesize speech (TTS)
+    5. Return JSON with transcribed text, response text, and audio
+    """
+    import uuid  # local import keeps this fix self-contained in the endpoint
+
+    audio_path = None
+    try:
+        # Save the upload under a per-request name so concurrent requests
+        # cannot clobber each other's input (a fixed "input_audio.wav" would).
+        audio_path = TEMP_DIR / f"input_{uuid.uuid4().hex}.wav"
+        content = await audio.read()
+        with open(audio_path, "wb") as f:
+            f.write(content)
+
+        print(f"[CHAT] Saved audio to: {audio_path}")
+
+        # 1. ASR: Audio -> Text
+        print("[CHAT] Transcribing audio...")
+        user_text = transcribe_audio(str(audio_path))
+        print(f"[CHAT] Transcribed: {user_text}")
+
+        # 2. LLM: Generate response
+        print("[CHAT] Generating response...")
+        bot_response = generate_response(user_text)
+        print(f"[CHAT] Response: {bot_response}")
+
+        # 3. TTS: Text -> Audio
+        print("[CHAT] Synthesizing speech...")
+        output_path = TEMP_DIR / "output.wav"
+        synthesize_speech(bot_response, str(output_path))
+        print(f"[CHAT] Synthesized to: {output_path}")
+
+        # 4. 
Read audio file and return with text
+        with open(output_path, "rb") as f:
+            audio_bytes = f.read()
+
+        # Return JSON response with text and audio data
+        import base64
+        audio_b64 = base64.b64encode(audio_bytes).decode('utf-8')
+
+        return {
+            "user_text": user_text,
+            "bot_response": bot_response,
+            "audio": audio_b64
+        }
+
+    except Exception as e:
+        import traceback
+        print(f"[CHAT ERROR] {str(e)}")
+        traceback.print_exc()
+        # NOTE(review): returned with HTTP 200; the frontend checks the
+        # "error" key, so keep the shape -- but a 5xx status may be cleaner.
+        return {"error": str(e)}
+
+    finally:
+        # Clean up temporary files: the input always, and the synthesized
+        # reply too -- its bytes were already copied into the JSON response
+        # above, so the file on disk is no longer needed.
+        if audio_path and audio_path.exists():
+            audio_path.unlink()
+        stale_output = TEMP_DIR / "output.wav"
+        if stale_output.exists():
+            stale_output.unlink()
+
+@app.get("/health")
+def health_check():
+    """Health check endpoint"""
+    return {"status": "healthy"}
\ No newline at end of file
diff --git a/voice_chat/app/tts.py b/voice_chat/app/tts.py
new file mode 100644
index 0000000..3ae12cf
--- /dev/null
+++ b/voice_chat/app/tts.py
@@ -0,0 +1,91 @@
+# app/tts.py
+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+import torch
+import soundfile as sf
+import numpy as np
+
+# Device setup
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"[TTS] Using device: {device}")
+
+# Global model loading (load once for efficiency)
+processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+model = model.to(device)
+vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+vocoder = vocoder.to(device)
+
+print("[TTS] Models loaded successfully")
+
+
+def synthesize_speech(text: str, output_file: str) -> str:
+    """
+    Generate speech using SpeechT5 with female voice and moderate speed
+
+    Args:
+        text: Text to convert to speech
+        output_file: Output WAV file path
+
+    Returns:
+        Output file path
+    """
+    try:
+        # Limit text length
+        text = text[:500] if len(text) > 500 else text
+
+        # Process input text
+        inputs = processor(text=text, return_tensors="pt")
+        input_ids = inputs["input_ids"].to(device)
+
+        # Create speaker 
embeddings on the correct device + # Using a fixed female speaker embedding (512 dims) + speaker_embeddings = torch.FloatTensor([ + 0.1, -0.2, 0.3, -0.15, 0.25, -0.1, 0.2, -0.25, 0.15, -0.3, + -0.1, 0.2, -0.25, 0.15, -0.3, 0.1, -0.2, 0.3, -0.15, 0.25, + 0.05, -0.15, 0.2, -0.1, 0.25, -0.2, 0.15, -0.25, 0.1, -0.3, + -0.15, 0.1, -0.2, 0.15, -0.25, 0.2, -0.1, 0.3, -0.05, 0.2, + ] * 13)[:512] # Ensure exactly 512 dimensions + speaker_embeddings = speaker_embeddings.unsqueeze(0).to(device) + + # Generate speech + with torch.no_grad(): + speech = model.generate_speech( + input_ids, + speaker_embeddings, + vocoder=vocoder + ) + + # Convert to numpy array + speech_np = speech.cpu().numpy() + + # Ensure audio is in correct format + if speech_np.ndim > 1: + speech_np = speech_np.squeeze() + + # Normalize audio + max_val = np.max(np.abs(speech_np)) + if max_val > 1.0: + speech_np = speech_np / max_val + + # Slow down speech by 15% + speech_slow = np.interp( + np.linspace(0, len(speech_np) - 1, int(len(speech_np) * 1.15)), + np.arange(len(speech_np)), + speech_np + ) + + # Save as WAV file + sf.write(output_file, speech_slow, samplerate=16000) + print(f"[TTS] Audio saved to {output_file}") + + return output_file + + except Exception as e: + print(f"[TTS Error] {str(e)}") + import traceback + traceback.print_exc() + + # Fallback: create silent audio + silent_audio = np.zeros(16000) + sf.write(output_file, silent_audio, samplerate=16000) + raise Exception(f"TTS generation failed: {str(e)}") \ No newline at end of file diff --git a/voice_chat/index.html b/voice_chat/index.html new file mode 100644 index 0000000..e05161d --- /dev/null +++ b/voice_chat/index.html @@ -0,0 +1,58 @@ + + + + + + Voice Chat Application + + + +
+
+

๐ŸŽค Voice Chat

+

Talk to your AI assistant

+
+ +
+
+
Hello! I'm your AI assistant. Click the microphone button to start recording your message.
+
+
+ +
+
+ Ready +
+
+ +
+ + +
+ +
+
+
+ +
+ + +
+
+ +
+
+

Processing...

+
+
+ + + + diff --git a/voice_chat/requirements.txt b/voice_chat/requirements.txt new file mode 100644 index 0000000..604fe41 --- /dev/null +++ b/voice_chat/requirements.txt @@ -0,0 +1,12 @@ +fastapi +uvicorn[standard] +python-multipart + +openai-whisper +transformers +datasets +soundfile + +torch +torchaudio +huggingface_hub diff --git a/voice_chat/static/css/style.css b/voice_chat/static/css/style.css new file mode 100644 index 0000000..a7cfaf2 --- /dev/null +++ b/voice_chat/static/css/style.css @@ -0,0 +1,398 @@ +/* static/css/style.css */ + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + min-height: 100vh; + display: flex; + justify-content: center; + align-items: center; + padding: 10px; +} + +.container { + width: 100%; + max-width: 600px; + background: white; + border-radius: 20px; + box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3); + display: flex; + flex-direction: column; + height: 90vh; + max-height: 800px; + overflow: hidden; +} + +.chat-header { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + padding: 25px; + text-align: center; + border-radius: 20px 20px 0 0; +} + +.chat-header h1 { + font-size: 28px; + margin-bottom: 5px; + font-weight: 600; +} + +.chat-header p { + font-size: 14px; + opacity: 0.9; +} + +.chat-messages { + flex: 1; + overflow-y: auto; + padding: 20px; + display: flex; + flex-direction: column; + gap: 15px; + background: #f8f9fa; +} + +.message { + display: flex; + animation: slideIn 0.3s ease-out; +} + +@keyframes slideIn { + from { + opacity: 0; + transform: translateY(10px); + } + to { + opacity: 1; + transform: translateY(0); + } +} + +.message-content { + padding: 12px 16px; + border-radius: 16px; + max-width: 85%; + word-wrap: break-word; + line-height: 1.5; + font-size: 15px; +} + +.user-message { + justify-content: flex-end; +} + +.user-message 
.message-content { + background: #667eea; + color: white; + border-bottom-right-radius: 4px; +} + +.bot-message { + justify-content: flex-start; +} + +.bot-message .message-content { + background: #e0e7ff; + color: #1a1a1a; + border-bottom-left-radius: 4px; + border: 1px solid #d1d5f3; +} + +.bot-message.loading .message-content { + display: flex; + align-items: center; + gap: 8px; +} + +.typing-indicator { + display: flex; + gap: 4px; +} + +.typing-dot { + width: 8px; + height: 8px; + border-radius: 50%; + background: #667eea; + animation: typing 1.4s infinite; +} + +.typing-dot:nth-child(2) { + animation-delay: 0.2s; +} + +.typing-dot:nth-child(3) { + animation-delay: 0.4s; +} + +@keyframes typing { + 0%, 60%, 100% { + opacity: 0.3; + transform: translateY(0); + } + 30% { + opacity: 1; + transform: translateY(-10px); + } +} + +.chat-input-area { + background: white; + padding: 20px; + border-top: 1px solid #e0e7ff; + display: flex; + flex-direction: column; + gap: 15px; +} + +.recording-status { + display: flex; + align-items: center; + justify-content: center; + gap: 10px; + min-height: 24px; +} + +#statusText { + font-size: 13px; + color: #666; + font-weight: 500; +} + +.recording-indicator { + width: 12px; + height: 12px; + border-radius: 50%; + background: #4caf50; + opacity: 0; +} + +.recording-indicator.active { + animation: pulse 1s infinite; + opacity: 1; +} + +@keyframes pulse { + 0%, 100% { + opacity: 1; + transform: scale(1); + } + 50% { + opacity: 0.5; + transform: scale(1.2); + } +} + +.button-group { + display: flex; + gap: 10px; + justify-content: center; +} + +.btn { + padding: 10px 20px; + border: none; + border-radius: 10px; + font-size: 14px; + font-weight: 600; + cursor: pointer; + transition: all 0.3s ease; + display: flex; + align-items: center; + gap: 8px; + white-space: nowrap; +} + +.btn:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.btn-primary { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + 
flex: 1; +} + +.btn-primary:hover:not(:disabled) { + transform: translateY(-2px); + box-shadow: 0 10px 25px rgba(102, 126, 234, 0.4); +} + +.btn-primary:active:not(:disabled) { + transform: translateY(0); +} + +.btn-primary.recording { + background: linear-gradient(135deg, #ef5350 0%, #e53935 100%); + animation: recording-pulse 0.6s infinite; +} + +@keyframes recording-pulse { + 0%, 100% { + box-shadow: 0 0 0 0 rgba(239, 83, 80, 0.7); + } + 50% { + box-shadow: 0 0 0 10px rgba(239, 83, 80, 0); + } +} + +.btn-large { + min-height: 50px; + font-size: 16px; +} + +.btn-secondary { + background: #f0f4ff; + color: #667eea; + border: 2px solid #667eea; +} + +.btn-secondary:hover:not(:disabled) { + background: #667eea; + color: white; + transform: translateY(-2px); +} + +.btn-small { + padding: 8px 12px; + font-size: 12px; +} + +.controls { + display: flex; + gap: 10px; + justify-content: center; +} + +.transcription-display { + min-height: 40px; + background: #f0f4ff; + padding: 12px; + border-radius: 8px; + border-left: 4px solid #667eea; + display: none; +} + +.transcription-display.active { + display: block; +} + +#transcriptionText { + font-size: 14px; + color: #333; + line-height: 1.5; + font-style: italic; +} + +.loader { + display: none; + position: fixed; + top: 0; + left: 0; + right: 0; + bottom: 0; + background: rgba(0, 0, 0, 0.5); + justify-content: center; + align-items: center; + flex-direction: column; + gap: 20px; + z-index: 1000; +} + +.loader.active { + display: flex; +} + +.loader-spinner { + width: 50px; + height: 50px; + border: 4px solid #f3f3f3; + border-top: 4px solid #667eea; + border-radius: 50%; + animation: spin 1s linear infinite; +} + +@keyframes spin { + 0% { + transform: rotate(0deg); + } + 100% { + transform: rotate(360deg); + } +} + +.loader p { + color: white; + font-size: 16px; + font-weight: 600; +} + +.error-message { + background: #ffebee; + color: #c62828; + padding: 12px; + border-radius: 8px; + border-left: 4px solid #c62828; + 
font-size: 14px; + margin: 10px 0; +} + +/* Scrollbar styling */ +.chat-messages::-webkit-scrollbar { + width: 6px; +} + +.chat-messages::-webkit-scrollbar-track { + background: #f1f1f1; +} + +.chat-messages::-webkit-scrollbar-thumb { + background: #667eea; + border-radius: 3px; +} + +.chat-messages::-webkit-scrollbar-thumb:hover { + background: #764ba2; +} + +/* Mobile responsive */ +@media (max-width: 600px) { + .container { + max-height: 100vh; + border-radius: 0; + } + + .chat-header { + border-radius: 0; + padding: 20px; + } + + .chat-header h1 { + font-size: 24px; + } + + .message-content { + max-width: 95%; + font-size: 14px; + } + + .btn-large { + min-height: 45px; + font-size: 14px; + } + + .button-group { + flex-direction: column; + } + + .btn-primary { + flex: none; + } +} diff --git a/voice_chat/static/js/app.js b/voice_chat/static/js/app.js new file mode 100644 index 0000000..538bd1c --- /dev/null +++ b/voice_chat/static/js/app.js @@ -0,0 +1,350 @@ +// static/js/app.js + +let mediaRecorder; +let audioChunks = []; +let isRecording = false; +let isMuted = false; +const API_BASE_URL = 'http://localhost:8000'; + +// WAV encoder function to convert raw audio data to WAV format +function encodeWAV(samples, sampleRate = 16000) { + const numChannels = 1; + const length = samples.length * numChannels * 2 + 36; + const arrayBuffer = new ArrayBuffer(44 + length); + const view = new DataView(arrayBuffer); + + // WAV header + const writeString = (offset, string) => { + for (let i = 0; i < string.length; i++) { + view.setUint8(offset + i, string.charCodeAt(i)); + } + }; + + writeString(0, 'RIFF'); + view.setUint32(4, 36 + length, true); + writeString(8, 'WAVE'); + writeString(12, 'fmt '); + view.setUint32(16, 16, true); + view.setUint16(20, 1, true); + view.setUint16(22, numChannels, true); + view.setUint32(24, sampleRate, true); + view.setUint32(28, sampleRate * 2, true); + view.setUint16(32, numChannels * 2, true); + view.setUint16(34, 16, true); + writeString(36, 
'data');
+    // Data chunk size is the raw PCM byte count only. The `length` variable
+    // computed above already includes the 36 header bytes, so writing it here
+    // would overstate the chunk; use samples.length * 2 (16-bit mono). The
+    // RIFF size written earlier is still 36 bytes too large, but decoders
+    // read this field to find the end of the samples.
+    view.setUint32(40, samples.length * 2, true);
+
+    // Write samples, clamped to [-1, 1] so float peaks cannot wrap the int16
+    let offset = 44;
+    for (let i = 0; i < samples.length; i++) {
+        const s = Math.max(-1, Math.min(1, samples[i]));
+        view.setInt16(offset, s * 0x7FFF, true);
+        offset += 2;
+    }
+
+    return new Blob([arrayBuffer], { type: 'audio/wav' });
+}
+
+// Convert audio blob to WAV format
+async function blobToWAV(blob) {
+    const audioContext = new (window.AudioContext || window.webkitAudioContext)();
+    const arrayBuffer = await blob.arrayBuffer();
+    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
+
+    // Get audio data (mono: first channel only)
+    const samples = audioBuffer.getChannelData(0);
+    const wav = encodeWAV(Array.from(samples), audioBuffer.sampleRate);
+
+    // Release the AudioContext: it is only needed for the decode above, and
+    // browsers cap the number of simultaneously live contexts.
+    await audioContext.close();
+
+    return wav;
+}
+
+// Initialize on page load
+document.addEventListener('DOMContentLoaded', () => {
+    checkBrowserSupport();
+    setupEventListeners();
+});
+
+function checkBrowserSupport() {
+    const getUserMedia = navigator.mediaDevices && navigator.mediaDevices.getUserMedia;
+    if (!getUserMedia) {
+        showError('Your browser does not support audio recording. 
Please use a modern browser.');
+        document.getElementById('recordButton').disabled = true;
+    }
+}
+
+function setupEventListeners() {
+    // Handle Enter key to send
+    document.addEventListener('keydown', (e) => {
+        if (e.key === 'Enter' && !e.shiftKey && !isRecording) {
+            if (!document.getElementById('sendButton').disabled) {
+                sendAudio();
+            }
+        }
+    });
+}
+
+async function toggleRecording() {
+    if (isRecording) {
+        stopRecording();
+    } else {
+        startRecording();
+    }
+}
+
+async function startRecording() {
+    try {
+        const stream = await navigator.mediaDevices.getUserMedia({
+            audio: {
+                echoCancellation: true,
+                noiseSuppression: true,
+                autoGainControl: true
+            }
+        });
+
+        audioChunks = [];
+        mediaRecorder = new MediaRecorder(stream);
+
+        mediaRecorder.ondataavailable = (event) => {
+            audioChunks.push(event.data);
+        };
+
+        mediaRecorder.onstop = () => {
+            // Optionally process when recording stops
+        };
+
+        mediaRecorder.start();
+        isRecording = true;
+
+        // Update UI. Send stays disabled while recording: MediaRecorder was
+        // started without a timeslice, so audioChunks is empty until stop
+        // fires and sending now would always fail with "No audio recorded".
+        const recordButton = document.getElementById('recordButton');
+        recordButton.classList.add('recording');
+        document.getElementById('recordButtonText').textContent = 'Stop Recording';
+        document.getElementById('statusText').textContent = 'Recording...';
+        document.getElementById('recordingIndicator').classList.add('active');
+
+    } catch (error) {
+        showError(`Failed to start recording: ${error.message}`);
+        isRecording = false;
+    }
+}
+
+function stopRecording() {
+    if (!mediaRecorder) return;
+
+    mediaRecorder.stop();
+    mediaRecorder.stream.getTracks().forEach(track => track.stop());
+    isRecording = false;
+
+    // Update UI
+    const recordButton = document.getElementById('recordButton');
+    recordButton.classList.remove('recording');
+    document.getElementById('recordButtonText').textContent = 'Start Recording';
+    document.getElementById('statusText').textContent = 'Ready';
+    // Enable Send only once recording has stopped and chunks can exist.
+    document.getElementById('sendButton').disabled = false;
document.getElementById('recordingIndicator').classList.remove('active'); +} + +async function sendAudio() { + if (audioChunks.length === 0) { + showError('No audio recorded. Please record something first.'); + return; + } + + // Create blob from chunks - use actual recorded format + const recordedBlob = new Blob(audioChunks, { type: mediaRecorder.mimeType || 'audio/webm' }); + + try { + // Convert to proper WAV format + const audioWAVBlob = await blobToWAV(recordedBlob); + + // Disable send button and show loader + document.getElementById('sendButton').disabled = true; + document.getElementById('recordButton').disabled = true; + showLoader(); + + // Send audio to backend + const formData = new FormData(); + formData.append('audio', audioWAVBlob, 'audio.wav'); + + const response = await fetch(`${API_BASE_URL}/chat/`, { + method: 'POST', + body: formData, + }); + + if (!response.ok) { + const errorText = await response.text(); + console.error('Backend error response:', errorText); + throw new Error(errorText); + } + + // Parse JSON response containing text and audio + const responseData = await response.json(); + + if (responseData.error) { + throw new Error(responseData.error); + } + + // Show user's transcribed text with audio player + addMessageWithAudio(responseData.user_text, 'user', audioWAVBlob); + + // Decode audio from base64 + const audioBytes = new Uint8Array(atob(responseData.audio).split('').map(c => c.charCodeAt(0))); + const audioBlob = new Blob([audioBytes], { type: 'audio/wav' }); + + // Play audio + await playAudioResponse(audioBlob); + + // Show bot's response text with audio player + addMessageWithAudio(responseData.bot_response, 'bot', audioBlob); + + } catch (error) { + showError(`Error: ${error.message}`); + addMessage(`Error: ${error.message}`, 'bot'); + } finally { + hideLoader(); + document.getElementById('sendButton').disabled = true; + document.getElementById('recordButton').disabled = false; + + // Reset for next recording + audioChunks 
= []; + document.getElementById('statusText').textContent = 'Ready'; + } +} + +async function playAudioResponse(audioBlob) { + return new Promise((resolve) => { + try { + if (isMuted) { + console.log('Audio is muted, skipping playback'); + resolve(); + return; + } + + const audioUrl = URL.createObjectURL(audioBlob); + const audio = new Audio(audioUrl); + + // Set a timeout to resolve even if audio doesn't end naturally + const timeout = setTimeout(() => { + console.log('Audio timeout - resolving after 10 seconds'); + URL.revokeObjectURL(audioUrl); + resolve(); + }, 10000); // 10 second timeout + + audio.onended = () => { + console.log('Audio playback ended'); + clearTimeout(timeout); + URL.revokeObjectURL(audioUrl); + resolve(); + }; + + audio.onerror = (error) => { + console.error('Audio element error:', error); + clearTimeout(timeout); + URL.revokeObjectURL(audioUrl); + resolve(); + }; + + // Play the audio + const playPromise = audio.play(); + if (playPromise !== undefined) { + playPromise.then(() => { + console.log('Audio playing successfully'); + }).catch(err => { + console.error('Playback error:', err); + clearTimeout(timeout); + URL.revokeObjectURL(audioUrl); + resolve(); + }); + } + } catch (error) { + console.error('Audio creation error:', error); + resolve(); + } + }); +} + +function addMessage(text, sender) { + const chatMessages = document.getElementById('chatMessages'); + const messageDiv = document.createElement('div'); + messageDiv.className = `message ${sender}-message`; + + const contentDiv = document.createElement('div'); + contentDiv.className = 'message-content'; + contentDiv.textContent = text; + + messageDiv.appendChild(contentDiv); + chatMessages.appendChild(messageDiv); + + // Scroll to bottom + chatMessages.scrollTop = chatMessages.scrollHeight; +} + +function addMessageWithAudio(text, sender, audioBlob) { + const chatMessages = document.getElementById('chatMessages'); + const messageDiv = document.createElement('div'); + 
messageDiv.className = `message ${sender}-message`; + + // Create audio player + const audioUrl = URL.createObjectURL(audioBlob); + const audioPlayer = document.createElement('audio'); + audioPlayer.controls = true; + audioPlayer.style.width = '100%'; + audioPlayer.style.marginBottom = '10px'; + audioPlayer.src = audioUrl; + + // Create text content + const contentDiv = document.createElement('div'); + contentDiv.className = 'message-content'; + contentDiv.textContent = text; + + messageDiv.appendChild(audioPlayer); + messageDiv.appendChild(contentDiv); + chatMessages.appendChild(messageDiv); + + // Scroll to bottom + chatMessages.scrollTop = chatMessages.scrollHeight; +} + +function showError(message) { + const errorDiv = document.createElement('div'); + errorDiv.className = 'error-message'; + errorDiv.textContent = message; + + const chatInputArea = document.querySelector('.chat-input-area'); + chatInputArea.insertBefore(errorDiv, chatInputArea.firstChild); + + // Remove error after 5 seconds + setTimeout(() => { + errorDiv.remove(); + }, 5000); +} + +function showLoader() { + document.getElementById('loader').classList.add('active'); +} + +function hideLoader() { + document.getElementById('loader').classList.remove('active'); +} + +function clearChat() { + if (confirm('Are you sure you want to clear the chat history?')) { + document.getElementById('chatMessages').innerHTML = ` +
+
Hello! I'm your AI assistant. Click the microphone button to start recording your message.
+
+ `; + audioChunks = []; + document.getElementById('recordButton').disabled = false; + } +} + +function toggleMicrophone() { + isMuted = !isMuted; + const muteButton = document.getElementById('muteButton'); + if (isMuted) { + muteButton.textContent = '๐Ÿ”‡ Muted'; + muteButton.style.opacity = '0.6'; + } else { + muteButton.textContent = '๐Ÿ”Š Unmute'; + muteButton.style.opacity = '1'; + } +} diff --git a/voice_chat/temp/output.wav b/voice_chat/temp/output.wav new file mode 100644 index 0000000..ecce7df Binary files /dev/null and b/voice_chat/temp/output.wav differ