diff --git a/README.md b/README.md
index 59ac98a..60450c0 100644
--- a/README.md
+++ b/README.md
@@ -19,9 +19,34 @@ pip install -r requirements.txt
 
 ## 실행
 
+개발 서버(Uvicorn) 실행:
+
 ```bash
-python -m app.main
+uvicorn app.main:app --reload --port 8000
+```
+
+API 문서: `http://127.0.0.1:8000/docs`
+
+## 환경 변수 설정
+
+프로젝트 루트에 `.env` 파일을 생성하고 다음 값을 채우세요.
+
+### AWS S3 설정
 ```
+AWS_ACCESS_KEY_ID=...
+AWS_SECRET_ACCESS_KEY=...
+AWS_REGION=ap-northeast-2
+S3_BUCKET_NAME=your-bucket
+S3_PREFIX=voices
+```
+
+### Google Cloud Speech-to-Text 설정
+```
+# 서비스 계정 키 파일 경로 설정
+GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json
+```
+
+`.env`는 `app/__init__.py`에서 자동 로드됩니다.
 
 ## 프로젝트 구조
 
@@ -29,7 +54,7 @@ python -m app.main
 caring-voice/
 ├── app/
 │   ├── __init__.py
-│   └── main.py          # 메인 엔트리 포인트
+│   └── main.py          # FastAPI 엔트리 포인트 및 엔드포인트
 ├── .gitignore
 ├── README.md
 ├── requirements.txt
diff --git a/app/__init__.py b/app/__init__.py
index e69de29..1e86223 100644
--- a/app/__init__.py
+++ b/app/__init__.py
@@ -0,0 +1,3 @@
+from dotenv import load_dotenv  # type: ignore
+
+# Load key=value pairs from a `.env` file into the process environment when the
+# `app` package is first imported, before any submodule reads os.getenv().
+load_dotenv()
+
diff --git a/app/constants.py b/app/constants.py
new file mode 100644
index 0000000..981926a
--- /dev/null
+++ b/app/constants.py
@@ -0,0 +1,11 @@
+import os
+
+# Base key prefix for uploads; the S3_PREFIX environment variable overrides it.
+VOICE_BASE_PREFIX = os.environ.get("S3_PREFIX", "voices")
+
+# Folder used when a request does not specify `folder`.
+DEFAULT_UPLOAD_FOLDER = "voiceFile"
+
+# Optional allow-list of folders, kept as a commented-out example (e.g. for validation):
+# ALLOWED_FOLDERS = {"raw", "processed", "public"}
+
diff --git a/app/emotion_service.py b/app/emotion_service.py
new file mode 100644
index 0000000..5cfdcfc
--- /dev/null
+++ b/app/emotion_service.py
@@ -0,0 +1,117 @@
+import io
+import os
+import tempfile
+from typing import Dict, Any
+import librosa
+import torch
+from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor
+import numpy as np
+
+
+class EmotionAnalyzer:
+    """Speech-emotion classifier backed by a Hugging Face wav2vec2 model.
+
+    The model and its feature extractor are loaded once in ``__init__``. If
+    loading fails, both attributes stay ``None`` and ``analyze_emotion``
+    returns an error payload instead of raising.
+    """
+
+    def __init__(self):
+        self.model = None
+        self.feature_extractor = None
+        # Use the GPU when torch reports one as available; otherwise the CPU.
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self._load_model()
+
+    def _load_model(self):
+        """Load the pretrained model and feature extractor from Hugging Face.
+
+        Any failure is printed and leaves ``self.model`` and
+        ``self.feature_extractor`` set to ``None``.
+        """
+        model_name = "jungjongho/wav2vec2-xlsr-korean-speech-emotion-recognition"
+
+        try:
+            self.model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
+            self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
+            self.model.to(self.device)
+            self.model.eval()
+        except Exception as e:
+            print(f"모델 로드 실패: {e}")
+            self.model = None
+            self.feature_extractor = None
+
+    def analyze_emotion(self, audio_file) -> Dict[str, Any]:
+        """Classify the emotion expressed in an uploaded audio file.
+
+        Args:
+            audio_file: Object exposing a readable binary ``.file`` attribute
+                (callers in this project pass a FastAPI ``UploadFile``).
+
+        Returns:
+            On success, a dict with ``emotion``, ``confidence``,
+            ``emotion_scores``, ``audio_duration`` (seconds) and
+            ``sample_rate``. On failure, a dict with ``error``,
+            ``emotion="unknown"`` and ``confidence=0.0``; ordinary exceptions
+            are converted into that payload rather than propagated.
+        """
+        if self.model is None or self.feature_extractor is None:
+            return {
+                "error": "모델이 로드되지 않았습니다",
+                "emotion": "unknown",
+                "confidence": 0.0
+            }
+
+        # Bound before the try block so the cleanup in `finally` never touches
+        # an undefined name when reading or writing the upload fails.
+        tmp_file_path = None
+        try:
+            # librosa is given a path, so the upload is spooled to a temp file.
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+                # Record the path first so the file is removed even if the
+                # read or write below raises.
+                tmp_file_path = tmp_file.name
+                tmp_file.write(audio_file.file.read())
+
+            # Load the audio, resampled to 16 kHz.
+            audio, sr = librosa.load(tmp_file_path, sr=16000)
+
+            # Feature extraction
+            inputs = self.feature_extractor(
+                audio,
+                sampling_rate=16000,
+                return_tensors="pt",
+                padding=True
+            )
+
+            # Move every input tensor to the device the model lives on.
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+
+            # Inference without gradient tracking
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+
+            # NOTE(review): hard-coded label order; the original author marked it
+            # as possibly needing adjustment for the model. Confirm it matches
+            # the checkpoint's own label mapping.
+            emotion_labels = ["neutral", "happy", "sad", "angry", "fear", "surprise", "disgust"]
+
+            # Highest-probability class; indices beyond the label list map to "unknown".
+            predicted_class = torch.argmax(predictions, dim=-1).item()
+            confidence = predictions[0][predicted_class].item()
+            emotion = emotion_labels[predicted_class] if predicted_class < len(emotion_labels) else "unknown"
+
+            # Probability for every label the model output can cover.
+            emotion_scores = {
+                emotion_labels[i]: predictions[0][i].item()
+                for i in range(min(len(emotion_labels), predictions.shape[1]))
+            }
+
+            return {
+                "emotion": emotion,
+                "confidence": confidence,
+                "emotion_scores": emotion_scores,
+                "audio_duration": len(audio) / sr,
+                "sample_rate": sr
+            }
+
+        except Exception as e:
+            return {
+                "error": f"분석 중 오류 발생: {str(e)}",
+                "emotion": "unknown",
+                "confidence": 0.0
+            }
+        finally:
+            # Remove the temp file if one was created; a failed delete is only reported.
+            if tmp_file_path is not None:
+                try:
+                    os.unlink(tmp_file_path)
+                except OSError as e:
+                    print(f"임시 파일 삭제 실패: {tmp_file_path}, 오류: {e}")
+
+
+# Module-level singleton: the model is loaded once, when this module is first
+# imported, and shared by every call to analyze_voice_emotion().
+emotion_analyzer = EmotionAnalyzer()
+
+
+def analyze_voice_emotion(audio_file) -> Dict[str, Any]:
+    """Run emotion analysis on ``audio_file`` with the shared module-level analyzer."""
+    analysis = emotion_analyzer.analyze_emotion(audio_file)
+    return analysis
diff --git a/app/main.py b/app/main.py
index 98d96a2..a6d1b88 100644
--- a/app/main.py
+++ b/app/main.py
@@ -1,7 +1,144 @@
-from fastapi import FastAPI
+import os
+from typing import Optional
+from fastapi import FastAPI, UploadFile, File, HTTPException, Form
+from fastapi.responses import JSONResponse
+from typing import List
+from .s3_service import upload_fileobj, list_bucket_objects
+from .constants import VOICE_BASE_PREFIX, DEFAULT_UPLOAD_FOLDER
+from .emotion_service import analyze_voice_emotion
+from .stt_service import transcribe_voice
 
 app = FastAPI(title="Caring API")
 
+# GET : health check
 @app.get("/health")
 def health():
+    """Return a fixed ``{"status": "ok"}`` payload."""
     return {"status": "ok"}
+
+
+# POST : upload voice
+@app.post("/voices/upload")
+async def upload_voice(
+    file: UploadFile = File(...),
+    folder: Optional[str] = Form(default=None),  # e.g. "raw" or "user123/session1"
+):
+    """Upload a voice file to S3, analyse its emotion, and list the target folder.
+
+    NOTE(review): FastAPI needs the python-multipart package to parse
+    File/Form fields, and requirements.txt in this change does not list it;
+    confirm it is installed.
+
+    Args:
+        file: Uploaded audio file (multipart field ``file``).
+        folder: Optional sub-folder under ``VOICE_BASE_PREFIX``; falls back to
+            ``DEFAULT_UPLOAD_FOLDER`` when omitted or empty.
+
+    Returns:
+        Dict with ``uploaded`` (the S3 key written), ``files`` (every key under
+        the effective prefix) and ``emotion_analysis`` (the analyzer's result).
+
+    Raises:
+        HTTPException: 500 when ``S3_BUCKET_NAME`` is not configured.
+    """
+    # Function-scope imports keep this handler self-contained.
+    import io
+
+    from fastapi.concurrency import run_in_threadpool
+
+    bucket = os.getenv("S3_BUCKET_NAME")
+    if not bucket:
+        raise HTTPException(status_code=500, detail="S3_BUCKET_NAME not configured")
+
+    # Key layout: base prefix / folder / original file name
+    base_prefix = VOICE_BASE_PREFIX.rstrip("/")
+    effective_prefix = f"{base_prefix}/{folder or DEFAULT_UPLOAD_FOLDER}".rstrip("/")
+    filename = os.path.basename(file.filename or "upload.wav")
+    key = f"{effective_prefix}/{filename}"
+
+    # Read the upload once and hand S3 its own in-memory copy, so the upload
+    # stream itself stays open and rewindable for the analysis step.
+    # NOTE(review): some s3transfer versions close the stream they are given;
+    # confirm before passing file.file directly again.
+    payload = await file.read()
+    await file.seek(0)
+
+    # boto3 and the model are blocking; run them in the threadpool so the
+    # event loop keeps serving other requests. Content-Type is stored with the object.
+    await run_in_threadpool(
+        upload_fileobj,
+        bucket=bucket,
+        key=key,
+        fileobj=io.BytesIO(payload),
+        content_type=file.content_type,
+    )
+
+    # Emotion analysis reads from file.file, which was rewound above.
+    emotion_result = await run_in_threadpool(analyze_voice_emotion, file)
+
+    # There is no database yet, so return the bucket listing for the folder.
+    names = await run_in_threadpool(
+        list_bucket_objects, bucket=bucket, prefix=effective_prefix
+    )
+    return {
+        "uploaded": key,
+        "files": names,
+        "emotion_analysis": emotion_result
+    }
+
+
+# GET : query my voice histories
+@app.get("/voices")
+async def list_voices(skip: int = 0, limit: int = 50, folder: Optional[str] = None):
+    """List stored voice keys for a folder, with simple offset/limit slicing.
+
+    Args:
+        skip: Number of keys to skip from the start; negative values are
+            treated as 0.
+        limit: Maximum number of keys to return; negative values are treated
+            as 0.
+        folder: Optional sub-folder under ``VOICE_BASE_PREFIX``; falls back to
+            ``DEFAULT_UPLOAD_FOLDER`` when omitted or empty.
+
+    Returns:
+        Dict with ``items`` (the selected keys), ``count`` (how many were
+        returned) and ``next`` (the ``skip`` value for the following page).
+
+    Raises:
+        HTTPException: 500 when ``S3_BUCKET_NAME`` is not configured.
+    """
+    from fastapi.concurrency import run_in_threadpool
+
+    bucket = os.getenv("S3_BUCKET_NAME")
+    if not bucket:
+        raise HTTPException(status_code=500, detail="S3_BUCKET_NAME not configured")
+    base_prefix = VOICE_BASE_PREFIX.rstrip("/")
+    effective_prefix = f"{base_prefix}/{folder or DEFAULT_UPLOAD_FOLDER}".rstrip("/")
+
+    # A negative offset or size would be read as a from-the-end slice index.
+    skip = max(skip, 0)
+    limit = max(limit, 0)
+
+    # The S3 listing is blocking I/O; keep it off the event loop.
+    keys = await run_in_threadpool(
+        list_bucket_objects, bucket=bucket, prefix=effective_prefix
+    )
+    # Not real pagination: the full listing is fetched, then sliced.
+    sliced = keys[skip: skip + limit]
+    return {"items": sliced, "count": len(sliced), "next": skip + len(sliced)}
+
+
+# GET : query specific voice & show result
+@app.get("/voices/{voice_id}")
+async def get_voice(voice_id: str):
+    """Return a placeholder detail record for ``voice_id`` (no real lookup yet)."""
+    # Stub values: the same numbers are returned for every id.
+    analysis = {"pitch_mean": 220.5, "energy": 0.82}
+    payload = dict(
+        voice_id=voice_id,
+        filename=f"{voice_id}.wav",
+        status="processed",
+        duration_sec=12.34,
+        analysis=analysis,
+    )
+    return JSONResponse(content=payload)
+
+
+# POST : analyze emotion from uploaded voice file
+@app.post("/voices/analyze-emotion")
+async def analyze_emotion(file: UploadFile = File(...)):
+    """Analyse the emotion of an uploaded voice file.
+
+    Args:
+        file: Uploaded audio file (multipart field ``file``).
+
+    Returns:
+        The dict produced by ``analyze_voice_emotion`` for this file.
+    """
+    from fastapi.concurrency import run_in_threadpool
+
+    # Model inference is blocking; run it in the threadpool so the event loop
+    # keeps serving other requests.
+    return await run_in_threadpool(analyze_voice_emotion, file)
+
+
+# POST : convert speech to text using Google STT
+@app.post("/voices/transcribe")
+async def transcribe_speech(
+    file: UploadFile = File(...),
+    language_code: str = "ko-KR"
+):
+    """Transcribe an uploaded voice file to text.
+
+    Args:
+        file: Uploaded audio file (multipart field ``file``).
+        language_code: Language passed to the STT service. It is declared with
+            a plain default, so FastAPI reads it from the query string.
+
+    Returns:
+        The dict produced by ``transcribe_voice`` for this file.
+    """
+    from fastapi.concurrency import run_in_threadpool
+
+    # The STT request is a blocking network call; keep it off the event loop.
+    return await run_in_threadpool(transcribe_voice, file, language_code)
+
+
+# POST : upload voice with both emotion analysis and STT
+@app.post("/voices/upload-with-analysis")
+async def upload_voice_with_analysis(
+    file: UploadFile = File(...),
+    folder: Optional[str] = Form(default=None),
+    language_code: str = Form(default="ko-KR")
+):
+    """Upload a voice file to S3, then run both emotion analysis and STT on it.
+
+    Args:
+        file: Uploaded audio file (multipart field ``file``).
+        folder: Optional sub-folder under ``VOICE_BASE_PREFIX``; falls back to
+            ``DEFAULT_UPLOAD_FOLDER`` when omitted or empty.
+        language_code: Language passed to the STT service (form field).
+
+    Returns:
+        Dict with ``uploaded`` (the S3 key written), ``files`` (every key under
+        the effective prefix), ``emotion_analysis`` and ``transcription``.
+
+    Raises:
+        HTTPException: 500 when ``S3_BUCKET_NAME`` is not configured.
+    """
+    # Function-scope imports keep this handler self-contained.
+    import io
+
+    from fastapi.concurrency import run_in_threadpool
+
+    bucket = os.getenv("S3_BUCKET_NAME")
+    if not bucket:
+        raise HTTPException(status_code=500, detail="S3_BUCKET_NAME not configured")
+
+    # Key layout: base prefix / folder / original file name
+    base_prefix = VOICE_BASE_PREFIX.rstrip("/")
+    effective_prefix = f"{base_prefix}/{folder or DEFAULT_UPLOAD_FOLDER}".rstrip("/")
+    filename = os.path.basename(file.filename or "upload.wav")
+    key = f"{effective_prefix}/{filename}"
+
+    # Read the upload once and hand S3 its own in-memory copy, so the upload
+    # stream itself stays open and rewindable for the two analysis steps.
+    # NOTE(review): some s3transfer versions close the stream they are given;
+    # confirm before passing file.file directly again.
+    payload = await file.read()
+    await file.seek(0)
+
+    # Every step below is blocking, so each runs in the threadpool.
+    await run_in_threadpool(
+        upload_fileobj,
+        bucket=bucket,
+        key=key,
+        fileobj=io.BytesIO(payload),
+        content_type=file.content_type,
+    )
+
+    # Emotion analysis consumes file.file; rewind afterwards for the STT step.
+    emotion_result = await run_in_threadpool(analyze_voice_emotion, file)
+    await file.seek(0)
+
+    # Speech-to-text
+    stt_result = await run_in_threadpool(transcribe_voice, file, language_code)
+
+    # Listing of the target folder
+    names = await run_in_threadpool(
+        list_bucket_objects, bucket=bucket, prefix=effective_prefix
+    )
+
+    return {
+        "uploaded": key,
+        "files": names,
+        "emotion_analysis": emotion_result,
+        "transcription": stt_result
+    }
diff --git a/app/s3_service.py b/app/s3_service.py
new file mode 100644
index 0000000..d86a2d7
--- /dev/null
+++ b/app/s3_service.py
@@ -0,0 +1,40 @@
+import os
+from typing import List, Optional
+
+import boto3  # type: ignore
+from botocore.client import Config  # type: ignore
+
+
+def get_s3_client():
+    """Build a boto3 S3 client configured from environment variables.
+
+    Reads ``AWS_REGION`` (default ``ap-northeast-2``) and always requests
+    SigV4 signing. ``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY`` are passed
+    explicitly only when both are set, together with ``AWS_SESSION_TOKEN`` if
+    present; otherwise no credentials are passed to ``boto3.client``.
+    """
+    env = os.environ
+    client_options = dict(
+        region_name=env.get("AWS_REGION", "ap-northeast-2"),
+        config=Config(signature_version="s3v4"),
+    )
+
+    key_id = env.get("AWS_ACCESS_KEY_ID")
+    secret = env.get("AWS_SECRET_ACCESS_KEY")
+    if not (key_id and secret):
+        # Incomplete key pair: create the client without explicit credentials.
+        return boto3.client("s3", **client_options)
+
+    client_options.update(aws_access_key_id=key_id, aws_secret_access_key=secret)
+    token = env.get("AWS_SESSION_TOKEN")
+    if token:
+        client_options["aws_session_token"] = token
+    return boto3.client("s3", **client_options)
+
+
+def upload_fileobj(
+    bucket: str,
+    key: str,
+    fileobj,
+    content_type: Optional[str] = None,
+) -> str:
+    """Upload a file-like object to S3 and return the key it was stored under.
+
+    Args:
+        bucket: Destination bucket name.
+        key: Destination object key.
+        fileobj: File-like object handed to boto3's ``upload_fileobj``.
+        content_type: Optional MIME type stored as the object's Content-Type.
+            The callers in ``app/main.py`` pass this keyword; when it is
+            ``None`` or empty no extra arguments are sent.
+
+    Returns:
+        The ``key`` that was written.
+    """
+    s3 = get_s3_client()
+    extra_args = {"ContentType": content_type} if content_type else None
+    s3.upload_fileobj(fileobj, bucket, key, ExtraArgs=extra_args)
+    return key
+
+
+def list_bucket_objects(bucket: str, prefix: str = "") -> List[str]:
+    """Return every object key in ``bucket`` that starts with ``prefix``.
+
+    Walks all pages of ``list_objects_v2``; pages without a ``Contents`` entry
+    contribute nothing.
+    """
+    pages = (
+        get_s3_client()
+        .get_paginator("list_objects_v2")
+        .paginate(Bucket=bucket, Prefix=prefix)
+    )
+    return [
+        entry["Key"]
+        for page in pages
+        for entry in (page.get("Contents") or [])
+    ]
+
+
diff --git a/app/stt_service.py b/app/stt_service.py
new file mode 100644
index 0000000..29a2612
--- /dev/null
+++ b/app/stt_service.py
@@ -0,0 +1,124 @@
+import io
+import tempfile
+import os
+from typing import Dict, Any, Optional
+from google.cloud import speech
+from google.oauth2 import service_account
+import librosa
+import numpy as np
+
+
+class GoogleSTTService:
+    """Wrapper around the Google Cloud Speech-to-Text client.
+
+    The client is created once in ``__init__``. If that fails, ``self.client``
+    stays ``None`` and ``transcribe_audio`` returns an error payload instead
+    of raising.
+    """
+
+    def __init__(self):
+        self.client = None
+        self._initialize_client()
+
+    def _initialize_client(self):
+        """Create the Speech-to-Text client.
+
+        Uses the service-account key file named by
+        ``GOOGLE_APPLICATION_CREDENTIALS`` when that file exists; otherwise
+        constructs the client without explicit credentials. Any failure is
+        printed and leaves ``self.client`` set to ``None``.
+        """
+        try:
+            # Path to the service-account key file, taken from the environment.
+            credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+
+            if credentials_path and os.path.exists(credentials_path):
+                # Authenticate explicitly with the key file.
+                credentials = service_account.Credentials.from_service_account_file(
+                    credentials_path,
+                    scopes=["https://www.googleapis.com/auth/cloud-platform"]
+                )
+                self.client = speech.SpeechClient(credentials=credentials)
+            else:
+                # The variable is unset or the file is missing: let the client
+                # library resolve credentials on its own.
+                self.client = speech.SpeechClient()
+
+        except Exception as e:
+            print(f"Google STT 클라이언트 초기화 실패: {e}")
+            self.client = None
+
+    def transcribe_audio(self, audio_file, language_code: str = "ko-KR") -> Dict[str, Any]:
+        """Transcribe an uploaded audio file to text.
+
+        Args:
+            audio_file: Object exposing a readable, seekable binary ``.file``
+                attribute (callers in this project pass a FastAPI
+                ``UploadFile``). The stream is rewound after it is read.
+            language_code: Language code sent to the STT service
+                (default ``ko-KR``).
+
+        Returns:
+            On success, a dict with ``transcript`` (the top alternative of
+            every result, joined with spaces), ``confidence`` (mean of those
+            alternatives' confidences), ``language_code``, ``audio_duration``
+            (seconds) and ``sample_rate``. On failure or when nothing is
+            recognised, a dict with ``error``, ``transcript=""`` and
+            ``confidence=0.0``; ordinary exceptions are converted into that
+            payload rather than propagated.
+        """
+        if self.client is None:
+            return {
+                "error": "Google STT 클라이언트가 초기화되지 않았습니다",
+                "transcript": "",
+                "confidence": 0.0
+            }
+
+        # Bound before the try block so the cleanup in `finally` never touches
+        # an undefined name when reading or writing the upload fails.
+        tmp_file_path = None
+        try:
+            # librosa is given a path, so the upload is spooled to a temp file.
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+                # Record the path first so the file is removed even if the
+                # read or write below raises.
+                tmp_file_path = tmp_file.name
+                content = audio_file.file.read()
+                # Rewind so later consumers of the same upload can read it again.
+                audio_file.file.seek(0)
+                tmp_file.write(content)
+
+            # Load the audio, resampled to 16 kHz.
+            audio_data, sample_rate = librosa.load(tmp_file_path, sr=16000)
+
+            # Convert the float samples to 16-bit PCM bytes; clipping first
+            # keeps the scaling inside the int16 range.
+            audio_data = np.clip(audio_data, -1.0, 1.0)
+            audio_bytes = (audio_data * 32767).astype('int16').tobytes()
+
+            # Build the Speech-to-Text request.
+            # NOTE(review): this is the synchronous recognize call paired with
+            # the "latest_long" model; confirm the audio-length limit of the
+            # synchronous API suits the recordings expected here.
+            audio = speech.RecognitionAudio(content=audio_bytes)
+            config = speech.RecognitionConfig(
+                encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
+                sample_rate_hertz=sample_rate,
+                language_code=language_code,
+                enable_automatic_punctuation=True,
+                enable_word_time_offsets=True,
+                model="latest_long",
+            )
+
+            response = self.client.recognize(config=config, audio=audio)
+
+            # Each result covers one consecutive portion of the audio, so the
+            # top alternative of every result is needed for the full text.
+            best = [r.alternatives[0] for r in response.results if r.alternatives]
+            if not best:
+                return {
+                    "error": "음성을 인식할 수 없습니다",
+                    "transcript": "",
+                    "confidence": 0.0
+                }
+
+            transcript = " ".join(alt.transcript.strip() for alt in best)
+            confidence = sum(alt.confidence for alt in best) / len(best)
+
+            return {
+                "transcript": transcript,
+                "confidence": confidence,
+                "language_code": language_code,
+                "audio_duration": len(audio_data) / sample_rate,
+                "sample_rate": sample_rate
+            }
+
+        except Exception as e:
+            return {
+                "error": f"STT 처리 중 오류 발생: {str(e)}",
+                "transcript": "",
+                "confidence": 0.0
+            }
+        finally:
+            # Best-effort removal of the temp file, if one was created.
+            if tmp_file_path is not None:
+                try:
+                    os.unlink(tmp_file_path)
+                except OSError:
+                    pass
+
+
+# Module-level singleton: the STT client is created once, when this module is
+# first imported, and shared by every call to transcribe_voice().
+stt_service = GoogleSTTService()
+
+
+def transcribe_voice(audio_file, language_code: str = "ko-KR") -> Dict[str, Any]:
+    """Transcribe ``audio_file`` to text with the shared module-level STT service."""
+    transcription = stt_service.transcribe_audio(audio_file, language_code)
+    return transcription
diff --git a/requirements.txt b/requirements.txt
index 21a99f5..297b90c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,10 @@
-# 현재 프로젝트에 특별한 의존성이 없습니다.
-# 필요에 따라 패키지를 추가하세요.
-
-# 예시:
-# requests>=2.31.0
-# numpy>=1.24.0
-# pandas>=2.0.0
+fastapi>=0.115.0
+uvicorn[standard]>=0.30.0
+boto3>=1.34.0
+python-dotenv>=1.0.1
+transformers>=4.30.0
+torch>=2.0.0
+librosa>=0.10.0
+scipy>=1.10.0
+google-cloud-speech>=2.21.0
+google-auth>=2.23.0