safori-team · H4nnhoi · Oct 26, 2025 · Oct 20, 2025 · Oct 25, 2025 · Oct 25, 2025
diff --git a/README.md b/README.md
@@ -19,17 +19,42 @@ pip install -r requirements.txt
 
 ## 실행
 
+개발 서버(Uvicorn) 실행:
+
 ```bash
-python -m app.main
+uvicorn app.main:app --reload --port 8000
+```
+
+API 문서: `http://127.0.0.1:8000/docs`
+
+## 환경 변수 설정
+
+프로젝트 루트에 `.env` 파일을 생성하고 다음 값을 채우세요.
+
+### AWS S3 설정
 ```
+AWS_ACCESS_KEY_ID=...
+AWS_SECRET_ACCESS_KEY=...
+AWS_REGION=ap-northeast-2
+S3_BUCKET_NAME=your-bucket
+S3_PREFIX=voices
+```
+
+### Google Cloud Speech-to-Text 설정
+```
+# 서비스 계정 키 파일 경로 설정
+GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json
+```
+
+`.env`는 `app/__init__.py`에서 자동 로드됩니다.
 
 ## 프로젝트 구조
 
 ```
 caring-voice/
 ├── app/
 │   ├── __init__.py
-│   └── main.py          # 메인 엔트리 포인트
+│   └── main.py          # FastAPI 엔트리 포인트 및 엔드포인트
 ├── .gitignore
 ├── README.md
 ├── requirements.txt

diff --git a/app/__init__.py b/app/__init__.py
@@ -0,0 +1,3 @@
+from dotenv import load_dotenv  # type: ignore
+# Runs when the `app` package is imported, so settings from a `.env` file are
+# available through os.getenv() in the other modules of the package.
+load_dotenv()
+
diff --git a/app/constants.py b/app/constants.py
@@ -0,0 +1,11 @@
+import os
+
+# Base key prefix for uploads (can be overridden with the S3_PREFIX environment variable)
+VOICE_BASE_PREFIX = os.getenv("S3_PREFIX", "voices")
+
+# Default folder name (used when the request does not specify `folder`)
+DEFAULT_UPLOAD_FOLDER = "voiceFile"
+
+# # Define a set of allowed folders if needed (e.g. for validation)
+# ALLOWED_FOLDERS = {"raw", "processed", "public"}
+
diff --git a/app/emotion_service.py b/app/emotion_service.py
@@ -0,0 +1,117 @@
+import io
+import os
+import tempfile
+from typing import Dict, Any
+import librosa
+import torch
+from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor
+import numpy as np
+
+
+class EmotionAnalyzer:
+    """Speech emotion classifier backed by a Hugging Face wav2vec2 model.
+
+    The model and feature extractor are loaded once, at construction time.
+    If loading fails, both attributes stay ``None`` and ``analyze_emotion``
+    returns an error payload instead of raising.
+    """
+
+    def __init__(self):
+        self.model = None
+        self.feature_extractor = None
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self._load_model()
+
+    def _load_model(self):
+        """Load the Hugging Face model and feature extractor onto ``self.device``.
+
+        Any failure is printed and leaves ``self.model`` and
+        ``self.feature_extractor`` set to ``None``.
+        """
+        model_name = "jungjongho/wav2vec2-xlsr-korean-speech-emotion-recognition"
+
+        try:
+            self.model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
+            self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
+            self.model.to(self.device)
+            self.model.eval()
+        except Exception as e:
+            print(f"모델 로드 실패: {e}")
+            self.model = None
+            self.feature_extractor = None
+
+    def analyze_emotion(self, audio_file) -> Dict[str, Any]:
+        """Classify the emotion expressed in an uploaded voice recording.
+
+        Args:
+            audio_file: Uploaded audio; its bytes are read from
+                ``audio_file.file`` (e.g. a FastAPI ``UploadFile``).
+
+        Returns:
+            On success, a dict with ``emotion``, ``confidence``,
+            ``emotion_scores``, ``audio_duration`` and ``sample_rate``.
+            On failure (model not loaded, or any error during analysis), a
+            dict with ``error``, ``emotion="unknown"`` and ``confidence=0.0``.
+        """
+        if not self.model or not self.feature_extractor:
+            return {
+                "error": "모델이 로드되지 않았습니다",
+                "emotion": "unknown",
+                "confidence": 0.0
+            }
+
+        # Bound before the try block so the cleanup in `finally` can never hit
+        # an unbound name when temp-file creation or the upload read fails.
+        tmp_file_path = None
+        try:
+            # librosa loads from a path, so spill the upload to a temp file.
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+                # Record the path first: the file already exists on disk
+                # (delete=False) and must be removed even if read/write raises.
+                tmp_file_path = tmp_file.name
+                tmp_file.write(audio_file.file.read())
+
+            # Load the audio, resampled to 16 kHz.
+            audio, sr = librosa.load(tmp_file_path, sr=16000)
+
+            # Feature extraction.
+            inputs = self.feature_extractor(
+                audio,
+                sampling_rate=16000,
+                return_tensors="pt",
+                padding=True
+            )
+
+            # Move the input tensors to the same device as the model.
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+
+            # Inference without gradient tracking.
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+
+            # NOTE(review): hard-coded label order; confirm it matches the
+            # label mapping of the loaded model.
+            emotion_labels = ["neutral", "happy", "sad", "angry", "fear", "surprise", "disgust"]
+
+            # Emotion with the highest probability.
+            predicted_class = torch.argmax(predictions, dim=-1).item()
+            confidence = predictions[0][predicted_class].item()
+            emotion = emotion_labels[predicted_class] if predicted_class < len(emotion_labels) else "unknown"
+
+            # Probability of every labelled emotion (bounded by the model's output width).
+            emotion_scores = {
+                emotion_labels[i]: predictions[0][i].item()
+                for i in range(min(len(emotion_labels), predictions.shape[1]))
+            }
+
+            return {
+                "emotion": emotion,
+                "confidence": confidence,
+                "emotion_scores": emotion_scores,
+                "audio_duration": len(audio) / sr,
+                "sample_rate": sr
+            }
+
+        except Exception as e:
+            return {
+                "error": f"분석 중 오류 발생: {str(e)}",
+                "emotion": "unknown",
+                "confidence": 0.0
+            }
+        finally:
+            # Remove the temp file, if one was created.
+            if tmp_file_path is not None:
+                try:
+                    os.unlink(tmp_file_path)
+                except OSError as e:
+                    print(f"임시 파일 삭제 실패: {tmp_file_path}, 오류: {e}")
+
+
+# Module-level shared instance. Constructing it runs EmotionAnalyzer.__init__,
+# which calls _load_model(), so the model is loaded when this module is imported.
+emotion_analyzer = EmotionAnalyzer()
+
+
+def analyze_voice_emotion(audio_file) -> Dict[str, Any]:
+    """Analyze the emotion of ``audio_file`` with the shared module-level analyzer.
+
+    Returns whatever ``emotion_analyzer.analyze_emotion`` returns for the file.
+    """
+    analysis = emotion_analyzer.analyze_emotion(audio_file)
+    return analysis
diff --git a/app/main.py b/app/main.py
@@ -1,7 +1,144 @@
-from fastapi import FastAPI
+import os
+from typing import Optional
+from fastapi import FastAPI, UploadFile, File, HTTPException, Form
+from fastapi.responses import JSONResponse
+from typing import List
+from .s3_service import upload_fileobj, list_bucket_objects
+from .constants import VOICE_BASE_PREFIX, DEFAULT_UPLOAD_FOLDER
+from .emotion_service import analyze_voice_emotion
+from .stt_service import transcribe_voice
 
 app = FastAPI(title="Caring API")
 
 @app.get("/health")
 def health():
+    """Health check: always returns the static payload ``{"status": "ok"}``."""
     return {"status": "ok"}
+
+
+# POST : upload voice
+@app.post("/voices/upload")
+async def upload_voice(
+    file: UploadFile = File(...),
+    folder: Optional[str] = Form(default=None),  # e.g. "raw" or "user123/session1"
+):
+    """Upload a voice file to S3, analyze its emotion, and list the target folder.
+
+    Responds with the uploaded object key, the keys found under the same
+    prefix, and the emotion analysis result. Raises HTTP 500 when
+    ``S3_BUCKET_NAME`` is not configured.
+    """
+    bucket = os.getenv("S3_BUCKET_NAME")
+    if not bucket:
+        raise HTTPException(status_code=500, detail="S3_BUCKET_NAME not configured")
+
+    # Object key layout: <base prefix>/<folder>/<original file name>
+    target_folder = folder or DEFAULT_UPLOAD_FOLDER
+    effective_prefix = (VOICE_BASE_PREFIX.rstrip("/") + "/" + target_folder).rstrip("/")
+    object_name = os.path.basename(file.filename or "upload.wav")
+    key = effective_prefix + "/" + object_name
+
+    # Upload to S3, storing the Content-Type along with the object.
+    upload_fileobj(bucket=bucket, key=key, fileobj=file.file, content_type=file.content_type)
+
+    # Best-effort rewind so the next consumer reads the stream from the start.
+    try:
+        file.file.seek(0)
+    except Exception:
+        pass
+
+    # Run the emotion analysis on the same upload.
+    emotion_result = analyze_voice_emotion(file)
+
+    # There is no database, so report the bucket contents under the prefix.
+    names = list_bucket_objects(bucket=bucket, prefix=effective_prefix)
+
+    response = {
+        "uploaded": key,
+        "files": names,
+        "emotion_analysis": emotion_result,
+    }
+    return response
+
+
+# GET : query my voice histories
+@app.get("/voices")
+async def list_voices(skip: int = 0, limit: int = 50, folder: Optional[str] = None):
+    """List stored voice object keys under a folder, one page at a time.
+
+    Args:
+        skip: Number of keys to skip from the start of the listing (>= 0).
+        limit: Maximum number of keys to return (>= 0).
+        folder: Folder under the base prefix; defaults to ``DEFAULT_UPLOAD_FOLDER``.
+
+    Returns:
+        ``items`` (the page of keys), ``count`` (its length) and ``next``
+        (the ``skip`` value to use for the following page).
+
+    Raises:
+        HTTPException: 422 when ``skip`` or ``limit`` is negative, 500 when
+            ``S3_BUCKET_NAME`` is not configured.
+    """
+    # Negative values would slice from the end of the list and produce a
+    # meaningless `next` offset, so reject them up front.
+    if skip < 0 or limit < 0:
+        raise HTTPException(status_code=422, detail="skip and limit must be non-negative")
+
+    bucket = os.getenv("S3_BUCKET_NAME")
+    if not bucket:
+        raise HTTPException(status_code=500, detail="S3_BUCKET_NAME not configured")
+    base_prefix = VOICE_BASE_PREFIX.rstrip("/")
+    effective_prefix = f"{base_prefix}/{folder or DEFAULT_UPLOAD_FOLDER}".rstrip("/")
+
+    keys = list_bucket_objects(bucket=bucket, prefix=effective_prefix)
+    # Simple paging: a slice over the full listing.
+    sliced = keys[skip: skip + limit]
+    return {"items": sliced, "count": len(sliced), "next": skip + len(sliced)}
+
+
+# GET : query specific voice & show result
+@app.get("/voices/{voice_id}")
+async def get_voice(voice_id: str):
+    """Return placeholder details for ``voice_id``; no real lookup is performed."""
+    analysis = {"pitch_mean": 220.5, "energy": 0.82}
+    payload = {
+        "voice_id": voice_id,
+        "filename": voice_id + ".wav",
+        "status": "processed",
+        "duration_sec": 12.34,
+        "analysis": analysis,
+    }
+    return JSONResponse(content=payload)
+
+
+# POST : analyze emotion from uploaded voice file
+@app.post("/voices/analyze-emotion")
+async def analyze_emotion(file: UploadFile = File(...)):
+    """Analyze the emotion of the uploaded voice file and return the result."""
+    return analyze_voice_emotion(file)
+
+
+# POST : convert speech to text using Google STT
+@app.post("/voices/transcribe")
+async def transcribe_speech(
+    file: UploadFile = File(...),
+    language_code: str = "ko-KR"
+):
+    """Convert the uploaded voice file to text and return the transcription result."""
+    return transcribe_voice(file, language_code)
+
+
+# POST : upload voice with both emotion analysis and STT
+@app.post("/voices/upload-with-analysis")
+async def upload_voice_with_analysis(
+    file: UploadFile = File(...),
+    folder: Optional[str] = Form(default=None),
+    language_code: str = Form(default="ko-KR")
+):
+    """Upload a voice file to S3, then run both emotion analysis and STT on it.
+
+    Responds with the uploaded object key, the keys found under the same
+    prefix, the emotion analysis result, and the transcription result.
+    Raises HTTP 500 when ``S3_BUCKET_NAME`` is not configured.
+    """
+    bucket = os.getenv("S3_BUCKET_NAME")
+    if not bucket:
+        raise HTTPException(status_code=500, detail="S3_BUCKET_NAME not configured")
+
+    def _rewind():
+        # Best-effort reset of the upload stream for the next consumer.
+        try:
+            file.file.seek(0)
+        except Exception:
+            pass
+
+    # S3 upload
+    target_folder = folder or DEFAULT_UPLOAD_FOLDER
+    effective_prefix = (VOICE_BASE_PREFIX.rstrip("/") + "/" + target_folder).rstrip("/")
+    object_name = os.path.basename(file.filename or "upload.wav")
+    key = effective_prefix + "/" + object_name
+    upload_fileobj(bucket=bucket, key=key, fileobj=file.file, content_type=file.content_type)
+    _rewind()
+
+    # Emotion analysis
+    emotion_result = analyze_voice_emotion(file)
+    _rewind()
+
+    # Speech-to-text
+    stt_result = transcribe_voice(file, language_code)
+
+    # List the objects under the same prefix
+    names = list_bucket_objects(bucket=bucket, prefix=effective_prefix)
+
+    response = {
+        "uploaded": key,
+        "files": names,
+        "emotion_analysis": emotion_result,
+        "transcription": stt_result,
+    }
+    return response
diff --git a/app/s3_service.py b/app/s3_service.py
@@ -0,0 +1,40 @@
+import os
+from typing import List
+
+import boto3  # type: ignore
+from botocore.client import Config  # type: ignore
+
+
+def get_s3_client():
+    """Build a boto3 S3 client from environment configuration.
+
+    The region comes from ``AWS_REGION`` (default ``ap-northeast-2``) and
+    SigV4 signing is requested. Explicit credentials are passed only when both
+    ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` are set;
+    ``AWS_SESSION_TOKEN`` is added on top of them when present.
+    """
+    client_kwargs = {
+        "region_name": os.getenv("AWS_REGION", "ap-northeast-2"),
+        "config": Config(signature_version="s3v4"),
+    }
+
+    key_id = os.getenv("AWS_ACCESS_KEY_ID")
+    secret = os.getenv("AWS_SECRET_ACCESS_KEY")
+    if key_id and secret:
+        client_kwargs.update(aws_access_key_id=key_id, aws_secret_access_key=secret)
+        token = os.getenv("AWS_SESSION_TOKEN")
+        if token:
+            client_kwargs["aws_session_token"] = token
+
+    return boto3.client("s3", **client_kwargs)
+
+
+def upload_fileobj(bucket: str, key: str, fileobj, content_type: str = None) -> str:
+    """Upload a file-like object to S3 and return its object key.
+
+    Args:
+        bucket: Destination bucket name.
+        key: Destination object key.
+        fileobj: Readable binary file-like object to upload.
+        content_type: Optional MIME type stored as the object's ``ContentType``.
+
+    Returns:
+        The ``key`` that was passed in.
+    """
+    s3 = get_s3_client()
+    # Send ExtraArgs only when there is something to set; otherwise pass None.
+    extra_args = {"ContentType": content_type} if content_type else None
+    s3.upload_fileobj(fileobj, bucket, key, ExtraArgs=extra_args)
+    return key
+
+
+def list_bucket_objects(bucket: str, prefix: str = "") -> List[str]:
+    """Return the keys of all objects in ``bucket`` whose key starts with ``prefix``.
+
+    Walks every page of the ``list_objects_v2`` paginator; pages without a
+    ``Contents`` entry contribute nothing.
+    """
+    pages = get_s3_client().get_paginator("list_objects_v2").paginate(
+        Bucket=bucket, Prefix=prefix
+    )
+    return [
+        entry["Key"]
+        for page in pages
+        for entry in (page.get("Contents") or [])
+    ]
+
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from dotenv import load_dotenv # type: ignore
		load_dotenv()