From b3a91f0b7e86a484310766efbd2a3cc53948b7fb Mon Sep 17 00:00:00 2001 From: JEONGHAN <69452755+H4nnhoi@users.noreply.github.com> Date: Mon, 20 Oct 2025 20:48:41 +0900 Subject: [PATCH 1/5] [FEAT] create Default APIs of voice (#2) * feat : create default APIs(upload, query list, and query specific) #1 * refactor : upload file at s3 #1 --- README.md | 22 +++++++++++++++-- app/__init__.py | 3 +++ app/constants.py | 11 +++++++++ app/main.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++- app/s3_service.py | 34 +++++++++++++++++++++++++++ requirements.txt | 11 ++++----- 6 files changed, 131 insertions(+), 10 deletions(-) create mode 100644 app/constants.py create mode 100644 app/s3_service.py diff --git a/README.md b/README.md index 59ac98a..787d677 100644 --- a/README.md +++ b/README.md @@ -19,9 +19,27 @@ pip install -r requirements.txt ## 실행 +개발 서버(Uvicorn) 실행: + ```bash -python -m app.main +uvicorn app.main:app --reload --port 8000 +``` + +API 문서: `http://127.0.0.1:8000/docs` + +## 환경 변수 설정 + +프로젝트 루트에 `.env` 파일을 생성하고 다음 값을 채우세요. 예시는 `.env.example` 참고. + ``` +AWS_ACCESS_KEY_ID=... +AWS_SECRET_ACCESS_KEY=... +AWS_REGION=ap-northeast-2 +S3_BUCKET_NAME=your-bucket +S3_PREFIX=voices +``` + +`.env`는 `app/__init__.py`에서 자동 로드됩니다. 
## 프로젝트 구조 @@ -29,7 +47,7 @@ python -m app.main caring-voice/ ├── app/ │ ├── __init__.py -│ └── main.py # 메인 엔트리 포인트 +│ └── main.py # FastAPI 엔트리 포인트 및 엔드포인트 ├── .gitignore ├── README.md ├── requirements.txt diff --git a/app/__init__.py b/app/__init__.py index e69de29..1e86223 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -0,0 +1,3 @@ +from dotenv import load_dotenv # type: ignore +load_dotenv() + diff --git a/app/constants.py b/app/constants.py new file mode 100644 index 0000000..8073bfd --- /dev/null +++ b/app/constants.py @@ -0,0 +1,11 @@ +import os + +# 업로드 기본 베이스 프리픽스 (환경변수 S3_PREFIX로 오버라이드 가능) +VOICE_BASE_PREFIX = os.getenv("S3_PREFIX", "voices") + +# 기본 폴더명 (요청에 folder 미지정 시 사용) +DEFAULT_UPLOAD_FOLDER = "raw" + +# 필요 시 허용 폴더 집합 정의 (예: 검증용) +ALLOWED_FOLDERS = {"raw", "processed", "public"} + diff --git a/app/main.py b/app/main.py index 98d96a2..342c6ac 100644 --- a/app/main.py +++ b/app/main.py @@ -1,7 +1,65 @@ -from fastapi import FastAPI +import os +from typing import Optional +from fastapi import FastAPI, UploadFile, File, HTTPException, Form +from fastapi.responses import JSONResponse +from typing import List +from .s3_service import upload_fileobj, list_bucket_objects +from .constants import VOICE_BASE_PREFIX, DEFAULT_UPLOAD_FOLDER app = FastAPI(title="Caring API") @app.get("/health") def health(): return {"status": "ok"} + + +# POST : upload voice +@app.post("/voices/upload") +async def upload_voice( + file: UploadFile = File(...), + folder: Optional[str] = Form(default=None), # 예: "raw" 또는 "user123/session1" +): + bucket = os.getenv("S3_BUCKET_NAME") + if not bucket: + raise HTTPException(status_code=500, detail="S3_BUCKET_NAME not configured") + + # 키: optional prefix/YYYYMMDD_originalname + base_prefix = VOICE_BASE_PREFIX.rstrip("/") + effective_prefix = f"{base_prefix}/{folder or DEFAULT_UPLOAD_FOLDER}".rstrip("/") + key = f"{effective_prefix}/{file.filename}" + + # 파일을 S3에 업로드 + upload_fileobj(bucket=bucket, key=key, fileobj=file.file) + + 
# DB가 없으므로, 버킷의 파일 목록을 반환 + names = list_bucket_objects(bucket=bucket, prefix=effective_prefix) + return {"uploaded": key, "files": names} + + +# GET : query my voice histories +@app.get("/voices") +async def list_voices(skip: int = 0, limit: int = 50, folder: Optional[str] = None): + bucket = os.getenv("S3_BUCKET_NAME") + if not bucket: + raise HTTPException(status_code=500, detail="S3_BUCKET_NAME not configured") + base_prefix = VOICE_BASE_PREFIX.rstrip("/") + effective_prefix = f"{base_prefix}/{folder or DEFAULT_UPLOAD_FOLDER}".rstrip("/") + + keys = list_bucket_objects(bucket=bucket, prefix=effective_prefix) + # 페이징 비슷하게 slice만 적용 + sliced = keys[skip: skip + limit] + return {"items": sliced, "count": len(sliced), "next": skip + len(sliced)} + + +# GET : query specific voice & show result +@app.get("/voices/{voice_id}") +async def get_voice(voice_id: str): + # 내부 로직은 생략, 더미 상세 반환 + result = { + "voice_id": voice_id, + "filename": f"{voice_id}.wav", + "status": "processed", + "duration_sec": 12.34, + "analysis": {"pitch_mean": 220.5, "energy": 0.82} + } + return JSONResponse(content=result) diff --git a/app/s3_service.py b/app/s3_service.py new file mode 100644 index 0000000..4fdbc14 --- /dev/null +++ b/app/s3_service.py @@ -0,0 +1,34 @@ +import os +from typing import List + +import boto3 # type: ignore +from botocore.client import Config # type: ignore + + +def get_s3_client(): + region = os.getenv("AWS_REGION", "ap-northeast-2") + return boto3.client( + "s3", + region_name=region, + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + config=Config(signature_version="s3v4"), + ) + + +def upload_fileobj(bucket: str, key: str, fileobj) -> str: + s3 = get_s3_client() + s3.upload_fileobj(fileobj, bucket, key) + return key + + +def list_bucket_objects(bucket: str, prefix: str = "") -> List[str]: + s3 = get_s3_client() + paginator = s3.get_paginator("list_objects_v2") + keys: List[str] = [] + for page in 
paginator.paginate(Bucket=bucket, Prefix=prefix): + for obj in page.get("Contents", []) or []: + keys.append(obj["Key"]) + return keys + + diff --git a/requirements.txt b/requirements.txt index 21a99f5..15f74e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,4 @@ -# 현재 프로젝트에 특별한 의존성이 없습니다. -# 필요에 따라 패키지를 추가하세요. - -# 예시: -# requests>=2.31.0 -# numpy>=1.24.0 -# pandas>=2.0.0 +fastapi>=0.115.0 +uvicorn[standard]>=0.30.0 +boto3>=1.34.0 +python-dotenv>=1.0.1 From 6ca42ff66f31f02fe54217a265e43b32573179bd Mon Sep 17 00:00:00 2001 From: JEONGHAN <69452755+H4nnhoi@users.noreply.github.com> Date: Sat, 25 Oct 2025 16:02:43 +0900 Subject: [PATCH 2/5] feat : set analyzing voice emotion method #3 (#4) --- app/emotion_service.py | 117 +++++++++++++++++++++++++++++++++++++++++ app/main.py | 18 ++++++- requirements.txt | 4 ++ 3 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 app/emotion_service.py diff --git a/app/emotion_service.py b/app/emotion_service.py new file mode 100644 index 0000000..5ff363a --- /dev/null +++ b/app/emotion_service.py @@ -0,0 +1,117 @@ +import io +import tempfile +from typing import Dict, Any +import librosa +import torch +from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor +import numpy as np + + +class EmotionAnalyzer: + def __init__(self): + self.model = None + self.feature_extractor = None + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self._load_model() + + def _load_model(self): + """Hugging Face 모델 로드""" + model_name = "jungjongho/wav2vec2-xlsr-korean-speech-emotion-recognition" + + try: + self.model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name) + self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name) + self.model.to(self.device) + self.model.eval() + except Exception as e: + print(f"모델 로드 실패: {e}") + self.model = None + self.feature_extractor = None + + def analyze_emotion(self, audio_file) -> Dict[str, Any]: + 
""" + 음성 파일의 감정을 분석합니다. + + Args: + audio_file: 업로드된 음성 파일 (FastAPI UploadFile) + + Returns: + Dict: 감정 분석 결과 + """ + if not self.model or not self.feature_extractor: + return { + "error": "모델이 로드되지 않았습니다", + "emotion": "unknown", + "confidence": 0.0 + } + + try: + # 임시 파일로 저장 + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file: + content = audio_file.file.read() + tmp_file.write(content) + tmp_file_path = tmp_file.name + + # 오디오 로드 (16kHz로 리샘플링) + audio, sr = librosa.load(tmp_file_path, sr=16000) + + # 특성 추출 + inputs = self.feature_extractor( + audio, + sampling_rate=16000, + return_tensors="pt", + padding=True + ) + + # GPU로 이동 + inputs = {k: v.to(self.device) for k, v in inputs.items()} + + # 추론 + with torch.no_grad(): + outputs = self.model(**inputs) + predictions = torch.nn.functional.softmax(outputs.logits, dim=-1) + + # 감정 라벨 (모델에 따라 조정 필요) + emotion_labels = ["neutral", "happy", "sad", "angry", "fear", "surprise", "disgust"] + + # 가장 높은 확률의 감정 + predicted_class = torch.argmax(predictions, dim=-1).item() + confidence = predictions[0][predicted_class].item() + emotion = emotion_labels[predicted_class] if predicted_class < len(emotion_labels) else "unknown" + + # 모든 감정의 확률 + emotion_scores = { + emotion_labels[i]: predictions[0][i].item() + for i in range(min(len(emotion_labels), predictions.shape[1])) + } + + return { + "emotion": emotion, + "confidence": confidence, + "emotion_scores": emotion_scores, + "audio_duration": len(audio) / sr, + "sample_rate": sr + } + + except Exception as e: + return { + "error": f"분석 중 오류 발생: {str(e)}", + "emotion": "unknown", + "confidence": 0.0 + } + finally: + # 임시 파일 정리 + try: + import os + os.unlink(tmp_file_path) + except: + pass + + +# 전역 인스턴스 +emotion_analyzer = EmotionAnalyzer() + + +def analyze_voice_emotion(audio_file) -> Dict[str, Any]: + """음성 감정 분석 함수""" + return emotion_analyzer.analyze_emotion(audio_file) diff --git a/app/main.py b/app/main.py index 342c6ac..238f215 100644 --- 
a/app/main.py +++ b/app/main.py @@ -5,6 +5,7 @@ from typing import List from .s3_service import upload_fileobj, list_bucket_objects from .constants import VOICE_BASE_PREFIX, DEFAULT_UPLOAD_FOLDER +from .emotion_service import analyze_voice_emotion app = FastAPI(title="Caring API") @@ -31,9 +32,16 @@ async def upload_voice( # 파일을 S3에 업로드 upload_fileobj(bucket=bucket, key=key, fileobj=file.file) + # 감정 분석 수행 + emotion_result = analyze_voice_emotion(file) + # DB가 없으므로, 버킷의 파일 목록을 반환 names = list_bucket_objects(bucket=bucket, prefix=effective_prefix) - return {"uploaded": key, "files": names} + return { + "uploaded": key, + "files": names, + "emotion_analysis": emotion_result + } # GET : query my voice histories @@ -63,3 +71,11 @@ async def get_voice(voice_id: str): "analysis": {"pitch_mean": 220.5, "energy": 0.82} } return JSONResponse(content=result) + + +# POST : analyze emotion from uploaded voice file +@app.post("/voices/analyze-emotion") +async def analyze_emotion(file: UploadFile = File(...)): + """음성 파일의 감정을 분석합니다.""" + emotion_result = analyze_voice_emotion(file) + return emotion_result diff --git a/requirements.txt b/requirements.txt index 15f74e3..ea0991e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,7 @@ fastapi>=0.115.0 uvicorn[standard]>=0.30.0 boto3>=1.34.0 python-dotenv>=1.0.1 +transformers>=4.30.0 +torch>=2.0.0 +librosa>=0.10.0 +scipy>=1.10.0 From af2290ca514bf8d5fe77e83d90559fd80bc1a144 Mon Sep 17 00:00:00 2001 From: hann Date: Sat, 25 Oct 2025 19:17:14 +0900 Subject: [PATCH 3/5] feat : add translate API voice to text #5 --- README.md | 9 +++- app/main.py | 47 ++++++++++++++++++ app/stt_service.py | 121 +++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 2 + 4 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 app/stt_service.py diff --git a/README.md b/README.md index 787d677..60450c0 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,9 @@ API 문서: `http://127.0.0.1:8000/docs` ## 환경 변수 설정 -프로젝트 루트에 
`.env` 파일을 생성하고 다음 값을 채우세요. 예시는 `.env.example` 참고. +프로젝트 루트에 `.env` 파일을 생성하고 다음 값을 채우세요. +### AWS S3 설정 ``` AWS_ACCESS_KEY_ID=... AWS_SECRET_ACCESS_KEY=... @@ -39,6 +40,12 @@ S3_BUCKET_NAME=your-bucket S3_PREFIX=voices ``` +### Google Cloud Speech-to-Text 설정 +``` +# 서비스 계정 키 파일 경로 설정 +GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json +``` + `.env`는 `app/__init__.py`에서 자동 로드됩니다. ## 프로젝트 구조 diff --git a/app/main.py b/app/main.py index 238f215..056a513 100644 --- a/app/main.py +++ b/app/main.py @@ -6,6 +6,7 @@ from .s3_service import upload_fileobj, list_bucket_objects from .constants import VOICE_BASE_PREFIX, DEFAULT_UPLOAD_FOLDER from .emotion_service import analyze_voice_emotion +from .stt_service import transcribe_voice app = FastAPI(title="Caring API") @@ -79,3 +80,49 @@ async def analyze_emotion(file: UploadFile = File(...)): """음성 파일의 감정을 분석합니다.""" emotion_result = analyze_voice_emotion(file) return emotion_result + + +# POST : convert speech to text using Google STT +@app.post("/voices/transcribe") +async def transcribe_speech( + file: UploadFile = File(...), + language_code: str = "ko-KR" +): + """음성 파일을 텍스트로 변환합니다.""" + stt_result = transcribe_voice(file, language_code) + return stt_result + + +# POST : upload voice with both emotion analysis and STT +@app.post("/voices/upload-with-analysis") +async def upload_voice_with_analysis( + file: UploadFile = File(...), + folder: Optional[str] = Form(default=None), + language_code: str = Form(default="ko-KR") +): + """음성 파일을 업로드하고 감정 분석과 STT를 모두 수행합니다.""" + bucket = os.getenv("S3_BUCKET_NAME") + if not bucket: + raise HTTPException(status_code=500, detail="S3_BUCKET_NAME not configured") + + # S3 업로드 + base_prefix = VOICE_BASE_PREFIX.rstrip("/") + effective_prefix = f"{base_prefix}/{folder or DEFAULT_UPLOAD_FOLDER}".rstrip("/") + key = f"{effective_prefix}/{file.filename}" + upload_fileobj(bucket=bucket, key=key, fileobj=file.file) + + # 감정 분석 + emotion_result = analyze_voice_emotion(file) + + # 
STT 변환 + stt_result = transcribe_voice(file, language_code) + + # 파일 목록 조회 + names = list_bucket_objects(bucket=bucket, prefix=effective_prefix) + + return { + "uploaded": key, + "files": names, + "emotion_analysis": emotion_result, + "transcription": stt_result + } diff --git a/app/stt_service.py b/app/stt_service.py new file mode 100644 index 0000000..0a55179 --- /dev/null +++ b/app/stt_service.py @@ -0,0 +1,121 @@ +import io +import tempfile +import os +from typing import Dict, Any, Optional +from google.cloud import speech +from google.oauth2 import service_account +import librosa + + +class GoogleSTTService: + def __init__(self): + self.client = None + self._initialize_client() + + def _initialize_client(self): + """Google Cloud Speech-to-Text 클라이언트 초기화""" + try: + # 환경변수에서 서비스 계정 키 파일 경로 가져오기 + credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS") + + if credentials_path and os.path.exists(credentials_path): + # 서비스 계정 키 파일로 인증 + credentials = service_account.Credentials.from_service_account_file( + credentials_path, + scopes=["https://www.googleapis.com/auth/cloud-platform"] + ) + self.client = speech.SpeechClient(credentials=credentials) + else: + # 기본 인증 (환경변수 GOOGLE_APPLICATION_CREDENTIALS 설정됨) + self.client = speech.SpeechClient() + + except Exception as e: + print(f"Google STT 클라이언트 초기화 실패: {e}") + self.client = None + + def transcribe_audio(self, audio_file, language_code: str = "ko-KR") -> Dict[str, Any]: + """ + 음성 파일을 텍스트로 변환합니다. 
+ + Args: + audio_file: 업로드된 음성 파일 (FastAPI UploadFile) + language_code: 언어 코드 (기본값: ko-KR) + + Returns: + Dict: STT 결과 + """ + if not self.client: + return { + "error": "Google STT 클라이언트가 초기화되지 않았습니다", + "transcript": "", + "confidence": 0.0 + } + + try: + # 임시 파일로 저장 + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file: + content = audio_file.file.read() + tmp_file.write(content) + tmp_file_path = tmp_file.name + + # 오디오 파일 로드 및 전처리 + audio_data, sample_rate = librosa.load(tmp_file_path, sr=16000) + + # 오디오 데이터를 bytes로 변환 + audio_bytes = (audio_data * 32767).astype('int16').tobytes() + + # Google Cloud Speech-to-Text 요청 구성 + audio = speech.RecognitionAudio(content=audio_bytes) + config = speech.RecognitionConfig( + encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=sample_rate, + language_code=language_code, + enable_automatic_punctuation=True, + enable_word_time_offsets=True, + model="latest_long", # 최신 장시간 모델 사용 + ) + + # STT 요청 실행 + response = self.client.recognize(config=config, audio=audio) + + # 결과 처리 + if response.results: + result = response.results[0] + transcript = result.alternatives[0].transcript + confidence = result.alternatives[0].confidence + + return { + "transcript": transcript, + "confidence": confidence, + "language_code": language_code, + "audio_duration": len(audio_data) / sample_rate, + "sample_rate": sample_rate + } + else: + return { + "error": "음성을 인식할 수 없습니다", + "transcript": "", + "confidence": 0.0 + } + + except Exception as e: + return { + "error": f"STT 처리 중 오류 발생: {str(e)}", + "transcript": "", + "confidence": 0.0 + } + finally: + # 임시 파일 정리 + try: + os.unlink(tmp_file_path) + except: + pass + + +# 전역 인스턴스 +stt_service = GoogleSTTService() + + +def transcribe_voice(audio_file, language_code: str = "ko-KR") -> Dict[str, Any]: + """음성을 텍스트로 변환하는 함수""" + return stt_service.transcribe_audio(audio_file, language_code) diff --git a/requirements.txt b/requirements.txt index 
ea0991e..297b90c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,5 @@ transformers>=4.30.0 torch>=2.0.0 librosa>=0.10.0 scipy>=1.10.0 +google-cloud-speech>=2.21.0 +google-auth>=2.23.0 From 04e7f0dc620d7a8f6cbd93fdb572fa82168fee67 Mon Sep 17 00:00:00 2001 From: hann Date: Sat, 25 Oct 2025 19:44:12 +0900 Subject: [PATCH 4/5] refactor : edit by feedback(coderabbit) #5 --- app/emotion_service.py | 6 +++--- app/main.py | 24 ++++++++++++++++++++---- app/s3_service.py | 20 +++++++++++++------- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/app/emotion_service.py b/app/emotion_service.py index 5ff363a..5cfdcfc 100644 --- a/app/emotion_service.py +++ b/app/emotion_service.py @@ -1,4 +1,5 @@ import io +import os import tempfile from typing import Dict, Any import librosa @@ -102,10 +103,9 @@ def analyze_emotion(self, audio_file) -> Dict[str, Any]: finally: # 임시 파일 정리 try: - import os os.unlink(tmp_file_path) - except: - pass + except OSError as e: + print(f"임시 파일 삭제 실패: {tmp_file_path}, 오류: {e}") # 전역 인스턴스 diff --git a/app/main.py b/app/main.py index 056a513..a6d1b88 100644 --- a/app/main.py +++ b/app/main.py @@ -28,10 +28,17 @@ async def upload_voice( # 키: optional prefix/YYYYMMDD_originalname base_prefix = VOICE_BASE_PREFIX.rstrip("/") effective_prefix = f"{base_prefix}/{folder or DEFAULT_UPLOAD_FOLDER}".rstrip("/") - key = f"{effective_prefix}/{file.filename}" + filename = os.path.basename(file.filename or "upload.wav") + key = f"{effective_prefix}/{filename}" # 파일을 S3에 업로드 - upload_fileobj(bucket=bucket, key=key, fileobj=file.file) + # Content-Type 저장 + upload_fileobj(bucket=bucket, key=key, fileobj=file.file, content_type=file.content_type) + # 이후 소비자를 위해 포인터 리셋 + try: + file.file.seek(0) + except Exception: + pass # 감정 분석 수행 emotion_result = analyze_voice_emotion(file) @@ -108,11 +115,20 @@ async def upload_voice_with_analysis( # S3 업로드 base_prefix = VOICE_BASE_PREFIX.rstrip("/") effective_prefix = f"{base_prefix}/{folder or 
DEFAULT_UPLOAD_FOLDER}".rstrip("/") - key = f"{effective_prefix}/{file.filename}" - upload_fileobj(bucket=bucket, key=key, fileobj=file.file) + filename = os.path.basename(file.filename or "upload.wav") + key = f"{effective_prefix}/{filename}" + upload_fileobj(bucket=bucket, key=key, fileobj=file.file, content_type=file.content_type) + try: + file.file.seek(0) + except Exception: + pass # 감정 분석 emotion_result = analyze_voice_emotion(file) + try: + file.file.seek(0) + except Exception: + pass # STT 변환 stt_result = transcribe_voice(file, language_code) diff --git a/app/s3_service.py b/app/s3_service.py index 4fdbc14..d86a2d7 100644 --- a/app/s3_service.py +++ b/app/s3_service.py @@ -7,13 +7,19 @@ def get_s3_client(): region = os.getenv("AWS_REGION", "ap-northeast-2") - return boto3.client( - "s3", - region_name=region, - aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), - aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), - config=Config(signature_version="s3v4"), - ) + kwargs = { + "region_name": region, + "config": Config(signature_version="s3v4"), + } + access_key = os.getenv("AWS_ACCESS_KEY_ID") + secret_key = os.getenv("AWS_SECRET_ACCESS_KEY") + session_token = os.getenv("AWS_SESSION_TOKEN") + if access_key and secret_key: + kwargs["aws_access_key_id"] = access_key + kwargs["aws_secret_access_key"] = secret_key + if session_token: + kwargs["aws_session_token"] = session_token + return boto3.client("s3", **kwargs) def upload_fileobj(bucket: str, key: str, fileobj) -> str: From 990df4939526682fbeeed5434603181dc1b15e71 Mon Sep 17 00:00:00 2001 From: hann Date: Sun, 26 Oct 2025 15:01:25 +0900 Subject: [PATCH 5/5] refactor : edit s3_service.py by coderabbit feedback #5 --- app/constants.py | 6 +++--- app/stt_service.py | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/app/constants.py b/app/constants.py index 8073bfd..981926a 100644 --- a/app/constants.py +++ b/app/constants.py @@ -4,8 +4,8 @@ VOICE_BASE_PREFIX = os.getenv("S3_PREFIX", 
"voices") # 기본 폴더명 (요청에 folder 미지정 시 사용) -DEFAULT_UPLOAD_FOLDER = "raw" +DEFAULT_UPLOAD_FOLDER = "voiceFile" -# 필요 시 허용 폴더 집합 정의 (예: 검증용) -ALLOWED_FOLDERS = {"raw", "processed", "public"} +# # 필요 시 허용 폴더 집합 정의 (예: 검증용) +# ALLOWED_FOLDERS = {"raw", "processed", "public"} diff --git a/app/stt_service.py b/app/stt_service.py index 0a55179..29a2612 100644 --- a/app/stt_service.py +++ b/app/stt_service.py @@ -5,6 +5,7 @@ from google.cloud import speech from google.oauth2 import service_account import librosa +import numpy as np class GoogleSTTService: @@ -55,6 +56,7 @@ def transcribe_audio(self, audio_file, language_code: str = "ko-KR") -> Dict[str # 임시 파일로 저장 with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file: content = audio_file.file.read() + audio_file.file.seek(0) tmp_file.write(content) tmp_file_path = tmp_file.name @@ -62,6 +64,7 @@ def transcribe_audio(self, audio_file, language_code: str = "ko-KR") -> Dict[str audio_data, sample_rate = librosa.load(tmp_file_path, sr=16000) # 오디오 데이터를 bytes로 변환 + audio_data = np.clip(audio_data, -1.0, 1.0) audio_bytes = (audio_data * 32767).astype('int16').tobytes() # Google Cloud Speech-to-Text 요청 구성