diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..a6eafad --- /dev/null +++ b/.dockerignore @@ -0,0 +1,64 @@ +# Git +.git +.gitignore + +# Python +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +*.so +*.egg +*.egg-info +dist +build + +# Virtual Environment +.venv +venv/ +ENV/ +env/ + +# IDE +.vscode +.idea +*.swp +*.swo +*~ + +# Environment +.env +.env.local + +# Documentation +*.md +!README.md + +# Database +*.db +*.sqlite +*.sqlite3 + +# Logs +*.log +logs/ + +# OS +.DS_Store +Thumbs.db + +# Docker +Dockerfile +docker-compose.yml +.dockerignore + +# CI/CD +.github + +# Others +alembic.ini +manage_db.py +create_migration.py +init_questions.py +credentials/ diff --git a/README.md b/README.md index 59ac98a..7c68516 100644 --- a/README.md +++ b/README.md @@ -19,17 +19,143 @@ pip install -r requirements.txt ## 실행 +### 로컬 개발 환경 + +개발 서버(Uvicorn) 실행: + +```bash +uvicorn app.main:app --reload --port 8000 +``` + +API 문서: `http://127.0.0.1:8000/docs` + +### Docker를 사용한 실행 + +#### 1. 환경 변수 설정 +`.env` 파일에 AWS RDS 및 필요한 설정을 추가하세요: + +```env +# 데이터베이스 (AWS RDS) +DB_HOST=your-rds-endpoint.region.rds.amazonaws.com +DB_PORT=3306 +DB_USER=admin +DB_PASSWORD=your_password +DB_NAME=caring_voice + +# AWS 설정 +AWS_ACCESS_KEY_ID=your_access_key +AWS_SECRET_ACCESS_KEY=your_secret_key +AWS_REGION=ap-northeast-2 +S3_BUCKET_NAME=your-bucket-name + +# Google Cloud 설정 +GOOGLE_APPLICATION_CREDENTIALS=/app/credentials/google-credentials.json +``` + +#### 2. Docker Compose로 서버 실행 + +```bash +# 빌드 및 실행 +docker-compose up -d + +# 로그 확인 +docker-compose logs -f + +# 중지 +docker-compose down +``` + +#### 3. API 접근 +- 로컬: `http://localhost:8000` +- API 문서: `http://localhost:8000/docs` + +## 데이터베이스 마이그레이션 + +### 자동 마이그레이션 +서버 시작 시 자동으로 마이그레이션이 실행됩니다. 
+ +### 수동 마이그레이션 +```bash +# 마이그레이션 파일 생성 +alembic revision --autogenerate -m "커밋 메시지" + +# 마이그레이션 실행 +alembic upgrade head + +# 마이그레이션 롤백 +alembic downgrade -1 +``` + +## 질문 데이터 초기화 + +질문 템플릿을 데이터베이스에 저장합니다: + ```bash -python -m app.main +python init_questions.py +``` + +이 명령어는 다음 카테고리의 질문들을 자동으로 추가합니다: +- emotion (14개) +- stress (10개) +- physical (10개) +- social (10개) +- self_reflection (15개) + +총 59개의 질문이 데이터베이스에 저장됩니다. + +## 환경 변수 설정 + +프로젝트 루트에 `.env` 파일을 생성하고 다음 값을 채우세요. 예시는 `.env.example` 참고. + +``` +AWS_ACCESS_KEY_ID=... +AWS_SECRET_ACCESS_KEY=... +AWS_REGION=ap-northeast-2 +S3_BUCKET_NAME=your-bucket +S3_PREFIX=voices ``` +### Google Cloud 설정 +``` +# 서비스 계정 키 파일 경로 설정 (Speech-to-Text, Natural Language API 공통) +GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json +``` + +### 데이터베이스 설정 +``` +# MySQL 데이터베이스 연결 정보 +DB_HOST=localhost +DB_PORT=3306 +DB_USER=root +DB_PASSWORD=your_password +DB_NAME=table_name +``` + +`.env`는 `app/database.py`에서 자동 로드됩니다. + +> 💡 **배포 환경**: 운영 환경에서는 환경변수를 시스템에 직접 설정하거나, `.env` 파일을 사용하지 않고 컨테이너/Docker의 환경변수 설정을 사용하세요. 
+ +## API 엔드포인트 + +### 음성 관련 API +- `POST /voices/upload`: 음성 파일 업로드 + STT 변환 +- `POST /voices/transcribe`: STT 변환만 +- `POST /voices/{voice_key}/analyze-emotion`: S3 파일 감정 분석 +- `GET /voices`: 파일 목록 조회 + +### 텍스트 분석 API (Google Natural Language) +- `POST /nlp/sentiment`: 텍스트 감정 분석 +- `POST /nlp/entities`: 엔티티 추출 +- `POST /nlp/syntax`: 구문 분석 +- `POST /nlp/analyze`: 종합 텍스트 분석 + ## 프로젝트 구조 ``` caring-voice/ ├── app/ │ ├── __init__.py -│ └── main.py # 메인 엔트리 포인트 +│ └── main.py # FastAPI 엔트리 포인트 및 엔드포인트 ├── .gitignore ├── README.md ├── requirements.txt diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..57efa5f --- /dev/null +++ b/alembic.ini @@ -0,0 +1,60 @@ +# Alembic 설정 파일 + +[alembic] +# 마이그레이션 파일 위치 +script_location = migrations + +# 버전 파일 이름 형식 +file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# SQLAlchemy URL +sqlalchemy.url = driver://user:pass@localhost/dbname + +# 서버 인코딩 +prepend_sys_path = . + +# 로그 레벨 +log_level = INFO + +# UTC 타임스탬프 사용 +date_format = %%Y-%%m-%%d %%H:%%M:%%S + +# 제외할 테이블 패턴 +exclude_tables = + +[post_write_hooks] + +# 로그 출력 +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/app/__init__.py b/app/__init__.py index e69de29..1e86223 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -0,0 +1,3 @@ +from dotenv import load_dotenv # type: ignore +load_dotenv() + diff --git a/app/auth_service.py b/app/auth_service.py new file mode 100644 index 0000000..03e8cb3 --- /dev/null +++ b/app/auth_service.py @@ -0,0 +1,209 
def generate_user_code(length: int = 8) -> str:
    """Generate a random user code made of ASCII letters and digits.

    Args:
        length: Number of characters to generate.

    Returns:
        A string of ``length`` characters, each picked with ``secrets.choice``.
    """
    alphabet = string.ascii_letters + string.digits
    return "".join(secrets.choice(alphabet) for _ in range(length))


def hash_password(password: str) -> str:
    """Hash ``password`` with bcrypt (fresh salt) and return the hash as text."""
    import bcrypt

    salt = bcrypt.gensalt()
    return bcrypt.hashpw(password.encode("utf-8"), salt).decode("utf-8")


def verify_password(password: str, hashed: str) -> bool:
    """Return True when ``password`` matches the bcrypt hash ``hashed``."""
    import bcrypt

    return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8"))


class AuthService:
    """Sign-up and sign-in operations on top of a SQLAlchemy session."""

    _VALID_ROLES = ("USER", "CARE")
    # One message for every credential failure, so a caller cannot tell
    # whether the username, the role or the password was the wrong part.
    _INVALID_CREDENTIALS = "Invalid username or password"

    def __init__(self, db: "Session"):
        self.db = db

    def signup(self, name: str, birthdate: str, username: str, password: str,
               role: str, connecting_user_code: Optional[str] = None) -> dict:
        """Register a new user.

        Args:
            name: Display name of the user.
            birthdate: Date of birth formatted as ``YYYY.MM.DD``.
            username: Login id; must not already exist.
            password: Plain-text password; stored as a bcrypt hash.
            role: ``"USER"`` or ``"CARE"``.
            connecting_user_code: For ``CARE`` accounts, the ``user_code`` of
                the user to link to. Ignored for ``USER`` accounts.

        Returns:
            ``{"success": True, ...}`` with the new user's ``user_code``,
            ``username``, ``name``, ``role`` and ``created_at`` (ISO string),
            or ``{"success": False, "error": <message>}``. Errors are
            returned, not raised; unexpected exceptions roll the session back.
        """
        try:
            # 1. Validate the role.
            if role not in self._VALID_ROLES:
                return {
                    "success": False,
                    "error": "Invalid role. Must be 'USER' or 'CARE'"
                }

            # 2. A USER account never carries a connecting code.
            if role == 'USER':
                connecting_user_code = None

            # 3. A CARE account must point at an existing user.
            if role == 'CARE':
                if not connecting_user_code:
                    return {
                        "success": False,
                        "error": "connecting_user_code is required for CARE role"
                    }

                connecting_user = self.db.query(User).filter(
                    User.user_code == connecting_user_code
                ).first()

                if not connecting_user:
                    return {
                        "success": False,
                        "error": "Connecting user not found"
                    }

            # 4. Reject duplicate usernames.
            existing_user = self.db.query(User).filter(
                User.username == username
            ).first()

            if existing_user:
                return {
                    "success": False,
                    "error": "Username already exists"
                }

            # 5. Parse the birthdate.
            try:
                birth_date = datetime.strptime(birthdate, "%Y.%m.%d").date()
            except ValueError:
                return {
                    "success": False,
                    "error": "Invalid birthdate format. Use YYYY.MM.DD"
                }

            # 6. Generate a user code, retrying until it is unused.
            user_code = generate_user_code()
            while self.db.query(User).filter(User.user_code == user_code).first():
                user_code = generate_user_code()

            # 7. Hash the password; the plain text is never stored.
            hashed_password = hash_password(password)

            # 8. Create and persist the user.
            user = User(
                user_code=user_code,
                username=username,
                password=hashed_password,
                role=role,
                name=name,
                birthdate=birth_date,
                connecting_user_code=connecting_user_code
            )

            self.db.add(user)
            self.db.commit()
            self.db.refresh(user)

            return {
                "success": True,
                "user_code": user.user_code,
                "username": user.username,
                "name": user.name,
                "role": user.role,
                "created_at": user.created_at.isoformat()
            }

        except Exception as e:
            self.db.rollback()
            return {
                "success": False,
                "error": f"Signup failed: {str(e)}"
            }

    def get_user_by_code(self, user_code: str) -> Optional["User"]:
        """Return the user with the given ``user_code``, or None."""
        return self.db.query(User).filter(User.user_code == user_code).first()

    def get_user_by_username(self, username: str) -> Optional["User"]:
        """Return the user with the given ``username``, or None."""
        return self.db.query(User).filter(User.username == username).first()

    def signin(self, username: str, password: str, role: str) -> dict:
        """Check login credentials.

        Args:
            username: Login id.
            password: Plain-text password to verify against the stored hash.
            role: ``"USER"`` or ``"CARE"``; must match the stored role.

        Returns:
            ``{"success": True, "username", "name", "role", "user_code"}`` on
            success, otherwise ``{"success": False, "error": <message>}``.
            An unknown username, a role mismatch and a wrong password all
            produce the same message so the response cannot be used to probe
            which accounts exist.
        """
        try:
            # 1. Validate the role value itself (not account specific).
            if role not in self._VALID_ROLES:
                return {
                    "success": False,
                    "error": "Invalid role. Must be 'USER' or 'CARE'"
                }

            # 2. Look up the user and verify role and password together.
            user = self.db.query(User).filter(User.username == username).first()

            if (
                not user
                or user.role != role
                or not verify_password(password, user.password)
            ):
                return {
                    "success": False,
                    "error": self._INVALID_CREDENTIALS
                }

            return {
                "success": True,
                "username": user.username,
                "name": user.name,
                "role": user.role,
                "user_code": user.user_code
            }

        except Exception as e:
            return {
                "success": False,
                "error": f"Signin failed: {str(e)}"
            }


def get_auth_service(db: "Session") -> AuthService:
    """Create an ``AuthService`` bound to ``db``."""
    return AuthService(db)
# Build the connection URL from the URL-encoded password so that characters
# such as "@", ":" or "/" in DB_PASSWORD cannot break the URL structure.
DATABASE_URL = (
    f"mysql+pymysql://{DB_USER}:{ENCODED_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

# SQLAlchemy engine
engine = create_engine(
    DATABASE_URL,
    echo=False,  # SQL statement logging (set to True while developing)
    pool_pre_ping=True,  # check that a pooled connection is alive before use
    pool_recycle=3600,  # recycle pooled connections after one hour
)

# Session factory
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class that every model inherits from
Base = declarative_base()


def get_db():
    """Yield a database session and close it when the generator is finished."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


def create_tables():
    """Create every table registered on ``Base.metadata``."""
    Base.metadata.create_all(bind=engine)


def drop_tables():
    """Drop every table registered on ``Base.metadata`` (development/testing)."""
    Base.metadata.drop_all(bind=engine)
class DatabaseService:
    """Data-access helper that wraps a single SQLAlchemy session."""

    def __init__(self, db: Session):
        self.db = db

    def _persist(self, row):
        """Add ``row`` to the session, commit, refresh it and hand it back."""
        self.db.add(row)
        self.db.commit()
        self.db.refresh(row)
        return row

    # ------------------------------------------------------------- User
    def create_user(self, username: str, password: str, role: str, name: str, birthdate: date) -> User:
        """Insert a user row and return it."""
        new_user = User(
            username=username,
            password=password,
            role=role,
            name=name,
            birthdate=birthdate,
        )
        return self._persist(new_user)

    def get_user_by_id(self, user_id: int) -> Optional[User]:
        """Return the user with this ``user_id``, or None."""
        users = self.db.query(User)
        return users.filter(User.user_id == user_id).first()

    def get_user_by_username(self, username: str) -> Optional[User]:
        """Return the user with this ``username``, or None."""
        users = self.db.query(User)
        return users.filter(User.username == username).first()

    def get_users(self, skip: int = 0, limit: int = 100) -> List[User]:
        """Return one page of users (offset ``skip``, at most ``limit`` rows)."""
        page = self.db.query(User).offset(skip).limit(limit)
        return page.all()

    # ------------------------------------------------------------ Voice
    def create_voice(self, voice_key: str, voice_name: str, duration_ms: int,
                     user_id: int, sample_rate: Optional[int] = None,
                     bit_rate: Optional[int] = None) -> Voice:
        """Insert the metadata row for an uploaded voice file."""
        new_voice = Voice(
            voice_key=voice_key,
            voice_name=voice_name,
            duration_ms=duration_ms,
            sample_rate=sample_rate,
            bit_rate=bit_rate,
            user_id=user_id,
        )
        return self._persist(new_voice)

    def get_voice_by_id(self, voice_id: int) -> Optional[Voice]:
        """Return the voice with this ``voice_id``, or None."""
        voices = self.db.query(Voice)
        return voices.filter(Voice.voice_id == voice_id).first()

    def get_voice_by_key(self, voice_key: str) -> Optional[Voice]:
        """Return the voice stored under this S3 key, or None."""
        voices = self.db.query(Voice)
        return voices.filter(Voice.voice_key == voice_key).first()

    def get_voices_by_user(self, user_id: int, skip: int = 0, limit: int = 50) -> List[Voice]:
        """Return a user's voices, newest first, with ``questions`` eager-loaded."""
        from sqlalchemy.orm import joinedload

        newest_first = (
            self.db.query(Voice)
            .filter(Voice.user_id == user_id)
            .options(joinedload(Voice.questions))
            .order_by(Voice.created_at.desc())
        )
        return newest_first.offset(skip).limit(limit).all()

    def get_all_voices(self, skip: int = 0, limit: int = 50) -> List[Voice]:
        """Return one page of all voices, newest first."""
        newest_first = self.db.query(Voice).order_by(Voice.created_at.desc())
        return newest_first.offset(skip).limit(limit).all()

    # ----------------------------------------------------- VoiceContent
    def create_voice_content(self, voice_id: int, content: str,
                             score_bps: Optional[int] = None, magnitude_x1000: Optional[int] = None,
                             locale: Optional[str] = None, provider: Optional[str] = None,
                             model_version: Optional[str] = None, confidence_bps: Optional[int] = None) -> VoiceContent:
        """Insert the transcript / text-sentiment row for a voice."""
        transcript = VoiceContent(
            voice_id=voice_id,
            content=content,
            score_bps=score_bps,
            magnitude_x1000=magnitude_x1000,
            locale=locale,
            provider=provider,
            model_version=model_version,
            confidence_bps=confidence_bps,
        )
        return self._persist(transcript)

    def get_voice_content_by_voice_id(self, voice_id: int) -> Optional[VoiceContent]:
        """Return the transcript row for this ``voice_id``, or None."""
        contents = self.db.query(VoiceContent)
        return contents.filter(VoiceContent.voice_id == voice_id).first()

    def update_voice_content(self, voice_id: int, content: str,
                             score_bps: Optional[int] = None, magnitude_x1000: Optional[int] = None,
                             locale: Optional[str] = None, provider: Optional[str] = None,
                             model_version: Optional[str] = None, confidence_bps: Optional[int] = None) -> Optional[VoiceContent]:
        """Update the transcript row for ``voice_id``.

        ``content`` is always overwritten; every other field is written only
        when a non-None value is passed. Returns the row, or the falsy lookup
        result when no row exists.
        """
        transcript = self.get_voice_content_by_voice_id(voice_id)
        if not transcript:
            return transcript

        transcript.content = content
        optional_updates = (
            ("score_bps", score_bps),
            ("magnitude_x1000", magnitude_x1000),
            ("locale", locale),
            ("provider", provider),
            ("model_version", model_version),
            ("confidence_bps", confidence_bps),
        )
        for field_name, new_value in optional_updates:
            if new_value is not None:
                setattr(transcript, field_name, new_value)

        self.db.commit()
        self.db.refresh(transcript)
        return transcript

    # ----------------------------------------------------- VoiceAnalyze
    def create_voice_analyze(self, voice_id: int, happy_bps: int, sad_bps: int,
                             neutral_bps: int, angry_bps: int, fear_bps: int,
                             top_emotion: Optional[str] = None, top_confidence_bps: Optional[int] = None,
                             model_version: Optional[str] = None) -> VoiceAnalyze:
        """Insert the voice-emotion analysis row for a voice."""
        analysis = VoiceAnalyze(
            voice_id=voice_id,
            happy_bps=happy_bps,
            sad_bps=sad_bps,
            neutral_bps=neutral_bps,
            angry_bps=angry_bps,
            fear_bps=fear_bps,
            top_emotion=top_emotion,
            top_confidence_bps=top_confidence_bps,
            model_version=model_version,
        )
        return self._persist(analysis)

    def get_voice_analyze_by_voice_id(self, voice_id: int) -> Optional[VoiceAnalyze]:
        """Return the emotion-analysis row for this ``voice_id``, or None."""
        analyses = self.db.query(VoiceAnalyze)
        return analyses.filter(VoiceAnalyze.voice_id == voice_id).first()

    def update_voice_analyze(self, voice_id: int, happy_bps: int, sad_bps: int,
                             neutral_bps: int, angry_bps: int, fear_bps: int,
                             top_emotion: Optional[str] = None, top_confidence_bps: Optional[int] = None,
                             model_version: Optional[str] = None) -> Optional[VoiceAnalyze]:
        """Update the emotion-analysis row for ``voice_id``.

        The five ``*_bps`` emotion scores are always overwritten; the
        remaining fields are written only when a non-None value is passed.
        Returns the row, or the falsy lookup result when no row exists.
        """
        analysis = self.get_voice_analyze_by_voice_id(voice_id)
        if not analysis:
            return analysis

        analysis.happy_bps = happy_bps
        analysis.sad_bps = sad_bps
        analysis.neutral_bps = neutral_bps
        analysis.angry_bps = angry_bps
        analysis.fear_bps = fear_bps

        optional_updates = (
            ("top_emotion", top_emotion),
            ("top_confidence_bps", top_confidence_bps),
            ("model_version", model_version),
        )
        for field_name, new_value in optional_updates:
            if new_value is not None:
                setattr(analysis, field_name, new_value)

        self.db.commit()
        self.db.refresh(analysis)
        return analysis

    # --------------------------------------------------------- Question
    def create_question(self, question_category: str, content: str) -> Question:
        """Insert a question template."""
        template = Question(
            question_category=question_category,
            content=content,
        )
        return self._persist(template)

    def get_questions_by_category(self, category: str) -> List[Question]:
        """Return every question in ``category``."""
        questions = self.db.query(Question)
        return questions.filter(Question.question_category == category).all()

    def get_all_questions(self) -> List[Question]:
        """Return every question."""
        return self.db.query(Question).all()

    def get_question_by_id(self, question_id: int) -> Optional[Question]:
        """Return the question with this ``question_id``, or None."""
        questions = self.db.query(Question)
        return questions.filter(Question.question_id == question_id).first()

    # ---------------------------------------------------- VoiceQuestion
    def link_voice_question(self, voice_id: int, question_id: int) -> VoiceQuestion:
        """Insert the link row between a voice and a question."""
        link = VoiceQuestion(
            voice_id=voice_id,
            question_id=question_id,
        )
        return self._persist(link)

    def get_questions_by_voice_id(self, voice_id: int) -> List[Question]:
        """Return the questions linked to this voice."""
        joined = self.db.query(Question).join(VoiceQuestion)
        return joined.filter(VoiceQuestion.voice_id == voice_id).all()

    def get_voices_by_question_id(self, question_id: int) -> List[Voice]:
        """Return the voices linked to this question."""
        joined = self.db.query(Voice).join(VoiceQuestion)
        return joined.filter(VoiceQuestion.question_id == question_id).all()

    def unlink_voice_question(self, voice_id: int, question_id: int) -> bool:
        """Delete the voice/question link; return True when a row was removed."""
        link = self.db.query(VoiceQuestion).filter(
            VoiceQuestion.voice_id == voice_id,
            VoiceQuestion.question_id == question_id
        ).first()
        if not link:
            return False
        self.db.delete(link)
        self.db.commit()
        return True


def get_db_service(db: Session) -> DatabaseService:
    """Create a ``DatabaseService`` bound to ``db``."""
    return DatabaseService(db)
class EmotionAnalyzer:
    """Speech-emotion classifier built on a Hugging Face wav2vec2 checkpoint."""

    def __init__(self):
        self.model = None
        self.feature_extractor = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self._load_model()

    def _load_model(self):
        """Load the model and feature extractor.

        Any failure is printed and leaves both attributes set to None, which
        ``analyze_emotion`` reports as an error result instead of raising.
        """
        model_name = "jungjongho/wav2vec2-xlsr-korean-speech-emotion-recognition"

        try:
            self.model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
            self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
            self.model.to(self.device)
            self.model.eval()
        except Exception as e:
            print(f"모델 로드 실패: {e}")
            self.model = None
            self.feature_extractor = None

    def analyze_emotion(self, audio_file) -> Dict[str, Any]:
        """Classify the emotion of an uploaded audio file.

        Args:
            audio_file: Object exposing a readable ``.file`` attribute (for
                example a FastAPI ``UploadFile``).

        Returns:
            On success: ``emotion``, ``confidence``, ``emotion_scores``,
            ``audio_duration`` (seconds) and ``sample_rate``.
            On failure, or when the model is not loaded: ``error`` plus
            ``emotion="unknown"`` and ``confidence=0.0``. Never raises for
            analysis errors.
        """
        if not self.model or not self.feature_extractor:
            return {
                "error": "모델이 로드되지 않았습니다",
                "emotion": "unknown",
                "confidence": 0.0
            }

        import os

        # Set as soon as the temp file exists so the cleanup below also runs
        # when reading or writing the upload fails.
        tmp_file_path = None
        try:
            # Spill the upload to a temporary file that librosa can open by path.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
                tmp_file_path = tmp_file.name
                content = audio_file.file.read()
                tmp_file.write(content)

            # Load the audio, resampled to 16 kHz.
            audio, sr = librosa.load(tmp_file_path, sr=16000)

            # Feature extraction.
            inputs = self.feature_extractor(
                audio,
                sampling_rate=16000,
                return_tensors="pt",
                padding=True
            )

            # Move the tensors to the model's device.
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Inference.
            with torch.no_grad():
                outputs = self.model(**inputs)
                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

            # Emotion labels by class index.
            # NOTE(review): hard-coded; the original comment says this may need
            # adjusting per model - confirm against the checkpoint's label map.
            emotion_labels = ["neutral", "happy", "sad", "angry", "fear", "surprise", "disgust"]

            # Most probable emotion.
            predicted_class = torch.argmax(predictions, dim=-1).item()
            confidence = predictions[0][predicted_class].item()
            emotion = emotion_labels[predicted_class] if predicted_class < len(emotion_labels) else "unknown"

            # Probability of every labelled emotion.
            emotion_scores = {
                emotion_labels[i]: predictions[0][i].item()
                for i in range(min(len(emotion_labels), predictions.shape[1]))
            }

            return {
                "emotion": emotion,
                "confidence": confidence,
                "emotion_scores": emotion_scores,
                "audio_duration": len(audio) / sr,
                "sample_rate": sr
            }

        except Exception as e:
            return {
                "error": f"분석 중 오류 발생: {str(e)}",
                "emotion": "unknown",
                "confidence": 0.0
            }
        finally:
            # Best-effort removal of the temporary file.
            if tmp_file_path is not None:
                try:
                    os.unlink(tmp_file_path)
                except OSError:
                    pass


# Module-level instance; the model is loaded when this module is imported.
emotion_analyzer = EmotionAnalyzer()


def analyze_voice_emotion(audio_file) -> Dict[str, Any]:
    """Analyze ``audio_file`` with the shared ``emotion_analyzer`` instance."""
    return emotion_analyzer.analyze_emotion(audio_file)
@app.post("/admin/db/migrate")
async def run_migration():
    """Run Alembic ``upgrade head`` using the ``alembic.ini`` config file.

    Raises:
        HTTPException: 500 carrying the underlying error text on any failure.
    """
    try:
        # Imported inside the handler, so Alembic is loaded on first call
        # rather than when this module is imported.
        from alembic import command
        from alembic.config import Config

        print("🔄 마이그레이션 실행 중...")
        # NOTE(review): the .dockerignore added in this change lists
        # alembic.ini, so this file may be missing inside the image - confirm.
        alembic_cfg = Config("alembic.ini")
        command.upgrade(alembic_cfg, "head")

        return {
            "success": True,
            "message": "마이그레이션이 성공적으로 실행되었습니다."
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"마이그레이션 실패: {str(e)}")


@app.post("/admin/db/init")
async def init_database():
    """Create every model table that does not exist in the database yet.

    Returns:
        ``success`` and ``message``; when tables were created, also
        ``created_tables`` (sorted table names).

    Raises:
        HTTPException: 500 carrying the underlying error text on any failure.
    """
    try:
        from sqlalchemy import inspect

        existing_tables = set(inspect(engine).get_table_names())
        missing_tables = sorted(set(Base.metadata.tables) - existing_tables)

        if not missing_tables:
            return {
                "success": True,
                "message": "모든 테이블이 이미 존재합니다."
            }

        print(f"🔨 테이블 생성 중: {', '.join(missing_tables)}")
        # create_all orders the given tables by their foreign-key
        # dependencies, so no hand-maintained creation order is needed.
        Base.metadata.create_all(
            bind=engine,
            tables=[Base.metadata.tables[name] for name in missing_tables],
            checkfirst=True,
        )

        return {
            "success": True,
            "message": "테이블이 생성되었습니다.",
            "created_tables": missing_tables
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"데이터베이스 초기화 실패: {str(e)}")
@app.get("/admin/db/status")
async def get_database_status():
    """Report which model tables exist in the database.

    Returns:
        ``total_tables`` (number of model tables), ``existing_tables`` (names
        found in the database), ``missing_tables`` and ``is_sync`` (True when
        nothing is missing).

    Raises:
        HTTPException: 500 carrying the underlying error text on any failure.
    """
    try:
        from sqlalchemy import inspect
        inspector = inspect(engine)
        existing_tables = inspector.get_table_names()
        all_tables = set(Base.metadata.tables.keys())
        missing_tables = all_tables - set(existing_tables)

        return {
            "success": True,
            "total_tables": len(all_tables),
            "existing_tables": existing_tables,
            "missing_tables": list(missing_tables),
            "is_sync": len(missing_tables) == 0
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"상태 확인 실패: {str(e)}")


# -------------------------------------- auth API --------------------------------------

# POST : sign up
@app.post("/sign-up", response_model=SignupResponse)
async def sign_up(request: SignupRequest):
    """Register a new user.

    Raises:
        HTTPException: 400 with the service's error message when sign-up fails.
    """
    # Keep a handle on the generator so that get_db()'s cleanup (closing the
    # session) runs when this request is done, not at an arbitrary time.
    db_gen = get_db()
    db = next(db_gen)
    try:
        result = get_auth_service(db).signup(
            name=request.name,
            birthdate=request.birthdate,
            username=request.username,
            password=request.password,
            role=request.role,
            connecting_user_code=request.connecting_user_code
        )
    finally:
        db_gen.close()

    if result["success"]:
        return SignupResponse(
            message="회원가입이 완료되었습니다.",
            user_code=result["user_code"],
            username=result["username"],
            name=result["name"],
            role=result["role"]
        )
    raise HTTPException(status_code=400, detail=result["error"])


# POST : sign in
@app.post("/sign-in", response_model=SigninResponse)
async def sign_in(request: SigninRequest, role: str):
    """Log a user in; ``role`` is passed as a request (query) parameter.

    Raises:
        HTTPException: 401 with the service's error message when login fails.
    """
    db_gen = get_db()
    db = next(db_gen)
    try:
        result = get_auth_service(db).signin(
            username=request.username,
            password=request.password,
            role=role
        )
    finally:
        db_gen.close()

    if result["success"]:
        return SigninResponse(
            message="로그인 성공",
            username=result["username"],
            name=result["name"],
            role=result["role"]
        )
    raise HTTPException(status_code=401, detail=result["error"])


# -------------------------------------- voice API --------------------------------------

# GET : list a user's voices
@app.get("/users/voices", response_model=UserVoiceListResponse)
async def get_user_voice_list(username: str):
    """Return the voice list of ``username``."""
    db_gen = get_db()
    db = next(db_gen)
    try:
        result = get_voice_service(db).get_user_voice_list(username)
        # Build the response while the session is still open, in case the
        # service result still refers to session-bound objects.
        return UserVoiceListResponse(
            success=result["success"],
            voices=result.get("voices", [])
        )
    finally:
        db_gen.close()


# POST : upload a voice together with a question
@app.post("/users/voices", response_model=VoiceQuestionUploadResponse)
async def upload_voice_with_question(
    file: UploadFile = File(...),
    username: str = Form(...),
    question_id: int = Form(...)
):
    """Upload a voice file for a question via the voice service.

    Raises:
        HTTPException: 400 with the service's message when the upload fails.
    """
    db_gen = get_db()
    db = next(db_gen)
    try:
        voice_service = get_voice_service(db)
        result = await voice_service.upload_voice_with_question(file, username, question_id)

        if result["success"]:
            return VoiceQuestionUploadResponse(
                success=True,
                message=result["message"],
                voice_id=result.get("voice_id"),
                question_id=result.get("question_id")
            )
        raise HTTPException(status_code=400, detail=result["message"])
    finally:
        db_gen.close()


# POST : upload voice with STT
@app.post("/voices/upload")
async def upload_voice(
    file: UploadFile = File(...),
    folder: Optional[str] = Form(default=None),
    language_code: str = Form(default="ko-KR")
):
    """Upload a voice file to S3 and transcribe it.

    Args:
        file: Uploaded audio file.
        folder: Sub-folder under the base prefix; ``DEFAULT_UPLOAD_FOLDER``
            when omitted.
        language_code: Language code passed to the STT service.

    Returns:
        ``uploaded`` (the S3 key), ``files`` (objects listed under the
        prefix) and ``transcription`` (the STT result).

    Raises:
        HTTPException: 500 when ``S3_BUCKET_NAME`` is not configured.
    """
    from io import BytesIO

    bucket = os.getenv("S3_BUCKET_NAME")
    if not bucket:
        raise HTTPException(status_code=500, detail="S3_BUCKET_NAME not configured")

    # Read the upload into memory once; it is consumed twice below.
    file_content = await file.read()

    # S3 upload
    base_prefix = VOICE_BASE_PREFIX.rstrip("/")
    effective_prefix = f"{base_prefix}/{folder or DEFAULT_UPLOAD_FOLDER}".rstrip("/")
    key = f"{effective_prefix}/{file.filename}"

    upload_fileobj(bucket=bucket, key=key, fileobj=BytesIO(file_content))

    # Minimal stand-in exposing the attributes set below; the STT service gets
    # a fresh stream because the S3 upload consumed the first one.
    class TempUploadFile:
        def __init__(self, content, filename):
            self.file = content
            self.filename = filename
            self.content_type = "audio/wav"

    temp_upload_file = TempUploadFile(BytesIO(file_content), file.filename)
    stt_result = transcribe_voice(temp_upload_file, language_code)

    # List the objects under the same prefix.
    names = list_bucket_objects(bucket=bucket, prefix=effective_prefix)

    return {
        "uploaded": key,
        "files": names,
        "transcription": stt_result
    }
# POST: analyze the emotion of a voice file stored in S3
# The ":path" converter lets voice_key contain "/" (upload keys are built as
# "<prefix>/<folder>/<file>"); a plain "{voice_key}" segment cannot match them.
@app.post("/voices/{voice_key:path}/analyze-emotion")
async def analyze_emotion_from_s3(voice_key: str):
    """Download ``voice_key`` from S3 and run voice emotion analysis on it.

    Args:
        voice_key: Full S3 object key of the audio file.

    Returns:
        dict: ``voice_key`` and ``emotion_analysis`` (the analyzer result).

    Raises:
        HTTPException: 500 when ``S3_BUCKET_NAME`` is not configured or the
            download/analysis fails, 404 when the object does not exist.
    """
    from io import BytesIO
    from .s3_service import get_s3_client

    bucket = os.getenv("S3_BUCKET_NAME")
    if not bucket:
        raise HTTPException(status_code=500, detail="S3_BUCKET_NAME not configured")

    s3_client = get_s3_client()

    # Only a missing object is a 404; credential, network or analyzer failures
    # are server-side problems and must not be reported as "not found".
    try:
        response = s3_client.get_object(Bucket=bucket, Key=voice_key)
    except s3_client.exceptions.NoSuchKey as e:
        raise HTTPException(status_code=404, detail=f"파일을 찾을 수 없습니다: {voice_key}") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"S3 다운로드 중 오류 발생: {str(e)}") from e

    try:
        body = response['Body']
        try:
            file_content = body.read()
        finally:
            body.close()

        # The file name is the last component of the key.
        filename = voice_key.split('/')[-1]

        # Minimal stand-in exposing the attributes this code sets for
        # analyze_voice_emotion: .file, .filename and .content_type.
        class FileWrapper:
            def __init__(self, content, filename, content_type):
                self.file = content
                self.filename = filename
                self.content_type = content_type

        emotion_file = FileWrapper(BytesIO(file_content), filename, "audio/wav")
        emotion_result = analyze_voice_emotion(emotion_file)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"감정 분석 중 오류 발생: {str(e)}") from e

    return {
        "voice_key": voice_key,
        "emotion_analysis": emotion_result
    }
# POST: run every Google NLP analysis on one text
@app.post("/nlp/analyze")
async def analyze_text_comprehensive(
    text: str,
    language_code: str = "ko"
):
    """Run sentiment, entity and syntax analysis on ``text`` and bundle the results.

    Args:
        text: Text to analyze.
        language_code: Language code passed to each analyzer (default ``"ko"``).

    Returns:
        dict: The inputs echoed back plus one entry per analysis.
    """
    # Evaluated in order: sentiment, then entities, then syntax.
    analyses = {
        "sentiment_analysis": analyze_text_sentiment(text, language_code),
        "entity_analysis": analyze_text_entities(text, language_code),
        "syntax_analysis": analyze_text_syntax(text, language_code),
    }
    return {"text": text, "language_code": language_code, **analyses}
class User(Base):
    """User account table.

    One row per account. ``role`` is restricted to ``'USER'`` or ``'CARE'`` by
    a check constraint, and deleting a user cascades to the user's ``Voice``
    rows through the ``voices`` relationship.
    """
    __tablename__ = "user"

    user_id = Column(BigInteger, primary_key=True, autoincrement=True)
    user_code = Column(String(20), nullable=False, unique=True)  # automatically generated user code
    username = Column(String(64), nullable=False, unique=True)
    password = Column(String(72), nullable=False)  # bcrypt hash
    role = Column(String(20), nullable=False)
    name = Column(String(50), nullable=False)
    birthdate = Column(Date, nullable=False)
    # user_code of the user to connect to when role is CARE.
    # NOTE(review): stored as a plain string, no foreign key is declared here.
    connecting_user_code = Column(String(20), nullable=True)
    created_at = Column(DateTime, nullable=False, server_default=func.current_timestamp())
    updated_at = Column(DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp())

    # Relationships
    voices = relationship("Voice", back_populates="user", cascade="all, delete-orphan")

    # Constraints
    __table_args__ = (
        CheckConstraint("role IN ('USER','CARE')", name='check_user_role'),
    )
class VoiceAnalyze(Base):
    """Voice emotion analysis table (one row per voice).

    Stores five emotion scores in basis points. The check constraints require
    each score to be at most 10000 and the five scores to sum to exactly 10000.
    """
    __tablename__ = "voice_analyze"

    voice_analyze_id = Column(BigInteger, primary_key=True, autoincrement=True)
    voice_id = Column(BigInteger, ForeignKey("voice.voice_id", ondelete="CASCADE"), nullable=False)
    happy_bps = Column(SmallInteger, nullable=False)  # 0~10000
    sad_bps = Column(SmallInteger, nullable=False)  # 0~10000
    neutral_bps = Column(SmallInteger, nullable=False)  # 0~10000
    angry_bps = Column(SmallInteger, nullable=False)  # 0~10000
    fear_bps = Column(SmallInteger, nullable=False)  # 0~10000
    top_emotion = Column(String(16), nullable=True)  # e.g. 'neutral'
    top_confidence_bps = Column(SmallInteger, nullable=True)  # 0~10000
    model_version = Column(String(32), nullable=True)
    analyzed_at = Column(DateTime, nullable=False, server_default=func.current_timestamp())

    # Relationships
    voice = relationship("Voice", back_populates="voice_analyze")

    # Constraints
    # The unique constraint on voice_id makes this a 1:1 companion of Voice.
    # NOTE(review): the range check only enforces the upper bound; negative
    # values are not rejected even though the column comments say 0~10000.
    __table_args__ = (
        UniqueConstraint('voice_id', name='uq_va_voice'),
        CheckConstraint("happy_bps <= 10000 AND sad_bps <= 10000 AND neutral_bps <= 10000 AND angry_bps <= 10000 AND fear_bps <= 10000", name='check_emotion_bps_range'),
        CheckConstraint("happy_bps + sad_bps + neutral_bps + angry_bps + fear_bps = 10000", name='check_emotion_bps_sum'),
    )
class GoogleNLPService:
    """Wrapper around the Google Cloud Natural Language client.

    Every ``analyze_*`` method reports failures in-band: instead of raising it
    returns a dict with an ``"error"`` message and empty result fields.
    """

    _CLIENT_MISSING = "Google NLP 클라이언트가 초기화되지 않았습니다"

    def __init__(self):
        self.client = None
        self._initialize_client()

    def _initialize_client(self):
        """Create the API client; leave ``self.client`` as ``None`` on failure."""
        try:
            key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
            client_kwargs = {}
            if key_file and os.path.exists(key_file):
                # Authenticate explicitly with the service-account key file.
                client_kwargs["credentials"] = service_account.Credentials.from_service_account_file(
                    key_file,
                    scopes=["https://www.googleapis.com/auth/cloud-platform"]
                )
            # Without explicit credentials the client library's default
            # credential lookup is used.
            self.client = language_v1.LanguageServiceClient(**client_kwargs)
        except Exception as exc:
            print(f"Google NLP 클라이언트 초기화 실패: {exc}")
            self.client = None

    @staticmethod
    def _plain_text_document(text: str, language_code: str):
        """Build the plain-text ``Document`` request object shared by all analyses."""
        return language_v1.Document(
            content=text,
            type_=language_v1.Document.Type.PLAIN_TEXT,
            language=language_code
        )

    def analyze_sentiment(self, text: str, language_code: str = "ko") -> Dict[str, Any]:
        """Analyze the sentiment of ``text``.

        Args:
            text: Text to analyze.
            language_code: Language code (default ``"ko"``).

        Returns:
            Dict: ``sentiment`` (document score/magnitude), ``sentences``
            (per-sentence scores) and ``language_code``; on failure ``error``
            plus a zero sentiment and an empty sentence list.
        """
        if not self.client:
            return {
                "error": self._CLIENT_MISSING,
                "sentiment": {"score": 0.0, "magnitude": 0.0},
                "sentences": []
            }

        try:
            reply = self.client.analyze_sentiment(
                request={'document': self._plain_text_document(text, language_code)}
            )
            overall = reply.document_sentiment
            per_sentence = [
                {
                    "text": item.text.content,
                    "sentiment_score": item.sentiment.score,
                    "sentiment_magnitude": item.sentiment.magnitude
                }
                for item in reply.sentences
            ]
            return {
                "sentiment": {"score": overall.score, "magnitude": overall.magnitude},
                "sentences": per_sentence,
                "language_code": language_code
            }
        except Exception as exc:
            return {
                "error": f"NLP 분석 중 오류 발생: {str(exc)}",
                "sentiment": {"score": 0.0, "magnitude": 0.0},
                "sentences": []
            }

    def analyze_entities(self, text: str, language_code: str = "ko") -> Dict[str, Any]:
        """Extract entities from ``text``.

        Args:
            text: Text to analyze.
            language_code: Language code (default ``"ko"``).

        Returns:
            Dict: ``entities`` (name, type, salience, mentions) and
            ``language_code``; on failure ``error`` and an empty entity list.
        """
        if not self.client:
            return {"error": self._CLIENT_MISSING, "entities": []}

        try:
            reply = self.client.analyze_entities(
                request={'document': self._plain_text_document(text, language_code)}
            )
            found = [
                {
                    "name": item.name,
                    "type": item.type_.name,
                    "salience": item.salience,
                    "mentions": [mention.text.content for mention in item.mentions]
                }
                for item in reply.entities
            ]
            return {"entities": found, "language_code": language_code}
        except Exception as exc:
            return {"error": f"엔티티 분석 중 오류 발생: {str(exc)}", "entities": []}

    def analyze_syntax(self, text: str, language_code: str = "ko") -> Dict[str, Any]:
        """Analyze the syntax of ``text``.

        Args:
            text: Text to analyze.
            language_code: Language code (default ``"ko"``).

        Returns:
            Dict: ``tokens`` (text, part of speech, lemma, dependency edge) and
            ``language_code``; on failure ``error`` and an empty token list.
        """
        if not self.client:
            return {"error": self._CLIENT_MISSING, "tokens": []}

        try:
            reply = self.client.analyze_syntax(
                request={'document': self._plain_text_document(text, language_code)}
            )
            parsed = [
                {
                    "text": item.text.content,
                    "part_of_speech": item.part_of_speech.tag.name,
                    "lemma": item.lemma,
                    "dependency_edge": {
                        "head_token_index": item.dependency_edge.head_token_index,
                        "label": item.dependency_edge.label.name
                    }
                }
                for item in reply.tokens
            ]
            return {"tokens": parsed, "language_code": language_code}
        except Exception as exc:
            return {"error": f"구문 분석 중 오류 발생: {str(exc)}", "tokens": []}
class GoogleSTTService:
    """Wrapper around the Google Cloud Speech-to-Text client.

    ``transcribe_audio`` reports failures in-band: instead of raising it
    returns a dict with an ``"error"`` message, an empty transcript and a
    confidence of 0.0.
    """

    def __init__(self):
        self.client = None
        self._initialize_client()

    def _initialize_client(self):
        """Create the API client; leave ``self.client`` as ``None`` on failure."""
        try:
            # Path of the service-account key file, taken from the environment.
            credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")

            if credentials_path and os.path.exists(credentials_path):
                # Authenticate explicitly with the service-account key file.
                credentials = service_account.Credentials.from_service_account_file(
                    credentials_path,
                    scopes=["https://www.googleapis.com/auth/cloud-platform"]
                )
                self.client = speech.SpeechClient(credentials=credentials)
            else:
                # No usable key file: use the client library's default
                # credential lookup.
                self.client = speech.SpeechClient()

        except Exception as e:
            print(f"Google STT 클라이언트 초기화 실패: {e}")
            self.client = None

    def transcribe_audio(self, audio_file, language_code: str = "ko-KR") -> Dict[str, Any]:
        """Convert an audio file to text.

        Args:
            audio_file: Upload-like object exposing ``.file`` (a readable,
                seekable binary stream) and optionally ``.filename``.
            language_code: Language code (default ``"ko-KR"``).

        Returns:
            Dict: On success ``transcript`` (all recognized segments joined
            with spaces), ``confidence`` (mean confidence of those segments),
            ``language_code``, ``audio_duration`` (seconds) and
            ``sample_rate``. On failure ``error``, an empty ``transcript`` and
            ``confidence`` 0.0.
        """
        if not self.client:
            return {
                "error": "Google STT 클라이언트가 초기화되지 않았습니다",
                "transcript": "",
                "confidence": 0.0
            }

        # Bound before the try block so the cleanup in ``finally`` never hits
        # an unbound name when reading or writing the upload fails.
        tmp_file_path = None
        try:
            # Keep the upload's own extension so the temp file is not
            # mislabeled as .wav when it holds another format (e.g. .m4a).
            suffix = os.path.splitext(getattr(audio_file, "filename", None) or "")[1] or ".wav"

            # Spool the upload to disk because librosa loads from a path.
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_file:
                tmp_file_path = tmp_file.name
                content = audio_file.file.read()
                audio_file.file.seek(0)  # leave the stream reusable for the caller
                tmp_file.write(content)

            # Decode and resample to 16 kHz.
            audio_data, sample_rate = librosa.load(tmp_file_path, sr=16000)

            # Convert the float samples to 16-bit PCM bytes (LINEAR16).
            audio_data = np.clip(audio_data, -1.0, 1.0)
            audio_bytes = (audio_data * 32767).astype('int16').tobytes()

            audio = speech.RecognitionAudio(content=audio_bytes)
            config = speech.RecognitionConfig(
                encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=sample_rate,
                language_code=language_code,
                enable_automatic_punctuation=True,
                enable_word_time_offsets=True,
                model="latest_long",
            )

            # NOTE(review): synchronous recognize() is presumably limited to
            # short audio; longer recordings may need long_running_recognize.
            response = self.client.recognize(config=config, audio=audio)

            # The response holds one result per consecutive portion of the
            # audio; reading only the first would truncate the transcript.
            transcripts = []
            confidences = []
            for result in response.results:
                if not result.alternatives:
                    continue
                best = result.alternatives[0]
                text = best.transcript.strip()
                if text:
                    transcripts.append(text)
                    confidences.append(best.confidence)

            if not transcripts:
                return {
                    "error": "음성을 인식할 수 없습니다",
                    "transcript": "",
                    "confidence": 0.0
                }

            return {
                "transcript": " ".join(transcripts),
                "confidence": sum(confidences) / len(confidences),
                "language_code": language_code,
                "audio_duration": len(audio_data) / sample_rate,
                "sample_rate": sample_rate
            }

        except Exception as e:
            return {
                "error": f"STT 처리 중 오류 발생: {str(e)}",
                "transcript": "",
                "confidence": 0.0
            }
        finally:
            # Remove the temp file if it was created.
            if tmp_file_path is not None:
                try:
                    os.unlink(tmp_file_path)
                except OSError as e:
                    print(f"임시 파일 삭제 실패: {tmp_file_path}, 오류: {e}")
class VoiceService:
    """Voice upload, background transcription and listing.

    Public methods report failures in-band as ``{"success": False, ...}``
    dicts instead of raising.
    """

    # Accepted upload extensions (compared case-insensitively).
    _ALLOWED_EXTENSIONS = (".wav", ".m4a")

    # Strong references to in-flight background tasks. The event loop only
    # keeps weak references, so an unreferenced task may be garbage-collected
    # before it finishes.
    _background_tasks: set = set()

    def __init__(self, db: Session):
        self.db = db
        self.db_service = get_db_service(db)
        self.auth_service = get_auth_service(db)

    async def _store_voice(self, file: UploadFile, user) -> Dict[str, Any]:
        """Validate the upload, store it in S3 and create its ``Voice`` row.

        Returns:
            dict: ``{"success": False, "message": ...}`` when validation fails;
            otherwise ``success=True`` plus ``voice``, ``file_content`` and
            ``filename``.
        """
        import uuid

        # The name is client-supplied: keep only the last path component and
        # tolerate a missing name.
        filename = os.path.basename((file.filename or "").replace("\\", "/"))
        if not filename.lower().endswith(self._ALLOWED_EXTENSIONS):
            return {
                "success": False,
                "message": "Only .wav and .m4a files are allowed"
            }

        bucket = os.getenv("S3_BUCKET_NAME")
        if not bucket:
            return {
                "success": False,
                "message": "S3_BUCKET_NAME not configured"
            }

        file_content = await file.read()
        base_prefix = VOICE_BASE_PREFIX.rstrip("/")
        effective_prefix = f"{base_prefix}/{DEFAULT_UPLOAD_FOLDER}".rstrip("/")
        # Scope the key by user and add a random component so two uploads with
        # the same file name never overwrite each other.
        key = f"{effective_prefix}/{user.user_id}/{uuid.uuid4().hex}_{filename}"

        upload_fileobj(bucket=bucket, key=key, fileobj=BytesIO(file_content))

        # Placeholder duration derived from the file size (1 MiB -> 1000 ms).
        # NOTE(review): this is not a real duration; the STT result exposes
        # "audio_duration" and could replace it.
        file_size_mb = len(file_content) / (1024 * 1024)
        estimated_duration_ms = int(file_size_mb * 1000)

        voice = self.db_service.create_voice(
            voice_key=key,
            voice_name=filename,
            duration_ms=estimated_duration_ms,
            user_id=user.user_id,
            sample_rate=16000  # default; the real rate is not inspected here
        )

        return {
            "success": True,
            "voice": voice,
            "file_content": file_content,
            "filename": filename
        }

    def _schedule_processing(self, file_content: bytes, filename: str, voice_id: int) -> None:
        """Start STT/NLP processing in the background and keep the task referenced."""
        task = asyncio.create_task(
            self._process_stt_and_nlp_background(file_content, filename, voice_id)
        )
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)

    async def upload_user_voice(self, file: UploadFile, username: str) -> Dict[str, Any]:
        """Upload a user's voice file (S3 + DB) and start transcription.

        Args:
            file: Uploaded audio file.
            username: Login id of the uploading user.

        Returns:
            dict: ``success`` and ``message``; ``voice_id`` on success.
        """
        try:
            user = self.auth_service.get_user_by_username(username)
            if not user:
                return {
                    "success": False,
                    "message": "User not found"
                }

            stored = await self._store_voice(file, user)
            if not stored["success"]:
                return stored

            voice = stored["voice"]
            self._schedule_processing(stored["file_content"], stored["filename"], voice.voice_id)

            return {
                "success": True,
                "message": "음성 파일이 성공적으로 업로드되었습니다.",
                "voice_id": voice.voice_id
            }
        except Exception as e:
            return {
                "success": False,
                "message": f"업로드 실패: {str(e)}"
            }

    async def _process_stt_and_nlp_background(self, file_content: bytes, filename: str, voice_id: int):
        """Transcribe the audio, score its sentiment and store a VoiceContent row.

        Errors are printed and swallowed because nothing awaits this task.
        """
        try:
            # Minimal stand-in exposing the attributes this code sets for
            # transcribe_voice: .file, .filename and .content_type.
            class TempUploadFile:
                def __init__(self, content, filename):
                    self.file = content
                    self.filename = filename
                    self.content_type = "audio/m4a" if filename.endswith('.m4a') else "audio/wav"

            stt_file = TempUploadFile(BytesIO(file_content), filename)

            # STT and NLP are blocking calls; run them in the default executor
            # so the event loop keeps serving other requests.
            loop = asyncio.get_running_loop()
            stt_result = await loop.run_in_executor(None, transcribe_voice, stt_file, "ko-KR")

            if not stt_result.get("transcript"):
                print(f"STT 변환 실패: voice_id={voice_id}")
                return

            transcript = stt_result["transcript"]
            confidence = stt_result.get("confidence", 0)

            nlp_result = await loop.run_in_executor(None, analyze_text_sentiment, transcript, "ko")

            # Store sentiment only when the analysis succeeded. A failed
            # analysis carries a zero placeholder that must not be recorded as
            # a real neutral score.
            score_bps = None
            magnitude_x1000 = None
            sentiment = nlp_result.get("sentiment")
            if sentiment and "error" not in nlp_result:
                # round() instead of int(): float products such as
                # 0.57 * 10000 fall just below the integer and would truncate.
                score_bps = round(sentiment.get("score", 0) * 10000)
                magnitude_x1000 = round(sentiment.get("magnitude", 0) * 1000)

            self.db_service.create_voice_content(
                voice_id=voice_id,
                content=transcript,
                score_bps=score_bps,
                magnitude_x1000=magnitude_x1000,
                locale="ko-KR",
                provider="google",
                confidence_bps=round(confidence * 10000)
            )

            print(f"STT → NLP 처리 완료: voice_id={voice_id}")

        except Exception as e:
            print(f"STT → NLP 처리 중 오류 발생: {e}")

    def get_user_voice_list(self, username: str) -> Dict[str, Any]:
        """List a user's voices with emotion, question and transcript.

        Args:
            username: Login id of the user.

        Returns:
            dict: ``success`` and ``voices``. Each voice has ``created_at``,
            ``emotion``, ``question_title`` and ``content``. An unknown user or
            an internal error yields ``success=False`` with an empty list.
        """
        try:
            user = self.auth_service.get_user_by_username(username)
            if not user:
                return {
                    "success": False,
                    "voices": []
                }

            voice_list = []
            for voice in self.db_service.get_voices_by_user(user.user_id):
                analysis = voice.voice_analyze
                transcript = voice.voice_content

                voice_list.append({
                    "created_at": voice.created_at.isoformat() if voice.created_at else "",
                    # Top emotion from the voice analysis, when it exists.
                    "emotion": analysis.top_emotion if analysis else None,
                    # Text of the first linked question, when there is one.
                    "question_title": voice.questions[0].content if voice.questions else None,
                    # Placeholder text until a transcript has been stored.
                    "content": (
                        transcript.content
                        if transcript and transcript.content
                        else "아직 기록이 완성되지 않았습니다"
                    )
                })

            return {
                "success": True,
                "voices": voice_list
            }

        except Exception as e:
            # The response shape has no message field, so at least leave a
            # trace of why the list came back empty.
            print(f"음성 리스트 조회 실패: {e}")
            return {
                "success": False,
                "voices": []
            }

    async def upload_voice_with_question(self, file: UploadFile, username: str, question_id: int) -> Dict[str, Any]:
        """Upload a voice file answering a question (S3 + DB + question link + STT).

        Args:
            file: Uploaded audio file.
            username: Login id of the uploading user.
            question_id: Id of the question being answered.

        Returns:
            dict: ``success`` and ``message``; ``voice_id`` and ``question_id``
            on success.
        """
        try:
            user = self.auth_service.get_user_by_username(username)
            if not user:
                return {
                    "success": False,
                    "message": "User not found"
                }

            question = self.db_service.get_question_by_id(question_id)
            if not question:
                return {
                    "success": False,
                    "message": "Question not found"
                }

            stored = await self._store_voice(file, user)
            if not stored["success"]:
                return stored

            voice = stored["voice"]

            # Store the link before starting background work, so a failed link
            # does not leave a processing task running for a failed request.
            self.db_service.link_voice_question(voice.voice_id, question_id)

            self._schedule_processing(stored["file_content"], stored["filename"], voice.voice_id)

            return {
                "success": True,
                "message": "음성 파일과 질문이 성공적으로 업로드되었습니다.",
                "voice_id": voice.voice_id,
                "question_id": question_id
            }

        except Exception as e:
            return {
                "success": False,
                "message": f"업로드 실패: {str(e)}"
            }
+pip install --upgrade pip setuptools wheel + +# 7. 프로젝트 파일 복사 (git clone 또는 scp 사용) +# 이 부분은 수동으로 또는 별도 스크립트로 처리 +echo "📥 프로젝트 파일 복사 확인..." +# git clone . +# 또는 scp로 파일 복사 + +# 8. 의존성 설치 +echo "📦 Python 의존성 설치..." +pip install -r requirements.txt + +# 9. 환경 변수 설정 +echo "⚙️ 환경 변수 설정..." +if [ ! -f .env ]; then + echo ".env 파일이 없습니다. 수동으로 생성하세요." + echo "DB_HOST=your-rds-endpoint" + echo "DB_PORT=3306" + echo "DB_USER=admin" + echo "DB_PASSWORD=your-password" + echo "DB_NAME=caring_voice" + echo "AWS_ACCESS_KEY_ID=your-key" + echo "AWS_SECRET_ACCESS_KEY=your-secret" + echo "AWS_REGION=ap-northeast-2" + echo "S3_BUCKET_NAME=your-bucket" + exit 1 +fi + +# 10. 데이터베이스 마이그레이션 +echo "🔄 데이터베이스 마이그레이션..." +python -m alembic upgrade head + +# 11. systemd 서비스 설정 +echo "🔧 systemd 서비스 설정..." +sudo tee /etc/systemd/system/caring-voice.service > /dev/null < 0: + print(f"⚠️ 이미 {existing_count}개의 질문이 존재합니다. 건너뜁니다.") + return + + # 카테고리별로 질문 생성 + total_count = 0 + for category, questions in questions_data.items(): + for question_text in questions: + question = Question( + question_category=category, + content=question_text + ) + db.add(question) + total_count += 1 + + db.commit() + print(f"✅ {total_count}개의 질문이 성공적으로 추가되었습니다!") + + # 카테고리별 통계 출력 + for category in questions_data.keys(): + count = db.query(Question).filter(Question.question_category == category).count() + print(f" - {category}: {count}개") + + except Exception as e: + db.rollback() + print(f"❌ 질문 추가 실패: {e}") + raise + finally: + db.close() + + +if __name__ == "__main__": + print("📝 질문 데이터 초기화 시작...") + init_questions() diff --git a/manage_db.py b/manage_db.py new file mode 100644 index 0000000..7e9de76 --- /dev/null +++ b/manage_db.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +""" +데이터베이스 초기화 및 관리 스크립트 +""" + +from app.database import create_tables, drop_tables, engine +from app.models import Base +import sys + + +def init_database(): + """데이터베이스 테이블 생성""" + print("데이터베이스 테이블을 생성합니다...") + try: + create_tables() + 
if __name__ == "__main__":
    # Step to run before listing the tables; "show" only lists them.
    setup_steps = {
        "init": init_database,
        "reset": reset_database,
        "show": None,
    }
    requested = sys.argv[1] if len(sys.argv) > 1 else None

    if requested in setup_steps:
        prepare = setup_steps[requested]
        if prepare is not None:
            prepare()
        show_tables()
    else:
        # Missing or unknown command: print the usage text.
        for usage_line in (
            "사용법: python manage_db.py [init|reset|show]",
            "  init  - 테이블 생성",
            "  reset - 테이블 재생성 (데이터 삭제)",
            "  show  - 테이블 목록 표시",
        ):
            print(usage_line)
from urllib.parse import quote_plus

# Percent-encode the credentials so characters such as "@", ":" or "/" cannot
# break the URL structure.
# NOTE(review): DB_PASSWORD falls back to a hard-coded default earlier in this
# file; consider requiring the environment variable instead.
ENCODED_USER = quote_plus(DB_USER) if DB_USER else ""
ENCODED_PASSWORD = quote_plus(DB_PASSWORD) if DB_PASSWORD else ""

DATABASE_URL = (
    f"mysql+pymysql://{ENCODED_USER}:{ENCODED_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

# set_main_option() stores the value through ConfigParser, which treats "%" as
# interpolation syntax. The "%XX" escapes from quote_plus must be doubled or
# the call fails with an interpolation error. get_main_option() and
# get_section() turn "%%" back into "%".
config.set_main_option("sqlalchemy.url", DATABASE_URL.replace("%", "%%"))
+revision = 'add_question_tables'
+down_revision = None  # Set as the first migration
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create the user, voice, voice_content, voice_analyze, question and voice_question tables."""
+    op.create_table(
+        'user',
+        sa.Column('user_id', sa.BigInteger(), nullable=False, autoincrement=True),
+        sa.Column('user_code', sa.String(length=20), nullable=False),
+        sa.Column('username', sa.String(length=64), nullable=False),
+        sa.Column('password', sa.String(length=72), nullable=False),
+        sa.Column('role', sa.String(length=20), nullable=False),
+        sa.Column('name', sa.String(length=50), nullable=False),
+        sa.Column('birthdate', sa.Date(), nullable=False),
+        sa.Column('connecting_user_code', sa.String(length=20), nullable=True),
+        sa.Column('created_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')),
+        sa.Column('updated_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP')),
+        sa.PrimaryKeyConstraint('user_id'),
+        sa.UniqueConstraint('user_code', name='unique_user_code'),
+        sa.UniqueConstraint('username', name='username'),
+        sa.CheckConstraint("role IN ('USER','CARE')", name='check_user_role')
+    )
+
+    op.create_table(
+        'voice',
+        sa.Column('voice_id', sa.BigInteger(), nullable=False, autoincrement=True),
+        sa.Column('voice_key', sa.String(length=1024), nullable=False),
+        sa.Column('voice_name', sa.String(length=255), nullable=False),
+        sa.Column('duration_ms', sa.Integer(), nullable=False),
+        sa.Column('sample_rate', sa.Integer(), nullable=True),
+        sa.Column('bit_rate', sa.Integer(), nullable=True),
+        sa.Column('created_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')),
+        sa.Column('user_id', sa.BigInteger(), nullable=False),
+        sa.PrimaryKeyConstraint('voice_id'),
+        sa.ForeignKeyConstraint(['user_id'], ['user.user_id'], ondelete='CASCADE'),
+        sa.Index('idx_voice_user_created', 'user_id', 'created_at')
+    )
+
+    op.create_table(
+        'voice_content',
+        sa.Column('voice_content_id', sa.BigInteger(), nullable=False, autoincrement=True),
+        sa.Column('voice_id', sa.BigInteger(), nullable=False),
+        sa.Column('content', sa.Text(), nullable=False),
+        sa.Column('score_bps', sa.SmallInteger(), nullable=True),
+        sa.Column('magnitude_x1000', sa.Integer(), nullable=True),
+        sa.Column('locale', sa.String(length=10), nullable=True),
+        sa.Column('provider', sa.String(length=32), nullable=True),
+        sa.Column('model_version', sa.String(length=32), nullable=True),
+        sa.Column('confidence_bps', sa.SmallInteger(), nullable=True),
+        sa.Column('created_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')),
+        sa.PrimaryKeyConstraint('voice_content_id'),
+        sa.UniqueConstraint('voice_id', name='uq_vc_voice'),
+        sa.ForeignKeyConstraint(['voice_id'], ['voice.voice_id'], ondelete='CASCADE')
+    )
+
+    op.create_table(
+        'voice_analyze',
+        sa.Column('voice_analyze_id', sa.BigInteger(), nullable=False, autoincrement=True),
+        sa.Column('voice_id', sa.BigInteger(), nullable=False),
+        sa.Column('happy_bps', sa.SmallInteger(), nullable=False),
+        sa.Column('sad_bps', sa.SmallInteger(), nullable=False),
+        sa.Column('neutral_bps', sa.SmallInteger(), nullable=False),
+        sa.Column('angry_bps', sa.SmallInteger(), nullable=False),
+        sa.Column('fear_bps', sa.SmallInteger(), nullable=False),
+        sa.Column('top_emotion', sa.String(length=16), nullable=True),
+        sa.Column('top_confidence_bps', sa.SmallInteger(), nullable=True),
+        sa.Column('model_version', sa.String(length=32), nullable=True),
+        sa.Column('analyzed_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')),
+        sa.PrimaryKeyConstraint('voice_analyze_id'),
+        sa.UniqueConstraint('voice_id', name='uq_va_voice'),
+        sa.ForeignKeyConstraint(['voice_id'], ['voice.voice_id'], ondelete='CASCADE'),
+        sa.CheckConstraint("happy_bps <= 10000 AND sad_bps <= 10000 AND neutral_bps <= 10000 AND angry_bps <= 10000 AND fear_bps <= 10000", name='check_emotion_bps_range'),  # NOTE(review): no lower bound, so negative values pass both checks
+        sa.CheckConstraint("happy_bps + sad_bps + neutral_bps + angry_bps + fear_bps = 10000", name='check_emotion_bps_sum')
+    )
+
+    # Create the question table
+    op.create_table(
+        'question',
+        sa.Column('question_id', sa.BigInteger(), nullable=False, autoincrement=True),
+        sa.Column('question_category', sa.String(length=50), nullable=False),
+        sa.Column('content', sa.Text(), nullable=False),
+        sa.Column('created_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')),
+        sa.PrimaryKeyConstraint('question_id'),
+        sa.CheckConstraint("question_category IN ('emotion', 'stress', 'physical', 'social', 'self_reflection')", name='check_question_category')
+    )
+
+    # Create the voice_question table (links a voice to a question)
+    op.create_table(
+        'voice_question',
+        sa.Column('voice_question_id', sa.BigInteger(), nullable=False, autoincrement=True),
+        sa.Column('voice_id', sa.BigInteger(), nullable=False),
+        sa.Column('question_id', sa.BigInteger(), nullable=False),
+        sa.Column('created_at', sa.DateTime(), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')),
+        sa.PrimaryKeyConstraint('voice_question_id'),
+        sa.UniqueConstraint('voice_id', 'question_id', name='uq_voice_question'),
+        sa.ForeignKeyConstraint(['voice_id'], ['voice.voice_id'], ondelete='CASCADE'),
+        sa.ForeignKeyConstraint(['question_id'], ['question.question_id'], ondelete='CASCADE')
+    )
+
+
+def downgrade() -> None:
+    """Drop every table created by upgrade(), referencing tables before the ones they reference."""
+    op.drop_table('voice_question')
+
+    # Drop the question table
+    op.drop_table('question')
+
+    # Drop the voice_analyze table
+    op.drop_table('voice_analyze')
+
+    # Drop the voice_content table
+    op.drop_table('voice_content')
+
+    # Drop the voice table
+    op.drop_table('voice')
+
+    # Drop the user table
+    op.drop_table('user')
diff --git a/requirements.txt b/requirements.txt
index 21a99f5..5891f6f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,15 @@
-# 현재 프로젝트에 특별한 의존성이 없습니다.
-# 필요에 따라 패키지를 추가하세요.
-
-# 예시:
-# requests>=2.31.0
-# numpy>=1.24.0
-# pandas>=2.0.0
+fastapi>=0.115.0
+uvicorn[standard]>=0.30.0
+boto3>=1.34.0
+python-dotenv>=1.0.1
+transformers>=4.30.0
+torch>=2.0.0
+librosa>=0.10.0
+scipy>=1.10.0
+google-cloud-speech>=2.21.0
+google-auth>=2.23.0
+google-cloud-language>=2.8.0
+sqlalchemy>=2.0.0
+alembic>=1.13.0
+pymysql>=1.1.0
+cryptography>=41.0.0