Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
345 changes: 345 additions & 0 deletions apps/pre-processing-service/app/service/blog/blog_create_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,345 @@
import json
import logging
import os
from datetime import datetime
from typing import Dict, List, Optional, Any

from openai import OpenAI
from dotenv import load_dotenv

from app.model.schemas import RequestBlogCreate
from app.errors.BlogPostingException import *

# Load environment variables from the development dotenv file
load_dotenv(".env.dev")


class BlogContentService:
    """Generate blog post content from a keyword and product information.

    The post body is requested from the OpenAI chat completions API. If any
    step of that pipeline raises, a static HTML template built from the same
    request is returned instead, so callers always receive a result dict.

    The request object is only accessed through ``request.keyword`` and
    ``request.product_info`` (``None`` or a mapping read with ``.get``).
    """

    # OpenAI request parameters.
    _MODEL = "gpt-4o-mini"
    _TEMPERATURE = 0.7
    _MAX_TOKENS = 2000

    # Limits applied while building the context, title and tags.
    _MAX_OPTIONS = 5
    _MAX_TAGS = 10
    _MAX_TAG_LENGTH = 20
    _TITLE_SCAN_LINES = 10

    # Markers removed from a candidate title line.
    _TITLE_MARKERS = ("#", "<h1>", "</h1>", "<h2>", "</h2>")

    # (substrings searched in the lower-cased product title, tags added on a hit)
    _CATEGORY_TAGS = (
        (("iphone", "아이폰", "phone"), ("아이폰", "슀마트폰")),
        (("필름", "보호", "κ°•ν™”"), ("λ³΄ν˜Έν•„λ¦„", "강화필름")),
        (("μΌ€μ΄μŠ€", "컀버"), ("ν°μΌ€μ΄μŠ€", "μ•‘μ„Έμ„œλ¦¬")),
        (("λ…ΈνŠΈλΆ", "laptop"), ("λ…ΈνŠΈλΆ", "컴퓨터")),
        (("마우슀", "ν‚€λ³΄λ“œ"), ("μ»΄ν“¨ν„°μš©ν’ˆ", "PCμ•‘μ„Έμ„œλ¦¬")),
    )

    def __init__(self):
        """Read the OpenAI API key from the environment and build the client.

        Raises:
            ValueError: if ``OPENAI_API_KEY`` is unset or empty.
        """
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        if not self.openai_api_key:
            raise ValueError("OPENAI_API_KEYκ°€ .env.dev νŒŒμΌμ— μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")

        # One client per service instance.
        self.client = OpenAI(api_key=self.openai_api_key)
        # NOTE(review): basicConfig configures the root logger from inside a
        # constructor; kept because callers may rely on it, but this normally
        # belongs in application start-up code.
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def generate_blog_content(self, request: "RequestBlogCreate") -> Dict[str, Any]:
        """Create a blog post for ``request``.

        Args:
            request: object exposing ``keyword`` and ``product_info``.

        Returns:
            ``{"title": str, "content": str, "tags": List[str]}``. When the
            generation pipeline raises, the dict comes from the static
            fallback template instead.
        """
        try:
            # 1. Summarise the request as plain-text context.
            content_context = self._prepare_content_context(request)
            # 2. Build the prompt around that context.
            prompt = self._create_content_prompt(content_context, request)
            # 3. Ask the model for the post body.
            generated_content = self._generate_with_openai(prompt)
            # 4. Derive title and tags.
            return self._parse_generated_content(generated_content, request)
        except Exception as e:
            # Deliberate best-effort boundary: any failure degrades to the
            # template. logger.exception keeps the traceback in the log.
            self.logger.exception(f"μ½˜ν…μΈ  생성 μ‹€νŒ¨: {e}")
            return self._create_fallback_content(request)

    @staticmethod
    def _format_price(price: Any) -> str:
        """Return ``price`` with thousands separators when it is numeric.

        The ``,`` format option is only valid for numeric types; other values
        (for example a price already stored as text) are returned via ``str``.
        """
        try:
            return f"{price:,}"
        except (TypeError, ValueError):
            return str(price)

    def _prepare_content_context(self, request: "RequestBlogCreate") -> str:
        """Convert the request into the plain-text context used in the prompt.

        Returns:
            Newline-joined description lines, or a fixed placeholder sentence
            when the request carries neither keyword nor product info.
        """
        context_parts = []

        if request.keyword:
            context_parts.append(f"μ£Όμš” ν‚€μ›Œλ“œ: {request.keyword}")

        product = request.product_info
        if product:
            context_parts.append("\nμƒν’ˆ 정보:")

            # Basic product facts.
            if product.get("title"):
                context_parts.append(f"- μƒν’ˆλͺ…: {product['title']}")
            if product.get("price"):
                context_parts.append(
                    f"- 가격: {self._format_price(product['price'])}원"
                )
            if product.get("rating"):
                context_parts.append(f"- 평점: {product['rating']}/5.0")

            # Free-text description.
            if product.get("description"):
                context_parts.append(f"- μ„€λͺ…: {product['description']}")

            # Specifications; the heading is emitted even for non-dict values,
            # but only dict entries are listed.
            specs = product.get("material_info")
            if specs:
                context_parts.append("- μ£Όμš” 사양:")
                if isinstance(specs, dict):
                    for key, value in specs.items():
                        context_parts.append(f" * {key}: {value}")

            # Purchase options: total count, then at most _MAX_OPTIONS entries.
            options = product.get("options")
            if options:
                context_parts.append(f"- ꡬ맀 μ˜΅μ…˜ ({len(options)}개):")
                for i, option in enumerate(options[: self._MAX_OPTIONS], 1):
                    if isinstance(option, dict):
                        option_name = option.get("name", f"μ˜΅μ…˜ {i}")
                        context_parts.append(f" {i}. {option_name}")
                    else:
                        context_parts.append(f" {i}. {option}")

            # Purchase link: "url" takes precedence over "product_url".
            url = product.get("url") or product.get("product_url")
            if url:
                context_parts.append(f"- ꡬ맀 링크: {url}")

        return "\n".join(context_parts) if context_parts else "ν‚€μ›Œλ“œ 기반 μ½˜ν…μΈ  생성"

    def _create_content_prompt(self, context: str, request: "RequestBlogCreate") -> str:
        """Build the user prompt sent to the model.

        Args:
            context: text produced by ``_prepare_content_context``.
            request: kept for interface compatibility; not read here. The
                previous version derived a fallback keyword from it but never
                inserted that value into the prompt, so the dead computation
                was removed.
        """
        prompt = f"""
λ‹€μŒ 정보λ₯Ό λ°”νƒ•μœΌλ‘œ λ§€λ ₯적인 λΈ”λ‘œκ·Έ 포슀트λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”.

정보:
{context}

μž‘μ„± κ°€μ΄λ“œλΌμΈ:
- μŠ€νƒ€μΌ: μΉœκ·Όν•˜λ©΄μ„œλ„ μ‹ λ’°ν•  수 μžˆλŠ”, 정보 제곡 쀑심
- 길이: 1200자 λ‚΄μ™Έμ˜ μ λ‹Ήν•œ 길이
- 톀: λ…μžμ˜ 관심을 λ„λŠ” μžμ—°μŠ€λŸ¬μš΄ μ–΄μ‘°

μž‘μ„± μš”κ΅¬μ‚¬ν•­:
1. SEO μΉœν™”μ μ΄κ³  ν΄λ¦­ν•˜κ³  싢은 λ§€λ ₯적인 제λͺ©
2. λ…μžμ˜ 관심을 λ„λŠ” λ„μž…λΆ€
3. 핡심 νŠΉμ§•κ³Ό μž₯점을 ꡬ체적으둜 μ„€λͺ…
4. μ‹€μ œ μ‚¬μš© μ‹œλ‚˜λ¦¬μ˜€λ‚˜ ν™œμš© 팁
5. ꡬ맀 결정에 도움이 λ˜λŠ” 정보

⚠️ 주의:
- μ ˆλŒ€λ‘œ λ§ˆμ§€λ§‰μ— 'HTML κ΅¬μ‘°λŠ”β€¦' 같은 자기 평가 λ¬Έμž₯을 μΆ”κ°€ν•˜μ§€ λ§ˆμ„Έμš”.
- 좜λ ₯ μ‹œ ```λ‚˜ ```html 같은 μ½”λ“œ 블둝 ꡬ문을 ν¬ν•¨ν•˜μ§€ λ§ˆμ„Έμš”.
- 였직 HTML νƒœκ·Έλ§Œ μ‚¬μš©ν•˜μ—¬ κ΅¬μ‘°ν™”λœ μ½˜ν…μΈ λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”.
(예: <h2>, <h3>, <p>, <ul>, <li> λ“±)
"""

        return prompt

    def _generate_with_openai(self, prompt: str) -> str:
        """Send ``prompt`` to the chat completions API and return the reply text.

        Raises:
            ValueError: if the first choice carries no text.
            Exception: any error raised by the client is logged and re-raised.
        """
        try:
            response = self.client.chat.completions.create(
                model=self._MODEL,
                messages=[
                    {
                        "role": "system",
                        "content": "당신은 전문적인 λΈ”λ‘œκ·Έ μ½˜ν…μΈ  μž‘μ„±μžμž…λ‹ˆλ‹€. μƒν’ˆ 리뷰와 정보성 μ½˜ν…μΈ λ₯Ό λ§€λ ₯적이고 SEO μΉœν™”μ μœΌλ‘œ μž‘μ„±ν•©λ‹ˆλ‹€.",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=self._TEMPERATURE,
                max_tokens=self._MAX_TOKENS,
            )

            generated = response.choices[0].message.content
            if not generated:
                # Fail explicitly so the caller falls back to the template
                # instead of publishing an empty post.
                raise ValueError("OpenAI returned an empty completion")
            return generated

        except Exception as e:
            self.logger.error(f"OpenAI API 호좜 μ‹€νŒ¨: {e}")
            raise

    def _parse_generated_content(
        self, content: str, request: "RequestBlogCreate"
    ) -> Dict[str, Any]:
        """Derive a title and tags for the generated body.

        The title is the first of the leading ``_TITLE_SCAN_LINES`` lines
        that, once ``#`` and ``<h1>``/``<h2>`` markers are removed and the
        remainder is trimmed, is longer than 5 and shorter than 100
        characters. If the request keyword is not contained in that title, a
        keyword-based title replaces it.

        Returns:
            ``{"title": ..., "content": content, "tags": [...]}`` where
            ``content`` is passed through unchanged.
        """
        title = "λΈ”λ‘œκ·Έ 포슀트"  # default when no line qualifies

        for line in content.strip().split("\n")[: self._TITLE_SCAN_LINES]:
            candidate = line.strip()
            for marker in self._TITLE_MARKERS:
                candidate = candidate.replace(marker, "")
            # Removing "# " or a tag can leave surrounding whitespace behind.
            candidate = candidate.strip()
            if 5 < len(candidate) < 100:
                title = candidate
                break

        if request.keyword and request.keyword not in title:
            if request.product_info and request.product_info.get("title"):
                title = (
                    f"{request.product_info['title']} - {request.keyword} μ™„λ²½ κ°€μ΄λ“œ"
                )
            else:
                title = f"{request.keyword} - μ™„λ²½ κ°€μ΄λ“œ"

        tags = self._generate_tags(request)

        return {"title": title, "content": content, "tags": tags}

    def _generate_tags(self, request: "RequestBlogCreate") -> List[str]:
        """Build up to ``_MAX_TAGS`` unique tags from the request.

        Sources, in order: the keyword, category tags matched against the
        lower-cased product title, and short spec values from a dict-valued
        ``material_info``. Two default tags are used when nothing was found.
        """
        tags = []

        if request.keyword:
            tags.append(request.keyword)

        product = request.product_info
        if product:
            if product.get("title"):
                lowered_title = str(product["title"]).lower()
                for needles, category_tags in self._CATEGORY_TAGS:
                    if any(needle in lowered_title for needle in needles):
                        tags.extend(category_tags)

            material_info = product.get("material_info")
            if isinstance(material_info, dict):
                for value in material_info.values():
                    if not value:
                        continue
                    clean_value = str(value).strip()
                    # Skip values that are blank after trimming or too long.
                    if clean_value and len(clean_value) <= self._MAX_TAG_LENGTH:
                        tags.append(clean_value)

        if not tags:
            tags = ["μƒν’ˆμ •λ³΄", "리뷰"]

        # Order-preserving de-duplication, then cap the count.
        return list(dict.fromkeys(tags))[: self._MAX_TAGS]

    def _create_fallback_content(self, request: "RequestBlogCreate") -> Dict[str, Any]:
        """Build a static template post used when generation fails.

        Request values are HTML-escaped before being placed in the markup
        because product data may come from external pages. The returned
        ``title`` itself is left unescaped.

        Returns:
            ``{"title": str, "content": str, "tags": List[str]}``.
        """
        from html import escape

        product = request.product_info

        if product and product.get("title"):
            title = f"{product['title']} - μƒν’ˆ 정보 및 ꡬ맀 κ°€μ΄λ“œ"
            product_name = product["title"]
        elif request.keyword:
            title = f"{request.keyword} - μ™„λ²½ κ°€μ΄λ“œ"
            product_name = request.keyword
        else:
            title = "μƒν’ˆ 정보 및 ꡬ맀 κ°€μ΄λ“œ"
            product_name = "μƒν’ˆ"

        safe_title = escape(str(title))
        safe_name = escape(str(product_name))

        intro = f"""
<h1>{safe_title}</h1>

<h2>μƒν’ˆ μ†Œκ°œ</h2>
<p>{safe_name}에 λŒ€ν•œ μƒμ„Έν•œ 정보λ₯Ό μ†Œκ°œν•©λ‹ˆλ‹€.</p>

<h2>μ£Όμš” νŠΉμ§•</h2>
<ul>
<li>κ³ ν’ˆμ§ˆμ˜ μ œν’ˆμœΌλ‘œ μ‹ λ’°ν•  수 μžˆλŠ” λΈŒλžœλ“œμž…λ‹ˆλ‹€</li>
<li>합리적인 κ°€κ²©μœΌλ‘œ κ°€μ„±λΉ„κ°€ λ›°μ–΄λ‚©λ‹ˆλ‹€</li>
<li>μ‚¬μš©μž μΉœν™”μ μΈ λ””μžμΈκ³Ό κΈ°λŠ₯을 μ œκ³΅ν•©λ‹ˆλ‹€</li>
</ul>
"""
        sections = [intro]

        if product:
            if product.get("price"):
                price_text = escape(self._format_price(product["price"]))
                sections.append(
                    f"<h2>가격 정보</h2>\n<p>νŒλ§€κ°€: <strong>{price_text}원</strong></p>\n"
                )

            # Only dict-valued specs can be listed; this path must never
            # raise because it is the last resort.
            material_info = product.get("material_info")
            if material_info and isinstance(material_info, dict):
                sections.append("<h2>μƒν’ˆ 사양</h2>\n<ul>\n")
                for key, value in material_info.items():
                    sections.append(
                        f"<li><strong>{escape(str(key))}:</strong> {escape(str(value))}</li>\n"
                    )
                sections.append("</ul>\n")

        sections.append(
            """
<h2>ꡬ맀 μ•ˆλ‚΄</h2>
<p>μ‹ μ€‘ν•œ κ²€ν† λ₯Ό 톡해 만쑱슀러운 ꡬ맀 결정을 λ‚΄λ¦¬μ‹œκΈ° λ°”λžλ‹ˆλ‹€.</p>
"""
        )

        return {
            "title": title,
            "content": "".join(sections),
            "tags": self._generate_tags(request),
        }


# if __name__ == '__main__':
# # Sample request data for a manual test
# test_request = RequestBlogCreate(
# keyword="아이폰 μΌ€μ΄μŠ€",
# product_info={
# "title": "아이폰 15 ν”„λ‘œ 투λͺ… μΌ€μ΄μŠ€",
# "price": 29900,
# "rating": 4.8,
# "description": "9H κ°•ν™” 보호 κΈ°λŠ₯을 μ œκ³΅ν•˜λŠ” 투λͺ… μΌ€μ΄μŠ€",
# "material_info": {
# "μ†Œμž¬": "TPU + PC",
# "λ‘κ»˜": "1.2mm",
# "색상": "투λͺ…",
# "ν˜Έν™˜μ„±": "아이폰 15 Pro"
# },
# "options": [
# {"name": "투λͺ…"},
# {"name": "반투λͺ…"},
# {"name": "λΈ”λž™"}
# ],
# "url": "https://example.com/iphone-case"
# }
# )
#
# # Run the service
# service = BlogContentService()
# print("=== λΈ”λ‘œκ·Έ μ½˜ν…μΈ  생성 ν…ŒμŠ€νŠΈ ===")
# print(f"ν‚€μ›Œλ“œ: {test_request.keyword}")
# print(f"μƒν’ˆ: {test_request.product_info['title']}")
# print("\n--- 생성 μ‹œμž‘ ---")
#
# result = service.generate_blog_content(test_request)
#
# print(f"\n=== 생성 κ²°κ³Ό ===")
# print(f"제λͺ©: {result['title']}")
# print(f"\nνƒœκ·Έ: {', '.join(result['tags'])}")
# print(f"\nλ‚΄μš©:\n{result['content']}")
# print(f"\nκΈ€μžμˆ˜: {len(result['content'])}자")
Loading
Loading