From 0673340ca8f992e73449c0a3b3e96db2f2fbd3fe Mon Sep 17 00:00:00 2001
From: sunny <anna7789@naver.com>
Date: Mon, 14 Jul 2025 00:02:00 +0900
Subject: [PATCH 1/2] =?UTF-8?q?chore:=20RAG=EA=B4=80=EB=A0=A8=20=EC=9D=98?=
 =?UTF-8?q?=EC=A1=B4=EC=84=B1=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 requirements.txt | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index c8da40e..7d48c41 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,10 @@ safetensors #HuggingFace 모델이 저장되는 .safetensors 파일 포맷을 
 accelerate>=0.20.3 # HuggingFace 모델 가속 및 디바이스 관리
 huggingface-hub
 sentencepiece #모델 토크나이저
-bitsandbytes==0.42.0 # gpu용 4bit양자화
\ No newline at end of file
+bitsandbytes==0.42.0 # gpu용 4bit양자화
+
+# RAG 도입
+langchain
+sentence-transformers
+qdrant-client
+httpx
\ No newline at end of file

From f4ee55b2b1d012cc0136670238329a0fa3c922f6 Mon Sep 17 00:00:00 2001
From: sunny <anna7789@naver.com>
Date: Mon, 14 Jul 2025 00:04:32 +0900
Subject: [PATCH 2/2] =?UTF-8?q?feat:=20RAG=EA=B5=AC=EC=A1=B0=20=EB=8F=84?=
 =?UTF-8?q?=EC=9E=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/services/rag.py | 51 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 app/services/rag.py

diff --git a/app/services/rag.py b/app/services/rag.py
new file mode 100644
index 0000000..93b1707
--- /dev/null
+++ b/app/services/rag.py
@@ -0,0 +1,51 @@
+# app/services/rag.py
+
+import os
+
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import Qdrant
+from sentence_transformers import SentenceTransformer
+
+# 1. Embedding model; runs on CPU (switch the device to "cuda" to use a GPU).
+embedding_model = HuggingFaceEmbeddings(
+    model_name="intfloat/multilingual-e5-small", model_kwargs={"device": "cpu"}
+)
+
+# 2. Qdrant vector store over the existing "dictionary" (word glossary)
+#    collection; endpoint and API key are read from environment variables.
+vectorstore = Qdrant.from_existing_collection(
+    embedding=embedding_model, collection_name="dictionary",
+    url=os.environ["QDRANT_URL"], api_key=os.environ.get("QDRANT_API_KEY"),
+)
+
+def search_word_info(word: str, top_k: int = 1) -> str:
+    """Return the best-matching entry's text for ``word``, or a fallback message."""
+    # Embedding-based lookup; only the first of the ``top_k`` hits is returned.
+    matches = vectorstore.similarity_search(word, k=top_k)
+    best = matches[0].page_content if matches else "설명을 찾을 수 없습니다."
+    return best
+
+async def process_news_with_rag(title, content, level):
+    """Build the word list for an article as {"term", "description"} dicts.
+
+    Terms come from the LLM's "term: description" lines. For level "하" each
+    description is looked up with search_word_info; otherwise the LLM's own
+    description is kept. ``title`` is accepted but not used here.
+    """
+    import asyncio  # local import; asyncio.to_thread needs Python 3.9+
+
+    # NOTE(review): the two helpers called below are not imported by this patch.
+    raw = await generate_content(build_difficult_word_prompt(content, level))
+    # Parse the LLM output once; lines without ":" carry no term and are skipped.
+    pairs = [line.split(":", 1) for line in raw.splitlines() if ":" in line]
+    entries = [(term.strip(), desc.strip()) for term, desc in pairs]
+
+    if level != "하":
+        return [{"term": t, "description": d} for t, d in entries]
+
+    wordbook = []
+    for term, _ in entries:
+        # The lookup is synchronous, so run it in a thread, off the event loop.
+        info = await asyncio.to_thread(search_word_info, term)
+        wordbook.append({"term": term, "description": info})
+    return wordbook
\ No newline at end of file