Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed __pycache__/main.cpython-310.pyc
Binary file not shown.
Empty file added app/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from dotenv import load_dotenv
import os

# Directory path that the routes pass to the embedding and task services
# as the index location.
INDEX_PATH = "data/faiss_index"
# Directory where uploaded files are written by the upload route.
UPLOAD_DIR = "data/uploads"

# Create both directories when this module is imported; exist_ok=True makes
# repeated imports/startups a no-op instead of an error.
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(INDEX_PATH, exist_ok=True)

# Load variables from a .env file into the process environment.
load_dotenv()


35 changes: 35 additions & 0 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, File, HTTPException
from services.embedding_service import embed_pdf
from services.task_service import generate_task
import os


# Load variables from a .env file into the process environment before the
# application object is created.
load_dotenv()

# FastAPI application instance; the route decorators below register on it.
app = FastAPI()

# Directory path handed to the embedding and task services as the index location.
INDEX_PATH = "data/faiss_index"
# Directory where uploaded files are written.
UPLOAD_DIR = "data/uploads"

# Ensure both directories exist at import time (no error if they already do).
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(INDEX_PATH, exist_ok=True)

@app.post("/upload")
async def upload_pdf(file: UploadFile = File(...)):
    """Save an uploaded PDF under UPLOAD_DIR and embed it into the index.

    Args:
        file: The multipart upload. Its ``filename`` is client-controlled and
            is therefore reduced to a bare file name before being used in a
            filesystem path.

    Returns:
        A dict with a ``message`` reporting the chunk count returned by
        ``embed_pdf``.

    Raises:
        HTTPException: 400 when the upload has no file name or the name does
            not end in ``.pdf`` (compared case-insensitively).
    """
    # Strip any directory components ("../../x.pdf", "C:\\x.pdf") so the
    # upload can never be written outside UPLOAD_DIR. Backslashes are
    # normalised first because os.path.basename ignores them on POSIX.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))

    if not safe_name.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="PDF 파일만 μ—…λ‘œλ“œ κ°€λŠ₯ν•©λ‹ˆλ‹€.")

    file_path = os.path.join(UPLOAD_DIR, safe_name)

    # Read the body before opening the target so a failed read does not
    # leave an empty file behind.
    content = await file.read()
    with open(file_path, "wb") as f:
        f.write(content)

    chunk_count = embed_pdf(file_path, INDEX_PATH)
    return {"message": f"{chunk_count}개의 청크가 μž„λ² λ”©λ˜μ–΄ μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€."}

@app.get("/generate-task")
async def get_task(prompt: str):
    """Call ``generate_task`` with *prompt* and INDEX_PATH; wrap the result.

    Args:
        prompt: Query-string parameter forwarded unchanged to ``generate_task``.

    Returns:
        A dict whose ``task`` key holds whatever ``generate_task`` returned.
    """
    return {"task": generate_task(prompt, INDEX_PATH)}
35 changes: 35 additions & 0 deletions app/routes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from dotenv import load_dotenv
from fastapi import APIRouter, UploadFile, File, HTTPException
from services.embedding_service import embed_file
from services.task_service import generate_task
from app.config import UPLOAD_DIR, INDEX_PATH
import os

# Router that collects the endpoints declared in this module.
router = APIRouter()
# Lower-cased file extensions (including the leading dot) accepted by the
# upload endpoint; anything else is rejected with HTTP 400.
ALLOWED_EXTENSIONS = {".pdf", ".txt",".html", ".json", ".docx", ".xlsx", ".pptx"}

@router.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    """Save an uploaded document under UPLOAD_DIR and embed it into the index.

    Args:
        file: The multipart upload. Its ``filename`` is client-controlled and
            is therefore reduced to a bare file name before being used in a
            filesystem path.

    Returns:
        A dict with a ``message`` reporting the chunk count returned by
        ``embed_file``.

    Raises:
        HTTPException: 400 when the upload has no file name or its extension
            is not in ALLOWED_EXTENSIONS.
    """
    # Strip any directory components ("../../x.pdf", "C:\\x.pdf") so the
    # upload can never be written outside UPLOAD_DIR. Backslashes are
    # normalised first because os.path.basename ignores them on POSIX.
    # A missing filename becomes "" and is rejected by the extension check
    # instead of raising TypeError inside os.path.splitext.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))

    ext = os.path.splitext(safe_name)[1].lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail="μ§€μ›ν•˜μ§€ μ•ŠλŠ” 파일 ν˜•μ‹μž…λ‹ˆλ‹€. PDF, TXT, HTML, JSON, DOCX, XLSX, PPTX만 μ—…λ‘œλ“œ κ°€λŠ₯ν•©λ‹ˆλ‹€."
        )

    os.makedirs(UPLOAD_DIR, exist_ok=True)
    file_path = os.path.join(UPLOAD_DIR, safe_name)

    # Read the body before opening the target so a failed read does not
    # leave an empty file behind.
    content = await file.read()
    with open(file_path, "wb") as f:
        f.write(content)

    # TODO: could dispatch to a different embedding function per extension.
    chunk_count = embed_file(file_path, INDEX_PATH)

    return {"message": f"{chunk_count}개의 청크가 μž„λ² λ”©λ˜μ–΄ μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€."}

@router.get("/generate-task")
async def get_task(prompt: str):
    """Call ``generate_task`` with *prompt* and INDEX_PATH; wrap the result.

    Args:
        prompt: Query-string parameter forwarded unchanged to ``generate_task``.

    Returns:
        A dict whose ``task`` key holds whatever ``generate_task`` returned.
    """
    return {"task": generate_task(prompt, INDEX_PATH)}
Binary file removed data/faiss_index/index.faiss
Binary file not shown.
Binary file removed data/faiss_index/index.pkl
Binary file not shown.
36 changes: 4 additions & 32 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,7 @@
from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, File, HTTPException
from services.embedding_service import embed_pdf
from services.task_service import generate_task
import os


load_dotenv()
from fastapi import FastAPI
from app.routes import router

# FastAPI application instance served by this module.
app = FastAPI()
# Register the endpoints defined on the router imported from app.routes.
app.include_router(router)

# Directory path handed to the embedding and task services as the index location.
INDEX_PATH = "data/faiss_index"
# Directory where uploaded files are written.
UPLOAD_DIR = "data/uploads"

# Ensure both directories exist at import time (no error if they already do).
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(INDEX_PATH, exist_ok=True)

@app.post("/upload")
async def upload_pdf(file: UploadFile = File(...)):
    """Save an uploaded PDF under UPLOAD_DIR and embed it into the index.

    Args:
        file: The multipart upload. Its ``filename`` is client-controlled and
            is therefore reduced to a bare file name before being used in a
            filesystem path.

    Returns:
        A dict with a ``message`` reporting the chunk count returned by
        ``embed_pdf``.

    Raises:
        HTTPException: 400 when the upload has no file name or the name does
            not end in ``.pdf`` (compared case-insensitively).
    """
    # Strip any directory components ("../../x.pdf", "C:\\x.pdf") so the
    # upload can never be written outside UPLOAD_DIR. Backslashes are
    # normalised first because os.path.basename ignores them on POSIX.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))

    if not safe_name.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="PDF 파일만 μ—…λ‘œλ“œ κ°€λŠ₯ν•©λ‹ˆλ‹€.")

    file_path = os.path.join(UPLOAD_DIR, safe_name)

    # Read the body before opening the target so a failed read does not
    # leave an empty file behind.
    content = await file.read()
    with open(file_path, "wb") as f:
        f.write(content)

    chunk_count = embed_pdf(file_path, INDEX_PATH)
    return {"message": f"{chunk_count}개의 청크가 μž„λ² λ”©λ˜μ–΄ μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€."}

@app.get("/generate-task")
async def get_task(prompt: str):
    """Call ``generate_task`` with *prompt* and INDEX_PATH; wrap the result.

    Args:
        prompt: Query-string parameter forwarded unchanged to ``generate_task``.

    Returns:
        A dict whose ``task`` key holds whatever ``generate_task`` returned.
    """
    return {"task": generate_task(prompt, INDEX_PATH)}
# Run command: uvicorn main:app --reload
Binary file removed services/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file removed services/__pycache__/embedding_service.cpython-310.pyc
Binary file not shown.
Binary file removed services/__pycache__/task_service.cpython-310.pyc
Binary file not shown.
29 changes: 25 additions & 4 deletions services/embedding_service.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,38 @@
from langchain_community.document_loaders import PyPDFLoader
from langchain.document_loaders import (
PyPDFLoader,
TextLoader,
UnstructuredHTMLLoader,
JSONLoader,
UnstructuredFileLoader,
)

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

def embed_file(file_path: str, index_path: str) -> int:
    """Load a document, split it into chunks, embed them and save a FAISS index.

    The loader is chosen from the file extension (case-insensitive):
    ``pdf``, ``txt``, ``html``, ``json``, and ``docx``/``xlsx``/``pptx``.

    Args:
        file_path: Path of the document to embed.
        index_path: Directory the FAISS index is saved to.

    Returns:
        The number of chunks produced by the splitter. ``0`` means the
        document yielded no text; in that case nothing is embedded or saved.

    Raises:
        ValueError: If the file extension is not one of the supported types.
    """
    # Local import: this module's import block does not bring in `os`.
    import os

    # splitext only looks at the final path component, so a dotted directory
    # name with an extension-less file ("v1.2/README") is not misread.
    # The dot is stripped to keep the error message format unchanged.
    ext = os.path.splitext(file_path)[1].lower().lstrip(".")

    if ext == "pdf":
        loader = PyPDFLoader(file_path)
    elif ext == "txt":
        loader = TextLoader(file_path)
    elif ext == "html":
        loader = UnstructuredHTMLLoader(file_path)
    elif ext == "json":
        # JSONLoader requires a jq schema; "." selects the whole document and
        # text_content=False lets non-string JSON values be serialised.
        loader = JSONLoader(file_path, jq_schema=".", text_content=False)
    elif ext in ("docx", "xlsx", "pptx"):
        loader = UnstructuredFileLoader(file_path)
    else:
        raise ValueError(f"μ§€μ›ν•˜μ§€ μ•ŠλŠ” 파일 ν˜•μ‹μž…λ‹ˆλ‹€: {ext}")

    docs = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)

    # Building a FAISS store from zero vectors fails with an opaque error,
    # so report "nothing embedded" instead.
    if not chunks:
        return 0

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    # NOTE(review): a fresh store is built on every call and saved to
    # index_path; this appears to replace rather than extend an index already
    # stored there. Confirm that is intended.
    vectorstore = FAISS.from_documents(chunks, embeddings)

    vectorstore.save_local(index_path)

    return len(chunks)


def embed_pdf(file_path: str, index_path: str) -> int:
    """Backward-compatible entry point for callers that still use the old name.

    Delegates to ``embed_file``, which selects the PDF loader for ``.pdf``
    paths. Arguments and return value are the same as ``embed_file``.
    """
    return embed_file(file_path, index_path)