From 6ced962440ec4c62c7b807eb572ebacdf56ead01 Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Mon, 24 Nov 2025 14:59:57 +0900
Subject: [PATCH 01/13] =?UTF-8?q?Chore:=20Ruff=20=EB=A6=B0=ED=84=B0=20?=
 =?UTF-8?q?=EC=84=A4=EC=A0=95=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 3bfeb6e..12e52e0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,41 @@ pytest-order = "^1.3.0"
 pytest-mock = "^3.14.0"
 
 
+[tool.poetry.group.dev.dependencies]
+ruff = "^0.8.0"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.ruff]
+target-version = "py310"
+line-length = 88
+
+[tool.ruff.lint]
+select = [
+    "E",      # pycodestyle errors
+    "W",      # pycodestyle warnings
+    "F",      # Pyflakes
+    "I",      # isort
+    "B",      # flake8-bugbear
+    "C4",     # flake8-comprehensions
+    "UP",     # pyupgrade
+    "ARG",    # flake8-unused-arguments
+    "SIM",    # flake8-simplify
+    "N",      # pep8-naming
+    "ANN",    # flake8-annotations
+]
+ignore = [
+    "ANN101",  # Missing type annotation for self (rule removed in Ruff 0.8.0; no effect with ruff ^0.8.0)
+    "ANN102",  # Missing type annotation for cls (rule removed in Ruff 0.8.0; no effect with ruff ^0.8.0)
+    "ANN401",  # Dynamically typed expressions (Any)
+]
+
+[tool.ruff.lint.pep8-naming]
+classmethod-decorators = ["classmethod"]
+
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+docstring-code-format = true

From 6631d67eb91a4b5a155cb0f63bea00b7c50e937d Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Mon, 24 Nov 2025 15:00:22 +0900
Subject: [PATCH 02/13] =?UTF-8?q?Refactor:=20=EC=97=90=EC=9D=B4=EC=A0=84?=
 =?UTF-8?q?=ED=8A=B8=20=EB=AA=A8=EB=93=88=20=EB=A6=AC=ED=8C=A9=ED=86=A0?=
 =?UTF-8?q?=EB=A7=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mafm/agent/agents/__init__.py   |  6 +--
 mafm/agent/agents/analyst.py    | 53 +++++++++++++++----
 mafm/agent/agents/llm_model.py  | 12 +++--
 mafm/agent/agents/member.py     | 91 ++++++++++++++++++++-------------
 mafm/agent/agents/supervisor.py | 39 +++++++++++---
 mafm/agent/graph.py             | 81 +++++++++++++++++++----------
 6 files changed, 197 insertions(+), 85 deletions(-)

diff --git a/mafm/agent/agents/__init__.py b/mafm/agent/agents/__init__.py
index bd490d6..b088e10 100644
--- a/mafm/agent/agents/__init__.py
+++ b/mafm/agent/agents/__init__.py
@@ -1,5 +1,5 @@
-from .supervisor import supervisor_agent
-from .member import agent_node
-from .analyst import analyst_agent
+from mafm.agent.agents.analyst import analyst_agent
+from mafm.agent.agents.member import agent_node
+from mafm.agent.agents.supervisor import supervisor_agent
 
 __all__ = ["supervisor_agent", "agent_node", "analyst_agent"]
diff --git a/mafm/agent/agents/analyst.py b/mafm/agent/agents/analyst.py
index aa39c84..13210d0 100644
--- a/mafm/agent/agents/analyst.py
+++ b/mafm/agent/agents/analyst.py
@@ -1,31 +1,62 @@
+"""분석가 에이전트 모듈.
+
+파일 경로 결과를 정리하는 분석가 에이전트를 정의합니다.
+"""
+
+from typing import Any
+
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from .llm_model import api_key
 from langchain_openai import ChatOpenAI
 from pydantic import BaseModel
-from typing import List, Literal
 
+from mafm.agent.agents.llm_model import api_key
+
+
+class ListResponse(BaseModel):
+    """Structured response model for the analyst's result.
+
+    Attributes:
+        messages: Filtered list of file paths.
+    """
 
-def analyst_agent(state, input_prompt: str, output_list: List[str]):
+    messages: list[str]
+
+
+def analyst_agent(
+    state: dict[str, Any],
+    input_prompt: str,
+    output_list: list[str],
+) -> dict[str, list[str]]:
+    """Analyst agent.
+
+    Asks the LLM to keep only the member-provided file paths that match the user's request.
+
+    Args:
+        state: Current agent state passed to the chain (feeds the "messages" placeholder).
+        input_prompt: The user's original request.
+        output_list: File paths found by the members; joined with ", " into the prompt.
+
+    Returns:
+        Result of the structured-output chain (schema: ListResponse). NOTE(review): annotated as a dict; confirm.
+    """
     llm = ChatOpenAI(api_key=api_key, model="gpt-4o-mini")
 
-    class listResponse(BaseModel):
-        messages: List[str]
+    system_prompt = "당신은 구성원들이 답변한 파일의 경로들을 받고 정리하는 감독자입니다."
 
-    system_prompt = (
-        "당신은 구성원들이 답변한 파일의 경로들을 받고 정리하는 감독자입니다."
-    )
     prompt = ChatPromptTemplate.from_messages(
         [
             ("system", system_prompt),
             MessagesPlaceholder(variable_name="messages"),
             (
                 "system",
-                "주어진 파일 경로들 안에서 사용자 요청에 맞는 파일 경로만 뽑아주세요. 주어지지 않은 파일 경로는 뽑으면 안됩니다."
+                "주어진 파일 경로들 안에서 사용자 요청에 맞는 파일 경로만 뽑아주세요. "
+                "주어지지 않은 파일 경로는 뽑으면 안됩니다."
                 "사용자 요청: {input_prompt}"
                 "파일 경로: {output_list}",
             ),
         ]
     ).partial(input_prompt=input_prompt, output_list=", ".join(output_list))
+
     print(output_list)
-    analyst_chain = prompt | llm.with_structured_output(listResponse)
-    return analyst_chain.invoke(state)
+    analyst_chain = prompt | llm.with_structured_output(ListResponse)
+    return analyst_chain.invoke(state)
\ No newline at end of file
diff --git a/mafm/agent/agents/llm_model.py b/mafm/agent/agents/llm_model.py
index cf738c8..addff9f 100644
--- a/mafm/agent/agents/llm_model.py
+++ b/mafm/agent/agents/llm_model.py
@@ -1,6 +1,12 @@
-from dotenv import load_dotenv
+"""LLM model configuration module.
+
+Loads the .env file and exposes the OpenAI API key read from the environment.
+"""
+
 import os
-from langchain_openai import ChatOpenAI
+
+from dotenv import load_dotenv
 
 load_dotenv()
-api_key = os.getenv("OPENAI_API_KEY")
+
+api_key: str | None = os.getenv("OPENAI_API_KEY")
\ No newline at end of file
diff --git a/mafm/agent/agents/member.py b/mafm/agent/agents/member.py
index 5db32fd..5781341 100644
--- a/mafm/agent/agents/member.py
+++ b/mafm/agent/agents/member.py
@@ -1,43 +1,62 @@
+"""멤버 에이전트 모듈.
+
+디렉토리 내 파일 검색을 담당하는 에이전트를 정의합니다.
+"""
+
+import os
+from typing import Any
+
 from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_openai import ChatOpenAI
 from pydantic import BaseModel, Field
-from typing import Literal, List
-from .llm_model import api_key
 
-# from .tools import get_file_list
-from langchain_openai import ChatOpenAI
-from langgraph.store.base import BaseStore
-from langchain.output_parsers import PydanticOutputParser
-from langchain_core.utils.function_calling import convert_to_openai_function
-from langchain_core.messages import HumanMessage
-import os
+from mafm.agent.agents.llm_model import api_key
+from mafm.rag.vectorDb import search
 
-from rag.vectorDb import search
 
-global current_directory_name
+class QueryResponse(BaseModel):
+    """Structured response model holding the search query extracted from user input.
 
+    Attributes:
+        query: Query sentence used for the search.
+    """
 
-class queryResponse(BaseModel):
     query: str = Field(description="query sentence")
 
 
-def get_file_list(query: queryResponse) -> List[str]:
-    """
-    get file list from user input
+def _get_file_list(query: QueryResponse, directory_name: str) -> list[str]:
+    """Run search() against "<directory_name>/<basename of directory_name>.db" for the query.
+
+    Args:
+        query: Query response object; only its ``query`` string is searched.
+        directory_name: Directory whose database file is searched.
+
+    Returns:
+        Whatever search() returns for the query (annotated as a list of file paths).
     """
-    global current_directory_name
+    print(f"current_directory_name: {directory_name}")
+    print(f"query: {query}")
+    db_path = f"{directory_name}/{os.path.basename(directory_name)}.db"
+    return search(db_path, [query.query])
 
-    print("current_directory_name: ", current_directory_name)
-    print("query: ", query)
-    return search(
-        current_directory_name + "/" + os.path.basename(current_directory_name) + ".db",
-        [query.query],
-    )
 
+def agent_node(
+    state: dict[str, Any],
+    directory_name: str,
+    output_list: list[str],
+) -> dict[str, list[str]]:
+    """파일 검색 에이전트 노드.
 
-def agent_node(state, directory_name: str, output_list: List[str]):
-    global current_directory_name
-    current_directory_name = directory_name
+    주어진 디렉토리에서 사용자 요청에 맞는 파일을 검색합니다.
 
+    Args:
+        state: 현재 에이전트 상태 (메시지 포함).
+        directory_name: 검색할 디렉토리 경로.
+        output_list: 검색 결과를 추가할 출력 리스트.
+
+    Returns:
+        검색 결과 메시지를 포함한 딕셔너리.
+    """
     llm = ChatOpenAI(
         api_key=api_key,
         model="gpt-4o-mini",
@@ -52,14 +71,14 @@ def agent_node(state, directory_name: str, output_list: List[str]):
                 "사용자에 요청에 따라서 디렉토리에서 파일을 검색하려고 합니다 쿼리를 문장으로 정리해주세요",
             ),
         ]
-    ).partial(
-        directory_name=directory_name,
-    )
-    query_chain = prompt | llm.with_structured_output(queryResponse)
-    chain = query_chain | get_file_list
-    res = chain.invoke(state)
-    if res:
-        output_list.extend(res)
-        return {"messages": res}
-    else:
-        return {"messages": []}
+    ).partial(directory_name=directory_name)
+
+    query_chain = prompt | llm.with_structured_output(QueryResponse)
+
+    query_result = query_chain.invoke(state)
+    file_list = _get_file_list(query_result, directory_name)
+
+    if file_list:
+        output_list.extend(file_list)
+        return {"messages": file_list}
+    return {"messages": []}
\ No newline at end of file
diff --git a/mafm/agent/agents/supervisor.py b/mafm/agent/agents/supervisor.py
index 98e6ed2..99932fb 100644
--- a/mafm/agent/agents/supervisor.py
+++ b/mafm/agent/agents/supervisor.py
@@ -1,16 +1,43 @@
+"""감독자 에이전트 모듈.
+
+디렉토리 선택을 담당하는 감독자 에이전트를 정의합니다.
+"""
+
+from typing import Any, Literal
+
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from .llm_model import api_key
 from langchain_openai import ChatOpenAI
 from pydantic import BaseModel
-from typing import List, Literal
 
+from mafm.agent.agents.llm_model import api_key
+
+
+def supervisor_agent(
+    state: dict[str, Any],
+    member_list: list[str],
+) -> dict[str, str]:
+    """감독자 에이전트.
 
-def supervisor_agent(state, member_list: List[str]):
+    사용자 요청에 따라 다음에 실행할 디렉토리를 선택합니다.
+
+    Args:
+        state: 현재 에이전트 상태 (메시지 포함).
+        member_list: 선택 가능한 디렉토리 목록.
+
+    Returns:
+        다음 노드 이름을 포함한 딕셔너리.
+    """
     llm = ChatOpenAI(api_key=api_key, model="gpt-4o-mini")
 
     next_options = member_list + ["analyst"]
 
-    class routeResponse(BaseModel):
+    class RouteResponse(BaseModel):
+        """라우팅 응답 모델.
+
+        Attributes:
+            next: 다음으로 실행할 노드 이름.
+        """
+
         next: Literal[*(next_options)]
 
     system_prompt = (
@@ -30,5 +57,5 @@ class routeResponse(BaseModel):
         ]
     ).partial(members=", ".join(member_list))
 
-    supervisor_chain = prompt | llm.with_structured_output(routeResponse)
-    return supervisor_chain.invoke(state)
+    supervisor_chain = prompt | llm.with_structured_output(RouteResponse)
+    return supervisor_chain.invoke(state)
\ No newline at end of file
diff --git a/mafm/agent/graph.py b/mafm/agent/graph.py
index a043715..82a7125 100644
--- a/mafm/agent/graph.py
+++ b/mafm/agent/graph.py
@@ -1,52 +1,86 @@
+"""에이전트 그래프 모듈.
+
+LangGraph 기반 멀티 에이전트 워크플로우를 정의합니다.
+"""
+
 import functools
 import operator
-from typing import Sequence, TypedDict, Annotated, List
+from typing import Annotated, Any, Sequence, TypedDict
 
 from langchain_core.messages import BaseMessage, HumanMessage
-from langgraph.graph import END, StateGraph, START
-from .agents import agent_node, supervisor_agent, analyst_agent
-from rag.sqlite import get_directory_structure
+from langgraph.graph import END, START, StateGraph
+
+from mafm.agent.agents import agent_node, analyst_agent, supervisor_agent
+from mafm.rag.sqlite import get_directory_structure
 
 
 class AgentState(TypedDict):
+    """Agent state type.
+
+    Attributes:
+        messages: Sequence of conversation messages (annotated with operator.add).
+        next: Name of the node to run next.
+    """
+
     messages: Annotated[Sequence[BaseMessage], operator.add]
     next: str
 
 
-def graph(directory_path: str, prompt: str) -> List[str]:
+def graph(directory_path: str, prompt: str) -> list[str]:
+    """멀티 에이전트 그래프를 실행합니다.
+
+    Args:
+        directory_path: 검색할 루트 디렉토리 경로.
+        prompt: 사용자 검색 요청.
+
+    Returns:
+        검색된 파일 경로 목록.
+    """
     human_input = HumanMessage(content=prompt)
 
     members = get_directory_structure()
-    output_list = []
+    output_list: list[str] = []
+
     print(members)
     print(human_input)
-    # graph 생성
+
+    # 워크플로우 그래프 생성
     workflow = StateGraph(AgentState)
+
+    # 감독자 노드 추가
     supervisor_node = functools.partial(supervisor_agent, member_list=members)
     workflow.add_node("supervisor", supervisor_node)
-    analyst_node = functools.partial(
+
+    # 분석가 노드 추가
+    analyst_node_partial = functools.partial(
         analyst_agent, input_prompt=human_input.content, output_list=output_list
     )
-    workflow.add_node("analyst", analyst_node)
+    workflow.add_node("analyst", analyst_node_partial)
+
+    # 멤버 노드 추가
     for member in members:
         member_node = functools.partial(
             agent_node, directory_name=member, output_list=output_list
         )
         workflow.add_node(member, member_node)
         workflow.add_edge(member, "supervisor")
-    conditional_map = {k: k for k in members}
+
+    # 조건부 엣지 설정
+    conditional_map: dict[str, str] = {k: k for k in members}
     conditional_map["analyst"] = "analyst"
-    workflow.add_conditional_edges("supervisor", lambda x: x["next"], conditional_map)
+
+    workflow.add_conditional_edges(
+        "supervisor",
+        lambda x: x["next"],
+        conditional_map,
+    )
     workflow.add_edge(START, "supervisor")
     workflow.add_edge("analyst", END)
-    app = workflow.compile()
 
-    # from IPython.display import Image, display
-    # png_data = app.get_graph().draw_mermaid_png()
-    # with open("graph_image.png", "wb") as file:
-    #     file.write(png_data)
+    app = workflow.compile()
 
-    previous_output = None
+    # 그래프 실행
+    previous_output: dict[str, Any] | None = None
     for s in app.stream(
         {"messages": [human_input]},
         {"recursion_limit": 20},
@@ -55,17 +89,12 @@ def graph(directory_path: str, prompt: str) -> List[str]:
         if "__end__" not in s:
             print(s)
             print("----")
-    return previous_output["analyst"]["messages"]
 
+    if previous_output is None:
+        return []
 
-# def graph():
-#     for output in app.stream(human_input, stream_mode="updates"):
-#         for key, value in output.items():
-#             print(f"Output from node '{key}':")
-#             print("---")
-#             print(value["messages"][-1].pretty_print())
-#         print("\n---\n")
+    return previous_output["analyst"]["messages"]
 
 
 if __name__ == "__main__":
-    print(graph(""))
+    print(graph("", ""))
\ No newline at end of file

From 7d97ddc23fc006a776c58fa8a7ad662683cfe5f3 Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Mon, 24 Nov 2025 15:00:48 +0900
Subject: [PATCH 03/13] =?UTF-8?q?Refactor:=20RAG=20=EB=AA=A8=EB=93=88=20?=
 =?UTF-8?q?=EB=A6=AC=ED=8C=A9=ED=86=A0=EB=A7=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mafm/rag/embedding.py | 100 +++++++++++--------
 mafm/rag/fileops.py   |  63 +++++++++---
 mafm/rag/sqlite.py    | 199 ++++++++++++++++++++++++++++----------
 mafm/rag/vectorDb.py  | 216 ++++++++++++++++++++++++++++--------------
 4 files changed, 407 insertions(+), 171 deletions(-)

diff --git a/mafm/rag/embedding.py b/mafm/rag/embedding.py
index 4f089f4..86542fe 100644
--- a/mafm/rag/embedding.py
+++ b/mafm/rag/embedding.py
@@ -1,61 +1,87 @@
-from sentence_transformers import SentenceTransformer
+"""임베딩 모듈.
+
+SentenceTransformer를 사용한 텍스트 임베딩 기능을 제공합니다.
+"""
+
 import os
-import psutil
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from sentence_transformers import SentenceTransformer
 
-# 모델을 전역 변수로 초기화하여 재사용
-model = None
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
+_model: "SentenceTransformer | None" = None
+
 
-def initialize_model():
+def initialize_model() -> None:
+    """Initialize the embedding model.
 
-    global model
-    if model is None:
-        try:
+    Creates the module-level model instance once; returns immediately if it already exists.
 
-            # 일단 1024 차원으로 실험
-            # 실험 결과가 만족스럽지 않다면 모델을 clone 한 후 8000 차원 이상으로 늘릴 예정
-            # 모델은 github에 등록되어 있음
-            # CPU로 실험
-            # GPU로 변환 시 SentenceTransformer() 메소드 뒤에 .cuda() 메소드를 붙여주면 됨
-            # 모델 초기화
-            model = SentenceTransformer(
-                # "dunzhang/stella_en_400M_v5",
-                "avsolatorio/GIST-small-Embedding-v0",  # 33
-                # "hkunlp/instructor-base",  # 110
-                trust_remote_code=True,
-                device="cpu",
-                config_kwargs={
-                    "use_memory_efficient_attention": False,
-                    "unpad_inputs": False,
-                },
-            )
+    Raises:
+        Exception: Any error raised while importing or constructing the model is printed and re-raised.
+    """
+    global _model
 
-            # model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v1")
+    if _model is not None:
+        return
+
+    try:
+        from sentence_transformers import SentenceTransformer
+
+        _model = SentenceTransformer(
+            # "dunzhang/stella_en_400M_v5",
+            "avsolatorio/GIST-small-Embedding-v0",  # 33
+            # "hkunlp/instructor-base",  # 110
+            trust_remote_code=True,
+            device="cpu",
+            config_kwargs={
+                "use_memory_efficient_attention": False,
+                "unpad_inputs": False,
+            },
+        )
+        print("모델이 성공적으로 초기화되었습니다.")
+    except Exception as e:
+        print(f"모델 초기화 중 오류 발생: {e}")
+        raise
 
-            print("모델이 성공적으로 초기화되었습니다.")
-        except Exception as e:
-            print(f"모델 초기화 중 오류 발생: {e}")
 
+def embedding(queries: list[str]) -> list[list[float]] | None:
+    """Convert text queries into embedding vectors.
 
-def embedding(queries):
-    global model
+    Args:
+        queries: List of text strings to embed.
 
-    # 모델이 초기화되지 않은 경우 초기화
-    if model is None:
+    Returns:
+        The encode() result converted with tolist(); None on MemoryError or any other non-ValueError error.
+
+    Raises:
+        ValueError: If the input is not a list of strings.
+    """
+    global _model
+
+    if _model is None:
         initialize_model()
 
+    if _model is None:
+        print("모델이 초기화되지 않았습니다.")
+        return None
+
     try:
-        # 쿼리 임베딩
         if not isinstance(queries, list) or not all(
             isinstance(q, str) for q in queries
         ):
             raise ValueError("The input to encode() must be a list of strings.")
-        query_embeddings = model.encode(queries)
 
+        query_embeddings = _model.encode(queries)
         return query_embeddings.tolist()
+
     except MemoryError as me:
         print(f"MemoryError: {me}")
-    except Exception as e:
-        print(f"embedding 중 오z류 발생: {e}")
         return None
+    except ValueError:
+        raise
+    except Exception as e:
+        print(f"embedding 중 오류 발생: {e}")
+        return None
\ No newline at end of file
diff --git a/mafm/rag/fileops.py b/mafm/rag/fileops.py
index 32c8aa3..8d9dffb 100644
--- a/mafm/rag/fileops.py
+++ b/mafm/rag/fileops.py
@@ -1,4 +1,11 @@
+"""파일 연산 모듈.
+
+C 라이브러리를 사용한 파일 연산 기능을 제공합니다.
+"""
+
 import ctypes
+from tempfile import TemporaryDirectory
+from typing import Any
 
 lib = ctypes.CDLL("./rag/C_library/libfileops.so")
 
@@ -10,7 +17,16 @@
 lib.make_soft_links.restype = ctypes.c_char_p
 
 
-def make_soft_links(paths, temp_dir):
+def make_soft_links(paths: list[str], temp_dir: TemporaryDirectory[str]) -> str:
+    """소프트 링크를 생성합니다.
+
+    Args:
+        paths: 링크를 생성할 파일 경로 리스트.
+        temp_dir: 링크를 생성할 임시 디렉토리.
+
+    Returns:
+        생성 결과 메시지.
+    """
     path_array = (ctypes.c_char_p * len(paths))(
         *[path.encode("utf-8") for path in paths]
     )
@@ -22,9 +38,17 @@ def make_soft_links(paths, temp_dir):
 lib.get_file_data.restype = ctypes.POINTER(ctypes.c_char_p)
 
 
-def get_file_data(path):
+def get_file_data(path: str) -> list[str]:
+    """파일 데이터를 읽습니다.
+
+    Args:
+        path: 읽을 파일의 경로.
+
+    Returns:
+        파일 데이터 문자열 리스트.
+    """
     result = lib.get_file_data(path.encode("utf-8"))
-    data_list = []
+    data_list: list[str] = []
     idx = 0
 
     while result[idx] is not None:
@@ -37,31 +61,40 @@ def get_file_data(path):
 lib.get_all_file_data.argtypes = [ctypes.c_char_p, ctypes.POINTER(ctypes.c_int)]
 lib.get_all_file_data.restype = ctypes.POINTER(ctypes.POINTER(ctypes.c_char_p))
 
-# ctypes를 통해 free_all_file_data 함수를 정의
 lib.free_file_data_array.restype = None
-lib.free_file_data_array.argtypes = [ctypes.POINTER(ctypes.POINTER(ctypes.c_char_p)), ctypes.c_int]
+lib.free_file_data_array.argtypes = [
+    ctypes.POINTER(ctypes.POINTER(ctypes.c_char_p)),
+    ctypes.c_int,
+]
+
+
+def get_all_file_data(directory: str) -> list[list[Any]]:
+    """Read the data of every file reported by the C library for a directory.
 
-def get_all_file_data(directory):
+    Args:
+        directory: Path of the directory to read; passed to the C function UTF-8 encoded.
+
+    Returns:
+        One list per file; each entry is a UTF-8 decoded str, or the raw bytes when decoding fails.
+    """
     num_files = ctypes.c_int(0)
     result = lib.get_all_file_data(directory.encode("utf-8"), ctypes.byref(num_files))
-    files = []
+    files: list[list[Any]] = []
     try:
         for i in range(num_files.value):
             idx = 0
-            data_list = []
+            data_list: list[Any] = []
             while result[i][idx] is not None:
                 try:
                     string = ctypes.string_at(result[i][idx]).decode("utf-8")
-                except:
+                except UnicodeDecodeError:
                     string = ctypes.string_at(result[i][idx])
                 data_list.append(string)
                 idx += 1
             files.append(data_list)
         return files
     finally:
-        # C에서 할당된 메모리 해제
-        result_casted = ctypes.cast(result, ctypes.POINTER(ctypes.POINTER(ctypes.c_char_p)))
-        lib.free_file_data_array(result_casted, num_files.value)
-
-
-# get_file_data("/Users/Ruffles/Downloads/MAFM_test/text9.txt")
+        result_casted = ctypes.cast(
+            result, ctypes.POINTER(ctypes.POINTER(ctypes.c_char_p))
+        )
+        lib.free_file_data_array(result_casted, num_files.value)
\ No newline at end of file
diff --git a/mafm/rag/sqlite.py b/mafm/rag/sqlite.py
index 380c879..5cfc221 100644
--- a/mafm/rag/sqlite.py
+++ b/mafm/rag/sqlite.py
@@ -1,25 +1,26 @@
-import sqlite3
+"""SQLite 데이터베이스 모듈.
+
+파일 시스템 메타데이터를 SQLite에 저장하고 관리합니다.
+"""
+
 import os
+import sqlite3
+
 
-# sqlite는 서버 기반 데이터베이스가 아니다.
-# 서버 기반 데이터 베이스(MySQL, PostgreSQL)와는 다르게, 서버가 없는 내장형 데이터베이스이다.
-# 데이터베이스 파일은 하나의 독립적인 파일로 구성된다.
+def initialize_database(db_name: str = "filesystem.db") -> None:
+    """Initialize the SQLite database file.
 
+    Deletes any existing file at db_name, then opens a fresh connection to that same db_name.
 
-def initialize_database(db_name="filesystem.db"):
-    # 기존에 db가 존재하면 날림
+    Args:
+        db_name: Path of the database file to recreate (previously ignored by connect()).
+    """
     if os.path.exists(db_name):
         os.remove(db_name)
 
-    # 데이터베이스 파일에 연결
-    connection = sqlite3.connect("filesystem.db")
-
-    # 커서 생성
-    # 커서는 SQL 문을 실행하고 결과를 처리하는 데 사용되는 객체이다.
-    # cursor.execute() 메소드를 사용해서 데이터베이스에 대한 SQL 쿼리를 실행할 수 있다.
+    connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
 
-    # 첫 번째 테이블(file_info) 생성
     cursor.execute(
         """
         CREATE TABLE IF NOT EXISTS file_info (
@@ -30,7 +31,6 @@ def initialize_database(db_name="filesystem.db"):
     """
     )
 
-    # 두 번째 테이블(directory_structure) 생성
     cursor.execute(
         """
         CREATE TABLE IF NOT EXISTS directory_structure (
@@ -42,13 +42,25 @@ def initialize_database(db_name="filesystem.db"):
     """
     )
 
-    # 변경 사항 저장
     connection.commit()
     connection.close()
 
 
-# CREATE 함수 - 데이터 삽입
-def insert_file_info(file_path, is_dir, db_name="filesystem.db"):
+def insert_file_info(
+    file_path: str,
+    is_dir: int,
+    db_name: str = "filesystem.db",
+) -> int:
+    """파일 정보를 데이터베이스에 삽입합니다.
+
+    Args:
+        file_path: 파일 또는 디렉토리의 절대 경로.
+        is_dir: 디렉토리 여부 (1: 디렉토리, 0: 파일).
+        db_name: 데이터베이스 파일 이름.
+
+    Returns:
+        삽입된 레코드의 ID.
+    """
     connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
     cursor.execute(
@@ -65,7 +77,20 @@ def insert_file_info(file_path, is_dir, db_name="filesystem.db"):
     return rows[0][0]
 
 
-def insert_directory_structure(id, dir_path, parent_dir_path, db_name="filesystem.db"):
+def insert_directory_structure(
+    dir_id: int,
+    dir_path: str,
+    parent_dir_path: str,
+    db_name: str = "filesystem.db",
+) -> None:
+    """디렉토리 구조 정보를 삽입합니다.
+
+    Args:
+        dir_id: 디렉토리 ID.
+        dir_path: 디렉토리 경로.
+        parent_dir_path: 부모 디렉토리 경로.
+        db_name: 데이터베이스 파일 이름.
+    """
     connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
     cursor.execute(
@@ -73,14 +98,21 @@ def insert_directory_structure(id, dir_path, parent_dir_path, db_name="filesyste
         INSERT INTO directory_structure (id, dir_path, parent_dir_path)
         VALUES (?, ?, ?)
     """,
-        (id, dir_path, parent_dir_path),
+        (dir_id, dir_path, parent_dir_path),
     )
     connection.commit()
     connection.close()
 
 
-# READ 함수 - 데이터 조회
-def get_file_info(db_name="filesystem.db"):
+def get_file_info(db_name: str = "filesystem.db") -> list[tuple]:
+    """모든 파일 정보를 조회합니다.
+
+    Args:
+        db_name: 데이터베이스 파일 이름.
+
+    Returns:
+        파일 정보 튜플 리스트.
+    """
     connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
     cursor.execute("SELECT * FROM file_info")
@@ -89,41 +121,72 @@ def get_file_info(db_name="filesystem.db"):
     return rows
 
 
-def get_path_by_id(id, db_name="filesystem.db"):
+def get_path_by_id(file_id: int, db_name: str = "filesystem.db") -> str:
+    """Look up a file path by its ID in the file_info table.
+
+    Args:
+        file_id: ID of the file_info row.
+        db_name: Database file name.
+
+    Returns:
+        The file_path of the first matching row; IndexError propagates when no row matches.
+    """
     connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
-    cursor.execute("SELECT file_path FROM file_info WHERE id = ?", (id,))
+    cursor.execute("SELECT file_path FROM file_info WHERE id = ?", (file_id,))
     rows = cursor.fetchall()
     connection.close()
-    file_path = rows[0][0]
-    return file_path
+    return rows[0][0]
+
 
+def get_id_by_path(path: str, db_name: str = "filesystem.db") -> int:
+    """Look up a file ID by its path in the file_info table.
 
-def get_id_by_path(path, db_name="filesystem.db"):
+    Args:
+        path: File path to match against file_info.file_path.
+        db_name: Database file name.
+
+    Returns:
+        The id of the first matching row; IndexError propagates when no row matches.
+    """
     connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
     cursor.execute("SELECT id FROM file_info WHERE file_path = ?", (path,))
     rows = cursor.fetchall()
     connection.close()
-    print("rows ========", rows)
-    file_path = rows[0][0]
-    return file_path
+    print(f"rows ======== {rows}")
+    return rows[0][0]
+
+
+def get_directory_structure(db_name: str = "filesystem.db") -> list[str]:
+    """Fetch every dir_path stored in the directory_structure table.
 
+    Args:
+        db_name: Database file name.
 
-def get_directory_structure(db_name="filesystem.db"):
+    Returns:
+        List with the dir_path value of each row.
+    """
     connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
     cursor.execute("SELECT dir_path FROM directory_structure")
     rows = cursor.fetchall()
     connection.close()
-    ret_list = []
-    for row in rows:
-        ret_list.append(row[0])
-    return ret_list
+    return [row[0] for row in rows]
+
 
+def update_file_info(
+    file_id: int,
+    new_file_path: str,
+    db_name: str = "filesystem.db",
+) -> None:
+    """파일 경로를 업데이트합니다.
 
-# UPDATE 함수 - 데이터 수정
-def update_file_info(id, new_file_path, db_name="filesystem.db"):
+    Args:
+        file_id: 업데이트할 파일 ID.
+        new_file_path: 새 파일 경로.
+        db_name: 데이터베이스 파일 이름.
+    """
     connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
     cursor.execute(
@@ -132,13 +195,24 @@ def update_file_info(id, new_file_path, db_name="filesystem.db"):
         SET file_path = ?
         WHERE id = ?
     """,
-        (new_file_path, id),
+        (new_file_path, file_id),
     )
     connection.commit()
     connection.close()
 
 
-def update_directory_structure(record_id, new_dir_path, db_name="filesystem.db"):
+def update_directory_structure(
+    record_id: int,
+    new_dir_path: str,
+    db_name: str = "filesystem.db",
+) -> None:
+    """디렉토리 경로를 업데이트합니다.
+
+    Args:
+        record_id: 업데이트할 레코드 ID.
+        new_dir_path: 새 디렉토리 경로.
+        db_name: 데이터베이스 파일 이름.
+    """
     connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
     cursor.execute(
@@ -153,8 +227,13 @@ def update_directory_structure(record_id, new_dir_path, db_name="filesystem.db")
     connection.close()
 
 
-# DELETE 함수 - 데이터 삭제
-def delete_file_info(record_id, db_name="filesystem.db"):
+def delete_file_info(record_id: int, db_name: str = "filesystem.db") -> None:
+    """파일 정보를 삭제합니다.
+
+    Args:
+        record_id: 삭제할 레코드 ID.
+        db_name: 데이터베이스 파일 이름.
+    """
     connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
     cursor.execute(
@@ -168,7 +247,18 @@ def delete_file_info(record_id, db_name="filesystem.db"):
     connection.close()
 
 
-def change_directory_path(dir_src_path, dir_dest_path, db_name="filesystem.db"):
+def change_directory_path(
+    dir_src_path: str,
+    dir_dest_path: str,
+    db_name: str = "filesystem.db",
+) -> None:
+    """디렉토리 경로를 변경합니다.
+
+    Args:
+        dir_src_path: 원본 디렉토리 경로.
+        dir_dest_path: 대상 디렉토리 경로.
+        db_name: 데이터베이스 파일 이름.
+    """
     connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
     cursor.execute(
@@ -187,7 +277,6 @@ def change_directory_path(dir_src_path, dir_dest_path, db_name="filesystem.db"):
     )
     rows = cursor.fetchall()
 
-    # 각 레코드에 대해 dir_path를 업데이트합니다.
     for (file_path,) in rows:
         new_file_path = file_path.replace(dir_src_path, dir_dest_path, 1)
         cursor.execute(
@@ -202,7 +291,18 @@ def change_directory_path(dir_src_path, dir_dest_path, db_name="filesystem.db"):
     connection.close()
 
 
-def change_file_path(file_src_path, file_dest_path, db_name):
+def change_file_path(
+    file_src_path: str,
+    file_dest_path: str,
+    db_name: str,
+) -> None:
+    """파일 경로를 변경합니다.
+
+    Args:
+        file_src_path: 원본 파일 경로.
+        file_dest_path: 대상 파일 경로.
+        db_name: 데이터베이스 파일 이름.
+    """
     connection = sqlite3.connect(db_name)
     cursor = connection.cursor()
     cursor.execute(
@@ -217,11 +317,15 @@ def change_file_path(file_src_path, file_dest_path, db_name):
     connection.close()
 
 
-def delete_directory_and_subdirectories(dir_path):
+def delete_directory_and_subdirectories(dir_path: str) -> None:
+    """디렉토리와 하위 디렉토리 정보를 삭제합니다.
+
+    Args:
+        dir_path: 삭제할 디렉토리 경로.
+    """
     connection = sqlite3.connect("filesystem.db")
     cursor = connection.cursor()
 
-    # directory_structure 테이블에서 dir_path가 포함된 모든 레코드 삭제
     cursor.execute(
         """
         DELETE FROM directory_structure
@@ -230,7 +334,6 @@ def delete_directory_and_subdirectories(dir_path):
         (f"{dir_path}%",),
     )
 
-    # file_info 테이블에서 file_path가 dir_path로 시작하는 모든 레코드 삭제
     cursor.execute(
         """
         DELETE FROM file_info
@@ -239,6 +342,6 @@ def delete_directory_and_subdirectories(dir_path):
         (f"{dir_path}%",),
     )
 
-    # 변경 사항을 커밋
-    conn.commit()
-    print(f"Deleted all records related to {dir_path} and its subdirectories.")
+    connection.commit()
+    connection.close()
+    print(f"Deleted all records related to {dir_path} and its subdirectories.")
\ No newline at end of file
diff --git a/mafm/rag/vectorDb.py b/mafm/rag/vectorDb.py
index 70aff03..de98caa 100644
--- a/mafm/rag/vectorDb.py
+++ b/mafm/rag/vectorDb.py
@@ -1,19 +1,27 @@
-import ast
+"""벡터 데이터베이스 모듈.
+
+Milvus 벡터 데이터베이스 관리 기능을 제공합니다.
+"""
+
 import gc
 import os
-from pymilvus import (
-    MilvusClient,
-    connections,
-    Collection,
-    FieldSchema,
-    CollectionSchema,
-    DataType,
-)
-from .embedding import embedding
-from .sqlite import get_path_by_id
-
-
-def delete_db_lock_file(db_name):
+from typing import Any
+
+from pymilvus import MilvusClient
+
+from mafm.rag.embedding import embedding
+from mafm.rag.sqlite import get_path_by_id
+
+COLLECTION_NAME = "demo_collection"
+VECTOR_DIMENSION = 384
+
+
+def _delete_db_lock_file(db_name: str) -> None:
+    """데이터베이스 잠금 파일을 삭제합니다.
+
+    Args:
+        db_name: 데이터베이스 파일 경로.
+    """
     dir_path = os.path.dirname(db_name)
     base_name = os.path.basename(db_name)
 
@@ -24,64 +32,86 @@ def delete_db_lock_file(db_name):
         print(f"No lock file found for {lock_file}")
 
 
-def initialize_vector_db(db_name):
-    client = None
+def initialize_vector_db(db_name: str) -> None:
+    """벡터 데이터베이스를 초기화합니다.
+
+    Args:
+        db_name: 생성할 데이터베이스 파일 경로.
+
+    Raises:
+        Exception: 데이터베이스 초기화 중 오류가 발생한 경우.
+    """
+    client: MilvusClient | None = None
     try:
-        # Milvus에 연결
         client = MilvusClient(db_name)
         print(f"Connected to {db_name}")
 
-        # 컬렉션 스키마 정의 => RDB의 테이블과 비슷한 개념
-        if client.has_collection(collection_name="demo_collection"):
-            client.drop_collection(collection_name="demo_collection")
+        if client.has_collection(collection_name=COLLECTION_NAME):
+            client.drop_collection(collection_name=COLLECTION_NAME)
 
         client.create_collection(
-            collection_name="demo_collection",
-            dimension=384,  #  384 Adjust dimension as needed
+            collection_name=COLLECTION_NAME,
+            dimension=VECTOR_DIMENSION,
         )
     except Exception as e:
         print(f"Error initializing vector DB for {db_name}: {e}")
+        raise
     finally:
         if client is not None:
             client.close()
         gc.collect()
-        delete_db_lock_file(db_name)
+        _delete_db_lock_file(db_name)
+
 
+def delete_vector_db(db_name: str) -> None:
+    """벡터 데이터베이스를 삭제합니다.
 
-def delete_vector_db(db_name):
+    Args:
+        db_name: 삭제할 데이터베이스 파일 경로.
+    """
+    client: MilvusClient | None = None
     try:
         client = MilvusClient(db_name)
-        if client.has_collection(collection_name="demo_collection"):
-            client.drop_collection(collection_name="demo_collection")
-            print(f"Collection 'demo_collection' in {db_name} has been deleted.")
+        if client.has_collection(collection_name=COLLECTION_NAME):
+            client.drop_collection(collection_name=COLLECTION_NAME)
+            print(f"Collection '{COLLECTION_NAME}' in {db_name} has been deleted.")
         else:
-            print(f"Collection 'demo_collection' does not exist in {db_name}")
+            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
     except Exception as e:
         print(f"Error deleting collection in {db_name}: {e}")
     finally:
-        client.close()
+        if client is not None:
+            client.close()
         gc.collect()
-        delete_db_lock_file(db_name)
+        _delete_db_lock_file(db_name)
+
 
+def save(db_name: str, file_id: int, queries: list[str]) -> None:
+    """쿼리 데이터를 벡터 데이터베이스에 저장합니다.
 
-def save(db_name, id, queries):
+    Args:
+        db_name: 데이터베이스 파일 경로.
+        file_id: 파일 고유 식별자.
+        queries: 저장할 텍스트 쿼리 리스트.
+    """
+    client: MilvusClient | None = None
     try:
         client = MilvusClient(db_name)
-        if not client.has_collection(collection_name="demo_collection"):
-            print(f"Collection 'demo_collection' does not exist in {db_name}")
+        if not client.has_collection(collection_name=COLLECTION_NAME):
+            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
             return
 
-        # 쿼리 임베딩
         query_embeddings = embedding(queries)
+        if query_embeddings is None:
+            print("Failed to generate embeddings")
+            return
 
-        # 임베딩 데이터 저장
         data = [
-            {"id": id, "vector": query_embeddings[i], "word": queries[i]}
+            {"id": file_id, "vector": query_embeddings[i], "word": queries[i]}
             for i in range(len(query_embeddings))
         ]
 
-        # 데이터 삽입
-        res = client.insert(collection_name="demo_collection", data=data)
+        res = client.insert(collection_name=COLLECTION_NAME, data=data)
         print(res)
 
     except MemoryError as me:
@@ -91,20 +121,27 @@ def save(db_name, id, queries):
     except Exception as e:
         print(f"Error occurred during saving data to Milvus: {e}")
     finally:
-        client.close()
+        if client is not None:
+            client.close()
         gc.collect()
-        delete_db_lock_file(db_name)
+        _delete_db_lock_file(db_name)
+
 
+def insert_file_embedding(file_data: list[dict[str, Any]], db_name: str) -> None:
+    """파일 임베딩 데이터를 삽입합니다.
 
-def insert_file_embedding(file_data, db_name):
+    Args:
+        file_data: 삽입할 임베딩 데이터 리스트.
+        db_name: 데이터베이스 파일 경로.
+    """
+    client: MilvusClient | None = None
     try:
         client = MilvusClient(db_name)
-        if not client.has_collection(collection_name="demo_collection"):
-            print(f"Collection 'demo_collection' does not exist in {db_name}")
+        if not client.has_collection(collection_name=COLLECTION_NAME):
+            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
             return
 
-        # 데이터 삽입
-        res = client.insert(collection_name="demo_collection", data=file_data)
+        client.insert(collection_name=COLLECTION_NAME, data=file_data)
 
     except MemoryError as me:
         print(f"MemoryError: {me}")
@@ -113,73 +150,110 @@ def insert_file_embedding(file_data, db_name):
     except Exception as e:
         print(f"Error occurred during saving data to Milvus: {e}")
     finally:
-        client.close()
+        if client is not None:
+            client.close()
         gc.collect()
-        delete_db_lock_file(db_name)
+        _delete_db_lock_file(db_name)
+
 
+def search(db_name: str, query_list: list[str]) -> list[str]:
+    """벡터 데이터베이스에서 유사한 항목을 검색합니다.
 
-def search(db_name, query_list):
+    Args:
+        db_name: 데이터베이스 파일 경로.
+        query_list: 검색할 쿼리 텍스트 리스트.
+
+    Returns:
+        검색된 파일 경로 리스트.
+    """
+    client: MilvusClient | None = None
     try:
         client = MilvusClient(db_name)
-        if not client.has_collection(collection_name="demo_collection"):
-            print(f"Collection 'demo_collection' does not exist in {db_name}")
-            return
+        if not client.has_collection(collection_name=COLLECTION_NAME):
+            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
+            return []
 
         query_vectors = embedding(query_list)
+        if query_vectors is None:
+            return []
 
         res = client.search(
-            collection_name="demo_collection",
+            collection_name=COLLECTION_NAME,
             data=query_vectors,
             limit=2,
         )
         id_list = [item["id"] for item in res[0]]
-        path_list = [get_path_by_id(id, "filesystem.db") for id in id_list]
+        path_list = [get_path_by_id(file_id, "filesystem.db") for file_id in id_list]
         return path_list
     finally:
-        client.close()
+        if client is not None:
+            client.close()
         gc.collect()
-        delete_db_lock_file(db_name)
+        _delete_db_lock_file(db_name)
+
+
+def find_by_id(search_id: int, db_name: str) -> list[dict[str, Any]] | None:
+    """ID로 벡터 데이터를 검색합니다.
 
+    Args:
+        search_id: 검색할 파일 ID.
+        db_name: 데이터베이스 파일 경로.
 
-def find_by_id(search_id, db_name):
+    Returns:
+        검색된 데이터 리스트. 없으면 None.
+    """
+    client: MilvusClient | None = None
     try:
         client = MilvusClient(db_name)
-        collection_name = "demo_collection"
 
-        if not client.has_collection(collection_name):
-            print(f"Collection '{collection_name}' does not exist in {db_name}")
-            return
+        if not client.has_collection(COLLECTION_NAME):
+            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
+            return None
 
         res = client.query(
-            collection_name=collection_name, filter=f"id in [{search_id}]"
+            collection_name=COLLECTION_NAME, filter=f"id in [{search_id}]"
         )
 
         if not res:
             print(f"No results found for ID: {search_id}")
-            return
+            return None
         return res
     finally:
-        client.close()
+        if client is not None:
+            client.close()
         gc.collect()
-        delete_db_lock_file(db_name)
+        _delete_db_lock_file(db_name)
+
 
+def remove_by_id(remove_id: int, db_name: str) -> dict[str, Any] | None:
+    """ID로 벡터 데이터를 삭제합니다.
 
-def remove_by_id(remove_id, db_name):
+    Args:
+        remove_id: 삭제할 파일 ID.
+        db_name: 데이터베이스 파일 경로.
+
+    Returns:
+        삭제 결과. 오류 시 None.
+
+    Raises:
+        Exception: 컬렉션이 존재하지 않는 경우.
+    """
+    client: MilvusClient | None = None
     try:
         client = MilvusClient(db_name)
-        collection_name = "demo_collection"
-        if not client.has_collection(collection_name):
+        if not client.has_collection(COLLECTION_NAME):
             raise Exception(
-                f"Collection '{collection_name}' does not exist in {db_name}"
+                f"Collection '{COLLECTION_NAME}' does not exist in {db_name}"
             )
 
         res = client.delete(
-            collection_name=collection_name, filter=f"id in [{remove_id}]"
+            collection_name=COLLECTION_NAME, filter=f"id in [{remove_id}]"
         )
 
         print(f"Deleted records with ID: {remove_id}")
         return res
     finally:
-        client.close()
+        if client is not None:
+            client.close()
         gc.collect()
-        delete_db_lock_file(db_name)
+        _delete_db_lock_file(db_name)
\ No newline at end of file

From db6c27400a52ed34a354e5bfffc883f8432753f0 Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Mon, 24 Nov 2025 15:03:15 +0900
Subject: [PATCH 04/13] =?UTF-8?q?Refactor:=20=EB=A9=94=EC=9D=B8=20?=
 =?UTF-8?q?=EB=AA=A8=EB=93=88=20=EB=A6=AC=ED=8C=A9=ED=86=A0=EB=A7=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mafm/observer.py | 350 +++++++++++++++++++++++++++--------------------
 mafm/shell.py    | 118 +++++++++-------
 2 files changed, 270 insertions(+), 198 deletions(-)

diff --git a/mafm/observer.py b/mafm/observer.py
index 4df6dcf..288be04 100644
--- a/mafm/observer.py
+++ b/mafm/observer.py
@@ -1,137 +1,194 @@
+"""파일 시스템 감시 모듈.
+
+Watchdog을 사용하여 파일 시스템 변경을 모니터링합니다.
+"""
+
+import argparse
+import os
 import time
+
+import pdfplumber
+from docx import Document
+from watchdog.events import FileSystemEvent, FileSystemEventHandler
 from watchdog.observers import Observer
-from watchdog.events import FileSystemEventHandler
-import os
-from rag.vectorDb import save
-from rag.sqlite import (
-    insert_file_info,
-    insert_directory_structure,
-    update_file_info,
-    get_id_by_path,
+
+from mafm.rag.embedding import initialize_model
+from mafm.rag.fileops import get_file_data
+from mafm.rag.sqlite import (
     change_directory_path,
     change_file_path,
     delete_directory_and_subdirectories,
+    get_id_by_path,
     initialize_database,
+    insert_directory_structure,
+    insert_file_info,
 )
-from rag.embedding import embedding, initialize_model
-from rag.fileops import get_file_data
-from rag.vectorDb import (
+from mafm.rag.vectorDb import (
+    delete_vector_db,
+    find_by_id,
     initialize_vector_db,
     insert_file_embedding,
-    find_by_id,
     remove_by_id,
-    delete_vector_db,
+    save,
 )
-from collections import defaultdict
-import pdfplumber
-from docx import Document
 
+DEFAULT_CHUNK_SIZE = 500
+
+
+def read_pdf(file_path: str) -> str:
+    """PDF 파일을 읽어서 텍스트로 변환합니다.
 
-def read_pdf(file_path):
-    """PDF 파일을 읽어서 텍스트로 변환하는 함수"""
+    Args:
+        file_path: PDF 파일 경로.
+
+    Returns:
+        추출된 텍스트 내용.
+    """
     text = ""
     with pdfplumber.open(file_path) as pdf:
         for page in pdf.pages:
-            text += page.extract_text() + "\n"
+            extracted = page.extract_text()
+            if extracted:
+                text += extracted + "\n"
     return text
 
-def read_word(file_path):
-    """Word 파일을 읽어서 텍스트로 변환하는 함수"""
+
+def read_word(file_path: str) -> str:
+    """Word 파일을 읽어서 텍스트로 변환합니다.
+
+    Args:
+        file_path: Word 파일 경로.
+
+    Returns:
+        추출된 텍스트 내용.
+    """
     text = ""
     doc = Document(file_path)
     for paragraph in doc.paragraphs:
         text += paragraph.text + "\n"
     return text
 
-def split_text_into_chunks(text, chunk_size=500):
-    """텍스트를 주어진 크기의 청크 배열로 분할하는 함수"""
-    return [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
+
+def split_text_into_chunks(
+    text: str,
+    chunk_size: int = DEFAULT_CHUNK_SIZE,
+) -> list[str]:
+    """텍스트를 주어진 크기의 청크로 분할합니다.
+
+    Args:
+        text: 분할할 텍스트.
+        chunk_size: 각 청크의 최대 크기.
+
+    Returns:
+        텍스트 청크 리스트.
+    """
+    return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]
+
 
 class FileEventHandler(FileSystemEventHandler):
-    """파일 시스템 이벤트 핸들러 클래스"""
+    """파일 시스템 이벤트 핸들러 클래스.
+
+    파일 생성, 삭제, 이동 이벤트를 처리합니다.
+    """
 
-    def __init__(self):
+    IGNORED_PATTERNS = ["db-journal", ".db"]
+
+    def __init__(self) -> None:
+        """FileEventHandler를 초기화합니다."""
         super().__init__()
 
-    def is_dot_file(self, path):
-        """숨김 파일인지 확인하는 함수"""
+    def _is_dot_file(self, path: str) -> bool:
+        """숨김 파일인지 확인합니다.
+
+        Args:
+            path: 확인할 파일 경로.
+
+        Returns:
+            숨김 파일이면 True.
+        """
         return os.path.basename(path).startswith(".")
 
-    def is_ignored_file(self, path):
-        """특정 패턴을 가진 파일을 무시하는 함수"""
-        ignored_patterns = ["db-journal", ".db"]
-        return any(pattern in path for pattern in ignored_patterns)
+    def _is_ignored_file(self, path: str) -> bool:
+        """무시할 파일인지 확인합니다.
+
+        Args:
+            path: 확인할 파일 경로.
+
+        Returns:
+            무시할 파일이면 True.
+        """
+        return any(pattern in path for pattern in self.IGNORED_PATTERNS)
+
+    def _should_ignore(self, path: str) -> bool:
+        """파일을 무시해야 하는지 확인합니다.
 
-    def on_deleted(self, event):
-        """파일 또는 디렉토리 삭제 이벤트 처리 함수"""
-        if self.is_dot_file(event.src_path) or self.is_ignored_file(event.src_path):
-            print("ignore deleted: " + event.src_path)
-            return  # 숨김 파일과 무시할 패턴을 가진 파일은 무시
+        Args:
+            path: 확인할 파일 경로.
+
+        Returns:
+            무시해야 하면 True.
+        """
+        return self._is_dot_file(path) or self._is_ignored_file(path)
+
+    def on_deleted(self, event: FileSystemEvent) -> None:
+        """파일 또는 디렉토리 삭제 이벤트를 처리합니다.
+
+        Args:
+            event: 파일 시스템 이벤트.
+        """
+        if self._should_ignore(event.src_path):
+            print(f"ignore deleted: {event.src_path}")
+            return
 
         print("--deleted--")
-        print("deleting: " + event.src_path)
+        print(f"deleting: {event.src_path}")
 
         if event.is_directory:
             dir_path = event.src_path
             dir_name = os.path.basename(dir_path)
 
-            db_name = dir_path + "/" + dir_name + ".db"
-            delete_vector_db(db_name)  # 디렉토리와 연결된 벡터 DB 삭제
-            delete_directory_and_subdirectories(dir_path)  # 디렉토리 정보 DB에서 삭제
+            db_name = f"{dir_path}/{dir_name}.db"
+            delete_vector_db(db_name)
+            delete_directory_and_subdirectories(dir_path)
             print(f"Deleted directory and associated VectorDB: {db_name}")
             return
 
         file_path = event.src_path
         dir_path = os.path.dirname(file_path)
 
-        db_name = dir_path + "/" + os.path.basename(dir_path) + ".db"
-        id = get_id_by_path(file_path, "filesystem.db")
-        remove_by_id(id, db_name)  # 벡터 DB에서 파일 데이터 삭제
+        db_name = f"{dir_path}/{os.path.basename(dir_path)}.db"
+        file_id = get_id_by_path(file_path, "filesystem.db")
+        remove_by_id(file_id, db_name)
         print(f"Deleted file: {event.src_path}")
 
-    # def on_modified(self, event):
-    #     """파일 수정 이벤트 처리 함수"""
-    #     if event.is_directory or self.is_dot_file(event.src_path) or self.is_ignored_file(event.src_path):
-    #         print("directory or dotfile modified")
-    #         return  # 디렉토리와 숨김 파일은 무시
-    #
-    #     file_src_path = event.src_path
-    #     dir_path = os.path.dirname(file_src_path)
-    #     db_name = dir_path + "/" + os.path.basename(dir_path) + ".db"
-    #
-    #
-    #     id = get_id_by_path(file_src_path, "filesystem.db")
-    #     remove_by_id(id, db_name)  # 기존 벡터 데이터 제거
-    #     save(db_name, id, get_file_data(file_src_path)[2:])  # 새로운 벡터 데이터 저장
-    #     insert_file_info(file_src_path, 0, "filesystem.db")  # 파일 정보 DB 업데이트
-    #     print(f"Modified file: {event.src_path}")
-
-    def on_moved(self, event):
-        """파일 또는 디렉토리 이동 이벤트 처리 함수"""
-        if (
-            self.is_dot_file(event.src_path)
-            or self.is_dot_file(event.dest_path)
-            or self.is_ignored_file(event.src_path)
-            or self.is_ignored_file(event.dest_path)
-        ):
-            return  # 숨김 파일은 무시
+    def on_moved(self, event: FileSystemEvent) -> None:
+        """파일 또는 디렉토리 이동 이벤트를 처리합니다.
+
+        Args:
+            event: 파일 시스템 이벤트.
+        """
+        if self._should_ignore(event.src_path) or self._should_ignore(event.dest_path):
+            return
 
         print("--moved--")
 
         if event.is_directory:
-            change_directory_path(
-                event.src_path, event.dest_path, "filesystem.db"
-            )  # 디렉토리 경로 변경
+            change_directory_path(event.src_path, event.dest_path, "filesystem.db")
             print(f"Moved directory: from {event.src_path} to {event.dest_path}")
         else:
             print(f"Moved file: from {event.src_path} to {event.dest_path}")
-            self.move_file(event.src_path, event.dest_path)
+            self._move_file(event.src_path, event.dest_path)
+
+    def on_created(self, event: FileSystemEvent) -> None:
+        """파일 생성 이벤트를 처리합니다.
 
-    def on_created(self, event):
+        Args:
+            event: 파일 시스템 이벤트.
+        """
         print("--created--", flush=True)
-        """파일 생성 이벤트 처리 함수"""
-        if self.is_dot_file(event.src_path) or self.is_ignored_file(event.src_path):
-            return  # 숨김 파일과 무시할 패턴을 가진 파일은 무시
+
+        if self._should_ignore(event.src_path):
+            return
 
         absolute_file_path = event.src_path
         dirpath = os.path.dirname(absolute_file_path)
@@ -140,107 +197,111 @@ def on_created(self, event):
         if event.is_directory:
             print("created directory")
             try:
-                initialize_vector_db(dirpath + "/" + dirname + ".db")  # 벡터 DB 초기화
-                id = insert_file_info(absolute_file_path, 1, "filesystem.db")
+                initialize_vector_db(f"{dirpath}/{dirname}.db")
+                file_id = insert_file_info(absolute_file_path, 1, "filesystem.db")
                 insert_directory_structure(
-                    id, dirpath, os.path.dirname(dirpath), "filesystem.db"
+                    file_id, dirpath, os.path.dirname(dirpath), "filesystem.db"
                 )
             except Exception as e:
                 print(f"Error initializing vector DB for directory: {e}")
         else:
             print("created file")
-            insert_file_info(
-                absolute_file_path, 0, "filesystem.db"
-            )  # 파일 정보 DB에 추가
-
-            # 파일 형식에 따라 데이터를 읽고 500바이트 크기의 배열로 분할
-            if absolute_file_path.endswith(".pdf"):
-                text_content = read_pdf(absolute_file_path)
-                text_chunks = split_text_into_chunks(text_content)
-            elif absolute_file_path.endswith(".docx"):
-                text_content = read_word(absolute_file_path)
-                text_chunks = split_text_into_chunks(text_content)
-            else:
-                # 일반 텍스트 파일일 경우
-                file_chunks = get_file_data(absolute_file_path)
-                text_chunks = file_chunks[2:]  # 필요한 데이터 조정
+            insert_file_info(absolute_file_path, 0, "filesystem.db")
 
+            text_chunks = self._extract_file_content(absolute_file_path)
 
-            # 벡터 DB에 저장
             save(
-                dirpath + "/" + dirname + ".db",
+                f"{dirpath}/{dirname}.db",
                 get_id_by_path(absolute_file_path, "filesystem.db"),
                 text_chunks,
             )
             print(f"Created file: {event.src_path}")
 
-    def move_file(self, file_src_path, file_dest_path):
-        """파일 이동 시 벡터 DB 업데이트 함수"""
+    def _extract_file_content(self, file_path: str) -> list[str]:
+        """파일 내용을 추출합니다.
+
+        Args:
+            file_path: 파일 경로.
+
+        Returns:
+            텍스트 청크 리스트.
+        """
+        if file_path.endswith(".pdf"):
+            text_content = read_pdf(file_path)
+            return split_text_into_chunks(text_content)
+        elif file_path.endswith(".docx"):
+            text_content = read_word(file_path)
+            return split_text_into_chunks(text_content)
+        else:
+            file_chunks = get_file_data(file_path)
+            return file_chunks[2:]
+
+    def _move_file(self, file_src_path: str, file_dest_path: str) -> None:
+        """파일 이동 시 벡터 DB를 업데이트합니다.
+
+        Args:
+            file_src_path: 원본 파일 경로.
+            file_dest_path: 대상 파일 경로.
+        """
         dir_path = os.path.dirname(file_src_path)
-        db_name = dir_path + "/" + os.path.basename(dir_path) + ".db"
-        id = get_id_by_path(file_src_path, "filesystem.db")
-        file_data = find_by_id(id, db_name)
-        insert_file_embedding(file_data, db_name)  # 파일 임베딩 데이터 추가
-        remove_by_id(id, db_name)  # 기존 ID 데이터 제거
-        change_file_path(file_src_path, file_dest_path, db_name)  # 파일 경로 업데이트
+        db_name = f"{dir_path}/{os.path.basename(dir_path)}.db"
+        file_id = get_id_by_path(file_src_path, "filesystem.db")
+        file_data = find_by_id(file_id, db_name)
+        if file_data:
+            insert_file_embedding(file_data, db_name)
+        remove_by_id(file_id, db_name)
+        change_file_path(file_src_path, file_dest_path, db_name)
 
 
-# SQLite DB에 파일 및 디렉토리 데이터 삽입
-def start_command_c(root):
-    # 시작 시간 기록
+def start_command_c(root: str) -> None:
+    """SQLite DB에 파일 및 디렉토리 데이터를 삽입합니다.
+
+    Args:
+        root: 루트 디렉토리 경로.
+    """
     start_time = time.time()
 
-    # SQLite DB 연결 및 초기화
     try:
         initialize_database("filesystem.db")
     except Exception as e:
         print(f"Error initializing database: {e}")
         return
 
-    # root 디렉토리의 벡터 DB 초기화
     try:
-        initialize_vector_db(root + "/" + os.path.basename(root) + ".db")
+        initialize_vector_db(f"{root}/{os.path.basename(root)}.db")
     except Exception as e:
         print(f"Error initializing vector DB for root: {e}")
         return
 
-    id = insert_file_info(root, 1, "filesystem.db")
+    file_id = insert_file_info(root, 1, "filesystem.db")
 
-    # 루트의 부모 디렉토리 찾기
     last_slash_index = root.rfind("/")
-    if last_slash_index != -1:
-        root_parent = root[:last_slash_index]
+    root_parent = root[:last_slash_index] if last_slash_index != -1 else ""
 
-    insert_directory_structure(id, root, root_parent, "filesystem.db")
+    insert_directory_structure(file_id, root, root_parent, "filesystem.db")
 
-    # 디렉터리 재귀 탐색
     for dirpath, dirnames, filenames in os.walk(root):
-        # 디렉토리 정보 삽입
         for dirname in dirnames:
             full_path = os.path.join(dirpath, dirname)
             print(f"디렉토리 경로: {full_path}")
             try:
-                initialize_vector_db(full_path + "/" + dirname + ".db")
+                initialize_vector_db(f"{full_path}/{dirname}.db")
             except Exception as e:
                 print(f"Error initializing vector DB for directory: {e}")
                 continue
 
-            id = insert_file_info(full_path, 1, "filesystem.db")
-            insert_directory_structure(id, full_path, dirpath, "filesystem.db")
+            dir_id = insert_file_info(full_path, 1, "filesystem.db")
+            insert_directory_structure(dir_id, full_path, dirpath, "filesystem.db")
 
-        # 파일 정보 삽입 및 벡터 DB에 저장
         for filename in filenames:
-            # 비밀 파일과 .db 파일 제외
             if filename.startswith(".") or filename.endswith(".db"):
                 continue
 
             full_path = os.path.join(dirpath, filename)
             print(f"Embedding 하는 파일의 절대 경로: {full_path}")
 
-            # 파일 정보 삽입
-            id = insert_file_info(full_path, 0, "filesystem.db")
+            file_id = insert_file_info(full_path, 0, "filesystem.db")
 
-            # PDF 및 Word 파일 처리
             if filename.endswith(".pdf"):
                 text_content = read_pdf(full_path)
                 text_chunks = split_text_into_chunks(text_content)
@@ -248,60 +309,49 @@ def start_command_c(root):
                 text_content = read_word(full_path)
                 text_chunks = split_text_into_chunks(text_content)
             else:
-                # 일반 텍스트 파일 처리
                 file_chunks = get_file_data(full_path)
-                text_chunks = file_chunks[2:]  # 필요한 데이터 조정
+                text_chunks = file_chunks[2:]
 
-            # 각 디렉토리의 벡터 DB에 해당 파일 내용을 저장
             dirname = dirpath.split("/")[-1]
-            save(dirpath + "/" + dirname + ".db", id, text_chunks)
-
-    # 종료 시간 기록
-    end_time = time.time()
+            save(f"{dirpath}/{dirname}.db", file_id, text_chunks)
 
-    # 걸린 시간 계산
-    elapsed_time = end_time - start_time
+    elapsed_time = time.time() - start_time
     print(f"작업에 걸린 시간: {elapsed_time:.4f} 초")
 
 
-def start_watchdog(root_dir):
-    """파일 시스템 감시 시작 함수"""
-    initialize_model()  # 임베딩 모델 초기화
+def start_watchdog(root_dir: str) -> None:
+    """파일 시스템 감시를 시작합니다.
+
+    Args:
+        root_dir: 감시할 루트 디렉토리 경로.
+    """
+    initialize_model()
     try:
-        # 해당 root 아래에 존재하는 모든 파일들을 탐색해서 sqlite db에 저장해야함.
-        # start_command_python(root_dir)
         start_command_c(root_dir)
-        # get_file_data(root)
     except IndexError:
         print("start: missing argument")
     except FileNotFoundError:
         print(f"start: no such file or directory: {root_dir}")
 
-    # 파일 이벤트 핸들러와 감시자 생성
     event_handler = FileEventHandler()
     observer = Observer()
     observer.schedule(event_handler, path=root_dir, recursive=True)
 
-    # 파일 시스템 모니터링 시작
     observer.start()
     try:
         while True:
-            time.sleep(1)  # 감시 유지
+            time.sleep(1)
     except KeyboardInterrupt:
         observer.stop()
     observer.join()
 
 
-import argparse
-
 if __name__ == "__main__":
-    # 명령줄 인자 파싱
     parser = argparse.ArgumentParser(description="MAFM watchdog")
     parser.add_argument("-r", "--root", help="Root directory path")
     args = parser.parse_args()
 
-    # 루트 디렉토리 경로가 제공되지 않으면 경고 메시지 출력
     if not args.root:
         print("Root directory path is required.")
     else:
-        start_watchdog(args.root)  # 감시 시작
+        start_watchdog(args.root)
\ No newline at end of file
diff --git a/mafm/shell.py b/mafm/shell.py
index 1da3806..995c721 100644
--- a/mafm/shell.py
+++ b/mafm/shell.py
@@ -1,31 +1,62 @@
+"""MAFM 셸 모듈.
+
+대화형 파일 관리 셸 인터페이스를 제공합니다.
+"""
+
+import argparse
 import os
 import subprocess
 import tempfile
-import time
-from rag.fileops import make_soft_links, get_file_data
-from rag.sqlite import (
-    initialize_database,
-    insert_file_info,
-    insert_directory_structure,
-)
-from rag.vectorDb import (
-    initialize_vector_db,
-    save,
-)
-from rag.embedding import initialize_model
-from agent.graph import graph
+from tempfile import TemporaryDirectory
+from typing import TYPE_CHECKING
+
+from mafm.agent.graph import graph
+from mafm.rag.embedding import initialize_model
+from mafm.rag.fileops import make_soft_links
+
+if TYPE_CHECKING:
+    from subprocess import CompletedProcess
+
+
+class ShellContext:
+    """셸 컨텍스트를 관리하는 클래스.
 
+    Attributes:
+        link_dir: 현재 활성화된 임시 링크 디렉토리.
+        root_dir: 루트 디렉토리 경로.
+    """
 
-link_dir = None
+    def __init__(self, root_dir: str) -> None:
+        """ShellContext를 초기화합니다.
 
+        Args:
+            root_dir: 루트 디렉토리 경로.
+        """
+        self.link_dir: TemporaryDirectory[str] | None = None
+        self.root_dir = root_dir
 
-def execute_command(command, root_dir):
-    global link_dir
+    def cleanup_link_dir(self) -> None:
+        """링크 디렉토리를 정리합니다."""
+        if self.link_dir is not None:
+            self.link_dir.cleanup()
+            self.link_dir = None
 
-    # temp_dir_path 지정
+
+def execute_command(
+    command: str,
+    context: ShellContext,
+) -> "CompletedProcess[bytes] | None":
+    """명령어를 실행합니다.
+
+    Args:
+        command: 실행할 명령어 문자열.
+        context: 셸 컨텍스트.
+
+    Returns:
+        명령어 실행 결과. 실패 시 None.
+    """
     temp_dir_path = os.path.join(os.getcwd(), "temp")
 
-    # temp 디렉토리가 없으면 생성
     if not os.path.exists(temp_dir_path):
         os.makedirs(temp_dir_path)
 
@@ -34,22 +65,19 @@ def execute_command(command, root_dir):
         if cmd_parts[0] == "mlink":
             if len(cmd_parts) < 2:
                 print("mlink: missing arguments. Usage: mlink <query>")
-                return
+                return None
 
             prompt = " ".join(cmd_parts[1:])
-            paths = graph(root_dir, prompt)
+            paths = graph(context.root_dir, prompt)
 
-            # 임시 디렉토리 생성
             temp_dir = tempfile.TemporaryDirectory(dir=temp_dir_path)
 
-            # 소프트 링크 생성
             result = make_soft_links(paths, temp_dir)
             print(f"Soft links created: {result}")
 
-            # 디렉토리 변경 및 링크 디렉토리 갱신
             os.chdir(temp_dir.name)
-            link_dir = temp_dir
-            return
+            context.link_dir = temp_dir
+            return None
 
         elif cmd_parts[0] == "cd":
             try:
@@ -61,39 +89,35 @@ def execute_command(command, root_dir):
                 print("cd: missing argument")
             except FileNotFoundError:
                 print(f"cd: no such file or directory: {cmd_parts[1]}")
+            return None
 
         else:
-            result = subprocess.run(cmd_parts, check=True)
-            return result
+            return subprocess.run(cmd_parts, check=True)
+
     except subprocess.CalledProcessError as e:
         print(f"Command failed: {e}")
+        return None
     except FileNotFoundError:
         print(f"Command not found: {command}")
+        return None
 
 
-def shell(root_dir: str):
-    global link_dir
+def shell(root_dir: str) -> None:
+    """대화형 셸을 시작합니다.
 
-    initialize_model()  # embedding 모델 초기화
+    Args:
+        root_dir: 루트 디렉토리 경로.
+    """
+    initialize_model()
     print("model")
 
-    # # root 위치에서부터 MAFM을 활성화
-    # # /Users 아래에 존재하는 모든 디렉토리들을 관리할 수 있으면 좋겠지만, 일단 프로토타입이기 때문에 depth를 최소화
-    # try:
-    #     # 해당 root 아래에 존재하는 모든 파일들을 탐색해서 sqlite db에 저장해야함.
-    #     # start_command_python(root_dir)
-    #     start_command_c(root_dir)
-    #     # get_file_data(root)
-    # except IndexError:
-    #     print("start: missing argument")
-    # except FileNotFoundError:
-    #     print(f"start: no such file or directory: {root_dir}")
+    context = ShellContext(root_dir)
 
     while True:
         cwd = os.getcwd()
-        if link_dir != None and not link_dir.name in cwd:
-            link_dir.cleanup()
-            link_dir = None
+        if context.link_dir is not None and context.link_dir.name not in cwd:
+            context.cleanup_link_dir()
+
         command = input(f"{cwd} $ ")
         command = command.encode("utf-8").decode("utf-8")
 
@@ -103,10 +127,8 @@ def shell(root_dir: str):
         elif command.strip() == "":
             continue
         else:
-            execute_command(command, root_dir)
-
+            execute_command(command, context)
 
-import argparse
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="MAFM shell")
@@ -116,4 +138,4 @@ def shell(root_dir: str):
     if not args.root:
         print("Root directory path is required.")
     else:
-        shell(args.root)
+        shell(args.root)
\ No newline at end of file

From a1fd452c46d4bddefb223ba48664bf602423c29e Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Mon, 24 Nov 2025 15:52:48 +0900
Subject: [PATCH 05/13] =?UTF-8?q?Refactor:=20=5F=5Finit=5F=5F.py=20?=
 =?UTF-8?q?=EB=B0=8F=20=EB=A0=88=EA=B1=B0=EC=8B=9C=20=EC=BD=94=EB=93=9C=20?=
 =?UTF-8?q?=EC=A0=95=EB=A6=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mafm/__init__.py           | Bin 0 -> 83 bytes
 mafm/agent/__init__.py     |   4 ++++
 mafm/agent/agents/tools.py |  25 +++++++------------------
 mafm/rag/__init__.py       | Bin 0 -> 86 bytes
 4 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/mafm/__init__.py b/mafm/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a3855940b5e4d2a13069230528043291e8152491 100644
GIT binary patch
literal 83
zcmY#VQu1|l^HtFBEzK#()OAcx%_~uG%gjkt@J-B1OiwM+RM1#*K#z;dCowPGy(qCD
nL%~2o!Qq3#g%Xw>3OqFm3~LnbEQxVZkolT<;)x!Yl9CbtOH&(d

literal 0
HcmV?d00001

diff --git a/mafm/agent/__init__.py b/mafm/agent/__init__.py
index e69de29..58a9cd8 100644
--- a/mafm/agent/__init__.py
+++ b/mafm/agent/__init__.py
@@ -0,0 +1,4 @@
+"""에이전트 패키지.
+
+LangGraph 기반 멀티 에이전트 워크플로우를 제공합니다.
+"""
\ No newline at end of file
diff --git a/mafm/agent/agents/tools.py b/mafm/agent/agents/tools.py
index 3868acf..cd987f2 100644
--- a/mafm/agent/agents/tools.py
+++ b/mafm/agent/agents/tools.py
@@ -1,20 +1,9 @@
-# # Legacy
-# from typing import Annotated
-# from pydantic import BaseModel, Field
-# from langchain_core.tools import tool
-# from rag.vectorDb import search
+"""레거시 도구 모듈.
 
+이 모듈은 더 이상 사용되지 않습니다. 향후 삭제 예정입니다.
 
-# @tool("get_file_list")
-# def get_file_list(
-#     query: Annotated[str, "query"], directory_name: Annotated[str, "directory name"]
-# ) -> Annotated[list, "file_list"]:
-#     """
-#     get file list from user input
-#     """
-#     # return search(member + ".db", query)
-#     return [
-#         f"file1_{directory_name}.txt",
-#         f"file2_{directory_name}.txt",
-#         f"file3_{directory_name}.txt",
-#     ]
+Deprecated:
+    이 모듈의 기능은 member.py로 이전되었습니다.
+"""
+
+# Legacy code - 향후 삭제 예정
\ No newline at end of file
diff --git a/mafm/rag/__init__.py b/mafm/rag/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..bbce5fcc62250192c3575a248cbc4051fa95ecd1 100644
GIT binary patch
literal 86
zcmY#VQVMc(SI`JbEh)-OElbSNbu3NKP0cGwO;K=9%}XsxEXmBz(^Sw{azKxZYoUQc
qL5abFk|jC{EekYt6oLbNGD}hw7}h8lEOk+k`I>p+i5{1dk`e&kt{naV

literal 0
HcmV?d00001


From 2e526e825b3a18dca1e63301241d726c2d5907fd Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Mon, 24 Nov 2025 15:54:08 +0900
Subject: [PATCH 06/13] =?UTF-8?q?Refactor:=20=ED=85=8C=EC=8A=A4=ED=8A=B8?=
 =?UTF-8?q?=20=ED=8C=8C=EC=9D=BC=20=EB=A6=AC=ED=8C=A9=ED=86=A0=EB=A7=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/__init__.py       |   1 +
 tests/embedding_test.py |  23 +++++---
 tests/milvus_test.py    | 114 +++++++++++++++++++++++++---------------
 3 files changed, 91 insertions(+), 47 deletions(-)

diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29..755fac1 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+"""MAFM 테스트 패키지."""
\ No newline at end of file
diff --git a/tests/embedding_test.py b/tests/embedding_test.py
index 3486f0f..5eab180 100644
--- a/tests/embedding_test.py
+++ b/tests/embedding_test.py
@@ -1,10 +1,17 @@
+"""임베딩 모듈 테스트."""
+
 import pytest
+
 from mafm.rag.embedding import embedding
 
 
 @pytest.fixture
-def test_sentences():
-    # Define a set of test sentences
+def test_sentences() -> list[str]:
+    """테스트용 문장 리스트를 반환합니다.
+
+    Returns:
+        테스트 문장 리스트.
+    """
     return [
         "This is the first test sentence.",
         "Here is another example sentence.",
@@ -12,8 +19,12 @@ def test_sentences():
     ]
 
 
-def test_embedding_output_shape(test_sentences):
-    """Test that the embeddings have the correct shape."""
+def test_embedding_output_shape(test_sentences: list[str]) -> None:
+    """임베딩 출력 형태가 올바른지 테스트합니다.
+
+    Args:
+        test_sentences: 테스트 문장 리스트.
+    """
     embeddings = embedding(test_sentences)
-    # Assert that the number of embeddings matches the number of sentences
-    assert len(embeddings) == len(test_sentences)
+    assert embeddings is not None
+    assert len(embeddings) == len(test_sentences)
\ No newline at end of file
diff --git a/tests/milvus_test.py b/tests/milvus_test.py
index 5055693..6848aae 100644
--- a/tests/milvus_test.py
+++ b/tests/milvus_test.py
@@ -1,38 +1,52 @@
+"""Milvus 벡터 데이터베이스 테스트."""
+
+from typing import Any
+
 import pytest
-from pymilvus import connections, utility, Collection
+from pymilvus import Collection, connections, utility
+
+from mafm.rag.embedding import embedding
 from mafm.rag.vectorDb import (
-    initialize_vector_db,
     delete_vector_db,
-    save,
-    insert_file_embedding,
-    search,
     find_by_id,
+    initialize_vector_db,
+    insert_file_embedding,
     remove_by_id,
+    save,
+    search,
 )
-from mafm.rag.embedding import embedding  # Ensure this is implemented
-from mafm.rag.sqlite import get_path_by_id  # Ensure this is implemented
 
-# Constants for testing
 DB_NAME = "test_db"
 TEST_ID = 123
 TEST_QUERIES = ["test query one", "test query two"]
-TEST_FILE_DATA = [
-    # Assuming file_data is in the format expected by insert_file_embedding
-    [TEST_ID, embedding(["file data sample"])[0], "file data sample"]
-]
+
+
+def _get_test_file_data() -> list[list[Any]]:
+    """테스트용 파일 데이터를 생성합니다.
+
+    Returns:
+        테스트 파일 데이터.
+    """
+    embedding_result = embedding(["file data sample"])
+    if embedding_result is None:
+        return []
+    return [[TEST_ID, embedding_result[0], "file data sample"]]
 
 
 @pytest.fixture(scope="module")
-def setup_milvus():
-    # Setup: Initialize the vector database
+def setup_milvus() -> Any:
+    """Milvus 테스트 환경을 설정합니다.
+
+    Yields:
+        None.
+    """
     initialize_vector_db(DB_NAME)
     yield
-    # Teardown: Delete the vector database
     delete_vector_db(DB_NAME)
 
 
-def test_initialize_vector_db():
-    # Test initialization
+def test_initialize_vector_db() -> None:
+    """벡터 데이터베이스 초기화를 테스트합니다."""
     initialize_vector_db(DB_NAME)
     connections.connect(alias="default", host="localhost", port="19530")
     collection_name = f"{DB_NAME}_demo_collection"
@@ -40,82 +54,100 @@ def test_initialize_vector_db():
     connections.disconnect(alias="default")
 
 
-def test_save(setup_milvus):
-    # Test saving data
+def test_save(setup_milvus: Any) -> None:
+    """데이터 저장을 테스트합니다.
+
+    Args:
+        setup_milvus: Milvus 설정 픽스처.
+    """
     save(DB_NAME, TEST_ID, TEST_QUERIES)
     connections.connect(alias="default", host="localhost", port="19530")
     collection_name = f"{DB_NAME}_demo_collection"
     collection = Collection(name=collection_name)
     collection.load()
-    # Query to check if data was inserted
     res = collection.query(expr=f"id in [{TEST_ID}]", output_fields=["id", "word"])
     assert len(res) > 0
     collection.release()
     connections.disconnect(alias="default")
 
 
-def test_insert_file_embedding(setup_milvus):
-    # Test inserting file embeddings
-    insert_file_embedding(TEST_FILE_DATA, DB_NAME)
+def test_insert_file_embedding(setup_milvus: Any) -> None:
+    """파일 임베딩 삽입을 테스트합니다.
+
+    Args:
+        setup_milvus: Milvus 설정 픽스처.
+    """
+    test_file_data = _get_test_file_data()
+    insert_file_embedding(test_file_data, DB_NAME)
     connections.connect(alias="default", host="localhost", port="19530")
     collection_name = f"{DB_NAME}_demo_collection"
     collection = Collection(name=collection_name)
     collection.load()
-    # Query to check if data was inserted
     res = collection.query(expr=f"id in [{TEST_ID}]", output_fields=["id", "word"])
     assert len(res) > 0
     collection.release()
     connections.disconnect(alias="default")
 
 
-def test_search(setup_milvus, mocker):
-    # Mock get_path_by_id to return a predictable value
-    mocker.patch("your_module.get_path_by_id", return_value="path/to/file")
+def test_search(setup_milvus: Any, mocker: Any) -> None:
+    """검색 기능을 테스트합니다.
+
+    Args:
+        setup_milvus: Milvus 설정 픽스처.
+        mocker: pytest-mock mocker.
+    """
+    mocker.patch(
+        "mafm.rag.vectorDb.get_path_by_id",
+        return_value="path/to/file",
+    )
 
-    # Ensure data is saved before searching
     save(DB_NAME, TEST_ID, TEST_QUERIES)
 
-    # Test searching
     results = search(DB_NAME, ["test query one"])
     assert results is not None
-    assert len(results) == 2  # As limit=2 in search
+    assert len(results) == 2
     assert all(result == "path/to/file" for result in results)
 
 
-def test_find_by_id(setup_milvus):
-    # Ensure data is saved before querying
+def test_find_by_id(setup_milvus: Any) -> None:
+    """ID로 검색 기능을 테스트합니다.
+
+    Args:
+        setup_milvus: Milvus 설정 픽스처.
+    """
     save(DB_NAME, TEST_ID, TEST_QUERIES)
 
-    # Test finding by ID
     res = find_by_id(TEST_ID, DB_NAME)
     assert res is not None
     assert len(res) > 0
     assert res[0]["id"] == TEST_ID
 
 
-def test_remove_by_id(setup_milvus):
-    # Ensure data is saved before deleting
+def test_remove_by_id(setup_milvus: Any) -> None:
+    """ID로 삭제 기능을 테스트합니다.
+
+    Args:
+        setup_milvus: Milvus 설정 픽스처.
+    """
     save(DB_NAME, TEST_ID, TEST_QUERIES)
 
-    # Test removing by ID
     remove_by_id(TEST_ID, DB_NAME)
 
-    # Check if the data is actually deleted
     connections.connect(alias="default", host="localhost", port="19530")
     collection_name = f"{DB_NAME}_demo_collection"
     collection = Collection(name=collection_name)
     collection.load()
     res = collection.query(expr=f"id in [{TEST_ID}]", output_fields=["id", "word"])
-    assert len(res) == 0  # No records should be found
+    assert len(res) == 0
     collection.release()
     connections.disconnect(alias="default")
 
 
-def test_delete_vector_db():
-    # Test deleting the vector database
+def test_delete_vector_db() -> None:
+    """벡터 데이터베이스 삭제를 테스트합니다."""
     initialize_vector_db(DB_NAME)
     delete_vector_db(DB_NAME)
     connections.connect(alias="default", host="localhost", port="19530")
     collection_name = f"{DB_NAME}_demo_collection"
     assert not utility.has_collection(collection_name)
-    connections.disconnect(alias="default")
+    connections.disconnect(alias="default")
\ No newline at end of file

From aac95bc75b2b16decc46f827abd4452229af8342 Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Mon, 24 Nov 2025 15:54:58 +0900
Subject: [PATCH 07/13] =?UTF-8?q?Chore:=20nohup.out=20=EC=82=AD=EC=A0=9C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mafm/nohup.out | 71 --------------------------------------------------
 1 file changed, 71 deletions(-)
 delete mode 100644 mafm/nohup.out

diff --git a/mafm/nohup.out b/mafm/nohup.out
deleted file mode 100644
index 338ca5a..0000000
--- a/mafm/nohup.out
+++ /dev/null
@@ -1,71 +0,0 @@
-모델이 성공적으로 초기화되었습니다.
-Connected to /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/MAFM_test.db
-디렉토리 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/temp
-Connected to /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/temp/temp.db
-디렉토리 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/download
-Connected to /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/download/download.db
-디렉토리 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report
-Connected to /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/report.db
-디렉토리 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding
-Connected to /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/coding.db
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/download/document_download.txt
-{'insert_count': 4, 'ids': [6, 6, 6, 6]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/download/.download.db.lock
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/download/software_download2.txt
-{'insert_count': 4, 'ids': [7, 7, 7, 7]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/download/.download.db.lock
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/download/software_download.txt
-{'insert_count': 4, 'ids': [8, 8, 8, 8]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/download/.download.db.lock
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/download/media_download.txt
-{'insert_count': 4, 'ids': [9, 9, 9, 9]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/download/.download.db.lock
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/scientific_report.txt
-{'insert_count': 4, 'ids': [10, 10, 10, 10]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/.report.db.lock
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/financial_report.txt
-{'insert_count': 4, 'ids': [11, 11, 11, 11]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/.report.db.lock
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/협력 자율 주행을 위한 V2X 통신기술.pdf
-{'insert_count': 23, 'ids': [12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/.report.db.lock
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/test.txt
-{'insert_count': 2, 'ids': [13, 13]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/.report.db.lock
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/marketing_report.txt
-{'insert_count': 3, 'ids': [14, 14, 14]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/.report.db.lock
-디렉토리 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/temp
-Connected to /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/temp/temp.db
-디렉토리 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/test_code
-Connected to /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/test_code/test_code.db
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/javascript_coding.txt
-{'insert_count': 4, 'ids': [17, 17, 17, 17]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/.coding.db.lock
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/python_coding.txt
-{'insert_count': 4, 'ids': [18, 18, 18, 18]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/.coding.db.lock
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/python_coding 복사본.txt
-{'insert_count': 4, 'ids': [19, 19, 19, 19]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/.coding.db.lock
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/algorithm_coding.txt
-{'insert_count': 5, 'ids': [20, 20, 20, 20, 20]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/.coding.db.lock
-디렉토리 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/test_code/temp
-Connected to /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/test_code/temp/temp.db
-Embedding 하는 파일의 절대 경로: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/test_code/abc.txt
-{'insert_count': 1, 'ids': [22]}
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/coding/test_code/.test_code.db.lock
-작업에 걸린 시간: 24.6404 초
---created--
-ignore deleted: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/.report.db.lock
---created--
-ignore deleted: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/.MAFM_test.db.lock
---deleted--
-deleting: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/test.txt
-[(13,)]
-Deleted records with ID: 13
-No lock file found for /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/.report.db.lock
-Deleted file: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/test.txt
---created--
-ignore deleted: /Users/Ruffles/Projects/MAFM/MAFM/mafm/MAFM_test/report/report.db-journal

From 63a56a4f932dcff6825f3c3c221e9ce4a943cccd Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Mon, 24 Nov 2025 16:09:24 +0900
Subject: [PATCH 08/13] =?UTF-8?q?Refactor:=20C=20=EB=9D=BC=EC=9D=B4?=
 =?UTF-8?q?=EB=B8=8C=EB=9F=AC=EB=A6=AC=20=EC=A0=9C=EA=B1=B0=20=EB=B0=8F=20?=
 =?UTF-8?q?=ED=8C=8C=EC=9D=B4=EC=8D=AC=EC=9C=BC=EB=A1=9C=20=EB=8C=80?=
 =?UTF-8?q?=EC=B2=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mafm/rag/C_library/Makefile  |  14 --
 mafm/rag/C_library/fileops.c | 286 -----------------------------------
 mafm/rag/C_library/fileops.h |  12 --
 mafm/rag/C_library/utils.c   |  13 --
 mafm/rag/C_library/utils.h   |   6 -
 mafm/rag/fileops.py          | 134 +++++++++-------
 6 files changed, 76 insertions(+), 389 deletions(-)
 delete mode 100644 mafm/rag/C_library/Makefile
 delete mode 100644 mafm/rag/C_library/fileops.c
 delete mode 100644 mafm/rag/C_library/fileops.h
 delete mode 100644 mafm/rag/C_library/utils.c
 delete mode 100644 mafm/rag/C_library/utils.h

diff --git a/mafm/rag/C_library/Makefile b/mafm/rag/C_library/Makefile
deleted file mode 100644
index 25b51d4..0000000
--- a/mafm/rag/C_library/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-CC = gcc
-CFLAGS = -fPIC -Wall -I.
-OUTPUT = libfileops.so
-SRC = fileops.c utils.c
-HEADERS = fileops.h utils.h
-
-# Target for shared object
-$(OUTPUT): $(SRC) $(HEADERS)
-	$(CC) $(CFLAGS) -shared -o $(OUTPUT) $(SRC)
-
-clean:
-	rm -f $(OUTPUT)
-	rm -f *.o
-
diff --git a/mafm/rag/C_library/fileops.c b/mafm/rag/C_library/fileops.c
deleted file mode 100644
index 9f57590..0000000
--- a/mafm/rag/C_library/fileops.c
+++ /dev/null
@@ -1,286 +0,0 @@
-#include "fileops.h"
-#include "utils.h"
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <dirent.h>
-
-// make_soft_links: 여러 파일 경로에 대해 임시 디렉토리에 소프트 링크를 만든다.
-// 입력: paths (파일 경로 배열), num_paths (경로 개수), temp_dir (임시 디렉토리 경로)
-// 출력: 임시 디렉토리 경로 (char*)
-char* make_soft_links(char** paths, int num_paths, char *temp_dir) {
-    for (int i = 0; i < num_paths; i++) {
-        char link_path[512];
-        snprintf(link_path, sizeof(link_path), "%s/%s", temp_dir, strrchr(paths[i], '/') + 1);
-        symlink(paths[i], link_path);
-    }
-    
-    return temp_dir;
-}
-
-// get_filename: 파일 경로에서 파일 이름만 추출
-// 입력: path (파일 경로)
-// 출력: 파일 이름 (char*)
-char *get_filename(const char *path) {
-    // Find the last occurrence of '/' in the path
-    const char *lastSlash = strrchr(path, '/');
-    
-    // If there is no '/' in the path, use the whole path as the filename
-    if (lastSlash == NULL) {
-        return strdup(path);  // No '/' found, return the entire path as the filename
-    }
-    
-    // Otherwise, return everything after the last '/'
-    return strdup(lastSlash + 1);
-}
-
-
-// get_file_data: 주어진 파일 경로에 대한 정보를 읽고 필요한 데이터를 반환
-// 입력: path (파일 경로)
-// 출력: 파일 정보 배열 (파일 경로, 파일 이름, 파일 내용 조각)
-/* 파일 정보 배열의 구조:
- * data[0]: 파일의 전체 경로 (char*)
- * data[1]: 파일의 이름 (char*)
- * data[2], data[3], ...: 파일 내용을 일정 크기(chunkSize)로 나눈 조각들 (char*)
- * 마지막 data[idx + 1]: NULL 포인터 (배열의 끝을 알리기 위해)
-*/
-char** get_file_data(const char* path) {
-    // 파일을 읽기 모드로 엽니다.
-    FILE *file = fopen(path, "rb");
-    if (!file) {
-        perror("Failed to open file"); // 파일 열기 실패 시 오류 메시지 출력
-        return NULL; // 실패 시 NULL 반환
-    }
-
-    // 파일 이름을 가져옵니다.
-    char *fname = get_filename(path);
-    if (!fname) {
-        perror("Failed to get filename"); // 파일 이름을 가져오는 데 실패하면 오류 메시지 출력
-        fclose(file); // 파일 닫기
-        return NULL; // 실패 시 NULL 반환
-    }
-
-    // 파일이 이미지 또는 비디오인지 확인합니다.
-    int is_image_or_video_flag = is_image_or_video(path);
-
-    // 이미지 또는 비디오 파일일 경우
-    if (is_image_or_video_flag) {
-        char **data = malloc(sizeof(char *) * 3); // data 배열에 3개의 포인터 공간을 할당합니다.
-        if (!data) {
-            perror("Failed to allocate memory for data array"); // 메모리 할당 실패 시 오류 메시지 출력
-            free(fname); // 파일 이름 메모리 해제
-            fclose(file); // 파일 닫기
-            return NULL; // 실패 시 NULL 반환
-        }
-        data[0] = strdup(path); // 경로 복사
-        if (!data[0]) {
-            perror("Failed to duplicate path"); // 경로 복사 실패 시 오류 메시지 출력
-            free(data); // data 배열 해제
-            free(fname); // 파일 이름 해제
-            fclose(file); // 파일 닫기
-            return NULL; // 실패 시 NULL 반환
-        }
-        data[1] = fname; // 파일 이름 저장
-        data[2] = NULL; // 마지막에 NULL 포인터 설정 (배열의 끝을 알리기 위해)
-        fclose(file); // 파일 닫기
-        return data; // data 배열 반환
-    }
-
-    // 일반 파일일 경우, 초기 배열 크기를 설정합니다.
-    int maxChunks = 4;
-    char **data = (char **)malloc(sizeof(char *) * maxChunks); // 초기 크기 4로 data 배열 할당
-    if (!data) {
-        perror("Failed to allocate memory for data array"); // 메모리 할당 실패 시 오류 메시지 출력
-        free(fname); // 파일 이름 해제
-        fclose(file); // 파일 닫기
-        return NULL; // 실패 시 NULL 반환
-    }
-
-    data[0] = strdup(path); // 파일 경로 복사
-    if (!data[0]) {
-        perror("Failed to duplicate path"); // 파일 경로 복사 실패 시 오류 메시지 출력
-        free(data); // data 배열 해제
-        free(fname); // 파일 이름 해제
-        fclose(file); // 파일 닫기
-        return NULL; // 실패 시 NULL 반환
-    }
-
-    data[1] = fname; // 파일 이름 저장
-
-    int idx = 2; // 데이터 조각을 저장할 인덱스 시작 (0과 1은 경로와 이름)
-    int chunkSize = 500; // 각 조각의 크기 (500바이트)
-    int bytesRead;
-
-    // 파일 내용을 500바이트씩 읽습니다.
-    while (1) {
-        if (idx >= maxChunks) {
-            // 현재 배열의 크기가 부족할 경우 크기를 2배로 늘립니다.
-            maxChunks *= 2;
-            char **temp = realloc(data, maxChunks * sizeof(char *));
-            if (temp == NULL) {
-                perror("Failed to reallocate memory for data array"); // 메모리 재할당 실패 시 오류 메시지 출력
-                // 이미 할당된 메모리 해제
-                for (int i = 0; i < idx; i++) {
-                    free(data[i]);
-                }
-                free(data);
-                fclose(file);
-                return NULL; // 실패 시 NULL 반환
-            }
-            data = temp; // 재할당된 메모리 주소로 업데이트
-        }
-
-        // 새로운 조각을 위한 메모리 할당
-        data[idx] = (char *)malloc(chunkSize * sizeof(char));
-        if (data[idx] == NULL) {
-            perror("Failed to allocate memory for chunk"); // 메모리 할당 실패 시 오류 메시지 출력
-            // 이미 할당된 메모리 해제
-            for (int i = 0; i < idx; i++) {
-                free(data[i]);
-            }
-            free(data);
-            fclose(file);
-            return NULL; // 실패 시 NULL 반환
-        }
-
-        // 파일에서 chunkSize만큼 읽기
-        bytesRead = fread(data[idx], 1, chunkSize, file);
-        if (bytesRead > 0) {
-            if (bytesRead < chunkSize) {
-                // 만약 읽은 바이트가 chunkSize보다 적다면 메모리 크기 조정
-                char *adjusted = realloc(data[idx], bytesRead);
-                if (adjusted) {
-                    data[idx] = adjusted;
-                }
-            }
-            idx++;
-        }
-        if (bytesRead < chunkSize) {
-            if (feof(file)) {
-                break; // 파일 끝에 도달하면 종료
-            } else if (ferror(file)) {
-                perror("Error reading file"); // 파일 읽기 중 오류 발생 시 메시지 출력
-                // 이미 할당된 메모리 해제
-                for (int i = 0; i <= idx; i++) {
-                    free(data[i]);
-                }
-                free(data);
-                fclose(file);
-                return NULL; // 실패 시 NULL 반환
-            }
-        }
-    }
-
-    // 마지막에 NULL 포인터 설정 (배열의 끝을 알리기 위해)
-    if (idx < maxChunks) {
-        data[idx] = NULL;
-    } else {
-        // 추가 공간이 필요하면 재할당하여 NULL 포인터 추가
-        char **temp = realloc(data, (idx + 1) * sizeof(char *));
-        if (temp == NULL) {
-            perror("Failed to reallocate memory for terminating NULL pointer"); // 메모리 재할당 실패 시 오류 메시지 출력
-            // 이미 할당된 메모리 해제
-            for (int i = 0; i < idx; i++) {
-                free(data[i]);
-            }
-            free(data);
-            fclose(file);
-            return NULL; // 실패 시 NULL 반환
-        }
-        data = temp;
-        data[idx] = NULL;
-    }
-
-    fclose(file); // 파일 닫기
-    return data; // 파일 정보와 조각들을 포함한 data 배열 반환
-}
-
-
-// 파일 데이터를 해제하는 함수 (get_file_data로부터 반환된 메모리 해제)
-void free_file_data(char** data) {
-    if (!data) return;
-    for (int i = 0; data[i] != NULL; i++) {
-        free(data[i]); // 각 문자열에 할당된 메모리 해제
-    }
-    free(data); // data 포인터 자체 해제
-}
-
-// 파일 데이터 배열을 해제하는 함수
-void free_file_data_array(char*** file_data_array, int num_files) {
-    for (int i = 0; i < num_files; i++) {
-        free_file_data(file_data_array[i]);
-    }
-    free(file_data_array);
-}
-
-// collect_file_data_recursive: 지정된 디렉터리를 재귀적으로 탐색하며 파일 데이터를 수집
-// 입력: dir_path (탐색할 디렉터리 경로), num_files (수집된 파일 개수 포인터), file_data_array (파일 데이터 배열 포인터), depth (재귀 깊이)
-// 출력: 없음 (num_files와 file_data_array가 수정됨)
-/* file_data_array는 파일 데이터를 가진 3차원 배열이다.
- * file_data_array[0]는 첫 번째 파일의 데이터를 가리키고, file_data_array[1]는 두 번째 파일의 데이터를 가리킨다.
- * file_data_array[i][j]는 i번째 파일 중 j번째 data(get_file_data의 리턴 배열)를 의미한다.
- * 즉, file_data_array[0][0]는 첫 번째 파일의 전체 경로를 의미한다.
- * file_data_array[i][j][k]는 단순 문자이므로 큰 의미가 없다.
-*/
-void collect_file_data_recursive(const char* dir_path, int* num_files, char**** file_data_array, int depth) {
-    if (depth > 3) {
-        return;
-    }
-
-    DIR *d = opendir(dir_path);
-    if (!d) {
-        return;
-    }
-
-    struct dirent *dir;
-    int alloc_size = 4;
-
-    // 동적 메모리 할당 후, 메모리 누수 방지를 위한 코드 추가
-    while ((dir = readdir(d)) != NULL) {
-        // 현재 디렉토리와 상위 디렉토리는 건너뛰기
-        if (strcmp(dir->d_name, ".") == 0 || strcmp(dir->d_name, "..") == 0) {
-            continue;
-        }
-
-        char full_path[512];
-        snprintf(full_path, sizeof(full_path), "%s/%s", dir_path, dir->d_name);
-
-        struct stat st;
-
-        // 파일 또는 디렉토리의 정보(stat 구조체)를 가져오기
-        if (stat(full_path, &st) == 0) {
-            if (S_ISREG(st.st_mode)) {
-                // 일반 파일일 경우, 파일 데이터를 수집하는 함수 호출
-                char **data = get_file_data(full_path);
-                if (data) { // 파일 데이터가 성공적으로 수집된 경우
-                    (*num_files)++; // 파일 수 증가
-
-                    // 파일 수가 할당된 크기를 초과하는 경우
-                    if (*num_files > alloc_size) {
-                        alloc_size *= 2;
-                        char ***temp = realloc(*file_data_array, sizeof(char**) * alloc_size);
-                        if (!temp) { // realloc 실패 시 메모리 해제
-                            free_file_data(data);
-                            closedir(d);
-                            return;
-                        }
-                        *file_data_array = temp;
-                    }
-
-                    (*file_data_array)[*num_files - 1] = data;
-                }
-            } else if (S_ISDIR(st.st_mode)) {
-                collect_file_data_recursive(full_path, num_files, file_data_array, depth + 1);
-            }
-        }
-    }
-    closedir(d); // 디렉토리 닫기
-}
-
-char*** get_all_file_data(const char* dir_path, int* num_files) {
-    *num_files = 0;
-    char*** file_data_array = malloc(sizeof(char **) * 4);
-    collect_file_data_recursive(dir_path, num_files, &file_data_array, 1);
-    return file_data_array;
-}
\ No newline at end of file
diff --git a/mafm/rag/C_library/fileops.h b/mafm/rag/C_library/fileops.h
deleted file mode 100644
index 1566885..0000000
--- a/mafm/rag/C_library/fileops.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef FILEOPS_H
-#define FILEOPS_H
-
-#include <stdio.h>
-
-char* make_soft_links(char** paths, int num_paths, char *temp_dir);
-char** get_file_data(const char* path);
-char*** get_all_file_data(const char* dir_path, int* num_files);
-int split_file(const char* file_path, const char* output_dir, size_t chunk_size);
-int is_image_or_video(const char* filename);
-
-#endif
\ No newline at end of file
diff --git a/mafm/rag/C_library/utils.c b/mafm/rag/C_library/utils.c
deleted file mode 100644
index b5d9198..0000000
--- a/mafm/rag/C_library/utils.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include "utils.h"
-#include <string.h>
-int is_image_or_video(const char* filename) {
-    const char *ext = strrchr(filename, '.');
-    if (!ext) {
-        return 0; // No extension
-    }
-    if (strcmp(ext, ".jpg") == 0 || strcmp(ext, ".png") == 0 || 
-        strcmp(ext, ".mp4") == 0 || strcmp(ext, ".avi") == 0 || strcmp(ext, ".mp3") == 0) {
-        return 1;
-    }
-    return 0;
-}
\ No newline at end of file
diff --git a/mafm/rag/C_library/utils.h b/mafm/rag/C_library/utils.h
deleted file mode 100644
index 6e65659..0000000
--- a/mafm/rag/C_library/utils.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef UTILS_H
-#define UTILS_H
-
-int is_image_or_video(const char* filename);
-
-#endif
\ No newline at end of file
diff --git a/mafm/rag/fileops.py b/mafm/rag/fileops.py
index 8d9dffb..7968e71 100644
--- a/mafm/rag/fileops.py
+++ b/mafm/rag/fileops.py
@@ -1,20 +1,27 @@
 """파일 연산 모듈.
 
-C 라이브러리를 사용한 파일 연산 기능을 제공합니다.
+파일 읽기 및 소프트 링크 생성 기능을 제공합니다.
 """
 
-import ctypes
+import os
+from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Any
 
-lib = ctypes.CDLL("./rag/C_library/libfileops.so")
+DEFAULT_CHUNK_SIZE = 500
+BINARY_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".mp4", ".avi", ".mp3", ".wav", ".pdf"}
 
-lib.make_soft_links.argtypes = [
-    ctypes.POINTER(ctypes.c_char_p),
-    ctypes.c_int,
-    ctypes.c_char_p,
-]
-lib.make_soft_links.restype = ctypes.c_char_p
+
+def _is_binary_file(path: str) -> bool:
+    """바이너리 파일인지 확인합니다.
+
+    Args:
+        path: 확인할 파일 경로.
+
+    Returns:
+        바이너리 파일이면 True.
+    """
+    ext = Path(path).suffix.lower()
+    return ext in BINARY_EXTENSIONS
 
 
 def make_soft_links(paths: list[str], temp_dir: TemporaryDirectory[str]) -> str:
@@ -25,76 +32,87 @@ def make_soft_links(paths: list[str], temp_dir: TemporaryDirectory[str]) -> str:
         temp_dir: 링크를 생성할 임시 디렉토리.
 
     Returns:
-        생성 결과 메시지.
+        임시 디렉토리 경로.
     """
-    path_array = (ctypes.c_char_p * len(paths))(
-        *[path.encode("utf-8") for path in paths]
-    )
-    result = lib.make_soft_links(path_array, len(paths), temp_dir.name.encode("utf-8"))
-    return result.decode("utf-8")
+    for path in paths:
+        filename = os.path.basename(path)
+        link_path = os.path.join(temp_dir.name, filename)
+        try:
+            os.symlink(path, link_path)
+        except FileExistsError:
+            pass
+        except OSError as e:
+            print(f"Failed to create symlink for {path}: {e}")
 
+    return temp_dir.name
 
-lib.get_file_data.argtypes = [ctypes.c_char_p]
-lib.get_file_data.restype = ctypes.POINTER(ctypes.c_char_p)
 
+def get_file_data(path: str, chunk_size: int = DEFAULT_CHUNK_SIZE) -> list[str]:
+    """파일 데이터를 읽고 청크로 분할합니다.
 
-def get_file_data(path: str) -> list[str]:
-    """파일 데이터를 읽습니다.
+    파일을 읽어서 [파일경로, 파일명, 청크1, 청크2, ...] 형태로 반환합니다.
+    바이너리 파일(이미지, 비디오 등)은 청크 없이 [파일경로, 파일명]만 반환합니다.
 
     Args:
         path: 읽을 파일의 경로.
+        chunk_size: 각 청크의 크기 (기본값: 500).
 
     Returns:
-        파일 데이터 문자열 리스트.
+        파일 데이터 리스트. [경로, 파일명, 청크들...]
     """
-    result = lib.get_file_data(path.encode("utf-8"))
-    data_list: list[str] = []
-    idx = 0
+    filename = os.path.basename(path)
+    data: list[str] = [path, filename]
+
+    if _is_binary_file(path):
+        return data
 
-    while result[idx] is not None:
-        string = ctypes.string_at(result[idx]).decode("utf-8")
-        data_list.append(string)
-        idx += 1
-    return data_list
+    try:
+        with open(path, "r", encoding="utf-8", errors="ignore") as f:
+            content = f.read()
 
+        chunks = [content[i : i + chunk_size] for i in range(0, len(content), chunk_size)]
+        data.extend(chunks)
 
-lib.get_all_file_data.argtypes = [ctypes.c_char_p, ctypes.POINTER(ctypes.c_int)]
-lib.get_all_file_data.restype = ctypes.POINTER(ctypes.POINTER(ctypes.c_char_p))
+    except OSError as e:
+        print(f"Failed to read file {path}: {e}")
 
-lib.free_file_data_array.restype = None
-lib.free_file_data_array.argtypes = [
-    ctypes.POINTER(ctypes.POINTER(ctypes.c_char_p)),
-    ctypes.c_int,
-]
+    return data
 
 
-def get_all_file_data(directory: str) -> list[list[Any]]:
+def get_all_file_data(
+    directory: str,
+    chunk_size: int = DEFAULT_CHUNK_SIZE,
+    max_depth: int = 3,
+) -> list[list[str]]:
     """디렉토리 내 모든 파일 데이터를 읽습니다.
 
     Args:
         directory: 읽을 디렉토리 경로.
+        chunk_size: 각 청크의 크기.
+        max_depth: 탐색할 최대 깊이.
 
     Returns:
         각 파일의 데이터 리스트.
     """
-    num_files = ctypes.c_int(0)
-    result = lib.get_all_file_data(directory.encode("utf-8"), ctypes.byref(num_files))
-    files: list[list[Any]] = []
-    try:
-        for i in range(num_files.value):
-            idx = 0
-            data_list: list[Any] = []
-            while result[i][idx] is not None:
-                try:
-                    string = ctypes.string_at(result[i][idx]).decode("utf-8")
-                except UnicodeDecodeError:
-                    string = ctypes.string_at(result[i][idx])
-                data_list.append(string)
-                idx += 1
-            files.append(data_list)
-        return files
-    finally:
-        result_casted = ctypes.cast(
-            result, ctypes.POINTER(ctypes.POINTER(ctypes.c_char_p))
-        )
-        lib.free_file_data_array(result_casted, num_files.value)
\ No newline at end of file
+    files: list[list[str]] = []
+
+    def _collect_recursive(dir_path: str, depth: int) -> None:
+        if depth > max_depth:
+            return
+
+        try:
+            with os.scandir(dir_path) as entries:
+                for entry in entries:
+                    if entry.name.startswith("."):
+                        continue
+
+                    if entry.is_file():
+                        file_data = get_file_data(entry.path, chunk_size)
+                        files.append(file_data)
+                    elif entry.is_dir():
+                        _collect_recursive(entry.path, depth + 1)
+        except OSError as e:
+            print(f"Failed to read directory {dir_path}: {e}")
+
+    _collect_recursive(directory, 1)
+    return files
\ No newline at end of file

From f5e0cf3f112b31e5b4410d63e3ea5759c31dae14 Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Tue, 25 Nov 2025 09:08:55 +0900
Subject: [PATCH 09/13] =?UTF-8?q?=ED=8F=90=EA=B8=B0=EB=90=9C=20ruff=20?=
 =?UTF-8?q?=EA=B7=9C=EC=B9=99=20=EC=A0=9C=EA=B1=B0=20=EB=B0=8F=20=ED=85=8C?=
 =?UTF-8?q?=EC=8A=A4=ED=8A=B8=20=ED=94=BD=EC=8A=A4=EC=B2=98=20=EC=98=88?=
 =?UTF-8?q?=EC=99=B8=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mafm/rag/vectorDb.py | 259 -------------------------------------------
 pyproject.toml       |   5 +-
 2 files changed, 3 insertions(+), 261 deletions(-)
 delete mode 100644 mafm/rag/vectorDb.py

diff --git a/mafm/rag/vectorDb.py b/mafm/rag/vectorDb.py
deleted file mode 100644
index de98caa..0000000
--- a/mafm/rag/vectorDb.py
+++ /dev/null
@@ -1,259 +0,0 @@
-"""벡터 데이터베이스 모듈.
-
-Milvus 벡터 데이터베이스 관리 기능을 제공합니다.
-"""
-
-import gc
-import os
-from typing import Any
-
-from pymilvus import MilvusClient
-
-from mafm.rag.embedding import embedding
-from mafm.rag.sqlite import get_path_by_id
-
-COLLECTION_NAME = "demo_collection"
-VECTOR_DIMENSION = 384
-
-
-def _delete_db_lock_file(db_name: str) -> None:
-    """데이터베이스 잠금 파일을 삭제합니다.
-
-    Args:
-        db_name: 데이터베이스 파일 경로.
-    """
-    dir_path = os.path.dirname(db_name)
-    base_name = os.path.basename(db_name)
-
-    lock_file = f"{dir_path}/.{base_name}.lock"
-    if os.path.exists(lock_file):
-        os.remove(lock_file)
-    else:
-        print(f"No lock file found for {lock_file}")
-
-
-def initialize_vector_db(db_name: str) -> None:
-    """벡터 데이터베이스를 초기화합니다.
-
-    Args:
-        db_name: 생성할 데이터베이스 파일 경로.
-
-    Raises:
-        Exception: 데이터베이스 초기화 중 오류가 발생한 경우.
-    """
-    client: MilvusClient | None = None
-    try:
-        client = MilvusClient(db_name)
-        print(f"Connected to {db_name}")
-
-        if client.has_collection(collection_name=COLLECTION_NAME):
-            client.drop_collection(collection_name=COLLECTION_NAME)
-
-        client.create_collection(
-            collection_name=COLLECTION_NAME,
-            dimension=VECTOR_DIMENSION,
-        )
-    except Exception as e:
-        print(f"Error initializing vector DB for {db_name}: {e}")
-        raise
-    finally:
-        if client is not None:
-            client.close()
-        gc.collect()
-        _delete_db_lock_file(db_name)
-
-
-def delete_vector_db(db_name: str) -> None:
-    """벡터 데이터베이스를 삭제합니다.
-
-    Args:
-        db_name: 삭제할 데이터베이스 파일 경로.
-    """
-    client: MilvusClient | None = None
-    try:
-        client = MilvusClient(db_name)
-        if client.has_collection(collection_name=COLLECTION_NAME):
-            client.drop_collection(collection_name=COLLECTION_NAME)
-            print(f"Collection '{COLLECTION_NAME}' in {db_name} has been deleted.")
-        else:
-            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
-    except Exception as e:
-        print(f"Error deleting collection in {db_name}: {e}")
-    finally:
-        if client is not None:
-            client.close()
-        gc.collect()
-        _delete_db_lock_file(db_name)
-
-
-def save(db_name: str, file_id: int, queries: list[str]) -> None:
-    """쿼리 데이터를 벡터 데이터베이스에 저장합니다.
-
-    Args:
-        db_name: 데이터베이스 파일 경로.
-        file_id: 파일 고유 식별자.
-        queries: 저장할 텍스트 쿼리 리스트.
-    """
-    client: MilvusClient | None = None
-    try:
-        client = MilvusClient(db_name)
-        if not client.has_collection(collection_name=COLLECTION_NAME):
-            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
-            return
-
-        query_embeddings = embedding(queries)
-        if query_embeddings is None:
-            print("Failed to generate embeddings")
-            return
-
-        data = [
-            {"id": file_id, "vector": query_embeddings[i], "word": queries[i]}
-            for i in range(len(query_embeddings))
-        ]
-
-        res = client.insert(collection_name=COLLECTION_NAME, data=data)
-        print(res)
-
-    except MemoryError as me:
-        print(f"MemoryError: {me}")
-    except ValueError as ve:
-        print(f"ValueError: {ve}")
-    except Exception as e:
-        print(f"Error occurred during saving data to Milvus: {e}")
-    finally:
-        if client is not None:
-            client.close()
-        gc.collect()
-        _delete_db_lock_file(db_name)
-
-
-def insert_file_embedding(file_data: list[dict[str, Any]], db_name: str) -> None:
-    """파일 임베딩 데이터를 삽입합니다.
-
-    Args:
-        file_data: 삽입할 임베딩 데이터 리스트.
-        db_name: 데이터베이스 파일 경로.
-    """
-    client: MilvusClient | None = None
-    try:
-        client = MilvusClient(db_name)
-        if not client.has_collection(collection_name=COLLECTION_NAME):
-            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
-            return
-
-        client.insert(collection_name=COLLECTION_NAME, data=file_data)
-
-    except MemoryError as me:
-        print(f"MemoryError: {me}")
-    except ValueError as ve:
-        print(f"ValueError: {ve}")
-    except Exception as e:
-        print(f"Error occurred during saving data to Milvus: {e}")
-    finally:
-        if client is not None:
-            client.close()
-        gc.collect()
-        _delete_db_lock_file(db_name)
-
-
-def search(db_name: str, query_list: list[str]) -> list[str]:
-    """벡터 데이터베이스에서 유사한 항목을 검색합니다.
-
-    Args:
-        db_name: 데이터베이스 파일 경로.
-        query_list: 검색할 쿼리 텍스트 리스트.
-
-    Returns:
-        검색된 파일 경로 리스트.
-    """
-    client: MilvusClient | None = None
-    try:
-        client = MilvusClient(db_name)
-        if not client.has_collection(collection_name=COLLECTION_NAME):
-            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
-            return []
-
-        query_vectors = embedding(query_list)
-        if query_vectors is None:
-            return []
-
-        res = client.search(
-            collection_name=COLLECTION_NAME,
-            data=query_vectors,
-            limit=2,
-        )
-        id_list = [item["id"] for item in res[0]]
-        path_list = [get_path_by_id(file_id, "filesystem.db") for file_id in id_list]
-        return path_list
-    finally:
-        if client is not None:
-            client.close()
-        gc.collect()
-        _delete_db_lock_file(db_name)
-
-
-def find_by_id(search_id: int, db_name: str) -> list[dict[str, Any]] | None:
-    """ID로 벡터 데이터를 검색합니다.
-
-    Args:
-        search_id: 검색할 파일 ID.
-        db_name: 데이터베이스 파일 경로.
-
-    Returns:
-        검색된 데이터 리스트. 없으면 None.
-    """
-    client: MilvusClient | None = None
-    try:
-        client = MilvusClient(db_name)
-
-        if not client.has_collection(COLLECTION_NAME):
-            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
-            return None
-
-        res = client.query(
-            collection_name=COLLECTION_NAME, filter=f"id in [{search_id}]"
-        )
-
-        if not res:
-            print(f"No results found for ID: {search_id}")
-            return None
-        return res
-    finally:
-        if client is not None:
-            client.close()
-        gc.collect()
-        _delete_db_lock_file(db_name)
-
-
-def remove_by_id(remove_id: int, db_name: str) -> dict[str, Any] | None:
-    """ID로 벡터 데이터를 삭제합니다.
-
-    Args:
-        remove_id: 삭제할 파일 ID.
-        db_name: 데이터베이스 파일 경로.
-
-    Returns:
-        삭제 결과. 오류 시 None.
-
-    Raises:
-        Exception: 컬렉션이 존재하지 않는 경우.
-    """
-    client: MilvusClient | None = None
-    try:
-        client = MilvusClient(db_name)
-        if not client.has_collection(COLLECTION_NAME):
-            raise Exception(
-                f"Collection '{COLLECTION_NAME}' does not exist in {db_name}"
-            )
-
-        res = client.delete(
-            collection_name=COLLECTION_NAME, filter=f"id in [{remove_id}]"
-        )
-
-        print(f"Deleted records with ID: {remove_id}")
-        return res
-    finally:
-        if client is not None:
-            client.close()
-        gc.collect()
-        _delete_db_lock_file(db_name)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 12e52e0..448449d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,14 +47,15 @@ select = [
     "ANN",    # flake8-annotations
 ]
 ignore = [
-    "ANN101",  # Missing type annotation for self
-    "ANN102",  # Missing type annotation for cls
     "ANN401",  # Dynamically typed expressions (Any)
 ]
 
 [tool.ruff.lint.pep8-naming]
 classmethod-decorators = ["classmethod"]
 
+[tool.ruff.lint.per-file-ignores]
+"tests/*" = ["ARG001"]
+
 [tool.ruff.format]
 quote-style = "double"
 indent-style = "space"

From 61c794ed1163bc630c3e2d302d2c63b07ccc7cc9 Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Tue, 25 Nov 2025 09:09:39 +0900
Subject: [PATCH 10/13] =?UTF-8?q?UTF-8=20=EC=9D=B8=EC=BD=94=EB=94=A9=20?=
 =?UTF-8?q?=EC=98=A4=EB=A5=98=20=EC=88=98=EC=A0=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mafm/__init__.py       | Bin 83 -> 115 bytes
 mafm/agent/__init__.py |   6 +++---
 mafm/rag/__init__.py   | Bin 86 -> 135 bytes
 tests/__init__.py      |   2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mafm/__init__.py b/mafm/__init__.py
index a3855940b5e4d2a13069230528043291e8152491..d164a63b491f4286d12c2435e7668b292e3bd9fb 100644
GIT binary patch
delta 85
zcmV-b0IL5}bC4rY?To1Hf~4%HfG!FOOkr+kM{;3sXdvpiu<NjxAnU1s?TfG=?3a-2
rowV$rgzbvBAnlBd?47(I>a>9CrmXCqknEm?>x+o%i=-|JA|fIR)NC(B

delta 53
zcmXR8o}g@?vE+ar7ne_BUb=fxVnK$2frNs?2ZakIEISl<Y7`jODBM{R<Dww*HS@$1
JJuW3BB>?E+5?lZP

diff --git a/mafm/agent/__init__.py b/mafm/agent/__init__.py
index 58a9cd8..45aa092 100644
--- a/mafm/agent/__init__.py
+++ b/mafm/agent/__init__.py
@@ -1,4 +1,4 @@
-"""�t� (��.
+"""에이전트 패키지.
 
-LangGraph 0 @� �t� �l\�| �i��.
-"""
\ No newline at end of file
+LangGraph 기반 에이전트 모듈을 포함합니다.
+"""
diff --git a/mafm/rag/__init__.py b/mafm/rag/__init__.py
index bbce5fcc62250192c3575a248cbc4051fa95ecd1..3ea29ef83f770fc56edc2db5856c12866a9d0dda 100644
GIT binary patch
delta 101
zcmV-r0Gj_+hXIf%U+s*j?SiE2r+_XB3hT0=?S!x(>y5DNowV(Qu<Np*?47jiili(c
z?4E?{vY_jfsVpE<QA}xcWgzObfa{>BAnU5A>ywD=orECmkF4#LsO^=h>x+o%i=-|J
HA|fIRs_Z$F

delta 52
zcmZo?44a^#r?KRK9v9a_1BHSTg9Rl^bQD?^XzC~g2l`}|q$)71Q7~BQq9F4%^TZQ9
IE+r);0LgX{Pyhe`

diff --git a/tests/__init__.py b/tests/__init__.py
index 755fac1..41a414b 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1 +1 @@
-"""MAFM L�� (��."""
\ No newline at end of file
+"""MAFM 테스트 패키지."""

From 7d91cd0d808e90cd6b9bad97814bab24334231c3 Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Tue, 25 Nov 2025 09:10:36 +0900
Subject: [PATCH 11/13] =?UTF-8?q?Refactor:=20=ED=8C=8C=EC=9D=BC=20?=
 =?UTF-8?q?=EB=81=9D=20=EA=B0=9C=ED=96=89=20=EC=B6=94=EA=B0=80=20=EB=B0=8F?=
 =?UTF-8?q?=20import=20=EC=B5=9C=EC=A0=81=ED=99=94?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mafm/agent/agents/llm_model.py |  2 +-
 mafm/agent/agents/tools.py     |  2 +-
 mafm/agent/graph.py            |  8 ++++----
 mafm/observer.py               |  4 ++--
 mafm/rag/embedding.py          |  2 +-
 mafm/rag/fileops.py            | 14 ++++++++++----
 mafm/rag/sqlite.py             |  2 +-
 mafm/shell.py                  |  4 ++--
 tests/embedding_test.py        |  2 +-
 9 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/mafm/agent/agents/llm_model.py b/mafm/agent/agents/llm_model.py
index addff9f..bd1994d 100644
--- a/mafm/agent/agents/llm_model.py
+++ b/mafm/agent/agents/llm_model.py
@@ -9,4 +9,4 @@
 
 load_dotenv()
 
-api_key: str | None = os.getenv("OPENAI_API_KEY")
\ No newline at end of file
+api_key: str | None = os.getenv("OPENAI_API_KEY")
diff --git a/mafm/agent/agents/tools.py b/mafm/agent/agents/tools.py
index cd987f2..3e61c2f 100644
--- a/mafm/agent/agents/tools.py
+++ b/mafm/agent/agents/tools.py
@@ -6,4 +6,4 @@
     이 모듈의 기능은 member.py로 이전되었습니다.
 """
 
-# Legacy code - 향후 삭제 예정
\ No newline at end of file
+# Legacy code - 향후 삭제 예정
diff --git a/mafm/agent/graph.py b/mafm/agent/graph.py
index 82a7125..d51a549 100644
--- a/mafm/agent/graph.py
+++ b/mafm/agent/graph.py
@@ -5,7 +5,8 @@
 
 import functools
 import operator
-from typing import Annotated, Any, Sequence, TypedDict
+from collections.abc import Sequence
+from typing import Annotated, Any, TypedDict
 
 from langchain_core.messages import BaseMessage, HumanMessage
 from langgraph.graph import END, START, StateGraph
@@ -26,11 +27,10 @@ class AgentState(TypedDict):
     next: str
 
 
-def graph(directory_path: str, prompt: str) -> list[str]:
+def graph(prompt: str) -> list[str]:
     """멀티 에이전트 그래프를 실행합니다.
 
     Args:
-        directory_path: 검색할 루트 디렉토리 경로.
         prompt: 사용자 검색 요청.
 
     Returns:
@@ -97,4 +97,4 @@ def graph(directory_path: str, prompt: str) -> list[str]:
 
 
 if __name__ == "__main__":
-    print(graph("", ""))
\ No newline at end of file
+    print(graph(""))
diff --git a/mafm/observer.py b/mafm/observer.py
index 288be04..66b9d81 100644
--- a/mafm/observer.py
+++ b/mafm/observer.py
@@ -23,7 +23,7 @@
     insert_directory_structure,
     insert_file_info,
 )
-from mafm.rag.vectorDb import (
+from mafm.rag.vector_db import (
     delete_vector_db,
     find_by_id,
     initialize_vector_db,
@@ -354,4 +354,4 @@ def start_watchdog(root_dir: str) -> None:
     if not args.root:
         print("Root directory path is required.")
     else:
-        start_watchdog(args.root)
\ No newline at end of file
+        start_watchdog(args.root)
diff --git a/mafm/rag/embedding.py b/mafm/rag/embedding.py
index 86542fe..ec02620 100644
--- a/mafm/rag/embedding.py
+++ b/mafm/rag/embedding.py
@@ -84,4 +84,4 @@ def embedding(queries: list[str]) -> list[list[float]] | None:
         raise
     except Exception as e:
         print(f"embedding 중 오류 발생: {e}")
-        return None
\ No newline at end of file
+        return None
diff --git a/mafm/rag/fileops.py b/mafm/rag/fileops.py
index 7968e71..a2d6931 100644
--- a/mafm/rag/fileops.py
+++ b/mafm/rag/fileops.py
@@ -8,7 +8,10 @@
 from tempfile import TemporaryDirectory
 
 DEFAULT_CHUNK_SIZE = 500
-BINARY_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".mp4", ".avi", ".mp3", ".wav", ".pdf"}
+BINARY_EXTENSIONS = {
+    ".jpg", ".jpeg", ".png", ".gif", ".bmp",
+    ".mp4", ".avi", ".mp3", ".wav", ".pdf",
+}
 
 
 def _is_binary_file(path: str) -> bool:
@@ -67,10 +70,13 @@ def get_file_data(path: str, chunk_size: int = DEFAULT_CHUNK_SIZE) -> list[str]:
         return data
 
     try:
-        with open(path, "r", encoding="utf-8", errors="ignore") as f:
+        with open(path, encoding="utf-8", errors="ignore") as f:
             content = f.read()
 
-        chunks = [content[i : i + chunk_size] for i in range(0, len(content), chunk_size)]
+        chunks = [
+            content[i : i + chunk_size]
+            for i in range(0, len(content), chunk_size)
+        ]
         data.extend(chunks)
 
     except OSError as e:
@@ -115,4 +121,4 @@ def _collect_recursive(dir_path: str, depth: int) -> None:
             print(f"Failed to read directory {dir_path}: {e}")
 
     _collect_recursive(directory, 1)
-    return files
\ No newline at end of file
+    return files
diff --git a/mafm/rag/sqlite.py b/mafm/rag/sqlite.py
index 5cfc221..9f8c33f 100644
--- a/mafm/rag/sqlite.py
+++ b/mafm/rag/sqlite.py
@@ -344,4 +344,4 @@ def delete_directory_and_subdirectories(dir_path: str) -> None:
 
     connection.commit()
     connection.close()
-    print(f"Deleted all records related to {dir_path} and its subdirectories.")
\ No newline at end of file
+    print(f"Deleted all records related to {dir_path} and its subdirectories.")
diff --git a/mafm/shell.py b/mafm/shell.py
index 995c721..6f20117 100644
--- a/mafm/shell.py
+++ b/mafm/shell.py
@@ -68,7 +68,7 @@ def execute_command(
                 return None
 
             prompt = " ".join(cmd_parts[1:])
-            paths = graph(context.root_dir, prompt)
+            paths = graph(prompt)
 
             temp_dir = tempfile.TemporaryDirectory(dir=temp_dir_path)
 
@@ -138,4 +138,4 @@ def shell(root_dir: str) -> None:
     if not args.root:
         print("Root directory path is required.")
     else:
-        shell(args.root)
\ No newline at end of file
+        shell(args.root)
diff --git a/tests/embedding_test.py b/tests/embedding_test.py
index 5eab180..f103f70 100644
--- a/tests/embedding_test.py
+++ b/tests/embedding_test.py
@@ -27,4 +27,4 @@ def test_embedding_output_shape(test_sentences: list[str]) -> None:
     """
     embeddings = embedding(test_sentences)
     assert embeddings is not None
-    assert len(embeddings) == len(test_sentences)
\ No newline at end of file
+    assert len(embeddings) == len(test_sentences)

From 9857957a95d131f5f2eb63410256dca049e37bf8 Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Tue, 25 Nov 2025 09:12:57 +0900
Subject: [PATCH 12/13] =?UTF-8?q?Refactor:=20=EC=A4=84=20=EA=B8=B8?=
 =?UTF-8?q?=EC=9D=B4=20=EC=A0=9C=ED=95=9C=20=EC=A4=80=EC=88=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mafm/agent/agents/analyst.py    | 6 ++++--
 mafm/agent/agents/member.py     | 7 ++++---
 mafm/agent/agents/supervisor.py | 5 +++--
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/mafm/agent/agents/analyst.py b/mafm/agent/agents/analyst.py
index 13210d0..164a03a 100644
--- a/mafm/agent/agents/analyst.py
+++ b/mafm/agent/agents/analyst.py
@@ -41,7 +41,9 @@ def analyst_agent(
     """
     llm = ChatOpenAI(api_key=api_key, model="gpt-4o-mini")
 
-    system_prompt = "당신은 구성원들이 답변한 파일의 경로들을 받고 정리하는 감독자입니다."
+    system_prompt = (
+        "당신은 구성원들이 답변한 파일의 경로들을 받고 정리하는 감독자입니다."
+    )
 
     prompt = ChatPromptTemplate.from_messages(
         [
@@ -59,4 +61,4 @@ def analyst_agent(
 
     print(output_list)
     analyst_chain = prompt | llm.with_structured_output(ListResponse)
-    return analyst_chain.invoke(state)
\ No newline at end of file
+    return analyst_chain.invoke(state)
diff --git a/mafm/agent/agents/member.py b/mafm/agent/agents/member.py
index 5781341..3664460 100644
--- a/mafm/agent/agents/member.py
+++ b/mafm/agent/agents/member.py
@@ -11,7 +11,7 @@
 from pydantic import BaseModel, Field
 
 from mafm.agent.agents.llm_model import api_key
-from mafm.rag.vectorDb import search
+from mafm.rag.vector_db import search
 
 
 class QueryResponse(BaseModel):
@@ -68,7 +68,8 @@ def agent_node(
             (
                 "system",
                 "current directory name: {directory_name} "
-                "사용자에 요청에 따라서 디렉토리에서 파일을 검색하려고 합니다 쿼리를 문장으로 정리해주세요",
+                "사용자에 요청에 따라서 디렉토리에서 파일을 검색하려고 합니다 "
+                "쿼리를 문장으로 정리해주세요",
             ),
         ]
     ).partial(directory_name=directory_name)
@@ -81,4 +82,4 @@ def agent_node(
     if file_list:
         output_list.extend(file_list)
         return {"messages": file_list}
-    return {"messages": []}
\ No newline at end of file
+    return {"messages": []}
diff --git a/mafm/agent/agents/supervisor.py b/mafm/agent/agents/supervisor.py
index 99932fb..d49af6d 100644
--- a/mafm/agent/agents/supervisor.py
+++ b/mafm/agent/agents/supervisor.py
@@ -52,10 +52,11 @@ class RouteResponse(BaseModel):
             (
                 "system",
                 "선택할 수 있는 디렉토리는 다음과 같습니다: {members}. "
-                "디렉토리를 선택해주세요. 절대로 같은 디렉토리를 두 번 선택하지 마세요.",
+                "디렉토리를 선택해주세요. "
+                "절대로 같은 디렉토리를 두 번 선택하지 마세요.",
             ),
         ]
     ).partial(members=", ".join(member_list))
 
     supervisor_chain = prompt | llm.with_structured_output(RouteResponse)
-    return supervisor_chain.invoke(state)
\ No newline at end of file
+    return supervisor_chain.invoke(state)

From 31b9e7d2919a29aadfe3f7dd48e50c0e63933272 Mon Sep 17 00:00:00 2001
From: sehwan505 <sehwan505@gmail.com>
Date: Tue, 25 Nov 2025 09:15:53 +0900
Subject: [PATCH 13/13] =?UTF-8?q?Refactor:=20=EB=AA=A8=EB=93=88=20?=
 =?UTF-8?q?=EC=9D=B4=EB=A6=84=20=EA=B7=9C=EC=B9=99=20=EC=A4=80=EC=88=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mafm/rag/vector_db.py | 259 ++++++++++++++++++++++++++++++++++++++++++
 tests/milvus_test.py  |   6 +-
 2 files changed, 262 insertions(+), 3 deletions(-)
 create mode 100644 mafm/rag/vector_db.py

diff --git a/mafm/rag/vector_db.py b/mafm/rag/vector_db.py
new file mode 100644
index 0000000..91dd4c3
--- /dev/null
+++ b/mafm/rag/vector_db.py
@@ -0,0 +1,259 @@
+"""벡터 데이터베이스 모듈.
+
+Milvus 벡터 데이터베이스 관리 기능을 제공합니다.
+"""
+
+import gc
+import os
+from typing import Any
+
+from pymilvus import MilvusClient
+
+from mafm.rag.embedding import embedding
+from mafm.rag.sqlite import get_path_by_id
+
+COLLECTION_NAME = "demo_collection"
+VECTOR_DIMENSION = 384
+
+
+def _delete_db_lock_file(db_name: str) -> None:
+    """Remove the hidden ``.<basename>.lock`` file next to the database file.
+
+    Args:
+        db_name: Path to the database file.
+    """
+    dir_path = os.path.dirname(db_name)
+    base_name = os.path.basename(db_name)
+    # os.path.join keeps a bare file name relative (not "/.name.lock").
+    lock_file = os.path.join(dir_path, f".{base_name}.lock")
+    if os.path.exists(lock_file):
+        os.remove(lock_file)
+    else:
+        print(f"No lock file found for {lock_file}")
+
+
+def initialize_vector_db(db_name: str) -> None:
+    """Create a fresh collection in the vector database, dropping any old one.
+
+    Args:
+        db_name: Database file path passed to ``MilvusClient``.
+
+    Raises:
+        Exception: Re-raised after being printed if any setup step fails.
+    """
+    client: MilvusClient | None = None
+    try:
+        client = MilvusClient(db_name)
+        print(f"Connected to {db_name}")
+
+        if client.has_collection(collection_name=COLLECTION_NAME):
+            client.drop_collection(collection_name=COLLECTION_NAME)
+
+        client.create_collection(
+            collection_name=COLLECTION_NAME,
+            dimension=VECTOR_DIMENSION,
+        )
+    except Exception as e:
+        print(f"Error initializing vector DB for {db_name}: {e}")
+        raise
+    finally:
+        if client is not None:
+            client.close()
+        gc.collect()
+        _delete_db_lock_file(db_name)
+
+
+def delete_vector_db(db_name: str) -> None:
+    """Drop the collection in ``db_name`` if it exists; errors are only printed.
+
+    Args:
+        db_name: Database file path passed to ``MilvusClient``.
+    """
+    client: MilvusClient | None = None
+    try:
+        client = MilvusClient(db_name)
+        if client.has_collection(collection_name=COLLECTION_NAME):
+            client.drop_collection(collection_name=COLLECTION_NAME)
+            print(f"Collection '{COLLECTION_NAME}' in {db_name} has been deleted.")
+        else:
+            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
+    except Exception as e:
+        print(f"Error deleting collection in {db_name}: {e}")
+    finally:
+        if client is not None:
+            client.close()
+        gc.collect()
+        _delete_db_lock_file(db_name)
+
+
+def save(db_name: str, file_id: int, queries: list[str]) -> None:
+    """Embed ``queries`` and insert one row per embedding into the collection.
+
+    Args:
+        db_name: Database file path passed to ``MilvusClient``.
+        file_id: File identifier stored as the ``id`` of every inserted row.
+        queries: Texts to embed; each is stored in the row's ``word`` field.
+    """
+    client: MilvusClient | None = None
+    try:
+        client = MilvusClient(db_name)
+        if not client.has_collection(collection_name=COLLECTION_NAME):
+            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
+            return
+
+        query_embeddings = embedding(queries)
+        if query_embeddings is None:
+            print("Failed to generate embeddings")
+            return
+
+        data = [
+            {"id": file_id, "vector": query_embeddings[i], "word": queries[i]}
+            for i in range(len(query_embeddings))
+        ]
+
+        res = client.insert(collection_name=COLLECTION_NAME, data=data)
+        print(res)
+
+    except MemoryError as me:
+        print(f"MemoryError: {me}")
+    except ValueError as ve:
+        print(f"ValueError: {ve}")
+    except Exception as e:
+        print(f"Error occurred during saving data to Milvus: {e}")
+    finally:
+        if client is not None:
+            client.close()
+        gc.collect()
+        _delete_db_lock_file(db_name)
+
+
+def insert_file_embedding(file_data: list[dict[str, Any]], db_name: str) -> None:
+    """Insert pre-built embedding rows into the collection.
+
+    Args:
+        file_data: Rows passed unchanged to ``MilvusClient.insert``.
+        db_name: Database file path passed to ``MilvusClient``.
+    """
+    client: MilvusClient | None = None
+    try:
+        client = MilvusClient(db_name)
+        if not client.has_collection(collection_name=COLLECTION_NAME):
+            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
+            return
+
+        client.insert(collection_name=COLLECTION_NAME, data=file_data)
+
+    except MemoryError as me:
+        print(f"MemoryError: {me}")
+    except ValueError as ve:
+        print(f"ValueError: {ve}")
+    except Exception as e:
+        print(f"Error occurred during saving data to Milvus: {e}")
+    finally:
+        if client is not None:
+            client.close()
+        gc.collect()
+        _delete_db_lock_file(db_name)
+
+
+def search(db_name: str, query_list: list[str]) -> list[str]:
+    """Look up the file paths of the closest matches for the first query.
+
+    Args:
+        db_name: Database file path passed to ``MilvusClient``.
+        query_list: Query texts to embed; only ``res[0]`` hits are returned.
+
+    Returns:
+        Paths for up to 2 hits; ``[]`` without a collection or embeddings.
+    """
+    client: MilvusClient | None = None
+    try:
+        client = MilvusClient(db_name)
+        if not client.has_collection(collection_name=COLLECTION_NAME):
+            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
+            return []
+
+        query_vectors = embedding(query_list)
+        if query_vectors is None:
+            return []
+
+        res = client.search(
+            collection_name=COLLECTION_NAME,
+            data=query_vectors,
+            limit=2,
+        )
+        id_list = [item["id"] for item in res[0]]
+        path_list = [get_path_by_id(file_id, "filesystem.db") for file_id in id_list]
+        return path_list
+    finally:
+        if client is not None:
+            client.close()
+        gc.collect()
+        _delete_db_lock_file(db_name)
+
+
+def find_by_id(search_id: int, db_name: str) -> list[dict[str, Any]] | None:
+    """Query the collection for rows whose ``id`` equals ``search_id``.
+
+    Args:
+        search_id: File ID to look for.
+        db_name: Database file path passed to ``MilvusClient``.
+
+    Returns:
+        Matching rows, or None if the collection is missing or nothing matched.
+    """
+    client: MilvusClient | None = None
+    try:
+        client = MilvusClient(db_name)
+
+        if not client.has_collection(COLLECTION_NAME):
+            print(f"Collection '{COLLECTION_NAME}' does not exist in {db_name}")
+            return None
+
+        res = client.query(
+            collection_name=COLLECTION_NAME, filter=f"id in [{search_id}]"
+        )
+
+        if not res:
+            print(f"No results found for ID: {search_id}")
+            return None
+        return res
+    finally:
+        if client is not None:
+            client.close()
+        gc.collect()
+        _delete_db_lock_file(db_name)
+
+
+def remove_by_id(remove_id: int, db_name: str) -> dict[str, Any] | None:
+    """Delete every row whose ``id`` equals ``remove_id``.
+
+    Args:
+        remove_id: File ID whose rows are deleted.
+        db_name: Database file path passed to ``MilvusClient``.
+
+    Returns:
+        The value returned by ``MilvusClient.delete``.
+
+    Raises:
+        Exception: If the collection does not exist.
+    """
+    client: MilvusClient | None = None
+    try:
+        client = MilvusClient(db_name)
+        if not client.has_collection(COLLECTION_NAME):
+            raise Exception(
+                f"Collection '{COLLECTION_NAME}' does not exist in {db_name}"
+            )
+
+        res = client.delete(
+            collection_name=COLLECTION_NAME, filter=f"id in [{remove_id}]"
+        )
+
+        print(f"Deleted records with ID: {remove_id}")
+        return res
+    finally:
+        if client is not None:
+            client.close()
+        gc.collect()
+        _delete_db_lock_file(db_name)
diff --git a/tests/milvus_test.py b/tests/milvus_test.py
index 6848aae..d23cd84 100644
--- a/tests/milvus_test.py
+++ b/tests/milvus_test.py
@@ -6,7 +6,7 @@
 from pymilvus import Collection, connections, utility
 
 from mafm.rag.embedding import embedding
-from mafm.rag.vectorDb import (
+from mafm.rag.vector_db import (
     delete_vector_db,
     find_by_id,
     initialize_vector_db,
@@ -97,7 +97,7 @@ def test_search(setup_milvus: Any, mocker: Any) -> None:
         mocker: pytest-mock mocker.
     """
     mocker.patch(
-        "mafm.rag.vectorDb.get_path_by_id",
+        "mafm.rag.vector_db.get_path_by_id",
         return_value="path/to/file",
     )
 
@@ -150,4 +150,4 @@ def test_delete_vector_db() -> None:
     connections.connect(alias="default", host="localhost", port="19530")
     collection_name = f"{DB_NAME}_demo_collection"
     assert not utility.has_collection(collection_name)
-    connections.disconnect(alias="default")
\ No newline at end of file
+    connections.disconnect(alias="default")