Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,5 @@ Thumbs.db
# Vector database persistence
data/

# Knowledge Documents
documents/
# Knowledge Documents (root folder only)
/documents/
5 changes: 3 additions & 2 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class Settings(BaseSettings):
app_version: str = "0.1.0"
debug: bool = False

# Server
host: str = "0.0.0.0"
# Server - bind to all interfaces for container deployment
host: str = "0.0.0.0" # nosec B104
port: int = 8000

# LLM Provider (OpenRouter)
Expand Down Expand Up @@ -53,6 +53,7 @@ class Settings(BaseSettings):
rag_chunk_overlap: int = 800 # Characters
rag_top_k: int = 5 # Number of chunks to retrieve
documents_path: str = "./documents"
uploads_path: str = "./uploads" # Directory for user-uploaded documents

# Semantic Cache
cache_enabled: bool = True # Enable semantic caching for faster responses
Expand Down
18 changes: 18 additions & 0 deletions src/infrastructure/database/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,24 @@
);

CREATE INDEX IF NOT EXISTS idx_messages_user_created ON messages(user_id, created_at);

CREATE TABLE IF NOT EXISTS documents (
id TEXT PRIMARY KEY,
filename TEXT UNIQUE NOT NULL,
title TEXT NOT NULL,
description TEXT,
file_path TEXT NOT NULL,
file_type TEXT NOT NULL,
file_size_bytes INTEGER NOT NULL,
uploaded_by TEXT,
is_indexed INTEGER DEFAULT 0,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
FOREIGN KEY (uploaded_by) REFERENCES users(id) ON DELETE SET NULL
);

CREATE INDEX IF NOT EXISTS idx_documents_filename ON documents(filename);
CREATE INDEX IF NOT EXISTS idx_documents_created ON documents(created_at);
"""


Expand Down
2 changes: 1 addition & 1 deletion src/modules/auth/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def create_token(self, user: User) -> LoginResponse:

return LoginResponse(
access_token=token,
token_type="bearer",
token_type="bearer", # nosec B106 - OAuth2 token type, not a password
expires_in=self._jwt_expire_hours * 3600,
user=UserResponse(
id=user.id,
Expand Down
35 changes: 35 additions & 0 deletions src/modules/documents/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Document management module.

Provides functionality for uploading, managing, and indexing documents
for the RAG pipeline.
"""

from src.modules.documents.exceptions import (
DocumentAlreadyExistsError,
DocumentError,
DocumentIndexingError,
DocumentNotFoundError,
DocumentValidationError,
)
from src.modules.documents.models import Document
from src.modules.documents.repository import DocumentRepository
from src.modules.documents.schemas import (
DocumentListResponse,
DocumentResponse,
DocumentUploadRequest,
)
from src.modules.documents.service import DocumentService

# Public API of the documents package: every name listed here is imported
# above from one of the package's submodules (exceptions, models, repository,
# schemas, service). Entries are kept in alphabetical order.
__all__ = [
"Document",
"DocumentAlreadyExistsError",
"DocumentError",
"DocumentIndexingError",
"DocumentListResponse",
"DocumentNotFoundError",
"DocumentRepository",
"DocumentResponse",
"DocumentService",
"DocumentUploadRequest",
"DocumentValidationError",
]
39 changes: 39 additions & 0 deletions src/modules/documents/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Document management exceptions."""


class DocumentError(Exception):
    """Root of the document exception hierarchy.

    Catch this type to handle any failure raised by document operations.
    """


class DocumentNotFoundError(DocumentError):
    """Signals that no document matches the given identifier.

    Attributes:
        identifier: The lookup value that failed to resolve to a document.
    """

    def __init__(self, identifier: str) -> None:
        message = f"Document not found: {identifier}"
        super().__init__(message)
        # Keep the raw identifier so handlers can use it without parsing
        # the message text.
        self.identifier = identifier


class DocumentAlreadyExistsError(DocumentError):
    """Signals a filename collision with an existing document.

    Attributes:
        filename: The filename that is already taken.
    """

    def __init__(self, filename: str) -> None:
        message = f"Document already exists: {filename}"
        super().__init__(message)
        # Keep the conflicting filename available to handlers.
        self.filename = filename


class DocumentValidationError(DocumentError):
    """Signals that a document did not pass validation.

    The supplied message is used verbatim as the exception text.
    """

    def __init__(self, message: str) -> None:
        # Explicit single-argument constructor: exactly one message string
        # is accepted and forwarded to the base class.
        super().__init__(message)


class DocumentIndexingError(DocumentError):
    """Signals that indexing a document failed.

    Attributes:
        filename: Name of the document that could not be indexed.
        reason: Description of why indexing failed.
    """

    def __init__(self, filename: str, reason: str) -> None:
        message = f"Failed to index document '{filename}': {reason}"
        super().__init__(message)
        # Keep both parts separately so handlers need not parse the message.
        self.filename = filename
        self.reason = reason
48 changes: 48 additions & 0 deletions src/modules/documents/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Document domain models."""

from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
from uuid import UUID


@dataclass
class Document:
"""Document domain model representing an uploaded document."""

id: UUID
filename: str
title: str
description: str | None
file_path: Path
file_type: str # 'markdown' or 'text'
file_size_bytes: int
uploaded_by: UUID | None
is_indexed: bool
created_at: datetime
updated_at: datetime

@classmethod
def from_row(cls, row: dict[str, Any]) -> "Document":
"""Create a Document from a database row.

Args:
row: Dictionary containing database row data.

Returns:
Document instance.
"""
return cls(
id=UUID(str(row["id"])),
filename=str(row["filename"]),
title=str(row["title"]),
description=str(row["description"]) if row["description"] else None,
file_path=Path(str(row["file_path"])),
file_type=str(row["file_type"]),
file_size_bytes=int(row["file_size_bytes"]),
uploaded_by=UUID(str(row["uploaded_by"])) if row["uploaded_by"] else None,
is_indexed=bool(row["is_indexed"]),
created_at=datetime.fromisoformat(str(row["created_at"])),
updated_at=datetime.fromisoformat(str(row["updated_at"])),
)
Loading