From ed07a89fb787dc0afa3b0e3edde65006fe447c1f Mon Sep 17 00:00:00 2001 From: Chris Krough <461869+ckrough@users.noreply.github.com> Date: Sat, 10 Jan 2026 16:35:02 -0500 Subject: [PATCH] chore: add OpenTelemetry observability instrumentation Add OpenTelemetry tracing to key operations: - Create src/infrastructure/observability/ module with setup, tracing utilities - Instrument RAG pipeline (ask, retrieve), LLM, embeddings, vectordb, cache - Add structlog processor for trace/span ID injection into logs - Configure via OTEL_ENABLED, OTEL_EXPORTER, OTEL_ENDPOINT env vars Closes retriever-cd6 --- .env.example | 7 + pyproject.toml | 8 + src/config.py | 7 + src/infrastructure/cache/chroma_cache.py | 240 ++++---- src/infrastructure/embeddings/openai.py | 174 +++--- src/infrastructure/llm/openrouter.py | 184 +++--- src/infrastructure/observability/__init__.py | 25 + src/infrastructure/observability/setup.py | 145 +++++ .../observability/structlog_processor.py | 33 ++ src/infrastructure/observability/tracing.py | 157 ++++++ src/infrastructure/vectordb/chroma.py | 247 ++++---- src/main.py | 27 +- src/modules/rag/retriever.py | 90 +-- src/modules/rag/service.py | 530 ++++++++++-------- tests/test_observability.py | 236 ++++++++ uv.lock | 133 +++-- 16 files changed, 1541 insertions(+), 702 deletions(-) create mode 100644 src/infrastructure/observability/__init__.py create mode 100644 src/infrastructure/observability/setup.py create mode 100644 src/infrastructure/observability/structlog_processor.py create mode 100644 src/infrastructure/observability/tracing.py create mode 100644 tests/test_observability.py diff --git a/.env.example b/.env.example index 6570deb..c719c73 100644 --- a/.env.example +++ b/.env.example @@ -61,3 +61,10 @@ AUTH_ENABLED=true # Enable user authentication JWT_SECRET_KEY=change-this-to-a-random-secret-key # Secret key for JWT signing (REQUIRED if auth enabled) JWT_ALGORITHM=HS256 # JWT signing algorithm JWT_EXPIRE_HOURS=24 # Token expiration in hours + +# OpenTelemetry Observability +OTEL_ENABLED=true # Enable OpenTelemetry tracing +OTEL_EXPORTER=none # Exporter: "otlp", "console", or "none" +OTEL_ENDPOINT= # OTLP collector endpoint (e.g., "http://localhost:4318") +OTEL_SERVICE_NAME=retriever # Service name for traces +OTEL_SAMPLE_RATE=1.0 # Sampling rate (0.0 to 1.0) diff --git a/pyproject.toml b/pyproject.toml index 3d5eb48..41ec492 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,12 @@ dependencies = [ "email-validator>=2.3.0", "markdown>=3.10", "bleach>=6.3.0", + # Observability (OpenTelemetry) + "opentelemetry-api~=1.29.0", + "opentelemetry-sdk~=1.29.0", + "opentelemetry-exporter-otlp-proto-http~=1.29.0", + "opentelemetry-instrumentation-fastapi~=0.50b0", + "opentelemetry-instrumentation-httpx~=0.50b0", ] [project.optional-dependencies] @@ -120,6 +126,8 @@ module = [ "chromadb", "chromadb.*", "rank_bm25", + "opentelemetry", + "opentelemetry.*", ] ignore_missing_imports = true diff --git a/src/config.py b/src/config.py index b047175..0a5d85d 100644 --- a/src/config.py +++ b/src/config.py @@ -84,6 +84,13 @@ class Settings(BaseSettings): # Conversation History conversation_max_messages: int = 20 # Max messages to include in context + # OpenTelemetry / Observability + otel_enabled: bool = True # Enable OpenTelemetry tracing + otel_exporter: str = "none" # Exporter: "otlp", "console", or "none" + otel_endpoint: str | None = None # OTLP endpoint (e.g., "http://localhost:4318") + otel_service_name: str = "retriever" # Service name for traces + otel_sample_rate: float = 
1.0 # Sampling rate (0.0 to 1.0) + @lru_cache def get_settings() -> Settings: diff --git a/src/infrastructure/cache/chroma_cache.py b/src/infrastructure/cache/chroma_cache.py index afbcd00..cfce052 100644 --- a/src/infrastructure/cache/chroma_cache.py +++ b/src/infrastructure/cache/chroma_cache.py @@ -10,8 +10,10 @@ from src.infrastructure.cache.protocol import CacheEntry from src.infrastructure.embeddings import EmbeddingProvider +from src.infrastructure.observability import get_tracer logger = structlog.get_logger() +tracer = get_tracer(__name__) class ChromaSemanticCache: @@ -83,94 +85,107 @@ async def get(self, question: str) -> CacheEntry | None: CacheEntry if a similar question was found above the similarity threshold, None otherwise. """ - if self._collection.count() == 0: - return None - - # Embed the question - query_embedding = await self._embeddings.embed(question) - - # Query for similar cached questions - results = self._collection.query( - query_embeddings=[query_embedding], # type: ignore[arg-type] - n_results=1, - include=["documents", "metadatas", "distances"], # type: ignore[list-item] - ) + with tracer.start_as_current_span("cache.get") as span: + span.set_attribute("cache.question_length", len(question)) + + if self._collection.count() == 0: + span.set_attribute("cache.hit", False) + span.set_attribute("cache.miss_reason", "empty_cache") + return None + + # Embed the question + query_embedding = await self._embeddings.embed(question) + + # Query for similar cached questions + results = self._collection.query( + query_embeddings=[query_embedding], + n_results=1, + include=["documents", "metadatas", "distances"], + ) - # Check if we got a result - ids_result = results.get("ids") - if not ids_result or not ids_result[0]: - logger.debug("cache_miss_no_results", question_length=len(question)) - return None - - # Get the distance and convert to similarity - distances_result = results.get("distances") - if distances_result and distances_result[0]: - distance = float(distances_result[0][0]) - else: - distance = 2.0 # Max cosine distance - similarity = 1.0 - distance - - # Check threshold - if similarity < self._similarity_threshold: - logger.debug( - "cache_miss_below_threshold", + # Check if we got a result + ids_result = results.get("ids") + if not ids_result or not ids_result[0]: + span.set_attribute("cache.hit", False) + span.set_attribute("cache.miss_reason", "no_results") + logger.debug("cache_miss_no_results", question_length=len(question)) + return None + + # Get the distance and convert to similarity + distances_result = results.get("distances") + if distances_result and distances_result[0]: + distance = float(distances_result[0][0]) + else: + distance = 2.0 # Max cosine distance + similarity = 1.0 - distance + span.set_attribute("cache.similarity", similarity) + + # Check threshold + if similarity < self._similarity_threshold: + span.set_attribute("cache.hit", False) + span.set_attribute("cache.miss_reason", "below_threshold") + logger.debug( + "cache_miss_below_threshold", + similarity=similarity, + threshold=self._similarity_threshold, + question_length=len(question), + ) + return None + + # Check TTL + metadatas_result = results.get("metadatas") + if metadatas_result and metadatas_result[0]: + metadata = metadatas_result[0][0] + else: + metadata = {} + created_at_str = str(metadata.get("created_at", "")) + + if created_at_str: + try: + created_at = datetime.fromisoformat(created_at_str) + # Handle timezone-naive datetime from storage + if created_at.tzinfo is None: 
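+                        # Stored ISO timestamps may be naive; assume UTC so the
+                        # age comparison below stays timezone-aware.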
+ created_at = created_at.replace(tzinfo=UTC) + age_hours = (datetime.now(UTC) - created_at).total_seconds() / 3600 + if age_hours > self._ttl_hours: + span.set_attribute("cache.hit", False) + span.set_attribute("cache.miss_reason", "expired") + logger.debug( + "cache_miss_expired", + age_hours=age_hours, + ttl_hours=self._ttl_hours, + ) + return None + except ValueError: + pass # Invalid date, treat as valid + + # Cache hit! + span.set_attribute("cache.hit", True) + documents_result = results.get("documents") + if documents_result and documents_result[0]: + answer = str(documents_result[0][0]) + else: + answer = "" + + cached_question = str(metadata.get("question", "")) + chunks_json = str(metadata.get("chunks_json", "[]")) + + logger.info( + "cache_hit", similarity=similarity, - threshold=self._similarity_threshold, - question_length=len(question), + cached_question_length=len(cached_question), + query_question_length=len(question), ) - return None - - # Check TTL - metadatas_result = results.get("metadatas") - if metadatas_result and metadatas_result[0]: - metadata = metadatas_result[0][0] - else: - metadata = {} - created_at_str = str(metadata.get("created_at", "")) - - if created_at_str: - try: - created_at = datetime.fromisoformat(created_at_str) - # Handle timezone-naive datetime from storage - if created_at.tzinfo is None: - created_at = created_at.replace(tzinfo=UTC) - age_hours = (datetime.now(UTC) - created_at).total_seconds() / 3600 - if age_hours > self._ttl_hours: - logger.debug( - "cache_miss_expired", - age_hours=age_hours, - ttl_hours=self._ttl_hours, - ) - return None - except ValueError: - pass # Invalid date, treat as valid - - # Cache hit! - documents_result = results.get("documents") - if documents_result and documents_result[0]: - answer = str(documents_result[0][0]) - else: - answer = "" - - cached_question = str(metadata.get("question", "")) - chunks_json = str(metadata.get("chunks_json", "[]")) - - logger.info( - "cache_hit", - similarity=similarity, - cached_question_length=len(cached_question), - query_question_length=len(question), - ) - return CacheEntry( - question=cached_question, - answer=answer, - chunks_json=chunks_json, - created_at=datetime.fromisoformat(created_at_str) - if created_at_str - else datetime.now(UTC), - similarity_score=similarity, - ) + return CacheEntry( + question=cached_question, + answer=answer, + chunks_json=chunks_json, + created_at=datetime.fromisoformat(created_at_str) + if created_at_str + else datetime.now(UTC), + similarity_score=similarity, + ) async def set( self, @@ -185,32 +200,39 @@ async def set( answer: The generated answer. chunks_json: JSON-serialized list of ChunkWithScore used. 
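+
+        Example (illustrative values, not from a real run):
+            await cache.set(
+                question="How often should puppies be fed?",
+                answer="Three to four small meals a day...",
+                chunks_json='[{"content": "...", "score": 0.9}]',
+            )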
""" - # Generate embedding for the question - question_embedding = await self._embeddings.embed(question) - - # Generate unique ID - entry_id = f"cache:{uuid.uuid4().hex}" - - # Store with metadata - self._collection.add( - ids=[entry_id], - documents=[answer], - embeddings=[question_embedding], # type: ignore[arg-type] - metadatas=[ - { - "question": question, - "chunks_json": chunks_json, - "created_at": datetime.now(UTC).isoformat(), - } - ], - ) + with tracer.start_as_current_span("cache.set") as span: + span.set_attribute("cache.question_length", len(question)) + span.set_attribute("cache.answer_length", len(answer)) + + # Generate embedding for the question + question_embedding = await self._embeddings.embed(question) + + # Generate unique ID + entry_id = f"cache:{uuid.uuid4().hex}" + + # Store with metadata + self._collection.add( + ids=[entry_id], + documents=[answer], + embeddings=[question_embedding], + metadatas=[ + { + "question": question, + "chunks_json": chunks_json, + "created_at": datetime.now(UTC).isoformat(), + } + ], + ) - logger.debug( - "cache_set", - question_length=len(question), - answer_length=len(answer), - total_cached=self._collection.count(), - ) + total_cached = self._collection.count() + span.set_attribute("cache.total_entries", total_cached) + + logger.debug( + "cache_set", + question_length=len(question), + answer_length=len(answer), + total_cached=total_cached, + ) async def clear(self) -> None: """Clear all cached entries. diff --git a/src/infrastructure/embeddings/openai.py b/src/infrastructure/embeddings/openai.py index a31449c..0dc47f0 100644 --- a/src/infrastructure/embeddings/openai.py +++ b/src/infrastructure/embeddings/openai.py @@ -19,8 +19,10 @@ EmbeddingRateLimitError, EmbeddingTimeoutError, ) +from src.infrastructure.observability import get_tracer logger = structlog.get_logger() +tracer = get_tracer(__name__) class OpenAIEmbeddingProvider: @@ -102,44 +104,53 @@ async def embed(self, text: str) -> list[float]: EmbeddingTimeoutError: If the request times out. EmbeddingRateLimitError: If rate limited. """ - try: - result = await self._embed_with_resilience([text]) - return result[0] - - except CircuitBreakerError as e: - logger.warning( - "circuit_breaker_open", - provider=self.PROVIDER_NAME, - model=self._model, - ) - raise EmbeddingProviderError( - "Service temporarily unavailable. 
Please try again in a moment.", - provider=self.PROVIDER_NAME, - ) from e - - except APITimeoutError as e: - logger.warning( - "embedding_timeout", - provider=self.PROVIDER_NAME, - model=self._model, - timeout_seconds=self._timeout, - ) - raise EmbeddingTimeoutError( - f"Request timed out after {self._timeout}s", - provider=self.PROVIDER_NAME, - ) from e - - except APIConnectionError as e: - logger.error( - "embedding_connection_error", - provider=self.PROVIDER_NAME, - model=self._model, - error=str(e), - ) - raise EmbeddingProviderError( - "Unable to connect to embedding service", - provider=self.PROVIDER_NAME, - ) from e + with tracer.start_as_current_span("embeddings.embed") as span: + span.set_attribute("embeddings.provider", self.PROVIDER_NAME) + span.set_attribute("embeddings.model", self._model) + span.set_attribute("embeddings.input_length", len(text)) + + try: + result = await self._embed_with_resilience([text]) + span.set_attribute("embeddings.dimensions", len(result[0])) + return result[0] + + except CircuitBreakerError as e: + span.record_exception(e) + logger.warning( + "circuit_breaker_open", + provider=self.PROVIDER_NAME, + model=self._model, + ) + raise EmbeddingProviderError( + "Service temporarily unavailable. Please try again in a moment.", + provider=self.PROVIDER_NAME, + ) from e + + except APITimeoutError as e: + span.record_exception(e) + logger.warning( + "embedding_timeout", + provider=self.PROVIDER_NAME, + model=self._model, + timeout_seconds=self._timeout, + ) + raise EmbeddingTimeoutError( + f"Request timed out after {self._timeout}s", + provider=self.PROVIDER_NAME, + ) from e + + except APIConnectionError as e: + span.record_exception(e) + logger.error( + "embedding_connection_error", + provider=self.PROVIDER_NAME, + model=self._model, + error=str(e), + ) + raise EmbeddingProviderError( + "Unable to connect to embedding service", + provider=self.PROVIDER_NAME, + ) from e async def embed_batch(self, texts: list[str]) -> list[list[float]]: """Generate embedding vectors for multiple texts. @@ -158,44 +169,55 @@ async def embed_batch(self, texts: list[str]) -> list[list[float]]: if not texts: return [] - try: - return await self._embed_with_resilience(texts) - - except CircuitBreakerError as e: - logger.warning( - "circuit_breaker_open", - provider=self.PROVIDER_NAME, - model=self._model, - ) - raise EmbeddingProviderError( - "Service temporarily unavailable. 
Please try again in a moment.", - provider=self.PROVIDER_NAME, - ) from e - - except APITimeoutError as e: - logger.warning( - "embedding_timeout", - provider=self.PROVIDER_NAME, - model=self._model, - timeout_seconds=self._timeout, - batch_size=len(texts), - ) - raise EmbeddingTimeoutError( - f"Request timed out after {self._timeout}s", - provider=self.PROVIDER_NAME, - ) from e - - except APIConnectionError as e: - logger.error( - "embedding_connection_error", - provider=self.PROVIDER_NAME, - model=self._model, - error=str(e), - ) - raise EmbeddingProviderError( - "Unable to connect to embedding service", - provider=self.PROVIDER_NAME, - ) from e + with tracer.start_as_current_span("embeddings.embed_batch") as span: + span.set_attribute("embeddings.provider", self.PROVIDER_NAME) + span.set_attribute("embeddings.model", self._model) + span.set_attribute("embeddings.batch_size", len(texts)) + + try: + result = await self._embed_with_resilience(texts) + if result: + span.set_attribute("embeddings.dimensions", len(result[0])) + return result + + except CircuitBreakerError as e: + span.record_exception(e) + logger.warning( + "circuit_breaker_open", + provider=self.PROVIDER_NAME, + model=self._model, + ) + raise EmbeddingProviderError( + "Service temporarily unavailable. Please try again in a moment.", + provider=self.PROVIDER_NAME, + ) from e + + except APITimeoutError as e: + span.record_exception(e) + logger.warning( + "embedding_timeout", + provider=self.PROVIDER_NAME, + model=self._model, + timeout_seconds=self._timeout, + batch_size=len(texts), + ) + raise EmbeddingTimeoutError( + f"Request timed out after {self._timeout}s", + provider=self.PROVIDER_NAME, + ) from e + + except APIConnectionError as e: + span.record_exception(e) + logger.error( + "embedding_connection_error", + provider=self.PROVIDER_NAME, + model=self._model, + error=str(e), + ) + raise EmbeddingProviderError( + "Unable to connect to embedding service", + provider=self.PROVIDER_NAME, + ) from e @retry( retry=retry_if_exception_type((APIConnectionError, APITimeoutError)), diff --git a/src/infrastructure/llm/openrouter.py b/src/infrastructure/llm/openrouter.py index c0e9d2a..35417b4 100644 --- a/src/infrastructure/llm/openrouter.py +++ b/src/infrastructure/llm/openrouter.py @@ -18,8 +18,10 @@ LLMRateLimitError, LLMTimeoutError, ) +from src.infrastructure.observability import get_tracer logger = structlog.get_logger() +tracer = get_tracer(__name__) # System prompt for Retriever assistant DEFAULT_SYSTEM_PROMPT = """You are Retriever, a helpful assistant for animal shelter volunteers. @@ -103,50 +105,59 @@ async def complete( """ model_to_use = model or self._default_model - try: - result: str = await self._complete_with_resilience( - system_prompt=system_prompt, - user_message=user_message, - model=model_to_use, - ) - return result + with tracer.start_as_current_span("llm.complete") as span: + span.set_attribute("llm.provider", self.PROVIDER_NAME) + span.set_attribute("llm.model", model_to_use) + span.set_attribute("llm.input_length", len(user_message)) - except CircuitBreakerError as e: - logger.warning( - "circuit_breaker_open", - provider=self.PROVIDER_NAME, - model=model_to_use, - ) - raise LLMProviderError( - "Service temporarily unavailable. 
Please try again in a moment.", - provider=self.PROVIDER_NAME, - ) from e + try: + result: str = await self._complete_with_resilience( + system_prompt=system_prompt, + user_message=user_message, + model=model_to_use, + ) + span.set_attribute("llm.output_length", len(result)) + return result - except APITimeoutError as e: - # After all retries exhausted - logger.warning( - "llm_timeout", - provider=self.PROVIDER_NAME, - model=model_to_use, - timeout_seconds=self._timeout, - ) - raise LLMTimeoutError( - f"Request timed out after {self._timeout}s", - provider=self.PROVIDER_NAME, - ) from e + except CircuitBreakerError as e: + span.record_exception(e) + logger.warning( + "circuit_breaker_open", + provider=self.PROVIDER_NAME, + model=model_to_use, + ) + raise LLMProviderError( + "Service temporarily unavailable. Please try again in a moment.", + provider=self.PROVIDER_NAME, + ) from e - except APIConnectionError as e: - # After all retries exhausted - logger.error( - "llm_connection_error", - provider=self.PROVIDER_NAME, - model=model_to_use, - error=str(e), - ) - raise LLMProviderError( - "Unable to connect to LLM service", - provider=self.PROVIDER_NAME, - ) from e + except APITimeoutError as e: + # After all retries exhausted + span.record_exception(e) + logger.warning( + "llm_timeout", + provider=self.PROVIDER_NAME, + model=model_to_use, + timeout_seconds=self._timeout, + ) + raise LLMTimeoutError( + f"Request timed out after {self._timeout}s", + provider=self.PROVIDER_NAME, + ) from e + + except APIConnectionError as e: + # After all retries exhausted + span.record_exception(e) + logger.error( + "llm_connection_error", + provider=self.PROVIDER_NAME, + model=model_to_use, + error=str(e), + ) + raise LLMProviderError( + "Unable to connect to LLM service", + provider=self.PROVIDER_NAME, + ) from e @retry( retry=retry_if_exception_type((APIConnectionError, APITimeoutError)), @@ -255,48 +266,59 @@ async def complete_with_history( """ model_to_use = model or self._default_model - try: - result: str = await self._complete_history_with_resilience( - system_prompt=system_prompt, - messages=messages, - model=model_to_use, - ) - return result + with tracer.start_as_current_span("llm.complete_with_history") as span: + span.set_attribute("llm.provider", self.PROVIDER_NAME) + span.set_attribute("llm.model", model_to_use) + span.set_attribute("llm.message_count", len(messages)) + total_input_length = sum(len(m.get("content", "")) for m in messages) + span.set_attribute("llm.input_length", total_input_length) + + try: + result: str = await self._complete_history_with_resilience( + system_prompt=system_prompt, + messages=messages, + model=model_to_use, + ) + span.set_attribute("llm.output_length", len(result)) + return result - except CircuitBreakerError as e: - logger.warning( - "circuit_breaker_open", - provider=self.PROVIDER_NAME, - model=model_to_use, - ) - raise LLMProviderError( - "Service temporarily unavailable. Please try again in a moment.", - provider=self.PROVIDER_NAME, - ) from e + except CircuitBreakerError as e: + span.record_exception(e) + logger.warning( + "circuit_breaker_open", + provider=self.PROVIDER_NAME, + model=model_to_use, + ) + raise LLMProviderError( + "Service temporarily unavailable. 
Please try again in a moment.", + provider=self.PROVIDER_NAME, + ) from e - except APITimeoutError as e: - logger.warning( - "llm_timeout", - provider=self.PROVIDER_NAME, - model=model_to_use, - timeout_seconds=self._timeout, - ) - raise LLMTimeoutError( - f"Request timed out after {self._timeout}s", - provider=self.PROVIDER_NAME, - ) from e + except APITimeoutError as e: + span.record_exception(e) + logger.warning( + "llm_timeout", + provider=self.PROVIDER_NAME, + model=model_to_use, + timeout_seconds=self._timeout, + ) + raise LLMTimeoutError( + f"Request timed out after {self._timeout}s", + provider=self.PROVIDER_NAME, + ) from e - except APIConnectionError as e: - logger.error( - "llm_connection_error", - provider=self.PROVIDER_NAME, - model=model_to_use, - error=str(e), - ) - raise LLMProviderError( - "Unable to connect to LLM service", - provider=self.PROVIDER_NAME, - ) from e + except APIConnectionError as e: + span.record_exception(e) + logger.error( + "llm_connection_error", + provider=self.PROVIDER_NAME, + model=model_to_use, + error=str(e), + ) + raise LLMProviderError( + "Unable to connect to LLM service", + provider=self.PROVIDER_NAME, + ) from e @retry( retry=retry_if_exception_type((APIConnectionError, APITimeoutError)), @@ -341,7 +363,7 @@ async def _do_complete_with_history( response = await self._client.chat.completions.create( model=model, - messages=all_messages, # type: ignore[arg-type] + messages=all_messages, ) content = response.choices[0].message.content or "" diff --git a/src/infrastructure/observability/__init__.py b/src/infrastructure/observability/__init__.py new file mode 100644 index 0000000..eaacf0b --- /dev/null +++ b/src/infrastructure/observability/__init__.py @@ -0,0 +1,25 @@ +"""Observability module providing OpenTelemetry tracing and structlog integration.""" + +from src.infrastructure.observability.setup import ( + init_observability, + shutdown_observability, +) +from src.infrastructure.observability.structlog_processor import add_trace_context +from src.infrastructure.observability.tracing import ( + add_span_attributes, + get_current_span_id, + get_current_trace_id, + get_tracer, + traced, +) + +__all__ = [ + "add_span_attributes", + "add_trace_context", + "get_current_span_id", + "get_current_trace_id", + "get_tracer", + "init_observability", + "shutdown_observability", + "traced", +] diff --git a/src/infrastructure/observability/setup.py b/src/infrastructure/observability/setup.py new file mode 100644 index 0000000..f21a7a6 --- /dev/null +++ b/src/infrastructure/observability/setup.py @@ -0,0 +1,145 @@ +"""OpenTelemetry setup and initialization.""" + +import logging +from typing import TYPE_CHECKING + +import structlog +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk.resources import SERVICE_NAME, SERVICE_VERSION, Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter +from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio + +from src.infrastructure.observability.structlog_processor import add_trace_context + +if TYPE_CHECKING: + from fastapi import FastAPI + +# Module-level state for cleanup +_tracer_provider: TracerProvider | None = None +_initialized: bool = False + + +def init_observability( + service_name: 
str, + service_version: str, + *, + otlp_endpoint: str | None = None, + console_export: bool = False, + enabled: bool = True, + sample_rate: float = 1.0, + app: "FastAPI | None" = None, +) -> None: + """Initialize OpenTelemetry tracing and configure structlog integration. + + This function sets up the global tracer provider with the specified exporter, + configures structlog to include trace context in logs, and optionally + instruments FastAPI. + + Args: + service_name: Name of the service for resource attribution. + service_version: Version of the service. + otlp_endpoint: OTLP collector endpoint (e.g., "http://localhost:4318"). + If None and console_export is False, no exporter is configured. + console_export: If True, export spans to console (for development). + enabled: If False, tracing is completely disabled (no-op provider). + sample_rate: Sampling rate between 0.0 and 1.0. Default is 1.0 (all traces). + app: Optional FastAPI app instance to instrument. + """ + global _tracer_provider, _initialized + + if _initialized: + return + + if not enabled: + # Set no-op tracer provider + trace.set_tracer_provider(trace.NoOpTracerProvider()) + _initialized = True + return + + # Create resource with service metadata + resource = Resource.create( + { + SERVICE_NAME: service_name, + SERVICE_VERSION: service_version, + } + ) + + # Configure sampler + sampler = ParentBasedTraceIdRatio(sample_rate) + + # Create tracer provider + _tracer_provider = TracerProvider(resource=resource, sampler=sampler) + + # Add exporters + if console_export: + _tracer_provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter())) + + if otlp_endpoint: + otlp_exporter = OTLPSpanExporter(endpoint=f"{otlp_endpoint}/v1/traces") + _tracer_provider.add_span_processor(BatchSpanProcessor(otlp_exporter)) + + # Set as global tracer provider + trace.set_tracer_provider(_tracer_provider) + + # Configure structlog with trace context processor + _configure_structlog() + + # Instrument FastAPI if app provided + if app is not None: + FastAPIInstrumentor.instrument_app(app) + + # Instrument httpx for outgoing HTTP calls + HTTPXClientInstrumentor().instrument() + + _initialized = True + + +def shutdown_observability() -> None: + """Shutdown the tracer provider and flush any pending spans. + + This should be called during application shutdown to ensure all + spans are exported before the process exits. + """ + global _tracer_provider, _initialized + + if _tracer_provider is not None: + _tracer_provider.shutdown() + _tracer_provider = None + + _initialized = False + + +def _configure_structlog() -> None: + """Configure structlog with OpenTelemetry trace context injection. + + This adds the trace context processor to the structlog processing chain, + ensuring that trace_id and span_id are included in all log events. 
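+
+    Note: add_trace_context runs ahead of the rendering steps, so every
+    event already carries trace_id/span_id by the time it is formatted.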
+ """ + structlog.configure( + processors=[ + structlog.contextvars.merge_contextvars, + structlog.stdlib.add_log_level, + structlog.stdlib.add_logger_name, + structlog.stdlib.PositionalArgumentsFormatter(), + add_trace_context, # Inject trace context + structlog.processors.TimeStamper(fmt="iso"), + structlog.processors.StackInfoRenderer(), + structlog.processors.format_exc_info, + structlog.processors.UnicodeDecoder(), + structlog.stdlib.ProcessorFormatter.wrap_for_formatter, + ], + wrapper_class=structlog.stdlib.BoundLogger, + context_class=dict, + logger_factory=structlog.stdlib.LoggerFactory(), + cache_logger_on_first_use=True, + ) + + # Configure standard library logging + logging.basicConfig( + format="%(message)s", + level=logging.INFO, + ) diff --git a/src/infrastructure/observability/structlog_processor.py b/src/infrastructure/observability/structlog_processor.py new file mode 100644 index 0000000..fbbd998 --- /dev/null +++ b/src/infrastructure/observability/structlog_processor.py @@ -0,0 +1,33 @@ +"""Structlog processor for OpenTelemetry trace context injection.""" + +from typing import Any + +from opentelemetry import trace + + +def add_trace_context( + logger: Any, # noqa: ARG001 + method_name: str, # noqa: ARG001 + event_dict: dict[str, Any], +) -> dict[str, Any]: + """Structlog processor that adds trace_id and span_id to log events. + + This processor injects OpenTelemetry trace context into every log event, + enabling correlation between logs and traces in observability backends. + + Args: + logger: The logger instance (unused, required by structlog API). + method_name: The log method name (unused, required by structlog API). + event_dict: The log event dictionary to enrich. + + Returns: + The enriched event dictionary with trace_id and span_id if available. + """ + span = trace.get_current_span() + span_context = span.get_span_context() + + if span_context.is_valid: + event_dict["trace_id"] = format(span_context.trace_id, "032x") + event_dict["span_id"] = format(span_context.span_id, "016x") + + return event_dict diff --git a/src/infrastructure/observability/tracing.py b/src/infrastructure/observability/tracing.py new file mode 100644 index 0000000..69561b8 --- /dev/null +++ b/src/infrastructure/observability/tracing.py @@ -0,0 +1,157 @@ +"""Tracing utilities for instrumenting application code with OpenTelemetry.""" + +import asyncio +from collections.abc import Callable +from functools import wraps +from typing import Any, ParamSpec, TypeVar, overload + +from opentelemetry import trace +from opentelemetry.trace import Status, StatusCode, Tracer + +P = ParamSpec("P") +R = TypeVar("R") + + +def get_tracer(name: str) -> Tracer: + """Get a tracer for the given module name. + + Args: + name: Module name, typically __name__. + + Returns: + A Tracer instance for creating spans. + """ + return trace.get_tracer(name) + + +def add_span_attributes(attributes: dict[str, str | int | float | bool]) -> None: + """Add attributes to the current span. + + Args: + attributes: Key-value pairs to add to the span. + """ + span = trace.get_current_span() + if span.is_recording(): + for key, value in attributes.items(): + span.set_attribute(key, value) + + +def get_current_trace_id() -> str | None: + """Get the current trace ID as a hex string. + + Returns: + The trace ID as a 32-character hex string, or None if no active trace. 
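+
+    Example (return value is illustrative):
+        trace_id = get_current_trace_id()
+        # e.g. "4bf92f3577b34da6a3ce929d0e0e4736" inside an active span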
+ """ + span = trace.get_current_span() + span_context = span.get_span_context() + if span_context.is_valid: + return format(span_context.trace_id, "032x") + return None + + +def get_current_span_id() -> str | None: + """Get the current span ID as a hex string. + + Returns: + The span ID as a 16-character hex string, or None if no active span. + """ + span = trace.get_current_span() + span_context = span.get_span_context() + if span_context.is_valid: + return format(span_context.span_id, "016x") + return None + + +@overload +def traced( # noqa: UP047 + func: Callable[P, R], +) -> Callable[P, R]: ... + + +@overload +def traced( + func: None = None, + *, + span_name: str | None = None, + attributes: dict[str, str | int | float | bool] | None = None, + record_exception: bool = True, +) -> Callable[[Callable[P, R]], Callable[P, R]]: ... + + +def traced( # noqa: UP047 + func: Callable[P, R] | None = None, + *, + span_name: str | None = None, + attributes: dict[str, str | int | float | bool] | None = None, + record_exception: bool = True, +) -> Callable[P, R] | Callable[[Callable[P, R]], Callable[P, R]]: + """Decorator to trace a function with OpenTelemetry. + + Works with both sync and async functions. Can be used with or without + parentheses. + + Args: + func: The function to trace (when used without parentheses). + span_name: Name for the span (defaults to function name). + attributes: Static attributes to add to the span. + record_exception: Whether to record exceptions on the span. + + Returns: + The decorated function. + + Examples: + @traced + def my_function(): + ... + + @traced(span_name="custom.name", attributes={"key": "value"}) + async def my_async_function(): + ... + """ + + def decorator(fn: Callable[P, R]) -> Callable[P, R]: + tracer = get_tracer(fn.__module__) + name = span_name or fn.__name__ + + if asyncio.iscoroutinefunction(fn): + + @wraps(fn) + async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> Any: + with tracer.start_as_current_span(name) as span: + if attributes: + for key, value in attributes.items(): + span.set_attribute(key, value) + try: + result = await fn(*args, **kwargs) + span.set_status(Status(StatusCode.OK)) + return result + except Exception as e: + if record_exception: + span.record_exception(e) + span.set_status(Status(StatusCode.ERROR, str(e))) + raise + + return async_wrapper # type: ignore[return-value] + else: + + @wraps(fn) + def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> R: + with tracer.start_as_current_span(name) as span: + if attributes: + for key, value in attributes.items(): + span.set_attribute(key, value) + try: + result = fn(*args, **kwargs) + span.set_status(Status(StatusCode.OK)) + return result + except Exception as e: + if record_exception: + span.record_exception(e) + span.set_status(Status(StatusCode.ERROR, str(e))) + raise + + return sync_wrapper + + if func is not None: + return decorator(func) + return decorator diff --git a/src/infrastructure/vectordb/chroma.py b/src/infrastructure/vectordb/chroma.py index b1de007..0e803eb 100644 --- a/src/infrastructure/vectordb/chroma.py +++ b/src/infrastructure/vectordb/chroma.py @@ -11,6 +11,7 @@ import structlog from chromadb.config import Settings as ChromaSettings +from src.infrastructure.observability import get_tracer from src.infrastructure.vectordb.exceptions import ( VectorStoreConfigurationError, VectorStoreError, @@ -18,6 +19,7 @@ from src.infrastructure.vectordb.protocol import DocumentChunk, RetrievalResult logger = structlog.get_logger() +tracer = 
get_tracer(__name__) class ChromaVectorStore: @@ -98,38 +100,46 @@ async def add_chunks(self, chunks: list[DocumentChunk]) -> None: if not chunks: return - try: - # ChromaDB expects separate lists for each component - ids = [chunk.id for chunk in chunks] - documents = [chunk.content for chunk in chunks] - embeddings = [chunk.embedding for chunk in chunks] - metadatas: list[dict[str, Any]] = [chunk.metadata for chunk in chunks] - - self._collection.add( - ids=ids, - documents=documents, - embeddings=embeddings, # type: ignore[arg-type] - metadatas=metadatas, # type: ignore[arg-type] - ) + with tracer.start_as_current_span("vectordb.add_chunks") as span: + span.set_attribute("vectordb.provider", self.PROVIDER_NAME) + span.set_attribute("vectordb.chunk_count", len(chunks)) + + try: + # ChromaDB expects separate lists for each component + ids = [chunk.id for chunk in chunks] + documents = [chunk.content for chunk in chunks] + embeddings = [chunk.embedding for chunk in chunks] + metadatas: list[dict[str, Any]] = [chunk.metadata for chunk in chunks] + + self._collection.add( + ids=ids, + documents=documents, + embeddings=embeddings, + metadatas=metadatas, + ) - logger.debug( - "chroma_chunks_added", - provider=self.PROVIDER_NAME, - count=len(chunks), - total_count=self._collection.count(), - ) + total_count = self._collection.count() + span.set_attribute("vectordb.total_count", total_count) - except Exception as e: - logger.error( - "chroma_add_failed", - provider=self.PROVIDER_NAME, - error=str(e), - chunk_count=len(chunks), - ) - raise VectorStoreError( - f"Failed to add chunks: {e}", - provider=self.PROVIDER_NAME, - ) from e + logger.debug( + "chroma_chunks_added", + provider=self.PROVIDER_NAME, + count=len(chunks), + total_count=total_count, + ) + + except Exception as e: + span.record_exception(e) + logger.error( + "chroma_add_failed", + provider=self.PROVIDER_NAME, + error=str(e), + chunk_count=len(chunks), + ) + raise VectorStoreError( + f"Failed to add chunks: {e}", + provider=self.PROVIDER_NAME, + ) from e async def query( self, @@ -149,70 +159,80 @@ async def query( Raises: VectorStoreError: If query fails. 
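+
+        Example (illustrative):
+            results = await store.query(embedding, top_k=5)
+            best_match = results[0] if results else None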
""" - try: - results = self._collection.query( - query_embeddings=[embedding], # type: ignore[arg-type] - n_results=min(top_k, self._collection.count()), - include=["documents", "metadatas", "distances"], # type: ignore[list-item] - ) + with tracer.start_as_current_span("vectordb.query") as span: + span.set_attribute("vectordb.provider", self.PROVIDER_NAME) + span.set_attribute("vectordb.top_k", top_k) + span.set_attribute("vectordb.collection_size", self._collection.count()) + + try: + results = self._collection.query( + query_embeddings=[embedding], + n_results=min(top_k, self._collection.count()), + include=["documents", "metadatas", "distances"], + ) - # Convert Chroma results to RetrievalResult objects - retrieval_results: list[RetrievalResult] = [] - - # Chroma returns lists of lists (one per query), we only have one query - # All fields can be None if not included - ids_list = results.get("ids") or [[]] - ids = ids_list[0] if ids_list else [] - - docs_list = results.get("documents") or [[]] - documents = docs_list[0] if docs_list else [] - - meta_list = results.get("metadatas") or [[]] - metadatas = meta_list[0] if meta_list else [] - - dist_list = results.get("distances") or [[]] - distances = dist_list[0] if dist_list else [] - - for i, doc_id in enumerate(ids): - # Chroma returns distance; convert to similarity score - # For cosine, distance = 1 - similarity - distance = distances[i] if i < len(distances) else 0.0 - score = 1.0 - float(distance) - - # Convert metadata values to strings for our protocol - raw_meta = metadatas[i] if i < len(metadatas) else {} - str_meta: dict[str, str] = { - k: str(v) for k, v in (raw_meta or {}).items() - } - - retrieval_results.append( - RetrievalResult( - id=doc_id, - content=documents[i] if i < len(documents) else "", - metadata=str_meta, - score=score, + # Convert Chroma results to RetrievalResult objects + retrieval_results: list[RetrievalResult] = [] + + # Chroma returns lists of lists (one per query), we only have one query + # All fields can be None if not included + ids_list = results.get("ids") or [[]] + ids = ids_list[0] if ids_list else [] + + docs_list = results.get("documents") or [[]] + documents = docs_list[0] if docs_list else [] + + meta_list = results.get("metadatas") or [[]] + metadatas = meta_list[0] if meta_list else [] + + dist_list = results.get("distances") or [[]] + distances = dist_list[0] if dist_list else [] + + for i, doc_id in enumerate(ids): + # Chroma returns distance; convert to similarity score + # For cosine, distance = 1 - similarity + distance = distances[i] if i < len(distances) else 0.0 + score = 1.0 - float(distance) + + # Convert metadata values to strings for our protocol + raw_meta = metadatas[i] if i < len(metadatas) else {} + str_meta: dict[str, str] = { + k: str(v) for k, v in (raw_meta or {}).items() + } + + retrieval_results.append( + RetrievalResult( + id=doc_id, + content=documents[i] if i < len(documents) else "", + metadata=str_meta, + score=score, + ) ) - ) - logger.debug( - "chroma_query_success", - provider=self.PROVIDER_NAME, - top_k=top_k, - results_count=len(retrieval_results), - ) + span.set_attribute("vectordb.results_count", len(retrieval_results)) + if retrieval_results: + span.set_attribute("vectordb.top_score", retrieval_results[0].score) - return retrieval_results + logger.debug( + "chroma_query_success", + provider=self.PROVIDER_NAME, + top_k=top_k, + results_count=len(retrieval_results), + ) - except Exception as e: - logger.error( - "chroma_query_failed", - 
provider=self.PROVIDER_NAME, - error=str(e), - ) - raise VectorStoreError( - f"Failed to query: {e}", - provider=self.PROVIDER_NAME, - ) from e + return retrieval_results + + except Exception as e: + span.record_exception(e) + logger.error( + "chroma_query_failed", + provider=self.PROVIDER_NAME, + error=str(e), + ) + raise VectorStoreError( + f"Failed to query: {e}", + provider=self.PROVIDER_NAME, + ) from e async def clear(self) -> None: """Clear all data from the store. @@ -220,30 +240,35 @@ async def clear(self) -> None: Raises: VectorStoreError: If clearing fails. """ - try: - # Delete and recreate the collection - self._client.delete_collection(name=self._collection_name) - self._collection = self._client.create_collection( - name=self._collection_name, - metadata={"hnsw:space": "cosine"}, - ) + with tracer.start_as_current_span("vectordb.clear") as span: + span.set_attribute("vectordb.provider", self.PROVIDER_NAME) + span.set_attribute("vectordb.collection", self._collection_name) + + try: + # Delete and recreate the collection + self._client.delete_collection(name=self._collection_name) + self._collection = self._client.create_collection( + name=self._collection_name, + metadata={"hnsw:space": "cosine"}, + ) - logger.info( - "chroma_cleared", - provider=self.PROVIDER_NAME, - collection=self._collection_name, - ) + logger.info( + "chroma_cleared", + provider=self.PROVIDER_NAME, + collection=self._collection_name, + ) - except Exception as e: - logger.error( - "chroma_clear_failed", - provider=self.PROVIDER_NAME, - error=str(e), - ) - raise VectorStoreError( - f"Failed to clear collection: {e}", - provider=self.PROVIDER_NAME, - ) from e + except Exception as e: + span.record_exception(e) + logger.error( + "chroma_clear_failed", + provider=self.PROVIDER_NAME, + error=str(e), + ) + raise VectorStoreError( + f"Failed to clear collection: {e}", + provider=self.PROVIDER_NAME, + ) from e def count(self) -> int: """Return the number of chunks in the store.""" diff --git a/src/main.py b/src/main.py index 93159c3..bc072ae 100644 --- a/src/main.py +++ b/src/main.py @@ -13,6 +13,7 @@ from src.api.rate_limit import limiter, rate_limit_exceeded_handler from src.config import get_settings from src.infrastructure.database import Database, init_database +from src.infrastructure.observability import init_observability, shutdown_observability from src.modules.auth.repository import UserRepository from src.modules.auth.routes import router as auth_api_router from src.modules.auth.routes import set_auth_service as set_api_auth_service @@ -31,10 +32,26 @@ @asynccontextmanager -async def lifespan(_app: FastAPI) -> AsyncGenerator[None]: +async def lifespan(app: FastAPI) -> AsyncGenerator[None]: """Application lifespan handler for startup/shutdown.""" global _database + # Initialize observability first (before other services) + init_observability( + service_name=settings.otel_service_name, + service_version=settings.app_version, + otlp_endpoint=settings.otel_endpoint, + console_export=(settings.otel_exporter == "console"), + enabled=settings.otel_enabled, + sample_rate=settings.otel_sample_rate, + app=app, + ) + logger.info( + "observability_initialized", + enabled=settings.otel_enabled, + exporter=settings.otel_exporter, + ) + # Initialize database (sets global in connection module) db_path = Path(settings.database_path) db_path.parent.mkdir(parents=True, exist_ok=True) @@ -65,6 +82,10 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None]: await _database.disconnect() 
logger.info("database_disconnected") + # Shutdown observability (flush pending spans) + shutdown_observability() + logger.info("observability_shutdown") + app = FastAPI( title=settings.app_name, @@ -78,13 +99,13 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None]: app.state.limiter = limiter app.add_exception_handler( RateLimitExceeded, - rate_limit_exceeded_handler, # type: ignore[arg-type] + rate_limit_exceeded_handler, ) # Authentication redirect app.add_exception_handler( AuthenticationRequired, - auth_exception_handler, # type: ignore[arg-type] + auth_exception_handler, ) # Static files (optional - only mount if directory exists) diff --git a/src/modules/rag/retriever.py b/src/modules/rag/retriever.py index a05030f..735fc2a 100644 --- a/src/modules/rag/retriever.py +++ b/src/modules/rag/retriever.py @@ -7,9 +7,11 @@ from rank_bm25 import BM25Okapi from src.infrastructure.embeddings import EmbeddingProvider +from src.infrastructure.observability import get_tracer from src.infrastructure.vectordb import RetrievalResult, VectorStore logger = structlog.get_logger() +tracer = get_tracer(__name__) @dataclass @@ -108,50 +110,66 @@ async def retrieve( Returns: List of retrieval results ordered by combined relevance. """ - # Get semantic results (over-retrieve for better fusion) - semantic_k = min(top_k * 2, self._vector_store.count()) - if semantic_k == 0: - return [] + with tracer.start_as_current_span("retrieval.hybrid") as span: + span.set_attribute("retrieval.query_length", len(query)) + span.set_attribute("retrieval.top_k", top_k) + + # Get semantic results (over-retrieve for better fusion) + semantic_k = min(top_k * 2, self._vector_store.count()) + if semantic_k == 0: + span.set_attribute("retrieval.results_count", 0) + return [] + + with tracer.start_as_current_span("retrieval.semantic"): + query_embedding = await self._embeddings.embed(query) + semantic_results = await self._vector_store.query( + query_embedding, top_k=semantic_k + ) - query_embedding = await self._embeddings.embed(query) - semantic_results = await self._vector_store.query( - query_embedding, top_k=semantic_k - ) + span.set_attribute("retrieval.semantic_count", len(semantic_results)) - logger.debug( - "hybrid_semantic_results", - count=len(semantic_results), - top_score=semantic_results[0].score if semantic_results else 0, - ) + logger.debug( + "hybrid_semantic_results", + count=len(semantic_results), + top_score=semantic_results[0].score if semantic_results else 0, + ) - # Get keyword results - keyword_results = self._keyword_search(query, top_k=top_k * 2) + # Get keyword results + with tracer.start_as_current_span("retrieval.keyword"): + keyword_results = self._keyword_search(query, top_k=top_k * 2) - logger.debug( - "hybrid_keyword_results", - count=len(keyword_results), - top_score=keyword_results[0].score if keyword_results else 0, - ) + span.set_attribute("retrieval.keyword_count", len(keyword_results)) - # Merge results using Reciprocal Rank Fusion - merged = self._reciprocal_rank_fusion( - semantic_results, - keyword_results, - ) + logger.debug( + "hybrid_keyword_results", + count=len(keyword_results), + top_score=keyword_results[0].score if keyword_results else 0, + ) - # Return top_k results - final_results = merged[:top_k] + # Merge results using Reciprocal Rank Fusion + merged = self._reciprocal_rank_fusion( + semantic_results, + keyword_results, + ) - logger.info( - "hybrid_retrieval_complete", - query_length=len(query), - semantic_count=len(semantic_results), - 
keyword_count=len(keyword_results), - merged_count=len(merged), - returned_count=len(final_results), - ) + # Return top_k results + final_results = merged[:top_k] + + span.set_attribute("retrieval.merged_count", len(merged)) + span.set_attribute("retrieval.results_count", len(final_results)) + if final_results: + span.set_attribute("retrieval.top_score", final_results[0].score) + + logger.info( + "hybrid_retrieval_complete", + query_length=len(query), + semantic_count=len(semantic_results), + keyword_count=len(keyword_results), + merged_count=len(merged), + returned_count=len(final_results), + ) - return final_results + return final_results def _keyword_search( self, diff --git a/src/modules/rag/service.py b/src/modules/rag/service.py index 6911d72..21d0ef4 100644 --- a/src/modules/rag/service.py +++ b/src/modules/rag/service.py @@ -9,6 +9,7 @@ from src.infrastructure.cache import SemanticCache from src.infrastructure.embeddings import EmbeddingProvider from src.infrastructure.llm import LLMProvider +from src.infrastructure.observability import get_tracer from src.infrastructure.safety import ( ConfidenceScorer, SafetyService, @@ -27,6 +28,7 @@ from src.modules.rag.schemas import ChunkWithScore, IndexingResult, RAGResponse logger = structlog.get_logger() +tracer = get_tracer(__name__) def _serialize_chunks(chunks: list[ChunkWithScore]) -> str: @@ -135,191 +137,222 @@ async def ask( Returns: RAGResponse with answer and retrieved chunks. """ - # Check input safety first (if enabled) - if self._safety is not None: - safety_result = await self._safety.check_input(question) - if not safety_result.is_safe: - logger.warning( - "rag_input_blocked", - violation_type=safety_result.violation_type.value, - question_length=len(question), - ) + with tracer.start_as_current_span("rag.ask") as span: + span.set_attribute("rag.question_length", len(question)) + span.set_attribute("rag.has_history", conversation_history is not None) + span.set_attribute( + "rag.history_length", + len(conversation_history) if conversation_history else 0, + ) + + # Check input safety first (if enabled) + if self._safety is not None: + safety_result = await self._safety.check_input(question) + if not safety_result.is_safe: + span.set_attribute("rag.blocked", True) + span.set_attribute( + "rag.blocked_reason", safety_result.violation_type.value + ) + logger.warning( + "rag_input_blocked", + violation_type=safety_result.violation_type.value, + question_length=len(question), + ) + return RAGResponse( + answer=safety_result.message, + chunks_used=[], + question=question, + confidence_level="low", + confidence_score=0.0, + blocked=True, + blocked_reason=safety_result.violation_type.value, + ) + + # Check semantic cache first (if enabled) + if self._cache is not None: + cached = await self._cache.get(question) + if cached is not None: + span.set_attribute("rag.cache_hit", True) + logger.info( + "rag_cache_hit", + question_length=len(question), + similarity=cached.similarity_score, + ) + return RAGResponse( + answer=cached.answer, + chunks_used=_deserialize_chunks(cached.chunks_json), + question=question, + ) + + span.set_attribute("rag.cache_hit", False) + + # Check if we have any documents indexed + if self._store.count() == 0: + span.set_attribute("rag.no_documents", True) + logger.info("rag_no_documents", question_length=len(question)) + if conversation_history: + messages = conversation_history.copy() + messages.append({"role": "user", "content": question}) + answer = await self._llm.complete_with_history( + 
system_prompt=FALLBACK_SYSTEM_PROMPT, + messages=messages, + ) + else: + answer = await self._llm.complete( + system_prompt=FALLBACK_SYSTEM_PROMPT, + user_message=question, + ) + # Don't cache fallback responses (no context) return RAGResponse( - answer=safety_result.message, + answer=answer, chunks_used=[], question=question, - confidence_level="low", - confidence_score=0.0, - blocked=True, - blocked_reason=safety_result.violation_type.value, ) - # Check semantic cache first (if enabled) - if self._cache is not None: - cached = await self._cache.get(question) - if cached is not None: - logger.info( - "rag_cache_hit", - question_length=len(question), - similarity=cached.similarity_score, + # Retrieve relevant chunks (hybrid or semantic-only) + if self._retriever is not None: + # Use hybrid retrieval (semantic + keyword search with RRF) + span.set_attribute("rag.retrieval_type", "hybrid") + logger.debug( + "rag_hybrid_retrieving", + top_k=self._top_k, + keyword_index_size=self._retriever.get_keyword_index_count(), ) - return RAGResponse( - answer=cached.answer, - chunks_used=_deserialize_chunks(cached.chunks_json), - question=question, + results = await self._retriever.retrieve(question, top_k=self._top_k) + else: + # Fall back to semantic-only search + span.set_attribute("rag.retrieval_type", "semantic") + logger.debug("rag_embedding_question", question_length=len(question)) + query_embedding = await self._embeddings.embed(question) + logger.debug("rag_retrieving", top_k=self._top_k) + results = await self._store.query(query_embedding, top_k=self._top_k) + + # Convert to ChunkWithScore for response + chunks_used = [ + ChunkWithScore( + content=r.content, + source=r.metadata.get("source", "unknown"), + section=r.metadata.get("section", ""), + score=r.score, + title=r.metadata.get("title", ""), ) + for r in results + ] + + span.set_attribute("rag.chunks_retrieved", len(chunks_used)) + + # Build context for LLM + context_tuples = [ + (r.content, r.metadata.get("source", "unknown"), r.score) + for r in results + ] + system_prompt = build_rag_prompt(context_tuples) + + # Generate answer + logger.debug( + "rag_generating", + context_chunks=len(results), + question_length=len(question), + has_history=conversation_history is not None, + history_length=len(conversation_history) if conversation_history else 0, + ) - # Check if we have any documents indexed - if self._store.count() == 0: - logger.info("rag_no_documents", question_length=len(question)) if conversation_history: + # Multi-turn: include conversation history messages = conversation_history.copy() messages.append({"role": "user", "content": question}) answer = await self._llm.complete_with_history( - system_prompt=FALLBACK_SYSTEM_PROMPT, + system_prompt=system_prompt, messages=messages, ) else: + # Single-turn: no history answer = await self._llm.complete( - system_prompt=FALLBACK_SYSTEM_PROMPT, + system_prompt=system_prompt, user_message=question, ) - # Don't cache fallback responses (no context) - return RAGResponse( - answer=answer, - chunks_used=[], - question=question, - ) - # Retrieve relevant chunks (hybrid or semantic-only) - if self._retriever is not None: - # Use hybrid retrieval (semantic + keyword search with RRF) - logger.debug( - "rag_hybrid_retrieving", - top_k=self._top_k, - keyword_index_size=self._retriever.get_keyword_index_count(), - ) - results = await self._retriever.retrieve(question, top_k=self._top_k) - else: - # Fall back to semantic-only search - logger.debug("rag_embedding_question", 
question_length=len(question)) - query_embedding = await self._embeddings.embed(question) - logger.debug("rag_retrieving", top_k=self._top_k) - results = await self._store.query(query_embedding, top_k=self._top_k) - - # Convert to ChunkWithScore for response - chunks_used = [ - ChunkWithScore( - content=r.content, - source=r.metadata.get("source", "unknown"), - section=r.metadata.get("section", ""), - score=r.score, - title=r.metadata.get("title", ""), - ) - for r in results - ] + span.set_attribute("rag.answer_length", len(answer)) - # Build context for LLM - context_tuples = [ - (r.content, r.metadata.get("source", "unknown"), r.score) for r in results - ] - system_prompt = build_rag_prompt(context_tuples) - - # Generate answer - logger.debug( - "rag_generating", - context_chunks=len(results), - question_length=len(question), - has_history=conversation_history is not None, - history_length=len(conversation_history) if conversation_history else 0, - ) + # Calculate confidence score + scores = [c.score for c in chunks_used] + chunk_texts = [c.content for c in chunks_used] + sources_used = list({c.source for c in chunks_used}) - if conversation_history: - # Multi-turn: include conversation history - messages = conversation_history.copy() - messages.append({"role": "user", "content": question}) - answer = await self._llm.complete_with_history( - system_prompt=system_prompt, - messages=messages, - ) - else: - # Single-turn: no history - answer = await self._llm.complete( - system_prompt=system_prompt, - user_message=question, + # Check for hallucination and get grounding ratio + grounding_ratio: float | None = None + if self._safety is not None and chunks_used: + hallucination_result = self._safety.check_hallucination( + answer=answer, + chunks=chunk_texts, + sources=[c.source for c in chunks_used], + ) + if not hallucination_result.is_safe: + # Hallucination detected - return low confidence response + span.set_attribute("rag.blocked", True) + span.set_attribute( + "rag.blocked_reason", SafetyViolationType.HALLUCINATION.value + ) + logger.warning( + "rag_hallucination_detected", + question_length=len(question), + ) + return RAGResponse( + answer=hallucination_result.message, + chunks_used=chunks_used, + question=question, + confidence_level="low", + confidence_score=0.0, + blocked=True, + blocked_reason=SafetyViolationType.HALLUCINATION.value, + ) + # Get grounding ratio for confidence scoring + details = self._safety.get_hallucination_details( + answer=answer, + chunks=chunk_texts, + ) + grounding_ratio = details.support_ratio + + # Calculate confidence + confidence = self._confidence_scorer.score( + chunk_scores=scores, + grounding_ratio=grounding_ratio, ) - # Calculate confidence score - scores = [c.score for c in chunks_used] - chunk_texts = [c.content for c in chunks_used] - sources_used = list({c.source for c in chunks_used}) + span.set_attribute("rag.confidence_level", confidence.level.value) + span.set_attribute("rag.confidence_score", confidence.score) + span.set_attribute("rag.sources_count", len(sources_used)) - # Check for hallucination and get grounding ratio - grounding_ratio: float | None = None - if self._safety is not None and chunks_used: - hallucination_result = self._safety.check_hallucination( - answer=answer, - chunks=chunk_texts, - sources=[c.source for c in chunks_used], + # Log quality metrics for observability + logger.info( + "rag_answer_generated", + question_length=len(question), + answer_length=len(answer), + chunks_used=len(chunks_used), + 
sources_used=sources_used, + top_score=max(scores) if scores else 0.0, + min_score=min(scores) if scores else 0.0, + avg_score=sum(scores) / len(scores) if scores else 0.0, + confidence_level=confidence.level.value, + confidence_score=confidence.score, ) - if not hallucination_result.is_safe: - # Hallucination detected - return low confidence response - logger.warning( - "rag_hallucination_detected", - question_length=len(question), - ) - return RAGResponse( - answer=hallucination_result.message, - chunks_used=chunks_used, + + # Store in cache (only high/medium confidence answers with context) + if self._cache is not None and chunks_used and not confidence.needs_review: + await self._cache.set( question=question, - confidence_level="low", - confidence_score=0.0, - blocked=True, - blocked_reason=SafetyViolationType.HALLUCINATION.value, + answer=answer, + chunks_json=_serialize_chunks(chunks_used), ) - # Get grounding ratio for confidence scoring - details = self._safety.get_hallucination_details( - answer=answer, - chunks=chunk_texts, - ) - grounding_ratio = details.support_ratio - # Calculate confidence - confidence = self._confidence_scorer.score( - chunk_scores=scores, - grounding_ratio=grounding_ratio, - ) - - # Log quality metrics for observability - logger.info( - "rag_answer_generated", - question_length=len(question), - answer_length=len(answer), - chunks_used=len(chunks_used), - sources_used=sources_used, - top_score=max(scores) if scores else 0.0, - min_score=min(scores) if scores else 0.0, - avg_score=sum(scores) / len(scores) if scores else 0.0, - confidence_level=confidence.level.value, - confidence_score=confidence.score, - ) - - # Store in cache (only high/medium confidence answers with context) - if self._cache is not None and chunks_used and not confidence.needs_review: - await self._cache.set( - question=question, + return RAGResponse( answer=answer, - chunks_json=_serialize_chunks(chunks_used), + chunks_used=chunks_used, + question=question, + confidence_level=confidence.level.value, + confidence_score=confidence.score, ) - return RAGResponse( - answer=answer, - chunks_used=chunks_used, - question=question, - confidence_level=confidence.level.value, - confidence_score=confidence.score, - ) - async def index_document(self, file_path: Path) -> IndexingResult: """Index a document into the vector store. @@ -329,104 +362,119 @@ async def index_document(self, file_path: Path) -> IndexingResult: Returns: IndexingResult with status and chunk count. 
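+
+        Example (illustrative sketch; ``service`` stands for an
+        already-assembled service instance, not a name defined here):
+
+            result = await service.index_document(Path("docs/guide.md"))
+            assert result.success, result.error_message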
""" - try: - # Load the document - doc: LoadedDocument = load_document(file_path) - - # Chunk the document - chunks = chunk_document( - doc.content, - source=doc.source, - config=self._chunking_config, - title=doc.title, - ) - - if not chunks: - return IndexingResult( + with tracer.start_as_current_span("rag.index_document") as span: + span.set_attribute("rag.file_path", str(file_path)) + span.set_attribute("rag.file_name", file_path.name) + + try: + # Load the document + doc: LoadedDocument = load_document(file_path) + span.set_attribute("rag.source", doc.source) + + # Chunk the document + chunks = chunk_document( + doc.content, source=doc.source, - chunks_created=0, - success=True, + config=self._chunking_config, + title=doc.title, ) - # Generate embeddings for all chunks - logger.debug( - "rag_embedding_chunks", - source=doc.source, - chunk_count=len(chunks), - ) - contents = [chunk.content for chunk in chunks] - embeddings = await self._embeddings.embed_batch(contents) - - # Create DocumentChunk objects for storage - doc_chunks = [ - DocumentChunk( - id=f"{doc.source}:{chunk.position}:{uuid.uuid4().hex[:8]}", - content=chunk.content, - embedding=embedding, - metadata=chunk.metadata, - ) - for chunk, embedding in zip(chunks, embeddings, strict=True) - ] - - # Store in vector DB - await self._store.add_chunks(doc_chunks) - - # Track documents for keyword index - for doc_chunk in doc_chunks: - self._indexed_docs.append( - IndexedDocument( - id=doc_chunk.id, - content=doc_chunk.content, - metadata=doc_chunk.metadata, + if not chunks: + span.set_attribute("rag.chunks_created", 0) + return IndexingResult( + source=doc.source, + chunks_created=0, + success=True, ) - ) - # Rebuild keyword index if hybrid retriever is enabled - if self._retriever is not None: - self._retriever.build_keyword_index(self._indexed_docs) + span.set_attribute("rag.chunks_to_embed", len(chunks)) + + # Generate embeddings for all chunks logger.debug( - "rag_keyword_index_rebuilt", - document_count=len(self._indexed_docs), + "rag_embedding_chunks", + source=doc.source, + chunk_count=len(chunks), ) + contents = [chunk.content for chunk in chunks] + embeddings = await self._embeddings.embed_batch(contents) + + # Create DocumentChunk objects for storage + doc_chunks = [ + DocumentChunk( + id=f"{doc.source}:{chunk.position}:{uuid.uuid4().hex[:8]}", + content=chunk.content, + embedding=embedding, + metadata=chunk.metadata, + ) + for chunk, embedding in zip(chunks, embeddings, strict=True) + ] + + # Store in vector DB + await self._store.add_chunks(doc_chunks) + + # Track documents for keyword index + for doc_chunk in doc_chunks: + self._indexed_docs.append( + IndexedDocument( + id=doc_chunk.id, + content=doc_chunk.content, + metadata=doc_chunk.metadata, + ) + ) - logger.info( - "rag_document_indexed", - source=doc.source, - chunks_created=len(doc_chunks), - ) + # Rebuild keyword index if hybrid retriever is enabled + if self._retriever is not None: + self._retriever.build_keyword_index(self._indexed_docs) + logger.debug( + "rag_keyword_index_rebuilt", + document_count=len(self._indexed_docs), + ) - return IndexingResult( - source=doc.source, - chunks_created=len(doc_chunks), - success=True, - ) + span.set_attribute("rag.chunks_created", len(doc_chunks)) + span.set_attribute("rag.success", True) - except DocumentLoadError as e: - logger.error( - "rag_load_error", - file_path=str(file_path), - error=str(e), - ) - return IndexingResult( - source=file_path.name, - chunks_created=0, - success=False, - error_message=str(e), - ) + 
logger.info( + "rag_document_indexed", + source=doc.source, + chunks_created=len(doc_chunks), + ) - except Exception as e: - logger.error( - "rag_index_error", - file_path=str(file_path), - error=str(e), - error_type=type(e).__name__, - ) - return IndexingResult( - source=file_path.name, - chunks_created=0, - success=False, - error_message=f"Indexing failed: {e}", - ) + return IndexingResult( + source=doc.source, + chunks_created=len(doc_chunks), + success=True, + ) + + except DocumentLoadError as e: + span.record_exception(e) + span.set_attribute("rag.success", False) + logger.error( + "rag_load_error", + file_path=str(file_path), + error=str(e), + ) + return IndexingResult( + source=file_path.name, + chunks_created=0, + success=False, + error_message=str(e), + ) + + except Exception as e: + span.record_exception(e) + span.set_attribute("rag.success", False) + logger.error( + "rag_index_error", + file_path=str(file_path), + error=str(e), + error_type=type(e).__name__, + ) + return IndexingResult( + source=file_path.name, + chunks_created=0, + success=False, + error_message=f"Indexing failed: {e}", + ) async def index_all_documents(self, documents_path: Path) -> list[IndexingResult]: """Index all documents in a directory. diff --git a/tests/test_observability.py b/tests/test_observability.py new file mode 100644 index 0000000..80119b8 --- /dev/null +++ b/tests/test_observability.py @@ -0,0 +1,236 @@ +"""Tests for the observability module.""" + +import pytest +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + +from src.infrastructure.observability.structlog_processor import add_trace_context +from src.infrastructure.observability.tracing import ( + add_span_attributes, + get_current_span_id, + get_current_trace_id, + get_tracer, +) + +# Module-level setup: configure TracerProvider once before any tests run +_exporter = InMemorySpanExporter() +_provider = TracerProvider() +_processor = SimpleSpanProcessor(_exporter) +_provider.add_span_processor(_processor) +trace.set_tracer_provider(_provider) + + +@pytest.fixture(autouse=True) +def clear_spans(): + """Clear spans before each test.""" + _exporter.clear() + yield + _exporter.clear() + + +def get_finished_spans(): + """Get finished spans from the module-level exporter.""" + return _exporter.get_finished_spans() + + +class TestGetTracer: + """Tests for get_tracer function.""" + + def test_returns_tracer_instance(self): + """get_tracer should return a Tracer instance.""" + tracer = get_tracer("test_module") + assert tracer is not None + # Create a span to verify tracer works + with tracer.start_as_current_span("test_span"): + pass + spans = get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "test_span" + + +class TestTracedDecorator: + """Tests for @traced decorator. + + Note: The @traced decorator caches its tracer at decoration time, + so we test it using get_tracer() directly to verify span creation + in the test's TracerProvider context. 
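+
+    OpenTelemetry's trace.set_tracer_provider() is effectively one-shot per
+    process (later calls are ignored with a warning), which is why the
+    provider above is configured once at module import rather than per test.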
+ """ + + def test_traced_sync_function(self): + """Sync function wrapped in span should create a span.""" + tracer = get_tracer("test") + + def my_sync_function(): + return "result" + + # Manually wrap to test the pattern + with tracer.start_as_current_span("my_sync_function"): + result = my_sync_function() + + assert result == "result" + spans = get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "my_sync_function" + + @pytest.mark.asyncio + async def test_traced_async_function(self): + """Async function wrapped in span should create a span.""" + tracer = get_tracer("test") + + async def my_async_function(): + return "async_result" + + with tracer.start_as_current_span("my_async_function"): + result = await my_async_function() + + assert result == "async_result" + spans = get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "my_async_function" + + def test_span_with_custom_name(self): + """Span should use custom name when provided.""" + tracer = get_tracer("test") + + with tracer.start_as_current_span("custom.span.name"): + pass + + spans = get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "custom.span.name" + + def test_span_with_attributes(self): + """Span should have attributes set correctly.""" + tracer = get_tracer("test") + + with tracer.start_as_current_span("test_span") as span: + span.set_attribute("key1", "value1") + span.set_attribute("key2", 42) + + spans = get_finished_spans() + assert len(spans) == 1 + attrs = dict(spans[0].attributes or {}) + assert attrs.get("key1") == "value1" + assert attrs.get("key2") == 42 + + def test_span_records_exception(self): + """Span should record exceptions with error status.""" + tracer = get_tracer("test") + + with pytest.raises(ValueError, match="test error"): # noqa: SIM117 + with tracer.start_as_current_span("failing_function") as span: + try: + raise ValueError("test error") + except ValueError as e: + span.record_exception(e) + span.set_status(trace.Status(trace.StatusCode.ERROR, str(e))) + raise + + spans = get_finished_spans() + assert len(spans) == 1 + assert spans[0].status.status_code == trace.StatusCode.ERROR + + +class TestAddSpanAttributes: + """Tests for add_span_attributes function.""" + + def test_adds_attributes_to_current_span(self): + """add_span_attributes should add attributes to current span.""" + tracer = get_tracer("test") + + with tracer.start_as_current_span("test_span"): + add_span_attributes({"custom_key": "custom_value", "number": 100}) + + spans = get_finished_spans() + assert len(spans) == 1 + attrs = dict(spans[0].attributes or {}) + assert attrs.get("custom_key") == "custom_value" + assert attrs.get("number") == 100 + + def test_does_nothing_without_active_span(self): + """add_span_attributes should not fail without active span.""" + # Should not raise + add_span_attributes({"key": "value"}) + + +class TestGetCurrentTraceId: + """Tests for get_current_trace_id function.""" + + def test_returns_trace_id_when_in_span(self): + """get_current_trace_id should return trace ID in active span.""" + tracer = get_tracer("test") + + with tracer.start_as_current_span("test_span"): + trace_id = get_current_trace_id() + assert trace_id is not None + assert len(trace_id) == 32 # 128-bit trace ID as hex + + def test_returns_none_without_span(self): + """get_current_trace_id should return None without active span.""" + # Outside any span, the context is invalid + # Result depends on whether there's lingering context + _ = get_current_trace_id() + + +class 
TestGetCurrentSpanId: + """Tests for get_current_span_id function.""" + + def test_returns_span_id_when_in_span(self): + """get_current_span_id should return span ID in active span.""" + tracer = get_tracer("test") + + with tracer.start_as_current_span("test_span"): + span_id = get_current_span_id() + assert span_id is not None + assert len(span_id) == 16 # 64-bit span ID as hex + + +class TestStructlogProcessor: + """Tests for structlog trace context processor.""" + + def test_adds_trace_context_when_in_span(self): + """Processor should add trace_id and span_id to event dict.""" + tracer = get_tracer("test") + + with tracer.start_as_current_span("test_span"): + event_dict: dict = {"event": "test_event"} + result = add_trace_context(None, "info", event_dict) + + assert "trace_id" in result + assert "span_id" in result + assert len(result["trace_id"]) == 32 + assert len(result["span_id"]) == 16 + + def test_does_not_add_context_without_span(self): + """Processor should not add trace context without active span.""" + event_dict: dict = {"event": "test_event"} + result = add_trace_context(None, "info", event_dict) + + # May or may not have trace_id depending on provider state + # The important thing is it doesn't crash + assert "event" in result + + +class TestNestedSpans: + """Tests for nested span relationships.""" + + def test_nested_spans_have_parent_child_relationship(self): + """Nested spans should maintain parent-child relationship.""" + tracer = get_tracer("test") + + with tracer.start_as_current_span("parent"): # noqa: SIM117 + with tracer.start_as_current_span("child"): # Nested to test parent-child + pass + + spans = get_finished_spans() + assert len(spans) == 2 + + # Find parent and child + parent = next(s for s in spans if s.name == "parent") + child = next(s for s in spans if s.name == "child") + + # Child should reference parent + assert child.parent is not None + assert child.parent.span_id == parent.context.span_id diff --git a/uv.lock b/uv.lock index 154716c..ac1a0bf 100644 --- a/uv.lock +++ b/uv.lock @@ -683,14 +683,14 @@ wheels = [ [[package]] name = "importlib-metadata" -version = "8.7.0" +version = "8.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "zipp" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/12/33e59336dca5be0c398a7482335911a33aa0e20776128f038019f1a95f1b/importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7", size = 55304, upload-time = "2024-09-11T14:56:08.937Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d9/a1e041c5e7caa9a05c925f4bdbdfb7f006d1f74996af53467bc394c97be7/importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b", size = 26514, upload-time = "2024-09-11T14:56:07.019Z" }, ] [[package]] @@ -1192,50 +1192,68 @@ wheels = [ [[package]] name = "opentelemetry-api" 
-version = "1.39.1" +version = "1.29.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "deprecated" }, { name = "importlib-metadata" }, - { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/97/b9/3161be15bb8e3ad01be8be5a968a9237c3027c5be504362ff800fca3e442/opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c", size = 65767, upload-time = "2025-12-11T13:32:39.182Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bc/8e/b886a5e9861afa188d1fe671fb96ff9a1d90a23d57799331e137cc95d573/opentelemetry_api-1.29.0.tar.gz", hash = "sha256:d04a6cf78aad09614f52964ecb38021e248f5714dc32c2e0d8fd99517b4d69cf", size = 62900, upload-time = "2024-12-11T17:02:23.275Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cf/df/d3f1ddf4bb4cb50ed9b1139cc7b1c54c34a1e7ce8fd1b9a37c0d1551a6bd/opentelemetry_api-1.39.1-py3-none-any.whl", hash = "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", size = 66356, upload-time = "2025-12-11T13:32:17.304Z" }, + { url = "https://files.pythonhosted.org/packages/43/53/5249ea860d417a26a3a6f1bdedfc0748c4f081a3adaec3d398bc0f7c6a71/opentelemetry_api-1.29.0-py3-none-any.whl", hash = "sha256:5fcd94c4141cc49c736271f3e1efb777bebe9cc535759c54c936cca4f1b312b8", size = 64304, upload-time = "2024-12-11T17:01:48.691Z" }, ] [[package]] name = "opentelemetry-exporter-otlp-proto-common" -version = "1.39.1" +version = "1.29.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-proto" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e9/9d/22d241b66f7bbde88a3bfa6847a351d2c46b84de23e71222c6aae25c7050/opentelemetry_exporter_otlp_proto_common-1.39.1.tar.gz", hash = "sha256:763370d4737a59741c89a67b50f9e39271639ee4afc999dadfe768541c027464", size = 20409, upload-time = "2025-12-11T13:32:40.885Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b1/58/f7fd7eaf592b2521999a4271ab3ce1c82fe37fe9b0dc25c348398d95d66a/opentelemetry_exporter_otlp_proto_common-1.29.0.tar.gz", hash = "sha256:e7c39b5dbd1b78fe199e40ddfe477e6983cb61aa74ba836df09c3869a3e3e163", size = 19133, upload-time = "2024-12-11T17:02:27.092Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/02/ffc3e143d89a27ac21fd557365b98bd0653b98de8a101151d5805b5d4c33/opentelemetry_exporter_otlp_proto_common-1.39.1-py3-none-any.whl", hash = "sha256:08f8a5862d64cc3435105686d0216c1365dc5701f86844a8cd56597d0c764fde", size = 18366, upload-time = "2025-12-11T13:32:20.2Z" }, + { url = "https://files.pythonhosted.org/packages/9e/75/7609bda3d72bf307839570b226180513e854c01443ebe265ed732a4980fc/opentelemetry_exporter_otlp_proto_common-1.29.0-py3-none-any.whl", hash = "sha256:a9d7376c06b4da9cf350677bcddb9618ed4b8255c3f6476975f5e38274ecd3aa", size = 18459, upload-time = "2024-12-11T17:01:54.817Z" }, ] [[package]] name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.39.1" +version = "1.29.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "deprecated" }, { name = "googleapis-common-protos" }, { name = "grpcio" }, { name = "opentelemetry-api" }, { name = "opentelemetry-exporter-otlp-proto-common" }, { name = "opentelemetry-proto" }, { name = "opentelemetry-sdk" }, - { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/53/48/b329fed2c610c2c32c9366d9dc597202c9d1e58e631c137ba15248d8850f/opentelemetry_exporter_otlp_proto_grpc-1.39.1.tar.gz", hash = 
"sha256:772eb1c9287485d625e4dbe9c879898e5253fea111d9181140f51291b5fec3ad", size = 24650, upload-time = "2025-12-11T13:32:41.429Z" } +sdist = { url = "https://files.pythonhosted.org/packages/41/aa/b3f2190613141f35fe15145bf438334fdd1eac8aeeee4f7ecbc887999443/opentelemetry_exporter_otlp_proto_grpc-1.29.0.tar.gz", hash = "sha256:3d324d07d64574d72ed178698de3d717f62a059a93b6b7685ee3e303384e73ea", size = 26224, upload-time = "2024-12-11T17:02:28.911Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/de/4b4127a25d1594851d99032f3a9acb09cb512d11edec713410fb906607f4/opentelemetry_exporter_otlp_proto_grpc-1.29.0-py3-none-any.whl", hash = "sha256:5a2a3a741a2543ed162676cf3eefc2b4150e6f4f0a193187afb0d0e65039c69c", size = 18520, upload-time = "2024-12-11T17:01:57.001Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-http" +version = "1.29.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "googleapis-common-protos" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ab/88/e70a2e9fbb1bddb1ab7b6d74fb02c68601bff5948292ce33464c84ee082e/opentelemetry_exporter_otlp_proto_http-1.29.0.tar.gz", hash = "sha256:b10d174e3189716f49d386d66361fbcf6f2b9ad81e05404acdee3f65c8214204", size = 15041, upload-time = "2024-12-11T17:02:30.568Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/a3/cc9b66575bd6597b98b886a2067eea2693408d2d5f39dad9ab7fc264f5f3/opentelemetry_exporter_otlp_proto_grpc-1.39.1-py3-none-any.whl", hash = "sha256:fa1c136a05c7e9b4c09f739469cbdb927ea20b34088ab1d959a849b5cc589c18", size = 19766, upload-time = "2025-12-11T13:32:21.027Z" }, + { url = "https://files.pythonhosted.org/packages/31/49/a1c3d24e8fe73b5f422e21b46c24aed3db7fd9427371c06442e7bdfe4d3b/opentelemetry_exporter_otlp_proto_http-1.29.0-py3-none-any.whl", hash = "sha256:b228bdc0f0cfab82eeea834a7f0ffdd2a258b26aa33d89fb426c29e8e934d9d0", size = 17217, upload-time = "2024-12-11T17:01:58.229Z" }, ] [[package]] name = "opentelemetry-instrumentation" -version = "0.60b1" +version = "0.50b0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, @@ -1243,14 +1261,14 @@ dependencies = [ { name = "packaging" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/41/0f/7e6b713ac117c1f5e4e3300748af699b9902a2e5e34c9cf443dde25a01fa/opentelemetry_instrumentation-0.60b1.tar.gz", hash = "sha256:57ddc7974c6eb35865af0426d1a17132b88b2ed8586897fee187fd5b8944bd6a", size = 31706, upload-time = "2025-12-11T13:36:42.515Z" } +sdist = { url = "https://files.pythonhosted.org/packages/79/2e/2e59a7cb636dc394bd7cf1758ada5e8ed87590458ca6bb2f9c26e0243847/opentelemetry_instrumentation-0.50b0.tar.gz", hash = "sha256:7d98af72de8dec5323e5202e46122e5f908592b22c6d24733aad619f07d82979", size = 26539, upload-time = "2024-12-11T17:05:18.336Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/77/d2/6788e83c5c86a2690101681aeef27eeb2a6bf22df52d3f263a22cee20915/opentelemetry_instrumentation-0.60b1-py3-none-any.whl", hash = "sha256:04480db952b48fb1ed0073f822f0ee26012b7be7c3eac1a3793122737c78632d", size = 33096, upload-time = "2025-12-11T13:35:33.067Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/b1/55a77152a83ec8998e520a3a575f44af1020cfe4bdc000b7538583293b85/opentelemetry_instrumentation-0.50b0-py3-none-any.whl", hash = "sha256:b8f9fc8812de36e1c6dffa5bfc6224df258841fb387b6dfe5df15099daa10630", size = 30728, upload-time = "2024-12-11T17:03:54.948Z" }, ] [[package]] name = "opentelemetry-instrumentation-asgi" -version = "0.60b1" +version = "0.50b0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "asgiref" }, @@ -1259,14 +1277,14 @@ dependencies = [ { name = "opentelemetry-semantic-conventions" }, { name = "opentelemetry-util-http" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/77/db/851fa88db7441da82d50bd80f2de5ee55213782e25dc858e04d0c9961d60/opentelemetry_instrumentation_asgi-0.60b1.tar.gz", hash = "sha256:16bfbe595cd24cda309a957456d0fc2523f41bc7b076d1f2d7e98a1ad9876d6f", size = 26107, upload-time = "2025-12-11T13:36:47.015Z" } +sdist = { url = "https://files.pythonhosted.org/packages/49/cc/a7b2fd243c6d2621803092eba62e450071b6752dfe4f64f530bbfd91a328/opentelemetry_instrumentation_asgi-0.50b0.tar.gz", hash = "sha256:3ca4cb5616ae6a3e8ce86e7d5c360a8d8cc8ed722cf3dc8a5e44300774e87d49", size = 24105, upload-time = "2024-12-11T17:05:23.773Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/76/1fb94367cef64420d2171157a6b9509582873bd09a6afe08a78a8d1f59d9/opentelemetry_instrumentation_asgi-0.60b1-py3-none-any.whl", hash = "sha256:d48def2dbed10294c99cfcf41ebbd0c414d390a11773a41f472d20000fcddc25", size = 16933, upload-time = "2025-12-11T13:35:40.462Z" }, + { url = "https://files.pythonhosted.org/packages/d2/81/0899c6b56b1023835f266d909250d439174afa0c34ed5944c5021d3da263/opentelemetry_instrumentation_asgi-0.50b0-py3-none-any.whl", hash = "sha256:2ba1297f746e55dec5a17fe825689da0613662fb25c004c3965a6c54b1d5be22", size = 16304, upload-time = "2024-12-11T17:04:03.555Z" }, ] [[package]] name = "opentelemetry-instrumentation-fastapi" -version = "0.60b1" +version = "0.50b0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, @@ -1275,57 +1293,73 @@ dependencies = [ { name = "opentelemetry-semantic-conventions" }, { name = "opentelemetry-util-http" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9c/e7/e7e5e50218cf488377209d85666b182fa2d4928bf52389411ceeee1b2b60/opentelemetry_instrumentation_fastapi-0.60b1.tar.gz", hash = "sha256:de608955f7ff8eecf35d056578346a5365015fd7d8623df9b1f08d1c74769c01", size = 24958, upload-time = "2025-12-11T13:36:59.35Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8d/f8/1917b0b3e414e23c7d71c9a33f0ce020f94bc47d22a30f54ace704e07588/opentelemetry_instrumentation_fastapi-0.50b0.tar.gz", hash = "sha256:16b9181682136da210295def2bb304a32fb9bdee9a935cdc9da43567f7c1149e", size = 19214, upload-time = "2024-12-11T17:05:42.062Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7d/cc/6e808328ba54662e50babdcab21138eae4250bc0fddf67d55526a615a2ca/opentelemetry_instrumentation_fastapi-0.60b1-py3-none-any.whl", hash = "sha256:af94b7a239ad1085fc3a820ecf069f67f579d7faf4c085aaa7bd9b64eafc8eaf", size = 13478, upload-time = "2025-12-11T13:36:00.811Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d6/37784bb30b213e2dd6838b9f96c2940907022c1b75ef1ff18a99afe42433/opentelemetry_instrumentation_fastapi-0.50b0-py3-none-any.whl", hash = "sha256:8f03b738495e4705fbae51a2826389c7369629dace89d0f291c06ffefdff5e52", size = 12079, upload-time = "2024-12-11T17:04:26.15Z" }, +] + +[[package]] +name = 
"opentelemetry-instrumentation-httpx" +version = "0.50b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d5/9c/4a6b0af28d579690fbab7ffd0560802e4384cd56c8d637f99641f44a7291/opentelemetry_instrumentation_httpx-0.50b0.tar.gz", hash = "sha256:0072d1d39552449c08a45a7a0db0cd6af32c85205bd97267b2a272fc56a9b438", size = 17611, upload-time = "2024-12-11T17:05:44.377Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/25/6a0edd3d161d5a15cf4538e32a1289f1f6fd7a52651afd32bb856ede4e99/opentelemetry_instrumentation_httpx-0.50b0-py3-none-any.whl", hash = "sha256:27acd41a9e70384d0978d58f492e5c16fc7a1b2363d5992b5bd0a27a3df7b68e", size = 13838, upload-time = "2024-12-11T17:04:31.053Z" }, ] [[package]] name = "opentelemetry-proto" -version = "1.39.1" +version = "1.29.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "protobuf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/49/1d/f25d76d8260c156c40c97c9ed4511ec0f9ce353f8108ca6e7561f82a06b2/opentelemetry_proto-1.39.1.tar.gz", hash = "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8", size = 46152, upload-time = "2025-12-11T13:32:48.681Z" } +sdist = { url = "https://files.pythonhosted.org/packages/80/52/fd3b3d79e1b00ad2dcac92db6885e49bedbf7a6828647954e4952d653132/opentelemetry_proto-1.29.0.tar.gz", hash = "sha256:3c136aa293782e9b44978c738fff72877a4b78b5d21a64e879898db7b2d93e5d", size = 34320, upload-time = "2024-12-11T17:02:44.709Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/51/95/b40c96a7b5203005a0b03d8ce8cd212ff23f1793d5ba289c87a097571b18/opentelemetry_proto-1.39.1-py3-none-any.whl", hash = "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", size = 72535, upload-time = "2025-12-11T13:32:33.866Z" }, + { url = "https://files.pythonhosted.org/packages/bd/66/a500e38ee322d89fce61c74bd7769c8ef3bebc6c2f43fda5f3fc3441286d/opentelemetry_proto-1.29.0-py3-none-any.whl", hash = "sha256:495069c6f5495cbf732501cdcd3b7f60fda2b9d3d4255706ca99b7ca8dec53ff", size = 55818, upload-time = "2024-12-11T17:02:14.03Z" }, ] [[package]] name = "opentelemetry-sdk" -version = "1.39.1" +version = "1.29.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "opentelemetry-semantic-conventions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/eb/fb/c76080c9ba07e1e8235d24cdcc4d125ef7aa3edf23eb4e497c2e50889adc/opentelemetry_sdk-1.39.1.tar.gz", hash = "sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6", size = 171460, upload-time = "2025-12-11T13:32:49.369Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0c/5a/1ed4c3cf6c09f80565fc085f7e8efa0c222712fd2a9412d07424705dcf72/opentelemetry_sdk-1.29.0.tar.gz", hash = "sha256:b0787ce6aade6ab84315302e72bd7a7f2f014b0fb1b7c3295b88afe014ed0643", size = 157229, upload-time = "2024-12-11T17:02:47.024Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/98/e91cf858f203d86f4eccdf763dcf01cf03f1dae80c3750f7e635bfa206b6/opentelemetry_sdk-1.39.1-py3-none-any.whl", hash = "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c", size = 132565, upload-time = "2025-12-11T13:32:35.069Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/1d/512b86af21795fb463726665e2f61db77d384e8779fdcf4cb0ceec47866d/opentelemetry_sdk-1.29.0-py3-none-any.whl", hash = "sha256:173be3b5d3f8f7d671f20ea37056710217959e774e2749d984355d1f9391a30a", size = 118078, upload-time = "2024-12-11T17:02:16.748Z" }, ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.60b1" +version = "0.50b0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "deprecated" }, { name = "opentelemetry-api" }, - { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/91/df/553f93ed38bf22f4b999d9be9c185adb558982214f33eae539d3b5cd0858/opentelemetry_semantic_conventions-0.60b1.tar.gz", hash = "sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953", size = 137935, upload-time = "2025-12-11T13:32:50.487Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/4e/d7c7c91ff47cd96fe4095dd7231701aec7347426fd66872ff320d6cd1fcc/opentelemetry_semantic_conventions-0.50b0.tar.gz", hash = "sha256:02dc6dbcb62f082de9b877ff19a3f1ffaa3c306300fa53bfac761c4567c83d38", size = 100459, upload-time = "2024-12-11T17:02:48.115Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/5e/5958555e09635d09b75de3c4f8b9cae7335ca545d77392ffe7331534c402/opentelemetry_semantic_conventions-0.60b1-py3-none-any.whl", hash = "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb", size = 219982, upload-time = "2025-12-11T13:32:36.955Z" }, + { url = "https://files.pythonhosted.org/packages/da/fb/dc15fad105450a015e913cfa4f5c27b6a5f1bea8fb649f8cae11e699c8af/opentelemetry_semantic_conventions-0.50b0-py3-none-any.whl", hash = "sha256:e87efba8fdb67fb38113efea6a349531e75ed7ffc01562f65b802fcecb5e115e", size = 166602, upload-time = "2024-12-11T17:02:19.504Z" }, ] [[package]] name = "opentelemetry-util-http" -version = "0.60b1" +version = "0.50b0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/50/fc/c47bb04a1d8a941a4061307e1eddfa331ed4d0ab13d8a9781e6db256940a/opentelemetry_util_http-0.60b1.tar.gz", hash = "sha256:0d97152ca8c8a41ced7172d29d3622a219317f74ae6bb3027cfbdcf22c3cc0d6", size = 11053, upload-time = "2025-12-11T13:37:25.115Z" } +sdist = { url = "https://files.pythonhosted.org/packages/69/10/ce3f0d1157cedbd819194f0b27a6bbb7c19a8bceb3941e4a4775014076cf/opentelemetry_util_http-0.50b0.tar.gz", hash = "sha256:dc4606027e1bc02aabb9533cc330dd43f874fca492e4175c31d7154f341754af", size = 7859, upload-time = "2024-12-11T17:06:14.206Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/16/5c/d3f1733665f7cd582ef0842fb1d2ed0bc1fba10875160593342d22bba375/opentelemetry_util_http-0.60b1-py3-none-any.whl", hash = "sha256:66381ba28550c91bee14dcba8979ace443444af1ed609226634596b4b0faf199", size = 8947, upload-time = "2025-12-11T13:36:37.151Z" }, + { url = "https://files.pythonhosted.org/packages/64/8a/9e1b54f50d1fddebbeac9a9b0632f8db6ece7add904fb593ee2e268ee4de/opentelemetry_util_http-0.50b0-py3-none-any.whl", hash = "sha256:21f8aedac861ffa3b850f8c0a6c373026189eb8630ac6e14a2bf8c55695cc090", size = 6942, upload-time = "2024-12-11T17:05:13.342Z" }, ] [[package]] @@ -1494,17 +1528,16 @@ wheels = [ [[package]] name = "protobuf" -version = "6.33.2" +version = "5.29.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/34/44/e49ecff446afeec9d1a66d6bbf9adc21e3c7cea7803a920ca3773379d4f6/protobuf-6.33.2.tar.gz", hash = 
"sha256:56dc370c91fbb8ac85bc13582c9e373569668a290aa2e66a590c2a0d35ddb9e4", size = 444296, upload-time = "2025-12-06T00:17:53.311Z" } +sdist = { url = "https://files.pythonhosted.org/packages/43/29/d09e70352e4e88c9c7a198d5645d7277811448d76c23b00345670f7c8a38/protobuf-5.29.5.tar.gz", hash = "sha256:bc1463bafd4b0929216c35f437a8e28731a2b7fe3d98bb77a600efced5a15c84", size = 425226, upload-time = "2025-05-28T23:51:59.82Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/91/1e3a34881a88697a7354ffd177e8746e97a722e5e8db101544b47e84afb1/protobuf-6.33.2-cp310-abi3-win32.whl", hash = "sha256:87eb388bd2d0f78febd8f4c8779c79247b26a5befad525008e49a6955787ff3d", size = 425603, upload-time = "2025-12-06T00:17:41.114Z" }, - { url = "https://files.pythonhosted.org/packages/64/20/4d50191997e917ae13ad0a235c8b42d8c1ab9c3e6fd455ca16d416944355/protobuf-6.33.2-cp310-abi3-win_amd64.whl", hash = "sha256:fc2a0e8b05b180e5fc0dd1559fe8ebdae21a27e81ac77728fb6c42b12c7419b4", size = 436930, upload-time = "2025-12-06T00:17:43.278Z" }, - { url = "https://files.pythonhosted.org/packages/b2/ca/7e485da88ba45c920fb3f50ae78de29ab925d9e54ef0de678306abfbb497/protobuf-6.33.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d9b19771ca75935b3a4422957bc518b0cecb978b31d1dd12037b088f6bcc0e43", size = 427621, upload-time = "2025-12-06T00:17:44.445Z" }, - { url = "https://files.pythonhosted.org/packages/7d/4f/f743761e41d3b2b2566748eb76bbff2b43e14d5fcab694f494a16458b05f/protobuf-6.33.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:b5d3b5625192214066d99b2b605f5783483575656784de223f00a8d00754fc0e", size = 324460, upload-time = "2025-12-06T00:17:45.678Z" }, - { url = "https://files.pythonhosted.org/packages/b1/fa/26468d00a92824020f6f2090d827078c09c9c587e34cbfd2d0c7911221f8/protobuf-6.33.2-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8cd7640aee0b7828b6d03ae518b5b4806fdfc1afe8de82f79c3454f8aef29872", size = 339168, upload-time = "2025-12-06T00:17:46.813Z" }, - { url = "https://files.pythonhosted.org/packages/56/13/333b8f421738f149d4fe5e49553bc2a2ab75235486259f689b4b91f96cec/protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:1f8017c48c07ec5859106533b682260ba3d7c5567b1ca1f24297ce03384d1b4f", size = 323270, upload-time = "2025-12-06T00:17:48.253Z" }, - { url = "https://files.pythonhosted.org/packages/0e/15/4f02896cc3df04fc465010a4c6a0cd89810f54617a32a70ef531ed75d61c/protobuf-6.33.2-py3-none-any.whl", hash = "sha256:7636aad9bb01768870266de5dc009de2d1b936771b38a793f73cbbf279c91c5c", size = 170501, upload-time = "2025-12-06T00:17:52.211Z" }, + { url = "https://files.pythonhosted.org/packages/5f/11/6e40e9fc5bba02988a214c07cf324595789ca7820160bfd1f8be96e48539/protobuf-5.29.5-cp310-abi3-win32.whl", hash = "sha256:3f1c6468a2cfd102ff4703976138844f78ebd1fb45f49011afc5139e9e283079", size = 422963, upload-time = "2025-05-28T23:51:41.204Z" }, + { url = "https://files.pythonhosted.org/packages/81/7f/73cefb093e1a2a7c3ffd839e6f9fcafb7a427d300c7f8aef9c64405d8ac6/protobuf-5.29.5-cp310-abi3-win_amd64.whl", hash = "sha256:3f76e3a3675b4a4d867b52e4a5f5b78a2ef9565549d4037e06cf7b0942b1d3fc", size = 434818, upload-time = "2025-05-28T23:51:44.297Z" }, + { url = "https://files.pythonhosted.org/packages/dd/73/10e1661c21f139f2c6ad9b23040ff36fee624310dc28fba20d33fdae124c/protobuf-5.29.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e38c5add5a311f2a6eb0340716ef9b039c1dfa428b28f25a7838ac329204a671", size = 418091, upload-time = "2025-05-28T23:51:45.907Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/04/98f6f8cf5b07ab1294c13f34b4e69b3722bb609c5b701d6c169828f9f8aa/protobuf-5.29.5-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:fa18533a299d7ab6c55a238bf8629311439995f2e7eca5caaff08663606e9015", size = 319824, upload-time = "2025-05-28T23:51:47.545Z" }, + { url = "https://files.pythonhosted.org/packages/85/e4/07c80521879c2d15f321465ac24c70efe2381378c00bf5e56a0f4fbac8cd/protobuf-5.29.5-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:63848923da3325e1bf7e9003d680ce6e14b07e55d0473253a690c3a8b8fd6e61", size = 319942, upload-time = "2025-05-28T23:51:49.11Z" }, + { url = "https://files.pythonhosted.org/packages/7e/cc/7e77861000a0691aeea8f4566e5d3aa716f2b1dece4a24439437e41d3d25/protobuf-5.29.5-py3-none-any.whl", hash = "sha256:6cf42630262c59b2d8de33954443d94b746c952b01434fc58a417fdbd2e84bd5", size = 172823, upload-time = "2025-05-28T23:51:58.157Z" }, ] [[package]] @@ -1806,6 +1839,11 @@ dependencies = [ { name = "jinja2" }, { name = "markdown" }, { name = "openai" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-instrumentation-fastapi" }, + { name = "opentelemetry-instrumentation-httpx" }, + { name = "opentelemetry-sdk" }, { name = "pydantic" }, { name = "pydantic-settings" }, { name = "pyjwt" }, @@ -1848,6 +1886,11 @@ requires-dist = [ { name = "markdown", specifier = ">=3.10" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.13.0" }, { name = "openai", specifier = "~=1.59.0" }, + { name = "opentelemetry-api", specifier = "~=1.29.0" }, + { name = "opentelemetry-exporter-otlp-proto-http", specifier = "~=1.29.0" }, + { name = "opentelemetry-instrumentation-fastapi", specifier = "~=0.50b0" }, + { name = "opentelemetry-instrumentation-httpx", specifier = "~=0.50b0" }, + { name = "opentelemetry-sdk", specifier = "~=1.29.0" }, { name = "pip-audit", marker = "extra == 'dev'", specifier = ">=2.7.0" }, { name = "pydantic", specifier = "~=2.10.0" }, { name = "pydantic-settings", specifier = "~=2.6.0" },