From 93ea754ef3aa97a95dbbf3a5b0180cb53df5a47c Mon Sep 17 00:00:00 2001
From: iamsims <074bct541.simran@pcampus.edu.np>
Date: Tue, 2 Dec 2025 14:58:26 -0600
Subject: [PATCH 1/2] Create a base class for Criteria

---
 akd/structures.py     | 9 +++++++++
 akd/tools/reranker.py | 6 ++----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/akd/structures.py b/akd/structures.py
index 69f8006d..ebbc24f1 100644
--- a/akd/structures.py
+++ b/akd/structures.py
@@ -242,6 +242,13 @@ class PaperDataItem(BaseModel):
     )
 
 
+class BaseCriterion(BaseModel):
+    """Base model holding the name and description shared by reranking criteria."""
+
+    name: str = Field(..., description="Criterion name")
+    description: str = Field(..., description="Detailed description of what this criterion evaluates")
+
+
 # =============================================================================
 # Extraction Schemas
 # =============================================================================
@@ -331,6 +338,7 @@ def name(self) -> str:
 # Type alias for semantic clarity in literature search contexts
 LitSearchResult = SearchResultItem
 
+
 __all__ = [
     # Search and Data Models
     "SearchResult",
@@ -342,4 +350,5 @@ def name(self) -> str:
     "SingleEstimation",
     # Tool Models
     "ToolSearchResult",
+    "BaseCriterion",
 ]
diff --git a/akd/tools/reranker.py b/akd/tools/reranker.py
index c4117816..f9d77cb0 100644
--- a/akd/tools/reranker.py
+++ b/akd/tools/reranker.py
@@ -11,7 +11,7 @@
 
 from akd._base import InputSchema, OutputSchema
 from akd.agents._base import BaseAgentConfig, LiteLLMInstructorBaseAgent
-from akd.structures import SearchResultItem
+from akd.structures import BaseCriterion, SearchResultItem
 from akd.tools._base import BaseTool, BaseToolConfig
 from akd.tools.search.utils import deduplicate_results, sort_results
 
@@ -167,11 +167,9 @@ class ScoringCategory(BaseModel):
     value: float = Field(..., description="Numeric score for this category")
 
 
-class ScoringCriterion(BaseModel):
+class ScoringCriterion(BaseCriterion):
     """Individual criterion for evaluating results."""
 
-    name: str = Field(..., description="Criterion name (e.g., 'Relevancy', 'Processing Level', 'Ease of Use')")
-    description: str = Field(..., description="Detailed description of what this criterion evaluates")
     weight: float = Field(default=1.0, ge=0.0, le=1.0, description="Weight for this criterion (0.0 to 1.0)")
     scoring_categories: list[ScoringCategory] = Field(
         default_factory=lambda: [

From 62c562f31e4826247bc49a9da8ad2060d8ec421c Mon Sep 17 00:00:00 2001
From: Bernard Benson <bernardvbenson@gmail.com>
Date: Thu, 4 Dec 2025 16:24:18 -0600
Subject: [PATCH 2/2] llm based decomp classifier

---
 .../search/aspect_search/aspect_search.py     |  34 ++
 .../search/aspect_search/interview_utils.py   |  33 +-
 akd/agents/search/aspect_search/structures.py |   6 +
 akd/structures.py                             |  49 +++
 akd/tools/decomp_classifier.py                | 389 ++++++++++++++++++
 examples/decomp_classifier_example.py         | 285 +++++++++++++
 6 files changed, 795 insertions(+), 1 deletion(-)
 create mode 100644 akd/tools/decomp_classifier.py
 create mode 100644 examples/decomp_classifier_example.py

diff --git a/akd/agents/search/aspect_search/aspect_search.py b/akd/agents/search/aspect_search/aspect_search.py
index 1f52853e..1f3ad4b5 100644
--- a/akd/agents/search/aspect_search/aspect_search.py
+++ b/akd/agents/search/aspect_search/aspect_search.py
@@ -23,6 +23,8 @@
     update_references,
     update_search_results,
 )
+from akd.structures import DecompositionClassification
+from akd.tools.decomp_classifier import DecompClassifierConfig, DecompClassifierTool
 from akd.tools.search import SearchResultItem, SearxNGSearchTool
 
 
@@ -89,6 +91,23 @@ class AspectSearchConfig(BaseAgentConfig):
         description="Maximum length of the search result context during interviews.",
     )
 
+    # Query classification configuration
+    enable_query_classification: bool = Field(
+        default=False,
+        description="Whether to classify decomposed queries before execution",
+    )
+    classifier_config: Optional[DecompClassifierConfig] = Field(
+        default=None,
+        description="Configuration for the decomposition classifier tool",
+    )
+    filter_classifications: Optional[List[DecompositionClassification]] = Field(
+        default=None,
+        description=(
+            "If set, only execute queries with these classifications. "
+            "Example: [EXACT, CALCULATOR, PROXY] to skip TANGENTIAL queries"
+        ),
+    )
+
 
 class AspectSearchAgent(BaseAgent):
     input_schema = AspectSearchInputSchema
@@ -113,6 +132,19 @@ def _post_init(
 
         self.search_tool = self.config.search_tool
 
+        # Initialize query classifier if enabled
+        self.classifier_tool = None
+        if self.config.enable_query_classification:
+            classifier_config = self.config.classifier_config or DecompClassifierConfig()
+            self.classifier_tool = DecompClassifierTool(
+                config=classifier_config,
+                debug=self.debug,
+            )
+            if self.debug:
+                logger.debug(
+                    f"Query classification enabled with model: {classifier_config.model_name}"
+                )
+
         builder = StateGraph(InterviewState)
         builder.add_node(
             "ask_question",
@@ -126,6 +158,8 @@ def _post_init(
                 search_tool=self.search_tool,
                 search_category=self.config.category,
                 max_context_len=self.config.max_ctx_len,
+                classifier_tool=self.classifier_tool,
+                filter_classifications=self.config.filter_classifications,
             ),
             retry=RetryPolicy(max_attempts=self.config.retry_attempts),
         )
diff --git a/akd/agents/search/aspect_search/interview_utils.py b/akd/agents/search/aspect_search/interview_utils.py
index 8b657103..a720310d 100644
--- a/akd/agents/search/aspect_search/interview_utils.py
+++ b/akd/agents/search/aspect_search/interview_utils.py
@@ -181,6 +181,8 @@ async def generate_answer(
     search_category: str = None,
     name: str = "Subject_Matter_Expert",
     max_ctx_len: int = 15000,
+    classifier_tool=None,
+    filter_classifications=None,
     **kwargs,
 ) -> Dict:
     """
@@ -191,8 +193,12 @@ async def generate_answer(
         state (InterviewState): Current interview state.
         llm (ChatOpenAI): Language model for generating queries and answers.
         search_tool (SearchTool): Tool for retrieving search results.
+        search_category (str, optional): Category for the search tool.
         name (str, optional): AI participant name. Defaults to "Subject_Matter_Expert".
         max_ctx_len (int, optional): Max context length for search data. Defaults to 15000.
+        classifier_tool (DecompClassifierTool, optional): Tool for classifying queries.
+        filter_classifications (List[DecompositionClassification], optional):
+            If set, only execute queries with these classifications.
 
     Returns:
         Dict: Generated answer message, cited references, and search results.
@@ -208,9 +214,34 @@ async def generate_answer(
     )
     swapped_state = swap_roles(state, name)
     queries = await gen_queries_chain.ainvoke(swapped_state)
+
+    # Classify queries if classifier is enabled
+    queries_to_execute = queries["parsed"].queries
+    if classifier_tool is not None:
+        # Extract original topic from the last editor question
+        last_question = state["messages"][-2].content if len(state["messages"]) >= 2 else "research topic"
+
+        # Classify all queries
+        classification_result = await classifier_tool.arun(
+            classifier_tool.input_schema(
+                original_topic=last_question,
+                queries=queries["parsed"].queries,
+            )
+        )
+
+        # Store classifications in the queries object
+        queries["parsed"].classified_queries = classification_result.classified_queries
+
+        # Filter queries if filter_classifications is set
+        if filter_classifications is not None:
+            queries_to_execute = [
+                cq.query for cq in classification_result.classified_queries
+                if cq.classification in filter_classifications
+            ]
+
     query_results = await search_tool.arun(
         search_tool.input_schema(
-            queries=queries["parsed"].queries,
+            queries=queries_to_execute,
             category=search_category,
         ),
     )
diff --git a/akd/agents/search/aspect_search/structures.py b/akd/agents/search/aspect_search/structures.py
index 64b14a12..689e9c88 100644
--- a/akd/agents/search/aspect_search/structures.py
+++ b/akd/agents/search/aspect_search/structures.py
@@ -4,6 +4,8 @@
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict
 
+from akd.structures import ClassifiedQuery
+
 # ---------------------------------------------------
 # Interview state helper functions
 # ---------------------------------------------------
@@ -96,6 +98,10 @@ class Queries(BaseModel):
     queries: List[str] = Field(
         description="Comprehensive list of search engine queries to answer the user's questions.",
     )
+    classified_queries: Optional[List[ClassifiedQuery]] = Field(
+        default=None,
+        description="Optional classified version of queries with labels and reasoning",
+    )
 
 
 class AnswerWithCitations(BaseModel):
diff --git a/akd/structures.py b/akd/structures.py
index ebbc24f1..926c8963 100644
--- a/akd/structures.py
+++ b/akd/structures.py
@@ -6,6 +6,7 @@
 organized into logical sections for better maintainability.
 """
 
+from enum import Enum
 from typing import Any
 
 from pydantic import (
@@ -22,6 +23,51 @@
 # from akd.common_types import ToolType
 from akd.configs.project import CONFIG
 
+# =============================================================================
+# Classification Enums
+# =============================================================================
+
+
+class DecompositionClassification(str, Enum):  # str mixin: members compare equal to their string values
+    """
+    Classification categories for decomposed queries relative to original topic.
+
+    Categories are defined by their relationship to the research topic:
+    - EXACT: Direct measurement of the phenomenon ("That is the thing you asked for")
+    - CALCULATOR: Mechanistic input/driver that physically affects the topic
+    - PROXY: Surrogate/stand-in measurement used because it correlates with the topic
+    - TANGENTIAL: Weakly related, contextual information not core to the analysis
+
+    Examples:
+        - Fire risk → Fire Weather Index: EXACT
+        - Fire risk → soil moisture: CALCULATOR
+        - Phytoplankton biomass → chlorophyll-a: PROXY
+        - Fire risk → regional humidity: TANGENTIAL
+    """
+
+    EXACT = "exact"
+    CALCULATOR = "calculator"
+    PROXY = "proxy"
+    TANGENTIAL = "tangential"
+
+
+class ClassifiedQuery(BaseModel):  # NOTE(review): docstring likely reaches the LLM via the JSON schema
+    """
+    A decomposed query with its classification relative to the original topic.
+
+    Attributes:
+        query: The search query text
+        classification: Classification category (EXACT, CALCULATOR, PROXY, TANGENTIAL)
+        reasoning: Brief explanation for why this classification was assigned
+    """
+
+    query: str = Field(description="The search query text")  # required: no default given
+    classification: DecompositionClassification = Field(
+        description="Classification of query relative to topic"
+    )
+    reasoning: str = Field(description="Brief explanation for the classification")
+
+
 # =============================================================================
 # Search and Data Models
 # =============================================================================
@@ -340,6 +386,9 @@ def name(self) -> str:
 
 
 __all__ = [
+    # Classification
+    "DecompositionClassification",
+    "ClassifiedQuery",
     # Search and Data Models
     "SearchResult",
     "SearchResultItem",
diff --git a/akd/tools/decomp_classifier.py b/akd/tools/decomp_classifier.py
new file mode 100644
index 00000000..d8d6eb11
--- /dev/null
+++ b/akd/tools/decomp_classifier.py
@@ -0,0 +1,389 @@
+"""
+Decomposition classification tool for categorizing decomposed queries.
+
+This module provides tools to classify decomposed research queries into categories
+(EXACT, CALCULATOR, PROXY, TANGENTIAL) based on their relationship to the original topic.
+Uses an LLM-based approach with structured outputs for reliable classification.
+"""
+
+from __future__ import annotations
+
+from typing import Any, List
+
+from loguru import logger
+from pydantic import AnyUrl, BaseModel, Field
+from pydantic import create_model
+
+from akd._base import InputSchema, OutputSchema
+from akd.agents._base import BaseAgentConfig, LiteLLMInstructorBaseAgent
+from akd.structures import ClassifiedQuery, DecompositionClassification
+from akd.tools._base import BaseTool, BaseToolConfig
+
+
+class DecompClassifierInputSchema(InputSchema):
+    """
+    Input schema for decomposition classification tool.
+
+    Attributes:
+        original_topic: The original research question/topic that was decomposed
+        queries: Decomposed queries to classify, each judged relative to original_topic
+    """
+
+    original_topic: str = Field(
+        ...,
+        description="The original research question or topic that was decomposed",
+    )
+    queries: List[str] = Field(..., description="List of decomposed queries to classify")
+
+
+class DecompClassifierOutputSchema(OutputSchema):
+    """
+    Output schema for decomposition classification tool.
+
+    Attributes:
+        classified_queries: One ClassifiedQuery (label + reasoning) per input query, in input order
+    """
+
+    classified_queries: List[ClassifiedQuery] = Field(
+        ...,
+        description="Queries with their classifications and reasoning",
+    )
+
+
+class DecompClassifierConfig(BaseToolConfig):
+    """
+    Configuration for the decomposition classifier tool.
+
+    Holds LLM connection settings, the system prompt, and the user prompt template;
+    the template is rendered with str.format(original_topic=..., queries=...).
+    """
+
+    base_url: AnyUrl | None = Field(default=None, description="Base URL for LLM API")
+    api_key: str | None = Field(default=None, description="API key for LLM")
+    model_name: str = Field(default="gpt-5-mini", description="LLM model name")  # NOTE(review): confirm this model accepts temperature=0.0
+    temperature: float = Field(default=0.0, ge=0.0, le=2.0, description="LLM temperature for consistency")
+
+    agent_system_prompt: str = Field(
+        default=(
+            "You are an expert at analyzing scientific research queries and classifying them "
+            "based on their relationship to the original research topic.\n\n"
+            "You will classify each decomposed query into one of four categories:\n\n"
+            "1. EXACT - The query is essentially the same as the topic. A domain expert would say "
+            "'yes, that is exactly what you asked for.' Example: Fire risk → Fire Weather Index\n\n"
+            "2. CALCULATOR - The query is a mechanistic input or driver that physically affects "
+            "the topic. Changing this variable would physically change the phenomenon. "
+            "Example: Fire risk → soil moisture, wind speed\n\n"
+            "3. PROXY - The query is not the phenomenon itself, but is used as a surrogate because "
+            "it correlates with the topic or is easier to measure. Example: Phytoplankton biomass → "
+            "chlorophyll-a concentration\n\n"
+            "4. TANGENTIAL - The query is only indirectly or weakly related, potentially useful as "
+            "context but not standard practice as a core input or proxy. Example: Fire risk → "
+            "regional humidity (if not part of the risk index)\n\n"
+            "Provide clear, concise reasoning for each classification."
+        ),
+        description="System prompt for the internal classification agent",
+    )
+
+    classification_prompt_template: str = Field(
+        default=(
+            "Original Research Topic: {original_topic}\n\n"
+            "Decomposed Queries to Classify:\n{queries}\n\n"
+            "CLASSIFICATION DECISION TREE:\n"
+            "For each query, follow this decision process:\n\n"
+            "1. Is the query conceptually the same quantity as the topic?\n"
+            "   → If YES: Classify as EXACT\n\n"
+            "2. Does the query enter the physical/statistical mechanism of the topic?\n"
+            "   (i.e., would changing this variable physically change the topic?)\n"
+            "   → If YES: Classify as CALCULATOR\n\n"
+            "3. Is the query used as a surrogate because it tracks the topic?\n"
+            "   (i.e., we use this because we can't easily measure the topic directly)\n"
+            "   → If YES: Classify as PROXY\n\n"
+            "4. Otherwise: Classify as TANGENTIAL\n\n"
+            "EXAMPLES:\n"
+            "- Fire risk → Fire Weather Index: EXACT (direct fire risk metric)\n"
+            "- Fire risk → soil moisture: CALCULATOR (mechanistic input to fire risk)\n"
+            "- Fire risk → wind speed: CALCULATOR (physical driver of fire spread)\n"
+            "- Phytoplankton biomass → chlorophyll-a: PROXY (surrogate for biomass)\n"
+            "- Ocean health → sea surface temperature: CALCULATOR (mechanistic input)\n"
+            "- Flood risk → precipitation: CALCULATOR (physical driver)\n"
+            "- Fire risk → regional humidity: TANGENTIAL (weakly related, not core)\n"
+            "- Fire risk → ENSO SST: TANGENTIAL (indirect teleconnection)\n\n"
+            "For EACH query above, classify it relative to the original topic and provide "
+            "brief reasoning (1-2 sentences) explaining your classification."
+        ),
+        description="Template for the classification prompt with decision tree and examples",
+    )
+
+
+class DecompClassifierTool(BaseTool[DecompClassifierInputSchema, DecompClassifierOutputSchema]):
+    """
+    Tool for classifying decomposed queries using LLM-based structured output.
+
+    This tool takes an original research topic and a list of decomposed queries,
+    then classifies each query into one of four categories (EXACT, CALCULATOR,
+    PROXY, TANGENTIAL) using a single LLM call that evaluates all queries together.
+
+    The tool uses instructor with dynamic Pydantic models to ensure structured,
+    reliable output from the LLM.
+
+    Key features:
+    - Single LLM call for all queries (efficient and context-aware)
+    - Structured output with reasoning for each classification
+    - Follows established pattern from LLMRerankerTool
+    - Returns ClassifiedQuery objects with classification and reasoning
+
+    Example:
+        >>> config = DecompClassifierConfig(model_name="gpt-4o-mini")
+        >>> classifier = DecompClassifierTool(config=config)
+        >>> result = await classifier.arun(
+        ...     classifier.input_schema(
+        ...         original_topic="What is fire risk?",
+        ...         queries=["soil moisture data", "Fire Weather Index"],
+        ...     )
+        ... )
+        >>> for cq in result.classified_queries:
+        ...     print(f"{cq.query} → {cq.classification}: {cq.reasoning}")
+    """
+
+    input_schema = DecompClassifierInputSchema
+    output_schema = DecompClassifierOutputSchema
+    config_schema = DecompClassifierConfig
+
+    def __init__(
+        self,
+        config: DecompClassifierConfig | None = None,
+        debug: bool = False,
+    ):
+        """
+        Initialize the tool and prepare the settings used to build per-request agents.
+
+        Args:
+            config: Configuration for the classifier (LLM settings, prompts)
+            debug: Enable debug logging
+        """
+        super().__init__(config=config, debug=debug)
+        self.config: DecompClassifierConfig = self.config  # no-op assignment; only narrows the type
+
+        # Mirror the tool's LLM settings into a config for the internal agent
+        agent_config = BaseAgentConfig(
+            base_url=self.config.base_url,
+            api_key=self.config.api_key,
+            model_name=self.config.model_name,
+            temperature=self.config.temperature,
+            system_prompt=self.config.agent_system_prompt,
+        )
+
+        # Field-less placeholder: the agent class needs an input schema attribute
+        class DummyInput(InputSchema):
+            """Dummy input schema for classification agent."""
+
+            pass
+
+        # No agent is created here: its output model has one field per query, so
+        # it is built per request. Only the agent config and placeholder are kept.
+        self.agent_config = agent_config
+        self.DummyInput = DummyInput
+
+    def _create_dynamic_classification_model(self, queries: List[str]) -> type[BaseModel]:
+        """
+        Create a dynamic Pydantic model with one field per query.
+
+        This allows the LLM to see each query as a separate field in the JSON schema,
+        making it easier for structured output generation.
+
+        Args:
+            queries: Query strings; field i is named ``query_{i}_<slug of query i>``
+
+        Returns:
+            Dynamically created Pydantic model class with one ClassifiedQuery field per query
+        """
+
+        def sanitize_field_name(name: str) -> str:
+            """Reduce query text to an identifier-like slug ("query" if empty)."""
+            # Keep only the first 50 characters; spaces and hyphens become underscores
+            sanitized = name[:50].replace(" ", "_").replace("-", "_")
+            # Replace every remaining non-alphanumeric character with an underscore
+            sanitized = "".join(c if c.isalnum() or c == "_" else "_" for c in sanitized)
+            # Prefix "q_" when the slug does not start with a letter
+            if sanitized and not sanitized[0].isalpha():
+                sanitized = "q_" + sanitized
+            return sanitized or "query"
+
+        # Build fields dict: {field_name: (type, Field(...))}
+        query_fields = {}
+        for idx, query in enumerate(queries):
+            field_name = f"query_{idx}_{sanitize_field_name(query)}"  # idx keeps names unique if slugs collide
+            field_description = (
+                f"Classification for query: '{query}'. "
+                "Select category (EXACT, CALCULATOR, PROXY, TANGENTIAL) and provide brief reasoning."
+            )
+            query_fields[field_name] = (
+                ClassifiedQuery,
+                Field(..., description=field_description),
+            )
+
+        # Every field is required (default is ...), one per query
+        DynamicClassificationModel = create_model(
+            "AllQueryClassifications",
+            **query_fields,
+        )
+
+        return DynamicClassificationModel
+
+    def _format_prompt(self, original_topic: str, queries: List[str]) -> str:
+        """
+        Render the configured classification prompt for one request.
+
+        Args:
+            original_topic: Research question the queries were derived from
+            queries: Decomposed queries, rendered as a 1-based numbered list
+
+        Returns:
+            The prompt template with its ``original_topic`` and ``queries``
+            placeholders filled in
+        """
+        # One "N. query" line per query, numbered from 1
+        numbered = [f"{pos}. {text}" for pos, text in enumerate(queries, start=1)]
+
+        # str.format substitutes the two named placeholders of the template
+        template = self.config.classification_prompt_template
+        return template.format(
+            original_topic=original_topic,
+            queries="\n".join(numbered),
+        )
+
+    async def _classify_all_queries(
+        self,
+        original_topic: str,
+        queries: List[str],
+    ) -> List[ClassifiedQuery]:
+        """
+        Classify all queries in a single LLM call.
+
+        Args:
+            original_topic: The original research question
+            queries: List of decomposed queries to classify
+
+        Returns:
+            One ClassifiedQuery per query, in input order; TANGENTIAL if unanswered or the call fails
+        """
+        # Create dynamic output model for this specific set of queries
+        DynamicOutputModel = self._create_dynamic_classification_model(queries)
+
+        # A fresh agent subclass per call, because output_schema differs for each query set
+        class ClassificationAgent(LiteLLMInstructorBaseAgent):
+            input_schema = self.DummyInput
+            output_schema = DynamicOutputModel
+
+        classification_agent = ClassificationAgent(
+            config=self.agent_config,
+            debug=self.debug,
+        )
+
+        # Format the prompt
+        formatted_prompt = self._format_prompt(original_topic, queries)
+
+        if self.debug:
+            logger.debug(f"Classification prompt:\n{formatted_prompt}")
+
+        try:
+            # Call the LLM with structured output
+            messages = [
+                classification_agent._default_system_message(),
+                {
+                    "role": "user",
+                    "content": formatted_prompt,
+                },
+            ]
+
+            response = await classification_agent.get_response_async(messages=messages)
+
+            # Flatten the response model into {field_name: value} for lookup by prefix
+            response_dict = response.model_dump()
+
+            classified_queries = []
+            for idx, query in enumerate(queries):
+                # Field names are query_{idx}_<slug>; the trailing underscore in the
+                # prefix stops query_1_ from also matching query_10_, query_11_, ...
+                matching_key = None
+                for key in response_dict.keys():
+                    if key.startswith(f"query_{idx}_"):
+                        matching_key = key
+                        break
+
+                if matching_key and response_dict[matching_key]:
+                    # The dumped value is read by key, like a dict; rebuild the ClassifiedQuery
+                    # from it, keeping the caller's query text rather than the LLM's echo
+                    classification_data = response_dict[matching_key]
+
+                    classified_query = ClassifiedQuery(
+                        query=query,  # Use original query text
+                        classification=DecompositionClassification(classification_data["classification"]),
+                        reasoning=classification_data["reasoning"],
+                    )
+                    classified_queries.append(classified_query)
+
+                    if self.debug:
+                        logger.debug(
+                            f"Classified '{query}' as {classified_query.classification}: "
+                            f"{classified_query.reasoning[:100]}"
+                        )
+                else:
+                    logger.warning(f"No classification found for query '{query}', defaulting to TANGENTIAL")
+                    classified_queries.append(
+                        ClassifiedQuery(
+                            query=query,
+                            classification=DecompositionClassification.TANGENTIAL,
+                            reasoning="Classification not returned by LLM",
+                        )
+                    )
+
+            return classified_queries
+
+        except Exception as e:
+            logger.error(f"Error classifying queries: {e}")
+            # Best-effort fallback: mark every query TANGENTIAL and carry the error in `reasoning`
+            return [
+                ClassifiedQuery(
+                    query=query,
+                    classification=DecompositionClassification.TANGENTIAL,
+                    reasoning=f"Error during classification: {str(e)}",
+                )
+                for query in queries
+            ]
+
+    async def _arun(self, params: DecompClassifierInputSchema) -> DecompClassifierOutputSchema:
+        """
+        Classify ``params.queries`` against ``params.original_topic``.
+
+        Args:
+            params: Tool input carrying the original topic and its queries
+
+        Returns:
+            Output schema wrapping the results; empty when no queries were given
+        """
+        # No queries means nothing to classify, so the LLM call is skipped
+        results: List[ClassifiedQuery] = []
+
+        if params.queries:
+            results = await self._classify_all_queries(
+                original_topic=params.original_topic,
+                queries=params.queries,
+            )
+
+        return DecompClassifierOutputSchema(classified_queries=results)
+
+    def __str__(self) -> str:  # renders "<class name> | model=<config.model_name>"
+        return f"{self.__class__.__name__} | model={self.config.model_name}"
+
+    def __repr__(self) -> str:  # same text as __str__
+        return str(self)
+
+
+# Export public API
+__all__ = [
+    "DecompClassifierInputSchema",
+    "DecompClassifierOutputSchema",
+    "DecompClassifierConfig",
+    "DecompClassifierTool",
+]
diff --git a/examples/decomp_classifier_example.py b/examples/decomp_classifier_example.py
new file mode 100644
index 00000000..c43d2154
--- /dev/null
+++ b/examples/decomp_classifier_example.py
@@ -0,0 +1,285 @@
+"""
+Example script demonstrating the decomposition classifier for aspect search.
+
+This example shows how to:
+1. Enable query classification in the aspect search agent
+2. View classifications for decomposed queries
+3. Filter queries by classification (e.g., skip TANGENTIAL queries)
+4. Access classification results for analysis
+"""
+
+import asyncio
+import os
+
+from akd.agents.search.aspect_search import AspectSearchAgent, AspectSearchConfig
+from akd.structures import DecompositionClassification
+from akd.tools.decomp_classifier import DecompClassifierConfig
+
+
+async def example_basic_classification():
+    """
+    Example 1: aspect search with query classification switched on, unfiltered.
+
+    Each decomposed query gets categorized, yet no filter list is configured, so
+    every query is still executed. Prints per-interview info and result totals.
+    """
+    heavy_rule, light_rule = "=" * 80, "-" * 80
+
+    print("\n" + heavy_rule)
+    print("EXAMPLE 1: Basic Classification (No Filtering)")
+    print(heavy_rule + "\n")
+
+    classifier_settings = DecompClassifierConfig(model_name="gpt-5-mini", temperature=0.0)
+    search_settings = AspectSearchConfig(
+        model_name="gpt-4o",
+        api_key=os.getenv("OPENAI_API_KEY"),
+        enable_query_classification=True,
+        classifier_config=classifier_settings,
+        max_turns=2,  # cap the turns to keep the example quick
+        num_editors=2,  # fewer editors, same reason
+    )
+    searcher = AspectSearchAgent(config=search_settings, debug=True)
+
+    # Kick off the search on a sample topic.
+    request = searcher.input_schema(topic="What is fire risk in forests?")
+    result = await searcher.arun(request)
+
+    print("\n" + light_rule)
+    print("CLASSIFICATION RESULTS")
+    print(light_rule + "\n")
+
+    # Print the editor name once for each message that carries tool calls.
+    for idx, interview in enumerate(result.interview_results, start=1):
+        print(f"Interview {idx}:")
+        transcript = interview["messages"] if "messages" in interview else ()
+        for entry in transcript:
+            if not getattr(entry, "tool_calls", None):
+                continue
+            # Tool calls mark the point where queries were generated.
+            print(f"  Editor: {interview.get('editor', {}).get('name', 'Unknown')}")
+
+        # NOTE: the classifications themselves are not printed here; where they
+        # live in the interview state depends on the implementation.
+        print()
+
+    print(f"Total search results: {len(result.search_results)}")
+    print(f"Total references: {len(result.references)}")
+
+
+async def example_filtered_classification():
+    """
+    Example 2: classify queries and drop the TANGENTIAL ones before searching.
+
+    The filter list names EXACT, CALCULATOR and PROXY, so only queries with one
+    of those classifications are executed; TANGENTIAL queries, which are only
+    weakly related to the topic, are skipped.
+    """
+    heavy_rule, light_rule = "=" * 80, "-" * 80
+
+    print("\n" + heavy_rule)
+    print("EXAMPLE 2: Classification with Filtering (Skip TANGENTIAL)")
+    print(heavy_rule + "\n")
+
+    # Classifications allowed through to execution; TANGENTIAL is left out on purpose.
+    allowed = [
+        DecompositionClassification.EXACT,
+        DecompositionClassification.CALCULATOR,
+        DecompositionClassification.PROXY,
+    ]
+    classifier_settings = DecompClassifierConfig(model_name="gpt-5-mini", temperature=0.0)
+    search_settings = AspectSearchConfig(
+        model_name="gpt-4o",
+        api_key=os.getenv("OPENAI_API_KEY"),
+        enable_query_classification=True,
+        classifier_config=classifier_settings,
+        filter_classifications=allowed,
+        max_turns=2,
+        num_editors=2,
+    )
+    searcher = AspectSearchAgent(config=search_settings, debug=True)
+
+    # Run the filtered aspect search.
+    request = searcher.input_schema(topic="Ocean temperature trends and climate change")
+    result = await searcher.arun(request)
+
+    print("\n" + light_rule)
+    print("FILTERING RESULTS")
+    print(light_rule + "\n")
+
+    print("Configuration filters out TANGENTIAL queries.")
+    print("Only EXACT, CALCULATOR, and PROXY queries are executed.")
+    print(f"\nTotal search results: {len(result.search_results)}")
+    print(f"Total references: {len(result.references)}")
+
+
+async def example_standalone_classifier():
+    """
+    Example 3: drive DecompClassifierTool directly, without the aspect search agent.
+
+    A fixed list of queries is classified against one topic; each verdict is
+    printed with its reasoning, followed by a per-category count. Useful for
+    testing or analysis.
+    """
+    heavy_rule, light_rule = "=" * 80, "-" * 80
+
+    print("\n" + heavy_rule)
+    print("EXAMPLE 3: Standalone Classifier Tool")
+    print(heavy_rule + "\n")
+
+    from akd.tools.decomp_classifier import DecompClassifierTool
+
+    # NOTE(review): confirm the provider accepts temperature=0.0 for gpt-5-mini.
+    tool_settings = DecompClassifierConfig(model_name="gpt-5-mini", temperature=0.0)
+    classifier = DecompClassifierTool(config=tool_settings, debug=True)
+
+    # Sample topic plus candidate decomposed queries.
+    original_topic = "What is fire risk in California forests?"
+    test_queries = [
+        "California wildfire risk index",
+        "soil moisture content in forests",
+        "wind speed and direction patterns",
+        "chlorophyll content as indicator of forest health",
+        "historical rainfall patterns in California",
+        "general climate change overview",
+    ]
+
+    print(f"Original Topic: {original_topic}\n")
+    print("Decomposed Queries:")
+    for i, q in enumerate(test_queries, start=1):
+        print(f"  {i}. {q}")
+    print()
+
+    # Classify the whole batch through the tool.
+    request = classifier.input_schema(original_topic=original_topic, queries=test_queries)
+    result = await classifier.arun(request)
+
+    print("\n" + light_rule)
+    print("CLASSIFICATIONS")
+    print(light_rule + "\n")
+
+    # One stanza per classified query.
+    for cq in result.classified_queries:
+        print(f"Query: {cq.query}")
+        print(f"Classification: {cq.classification.value.upper()}")
+        print(f"Reasoning: {cq.reasoning}")
+        print()
+
+    # Per-category totals.
+    print(light_rule)
+    print("SUMMARY BY CATEGORY")
+    print(light_rule + "\n")
+
+    from collections import Counter
+
+    # Counter is a dict subclass, so categories print in first-seen order.
+    labels = [cq.classification for cq in result.classified_queries]
+    category_counts = Counter(labels)
+    for category, count in category_counts.items():
+        print(f"{category.value.upper()}: {count} queries")
+
+
+async def example_domain_specific():
+    """
+    Example 4: Domain-specific classification for Earth science research.
+
+    Classifies Earth observation / CMR (Common Metadata Repository) style queries
+    against one topic and prints them grouped by classification with reasoning.
+    """
+    print("\n" + "=" * 80)
+    print("EXAMPLE 4: Domain-Specific Classification (Earth Science)")
+    print("=" * 80 + "\n")
+
+    from akd.tools.decomp_classifier import DecompClassifierTool
+
+    config = DecompClassifierConfig(
+        model_name="gpt-5-mini",
+        temperature=0.0,
+    )
+    classifier = DecompClassifierTool(config=config)
+
+    # Earth science topic and the candidate queries to classify against it
+    original_topic = "What is the impact of soil moisture on flood risk?"
+    earth_science_queries = [
+        "SMAP soil moisture L3 product",
+        "soil moisture anomaly calculation",
+        "precipitation data from GPM",
+        "topography and slope from DEM",
+        "NDVI as proxy for vegetation water stress",
+        "historical flood events database",
+        "general hydrology textbook information",
+    ]
+
+    print(f"Original Topic: {original_topic}\n")
+    print("Earth Science Queries:")
+    for i, q in enumerate(earth_science_queries, 1):
+        print(f"  {i}. {q}")
+    print()
+
+    result = await classifier.arun(
+        classifier.input_schema(original_topic=original_topic, queries=earth_science_queries)
+    )
+
+    print("\n" + "-" * 80)
+    print("EARTH SCIENCE CLASSIFICATIONS")
+    print("-" * 80 + "\n")
+
+    # Group results by category. The known classes are pre-seeded so empty ones
+    # still print; setdefault keeps an unlisted classification from raising KeyError.
+    by_category = {
+        DecompositionClassification.EXACT: [],
+        DecompositionClassification.CALCULATOR: [],
+        DecompositionClassification.PROXY: [],
+        DecompositionClassification.TANGENTIAL: [],
+    }
+    for cq in result.classified_queries:
+        by_category.setdefault(cq.classification, []).append(cq)
+
+    for category, queries in by_category.items():
+        print(f"\n{category.value.upper()} ({len(queries)} queries):")
+        for cq in queries:
+            print(f"  • {cq.query}")
+            print(f"    Reasoning: {cq.reasoning}")
+
+
+async def main():
+    """Run the classifier-only examples (3 and 4); 1 and 2 are left commented out."""
+    banner = "=" * 80
+    print(banner)
+    print("DECOMPOSITION CLASSIFICATION EXAMPLES")
+    print(banner)
+
+    # Bail out early when no OpenAI key is configured.
+    api_key = os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        print("\nERROR: OPENAI_API_KEY environment variable not set.")
+        print("Please set it before running this example.")
+        return
+
+    try:
+        # Examples 3 and 4 call the classifier tool directly
+        # (fast, no search), so they run by default.
+        await example_standalone_classifier()
+        await example_domain_specific()
+
+        # Examples 1 and 2 perform the full aspect search and are slower;
+        # uncomment the calls below to include them.
+        # await example_basic_classification()
+        # await example_filtered_classification()
+
+        print("\n" + banner)
+        print("EXAMPLES COMPLETED")
+        print(banner)
+        print("\nNote: Examples 1 and 2 are commented out by default as they")
+        print("perform full aspect search which takes longer. Uncomment them")
+        print("in main() to run the full examples.")
+
+    except Exception as e:
+        # Report the failure and its traceback rather than letting it propagate.
+        print(f"\nError running examples: {e}")
+        import traceback
+
+        traceback.print_exc()
+
+
+if __name__ == "__main__":  # only when executed as a script, not on import
+    asyncio.run(main())