From 93ea754ef3aa97a95dbbf3a5b0180cb53df5a47c Mon Sep 17 00:00:00 2001 From: iamsims <074bct541.simran@pcampus.edu.np> Date: Tue, 2 Dec 2025 14:58:26 -0600 Subject: [PATCH 1/2] Create a base class for Criteria --- akd/structures.py | 9 +++++++++ akd/tools/reranker.py | 6 ++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/akd/structures.py b/akd/structures.py index 69f8006d..ebbc24f1 100644 --- a/akd/structures.py +++ b/akd/structures.py @@ -242,6 +242,13 @@ class PaperDataItem(BaseModel): ) +class BaseCriterion(BaseModel): + """Base class for criteria used in reranking.""" + + name: str = Field(..., description="Criterion name") + description: str = Field(..., description="Detailed description of what this criterion evaluates") + + # ============================================================================= # Extraction Schemas # ============================================================================= @@ -331,6 +338,7 @@ def name(self) -> str: # Type alias for semantic clarity in literature search contexts LitSearchResult = SearchResultItem + __all__ = [ # Search and Data Models "SearchResult", @@ -342,4 +350,5 @@ def name(self) -> str: "SingleEstimation", # Tool Models "ToolSearchResult", + "BaseCriterion", ] diff --git a/akd/tools/reranker.py b/akd/tools/reranker.py index c4117816..f9d77cb0 100644 --- a/akd/tools/reranker.py +++ b/akd/tools/reranker.py @@ -11,7 +11,7 @@ from akd._base import InputSchema, OutputSchema from akd.agents._base import BaseAgentConfig, LiteLLMInstructorBaseAgent -from akd.structures import SearchResultItem +from akd.structures import BaseCriterion, SearchResultItem from akd.tools._base import BaseTool, BaseToolConfig from akd.tools.search.utils import deduplicate_results, sort_results @@ -167,11 +167,9 @@ class ScoringCategory(BaseModel): value: float = Field(..., description="Numeric score for this category") -class ScoringCriterion(BaseModel): +class ScoringCriterion(BaseCriterion): """Individual criterion 
for evaluating results.""" - name: str = Field(..., description="Criterion name (e.g., 'Relevancy', 'Processing Level', 'Ease of Use')") - description: str = Field(..., description="Detailed description of what this criterion evaluates") weight: float = Field(default=1.0, ge=0.0, le=1.0, description="Weight for this criterion (0.0 to 1.0)") scoring_categories: list[ScoringCategory] = Field( default_factory=lambda: [ From 62c562f31e4826247bc49a9da8ad2060d8ec421c Mon Sep 17 00:00:00 2001 From: Bernard Benson Date: Thu, 4 Dec 2025 16:24:18 -0600 Subject: [PATCH 2/2] llm based decomp classifier --- .../search/aspect_search/aspect_search.py | 34 ++ .../search/aspect_search/interview_utils.py | 33 +- akd/agents/search/aspect_search/structures.py | 6 + akd/structures.py | 49 +++ akd/tools/decomp_classifier.py | 389 ++++++++++++++++++ examples/decomp_classifier_example.py | 285 +++++++++++++ 6 files changed, 795 insertions(+), 1 deletion(-) create mode 100644 akd/tools/decomp_classifier.py create mode 100644 examples/decomp_classifier_example.py diff --git a/akd/agents/search/aspect_search/aspect_search.py b/akd/agents/search/aspect_search/aspect_search.py index 1f52853e..1f3ad4b5 100644 --- a/akd/agents/search/aspect_search/aspect_search.py +++ b/akd/agents/search/aspect_search/aspect_search.py @@ -23,6 +23,8 @@ update_references, update_search_results, ) +from akd.structures import DecompositionClassification +from akd.tools.decomp_classifier import DecompClassifierConfig, DecompClassifierTool from akd.tools.search import SearchResultItem, SearxNGSearchTool @@ -89,6 +91,23 @@ class AspectSearchConfig(BaseAgentConfig): description="Maximum length of the search result context during interviews.", ) + # Query classification configuration + enable_query_classification: bool = Field( + default=False, + description="Whether to classify decomposed queries before execution", + ) + classifier_config: Optional[DecompClassifierConfig] = Field( + default=None, + 
description="Configuration for the decomposition classifier tool", + ) + filter_classifications: Optional[List[DecompositionClassification]] = Field( + default=None, + description=( + "If set, only execute queries with these classifications. " + "Example: [EXACT, CALCULATOR, PROXY] to skip TANGENTIAL queries" + ), + ) + class AspectSearchAgent(BaseAgent): input_schema = AspectSearchInputSchema @@ -113,6 +132,19 @@ def _post_init( self.search_tool = self.config.search_tool + # Initialize query classifier if enabled + self.classifier_tool = None + if self.config.enable_query_classification: + classifier_config = self.config.classifier_config or DecompClassifierConfig() + self.classifier_tool = DecompClassifierTool( + config=classifier_config, + debug=self.debug, + ) + if self.debug: + logger.debug( + f"Query classification enabled with model: {classifier_config.model_name}" + ) + builder = StateGraph(InterviewState) builder.add_node( "ask_question", @@ -126,6 +158,8 @@ def _post_init( search_tool=self.search_tool, search_category=self.config.category, max_context_len=self.config.max_ctx_len, + classifier_tool=self.classifier_tool, + filter_classifications=self.config.filter_classifications, ), retry=RetryPolicy(max_attempts=self.config.retry_attempts), ) diff --git a/akd/agents/search/aspect_search/interview_utils.py b/akd/agents/search/aspect_search/interview_utils.py index 8b657103..a720310d 100644 --- a/akd/agents/search/aspect_search/interview_utils.py +++ b/akd/agents/search/aspect_search/interview_utils.py @@ -181,6 +181,8 @@ async def generate_answer( search_category: str = None, name: str = "Subject_Matter_Expert", max_ctx_len: int = 15000, + classifier_tool=None, + filter_classifications=None, **kwargs, ) -> Dict: """ @@ -191,8 +193,12 @@ async def generate_answer( state (InterviewState): Current interview state. llm (ChatOpenAI): Language model for generating queries and answers. search_tool (SearchTool): Tool for retrieving search results. 
+ search_category (str, optional): Category for the search tool. name (str, optional): AI participant name. Defaults to "Subject_Matter_Expert". max_ctx_len (int, optional): Max context length for search data. Defaults to 15000. + classifier_tool (DecompClassifierTool, optional): Tool for classifying queries. + filter_classifications (List[DecompositionClassification], optional): + If set, only execute queries with these classifications. Returns: Dict: Generated answer message, cited references, and search results. @@ -208,9 +214,34 @@ async def generate_answer( ) swapped_state = swap_roles(state, name) queries = await gen_queries_chain.ainvoke(swapped_state) + + # Classify queries if classifier is enabled + queries_to_execute = queries["parsed"].queries + if classifier_tool is not None: + # Extract original topic from the last editor question + last_question = state["messages"][-2].content if len(state["messages"]) >= 2 else "research topic" + + # Classify all queries + classification_result = await classifier_tool.arun( + classifier_tool.input_schema( + original_topic=last_question, + queries=queries["parsed"].queries, + ) + ) + + # Store classifications in the queries object + queries["parsed"].classified_queries = classification_result.classified_queries + + # Filter queries if filter_classifications is set + if filter_classifications is not None: + queries_to_execute = [ + cq.query for cq in classification_result.classified_queries + if cq.classification in filter_classifications + ] + query_results = await search_tool.arun( search_tool.input_schema( - queries=queries["parsed"].queries, + queries=queries_to_execute, category=search_category, ), ) diff --git a/akd/agents/search/aspect_search/structures.py b/akd/agents/search/aspect_search/structures.py index 64b14a12..689e9c88 100644 --- a/akd/agents/search/aspect_search/structures.py +++ b/akd/agents/search/aspect_search/structures.py @@ -4,6 +4,8 @@ from pydantic import BaseModel, Field from typing_extensions 
import TypedDict +from akd.structures import ClassifiedQuery + # --------------------------------------------------- # Interview state helper functions # --------------------------------------------------- @@ -96,6 +98,10 @@ class Queries(BaseModel): queries: List[str] = Field( description="Comprehensive list of search engine queries to answer the user's questions.", ) + classified_queries: Optional[List[ClassifiedQuery]] = Field( + default=None, + description="Optional classified version of queries with labels and reasoning", + ) class AnswerWithCitations(BaseModel): diff --git a/akd/structures.py b/akd/structures.py index ebbc24f1..926c8963 100644 --- a/akd/structures.py +++ b/akd/structures.py @@ -6,6 +6,7 @@ organized into logical sections for better maintainability. """ +from enum import Enum from typing import Any from pydantic import ( @@ -22,6 +23,51 @@ # from akd.common_types import ToolType from akd.configs.project import CONFIG +# ============================================================================= +# Classification Enums +# ============================================================================= + + +class DecompositionClassification(str, Enum): + """ + Classification categories for decomposed queries relative to original topic. 
+ + Categories are defined by their relationship to the research topic: + - EXACT: Direct measurement of the phenomenon ("That is the thing you asked for") + - CALCULATOR: Mechanistic input/driver that physically affects the topic + - PROXY: Surrogate/stand-in measurement used because it correlates with the topic + - TANGENTIAL: Weakly related, contextual information not core to the analysis + + Examples: + - Fire risk → Fire Weather Index: EXACT + - Fire risk → soil moisture: CALCULATOR + - Phytoplankton biomass → chlorophyll-a: PROXY + - Fire risk → regional humidity: TANGENTIAL + """ + + EXACT = "exact" + CALCULATOR = "calculator" + PROXY = "proxy" + TANGENTIAL = "tangential" + + +class ClassifiedQuery(BaseModel): + """ + A decomposed query with its classification relative to the original topic. + + Attributes: + query: The search query text + classification: Classification category (EXACT, CALCULATOR, PROXY, TANGENTIAL) + reasoning: Brief explanation for why this classification was assigned + """ + + query: str = Field(description="The search query text") + classification: DecompositionClassification = Field( + description="Classification of query relative to topic" + ) + reasoning: str = Field(description="Brief explanation for the classification") + + # ============================================================================= # Search and Data Models # ============================================================================= @@ -340,6 +386,9 @@ def name(self) -> str: __all__ = [ + # Classification + "DecompositionClassification", + "ClassifiedQuery", # Search and Data Models "SearchResult", "SearchResultItem", diff --git a/akd/tools/decomp_classifier.py b/akd/tools/decomp_classifier.py new file mode 100644 index 00000000..d8d6eb11 --- /dev/null +++ b/akd/tools/decomp_classifier.py @@ -0,0 +1,389 @@ +""" +Decomposition classification tool for categorizing decomposed queries. 
+ +This module provides tools to classify decomposed research queries into categories +(EXACT, CALCULATOR, PROXY, TANGENTIAL) based on their relationship to the original topic. +Uses an LLM-based approach with structured outputs for reliable classification. +""" + +from __future__ import annotations + +from typing import Any, List + +from loguru import logger +from pydantic import AnyUrl, BaseModel, Field +from pydantic import create_model + +from akd._base import InputSchema, OutputSchema +from akd.agents._base import BaseAgentConfig, LiteLLMInstructorBaseAgent +from akd.structures import ClassifiedQuery, DecompositionClassification +from akd.tools._base import BaseTool, BaseToolConfig + + +class DecompClassifierInputSchema(InputSchema): + """ + Input schema for decomposition classification tool. + + Attributes: + original_topic: The original research question/topic that was decomposed + queries: List of decomposed queries to classify + """ + + original_topic: str = Field( + ..., + description="The original research question or topic that was decomposed", + ) + queries: List[str] = Field(..., description="List of decomposed queries to classify") + + +class DecompClassifierOutputSchema(OutputSchema): + """ + Output schema for decomposition classification tool. + + Attributes: + classified_queries: List of queries with their classifications and reasoning + """ + + classified_queries: List[ClassifiedQuery] = Field( + ..., + description="Queries with their classifications and reasoning", + ) + + +class DecompClassifierConfig(BaseToolConfig): + """ + Configuration for the decomposition classifier tool. + + This config includes LLM settings and the detailed prompt template that + guides the classification process. 
+ """ + + base_url: AnyUrl | None = Field(default=None, description="Base URL for LLM API") + api_key: str | None = Field(default=None, description="API key for LLM") + model_name: str = Field(default="gpt-5-mini", description="LLM model name") + temperature: float = Field(default=0.0, ge=0.0, le=2.0, description="LLM temperature for consistency") + + agent_system_prompt: str = Field( + default=( + "You are an expert at analyzing scientific research queries and classifying them " + "based on their relationship to the original research topic.\n\n" + "You will classify each decomposed query into one of four categories:\n\n" + "1. EXACT - The query is essentially the same as the topic. A domain expert would say " + "'yes, that is exactly what you asked for.' Example: Fire risk → Fire Weather Index\n\n" + "2. CALCULATOR - The query is a mechanistic input or driver that physically affects " + "the topic. Changing this variable would physically change the phenomenon. " + "Example: Fire risk → soil moisture, wind speed\n\n" + "3. PROXY - The query is not the phenomenon itself, but is used as a surrogate because " + "it correlates with the topic or is easier to measure. Example: Phytoplankton biomass → " + "chlorophyll-a concentration\n\n" + "4. TANGENTIAL - The query is only indirectly or weakly related, potentially useful as " + "context but not standard practice as a core input or proxy. Example: Fire risk → " + "regional humidity (if not part of the risk index)\n\n" + "Provide clear, concise reasoning for each classification." + ), + description="System prompt for the internal classification agent", + ) + + classification_prompt_template: str = Field( + default=( + "Original Research Topic: {original_topic}\n\n" + "Decomposed Queries to Classify:\n{queries}\n\n" + "CLASSIFICATION DECISION TREE:\n" + "For each query, follow this decision process:\n\n" + "1. Is the query conceptually the same quantity as the topic?\n" + " → If YES: Classify as EXACT\n\n" + "2. 
Does the query enter the physical/statistical mechanism of the topic?\n" + " (i.e., would changing this variable physically change the topic?)\n" + " → If YES: Classify as CALCULATOR\n\n" + "3. Is the query used as a surrogate because it tracks the topic?\n" + " (i.e., we use this because we can't easily measure the topic directly)\n" + " → If YES: Classify as PROXY\n\n" + "4. Otherwise: Classify as TANGENTIAL\n\n" + "EXAMPLES:\n" + "- Fire risk → Fire Weather Index: EXACT (direct fire risk metric)\n" + "- Fire risk → soil moisture: CALCULATOR (mechanistic input to fire risk)\n" + "- Fire risk → wind speed: CALCULATOR (physical driver of fire spread)\n" + "- Phytoplankton biomass → chlorophyll-a: PROXY (surrogate for biomass)\n" + "- Ocean health → sea surface temperature: CALCULATOR (mechanistic input)\n" + "- Flood risk → precipitation: CALCULATOR (physical driver)\n" + "- Fire risk → regional humidity: TANGENTIAL (weakly related, not core)\n" + "- Fire risk → ENSO SST: TANGENTIAL (indirect teleconnection)\n\n" + "For EACH query above, classify it relative to the original topic and provide " + "brief reasoning (1-2 sentences) explaining your classification." + ), + description="Template for the classification prompt with decision tree and examples", + ) + + +class DecompClassifierTool(BaseTool[DecompClassifierInputSchema, DecompClassifierOutputSchema]): + """ + Tool for classifying decomposed queries using LLM-based structured output. + + This tool takes an original research topic and a list of decomposed queries, + then classifies each query into one of four categories (EXACT, CALCULATOR, + PROXY, TANGENTIAL) using a single LLM call that evaluates all queries together. + + The tool uses instructor with dynamic Pydantic models to ensure structured, + reliable output from the LLM. 
+ + Key features: + - Single LLM call for all queries (efficient and context-aware) + - Structured output with reasoning for each classification + - Follows established pattern from LLMRerankerTool + - Returns ClassifiedQuery objects with classification and reasoning + + Example: + >>> config = DecompClassifierConfig(model_name="gpt-4o-mini") + >>> classifier = DecompClassifierTool(config=config) + >>> result = await classifier.arun( + ... classifier.input_schema( + ... original_topic="What is fire risk?", + ... queries=["soil moisture data", "Fire Weather Index"], + ... ) + ... ) + >>> for cq in result.classified_queries: + ... print(f"{cq.query} → {cq.classification}: {cq.reasoning}") + """ + + input_schema = DecompClassifierInputSchema + output_schema = DecompClassifierOutputSchema + config_schema = DecompClassifierConfig + + def __init__( + self, + config: DecompClassifierConfig | None = None, + debug: bool = False, + ): + """ + Initialize the decomposition classifier tool. + + Args: + config: Configuration for the classifier (LLM settings, prompts) + debug: Enable debug logging + """ + super().__init__(config=config, debug=debug) + self.config: DecompClassifierConfig = self.config # type hint + + # Create internal agent config + agent_config = BaseAgentConfig( + base_url=self.config.base_url, + api_key=self.config.api_key, + model_name=self.config.model_name, + temperature=self.config.temperature, + system_prompt=self.config.agent_system_prompt, + ) + + # Create dummy input schema for the internal agent + class DummyInput(InputSchema): + """Dummy input schema for classification agent.""" + + pass + + # The dynamic output model is built per request because it depends on the + # queries, so only the agent config and input schema are stored here. + self.agent_config = agent_config + self.DummyInput = DummyInput + + def _create_dynamic_classification_model(self, queries: List[str]) -> type[BaseModel]: + """ + Create a dynamic Pydantic model with one field per query.
+ + This allows the LLM to see each query as a separate field in the JSON schema, + making it easier for structured output generation. + + Args: + queries: List of query strings to create fields for + + Returns: + Dynamically created Pydantic model class with one ClassifiedQuery field per query + """ + + def sanitize_field_name(name: str) -> str: + """Convert query text to a valid Python identifier.""" + # Keep the first 50 characters; replace spaces and hyphens with underscores + sanitized = name[:50].replace(" ", "_").replace("-", "_") + # Replace every remaining non-alphanumeric char with an underscore + sanitized = "".join(c if c.isalnum() or c == "_" else "_" for c in sanitized) + # Ensure it starts with a letter + if sanitized and not sanitized[0].isalpha(): + sanitized = "q_" + sanitized + return sanitized or "query" + + # Build fields dict: {field_name: (type, Field(...))} + query_fields = {} + for idx, query in enumerate(queries): + field_name = f"query_{idx}_{sanitize_field_name(query)}" + field_description = ( + f"Classification for query: '{query}'. " + "Select category (EXACT, CALCULATOR, PROXY, TANGENTIAL) and provide brief reasoning." + ) + query_fields[field_name] = ( + ClassifiedQuery, + Field(..., description=field_description), + ) + + # Create the dynamic model + DynamicClassificationModel = create_model( + "AllQueryClassifications", + **query_fields, + ) + + return DynamicClassificationModel + + def _format_prompt(self, original_topic: str, queries: List[str]) -> str: + """ + Format the classification prompt with the original topic and queries. + + Args: + original_topic: The original research question + queries: List of decomposed queries + + Returns: + Formatted prompt string ready for LLM + """ + # Format queries as numbered list + queries_formatted = "\n".join(f"{i+1}.
{q}" for i, q in enumerate(queries)) + + # Fill in the template + prompt = self.config.classification_prompt_template.format( + original_topic=original_topic, + queries=queries_formatted, + ) + + return prompt + + async def _classify_all_queries( + self, + original_topic: str, + queries: List[str], + ) -> List[ClassifiedQuery]: + """ + Classify all queries in a single LLM call. + + Args: + original_topic: The original research question + queries: List of decomposed queries to classify + + Returns: + List of ClassifiedQuery objects with classifications and reasoning + """ + # Create dynamic output model for this specific set of queries + DynamicOutputModel = self._create_dynamic_classification_model(queries) + + # Create classification agent with dynamic output schema + class ClassificationAgent(LiteLLMInstructorBaseAgent): + input_schema = self.DummyInput + output_schema = DynamicOutputModel + + classification_agent = ClassificationAgent( + config=self.agent_config, + debug=self.debug, + ) + + # Format the prompt + formatted_prompt = self._format_prompt(original_topic, queries) + + if self.debug: + logger.debug(f"Classification prompt:\n{formatted_prompt}") + + try: + # Call the LLM with structured output + messages = [ + classification_agent._default_system_message(), + { + "role": "user", + "content": formatted_prompt, + }, + ] + + response = await classification_agent.get_response_async(messages=messages) + + # Extract classified queries from response + response_dict = response.model_dump() + + classified_queries = [] + for idx, query in enumerate(queries): + # Find the corresponding field in the response + # The field names follow the pattern query_{idx}_... 
+ matching_key = None + for key in response_dict.keys(): + if key.startswith(f"query_{idx}_"): + matching_key = key + break + + if matching_key and response_dict[matching_key]: + # The value in response_dict is a plain dict with 'query', 'classification', 'reasoning' keys, + # so rebuild a ClassifiedQuery from it instead of trusting the query text echoed by the LLM + classification_data = response_dict[matching_key] + + classified_query = ClassifiedQuery( + query=query, # Use original query text + classification=DecompositionClassification(classification_data["classification"]), + reasoning=classification_data["reasoning"], + ) + classified_queries.append(classified_query) + + if self.debug: + logger.debug( + f"Classified '{query}' as {classified_query.classification}: " + f"{classified_query.reasoning[:100]}" + ) + else: + logger.warning(f"No classification found for query '{query}', defaulting to TANGENTIAL") + classified_queries.append( + ClassifiedQuery( + query=query, + classification=DecompositionClassification.TANGENTIAL, + reasoning="Classification not returned by LLM", + ) + ) + + return classified_queries + + except Exception as e: + logger.error(f"Error classifying queries: {e}") + # Return tangential for all queries on error (NOTE: a caller whose filter excludes TANGENTIAL will then drop every query) + return [ + ClassifiedQuery( + query=query, + classification=DecompositionClassification.TANGENTIAL, + reasoning=f"Error during classification: {str(e)}", + ) + for query in queries + ] + + async def _arun(self, params: DecompClassifierInputSchema) -> DecompClassifierOutputSchema: + """ + Main execution method for the classifier tool.
+ + Args: + params: Input parameters with original_topic and queries + + Returns: + Output with classified queries including classifications and reasoning + """ + if not params.queries: + return DecompClassifierOutputSchema(classified_queries=[]) + + # Classify all queries in one LLM call + classified_queries = await self._classify_all_queries( + original_topic=params.original_topic, + queries=params.queries, + ) + + return DecompClassifierOutputSchema(classified_queries=classified_queries) + + def __str__(self) -> str: + return f"{self.__class__.__name__} | model={self.config.model_name}" + + def __repr__(self) -> str: + return str(self) + + +# Export public API +__all__ = [ + "DecompClassifierInputSchema", + "DecompClassifierOutputSchema", + "DecompClassifierConfig", + "DecompClassifierTool", +] diff --git a/examples/decomp_classifier_example.py b/examples/decomp_classifier_example.py new file mode 100644 index 00000000..c43d2154 --- /dev/null +++ b/examples/decomp_classifier_example.py @@ -0,0 +1,285 @@ +""" +Example script demonstrating the decomposition classifier for aspect search. + +This example shows how to: +1. Enable query classification in the aspect search agent +2. View classifications for decomposed queries +3. Filter queries by classification (e.g., skip TANGENTIAL queries) +4. Access classification results for analysis +""" + +import asyncio +import os + +from akd.agents.search.aspect_search import AspectSearchAgent, AspectSearchConfig +from akd.structures import DecompositionClassification +from akd.tools.decomp_classifier import DecompClassifierConfig + + +async def example_basic_classification(): + """ + Example 1: Basic classification without filtering. + + This runs the aspect search with classification enabled, allowing you to + see how each decomposed query is categorized, but all queries are still executed. 
+ """ + print("\n" + "=" * 80) + print("EXAMPLE 1: Basic Classification (No Filtering)") + print("=" * 80 + "\n") + + config = AspectSearchConfig( + model_name="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + enable_query_classification=True, + classifier_config=DecompClassifierConfig( + model_name="gpt-5-mini", + temperature=0.0, + ), + max_turns=2, # Limit turns for faster example + num_editors=2, # Fewer editors for faster example + ) + + agent = AspectSearchAgent(config=config, debug=True) + + # Run aspect search on a sample topic + result = await agent.arun(agent.input_schema(topic="What is fire risk in forests?")) + + print("\n" + "-" * 80) + print("CLASSIFICATION RESULTS") + print("-" * 80 + "\n") + + # Display classifications from interviews + for idx, interview in enumerate(result.interview_results, 1): + print(f"Interview {idx}:") + if "messages" in interview: + # Look for messages that contain classified queries + for message in interview["messages"]: + if hasattr(message, "tool_calls") and message.tool_calls: + # This is where queries were generated + print(f" Editor: {interview.get('editor', {}).get('name', 'Unknown')}") + + # Check if classifications are available in the interview state + # Note: Classifications may be stored differently depending on implementation + print() + + print(f"Total search results: {len(result.search_results)}") + print(f"Total references: {len(result.references)}") + + +async def example_filtered_classification(): + """ + Example 2: Classification with filtering to skip TANGENTIAL queries. + + This configuration will classify queries and only execute those that are + EXACT, CALCULATOR, or PROXY, skipping any TANGENTIAL queries that are + only weakly related to the topic. 
+ """ + print("\n" + "=" * 80) + print("EXAMPLE 2: Classification with Filtering (Skip TANGENTIAL)") + print("=" * 80 + "\n") + + config = AspectSearchConfig( + model_name="gpt-4o", + api_key=os.getenv("OPENAI_API_KEY"), + enable_query_classification=True, + classifier_config=DecompClassifierConfig( + model_name="gpt-5-mini", + temperature=0.0, + ), + # Only execute queries that are directly relevant + filter_classifications=[ + DecompositionClassification.EXACT, + DecompositionClassification.CALCULATOR, + DecompositionClassification.PROXY, + # TANGENTIAL queries will be skipped + ], + max_turns=2, + num_editors=2, + ) + + agent = AspectSearchAgent(config=config, debug=True) + + # Run aspect search + result = await agent.arun(agent.input_schema(topic="Ocean temperature trends and climate change")) + + print("\n" + "-" * 80) + print("FILTERING RESULTS") + print("-" * 80 + "\n") + + print("Configuration filters out TANGENTIAL queries.") + print("Only EXACT, CALCULATOR, and PROXY queries are executed.") + print(f"\nTotal search results: {len(result.search_results)}") + print(f"Total references: {len(result.references)}") + + +async def example_standalone_classifier(): + """ + Example 3: Using the classifier tool standalone. + + This shows how to use the DecompClassifierTool directly without the + aspect search agent, which can be useful for testing or analysis. + """ + print("\n" + "=" * 80) + print("EXAMPLE 3: Standalone Classifier Tool") + print("=" * 80 + "\n") + + from akd.tools.decomp_classifier import DecompClassifierTool + + # Create classifier + config = DecompClassifierConfig( + model_name="gpt-5-mini", + temperature=0.0, + ) + classifier = DecompClassifierTool(config=config, debug=True) + + # Test queries + original_topic = "What is fire risk in California forests?" 
+ test_queries = [ + "California wildfire risk index", + "soil moisture content in forests", + "wind speed and direction patterns", + "chlorophyll content as indicator of forest health", + "historical rainfall patterns in California", + "general climate change overview", + ] + + print(f"Original Topic: {original_topic}\n") + print("Decomposed Queries:") + for i, q in enumerate(test_queries, 1): + print(f" {i}. {q}") + print() + + # Classify + result = await classifier.arun( + classifier.input_schema(original_topic=original_topic, queries=test_queries) + ) + + print("\n" + "-" * 80) + print("CLASSIFICATIONS") + print("-" * 80 + "\n") + + # Display results + for cq in result.classified_queries: + print(f"Query: {cq.query}") + print(f"Classification: {cq.classification.value.upper()}") + print(f"Reasoning: {cq.reasoning}") + print() + + # Summary by category + print("-" * 80) + print("SUMMARY BY CATEGORY") + print("-" * 80 + "\n") + + from collections import Counter + + category_counts = Counter(cq.classification for cq in result.classified_queries) + + for category, count in category_counts.items(): + print(f"{category.value.upper()}: {count} queries") + + +async def example_domain_specific(): + """ + Example 4: Domain-specific classification for Earth science research. + + This demonstrates how the classifier handles domain-specific queries + related to Earth observation data and CMR (Common Metadata Repository). + """ + print("\n" + "=" * 80) + print("EXAMPLE 4: Domain-Specific Classification (Earth Science)") + print("=" * 80 + "\n") + + from akd.tools.decomp_classifier import DecompClassifierTool + + config = DecompClassifierConfig( + model_name="gpt-5-mini", + temperature=0.0, + ) + classifier = DecompClassifierTool(config=config) + + # Earth science topic + original_topic = "What is the impact of soil moisture on flood risk?" 
+ earth_science_queries = [ + "SMAP soil moisture L3 product", + "soil moisture anomaly calculation", + "precipitation data from GPM", + "topography and slope from DEM", + "NDVI as proxy for vegetation water stress", + "historical flood events database", + "general hydrology textbook information", + ] + + print(f"Original Topic: {original_topic}\n") + print("Earth Science Queries:") + for i, q in enumerate(earth_science_queries, 1): + print(f" {i}. {q}") + print() + + result = await classifier.arun( + classifier.input_schema(original_topic=original_topic, queries=earth_science_queries) + ) + + print("\n" + "-" * 80) + print("EARTH SCIENCE CLASSIFICATIONS") + print("-" * 80 + "\n") + + # Organize by category + by_category = { + DecompositionClassification.EXACT: [], + DecompositionClassification.CALCULATOR: [], + DecompositionClassification.PROXY: [], + DecompositionClassification.TANGENTIAL: [], + } + + for cq in result.classified_queries: + by_category[cq.classification].append(cq) + + for category, queries in by_category.items(): + print(f"\n{category.value.upper()} ({len(queries)} queries):") + for cq in queries: + print(f" • {cq.query}") + print(f" Reasoning: {cq.reasoning}") + + +async def main(): + """Run all examples.""" + print("=" * 80) + print("DECOMPOSITION CLASSIFICATION EXAMPLES") + print("=" * 80) + + # Check for API key + if not os.getenv("OPENAI_API_KEY"): + print("\nERROR: OPENAI_API_KEY environment variable not set.") + print("Please set it before running this example.") + return + + # Run examples + try: + # Example 3: Standalone classifier (fastest, no search) + await example_standalone_classifier() + + # Example 4: Domain-specific (fast, no search) + await example_domain_specific() + + # Example 1: Basic classification (slower, includes search) + # await example_basic_classification() + + # Example 2: Filtered classification (slower, includes search) + # await example_filtered_classification() + + print("\n" + "=" * 80) + print("EXAMPLES 
COMPLETED") + print("=" * 80) + print("\nNote: Examples 1 and 2 are commented out by default as they") + print("perform full aspect search which takes longer. Uncomment them") + print("in main() to run the full examples.") + + except Exception as e: + print(f"\nError running examples: {e}") + import traceback + + traceback.print_exc() + + +if __name__ == "__main__": + asyncio.run(main())