From 25c94e51d8ed490fecc4f04b17ece64f2f8ee689 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Tue, 6 Jan 2026 14:28:41 +0100
Subject: [PATCH 01/29] AI-2161 feat: add complex component config search tool

---
 TOOLS.md                              | 132 +++++++++
 src/keboola_mcp_server/server.py      |   2 +
 src/keboola_mcp_server/tools/usage.py | 395 ++++++++++++++++++++++++++
 tests/test_server.py                  |   1 +
 4 files changed, 530 insertions(+)
diff --git a/TOOLS.md b/TOOLS.md
index f8171479..7c025630 100644
--- a/TOOLS.md
+++ b/TOOLS.md
@@ -40,6 +40,7 @@ and the configuration ID.
 - [get_data_apps](#get_data_apps): Lists summaries of data apps in the project given the limit and offset or gets details of a data apps by
 providing their configuration IDs.
 - [modify_data_app](#modify_data_app): Creates or updates a Streamlit data app.
+- [search_keboola_objects](#search_keboola_objects): Deep search across Keboola objects including their full JSON configuration data.
 
 ### Project Tools
 - [get_project_info](#get_project_info): Retrieves structured information about the current project,
@@ -1721,6 +1722,137 @@ updating, set `authentication_type` to `default` to keep the existing authentica
 }
 ```
 
+---
+<a name="search_keboola_objects"></a>
+## search_keboola_objects
+**Annotations**: `read-only`
+
+**Tags**: `usage`
+
+**Description**:
+
+Deep search across Keboola objects including their full JSON configuration data.
+
+WHAT IT SEARCHES:
+- Buckets/Tables: name, description, metadata, column names, column descriptions, and entire API payload
+- Components/Flows/Data Apps/Transformations: name, description, and entire configuration JSON in raw format:
+  * All configuration parameters and nested settings
+  * Storage mappings (input/output tables)
+  * Credentials and connection details
+  * SQL queries and code blocks
+  * Any other data stored in the configuration
+
+WHEN TO USE:
+- Find configurations by specific parameter values (e.g., API endpoints, database hosts)
+- Search deep in nested JSON structures (e.g., table mappings, processors)
+- Locate objects containing specific SQL code or queries
+- Find configurations with particular credentials or connection strings
+- Use advanced pattern matching with wildcards or regex
+
+PATTERN MATCHING:
+- literal (default): Exact text matching - patterns=["salesforce.com"]
+- wildcard: Glob patterns with * - patterns=["sales*"] matches "sales", "salesforce", "sales_data"
+- regex: Regular expressions - patterns=["flow-[0-9]+"] matches "flow-1", "flow-123"
+- Multiple patterns use OR logic (matches ANY pattern)
+
+USAGE EXAMPLES:
+
+1. Find extractors connecting to a specific database:
+   patterns=["prod-db-server.company.com"], search_types=["component"]
+
+2. Find transformations using a specific input table:
+   patterns=["in.c-main.customers"], search_types=["transformation"]
+
+3. Find all objects with "test" or "staging" in their configuration:
+   patterns=["test", "staging"], mode="literal"
+
+4. Find flows starting with "daily-" prefix:
+   patterns=["daily-*"], mode="wildcard", search_types=["flow"]
+
+5. Find components with API version v2 or v3:
+   patterns=["api/v[23]"], mode="regex", search_types=["component"]
+
+6. Find data apps using specific Python packages:
+   patterns=["pandas", "streamlit"], search_types=["data-app"]
+
+7. Search for exact table IDs (avoid partial matches):
+   patterns=["in.c-bucket.table"], whole_word=True
+
+8. Find configs with nested JSON structure (key-value in parameters):
+   patterns=[""parameters":\s*\{.*api\.paychex.*\}"], mode="regex"
+
+9. Find configs with specific authentication type:
+   patterns=[""authentication":\s*\{.*"type":\s*"oauth20""], mode="regex"
+
+10. Find configs with incremental loading enabled:
+    patterns=[""incremental":\s*true"], mode="regex"
+
+11. Find storage mappings referencing specific tables:
+    patterns=[""source":\s*"in\.*\.customers""], mode="regex"
+
+TIPS:
+- Use whole_word=True when searching for IDs to avoid partial matches
+- Start with literal mode for speed, use wildcard/regex for flexibility
+- Narrow results with search_types when you know the object type
+- Results include direct links to objects in Keboola UI
+
+
+**Input JSON Schema**:
+```json
+{
+  "properties": {
+    "patterns": {
+      "description": "Search patterns to match. Multiple patterns use OR logic (matches ANY pattern). Examples: [\"customer\"], [\"sales*\", \"revenue*\"] for wildcards, [\"flow-.*\"] for regex. Do not pass empty strings.",
+      "items": {
+        "type": "string"
+      },
+      "type": "array"
+    },
+    "mode": {
+      "default": "literal",
+      "description": "Pattern matching mode: \"literal\" - exact text match (default, fastest), \"wildcard\" - use * for glob patterns (e.g., \"sales*\"), \"regex\" - full regular expressions (most powerful).",
+      "enum": [
+        "literal",
+        "wildcard",
+        "regex"
+      ],
+      "type": "string"
+    },
+    "whole_word": {
+      "default": false,
+      "description": "When true, only matches complete words. Prevents partial matches like finding \"test\" in \"latest\". Useful for searching IDs or specific terms.",
+      "type": "boolean"
+    },
+    "ignore_case": {
+      "default": true,
+      "description": "When true, search ignores letter casing (e.g., \"Sales\" matches \"sales\"). Default: true.",
+      "type": "boolean"
+    },
+    "search_types": {
+      "default": [],
+      "description": "Filter by object types: \"bucket\", \"table\", \"component\", \"transformation\", \"flow\", \"data-app\". Empty list or [\"any\"] searches all types. Use to narrow results when you know what you need.",
+      "items": {
+        "enum": [
+          "bucket",
+          "table",
+          "component",
+          "flow",
+          "data-app",
+          "transformation",
+          "any"
+        ],
+        "type": "string"
+      },
+      "type": "array"
+    }
+  },
+  "required": [
+    "patterns"
+  ],
+  "type": "object"
+}
+```
+
 ---
 
 # Documentation Tools
diff --git a/src/keboola_mcp_server/server.py b/src/keboola_mcp_server/server.py
index fff236a2..dc3fb34c 100644
--- a/src/keboola_mcp_server/server.py
+++ b/src/keboola_mcp_server/server.py
@@ -34,6 +34,7 @@
 from keboola_mcp_server.tools.search import add_search_tools
 from keboola_mcp_server.tools.sql import add_sql_tools
 from keboola_mcp_server.tools.storage import add_storage_tools
+from keboola_mcp_server.tools.usage import add_usage_tools
 
 LOG = logging.getLogger(__name__)
 
@@ -238,6 +239,7 @@ def create_server(
     add_sql_tools(mcp)
     add_storage_tools(mcp)
     add_keboola_prompts(mcp)
+    add_usage_tools(mcp)
 
     if custom_routes_handling != 'return':
         return mcp
diff --git a/src/keboola_mcp_server/tools/usage.py b/src/keboola_mcp_server/tools/usage.py
index 5f3fe173..8e2fc6d3 100644
--- a/src/keboola_mcp_server/tools/usage.py
+++ b/src/keboola_mcp_server/tools/usage.py
@@ -1,4 +1,5 @@
 import json
+import re
 from typing import Annotated, Literal, Mapping, Optional, Sequence
 
 from fastmcp import Context
@@ -356,3 +357,397 @@ def get_last_updated_by(
         configuration_id=str(configuration_id),
         timestamp=timestamp,
     )
+
+
+def search_json_string(
+    json_string: str,
+    pattern: str,
+    *,
+    mode: Literal['literal', 'wildcard', 'regex'] = 'literal',
+    whole_word: bool = False,
+    ignore_case: bool = True,
+) -> bool:
+    """
+    Search inside a JSON string using different pattern modes.
+
+    Args:
+        json_string: Stringified JSON (e.g. json.dumps(obj))
+        pattern: Search pattern as string
+        mode:
+            - "literal": exact text match
+            - "wildcard": supports '*' like shell glob
+            - "regex": full regular expression
+        whole_word: Match full words only
+        ignore_case: Case-insensitive search
+
+    Returns:
+        True if pattern is found, False otherwise
+    """
+
+    flags = re.IGNORECASE if ignore_case else 0
+
+    # Escape literal pattern
+    if mode == 'literal':
+        regex = re.escape(pattern)
+
+    # Convert wildcard -> regex
+    elif mode == 'wildcard':
+        # Escape everything except '*'
+        regex = re.escape(pattern).replace(r'\*', '.*')
+
+    # Regex mode
+    elif mode == 'regex':
+        regex = pattern
+
+    else:
+        raise ValueError(f'Unsupported mode: {mode}')
+
+    # Whole word match; group first so a top-level '|' in the pattern stays inside both \b anchors
+    if whole_word:
+        regex = rf'\b(?:{regex})\b'
+
+    return re.search(regex, json_string, flags) is not None
+
+
+def _matches_patterns(
+    value: JsonStruct,
+    patterns: Sequence[str],
+    *,
+    mode: Literal['literal', 'wildcard', 'regex'],
+    whole_word: bool,
+    ignore_case: bool,
+) -> bool:
+    haystack = _stringify_for_search(value)
+    return any(
+        search_json_string(
+            haystack,
+            pattern,
+            mode=mode,
+            whole_word=whole_word,
+            ignore_case=ignore_case,
+        )
+        for pattern in patterns
+    )
+
+
+class DataMatch(BaseModel):
+    item_type: Literal['bucket', 'table', 'component', 'flow', 'data-app', 'transformation']
+    bucket_id: str | None = None
+    table_id: str | None = None
+    component_id: str | None = None
+    configuration_id: str | None = None
+    configuration_row_id: str | None = None
+    name: str | None = None
+    description: str | None = None
+
+
+def add_usage_tools(mcp: KeboolaMcpServer) -> None:
+    """Add usage/search tools to the MCP server."""
+    mcp.add_tool(
+        FunctionTool.from_function(
+            search_keboola_objects,
+            annotations=ToolAnnotations(readOnlyHint=True),
+            serializer=toon_serializer,
+            tags={USAGE_TOOLS_TAG},
+        )
+    )
+
+
+async def search_data_matches(
+    client: KeboolaClient,
+    patterns: Sequence[str],
+    *,
+    mode: Literal['literal', 'wildcard', 'regex'] = 'literal',
+    whole_word: bool = False,
+    ignore_case: bool = True,
+    search_types: Optional[Sequence[SearchDataType]] = None,
+) -> list[DataMatch]:
+    """
+    Searches through configurations (components, transformations, flows, data apps) and optionally buckets/tables.
+
+    :param client: The Keboola client to use.
+    :param patterns: Patterns to search for.
+    :param mode: Search mode (literal, wildcard, regex).
+    :param whole_word: Match whole words only.
+    :param ignore_case: Case-insensitive search.
+    :param search_types: Types to search in (bucket, table, component, transformation, flow, data-app).
+    :return: A list of data matches.
+    """
+    normalized_patterns = _normalize_ids(patterns)
+    if not normalized_patterns:
+        return []
+
+    normalized_types = _normalize_data_search_types(search_types or [])
+    include_components = 'component' in normalized_types
+    include_transformations = 'transformation' in normalized_types
+    include_flow = 'flow' in normalized_types
+    include_data_apps = 'data-app' in normalized_types
+
+    matches: list[DataMatch] = []
+
+    if 'bucket' in normalized_types:
+        for bucket in await client.storage_client.bucket_list():
+            if _matches_patterns(
+                bucket, normalized_patterns, mode=mode, whole_word=whole_word, ignore_case=ignore_case
+            ):
+                matches.append(
+                    DataMatch(
+                        item_type='bucket',
+                        bucket_id=bucket.get('id'),
+                        name=bucket.get('displayName') or bucket.get('name'),
+                        description=bucket.get('description'),
+                    )
+                )
+
+    if 'table' in normalized_types:
+        for bucket in await client.storage_client.bucket_list():
+            bucket_id = bucket.get('id')
+            if not bucket_id:
+                continue
+            tables = await client.storage_client.bucket_table_list(bucket_id, include=['columns', 'columnMetadata'])
+            for table in tables:
+                if _matches_patterns(
+                    table, normalized_patterns, mode=mode, whole_word=whole_word, ignore_case=ignore_case
+                ):
+                    matches.append(
+                        DataMatch(
+                            item_type='table',
+                            bucket_id=bucket_id,
+                            table_id=table.get('id'),
+                            name=table.get('displayName') or table.get('name'),
+                            description=table.get('description'),
+                        )
+                    )
+
+    if include_components or include_transformations or include_flow or include_data_apps:
+        components = await client.storage_client.component_list(include=['configuration', 'rows'])
+        for component in components:
+            component_id = component.get('id')
+            if not component_id:
+                continue
+            component_type = component.get('type')
+
+            if component_id == DATA_APP_COMPONENT_ID and not include_data_apps:
+                continue
+            if component_id in {CONDITIONAL_FLOW_COMPONENT_ID, ORCHESTRATOR_COMPONENT_ID} and not include_flow:
+                continue
+            if component_id not in {DATA_APP_COMPONENT_ID, CONDITIONAL_FLOW_COMPONENT_ID, ORCHESTRATOR_COMPONENT_ID}:
+                if component_type == 'transformation' and not include_transformations:
+                    continue
+                if component_type != 'transformation' and not include_components:
+                    continue
+
+            configurations = component.get('configurations', []) or []
+            for configuration in configurations:
+                configuration_id = _get_configuration_id(configuration)
+                if not configuration_id:
+                    continue
+
+                config_name = configuration.get('name')
+                config_description = configuration.get('description')
+                config_definition = configuration.get('configuration') or {}
+
+                config_match = _matches_patterns(
+                    {
+                        'component_id': component_id,
+                        'component_type': component_type,
+                        'configuration_id': configuration_id,
+                        'name': config_name,
+                        'description': config_description,
+                        'configuration': config_definition,
+                    },
+                    normalized_patterns,
+                    mode=mode,
+                    whole_word=whole_word,
+                    ignore_case=ignore_case,
+                )
+
+                if config_match:
+                    matches.append(
+                        DataMatch(
+                            item_type=(
+                                'data-app'
+                                if component_id == DATA_APP_COMPONENT_ID
+                                else (
+                                    'flow'
+                                    if component_id in {CONDITIONAL_FLOW_COMPONENT_ID, ORCHESTRATOR_COMPONENT_ID}
+                                    else 'transformation' if component_type == 'transformation' else 'component'
+                                )
+                            ),
+                            component_id=component_id,
+                            configuration_id=configuration_id,
+                            name=config_name,
+                            description=config_description,
+                        )
+                    )
+
+                rows = configuration.get('rows', []) or []
+                for row in rows:
+                    row_id = row.get('id')
+                    row_name = row.get('name')
+                    row_description = row.get('description')
+                    row_config = row.get('configuration') or {}
+                    if not row_id:
+                        continue
+
+                    row_match = _matches_patterns(
+                        {
+                            'component_id': component_id,
+                            'component_type': component_type,
+                            'configuration_id': configuration_id,
+                            'row_id': row_id,
+                            'name': row_name,
+                            'description': row_description,
+                            'configuration': row_config,
+                        },
+                        normalized_patterns,
+                        mode=mode,
+                        whole_word=whole_word,
+                        ignore_case=ignore_case,
+                    )
+
+                    if row_match:
+                        matches.append(
+                            DataMatch(
+                                item_type=(
+                                    'data-app'
+                                    if component_id == DATA_APP_COMPONENT_ID
+                                    else (
+                                        'flow'
+                                        if component_id in {CONDITIONAL_FLOW_COMPONENT_ID, ORCHESTRATOR_COMPONENT_ID}
+                                        else 'transformation' if component_type == 'transformation' else 'component'
+                                    )
+                                ),
+                                component_id=component_id,
+                                configuration_id=configuration_id,
+                                configuration_row_id=row_id,
+                                name=row_name or config_name,
+                                description=row_description or config_description,
+                            )
+                        )
+
+    return matches
+
+
+@tool_errors()
+async def search_keboola_objects(
+    ctx: Context,
+    patterns: Annotated[
+        Sequence[str],
+        Field(
+            description=(
+                'Search patterns to match. Multiple patterns use OR logic (matches ANY pattern). '
+                'Examples: ["customer"], ["sales*", "revenue*"] for wildcards, ["flow-.*"] for regex. '
+                'Do not pass empty strings.'
+            )
+        ),
+    ],
+    mode: Annotated[
+        Literal['literal', 'wildcard', 'regex'],
+        Field(
+            description=(
+                'Pattern matching mode: '
+                '"literal" - exact text match (default, fastest), '
+                '"wildcard" - use * for glob patterns (e.g., "sales*"), '
+                '"regex" - full regular expressions (most powerful).'
+            )
+        ),
+    ] = 'literal',
+    whole_word: Annotated[
+        bool,
+        Field(
+            description=(
+                'When true, only matches complete words. Prevents partial matches like finding "test" in "latest". '
+                'Useful for searching IDs or specific terms.'
+            )
+        ),
+    ] = False,
+    ignore_case: Annotated[
+        bool,
+        Field(description='When true, search ignores letter casing (e.g., "Sales" matches "sales"). Default: true.'),
+    ] = True,
+    search_types: Annotated[
+        Sequence[SearchDataType],
+        Field(
+            description=(
+                'Filter by object types: "bucket", "table", "component", "transformation", "flow", "data-app". '
+                'Empty list or ["any"] searches all types. Use to narrow results when you know what you need.'
+            )
+        ),
+    ] = tuple(),
+) -> list[DataMatch]:
+    """
+    Deep search across Keboola objects including their full JSON configuration data.
+
+    WHAT IT SEARCHES:
+    - Buckets/Tables: name, description, metadata, column names, column descriptions, and entire API payload
+    - Components/Flows/Data Apps/Transformations: name, description, and entire configuration JSON in raw format:
+      * All configuration parameters and nested settings
+      * Storage mappings (input/output tables)
+      * Credentials and connection details
+      * SQL queries and code blocks
+      * Any other data stored in the configuration
+
+    WHEN TO USE:
+    - Find configurations by specific parameter values (e.g., API endpoints, database hosts)
+    - Search deep in nested JSON structures (e.g., table mappings, processors)
+    - Locate objects containing specific SQL code or queries
+    - Find configurations with particular credentials or connection strings
+    - Use advanced pattern matching with wildcards or regex
+
+    PATTERN MATCHING:
+    - literal (default): Exact text matching - patterns=["salesforce.com"]
+    - wildcard: Glob patterns with * - patterns=["sales*"] matches "sales", "salesforce", "sales_data"
+    - regex: Regular expressions - patterns=["flow-[0-9]+"] matches "flow-1", "flow-123"
+    - Multiple patterns use OR logic (matches ANY pattern)
+
+    USAGE EXAMPLES:
+
+    1. Find extractors connecting to a specific database:
+       patterns=["prod-db-server.company.com"], search_types=["component"]
+
+    2. Find transformations using a specific input table:
+       patterns=["in.c-main.customers"], search_types=["transformation"]
+
+    3. Find all objects with "test" or "staging" in their configuration:
+       patterns=["test", "staging"], mode="literal"
+
+    4. Find flows starting with "daily-" prefix:
+       patterns=["daily-*"], mode="wildcard", search_types=["flow"]
+
+    5. Find components with API version v2 or v3:
+       patterns=["api/v[23]"], mode="regex", search_types=["component"]
+
+    6. Find data apps using specific Python packages:
+       patterns=["pandas", "streamlit"], search_types=["data-app"]
+
+    7. Search for exact table IDs (avoid partial matches):
+       patterns=["in.c-bucket.table"], whole_word=True
+
+    8. Find configs with nested JSON structure (key-value in parameters):
+       patterns=["\"parameters\":\\s*\\{.*api\\.paychex.*\\}"], mode="regex"
+
+    9. Find configs with specific authentication type:
+       patterns=["\"authentication\":\\s*\\{.*\"type\":\\s*\"oauth20\""], mode="regex"
+
+    10. Find configs with incremental loading enabled:
+        patterns=["\"incremental\":\\s*true"], mode="regex"
+
+    11. Find storage mappings referencing specific tables:
+        patterns=["\"source\":\\s*\"in\\.*\\.customers\""], mode="regex"
+
+    TIPS:
+    - Use whole_word=True when searching for IDs to avoid partial matches
+    - Start with literal mode for speed, use wildcard/regex for flexibility
+    - Narrow results with search_types when you know the object type
+    - Results identify each match by item type and IDs (bucket/table or component/configuration/row)
+    """
+    client = KeboolaClient.from_state(ctx.session.state)
+    return await search_data_matches(
+        client=client,
+        patterns=patterns,
+        mode=mode,
+        whole_word=whole_word,
+        ignore_case=ignore_case,
+        search_types=search_types,
+    )
diff --git a/tests/test_server.py b/tests/test_server.py
index cff89f87..890958e8 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -60,6 +60,7 @@ async def test_list_tools(self):
             'query_data',
             'run_job',
             'search',
+            'search_keboola_objects',
             'update_config',
             'update_config_row',
             'update_descriptions',

From 7478604833806d9241c057958542716f073cddd2 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Thu, 8 Jan 2026 15:34:40 +0100
Subject: [PATCH 02/29] AI-2161 feat: add instruction examples

---
 src/keboola_mcp_server/tools/usage.py | 28 +++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/keboola_mcp_server/tools/usage.py b/src/keboola_mcp_server/tools/usage.py
index 8e2fc6d3..dfbe5701 100644
--- a/src/keboola_mcp_server/tools/usage.py
+++ b/src/keboola_mcp_server/tools/usage.py
@@ -431,7 +431,7 @@ def _matches_patterns(
 
 
 class DataMatch(BaseModel):
-    item_type: Literal['bucket', 'table', 'component', 'flow', 'data-app', 'transformation']
+    item_type: Literal[ 'component', 'flow', 'data-app', 'transformation']
     bucket_id: str | None = None
     table_id: str | None = None
     component_id: str | None = None
@@ -710,10 +710,10 @@ async def search_keboola_objects(
        patterns=["in.c-main.customers"], search_types=["transformation"]
 
     3. Find all objects with "test" or "staging" in their configuration:
-       patterns=["test", "staging"], mode="literal"
+       patterns=["test", "staging"], mode="literal", search_types=["component", "transformation", "flow", "data-app"]
 
-    4. Find flows starting with "daily-" prefix:
-       patterns=["daily-*"], mode="wildcard", search_types=["flow"]
+    4. Find which flows use a specific configuration (e.g. kds-team.ex-shopify 01k9cz233cvd1rga3zzx40g8qj):
+       patterns=["01k9cz233cvd1rga3zzx40g8qj"], search_types=["flow"]
 
     5. Find components with API version v2 or v3:
        patterns=["api/v[23]"], mode="regex", search_types=["component"]
@@ -731,10 +731,26 @@ async def search_keboola_objects(
        patterns=["\"authentication\":\\s*\\{.*\"type\":\\s*\"oauth20\""], mode="regex"
 
     10. Find configs with incremental loading enabled:
-        patterns=["\"incremental\":\\s*true"], mode="regex"
+        patterns=["\"incremental\":\\s*true"], mode="regex", search_types=["component", "transformation"]
 
     11. Find storage mappings referencing specific tables:
-        patterns=["\"source\":\\s*\"in\\.*\\.customers\""], mode="regex"
+        patterns=["\"source\":\\s*\"in\\..*\\.customers\""], mode="regex", search_types=["transformation", "component"]
+    
+    12. Find SQL transformations that calculate avg_monetary_value or create rfm_segment_summary:
+        patterns=["avg_monetary_value", "rfm_segment_summary"], mode="literal", search_types=["transformation"]
+    
+    13. Find which components use a specific table in input/output mappings (both directions):
+        patterns=["out\\.c-RFM-Segment-Summary-for-App\\.rfm_segment_summary"], 
+        mode="regex", 
+        search_types=["component", "transformation"]
+        
+        # Or more specific - find only input mappings:
+        patterns=["\"source\":\\s*\"out\\.c-RFM-Segment-Summary-for-App\\.rfm_segment_summary\""], 
+        mode="regex"
+        
+        # Or find only output mappings:
+        patterns=["\"destination\":\\s*\"out\\.c-RFM-Segment-Summary-for-App\\.rfm_segment_summary\""], 
+        mode="regex"
 
     TIPS:
     - Use whole_word=True when searching for IDs to avoid partial matches

From c8a43cba8d21769e6244be4d2f7db009832231df Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 12 Jan 2026 18:17:13 +0100
Subject: [PATCH 03/29] AI-2161 fix: ignore private matches in SearchHit
 equality

---
 src/keboola_mcp_server/tools/search/tools.py | 237 ++++++++++++-------
 src/keboola_mcp_server/tools/search/usage.py |   2 +-
 2 files changed, 147 insertions(+), 92 deletions(-)

diff --git a/src/keboola_mcp_server/tools/search/tools.py b/src/keboola_mcp_server/tools/search/tools.py
index bdda9062..638d6d05 100644
--- a/src/keboola_mcp_server/tools/search/tools.py
+++ b/src/keboola_mcp_server/tools/search/tools.py
@@ -12,11 +12,11 @@
 from keboola_mcp_server.clients.base import JsonDict
 from keboola_mcp_server.clients.client import (
     CONDITIONAL_FLOW_COMPONENT_ID,
+    DATA_APP_COMPONENT_ID,
     ORCHESTRATOR_COMPONENT_ID,
     KeboolaClient,
     get_metadata_property,
 )
-from keboola_mcp_server.clients.storage import ItemType
 from keboola_mcp_server.config import MetadataField
 from keboola_mcp_server.errors import tool_errors
 from keboola_mcp_server.links import Link, ProjectLinksManager
@@ -45,29 +45,17 @@
     'state',
 ]
 
-ITEM_TYPE_TO_COMPONENT_TYPES: Mapping[ItemType, Sequence[str]] = {
-    'flow': ['other'],
-    'transformation': ['transformation'],
-    'configuration': ['extractor', 'writer'],
-    'configuration-row': ['extractor', 'writer'],
-    'workspace': ['other'],
-}
-
 SEARCH_ITEM_TYPE_TO_COMPONENT_TYPES: Mapping[SearchItemType, Sequence[str]] = {
     'data-app': ['other'],
     'flow': ['other'],
     'transformation': ['transformation'],
     'component': ['extractor', 'writer', 'application'],
-    'configuration': ['extractor', 'writer'],
-    'configuration-row': ['extractor', 'writer'],
+    'configuration': ['extractor', 'writer', 'application'],
+    'configuration-row': ['extractor', 'writer', 'application'],
     'workspace': ['other'],
 }
 
 SearchType = Literal['textual', 'config-based']
-SearchConfigurationScope = Literal['any', 'parameters', 'storage', 'processors', 'authorization', 'tasks', 'phases']
-SearchConfigurationScopeResp = Literal[
-    'any', 'parameters', 'storage', 'storage.input', 'storage.output', 'processors', 'authorization', 'tasks', 'phases'
-]
 SearchPatternMode = Literal['regex', 'literal']
 
 
@@ -109,7 +97,7 @@ class SearchHit(BaseModel):
     configuration_id: str | None = Field(default=None, description='The ID of the configuration.')
     configuration_row_id: str | None = Field(default=None, description='The ID of the configuration row.')
 
-    item_type: ItemType = Field(description='The type of the item (e.g. table, bucket, configuration, etc.).')
+    item_type: SearchItemType = Field(description='The type of the item (e.g. table, bucket, configuration, etc.).')
     updated: str = Field(description='The date and time the item was created in ISO 8601 format.')
 
     name: str | None = Field(default=None, description='Name of the item.')
@@ -118,6 +106,11 @@ class SearchHit(BaseModel):
     links: list[Link] = Field(default_factory=list, description='Links to the item.')
     _matches: list[PatternMatch] = PrivateAttr(default_factory=list)
 
+    def __eq__(self, other: object) -> bool:
+        if isinstance(other, SearchHit):
+            return self.model_dump() == other.model_dump()
+        return False
+
     @model_validator(mode='after')
     def check_id_fields(self) -> 'SearchHit':
         id_fields = [
@@ -154,7 +147,8 @@ class SearchSpec(BaseModel):
     case_sensitive: bool = False
     search_scopes: Sequence[str] = tuple()
     search_type: SearchType = 'textual'
-    return_matched_patterns: bool = False
+    # If True, returns all matched patterns instead of only the first one.
+    return_all_matched_patterns: bool = False
     # If True, stops searching scopes after the first match is found.
     stop_searching_after_first_value_match: bool = True
 
@@ -218,7 +212,7 @@ def match_patterns(self, value: str | JsonDict | None) -> list[str]:
             if compiled.search(haystack)
         )
 
-        if self.return_matched_patterns:
+        if self.return_all_matched_patterns:
             return list(matches)
 
         return [m] if (m := next(matches, None)) else []
@@ -250,6 +244,12 @@ def match_configuration_scopes(self, configuration: JsonDict | None) -> list[Pat
             )
 
     def match_texts(self, texts: Sequence[str]) -> list[PatternMatch]:
+        """
+        Matches a sequence of strings against the patterns.
+
+        :param texts: The sequence of strings to match against the patterns.
+        :return: A list of PatternMatch objects.
+        """
         matches: list[PatternMatch] = []
 
         matches = (
@@ -270,21 +270,17 @@ def _get_field_value(item: JsonDict, fields: Sequence[str]) -> Any | None:
     return None
 
 
-def _check_column_match(table: JsonDict, cfg: SearchSpec) -> bool:
+def _check_column_match(table: JsonDict, cfg: SearchSpec) -> list[PatternMatch]:
     """Check if any column name or description matches the patterns."""
     # Check column names (list of strings)
-    for col_name in table.get('columns', []):
-        if cfg.match_patterns(col_name):
-            return True
-
-    # Check column descriptions (from columnMetadata)
-    column_metadata = table.get('columnMetadata', {})
-    for col_meta in column_metadata.values():
-        col_description = get_metadata_property(col_meta, MetadataField.DESCRIPTION)
-        if cfg.match_patterns(col_description):
-            return True
+    if col_names := table.get('columns', []):
+        if matched := cfg.match_texts(col_names):
+            return matched
 
-    return False
+    if col_metadata := table.get('columnMetadata', {}):
+        col_descs = [get_metadata_property(col_meta, MetadataField.DESCRIPTION) for col_meta in col_metadata.values()]
+        return cfg.match_texts(col_descs)  # a real list, because match_texts is typed to take a Sequence
+    return []  # nothing matched: return an empty list so the declared return type holds
 
 
 async def _fetch_buckets(client: KeboolaClient, cfg: SearchSpec) -> list[SearchHit]:
@@ -298,12 +294,7 @@ async def _fetch_buckets(client: KeboolaClient, cfg: SearchSpec) -> list[SearchH
         bucket_display_name = bucket.get('displayName')
         bucket_description = get_metadata_property(bucket.get('metadata', []), MetadataField.DESCRIPTION)
 
-        if (
-            cfg.match_patterns(bucket_id)
-            or cfg.match_patterns(bucket_name)
-            or cfg.match_patterns(bucket_display_name)
-            or cfg.match_patterns(bucket_description)
-        ):
+        if matches := cfg.match_texts([bucket_id, bucket_name, bucket_display_name, bucket_description]):
             hits.append(
                 SearchHit(
                     bucket_id=bucket_id,
@@ -312,7 +303,7 @@ async def _fetch_buckets(client: KeboolaClient, cfg: SearchSpec) -> list[SearchH
                     name=bucket_name,
                     display_name=bucket_display_name,
                     description=bucket_description,
-                )
+                ).with_matches(matches)
             )
     return hits
 
@@ -333,13 +324,9 @@ async def _fetch_tables(client: KeboolaClient, cfg: SearchSpec) -> list[SearchHi
             table_display_name = table.get('displayName')
             table_description = get_metadata_property(table.get('metadata', []), MetadataField.DESCRIPTION)
 
-            if (
-                cfg.match_patterns(table_id)
-                or cfg.match_patterns(table_name)
-                or cfg.match_patterns(table_display_name)
-                or cfg.match_patterns(table_description)
-                or _check_column_match(table, cfg)
-            ):
+            if matches := cfg.match_texts(
+                [table_id, table_name, table_display_name, table_description]
+            ) or _check_column_match(table, cfg):
                 hits.append(
                     SearchHit(
                         table_id=table_id,
@@ -348,7 +335,7 @@ async def _fetch_tables(client: KeboolaClient, cfg: SearchSpec) -> list[SearchHi
                         name=table_name,
                         display_name=table_display_name,
                         description=table_description,
-                    )
+                    ).with_matches(matches)
                 )
     return hits
 
@@ -373,6 +360,13 @@ async def _fetch_configs(
     client: KeboolaClient, spec: SearchSpec, component_type: str | None = None
 ) -> AsyncGenerator[SearchHit, None]:
     components = await client.storage_client.component_list(component_type, include=['configuration', 'rows'])
+
+    allowed_transformations = 'transformation' in spec.item_types
+    allowed_components = 'component' in spec.item_types
+    allowed_flows = 'flow' in spec.item_types
+    allowed_workspaces = 'workspace' in spec.item_types
+    allowed_data_apps = 'data-app' in spec.item_types
+
     for component in components:
         if not (component_id := component.get('id')):
             continue
@@ -380,10 +374,24 @@ async def _fetch_configs(
         current_component_type = component.get('type')
         if component_id in [ORCHESTRATOR_COMPONENT_ID, CONDITIONAL_FLOW_COMPONENT_ID]:
             item_type = 'flow'
+            if not allowed_flows:
+                continue
         elif current_component_type == 'transformation':
             item_type = 'transformation'
+            if not allowed_transformations:
+                continue
         elif component_id == 'keboola.sandboxes':
             item_type = 'workspace'
+            if not allowed_workspaces:
+                continue
+        elif component_id == DATA_APP_COMPONENT_ID:
+            item_type = 'data-app'
+            if not allowed_data_apps:
+                continue
+        elif current_component_type in ['extractor', 'writer', 'application']:
+            item_type = 'component'
+            if not allowed_components:
+                continue
         else:
             item_type = 'configuration'
 
@@ -396,11 +404,7 @@ async def _fetch_configs(
             config_updated = _get_field_value(config, ['currentVersion.created', 'created']) or ''
 
             if spec.search_type == 'textual':
-                if (
-                    spec.match_patterns(config_id)
-                    or spec.match_patterns(config_name)
-                    or spec.match_patterns(config_description)
-                ):
+                if matches := spec.match_texts([config_id, config_name, config_description]):
                     yield SearchHit(
                         component_id=component_id,
                         configuration_id=config_id,
@@ -408,7 +412,7 @@ async def _fetch_configs(
                         updated=config_updated,
                         name=config_name,
                         description=config_description,
-                    )
+                    ).with_matches(matches)
             elif spec.search_type == 'config-based':
                 if matches := spec.match_configuration_scopes(config.get('configuration')):
                     yield SearchHit(
@@ -428,11 +432,7 @@ async def _fetch_configs(
                 row_description = row.get('description')
 
                 if spec.search_type == 'textual':
-                    if (
-                        spec.match_patterns(row_id)
-                        or spec.match_patterns(row_name)
-                        or spec.match_patterns(row_description)
-                    ):
+                    if matches := spec.match_texts([row_id, row_name, row_description]):
                         yield SearchHit(
                             component_id=component_id,
                             configuration_id=config_id,
@@ -441,7 +441,7 @@ async def _fetch_configs(
                             updated=config_updated or _get_field_value(row, ['created']),
                             name=row_name,
                             description=row_description,
-                        )
+                        ).with_matches(matches)
 
                 elif spec.search_type == 'config-based':
                     if matches := spec.match_configuration_scopes(row.get('configuration')):
@@ -463,19 +463,46 @@ async def search(
         list[str],
         Field(
             description='One or more search patterns to match against item ID, name, display name, or description. '
-            'Supports regex patterns. Case-insensitive. Examples: ["customer"], ["sales", "revenue"], '
-            '["test.*table"]. Do not use empty strings or empty lists.'
+            'Supports regex patterns. Case-insensitive by default. Examples: ["customer"], ["sales", "revenue"], '
+            '["test.*table"], ["key1.*:.*key2.*:.*value.*"]. Do not use empty strings or empty lists.'
         ),
     ],
     item_types: Annotated[
-        Sequence[ItemType],
+        Sequence[SearchItemType],
         Field(
-            description='Optional filter for specific Keboola item types. Leave empty to search all types. '
+            description='Filter for specific Keboola item types. '
             'Common values: "table" (data tables), "bucket" (table containers), "transformation" '
-            '(SQL/Python transformations), "configuration" (extractor/writer configs), "flow" (orchestration flows). '
-            "Use when you know what type of item you're looking for."
+            '(SQL/Python transformations), "component" (extractor/writer/application components), '
+            '"data-app" (data apps), "flow" (orchestration flows). '
+            "Use when you know what type of item you're looking for or leave empty to search all types."
+        ),
+    ] = tuple(),
+    search_type: Annotated[
+        SearchType,
+        Field(
+            description='Search mode: "textual" (name/id/description) or "config-based" (stringified configuration '
+            'payloads).'
+        ),
+    ] = 'textual',
+    scopes: Annotated[
+        Sequence[str],
+        Field(
+            description='Dot-separated keys to search in configuration payloads, used with "config-based" search. '
+            'Example: "parameters.field", "storage.input", "storage.output", "processors.before", "processors.after", '
+            '"authorization", "tasks", "phases". Leave empty to search the whole configuration.'
         ),
     ] = tuple(),
+    mode: Annotated[
+        SearchPatternMode,
+        Field(
+            description='How to interpret patterns: "regex" for regular expressions or "literal" for exact text '
+            '(default: "literal").'
+        ),
+    ] = 'literal',
+    case_sensitive: Annotated[
+        bool,
+        Field(description='If true, match patterns with case sensitivity (default: false).'),
+    ] = False,
     limit: Annotated[
         int,
         Field(
@@ -486,59 +513,89 @@ async def search(
     offset: Annotated[int, Field(description='Number of matching items to skip for pagination (default: 0).')] = 0,
 ) -> list[SearchHit]:
     """
-    Searches for Keboola items (tables, buckets, configurations, transformations, flows, etc.) in the current project
-    by matching patterns against item ID, name, display name, or description. Returns matching items grouped by type
-    with their IDs and metadata.
+    Searches for Keboola items (tables, buckets, configurations, transformations, flows, data-apps etc.) in the current
+    project.
+    Supports two modes:
+    - textual: match patterns against ID, name, display name, description (and table columns)
+    - config-based: match patterns against stringified configuration payloads, optionally limited to specific scopes
+    Returns matching items with IDs and metadata.
 
     WHEN TO USE:
-    - User asks to "find", "locate", or "search for" something by name
+    - User asks to "find", "locate", or "search for" something by name or text
     - User mentions a partial name and you need to find the full item (e.g., "find the customer table")
     - User asks "what tables/configs/flows do I have with X in the name?"
+    - User asks to find configs containing a value in parameters (use config-based + scopes and regex patterns)
+    - Use this tool to trace lineage by searching for IDs referenced in configurations, or to find flows using a
+    specific component, or find usage of a bucket/table in transformations, or to find items with specific parameters.
     - You need to discover items before performing operations on them
-    - User asks to "list all items with [name] in it"
+    - User asks "what is the genesis of this item?" or "explain the business logic of this item"
+    - User asks to "list all items with [name] or [configuration value/part] in it"
     - DO NOT use for listing all items of a specific type. Use get_configs, list_tables, get_flows, etc instead.
 
     HOW IT WORKS:
-    - Searches by regex pattern matching against id, name, displayName, and description fields
-    - For tables, also searches column names and column descriptions
-    - Case-insensitive search
-    - Multiple patterns work as OR condition - matches items containing ANY of the patterns
-    - Returns grouped results by item type (tables, buckets, configurations, flows, etc.)
-    - Each result includes the item's ID, name, creation date, and relevant metadata
+    - mode: "literal" (default; special characters are escaped) or "regex" (patterns are regular expressions)
+    - case_sensitive: false by default; set true for exact casing
+    - search_type:
+      - "textual": matches id/name/display_name/description fields
+      - "config-based": matches stringified configuration payloads (JSON) via scopes or the whole config using
+      regex patterns.
+    - scopes: dot-separated paths (e.g., "parameters", "storage.input", "parameters.script")
+    - For tables, textual search also checks column names and column descriptions
+    - Multiple patterns are ORed: any match includes the item
+    - Results are ordered by update time, newest first, and can be paginated via limit/offset
 
     IMPORTANT:
     - Always use this tool when the user mentions a name but you don't have the exact ID
     - The search returns IDs that you can use with other tools (e.g., get_table, get_configs, get_flows)
-    - Results are ordered by update time. The most recently updated items are returned first.
+    - Use item_types to make the search more efficient when you know the type; scanning buckets and tables can be
+    expensive
     - For exact ID lookups, use specific tools like get_table, get_configs, get_flows instead
-    - Use find_component_id and get_configs tools to find configurations related to a specific component
 
     USAGE EXAMPLES:
     - user_input: "Find all tables with 'customer' in the name"
-      → patterns=["customer"], item_types=["table"]
-      → Returns all tables whose id, name, displayName, or description contains "customer"
+      → patterns=["customer"], search_type="textual", mode="literal", item_types=["table"]
 
     - user_input: "Find tables with 'email' column"
-      → patterns=["email"], item_types=["table"]
-      → Returns all tables that have a column named "email" or with "email" in column description
+      → patterns=["email"], search_type="textual", mode="literal", item_types=["table"]
 
     - user_input: "Search for the sales transformation"
-      → patterns=["sales"], item_types=["transformation"]
+      → patterns=["sales"], search_type="textual", mode="literal", item_types=["transformation"]
       → Returns transformations with "sales" in any searchable field
 
     - user_input: "Find items named 'daily report' or 'weekly summary'"
-      → patterns=["daily.*report", "weekly.*summary"], item_types=[]
-      → Returns all items matching any of these patterns
+      → patterns=["daily.*report", "weekly.*summary"], search_type="textual", mode="regex", item_types=[]
+
+    - user_input: "Show me all configurations/components related to Google Analytics"
+      → patterns=["google.*analytics"], search_type="textual", mode="regex", item_types=["component"]
+
+    - user_input: "Find storage input mappings referencing specific tables"
+      → patterns=["\"storage\".*\"input\".*:\\s*\"in\\..*\\.customers\""], search_type="config-based", mode="regex",
+      item_types=["transformation", "component"]
 
-    - user_input: "Show me all configurations related to Google Analytics"
-      → patterns=["google.*analytics"], item_types=["configuration"]
-      → Returns configurations with matching patterns
+    - user_input: "Find components or transformations using 'my_bucket' in output mappings"
+      → patterns=["my_bucket"], item_types=["component", "transformation"], search_type="config-based",
+        scopes=["storage.output"], mode="literal"
+
+    - user_input: "Find configs with specific authentication type"
+      → patterns=["\"authentication\":\\s*\\{.*\"type\":\\s*\"oauth20\""], search_type="config-based", mode="regex",
+      item_types=["component"]
+
+    - user_input: "Find flows using this configuration ID: 01k9cz233cvd1rga3zzx40g8qj"
+      → patterns=["01k9cz233cvd1rga3zzx40g8qj"], search_type="config-based", item_types=["flow"], mode="literal",
+      scopes=["tasks"]
+
+    - user_input: "Find data apps using specific code part ..."
+      → patterns=["regex-representing-the-code-part"], search_type="config-based", item_types=["data-app"],
+      mode="regex", scopes=["script"]
     """
 
     cfg = SearchSpec(
         patterns=patterns,
         item_types=item_types,
-        search_type='textual',
+        pattern_mode=mode,
+        case_sensitive=case_sensitive,
+        search_type=search_type,
+        search_scopes=scopes,
     )
 
     offset = max(0, offset)
@@ -567,10 +624,12 @@ async def search(
         tasks.append(_fetch_configurations(client, cfg))
     elif types_to_fetch & {
         'configuration',
+        'component',
         'transformation',
         'flow',
         'configuration-row',
         'workspace',
+        'data-app',
     }:
         tasks.append(_fetch_configurations(client, cfg))
 
@@ -586,10 +645,6 @@ async def search(
         else:
             all_hits.extend(result)
 
-    # Filter by item_types if specified
-    if types_to_fetch:
-        all_hits = [item for item in all_hits if item.item_type in types_to_fetch]
-
     # TODO: Should we sort by the item type too?
     all_hits.sort(
         key=lambda x: (
diff --git a/src/keboola_mcp_server/tools/search/usage.py b/src/keboola_mcp_server/tools/search/usage.py
index 2bf1f6ce..778021d9 100644
--- a/src/keboola_mcp_server/tools/search/usage.py
+++ b/src/keboola_mcp_server/tools/search/usage.py
@@ -49,7 +49,7 @@ async def find_id_usage(
         search_scopes=scopes,
         pattern_mode='literal',
         search_type='config-based',
-        return_matched_patterns=True,
+        return_all_matched_patterns=True,
         stop_searching_after_first_value_match=False,
     )
 

From 88d66156d731810c5b12750a63369aa6dc58ee9c Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 12 Jan 2026 18:18:17 +0100
Subject: [PATCH 04/29] AI-2161 test: use regex mode in search regex test

---
 tests/search/tools_test.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/tests/search/tools_test.py b/tests/search/tools_test.py
index da4587c4..ded3f3a8 100644
--- a/tests/search/tools_test.py
+++ b/tests/search/tools_test.py
@@ -9,11 +9,11 @@
 from keboola_mcp_server.clients.ai_service import ComponentSuggestionResponse, SuggestedComponent
 from keboola_mcp_server.clients.base import JsonDict
 from keboola_mcp_server.clients.client import KeboolaClient
-from keboola_mcp_server.clients.storage import ItemType
 from keboola_mcp_server.config import MetadataField
 from keboola_mcp_server.links import Link
 from keboola_mcp_server.tools.search.tools import (
     SearchHit,
+    SearchItemType,
     SearchSpec,
     SuggestedComponentOutput,
     find_component_id,
@@ -83,7 +83,7 @@ def component_list_side_effect(component_type, include=None):
         result = await search(
             ctx=mcp_context_client,
             patterns=['test'],
-            item_types=(cast(ItemType, 'table'), cast(ItemType, 'configuration')),
+            item_types=(cast(SearchItemType, 'table'), cast(SearchItemType, 'configuration')),
             limit=20,
             offset=0,
         )
@@ -144,7 +144,12 @@ async def test_search_with_regex_pattern(self, mocker: MockerFixture, mcp_contex
         keboola_client.storage_client.component_list = mocker.AsyncMock(return_value=[])
         keboola_client.storage_client.workspace_list = mocker.AsyncMock(return_value=[])
 
-        result = await search(ctx=mcp_context_client, patterns=['customer.*'], item_types=(cast(ItemType, 'bucket'),))
+        result = await search(
+            ctx=mcp_context_client,
+            patterns=['customer.*'],
+            item_types=(cast(SearchItemType, 'bucket'),),
+            mode='regex',
+        )
 
         assert isinstance(result, list)
         assert result == [
@@ -361,7 +366,7 @@ async def test_search_matches_description(self, mocker: MockerFixture, mcp_conte
         keboola_client.storage_client.component_list = mocker.AsyncMock(return_value=[])
         keboola_client.storage_client.workspace_list = mocker.AsyncMock(return_value=[])
 
-        result = await search(ctx=mcp_context_client, patterns=['test'], item_types=(cast(ItemType, 'bucket'),))
+        result = await search(ctx=mcp_context_client, patterns=['test'], item_types=(cast(SearchItemType, 'bucket'),))
 
         assert isinstance(result, list)
         assert result == [
@@ -680,7 +685,9 @@ async def test_search_table_by_columns(
         # Mock bucket_table_list with provided test data
         keboola_client.storage_client.bucket_table_list = mocker.AsyncMock(return_value=tables_data)
 
-        result = await search(ctx=mcp_context_client, patterns=[search_pattern], item_types=(cast(ItemType, 'table'),))
+        result = await search(
+            ctx=mcp_context_client, patterns=[search_pattern], item_types=(cast(SearchItemType, 'table'),)
+        )
 
         assert isinstance(result, list)
         assert len(result) == expected_count
@@ -696,7 +703,7 @@ def test_match_texts_with_literal_and_regex(self):
             patterns=['foo.*'],
             item_types=('bucket',),
             pattern_mode='literal',
-            return_matched_patterns=True,
+            return_all_matched_patterns=True,
         )
         matches = spec.match_texts(['foo.*', 'foobar'])
         assert [match.model_dump() for match in matches] == [
@@ -707,7 +714,7 @@ def test_match_texts_with_literal_and_regex(self):
             patterns=['foo.*'],
             item_types=('bucket',),
             pattern_mode='regex',
-            return_matched_patterns=True,
+            return_all_matched_patterns=True,
         )
         regex_matches = regex_spec.match_texts(['foo.*', 'foobar'])
         assert [match.model_dump() for match in regex_matches] == [
@@ -718,7 +725,7 @@ def test_match_texts_case_sensitivity_and_stop(self):
         spec = SearchSpec(
             patterns=['foo', 'bar'],
             item_types=('bucket',),
-            return_matched_patterns=True,
+            return_all_matched_patterns=True,
             stop_searching_after_first_value_match=True,
         )
         matches = spec.match_texts(['Foo baz', 'BAR qux'])
@@ -729,7 +736,7 @@ def test_match_texts_case_sensitivity_and_stop(self):
         all_spec = SearchSpec(
             patterns=['foo', 'bar'],
             item_types=('bucket',),
-            return_matched_patterns=True,
+            return_all_matched_patterns=True,
             stop_searching_after_first_value_match=False,
         )
         all_matches = all_spec.match_texts(['Foo baz', 'BAR qux'])
@@ -748,7 +755,7 @@ def test_match_configuration_scopes(self):
             patterns=['alpha', 'beta'],
             item_types=('bucket',),
             search_scopes=('parameters', 'storage.input'),
-            return_matched_patterns=True,
+            return_all_matched_patterns=True,
             stop_searching_after_first_value_match=True,
         )
         matches = spec.match_configuration_scopes(configuration)
@@ -761,7 +768,7 @@ def test_match_configuration_scopes(self):
             patterns=['alpha', 'beta'],
             item_types=('bucket',),
             search_scopes=('parameters', 'storage.input'),
-            return_matched_patterns=True,
+            return_all_matched_patterns=True,
             stop_searching_after_first_value_match=False,
         )
         first_only_matches = first_only_spec.match_configuration_scopes(configuration)
@@ -772,7 +779,7 @@ def test_match_configuration_scopes(self):
         any_scope_spec = SearchSpec(
             patterns=['gamma'],
             item_types=('bucket',),
-            return_matched_patterns=True,
+            return_all_matched_patterns=True,
         )
         any_scope_matches = any_scope_spec.match_configuration_scopes(configuration)
         assert [match.model_dump() for match in any_scope_matches] == [

From ff2f68bfb0bfa257be7b31568b653e96286b91ec Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 12 Jan 2026 18:18:48 +0100
Subject: [PATCH 05/29] AI-2161 refactor: remove usage tool registration

---
 TOOLS.md                         | 246 +++++++++++--------------------
 src/keboola_mcp_server/server.py |   2 -
 tests/test_server.py             |   1 -
 3 files changed, 87 insertions(+), 162 deletions(-)

diff --git a/TOOLS.md b/TOOLS.md
index 6bcece3d..c6f39486 100644
--- a/TOOLS.md
+++ b/TOOLS.md
@@ -40,7 +40,6 @@ and the configuration ID.
 - [get_data_apps](#get_data_apps): Lists summaries of data apps in the project given the limit and offset or gets details of a data apps by
 providing their configuration IDs.
 - [modify_data_app](#modify_data_app): Creates or updates a Streamlit data app.
-- [search_keboola_objects](#search_keboola_objects): Deep search across Keboola objects including their full JSON configuration data.
 
 ### Project Tools
 - [get_project_info](#get_project_info): Retrieves structured information about the current project,
@@ -52,7 +51,7 @@ including essential context and base instructions for working with it
 
 ### Search Tools
 - [find_component_id](#find_component_id): Returns list of component IDs that match the given query.
-- [search](#search): Searches for Keboola items (tables, buckets, configurations, transformations, flows, etc.
+- [search](#search): Searches for Keboola items (tables, buckets, configurations, transformations, flows, data-apps etc.
 
 ### Storage Tools
 - [get_buckets](#get_buckets): Lists buckets or retrieves full details of specific buckets, including metadata-derived descriptions,
@@ -1723,137 +1722,6 @@ updating, set `authentication_type` to `default` to keep the existing authentica
 }
 ```
 
----
-<a name="search_keboola_objects"></a>
-## search_keboola_objects
-**Annotations**: `read-only`
-
-**Tags**: `usage`
-
-**Description**:
-
-Deep search across Keboola objects including their full JSON configuration data.
-
-WHAT IT SEARCHES:
-- Buckets/Tables: name, description, metadata, column names, column descriptions, and entire API payload
-- Components/Flows/Data Apps/Transformations: name, description, and entire configuration JSON in raw format:
-  * All configuration parameters and nested settings
-  * Storage mappings (input/output tables)
-  * Credentials and connection details
-  * SQL queries and code blocks
-  * Any other data stored in the configuration
-
-WHEN TO USE:
-- Find configurations by specific parameter values (e.g., API endpoints, database hosts)
-- Search deep in nested JSON structures (e.g., table mappings, processors)
-- Locate objects containing specific SQL code or queries
-- Find configurations with particular credentials or connection strings
-- Use advanced pattern matching with wildcards or regex
-
-PATTERN MATCHING:
-- literal (default): Exact text matching - patterns=["salesforce.com"]
-- wildcard: Glob patterns with * - patterns=["sales*"] matches "sales", "salesforce", "sales_data"
-- regex: Regular expressions - patterns=["flow-[0-9]+"] matches "flow-1", "flow-123"
-- Multiple patterns use OR logic (matches ANY pattern)
-
-USAGE EXAMPLES:
-
-1. Find extractors connecting to a specific database:
-   patterns=["prod-db-server.company.com"], search_types=["component"]
-
-2. Find transformations using a specific input table:
-   patterns=["in.c-main.customers"], search_types=["transformation"]
-
-3. Find all objects with "test" or "staging" in their configuration:
-   patterns=["test", "staging"], mode="literal"
-
-4. Find flows starting with "daily-" prefix:
-   patterns=["daily-*"], mode="wildcard", search_types=["flow"]
-
-5. Find components with API version v2 or v3:
-   patterns=["api/v[23]"], mode="regex", search_types=["component"]
-
-6. Find data apps using specific Python packages:
-   patterns=["pandas", "streamlit"], search_types=["data-app"]
-
-7. Search for exact table IDs (avoid partial matches):
-   patterns=["in.c-bucket.table"], whole_word=True
-
-8. Find configs with nested JSON structure (key-value in parameters):
-   patterns=[""parameters":\s*\{.*api\.paychex.*\}"], mode="regex"
-
-9. Find configs with specific authentication type:
-   patterns=[""authentication":\s*\{.*"type":\s*"oauth20""], mode="regex"
-
-10. Find configs with incremental loading enabled:
-    patterns=[""incremental":\s*true"], mode="regex"
-
-11. Find storage mappings referencing specific tables:
-    patterns=[""source":\s*"in\.*\.customers""], mode="regex"
-
-TIPS:
-- Use whole_word=True when searching for IDs to avoid partial matches
-- Start with literal mode for speed, use wildcard/regex for flexibility
-- Narrow results with search_types when you know the object type
-- Results include direct links to objects in Keboola UI
-
-
-**Input JSON Schema**:
-```json
-{
-  "properties": {
-    "patterns": {
-      "description": "Search patterns to match. Multiple patterns use OR logic (matches ANY pattern). Examples: [\"customer\"], [\"sales*\", \"revenue*\"] for wildcards, [\"flow-.*\"] for regex. Do not pass empty strings.",
-      "items": {
-        "type": "string"
-      },
-      "type": "array"
-    },
-    "mode": {
-      "default": "literal",
-      "description": "Pattern matching mode: \"literal\" - exact text match (default, fastest), \"wildcard\" - use * for glob patterns (e.g., \"sales*\"), \"regex\" - full regular expressions (most powerful).",
-      "enum": [
-        "literal",
-        "wildcard",
-        "regex"
-      ],
-      "type": "string"
-    },
-    "whole_word": {
-      "default": false,
-      "description": "When true, only matches complete words. Prevents partial matches like finding \"test\" in \"latest\". Useful for searching IDs or specific terms.",
-      "type": "boolean"
-    },
-    "ignore_case": {
-      "default": true,
-      "description": "When true, search ignores letter casing (e.g., \"Sales\" matches \"sales\"). Default: true.",
-      "type": "boolean"
-    },
-    "search_types": {
-      "default": [],
-      "description": "Filter by object types: \"bucket\", \"table\", \"component\", \"transformation\", \"flow\", \"data-app\". Empty list or [\"any\"] searches all types. Use to narrow results when you know what you need.",
-      "items": {
-        "enum": [
-          "bucket",
-          "table",
-          "component",
-          "flow",
-          "data-app",
-          "transformation",
-          "any"
-        ],
-        "type": "string"
-      },
-      "type": "array"
-    }
-  },
-  "required": [
-    "patterns"
-  ],
-  "type": "object"
-}
-```
-
 ---
 
 # Documentation Tools
@@ -2505,53 +2373,80 @@ USAGE EXAMPLES:
 
 **Description**:
 
-Searches for Keboola items (tables, buckets, configurations, transformations, flows, etc.) in the current project
-by matching patterns against item ID, name, display name, or description. Returns matching items grouped by type
-with their IDs and metadata.
+Searches for Keboola items (tables, buckets, configurations, transformations, flows, data-apps etc.) in the current
+project.
+Supports two modes:
+- textual: match patterns against ID, name, display name, description (and table columns)
+- config-based: match patterns against stringified configuration payloads, optionally limited to specific scopes
+Returns matching items with IDs and metadata.
 
 WHEN TO USE:
-- User asks to "find", "locate", or "search for" something by name
+- User asks to "find", "locate", or "search for" something by name or text
 - User mentions a partial name and you need to find the full item (e.g., "find the customer table")
 - User asks "what tables/configs/flows do I have with X in the name?"
+- User asks to find configs containing a value in parameters (use config-based + scopes and regex patterns)
+- Use this tool to trace lineage by searching for IDs referenced in configurations, or to find flows using a
+specific component, or find usage of a bucket/table in transformations, or to find items with specific parameters.
 - You need to discover items before performing operations on them
-- User asks to "list all items with [name] in it"
+- User assks to "what is the genesis of this item?" or "explain me bussiness logic of this item?"
+- User asks to "list all items with [name] or [configuration value/part] in it"
 - DO NOT use for listing all items of a specific type. Use get_configs, list_tables, get_flows, etc instead.
 
 HOW IT WORKS:
-- Searches by regex pattern matching against id, name, displayName, and description fields
-- For tables, also searches column names and column descriptions
-- Case-insensitive search
-- Multiple patterns work as OR condition - matches items containing ANY of the patterns
-- Returns grouped results by item type (tables, buckets, configurations, flows, etc.)
-- Each result includes the item's ID, name, creation date, and relevant metadata
+- mode: "regex" (default) or "literal" (escape special characters)
+- case_sensitive: false by default; set true for exact casing
+- search_type:
+  - "textual": matches id/name/display_name/description fields
+  - "config-based": matches stringified configuration payloads (JSON) via scopes or the whole config using
+  regex patterns.
+- scopes: dot-separated paths (e.g., "parameters", "storage.input", "parameters.script")
+- For tables, textual search also checks column names and column descriptions
+- Multiple patterns are ORed: any match includes the item
+- Results are ordered by update time, newest first, and can be paginated via limit/offset
 
 IMPORTANT:
 - Always use this tool when the user mentions a name but you don't have the exact ID
 - The search returns IDs that you can use with other tools (e.g., get_table, get_configs, get_flows)
-- Results are ordered by update time. The most recently updated items are returned first.
+- Use item_types to make the search more efficient when you know the type; scanning buckets and tables can be
+expensive
 - For exact ID lookups, use specific tools like get_table, get_configs, get_flows instead
-- Use find_component_id and get_configs tools to find configurations related to a specific component
 
 USAGE EXAMPLES:
 - user_input: "Find all tables with 'customer' in the name"
-  → patterns=["customer"], item_types=["table"]
-  → Returns all tables whose id, name, displayName, or description contains "customer"
+  → patterns=["customer"], search_type="textual", mode="literal", item_types=["table"]
 
 - user_input: "Find tables with 'email' column"
-  → patterns=["email"], item_types=["table"]
-  → Returns all tables that have a column named "email" or with "email" in column description
+  → patterns=["email"], search_type="textual", mode="literal", item_types=["table"]
 
 - user_input: "Search for the sales transformation"
-  → patterns=["sales"], item_types=["transformation"]
+  → patterns=["sales"], search_type="textual", mode="literal", item_types=["transformation"]
   → Returns transformations with "sales" in any searchable field
 
 - user_input: "Find items named 'daily report' or 'weekly summary'"
-  → patterns=["daily.*report", "weekly.*summary"], item_types=[]
-  → Returns all items matching any of these patterns
+  → patterns=["daily.*report", "weekly.*summary"], search_type="textual", mode="regex", item_types=[]
+
+- user_input: "Show me all configurations/components related to Google Analytics"
+  → patterns=["google.*analytics"], search_type="textual", mode="regex", item_types=["component"]
+
+- user_input: "Find storage input mappings referencing specific tables:"
+  → patterns=[""storage"\.*"input"\.*:\s*"in\.*\.customers""], search_type="config-based", mode="regex",
+  item_types=["transformation", "component"]
+
+- user input: "Find components or transformations using 'my_bucket' in output mappings"
+  → patterns=["my_bucket"], item_types=["component", "transformation"], search_type="config-based",
+    scopes=["storage.output"], mode="literal"
+
+- user input: "Find configs with specific authentication type"
+  → patterns=[""authentication":\s*\{.*"type":\s*"oauth20""], search_type="config-based", mode="regex",
+  item_types=["component"]
 
-- user_input: "Show me all configurations related to Google Analytics"
-  → patterns=["google.*analytics"], item_types=["configuration"]
-  → Returns configurations with matching patterns
+- user input: "Find flows using this configuration ID: 01k9cz233cvd1rga3zzx40g8qj"
+  → patterns=["01k9cz233cvd1rga3zzx40g8qj"], search_type="config-based", item_types=["flow"], mode="literal",
+  scopes=["tasks"]
+
+- user input: "Find data apps using specific code part ..."
+  → patterns=["regex-representing-the-code-part"], search_type="config-based", item_types=["data-app"],
+  mode="regex"], scopes=["script"]
 
 
 **Input JSON Schema**:
@@ -2559,7 +2454,7 @@ USAGE EXAMPLES:
 {
   "properties": {
     "patterns": {
-      "description": "One or more search patterns to match against item ID, name, display name, or description. Supports regex patterns. Case-insensitive. Examples: [\"customer\"], [\"sales\", \"revenue\"], [\"test.*table\"]. Do not use empty strings or empty lists.",
+      "description": "One or more search patterns to match against item ID, name, display name, or description. Supports regex patterns. Case-insensitive by default. Examples: [\"customer\"], [\"sales\", \"revenue\"], [\"test.*table\"], [\"key1.*:.*key2.*:.*value.*\"]. Do not use empty strings or empty lists.",
       "items": {
         "type": "string"
       },
@@ -2567,15 +2462,17 @@ USAGE EXAMPLES:
     },
     "item_types": {
       "default": [],
-      "description": "Optional filter for specific Keboola item types. Leave empty to search all types. Common values: \"table\" (data tables), \"bucket\" (table containers), \"transformation\" (SQL/Python transformations), \"configuration\" (extractor/writer configs), \"flow\" (orchestration flows). Use when you know what type of item you're looking for.",
+      "description": "Filter for specific Keboola item types. Common values: \"table\" (data tables), \"bucket\" (table containers), \"transformation\" (SQL/Python transformations), \"component\" (extractor/writer/application components), \"data-app\" (data apps), \"flow\" (orchestration flows). Use when you know what type of item you're looking for or leave empty to search all types.",
       "items": {
         "enum": [
-          "flow",
           "bucket",
           "table",
+          "data-app",
+          "flow",
           "transformation",
           "configuration",
           "configuration-row",
+          "component",
           "workspace",
           "shared-code",
           "rows",
@@ -2585,6 +2482,37 @@ USAGE EXAMPLES:
       },
       "type": "array"
     },
+    "search_type": {
+      "default": "textual",
+      "description": "Search mode: \"textual\" (name/id/description) or \"config-based\" (stringified configuration payloads).",
+      "enum": [
+        "textual",
+        "config-based"
+      ],
+      "type": "string"
+    },
+    "scopes": {
+      "default": [],
+      "description": "Dot-separated keys to search in configuration payloads, used with \"config-based\" search. Example: \"parameters.field\", \"storage.input\", \"storage.output\", \"processors.before\", \"processors.after\", \"authorization\", \"tasks\", \"phases\". Leave empty to search the whole configuration.",
+      "items": {
+        "type": "string"
+      },
+      "type": "array"
+    },
+    "mode": {
+      "default": "literal",
+      "description": "How to interpret patterns: \"regex\" for regular expressions or \"literal\" for exact text (default: \"literal\").",
+      "enum": [
+        "regex",
+        "literal"
+      ],
+      "type": "string"
+    },
+    "case_sensitive": {
+      "default": false,
+      "description": "If true, match patterns with case sensitivity (default: false).",
+      "type": "boolean"
+    },
     "limit": {
       "default": 50,
       "description": "Maximum number of items to return (default: 50, max: 100).",
diff --git a/src/keboola_mcp_server/server.py b/src/keboola_mcp_server/server.py
index 6fae8ac9..c7090624 100644
--- a/src/keboola_mcp_server/server.py
+++ b/src/keboola_mcp_server/server.py
@@ -34,7 +34,6 @@
 from keboola_mcp_server.tools.search.tools import add_search_tools
 from keboola_mcp_server.tools.sql import add_sql_tools
 from keboola_mcp_server.tools.storage import add_storage_tools
-from keboola_mcp_server.tools.usage import add_usage_tools
 
 LOG = logging.getLogger(__name__)
 
@@ -239,7 +238,6 @@ def create_server(
     add_sql_tools(mcp)
     add_storage_tools(mcp)
     add_keboola_prompts(mcp)
-    add_usage_tools(mcp)
 
     if custom_routes_handling != 'return':
         return mcp
diff --git a/tests/test_server.py b/tests/test_server.py
index ee2d4973..cacdceb0 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -60,7 +60,6 @@ async def test_list_tools(self):
             'query_data',
             'run_job',
             'search',
-            'search_keboola_objects',
             'update_config',
             'update_config_row',
             'update_descriptions',

From dc1da3ea81d556043345825dcecaecec829f793b Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 12 Jan 2026 18:50:05 +0100
Subject: [PATCH 06/29] AI-2161 fix: return extractor configs as configuration
 items

---
 src/keboola_mcp_server/tools/search/tools.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/keboola_mcp_server/tools/search/tools.py b/src/keboola_mcp_server/tools/search/tools.py
index 638d6d05..fb8385d1 100644
--- a/src/keboola_mcp_server/tools/search/tools.py
+++ b/src/keboola_mcp_server/tools/search/tools.py
@@ -38,7 +38,6 @@
     'transformation',
     'configuration',
     'configuration-row',
-    'component',
     'workspace',
     'shared-code',
     'rows',
@@ -49,7 +48,6 @@
     'data-app': ['other'],
     'flow': ['other'],
     'transformation': ['transformation'],
-    'component': ['extractor', 'writer', 'application'],
     'configuration': ['extractor', 'writer', 'application'],
     'configuration-row': ['extractor', 'writer', 'application'],
     'workspace': ['other'],
@@ -361,11 +359,13 @@ async def _fetch_configs(
 ) -> AsyncGenerator[SearchHit, None]:
     components = await client.storage_client.component_list(component_type, include=['configuration', 'rows'])
 
-    allowed_transformations = 'transformation' in spec.item_types
-    allowed_components = 'component' in spec.item_types
-    allowed_flows = 'flow' in spec.item_types
-    allowed_workspaces = 'workspace' in spec.item_types
-    allowed_data_apps = 'data-app' in spec.item_types
+    allowed_transformations = 'transformation' in spec.item_types or component_type is None
+    allowed_components = (
+        'configuration' in spec.item_types or 'configuration-row' in spec.item_types or component_type is None
+    )
+    allowed_flows = 'flow' in spec.item_types or component_type is None
+    allowed_workspaces = 'workspace' in spec.item_types or component_type is None
+    allowed_data_apps = 'data-app' in spec.item_types or component_type is None
 
     for component in components:
         if not (component_id := component.get('id')):
@@ -389,7 +389,7 @@ async def _fetch_configs(
             if not allowed_data_apps:
                 continue
         elif current_component_type in ['extractor', 'writer', 'application']:
-            item_type = 'component'
+            item_type = 'configuration'
             if not allowed_components:
                 continue
         else:
@@ -624,7 +624,6 @@ async def search(
         tasks.append(_fetch_configurations(client, cfg))
     elif types_to_fetch & {
         'configuration',
-        'component',
         'transformation',
         'flow',
         'configuration-row',

From 4ef683b517dbab549b147b651fe50699003d8b87 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 12 Jan 2026 18:50:09 +0100
Subject: [PATCH 07/29] AI-2161 fix: include configurations in table usage
 search

---
 src/keboola_mcp_server/tools/storage.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/keboola_mcp_server/tools/storage.py b/src/keboola_mcp_server/tools/storage.py
index 0707bdbe..5be81202 100644
--- a/src/keboola_mcp_server/tools/storage.py
+++ b/src/keboola_mcp_server/tools/storage.py
@@ -619,7 +619,7 @@ async def _fetch_table_detail(_table_id: str) -> TableDetail | str:
         # Add the component usage to the table detail
         if include_usage:
             usage_by_ids = await find_id_usage(
-                client, table_ids, ['component', 'transformation'], ['storage.input', 'storage.output']
+                client, table_ids, ['configuration', 'configuration-row', 'transformation'], ['storage.input', 'storage.output']
             )
             for usage_by_id in usage_by_ids:
                 if usage_by_id.target_id in tables_by_id and usage_by_id.usage_references:

From da22a0929a22642212e65c61d8a41ef3b021e086 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 12 Jan 2026 18:50:14 +0100
Subject: [PATCH 08/29] AI-2161 test: add config-based search integration test

---
 integtests/tools/test_search.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/integtests/tools/test_search.py b/integtests/tools/test_search.py
index f7342ac6..26187cf0 100644
--- a/integtests/tools/test_search.py
+++ b/integtests/tools/test_search.py
@@ -103,3 +103,32 @@ async def test_find_component_id(mcp_client: Client):
     assert full_result.content[0].type == 'text'
     decoded_toon = toon_format.decode(full_result.content[0].text)
     assert decoded_toon == result
+
+
+@pytest.mark.asyncio
+async def test_search_config_based_simple_query(
+    mcp_client: Client,
+    configs: list[ConfigDef],
+) -> None:
+    """
+    Test config-based search with a simple scoped query.
+    """
+    config = next(cfg for cfg in configs if cfg.component_id == 'ex-generic-v2')
+    full_result = await mcp_client.call_tool(
+        'search',
+        {
+            'patterns': ['wttr.in'],
+            'item_types': ['configuration'],
+            'search_type': 'config-based',
+            'scopes': ['parameters.api.baseUrl'],
+            'limit': 20,
+            'offset': 0,
+        },
+    )
+
+    assert full_result.structured_content is not None
+    result = [SearchHit.model_validate(hit) for hit in full_result.structured_content['result']]
+
+    assert any(
+        hit.component_id == 'ex-generic-v2' and hit.configuration_id == config.configuration_id for hit in result
+    ), f'Expected config {config.configuration_id} to be returned. Found: {result}'

From cbe58d72b2e4f54504ecc4c7b9af2976be48c41e Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 12 Jan 2026 18:52:21 +0100
Subject: [PATCH 09/29] AI-2161 style: apply tox

---
 TOOLS.md                                | 1 -
 src/keboola_mcp_server/tools/storage.py | 5 ++++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/TOOLS.md b/TOOLS.md
index c6f39486..3e935a5d 100644
--- a/TOOLS.md
+++ b/TOOLS.md
@@ -2472,7 +2472,6 @@ USAGE EXAMPLES:
           "transformation",
           "configuration",
           "configuration-row",
-          "component",
           "workspace",
           "shared-code",
           "rows",
diff --git a/src/keboola_mcp_server/tools/storage.py b/src/keboola_mcp_server/tools/storage.py
index 5be81202..1ff3f65e 100644
--- a/src/keboola_mcp_server/tools/storage.py
+++ b/src/keboola_mcp_server/tools/storage.py
@@ -619,7 +619,10 @@ async def _fetch_table_detail(_table_id: str) -> TableDetail | str:
         # Add the component usage to the table detail
         if include_usage:
             usage_by_ids = await find_id_usage(
-                client, table_ids, ['configuration', 'configuration-row', 'transformation'], ['storage.input', 'storage.output']
+                client,
+                table_ids,
+                ['configuration', 'configuration-row', 'transformation'],
+                ['storage.input', 'storage.output'],
             )
             for usage_by_id in usage_by_ids:
                 if usage_by_id.target_id in tables_by_id and usage_by_id.usage_references:

From e10c074079919e0a95b164a7238d4c156a1cafbc Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Wed, 21 Jan 2026 15:14:00 +0100
Subject: [PATCH 10/29] AI-2161 chore: update version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 07873846..259efcc7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "keboola-mcp-server"
-version = "1.39.0"
+version = "1.40.0"
 description = "MCP server for interacting with Keboola Connection"
 readme = "README.md"
 requires-python = ">=3.10"

From e421ed804033847f69222991989b3d6adff99a60 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Wed, 21 Jan 2026 15:35:34 +0100
Subject: [PATCH 11/29] AI-2161 refactor: add component to search type

---
 TOOLS.md                               |  6 ++++--
 src/keboola_mcp_server/tools/search.py | 13 +++++++++++--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/TOOLS.md b/TOOLS.md
index 40192983..f95a890b 100644
--- a/TOOLS.md
+++ b/TOOLS.md
@@ -2550,11 +2550,12 @@ USAGE EXAMPLES:
 **Description**:
 
 Searches for Keboola items (tables, buckets, configurations, transformations, flows, data-apps etc.) in the current
-project.
+project. Returns matching items with IDs and metadata.
 Supports two modes:
 - textual: match patterns against ID, name, display name, description (and table columns)
 - config-based: match patterns against stringified configuration payloads, optionally limited to specific scopes
-Returns matching items with IDs and metadata.
+which can be derived from the configuration schemas or objects.
+
 
 WHEN TO USE:
 - User asks to "find", "locate", or "search for" something by name or text
@@ -2646,6 +2647,7 @@ USAGE EXAMPLES:
           "data-app",
           "flow",
           "transformation",
+          "component",
           "configuration",
           "configuration-row",
           "workspace",
diff --git a/src/keboola_mcp_server/tools/search.py b/src/keboola_mcp_server/tools/search.py
index 0d9069b5..c4237572 100644
--- a/src/keboola_mcp_server/tools/search.py
+++ b/src/keboola_mcp_server/tools/search.py
@@ -36,6 +36,7 @@
     'data-app',
     'flow',
     'transformation',
+    'component',
     'configuration',
     'configuration-row',
     'workspace',
@@ -48,6 +49,7 @@
 SearchComponentItemType = Literal[
     'flow',
     'transformation',
+    'component',
     'configuration',
     'configuration-row',
     'workspace',
@@ -60,6 +62,7 @@
     'transformation': ['transformation'],
     'configuration': ['extractor', 'writer', 'application'],
     'configuration-row': ['extractor', 'writer', 'application'],
+    'component': ['extractor', 'writer', 'application'],
     'workspace': ['other'],
 }
 
@@ -189,6 +192,11 @@ def _validate_component_args(self) -> 'SearchSpec':
             )
         return self
 
+    @model_validator(mode='after')
+    def _validate_item_types(self) -> 'SearchSpec':
+        if 'component' in self.item_types:
+            self.item_types = list(set(self.item_types + ['configuration', 'configuration-row']))
+
     @staticmethod
     def _stringify(value: JsonDict) -> str:
         try:
@@ -508,11 +516,12 @@ async def search(
 ) -> list[SearchHit]:
     """
     Searches for Keboola items (tables, buckets, configurations, transformations, flows, data-apps etc.) in the current
-    project.
+    project. Returns matching items with IDs and metadata.
     Supports two modes:
     - textual: match patterns against ID, name, display name, description (and table columns)
     - config-based: match patterns against stringified configuration payloads, optionally limited to specific scopes
-    Returns matching items with IDs and metadata.
+    which can be derived from the configuration schemas or objects.
+
 
     WHEN TO USE:
     - User asks to "find", "locate", or "search for" something by name or text

From 5ca04b55875f620ab308f0b57c797f7b94f8aa88 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Thu, 22 Jan 2026 12:43:32 +0100
Subject: [PATCH 12/29] AI-2161 fix: update import

---
 integtests/test_errors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integtests/test_errors.py b/integtests/test_errors.py
index d366f41a..b22fa19c 100644
--- a/integtests/test_errors.py
+++ b/integtests/test_errors.py
@@ -17,7 +17,7 @@
 from keboola_mcp_server.tools.doc import docs_query
 from keboola_mcp_server.tools.jobs import get_jobs
 from keboola_mcp_server.tools.sql import query_data
-from keboola_mcp_server.tools.storage import GetBucketsOutput, get_buckets
+from keboola_mcp_server.tools.storage.tools import GetBucketsOutput, get_buckets
 
 
 class TestHttpErrors:

From c1fc287f77e4fa7819614a34dd4959ac5c35a70f Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 26 Jan 2026 12:57:05 +0100
Subject: [PATCH 13/29] AI-2161 chore: update version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 259efcc7..b2a74a52 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "keboola-mcp-server"
-version = "1.40.0"
+version = "1.41.0"
 description = "MCP server for interacting with Keboola Connection"
 readme = "README.md"
 requires-python = ">=3.10"

From 05b868effa277058d3f63a118ecce2f8e933bd0c Mon Sep 17 00:00:00 2001
From: Vita Stejskal <Vita Stejskal>
Date: Fri, 6 Feb 2026 12:06:33 +0100
Subject: [PATCH 14/29] AI-2161 fix: missing return in validator, doc defaults,
 typos, and None filtering

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 TOOLS.md                               |  4 ++--
 src/keboola_mcp_server/tools/search.py | 31 +++++++++++++-------------
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/TOOLS.md b/TOOLS.md
index f640ec0b..315200a1 100644
--- a/TOOLS.md
+++ b/TOOLS.md
@@ -2565,12 +2565,12 @@ WHEN TO USE:
 - Use this tool to trace lineage by searching for IDs referenced in configurations, or to find flows using a
 specific component, or find usage of a bucket/table in transformations, or to find items with specific parameters.
 - You need to discover items before performing operations on them
-- User assks to "what is the genesis of this item?" or "explain me bussiness logic of this item?"
+- User asks to "what is the genesis of this item?" or "explain me business logic of this item?"
 - User asks to "list all items with [name] or [configuration value/part] in it"
 - DO NOT use for listing all items of a specific type. Use get_configs, list_tables, get_flows, etc instead.
 
 HOW IT WORKS:
-- mode: "regex" (default) or "literal" (escape special characters)
+- mode: "literal" (default) or "regex" (regular expressions)
 - case_sensitive: false by default; set true for exact casing
 - search_type:
   - "textual": matches id/name/display_name/description fields
diff --git a/src/keboola_mcp_server/tools/search.py b/src/keboola_mcp_server/tools/search.py
index c4237572..8bd5144c 100644
--- a/src/keboola_mcp_server/tools/search.py
+++ b/src/keboola_mcp_server/tools/search.py
@@ -2,7 +2,7 @@
 import json
 import logging
 import re
-from typing import Annotated, Any, AsyncGenerator, Literal, Mapping, Sequence
+from typing import Annotated, Any, AsyncGenerator, Iterable, Literal, Mapping, Sequence
 
 from fastmcp import Context, FastMCP
 from fastmcp.tools import FunctionTool
@@ -195,7 +195,8 @@ def _validate_component_args(self) -> 'SearchSpec':
     @model_validator(mode='after')
     def _validate_item_types(self) -> 'SearchSpec':
         if 'component' in self.item_types:
-            self.item_types = list(set(self.item_types + ['configuration', 'configuration-row']))
+            self.item_types = list({*self.item_types, 'configuration', 'configuration-row'})
+        return self
 
     @staticmethod
     def _stringify(value: JsonDict) -> str:
@@ -246,7 +247,7 @@ def match_configuration_scopes(self, configuration: JsonDict | None) -> list[Pat
             return [PatternMatch(scope=None, patterns=matched)]
         return []
 
-    def match_texts(self, texts: Sequence[str]) -> list[PatternMatch]:
+    def match_texts(self, texts: Iterable[str]) -> list[PatternMatch]:
         """
         Matches a sequence of strings against the patterns.
 
@@ -278,7 +279,7 @@ def _check_column_match(table: JsonDict, cfg: SearchSpec) -> list[PatternMatch]:
 
     if col_metadata := table.get('columnMetadata', {}):
         col_descs = (get_metadata_property(col_meta, MetadataField.DESCRIPTION) for col_meta in col_metadata.values())
-        if matched := cfg.match_texts(col_descs):
+        if matched := cfg.match_texts(filter(None, col_descs)):
             return matched
     return []
 
@@ -324,9 +325,9 @@ async def _fetch_tables(client: KeboolaClient, spec: SearchSpec) -> list[SearchH
             table_display_name = table.get('displayName')
             table_description = get_metadata_property(table.get('metadata', []), MetadataField.DESCRIPTION)
 
-            if matches := spec.match_texts(
-                [table_id, table_name, table_display_name, table_description]
-            ) or _check_column_match(table, spec):
+            matches = spec.match_texts([table_id, table_name, table_display_name, table_description])
+            matches.extend(_check_column_match(table, spec))
+            if matches:
                 hits.append(
                     SearchHit(
                         table_id=table_id,
@@ -375,27 +376,27 @@ async def _fetch_configs(
 
         current_component_type = component.get('type')
         if component_id in [ORCHESTRATOR_COMPONENT_ID, CONDITIONAL_FLOW_COMPONENT_ID]:
-            item_type = 'flow'
+            item_type: SearchItemType = 'flow'
             if not allowed_flows:
                 continue
         elif current_component_type == 'transformation':
-            item_type = 'transformation'
+            item_type: SearchItemType = 'transformation'
             if not allowed_transformations:
                 continue
         elif component_id == 'keboola.sandboxes':
-            item_type = 'workspace'
+            item_type: SearchItemType = 'workspace'
             if not allowed_workspaces:
                 continue
         elif component_id == DATA_APP_COMPONENT_ID:
-            item_type = 'data-app'
+            item_type: SearchItemType = 'data-app'
             if not allowed_data_apps:
                 continue
         elif current_component_type in ['extractor', 'writer', 'application']:
-            item_type = 'configuration'
+            item_type: SearchItemType = 'configuration'
             if not allowed_components:
                 continue
         else:
-            item_type = 'configuration'
+            item_type: SearchItemType = 'configuration'
 
         for config in component.get('configurations', []):
             if not (config_id := config.get('id')):
@@ -531,12 +532,12 @@ async def search(
     - Use this tool to trace lineage by searching for IDs referenced in configurations, or to find flows using a
     specific component, or find usage of a bucket/table in transformations, or to find items with specific parameters.
     - You need to discover items before performing operations on them
-    - User assks to "what is the genesis of this item?" or "explain me bussiness logic of this item?"
+    - User asks to "what is the genesis of this item?" or "explain me business logic of this item?"
     - User asks to "list all items with [name] or [configuration value/part] in it"
     - DO NOT use for listing all items of a specific type. Use get_configs, list_tables, get_flows, etc instead.
 
     HOW IT WORKS:
-    - mode: "regex" (default) or "literal" (escape special characters)
+    - mode: "literal" (default) or "regex" (regular expressions)
     - case_sensitive: false by default; set true for exact casing
     - search_type:
       - "textual": matches id/name/display_name/description fields

From 6de7ea405ad22e53885680c87d1f9385b9423f49 Mon Sep 17 00:00:00 2001
From: Vita Stejskal <Vita Stejskal>
Date: Sun, 8 Feb 2026 20:40:12 +0100
Subject: [PATCH 15/29] AI-2161: fix search tool's docstring

---
 TOOLS.md                               | 31 +++++++++++++-------------
 src/keboola_mcp_server/tools/search.py | 31 +++++++++++++-------------
 2 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/TOOLS.md b/TOOLS.md
index 7fb36445..71935805 100644
--- a/TOOLS.md
+++ b/TOOLS.md
@@ -2628,40 +2628,39 @@ expensive
 
 USAGE EXAMPLES:
 - user_input: "Find all tables with 'customer' in the name"
-  → patterns=["customer"], search_type="textual", mode="literal", item_types=["table"]
+  → `patterns=["customer"], search_type="textual", mode="literal", item_types=["table"]`
 
 - user_input: "Find tables with 'email' column"
-  → patterns=["email"], search_type="textual", mode="literal", item_types=["table"]
+  → `patterns=["email"], search_type="textual", mode="literal", item_types=["table"]`
 
 - user_input: "Search for the sales transformation"
-  → patterns=["sales"], search_type="textual", mode="literal", item_types=["transformation"]
-  → Returns transformations with "sales" in any searchable field
+  → `patterns=["sales"], search_type="textual", mode="literal", item_types=["transformation"]`
 
 - user_input: "Find items named 'daily report' or 'weekly summary'"
-  → patterns=["daily.*report", "weekly.*summary"], search_type="textual", mode="regex", item_types=[]
+  → `patterns=["daily.*report", "weekly.*summary"], search_type="textual", mode="regex", item_types=[]`
 
 - user_input: "Show me all configurations/components related to Google Analytics"
-  → patterns=["google.*analytics"], search_type="textual", mode="regex", item_types=["component"]
+  → `patterns=["google.*analytics"], search_type="textual", mode="regex", item_types=["component"]`
 
 - user_input: "Find storage input mappings referencing specific tables:"
-  → patterns=[""storage"\.*"input"\.*:\s*"in\.*\.customers""], search_type="config-based", mode="regex",
-  item_types=["transformation", "component"]
+  → `patterns=["\"storage\".*\"input\".*:\s*\"in\..*\.customers\""], search_type="config-based",
+  mode="regex", item_types=["transformation", "component"]`
 
 - user input: "Find components or transformations using 'my_bucket' in output mappings"
-  → patterns=["my_bucket"], item_types=["component", "transformation"], search_type="config-based",
-    scopes=["storage.output"], mode="literal"
+  → `patterns=["my_bucket"], item_types=["component", "transformation"], search_type="config-based",
+    scopes=["storage.output"], mode="literal"`
 
 - user input: "Find configs with specific authentication type"
-  → patterns=[""authentication":\s*\{.*"type":\s*"oauth20""], search_type="config-based", mode="regex",
-  item_types=["component"]
+  → `patterns=["\"authentication\":\s*\{.*\"type\":\s*\"oauth20\""], search_type="config-based",
+  mode="regex", item_types=["component"]`
 
 - user input: "Find flows using this configuration ID: 01k9cz233cvd1rga3zzx40g8qj"
-  → patterns=["01k9cz233cvd1rga3zzx40g8qj"], search_type="config-based", item_types=["flow"], mode="literal",
-  scopes=["tasks"]
+  → `patterns=["01k9cz233cvd1rga3zzx40g8qj"], search_type="config-based", item_types=["flow"], mode="literal",
+  scopes=["tasks"]`
 
 - user input: "Find data apps using specific code part ..."
-  → patterns=["regex-representing-the-code-part"], search_type="config-based", item_types=["data-app"],
-  mode="regex"], scopes=["script"]
+  → `patterns=["regex-representing-the-code-part"], search_type="config-based", item_types=["data-app"],
+  mode="regex"], scopes=["script"]`
 
 
 **Input JSON Schema**:
diff --git a/src/keboola_mcp_server/tools/search.py b/src/keboola_mcp_server/tools/search.py
index e33738e2..42ae7714 100644
--- a/src/keboola_mcp_server/tools/search.py
+++ b/src/keboola_mcp_server/tools/search.py
@@ -557,40 +557,39 @@ async def search(
 
     USAGE EXAMPLES:
     - user_input: "Find all tables with 'customer' in the name"
-      → patterns=["customer"], search_type="textual", mode="literal", item_types=["table"]
+      → `patterns=["customer"], search_type="textual", mode="literal", item_types=["table"]`
 
     - user_input: "Find tables with 'email' column"
-      → patterns=["email"], search_type="textual", mode="literal", item_types=["table"]
+      → `patterns=["email"], search_type="textual", mode="literal", item_types=["table"]`
 
     - user_input: "Search for the sales transformation"
-      → patterns=["sales"], search_type="textual", mode="literal", item_types=["transformation"]
-      → Returns transformations with "sales" in any searchable field
+      → `patterns=["sales"], search_type="textual", mode="literal", item_types=["transformation"]`
 
     - user_input: "Find items named 'daily report' or 'weekly summary'"
-      → patterns=["daily.*report", "weekly.*summary"], search_type="textual", mode="regex", item_types=[]
+      → `patterns=["daily.*report", "weekly.*summary"], search_type="textual", mode="regex", item_types=[]`
 
     - user_input: "Show me all configurations/components related to Google Analytics"
-      → patterns=["google.*analytics"], search_type="textual", mode="regex", item_types=["component"]
+      → `patterns=["google.*analytics"], search_type="textual", mode="regex", item_types=["component"]`
 
     - user_input: "Find storage input mappings referencing specific tables:"
-      → patterns=["\"storage\"\\.*\"input\"\\.*:\\s*\"in\\.*\\.customers\""], search_type="config-based", mode="regex",
-      item_types=["transformation", "component"]
+      → `patterns=["\\"storage\\".*\\"input\\".*:\\s*\\"in\\..*\\.customers\\""], search_type="config-based",
+      mode="regex", item_types=["transformation", "component"]`
 
     - user input: "Find components or transformations using 'my_bucket' in output mappings"
-      → patterns=["my_bucket"], item_types=["component", "transformation"], search_type="config-based",
-        scopes=["storage.output"], mode="literal"
+      → `patterns=["my_bucket"], item_types=["component", "transformation"], search_type="config-based",
+        scopes=["storage.output"], mode="literal"`
 
     - user input: "Find configs with specific authentication type"
-      → patterns=["\"authentication\":\\s*\\{.*\"type\":\\s*\"oauth20\""], search_type="config-based", mode="regex",
-      item_types=["component"]
+      → `patterns=["\\"authentication\\":\\s*\\{.*\\"type\\":\\s*\\"oauth20\\""], search_type="config-based",
+      mode="regex", item_types=["component"]`
 
     - user input: "Find flows using this configuration ID: 01k9cz233cvd1rga3zzx40g8qj"
-      → patterns=["01k9cz233cvd1rga3zzx40g8qj"], search_type="config-based", item_types=["flow"], mode="literal",
-      scopes=["tasks"]
+      → `patterns=["01k9cz233cvd1rga3zzx40g8qj"], search_type="config-based", item_types=["flow"], mode="literal",
+      scopes=["tasks"]`
 
     - user input: "Find data apps using specific code part ..."
-      → patterns=["regex-representing-the-code-part"], search_type="config-based", item_types=["data-app"],
-      mode="regex"], scopes=["script"]
+      → `patterns=["regex-representing-the-code-part"], search_type="config-based", item_types=["data-app"],
+      mode="regex"], scopes=["script"]`
     """
 
     spec = SearchSpec(

From 0e6686e9614359390a703f876f890c52de6d8b0c Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Wed, 18 Feb 2026 06:31:24 +0100
Subject: [PATCH 16/29] AI-2161 feat: simplify search API and improve config
 scope matching

---
 src/keboola_mcp_server/tools/search.py | 269 ++++++++++++++++++-------
 1 file changed, 194 insertions(+), 75 deletions(-)

diff --git a/src/keboola_mcp_server/tools/search.py b/src/keboola_mcp_server/tools/search.py
index 42ae7714..90a430f1 100644
--- a/src/keboola_mcp_server/tools/search.py
+++ b/src/keboola_mcp_server/tools/search.py
@@ -4,8 +4,10 @@
 import re
 from typing import Annotated, Any, AsyncGenerator, Iterable, Literal, Mapping, Sequence
 
+import jsonpath_ng
 from fastmcp import Context, FastMCP
 from fastmcp.tools import FunctionTool
+from jsonpath_ng.jsonpath import JSONPath
 from mcp.types import ToolAnnotations
 from pydantic import BaseModel, Field, PrivateAttr, model_validator
 
@@ -114,6 +116,11 @@ class SearchHit(BaseModel):
     name: str | None = Field(default=None, description='Name of the item.')
     display_name: str | None = Field(default=None, description='Display name of the item.')
     description: str | None = Field(default=None, description='Description of the item.')
+    match_scopes: list[str] = Field(
+        default_factory=list,
+        description='Most specific JSONPath scopes within the configuration where a pattern was matched '
+        '(config-based search only).',
+    )
     links: list[Link] = Field(default_factory=list, description='Links to the item.')
     _matches: list[PatternMatch] = PrivateAttr(default_factory=list)
 
@@ -148,6 +155,15 @@ def check_id_fields(self) -> 'SearchHit':
     def with_matches(self, matches: list['PatternMatch']) -> 'SearchHit':
         """Assign pattern matches to this search hit and return self for chaining."""
         self._matches = matches
+        unique_scopes = list(dict.fromkeys(match.scope for match in matches if match.scope))
+        self.match_scopes = [
+            scope
+            for scope in unique_scopes
+            if not any(
+                other != scope and other.startswith(scope) and other[len(scope) : len(scope) + 1] in {'.', '['}
+                for other in unique_scopes
+            )
+        ]
         return self
 
 
@@ -163,6 +179,8 @@ class SearchSpec(BaseModel):
     _component_types: Sequence[str] = PrivateAttr(default_factory=tuple)
     _compiled_patterns: list[re.Pattern] = PrivateAttr(default_factory=list)
     _clean_patterns: list[str] = PrivateAttr(default_factory=list)
+    _all_nodes_expr: JSONPath | None = PrivateAttr(default=None)
+    _scope_exprs: list[tuple[str, JSONPath, JSONPath]] = PrivateAttr(default_factory=list)
 
     @model_validator(mode='after')
     def _compile_patterns(self) -> 'SearchSpec':
@@ -198,8 +216,21 @@ def _validate_item_types(self) -> 'SearchSpec':
             self.item_types = list({*self.item_types, 'configuration', 'configuration-row'})
         return self
 
+    @model_validator(mode='after')
+    def _compile_jsonpath_exprs(self) -> 'SearchSpec':
+        # Compile commonly used expressions once per SearchSpec instance.
+        self._all_nodes_expr = jsonpath_ng.parse('$..*')
+        self._scope_exprs = []
+        for scope in self.search_scopes:
+            normalized = scope if scope.startswith('$') else f'$.{scope}'
+            try:
+                self._scope_exprs.append((scope, jsonpath_ng.parse(normalized), jsonpath_ng.parse(f'{normalized}..*')))
+            except Exception as e:
+                LOG.warning(f'Invalid JSONPath scope "{scope}": {e}')
+        return self
+
     @staticmethod
-    def _stringify(value: JsonDict) -> str:
+    def _stringify(value: Any) -> str:
         try:
             return json.dumps(value, sort_keys=True, default=str, ensure_ascii=False)
         except (TypeError, ValueError):
@@ -227,25 +258,76 @@ def match_patterns(self, value: str | JsonDict | None) -> list[str]:
 
         return matches
 
+    def _find_matches_for_expr(self, configuration: JsonDict, parsed_expr: JSONPath) -> list[PatternMatch]:
+        """Find pattern matches on JSON nodes matched by a JSONPath expression."""
+        matches: list[PatternMatch] = []
+        for jpath_match in parsed_expr.find(configuration):
+            value = jpath_match.value
+            if matched := self.match_patterns(value):
+                matches.append(
+                    PatternMatch(
+                        scope=re.sub(r'\.\[', '[', str(jpath_match.full_path)),
+                        patterns=matched,
+                    )
+                )
+                if not self.return_all_matched_patterns:
+                    return matches
+        return matches
+
+    def _find_scalar_matches_for_expr(self, configuration: JsonDict, parsed_expr: JSONPath) -> list[PatternMatch]:
+        """Find pattern matches only on scalar nodes matched by a JSONPath expression."""
+        matches: list[PatternMatch] = []
+        for jpath_match in parsed_expr.find(configuration):
+            value = jpath_match.value
+            if value is None or isinstance(value, (dict, list)):
+                continue
+            if matched := self.match_patterns(value):
+                matches.append(
+                    PatternMatch(
+                        scope=re.sub(r'\.\[', '[', str(jpath_match.full_path)),
+                        patterns=matched,
+                    )
+                )
+                if not self.return_all_matched_patterns:
+                    return matches
+        return matches
+
     def match_configuration_scopes(self, configuration: JsonDict | None) -> list[PatternMatch]:
         """
-        Checks configuration fields within specified scopes for pattern matches.
+        Checks configuration fields within specified JSONPath scopes for pattern matches.
+        Walks matching nodes within each scope and returns the exact path where the match
+        was found. When no scopes are specified, walks the entire configuration.
 
         :param configuration: The configuration to match against the patterns.
-        :return: A tuple of scopes and patterns that matched the configuration; empty patterns if no matches.
+        :return: List of PatternMatch with matching JSONPath scopes; empty list if no matches.
         """
+        if configuration is None:
+            return []
+
         if self.search_scopes:
-            matches: list[PatternMatch] = []
-            for scope in self.search_scopes:
-                if matched := self.match_patterns(get_nested(configuration, scope, default=None)):
-                    matches.append(PatternMatch(scope=scope, patterns=matched))
+            all_matches: list[PatternMatch] = []
+            # Deduplicate hits when scopes overlap (e.g. "parameters" + "parameters.query")
+            # or the same logical scope is provided multiple times.
+            seen: set[str | None] = set()
+            for _scope, self_expr, desc_expr in self._scope_exprs:
+                # Include self scope only for scalar values. For objects/lists, include descendants only.
+                self_matches = self._find_scalar_matches_for_expr(configuration, self_expr)
+                desc_matches = self._find_matches_for_expr(configuration, desc_expr)
+
+                scope_matches = desc_matches if desc_matches else self_matches
+                for match in scope_matches:
+                    if match.scope in seen:
+                        continue
+                    seen.add(match.scope)
+                    all_matches.append(match)
                     if not self.return_all_matched_patterns:
-                        break
-            return matches
+                        return all_matches
+            return all_matches
 
-        if matched := self.match_patterns(configuration):
-            return [PatternMatch(scope=None, patterns=matched)]
-        return []
+        # No scope provided – search all descendants and return exact match paths.
+        if self._all_nodes_expr is None:
+            self._all_nodes_expr = jsonpath_ng.parse('$..*')
+        return self._find_matches_for_expr(configuration, self._all_nodes_expr)
 
     def match_texts(self, texts: Iterable[str]) -> list[PatternMatch]:
         """
@@ -465,9 +547,10 @@ async def search(
     patterns: Annotated[
         list[str],
         Field(
-            description='One or more search patterns to match against item ID, name, display name, or description. '
-            'Supports regex patterns. Case-insensitive by default. Examples: ["customer"], ["sales", "revenue"], '
-            '["test.*table"], ["key1.*:.*key2.*:.*value.*"]. Do not use empty strings or empty lists.'
+            description='One or more search patterns to match against item ID, name, display name, description, '
+            'or configuration JSON objects. Case-insensitive by default. '
+            'Examples: ["customer"], ["sales", "revenue"], ["my_bucket"]. '
+            'Do not use empty strings or empty lists.'
         ),
     ],
     item_types: Annotated[
@@ -484,15 +567,16 @@ async def search(
         SearchType,
         Field(
             description='Search mode: "textual" (name/id/description) or "config-based" (stringified configuration '
-            'payloads).'
+            'payloads). (default: "textual")'
         ),
     ] = 'textual',
     scopes: Annotated[
         Sequence[str],
         Field(
-            description='Dot-separated keys to search in configuration payloads, used with "config-based" search. '
-            'Example: "parameters.field", "storage.input", "storage.output", "processors.before", "processors.after", '
-            '"authorization", "tasks", "phases". Leave empty to search the whole configuration.'
+            description='JSONPath expressions to narrow config-based search to specific parts of the configuration. '
+            'Simple dot-notation (e.g. "parameters", "storage.input") and full JSONPath (e.g. "$.tasks[*]") are both '
+            'supported (e.g. "parameters.host", "storage.input[0].source"). '
+            'Leave empty to search the whole configuration.'
         ),
     ] = tuple(),
     mode: Annotated[
@@ -502,10 +586,6 @@ async def search(
             '(default: "literal").'
         ),
     ] = 'literal',
-    case_sensitive: Annotated[
-        bool,
-        Field(description='If true, match patterns with case sensitivity (default: false).'),
-    ] = False,
     limit: Annotated[
         int,
         Field(
@@ -516,89 +596,128 @@ async def search(
     offset: Annotated[int, Field(description='Number of matching items to skip for pagination (default: 0).')] = 0,
 ) -> list[SearchHit]:
     """
-    Searches for Keboola items (tables, buckets, configurations, transformations, flows, data-apps etc.) in the current
-    project. Returns matching items with IDs and metadata.
-    Supports two modes:
-    - textual: match patterns against ID, name, display name, description (and table columns)
-    - config-based: match patterns against stringified configuration payloads, optionally limited to specific scopes
-    which can be derived from the configuration schemas or objects.
+    Searches for Keboola items (tables, buckets, components, configurations, transformations, flows, data-apps, etc.)
+    in the current project and returns matching ID + metadata.
+
+    This tool supports two complementary search types:
+
+    1) textual
+    - Searches item metadata fields by matching patterns against id, name, displayName, and description.
+    - For tables, also searches column names and column descriptions.
 
+    2) config-based
+    - Searches item configurations (JSON objects) by matching patterns against the configuration values converted
+    to a string, optionally narrowed by JSON path `scopes`.
+    - Returns also `match_scopes` with JSON paths in configuration where a pattern was found.
+
+    THIS IS THE PRIMARY DISCOVERY TOOL. Always use it BEFORE any get_* tool when you need to find items
+    by name or specific configuration content. Do NOT enumerate items with get_buckets, get_tables, get_configs,
+    get_flows, or get_data_apps just to locate a specific item — use this tool instead.
 
     WHEN TO USE:
-    - User asks to "find", "locate", or "search for" something by name or text
+    - User asks to "find", "locate", or "search for" something by name, keyword, text pattern, configuration content or
+    value
     - User mentions a partial name and you need to find the full item (e.g., "find the customer table")
     - User asks "what tables/configs/flows do I have with X in the name?"
-    - User asks to find configs containing a value in parameters (use config-based + scopes and regex patterns)
-    - Use this tool to trace lineage by searching for IDs referenced in configurations, or to find flows using a
-    specific component, or find usage of a bucket/table in transformations, or to find items with specific parameters.
     - You need to discover items before performing operations on them
-    - User asks to "what is the genesis of this item?" or "explain me business logic of this item?"
     - User asks to "list all items with [name] or [configuration value/part] in it"
-    - DO NOT use for listing all items of a specific type. Use get_configs, list_tables, get_flows, etc instead.
+    - User asks where a value, table, component, specific configuration ID, or specific settings is used in components,
+    data-apps, flows, or transformations
+    - You need to trace lineage by searching for IDs referenced in configurations, or to find flows using a
+      specific component, or find usage of a bucket/table in transformations or components, or to find items with
+      specific parameters.
+    - User asks to "what is the genesis of this item?" or "explain me business logic of this item?"
 
     HOW IT WORKS:
-    - mode: "literal" (default) or "regex" (regular expressions)
-    - case_sensitive: false by default; set true for exact casing
-    - search_type:
-      - "textual": matches id/name/display_name/description fields
-      - "config-based": matches stringified configuration payloads (JSON) via scopes or the whole config using
-      regex patterns.
-    - scopes: dot-separated paths (e.g., "parameters", "storage.input", "parameters.script")
-    - For tables, textual search also checks column names and column descriptions
-    - Multiple patterns are ORed: any match includes the item
-    - Results are ordered by update time, newest first, and can be paginated via limit/offset
+    - Supports two types:
+      - search_type="textual": matches against id, name, displayName, and description, for tables also column names
+      and column descriptions
+      - search_type="config-based": matches inside configuration JSON objects, optionally narrowed by JSON path `scopes`
+    - case-insensitive search
+    - mode for pattern search: `literal` (default) or `regex`
+    - Multiple patterns work as OR condition - matches items containing ANY of the patterns
+    - Each result includes the item's ID, name, creation date, and relevant metadata
+    - scopes (config-based) narrow matching to specific JSONPath areas within configurations; matching is performed
+    against the stringified JSON node content in those areas.
+    - config-based always returns all matched paths per item in `match_scopes`
 
     IMPORTANT:
     - Always use this tool when the user mentions a name but you don't have the exact ID
-    - The search returns IDs that you can use with other tools (e.g., get_table, get_configs, get_flows)
-    - Use item_types to make the search more efficient when you know the type; scanning buckets and tables can be
-    expensive
-    - For exact ID lookups, use specific tools like get_table, get_configs, get_flows instead
+    - The search returns IDs that you can use with other tools (e.g., get_tables, get_configs, get_flows)
+    - Results are ordered by update time. The most recently updated items are returned first.
+    - Fill `item_types` to make the search more efficient when you know the item type; scanning buckets and tables can
+    be expensive
+    - For exact ID lookups, use specific tools like get_tables, get_configs, get_flows instead
+    - Use specific `scopes` only when you know the config structure (schema or real example); otherwise run config-based
+    search without scopes.
+    - Use find_component_id and get_configs tools to find configurations related to a specific component
+    - If results are too numerous or empty, ask the user to refine their query rather than enumerating all items.
 
     USAGE EXAMPLES:
-    - user_input: "Find all tables with 'customer' in the name"
-      → `patterns=["customer"], search_type="textual", mode="literal", item_types=["table"]`
+    1) textual search examples:
+      - user_input: "Find all tables with 'customer' in the name"
+        → patterns=["customer"], item_types=["table"]
+        → Returns all tables whose id, name, displayName, or description contains "customer"
+
+      - user_input: "Find tables with 'email' column"
+        → patterns=["email"], item_types=["table"]
+        → Returns all tables that have a column named "email" or with "email" in column description
+
+      - user_input: "Search for the sales transformation"
+        → patterns=["sales"], item_types=["transformation"]
+        → Returns transformations with "sales" in any searchable field
+
+      - user_input: "Find items named 'daily report' or 'weekly summary'"
+        → patterns=["daily.*report", "weekly.*summary"], item_types=[], mode="regex"
+        → Returns all items matching any of these patterns
+
+      - user_input: "Show me all configurations related to Google Analytics"
+        → patterns=["google.*analytics"], item_types=["configuration"], mode="regex"
+        → Returns configurations with matching patterns
 
-    - user_input: "Find tables with 'email' column"
-      → `patterns=["email"], search_type="textual", mode="literal", item_types=["table"]`
+    2) config-based search examples:
+      - user_input: "Find transformations/configs/components referencing table in.c-prod.customers"
+        -> patterns=["in.c-prod.customers"], item_types=["transformation", "configuration"], search_type="config-based"
+        -> No scopes = search whole stringified config; result includes `match_scopes` with exact paths
 
-    - user_input: "Search for the sales transformation"
-      → `patterns=["sales"], search_type="textual", mode="literal", item_types=["transformation"]`
+      - user_input: "Find configurations (etc.) using specific setting / id anywhere"
+        -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based"
 
-    - user_input: "Find items named 'daily report' or 'weekly summary'"
-      → `patterns=["daily.*report", "weekly.*summary"], search_type="textual", mode="regex", item_types=[]`
+      - user_input: "Find configurations (etc.) using specific setting /id in parameters"
+      -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based", scopes=["parameters"]
 
-    - user_input: "Show me all configurations/components related to Google Analytics"
-      → `patterns=["google.*analytics"], search_type="textual", mode="regex", item_types=["component"]`
+      - user_input: "Find configurations (etc.) using specific setting / id in storage"
+      -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based", scopes=["storage"]
 
-    - user_input: "Find storage input mappings referencing specific tables:"
-      → `patterns=["\\"storage\\".*\\"input\\".*:\\s*\\"in\\..*\\.customers\\""], search_type="config-based",
-      mode="regex", item_types=["transformation", "component"]`
+      - user_input: "Find configurations (etc.) using specific setting / id in authorization"
+        -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based",
+          scopes=["parameters.authorization", "authorization"]
 
-    - user input: "Find components or transformations using 'my_bucket' in output mappings"
-      → `patterns=["my_bucket"], item_types=["component", "transformation"], search_type="config-based",
-        scopes=["storage.output"], mode="literal"`
+      - user_input: "Find components/transformations using my_bucket in input or output mappings"
+        -> patterns=["my_bucket"], item_types=["configuration", "transformation"], search_type="config-based",
+          scopes=["storage.input", "storage.output"]
+        -> Returns matches with paths like `storage.input[0].source` or `storage.output[0].target`
 
-    - user input: "Find configs with specific authentication type"
-      → `patterns=["\\"authentication\\":\\s*\\{.*\\"type\\":\\s*\\"oauth20\\""], search_type="config-based",
-      mode="regex", item_types=["component"]`
+      - user_input: "Find flows using configuration ID 01k9cz233cvd1rga3zzx40g8qj"
+        -> patterns=["01k9cz233cvd1rga3zzx40g8qj"], item_types=["flow"], search_type="config-based",
+          scopes=["tasks", "phases"]
 
-    - user input: "Find flows using this configuration ID: 01k9cz233cvd1rga3zzx40g8qj"
-      → `patterns=["01k9cz233cvd1rga3zzx40g8qj"], search_type="config-based", item_types=["flow"], mode="literal",
-      scopes=["tasks"]`
+      - user_input: "Find transformations using this table / column / specific code in its script"
+        -> patterns=["element"], item_types=["transformation"], search_type="config-based",
+          scopes=["parameters"]
 
-    - user input: "Find data apps using specific code part ..."
-      → `patterns=["regex-representing-the-code-part"], search_type="config-based", item_types=["data-app"],
-      mode="regex"], scopes=["script"]`
+      - user_input: "Find data apps using something in its config / python code / setting"
+        -> patterns=["something"], item_types=["data-app"], search_type="config-based"
+        -> Returns data apps where script/config sections contain the keyword and includes `match_scopes`
     """
 
     spec = SearchSpec(
         patterns=patterns,
         item_types=item_types,
         pattern_mode=mode,
-        case_sensitive=case_sensitive,
         search_type=search_type,
         search_scopes=scopes,
+        return_all_matched_patterns=(search_type == 'config-based'),
     )
 
     offset = max(0, offset)

From ac4e665e53047c2c83a0359361e1c355bc7d657a Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Wed, 18 Feb 2026 06:31:26 +0100
Subject: [PATCH 17/29] AI-2161 test: add config-based search scope coverage

---
 tests/tools/test_search.py | 175 +++++++++++++++++++++++++++++++++++--
 1 file changed, 169 insertions(+), 6 deletions(-)

diff --git a/tests/tools/test_search.py b/tests/tools/test_search.py
index 1e302e76..7688b1e6 100644
--- a/tests/tools/test_search.py
+++ b/tests/tools/test_search.py
@@ -694,6 +694,135 @@ async def test_search_table_by_columns(
         if expected_count > 0:
             assert result[0].table_id == expected_first_table_id
 
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize(
+        (
+            'patterns',
+            'scopes',
+            'component_configurations',
+            'expected_hits',
+        ),
+        [
+            (
+                ['alpha', 'beta'],
+                ('parameters', 'storage.input'),
+                [
+                    {
+                        'id': 'test-config',
+                        'name': 'Test Config',
+                        'created': '2024-01-02T00:00:00Z',
+                        'configuration': {
+                            'parameters': {'query': 'alpha'},
+                            'storage': {'input': [{'source': 'beta'}]},
+                        },
+                        'rows': [],
+                    }
+                ],
+                [('test-config', ['parameters.query', 'storage.input[0].source'])],
+            ),
+            (
+                ['gamma'],
+                tuple(),
+                [
+                    {
+                        'id': 'test-config',
+                        'name': 'Test Config',
+                        'created': '2024-01-02T00:00:00Z',
+                        'configuration': {
+                            'parameters': {'query': 'alpha'},
+                            'storage': {
+                                'input': [{'source': 'beta'}, {'source': 'gamma'}],
+                                'output': [{'destination': 'gamma'}],
+                            },
+                        },
+                        'rows': [],
+                    }
+                ],
+                [('test-config', ['storage.input[1].source', 'storage.output[0].destination'])],
+            ),
+            (
+                ['alpha', 'gamma'],
+                tuple(),
+                [
+                    {
+                        'id': 'test-config-a',
+                        'name': 'Test Config A',
+                        'created': '2024-01-02T00:00:00Z',
+                        'configuration': {
+                            'parameters': {'query': 'alpha'},
+                            'storage': {'input': [{'source': 'beta'}]},
+                        },
+                        'rows': [],
+                    },
+                    {
+                        'id': 'test-config-b',
+                        'name': 'Test Config B',
+                        'created': '2024-01-03T00:00:00Z',
+                        'configuration': {
+                            'storage': {'output': [{'destination': 'gamma'}]},
+                        },
+                        'rows': [],
+                    },
+                    {
+                        'id': 'test-config-c',
+                        'name': 'Test Config C',
+                        'created': '2024-01-01T00:00:00Z',
+                        'configuration': {
+                            'parameters': {'query': 'nomatch'},
+                        },
+                        'rows': [],
+                    },
+                ],
+                [
+                    ('test-config-b', ['storage.output[0].destination']),
+                    ('test-config-a', ['parameters.query']),
+                ],
+            ),
+        ],
+        ids=[
+            'all_matches_in_scopes',
+            'most_specific_scope_only',
+            'multiple_configurations_returned',
+        ],
+    )
+    async def test_search_config_based_match_scopes(
+        self,
+        mocker: MockerFixture,
+        mcp_context_client: Context,
+        patterns: list[str],
+        scopes: tuple[str, ...],
+        component_configurations: list[dict[str, Any]],
+        expected_hits: list[tuple[str, list[str]]],
+    ):
+        keboola_client = KeboolaClient.from_state(mcp_context_client.session.state)
+
+        keboola_client.storage_client.bucket_list = mocker.AsyncMock(return_value=[])
+        keboola_client.storage_client.bucket_table_list = mocker.AsyncMock(return_value=[])
+        keboola_client.storage_client.component_list = mocker.AsyncMock(
+            side_effect=lambda component_type, include=None: (
+                [
+                    {
+                        'id': 'keboola.ex-db-mysql',
+                        'type': 'extractor',
+                        'configurations': component_configurations,
+                    }
+                ]
+                if component_type == 'extractor'
+                else []
+            )
+        )
+        keboola_client.storage_client.workspace_list = mocker.AsyncMock(return_value=[])
+
+        result = await search(
+            ctx=mcp_context_client,
+            patterns=patterns,
+            item_types=(cast(SearchItemType, 'configuration'),),
+            search_type='config-based',
+            scopes=scopes,
+        )
+
+        assert [(hit.configuration_id, hit.match_scopes) for hit in result] == expected_hits
+
 
 @pytest.mark.parametrize(
     ('spec_kwargs', 'texts', 'expected'),
@@ -774,6 +903,7 @@ def test_match_texts(spec_kwargs: dict[str, Any], texts: list[str], expected: li
     ('spec_kwargs', 'configuration', 'expected'),
     [
         (
+            # Scopes provided; each scope has one matching leaf – returns the exact leaf path.
             {
                 'patterns': ['alpha', 'beta'],
                 'item_types': ('configuration',),
@@ -785,11 +915,12 @@ def test_match_texts(spec_kwargs: dict[str, Any], texts: list[str], expected: li
                 'storage': {'input': [{'source': 'beta'}], 'output': [{'destination': 'gamma'}]},
             },
             [
-                {'scope': 'parameters', 'patterns': ['alpha']},
-                {'scope': 'storage.input', 'patterns': ['beta']},
+                {'scope': 'parameters.query', 'patterns': ['alpha']},
+                {'scope': 'storage.input[0].source', 'patterns': ['beta']},
             ],
         ),
         (
+            # Both patterns match across two leaves inside the same scope; each leaf gets its own entry.
             {
                 'patterns': ['alpha', 'beta'],
                 'item_types': ('configuration',),
@@ -801,11 +932,13 @@ def test_match_texts(spec_kwargs: dict[str, Any], texts: list[str], expected: li
                 'storage': {'input': [{'source': 'beta'}, {'source': 'alpha'}], 'output': [{'destination': 'gamma'}]},
             },
             [
-                {'scope': 'parameters', 'patterns': ['alpha']},
-                {'scope': 'storage.input', 'patterns': ['alpha', 'beta']},
+                {'scope': 'parameters.query', 'patterns': ['alpha']},
+                {'scope': 'storage.input[0].source', 'patterns': ['beta']},
+                {'scope': 'storage.input[1].source', 'patterns': ['alpha']},
             ],
         ),
         (
+            # Pattern not present in any of the specified scopes → empty result.
             {
                 'patterns': ['gamma'],
                 'item_types': ('configuration',),
@@ -819,6 +952,7 @@ def test_match_texts(spec_kwargs: dict[str, Any], texts: list[str], expected: li
             [],
         ),
         (
+            # No scopes → walk the whole config; can match parent nodes containing the searched fragment.
             {
                 'patterns': ['gamma'],
                 'item_types': ('configuration',),
@@ -828,9 +962,14 @@ def test_match_texts(spec_kwargs: dict[str, Any], texts: list[str], expected: li
                 'parameters': {'query': 'alpha'},
                 'storage': {'input': [{'source': 'beta'}], 'output': [{'destination': 'gamma'}]},
             },
-            [{'scope': None, 'patterns': ['gamma']}],
+            [
+                {'scope': 'storage', 'patterns': ['gamma']},
+                {'scope': 'storage.output', 'patterns': ['gamma']},
+                {'scope': 'storage.output[0].destination', 'patterns': ['gamma']},
+            ],
         ),
         (
+            # return_all_matched_patterns=False → stop after first matching leaf.
             {
                 'patterns': ['alpha', 'beta'],
                 'item_types': ('configuration',),
@@ -841,7 +980,29 @@ def test_match_texts(spec_kwargs: dict[str, Any], texts: list[str], expected: li
                 'parameters': {'query': 'alpha'},
                 'storage': {'input': [{'source': 'beta'}], 'output': [{'destination': 'gamma'}]},
             },
-            [{'scope': 'parameters', 'patterns': ['alpha']}],
+            [{'scope': 'parameters.query', 'patterns': ['alpha']}],
+        ),
+        (
+            # Overlapping scopes should not return duplicate leaf hits.
+            {
+                'patterns': ['alpha'],
+                'item_types': ('configuration',),
+                'search_scopes': ('parameters', 'parameters.query'),
+                'return_all_matched_patterns': True,
+            },
+            {'parameters': {'query': 'alpha'}},
+            [{'scope': 'parameters.query', 'patterns': ['alpha']}],
+        ),
+        (
+            # Scope pointing directly to scalar should still match (self-scope fallback).
+            {
+                'patterns': ['wttr.in'],
+                'item_types': ('configuration',),
+                'search_scopes': ('parameters.api.baseUrl',),
+                'return_all_matched_patterns': True,
+            },
+            {'parameters': {'api': {'baseUrl': 'https://wttr.in'}}},
+            [{'scope': 'parameters.api.baseUrl', 'patterns': ['wttr.in']}],
         ),
     ],
     ids=[
@@ -850,6 +1011,8 @@ def test_match_texts(spec_kwargs: dict[str, Any], texts: list[str], expected: li
         'no_patterns_in_scope',
         'all_patterns_no_scope',
         'any_patterns_return_first_match',
+        'overlapping_scopes_deduplicated',
+        'scalar_scope_matches_self',
     ],
 )
 def test_match_configuration_scopes(spec_kwargs: dict[str, Any], configuration: dict[str, Any], expected: list[dict]):

From b390a73c55eda7ab16bef3e133dde434516fba2b Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Wed, 18 Feb 2026 06:31:31 +0100
Subject: [PATCH 18/29] AI-2161 docs: refresh search tool documentation

---
 TOOLS.md | 154 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 94 insertions(+), 60 deletions(-)

diff --git a/TOOLS.md b/TOOLS.md
index 71935805..9f3e2b22 100644
--- a/TOOLS.md
+++ b/TOOLS.md
@@ -53,7 +53,7 @@ including essential context and base instructions for working with it
 
 ### Search Tools
 - [find_component_id](#find_component_id): Returns list of component IDs that match the given query.
-- [search](#search): Searches for Keboola items (tables, buckets, configurations, transformations, flows, data-apps etc.
+- [search](#search): Searches for Keboola items (tables, buckets, components, configurations, transformations, flows, data-apps, etc.
 
 ### Storage Tools
 - [get_buckets](#get_buckets): Lists buckets or retrieves full details of specific buckets, including descriptions,
@@ -2587,80 +2587,119 @@ USAGE EXAMPLES:
 
 **Description**:
 
-Searches for Keboola items (tables, buckets, configurations, transformations, flows, data-apps etc.) in the current
-project. Returns matching items with IDs and metadata.
-Supports two modes:
-- textual: match patterns against ID, name, display name, description (and table columns)
-- config-based: match patterns against stringified configuration payloads, optionally limited to specific scopes
-which can be derived from the configuration schemas or objects.
+Searches for Keboola items (tables, buckets, components, configurations, transformations, flows, data-apps, etc.)
+in the current project and returns matching ID + metadata.
 
+This tool supports two complementary search types:
+
+1) textual
+- Searches item metadata fields by matching patterns against id, name, displayName, and description.
+- For tables, also searches column names and column descriptions.
+
+2) config-based
+- Searches item configurations (JSON objects) by matching patterns against the configuration values converted
+to a string, optionally narrowed by JSON path `scopes`.
+- Returns also `match_scopes` with JSON paths in configuration where a pattern was found.
+
+THIS IS THE PRIMARY DISCOVERY TOOL. Always use it BEFORE any get_* tool when you need to find items
+by name or specific configuration content. Do NOT enumerate items with get_buckets, get_tables, get_configs,
+get_flows, or get_data_apps just to locate a specific item — use this tool instead.
 
 WHEN TO USE:
-- User asks to "find", "locate", or "search for" something by name or text
+- User asks to "find", "locate", or "search for" something by name, keyword, text pattern, configuration content or
+value
 - User mentions a partial name and you need to find the full item (e.g., "find the customer table")
 - User asks "what tables/configs/flows do I have with X in the name?"
-- User asks to find configs containing a value in parameters (use config-based + scopes and regex patterns)
-- Use this tool to trace lineage by searching for IDs referenced in configurations, or to find flows using a
-specific component, or find usage of a bucket/table in transformations, or to find items with specific parameters.
 - You need to discover items before performing operations on them
-- User asks to "what is the genesis of this item?" or "explain me business logic of this item?"
 - User asks to "list all items with [name] or [configuration value/part] in it"
-- DO NOT use for listing all items of a specific type. Use get_configs, list_tables, get_flows, etc instead.
+- User asks where a value, table, component, specific configuration ID, or specific settings is used in components,
+data-apps, flows, or transformations
+- You need to trace lineage by searching for IDs referenced in configurations, or to find flows using a
+  specific component, or find usage of a bucket/table in transformations or components, or to find items with
+  specific parameters.
+- User asks to "what is the genesis of this item?" or "explain me business logic of this item?"
 
 HOW IT WORKS:
-- mode: "literal" (default) or "regex" (regular expressions)
-- case_sensitive: false by default; set true for exact casing
-- search_type:
-  - "textual": matches id/name/display_name/description fields
-  - "config-based": matches stringified configuration payloads (JSON) via scopes or the whole config using
-  regex patterns.
-- scopes: dot-separated paths (e.g., "parameters", "storage.input", "parameters.script")
-- For tables, textual search also checks column names and column descriptions
-- Multiple patterns are ORed: any match includes the item
-- Results are ordered by update time, newest first, and can be paginated via limit/offset
+- Supports two types:
+  - search_type="textual": matches against id, name, displayName, and description, for tables also column names
+  and column descriptions
+  - search_type="config-based": matches inside configuration JSON objects, optionally narrowed by JSON path `scopes`
+- case-insensitive search
+- mode for pattern search: `literal` (default) or `regex`
+- Multiple patterns work as OR condition - matches items containing ANY of the patterns
+- Each result includes the item's ID, name, creation date, and relevant metadata
+- scopes (config-based) narrow matching to specific JSONPath areas within configurations; matching is performed
+against the stringified JSON node content in those areas.
+- config-based always returns all matched paths per item in `match_scopes`
 
 IMPORTANT:
 - Always use this tool when the user mentions a name but you don't have the exact ID
-- The search returns IDs that you can use with other tools (e.g., get_table, get_configs, get_flows)
-- Use item_types to make the search more efficient when you know the type; scanning buckets and tables can be
-expensive
-- For exact ID lookups, use specific tools like get_table, get_configs, get_flows instead
+- The search returns IDs that you can use with other tools (e.g., get_tables, get_configs, get_flows)
+- Results are ordered by update time. The most recently updated items are returned first.
+- Fill `item_types` to make the search more efficient when you know the item type; scanning buckets and tables can
+be expensive
+- For exact ID lookups, use specific tools like get_tables, get_configs, get_flows instead
+- Use specific `scopes` only when you know the config structure (schema or real example); otherwise run config-based
+search without scopes.
+- Use find_component_id and get_configs tools to find configurations related to a specific component
+- If results are too numerous or empty, ask the user to refine their query rather than enumerating all items.
 
 USAGE EXAMPLES:
-- user_input: "Find all tables with 'customer' in the name"
-  → `patterns=["customer"], search_type="textual", mode="literal", item_types=["table"]`
+1) textual search examples:
+  - user_input: "Find all tables with 'customer' in the name"
+    → patterns=["customer"], item_types=["table"]
+    → Returns all tables whose id, name, displayName, or description contains "customer"
+
+  - user_input: "Find tables with 'email' column"
+    → patterns=["email"], item_types=["table"]
+    → Returns all tables that have a column named "email" or with "email" in column description
 
-- user_input: "Find tables with 'email' column"
-  → `patterns=["email"], search_type="textual", mode="literal", item_types=["table"]`
+  - user_input: "Search for the sales transformation"
+    → patterns=["sales"], item_types=["transformation"]
+    → Returns transformations with "sales" in any searchable field
 
-- user_input: "Search for the sales transformation"
-  → `patterns=["sales"], search_type="textual", mode="literal", item_types=["transformation"]`
+  - user_input: "Find items named 'daily report' or 'weekly summary'"
+    → patterns=["daily.*report", "weekly.*summary"], item_types=[], mode="regex"
+    → Returns all items matching any of these patterns
 
-- user_input: "Find items named 'daily report' or 'weekly summary'"
-  → `patterns=["daily.*report", "weekly.*summary"], search_type="textual", mode="regex", item_types=[]`
+  - user_input: "Show me all configurations related to Google Analytics"
+    → patterns=["google.*analytics"], item_types=["configuration"], mode="regex"
+    → Returns configurations with matching patterns
 
-- user_input: "Show me all configurations/components related to Google Analytics"
-  → `patterns=["google.*analytics"], search_type="textual", mode="regex", item_types=["component"]`
+2) config-based search examples:
+  - user_input: "Find transformations/configs/components referencing table in.c-prod.customers"
+    -> patterns=["in.c-prod.customers"], item_types=["transformation", "configuration"], search_type="config-based"
+    -> No scopes = search whole stringified config; result includes `match_scopes` with exact paths
 
-- user_input: "Find storage input mappings referencing specific tables:"
-  → `patterns=["\"storage\".*\"input\".*:\s*\"in\..*\.customers\""], search_type="config-based",
-  mode="regex", item_types=["transformation", "component"]`
+  - user_input: "Find configurations (etc.) using specific setting / id anywhere"
+    -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based"
 
-- user input: "Find components or transformations using 'my_bucket' in output mappings"
-  → `patterns=["my_bucket"], item_types=["component", "transformation"], search_type="config-based",
-    scopes=["storage.output"], mode="literal"`
+  - user_input: "Find configurations (etc.) using specific setting / id in parameters"
+    -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based", scopes=["parameters"]
 
-- user input: "Find configs with specific authentication type"
-  → `patterns=["\"authentication\":\s*\{.*\"type\":\s*\"oauth20\""], search_type="config-based",
-  mode="regex", item_types=["component"]`
+  - user_input: "Find configurations (etc.) using specific setting / id in storage"
+    -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based", scopes=["storage"]
 
-- user input: "Find flows using this configuration ID: 01k9cz233cvd1rga3zzx40g8qj"
-  → `patterns=["01k9cz233cvd1rga3zzx40g8qj"], search_type="config-based", item_types=["flow"], mode="literal",
-  scopes=["tasks"]`
+  - user_input: "Find configurations (etc.) using specific setting / id in authorization"
+    -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based",
+      scopes=["parameters.authorization", "authorization"]
 
-- user input: "Find data apps using specific code part ..."
-  → `patterns=["regex-representing-the-code-part"], search_type="config-based", item_types=["data-app"],
-  mode="regex"], scopes=["script"]`
+  - user_input: "Find components/transformations using my_bucket in input or output mappings"
+    -> patterns=["my_bucket"], item_types=["configuration", "transformation"], search_type="config-based",
+      scopes=["storage.input", "storage.output"]
+    -> Returns matches with paths like `storage.input[0].source` or `storage.output[0].target`
+
+  - user_input: "Find flows using configuration ID 01k9cz233cvd1rga3zzx40g8qj"
+    -> patterns=["01k9cz233cvd1rga3zzx40g8qj"], item_types=["flow"], search_type="config-based",
+      scopes=["tasks", "phases"]
+
+  - user_input: "Find transformations using this table / column / specific code in its script"
+    -> patterns=["element"], item_types=["transformation"], search_type="config-based",
+      scopes=["parameters"]
+
+  - user_input: "Find data apps using something in its config / python code / setting"
+    -> patterns=["something"], item_types=["data-app"], search_type="config-based"
+    -> Returns data apps where script/config sections contain the keyword and includes `match_scopes`
 
 
 **Input JSON Schema**:
@@ -2668,7 +2707,7 @@ USAGE EXAMPLES:
 {
   "properties": {
     "patterns": {
-      "description": "One or more search patterns to match against item ID, name, display name, or description. Supports regex patterns. Case-insensitive by default. Examples: [\"customer\"], [\"sales\", \"revenue\"], [\"test.*table\"], [\"key1.*:.*key2.*:.*value.*\"]. Do not use empty strings or empty lists.",
+      "description": "One or more search patterns to match against item ID, name, display name, description, or configuration JSON objects. Matching is case-insensitive. Examples: [\"customer\"], [\"sales\", \"revenue\"], [\"my_bucket\"]. Do not use empty strings or empty lists.",
       "items": {
         "type": "string"
       },
@@ -2698,7 +2737,7 @@ USAGE EXAMPLES:
     },
     "search_type": {
       "default": "textual",
-      "description": "Search mode: \"textual\" (name/id/description) or \"config-based\" (stringified configuration payloads).",
+      "description": "Search mode: \"textual\" (name/id/description) or \"config-based\" (stringified configuration payloads). (default: \"textual\")",
       "enum": [
         "textual",
         "config-based"
@@ -2707,7 +2746,7 @@ USAGE EXAMPLES:
     },
     "scopes": {
       "default": [],
-      "description": "Dot-separated keys to search in configuration payloads, used with \"config-based\" search. Example: \"parameters.field\", \"storage.input\", \"storage.output\", \"processors.before\", \"processors.after\", \"authorization\", \"tasks\", \"phases\". Leave empty to search the whole configuration.",
+      "description": "JSONPath expressions to narrow config-based search to specific parts of the configuration. Both simple dot-notation (e.g. \"parameters\", \"parameters.host\", \"storage.input[0].source\") and full JSONPath (e.g. \"$.tasks[*]\") are supported. Leave empty to search the whole configuration.",
       "items": {
         "type": "string"
       },
@@ -2722,11 +2761,6 @@ USAGE EXAMPLES:
       ],
       "type": "string"
     },
-    "case_sensitive": {
-      "default": false,
-      "description": "If true, match patterns with case sensitivity (default: false).",
-      "type": "boolean"
-    },
     "limit": {
       "default": 50,
       "description": "Maximum number of items to return (default: 50, max: 100).",

From 19ba6582738ccd02017af0c466eb1fdd07659394 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Wed, 18 Feb 2026 06:31:34 +0100
Subject: [PATCH 19/29] AI-2161 docs: add draft search description

---
 search_description_draft.txt | 112 +++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 search_description_draft.txt

diff --git a/search_description_draft.txt b/search_description_draft.txt
new file mode 100644
index 00000000..c43b56e6
--- /dev/null
+++ b/search_description_draft.txt
@@ -0,0 +1,112 @@
+Searches for Keboola items (tables, buckets, components, configurations, transformations, flows, data-apps, etc.)
+in the current project and returns matching ID + metadata.
+
+This tool supports two complementary search types:
+
+1) textual
+- Searches item metadata fields by matching patterns against id, name, displayName, and description.
+- For tables, also searches column names and column descriptions.
+
+2) config-based
+- Searches item configurations (JSON objects) by matching patterns against the configuration values ​​converted
+to a string, optionally narrowed by JSON path `scopes`.
+- Returns also `match_scopes` with JSON paths in configuration where a pattern was found.
+
+THIS IS THE PRIMARY DISCOVERY TOOL. Always use it BEFORE any get_* tool when you need to find items
+by name or specific configuration content. Do NOT enumerate items with get_buckets, get_tables, get_configs,
+get_flows, or get_data_apps just to locate a specific item — use this tool instead.
+
+WHEN TO USE:
+- User asks to "find", "locate", or "search for" something by name, keyword, text pattern, configuration content or
+value
+- User mentions a partial name and you need to find the full item (e.g., "find the customer table")
+- User asks "what tables/configs/flows do I have with X in the name?"
+- You need to discover items before performing operations on them
+- User asks to "list all items with [name] or [configuration value/part] in it"
+- User asks where a value, table, component, specific configuration ID, or specific settings is used in components,
+data-apps, flows, or transformations
+- You need to trace lineage by searching for IDs referenced in configurations, or to find flows using a
+  specific component, or find usage of a bucket/table in transformations or components, or to find items with
+  specific parameters.
+- User asks to "what is the genesis of this item?" or "explain me business logic of this item?"
+
+HOW IT WORKS:
+- Supports two types:
+  - search_type="textual": matches against id, name, displayName, and description, for tables also column names
+  and column descriptions
+  - search_type="config-based": matches inside configuration JSON objects, optionally narrowed by JSON path `scopes`
+- case-insensitive search (default)
+- mode for pattern search: `literal` (default) or `regex
+- Multiple patterns work as OR condition - matches items containing ANY of the patterns
+- Each result includes the item's ID, name, creation date, and relevant metadata
+- scopes (config-based) narrow matching to specific JSONPath areas within configurations; matching is performed
+against the stringified JSON node content in those areas.
+
+IMPORTANT:
+- Always use this tool when the user mentions a name but you don't have the exact ID
+- The search returns IDs that you can use with other tools (e.g., get_tables, get_configs, get_flows)
+- Results are ordered by update time. The most recently updated items are returned first.
+- Fill `item_types` to make the search more efficient when you know the item type; scanning buckets and tables can be
+expensive
+- For exact ID lookups, use specific tools like get_tables, get_configs, get_flows instead
+- Use specific `scopes` only when you know the config structure (schema or real example); otherwise run config-based
+search without scopes.
+- Use find_component_id and get_configs tools to find configurations related to a specific component
+- If results are too numerous or empty, ask the user to refine their query rather than enumerating all items.
+
+USAGE EXAMPLES:
+1) textual search examples:
+  - user_input: "Find all tables with 'customer' in the name"
+    → patterns=["customer"], item_types=["table"]
+    → Returns all tables whose id, name, displayName, or description contains "customer"
+
+  - user_input: "Find tables with 'email' column"
+    → patterns=["email"], item_types=["table"]
+    → Returns all tables that have a column named "email" or with "email" in column description
+
+  - user_input: "Search for the sales transformation"
+    → patterns=["sales"], item_types=["transformation"]
+    → Returns transformations with "sales" in any searchable field
+
+  - user_input: "Find items named 'daily report' or 'weekly summary'"
+    → patterns=["daily.*report", "weekly.*summary"], item_types=[], mode="regex"
+    → Returns all items matching any of these patterns
+
+  - user_input: "Show me all configurations related to Google Analytics"
+    → patterns=["google.*analytics"], item_types=["configuration"], mode="regex"
+    → Returns configurations with matching patterns
+
+2) config-based search examples:
+  - user_input: "Find transformations/configs/components referencing table in.c-prod.customers"
+    -> patterns=["in.c-prod.customers"], item_types=["transformation", "configuration"], search_type="config-based"
+    -> No scopes = search whole stringified config; result includes `match_scopes` with exact paths
+
+  - user_input: "Find configurations (etc.) using specific setting / id anywhere"
+    -> patterns=["setting", "id], item_types=["configuration"], search_type="config-based",
+
+  - user_input: "Find configurations (etc.) using specific setting /id in parameters"
+  -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based", scopes=["storage"]
+
+  - user_input: "Find configurations (etc.) using specific setting / id in storage"
+  -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based", scopes=["storage"]
+
+  - user_input: "Find configurations (etc.) using specific setting / id in authorization"
+    -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based",
+      scopes=["parameters.authorization", "authorization"]
+
+  - user_input: "Find components/transformations using my_bucket in input or output mappings"
+    -> patterns=["my_bucket"], item_types=["configuration", "transformation"], search_type="config-based",
+      scopes=["storage.input", "storage.output"],  return_all_matches=true
+    -> Returns matches with paths like `storage.input[0].source` or `storage.output[0].target`
+
+  - user_input: "Find flows using configuration ID 01k9cz233cvd1rga3zzx40g8qj"
+    -> patterns=["01k9cz233cvd1rga3zzx40g8qj"], item_types=["flow"], search_type="config-based",
+      scopes=["tasks", "phases"], return_all_matches=true
+    
+  - user_input: "Find transformations using this table / column / specific code in its script"
+    -> patterns=["element"], item_types=["transformation"], search_type="config-based",
+      scopes=["parameters"], return_all_matches=true
+
+  - user_input: "Find data apps using something in its config / python code / setting"
+    -> patterns=["something"], item_types=["data-app"], search_type="config-based", return_all_matches=true
+    -> Returns data apps where script/config sections contain the keyword and includes `match_scopes`
\ No newline at end of file

From f2a489ed305d6578e650683062cb54bcbefd39ed Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Wed, 18 Feb 2026 06:31:37 +0100
Subject: [PATCH 20/29] AI-2161 chore: update lockfile package version

---
 uv.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uv.lock b/uv.lock
index 1f67e055..f84955db 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1200,7 +1200,7 @@ wheels = [
 
 [[package]]
 name = "keboola-mcp-server"
-version = "1.43.3"
+version = "1.44.0"
 source = { editable = "." }
 dependencies = [
     { name = "cryptography" },

From 6f4e672849123cfabd35f49a83aca1d90283d9c6 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Wed, 18 Feb 2026 06:32:02 +0100
Subject: [PATCH 21/29] AI-2161 chore: remove draft search description file

---
 search_description_draft.txt | 112 -----------------------------------
 1 file changed, 112 deletions(-)
 delete mode 100644 search_description_draft.txt

diff --git a/search_description_draft.txt b/search_description_draft.txt
deleted file mode 100644
index c43b56e6..00000000
--- a/search_description_draft.txt
+++ /dev/null
@@ -1,112 +0,0 @@
-Searches for Keboola items (tables, buckets, components, configurations, transformations, flows, data-apps, etc.)
-in the current project and returns matching ID + metadata.
-
-This tool supports two complementary search types:
-
-1) textual
-- Searches item metadata fields by matching patterns against id, name, displayName, and description.
-- For tables, also searches column names and column descriptions.
-
-2) config-based
-- Searches item configurations (JSON objects) by matching patterns against the configuration values ​​converted
-to a string, optionally narrowed by JSON path `scopes`.
-- Returns also `match_scopes` with JSON paths in configuration where a pattern was found.
-
-THIS IS THE PRIMARY DISCOVERY TOOL. Always use it BEFORE any get_* tool when you need to find items
-by name or specific configuration content. Do NOT enumerate items with get_buckets, get_tables, get_configs,
-get_flows, or get_data_apps just to locate a specific item — use this tool instead.
-
-WHEN TO USE:
-- User asks to "find", "locate", or "search for" something by name, keyword, text pattern, configuration content or
-value
-- User mentions a partial name and you need to find the full item (e.g., "find the customer table")
-- User asks "what tables/configs/flows do I have with X in the name?"
-- You need to discover items before performing operations on them
-- User asks to "list all items with [name] or [configuration value/part] in it"
-- User asks where a value, table, component, specific configuration ID, or specific settings is used in components,
-data-apps, flows, or transformations
-- You need to trace lineage by searching for IDs referenced in configurations, or to find flows using a
-  specific component, or find usage of a bucket/table in transformations or components, or to find items with
-  specific parameters.
-- User asks to "what is the genesis of this item?" or "explain me business logic of this item?"
-
-HOW IT WORKS:
-- Supports two types:
-  - search_type="textual": matches against id, name, displayName, and description, for tables also column names
-  and column descriptions
-  - search_type="config-based": matches inside configuration JSON objects, optionally narrowed by JSON path `scopes`
-- case-insensitive search (default)
-- mode for pattern search: `literal` (default) or `regex
-- Multiple patterns work as OR condition - matches items containing ANY of the patterns
-- Each result includes the item's ID, name, creation date, and relevant metadata
-- scopes (config-based) narrow matching to specific JSONPath areas within configurations; matching is performed
-against the stringified JSON node content in those areas.
-
-IMPORTANT:
-- Always use this tool when the user mentions a name but you don't have the exact ID
-- The search returns IDs that you can use with other tools (e.g., get_tables, get_configs, get_flows)
-- Results are ordered by update time. The most recently updated items are returned first.
-- Fill `item_types` to make the search more efficient when you know the item type; scanning buckets and tables can be
-expensive
-- For exact ID lookups, use specific tools like get_tables, get_configs, get_flows instead
-- Use specific `scopes` only when you know the config structure (schema or real example); otherwise run config-based
-search without scopes.
-- Use find_component_id and get_configs tools to find configurations related to a specific component
-- If results are too numerous or empty, ask the user to refine their query rather than enumerating all items.
-
-USAGE EXAMPLES:
-1) textual search examples:
-  - user_input: "Find all tables with 'customer' in the name"
-    → patterns=["customer"], item_types=["table"]
-    → Returns all tables whose id, name, displayName, or description contains "customer"
-
-  - user_input: "Find tables with 'email' column"
-    → patterns=["email"], item_types=["table"]
-    → Returns all tables that have a column named "email" or with "email" in column description
-
-  - user_input: "Search for the sales transformation"
-    → patterns=["sales"], item_types=["transformation"]
-    → Returns transformations with "sales" in any searchable field
-
-  - user_input: "Find items named 'daily report' or 'weekly summary'"
-    → patterns=["daily.*report", "weekly.*summary"], item_types=[], mode="regex"
-    → Returns all items matching any of these patterns
-
-  - user_input: "Show me all configurations related to Google Analytics"
-    → patterns=["google.*analytics"], item_types=["configuration"], mode="regex"
-    → Returns configurations with matching patterns
-
-2) config-based search examples:
-  - user_input: "Find transformations/configs/components referencing table in.c-prod.customers"
-    -> patterns=["in.c-prod.customers"], item_types=["transformation", "configuration"], search_type="config-based"
-    -> No scopes = search whole stringified config; result includes `match_scopes` with exact paths
-
-  - user_input: "Find configurations (etc.) using specific setting / id anywhere"
-    -> patterns=["setting", "id], item_types=["configuration"], search_type="config-based",
-
-  - user_input: "Find configurations (etc.) using specific setting /id in parameters"
-  -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based", scopes=["storage"]
-
-  - user_input: "Find configurations (etc.) using specific setting / id in storage"
-  -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based", scopes=["storage"]
-
-  - user_input: "Find configurations (etc.) using specific setting / id in authorization"
-    -> patterns=["setting", "id"], item_types=["configuration"], search_type="config-based",
-      scopes=["parameters.authorization", "authorization"]
-
-  - user_input: "Find components/transformations using my_bucket in input or output mappings"
-    -> patterns=["my_bucket"], item_types=["configuration", "transformation"], search_type="config-based",
-      scopes=["storage.input", "storage.output"],  return_all_matches=true
-    -> Returns matches with paths like `storage.input[0].source` or `storage.output[0].target`
-
-  - user_input: "Find flows using configuration ID 01k9cz233cvd1rga3zzx40g8qj"
-    -> patterns=["01k9cz233cvd1rga3zzx40g8qj"], item_types=["flow"], search_type="config-based",
-      scopes=["tasks", "phases"], return_all_matches=true
-    
-  - user_input: "Find transformations using this table / column / specific code in its script"
-    -> patterns=["element"], item_types=["transformation"], search_type="config-based",
-      scopes=["parameters"], return_all_matches=true
-
-  - user_input: "Find data apps using something in its config / python code / setting"
-    -> patterns=["something"], item_types=["data-app"], search_type="config-based", return_all_matches=true
-    -> Returns data apps where script/config sections contain the keyword and includes `match_scopes`
\ No newline at end of file

From d6240cbf06c8f0d809752d7d6f90bb60a4ba04d7 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Wed, 18 Feb 2026 07:15:36 +0100
Subject: [PATCH 22/29] AI-2161 docs: mention config-based search in project
 system prompt

Co-authored-by: Codex <codex@openai.com>
---
 .../resources/prompts/project_system_prompt.md           | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/keboola_mcp_server/resources/prompts/project_system_prompt.md b/src/keboola_mcp_server/resources/prompts/project_system_prompt.md
index 355f2cdd..fb74c746 100644
--- a/src/keboola_mcp_server/resources/prompts/project_system_prompt.md
+++ b/src/keboola_mcp_server/resources/prompts/project_system_prompt.md
@@ -1,13 +1,18 @@
 ### Finding Items by Name
 
 When looking for specific items (tables, buckets, configurations, flows, data apps) by name, description,
-or partial match, **always use the `search` tool first** rather than listing all items with `get_*` tools.
+partial match, or configuration content/reference, **always use the `search` tool first** rather than listing all
+items with `get_*` tools.
 
-- `search` matches by regex against names, IDs, descriptions, and (for tables) column names.
+- `search` supports:
+  - textual search over names, IDs, descriptions, and (for tables) column names
+  - config-based search over item configuration JSON contents, including scoped JSONPath search when useful
 - Listing all items with empty IDs (e.g., `get_buckets(bucket_ids=[])`, `get_configs()`, `get_flows(flow_ids=[])`)
   is wasteful on large projects and should only be used when you genuinely need a complete inventory.
 - If the user mentions a name but you do not have the exact ID, call `search` with an appropriate pattern
   and `item_types` filter.
+- If the user asks where a table/component/config ID/value is used, call `search` with
+  `search_type="config-based"` (and use `scopes` when you know the config structure).
 - If `search` returns too many results or zero results, ask the user to be more specific rather than
   falling back to enumerating all items.
 

From 52c32eac80238545522d718361687148af47afb6 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Wed, 18 Feb 2026 08:24:55 +0100
Subject: [PATCH 23/29] AI-2161 docs: rename project prompt section to finding
 items

---
 .../resources/prompts/project_system_prompt.md                  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/keboola_mcp_server/resources/prompts/project_system_prompt.md b/src/keboola_mcp_server/resources/prompts/project_system_prompt.md
index fb74c746..e4f394db 100644
--- a/src/keboola_mcp_server/resources/prompts/project_system_prompt.md
+++ b/src/keboola_mcp_server/resources/prompts/project_system_prompt.md
@@ -1,4 +1,4 @@
-### Finding Items by Name
+### Finding Items
 
 When looking for specific items (tables, buckets, configurations, flows, data apps) by name, description,
 partial match, or configuration content/reference, **always use the `search` tool first** rather than listing all

From 241fbb20e2538f878cab233555367ad3c7594d3f Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 23 Feb 2026 14:52:02 +0100
Subject: [PATCH 24/29] AI-2161 feat: address search review feedback

---
 src/keboola_mcp_server/tools/search.py | 71 +++++++++++---------------
 tests/tools/storage/test_usage.py      |  4 +-
 tests/tools/test_search.py             | 48 +++++++++++++++++
 3 files changed, 79 insertions(+), 44 deletions(-)

diff --git a/src/keboola_mcp_server/tools/search.py b/src/keboola_mcp_server/tools/search.py
index 0e83ff7d..8af88bab 100644
--- a/src/keboola_mcp_server/tools/search.py
+++ b/src/keboola_mcp_server/tools/search.py
@@ -23,7 +23,7 @@
 from keboola_mcp_server.errors import tool_errors
 from keboola_mcp_server.links import Link, ProjectLinksManager
 from keboola_mcp_server.mcp import toon_serializer_compact
-from keboola_mcp_server.tools.components.utils import get_nested
+from keboola_mcp_server.tools.components.utils import _normalize_jsonpath, get_nested
 
 LOG = logging.getLogger(__name__)
 
@@ -152,7 +152,7 @@ def check_id_fields(self) -> 'SearchHit':
 
         return self
 
-    def with_matches(self, matches: list['PatternMatch']) -> 'SearchHit':
+    def set_matches(self, matches: list['PatternMatch']) -> 'SearchHit':
         """Assign pattern matches to this search hit and return self for chaining."""
         self._matches = matches
         unique_scopes = list(dict.fromkeys(match.scope for match in matches if match.scope))
@@ -160,7 +160,7 @@ def with_matches(self, matches: list['PatternMatch']) -> 'SearchHit':
             scope
             for scope in unique_scopes
             if not any(
-                other != scope and other.startswith(scope) and other[len(scope) : len(scope) + 1] in {'.', '['}
+                other.startswith(scope) and len(other) > len(scope) and other[len(scope)] in ('.', '[')
                 for other in unique_scopes
             )
         ]
@@ -180,6 +180,7 @@ class SearchSpec(BaseModel):
     _compiled_patterns: list[re.Pattern] = PrivateAttr(default_factory=list)
     _clean_patterns: list[str] = PrivateAttr(default_factory=list)
     _all_nodes_expr: JSONPath | None = PrivateAttr(default=None)
+    # Tuple fields: (original_scope, parsed_scope_expr, parsed_descendants_expr)
     _scope_exprs: list[tuple[str, JSONPath, JSONPath]] = PrivateAttr(default_factory=list)
 
     @model_validator(mode='after')
@@ -222,7 +223,7 @@ def _compile_jsonpath_exprs(self) -> 'SearchSpec':
         self._all_nodes_expr = jsonpath_ng.parse('$..*')
         self._scope_exprs = []
         for scope in self.search_scopes:
-            normalized = scope if scope.startswith('$') else f'$.{scope}'
+            normalized = _normalize_jsonpath(scope if scope.startswith('$') else f'$.{scope}')
             try:
                 self._scope_exprs.append((scope, jsonpath_ng.parse(normalized), jsonpath_ng.parse(f'{normalized}..*')))
             except Exception as e:
@@ -258,28 +259,14 @@ def match_patterns(self, value: str | JsonDict | None) -> list[str]:
 
         return matches
 
-    def _find_matches_for_expr(self, configuration: JsonDict, parsed_expr: JSONPath) -> list[PatternMatch]:
-        """Find pattern matches on JSON nodes matched by a JSONPath expression."""
+    def _find_matches_for_expr(
+        self, configuration: JsonDict, parsed_expr: JSONPath, scalar_only: bool = False
+    ) -> list[PatternMatch]:
+        """Find pattern matches on nodes selected by a JSONPath expression; skip dict/list nodes if scalar_only."""
         matches: list[PatternMatch] = []
         for jpath_match in parsed_expr.find(configuration):
             value = jpath_match.value
-            if matched := self.match_patterns(value):
-                matches.append(
-                    PatternMatch(
-                        scope=re.sub(r'\.\[', '[', str(jpath_match.full_path)),
-                        patterns=matched,
-                    )
-                )
-                if not self.return_all_matched_patterns:
-                    return matches
-        return matches
-
-    def _find_scalar_matches_for_expr(self, configuration: JsonDict, parsed_expr: JSONPath) -> list[PatternMatch]:
-        """Find pattern matches only on scalar nodes matched by a JSONPath expression."""
-        matches: list[PatternMatch] = []
-        for jpath_match in parsed_expr.find(configuration):
-            value = jpath_match.value
-            if value is None or isinstance(value, (dict, list)):
+            if scalar_only and isinstance(value, (dict, list)):
                 continue
             if matched := self.match_patterns(value):
                 matches.append(
@@ -310,12 +297,13 @@ def match_configuration_scopes(self, configuration: JsonDict | None) -> list[Pat
             # or the same logical scope is provided multiple times.
             seen: set[str | None] = set()
             for _scope, self_expr, desc_expr in self._scope_exprs:
-                # Include self scope only for scalar values. For objects/lists, include descendants only.
-                self_matches = self._find_scalar_matches_for_expr(configuration, self_expr)
-                desc_matches = self._find_matches_for_expr(configuration, desc_expr)
-
-                scope_matches = desc_matches if desc_matches else self_matches
-                for match in scope_matches:
+                # First, look for a match on the scope node itself (only when it holds a scalar value)
+                self_matches = self._find_matches_for_expr(configuration, self_expr, scalar_only=True)
+                # If the scope node itself did not match, search its descendant nodes
+                desc_matches: list[PatternMatch] = []
+                if not self_matches:
+                    desc_matches = self._find_matches_for_expr(configuration, desc_expr)
+                for match in self_matches or desc_matches:
                     if match.scope in seen:
                         continue
                     seen.add(match.scope)
@@ -323,11 +311,9 @@ def match_configuration_scopes(self, configuration: JsonDict | None) -> list[Pat
                     if not self.return_all_matched_patterns:
                         return all_matches
             return all_matches
-
-        # No scope provided – search all descendants and return exact match paths.
-        if self._all_nodes_expr is None:
-            self._all_nodes_expr = jsonpath_ng.parse('$..*')
-        return self._find_matches_for_expr(configuration, self._all_nodes_expr)
+        else:
+            # No scope provided – search all descendants and return exact match paths.
+            return self._find_matches_for_expr(configuration, self._all_nodes_expr)
 
     def match_texts(self, texts: Iterable[str]) -> list[PatternMatch]:
         """
@@ -386,7 +372,7 @@ async def _fetch_buckets(client: KeboolaClient, spec: SearchSpec) -> list[Search
                     name=bucket_name,
                     display_name=bucket_display_name,
                     description=bucket_description,
-                ).with_matches(matches)
+                ).set_matches(matches)
             )
     return hits
 
@@ -418,7 +404,7 @@ async def _fetch_tables(client: KeboolaClient, spec: SearchSpec) -> list[SearchH
                         name=table_name,
                         display_name=table_display_name,
                         description=table_description,
-                    ).with_matches(matches)
+                    ).set_matches(matches)
                 )
     return hits
 
@@ -497,7 +483,7 @@ async def _fetch_configs(
                         updated=config_updated,
                         name=config_name,
                         description=config_description,
-                    ).with_matches(matches)
+                    ).set_matches(matches)
             elif spec.search_type == 'config-based':
                 if matches := spec.match_configuration_scopes(config.get('configuration')):
                     yield SearchHit(
@@ -507,7 +493,7 @@ async def _fetch_configs(
                         updated=config_updated,
                         name=config_name,
                         description=config_description,
-                    ).with_matches(matches)
+                    ).set_matches(matches)
 
             for row in config.get('rows', []):
                 if not (row_id := row.get('id')):
@@ -526,7 +512,7 @@ async def _fetch_configs(
                             updated=config_updated or _get_field_value(row, ['created']),
                             name=row_name,
                             description=row_description,
-                        ).with_matches(matches)
+                        ).set_matches(matches)
 
                 elif spec.search_type == 'config-based':
                     if matches := spec.match_configuration_scopes(row.get('configuration')):
@@ -538,7 +524,7 @@ async def _fetch_configs(
                             updated=config_updated or _get_field_value(row, ['created']),
                             name=row_name,
                             description=row_description,
-                        ).with_matches(matches)
+                        ).set_matches(matches)
 
 
 @tool_errors()
@@ -699,7 +685,8 @@ async def search(
     - user_input: "Find components/transformations using my_bucket in input or output mappings"
         -> patterns=["my_bucket"], item_types=["configuration", "transformation"], search_type="config-based",
         scopes=["storage.input", "storage.output"]
-        -> Returns matches with paths like `storage.input[0].source` or `storage.output[0].target`
+        -> Returns matches with paths like `storage.input.tables[0].source`, `storage.input.files[0].source`,
+        or `storage.output.tables[0].destination`
 
     - user_input: "Find flows using configuration ID 01k9cz233cvd1rga3zzx40g8qj"
         -> patterns=["01k9cz233cvd1rga3zzx40g8qj"], item_types=["flow"], search_type="config-based",
@@ -707,7 +694,7 @@ async def search(
 
     - user_input: "Find transformations using this table / column / specific code in its script"
         -> patterns=["element"], item_types=["transformation"], search_type="config-based",
-        scopes=["parameters"]
+        scopes=["parameters", "storage"]
 
     - user_input: "Find data apps using something in its config / python code / setting"
         -> patterns=["something"], item_types=["data-app"], search_type="config-based"
diff --git a/tests/tools/storage/test_usage.py b/tests/tools/storage/test_usage.py
index dbe7c173..fe7d883e 100644
--- a/tests/tools/storage/test_usage.py
+++ b/tests/tools/storage/test_usage.py
@@ -24,14 +24,14 @@ def _sorted_usage(output: Sequence[storage_usage.UsageById]) -> list[storage_usa
                     item_type='configuration',
                     updated='2024-01-01T00:00:00Z',
                     name='Config 1',
-                ).with_matches([PatternMatch(scope='storage.input', patterns=['id-1', 'id-2'])]),
+                ).set_matches([PatternMatch(scope='storage.input', patterns=['id-1', 'id-2'])]),
                 SearchHit(
                     component_id='keboola.ex-db',
                     configuration_id='cfg-2',
                     item_type='configuration',
                     updated='2024-01-02T00:00:00Z',
                     name='Config 2',
-                ).with_matches([PatternMatch(scope='storage.output', patterns=['id-1'])]),
+                ).set_matches([PatternMatch(scope='storage.output', patterns=['id-1'])]),
             ],
             {
                 'id-1': [
diff --git a/tests/tools/test_search.py b/tests/tools/test_search.py
index 7688b1e6..f842e985 100644
--- a/tests/tools/test_search.py
+++ b/tests/tools/test_search.py
@@ -740,6 +740,40 @@ async def test_search_table_by_columns(
                 ],
                 [('test-config', ['storage.input[1].source', 'storage.output[0].destination'])],
             ),
+            (
+                ['alpha'],
+                ('parameters',),
+                [
+                    {
+                        'id': 'test-config',
+                        'name': 'Test Config',
+                        'created': '2024-01-02T00:00:00Z',
+                        'configuration': {
+                            'parameters': {'query': 'alpha'},
+                            'storage': {'input': [{'source': 'alpha'}]},
+                        },
+                        'rows': [],
+                    }
+                ],
+                [('test-config', ['parameters.query'])],
+            ),
+            (
+                ['alpha'],
+                ('authorization.#apiKey',),
+                [
+                    {
+                        'id': 'test-config',
+                        'name': 'Test Config',
+                        'created': '2024-01-02T00:00:00Z',
+                        'configuration': {
+                            'authorization': {'#apiKey': 'alpha'},
+                            'parameters': {'query': 'nomatch'},
+                        },
+                        'rows': [],
+                    }
+                ],
+                [('test-config', ['authorization.#apiKey'])],
+            ),
             (
                 ['alpha', 'gamma'],
                 tuple(),
@@ -782,6 +816,8 @@ async def test_search_table_by_columns(
         ids=[
             'all_matches_in_scopes',
             'most_specific_scope_only',
+            'scope_constrains_same_value_in_other_path',
+            'hash_prefixed_scope_key_in_search_tool',
             'multiple_configurations_returned',
         ],
     )
@@ -1004,6 +1040,17 @@ def test_match_texts(spec_kwargs: dict[str, Any], texts: list[str], expected: li
             {'parameters': {'api': {'baseUrl': 'https://wttr.in'}}},
             [{'scope': 'parameters.api.baseUrl', 'patterns': ['wttr.in']}],
         ),
+        (
+            # Scope with #-prefixed key should be normalized and parsed correctly.
+            {
+                'patterns': ['alpha'],
+                'item_types': ('configuration',),
+                'search_scopes': ('authorization.#apiKey',),
+                'return_all_matched_patterns': True,
+            },
+            {'authorization': {'#apiKey': 'alpha'}},
+            [{'scope': 'authorization.#apiKey', 'patterns': ['alpha']}],
+        ),
     ],
     ids=[
         'all_patterns_many_scopes',
@@ -1013,6 +1060,7 @@ def test_match_texts(spec_kwargs: dict[str, Any], texts: list[str], expected: li
         'any_patterns_return_first_match',
         'overlapping_scopes_deduplicated',
         'scalar_scope_matches_self',
+        'hash_prefixed_scope_key_matches',
     ],
 )
 def test_match_configuration_scopes(spec_kwargs: dict[str, Any], configuration: dict[str, Any], expected: list[dict]):

From 6eb6277a46ae8ae8d10fd94360ab28a1fab6597a Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 23 Feb 2026 15:32:13 +0100
Subject: [PATCH 25/29] AI-2161 feat: group matched patterns by scope in search
 results

---
 src/keboola_mcp_server/tools/search.py | 25 ++++++---
 tests/tools/test_search.py             | 77 +++++++++++++++++++++++---
 2 files changed, 86 insertions(+), 16 deletions(-)

diff --git a/src/keboola_mcp_server/tools/search.py b/src/keboola_mcp_server/tools/search.py
index 8af88bab..77437fb0 100644
--- a/src/keboola_mcp_server/tools/search.py
+++ b/src/keboola_mcp_server/tools/search.py
@@ -2,6 +2,7 @@
 import json
 import logging
 import re
+from collections import defaultdict
 from typing import Annotated, Any, AsyncGenerator, Iterable, Literal, Mapping, Sequence
 
 import jsonpath_ng
@@ -116,10 +117,9 @@ class SearchHit(BaseModel):
     name: str | None = Field(default=None, description='Name of the item.')
     display_name: str | None = Field(default=None, description='Display name of the item.')
     description: str | None = Field(default=None, description='Description of the item.')
-    match_scopes: list[str] = Field(
+    match_scopes: list[PatternMatch] = Field(
         default_factory=list,
-        description='Most specific JSONPath scopes within the configuration where a pattern was matched '
-        '(config-based search only).',
+        description='Most specific JSONPath scopes with grouped matched patterns ' '(config-based search only).',
     )
     links: list[Link] = Field(default_factory=list, description='Links to the item.')
     _matches: list[PatternMatch] = PrivateAttr(default_factory=list)
@@ -155,8 +155,14 @@ def check_id_fields(self) -> 'SearchHit':
     def set_matches(self, matches: list['PatternMatch']) -> 'SearchHit':
         """Assign pattern matches to this search hit and return self for chaining."""
         self._matches = matches
-        unique_scopes = list(dict.fromkeys(match.scope for match in matches if match.scope))
-        self.match_scopes = [
+        grouped_patterns_by_scope: dict[str, set[str]] = defaultdict(set)
+        for match in matches:
+            if not match.scope:
+                continue
+            grouped_patterns_by_scope[match.scope].update(match.patterns)
+
+        unique_scopes = list(grouped_patterns_by_scope)
+        most_specific_scopes = [
             scope
             for scope in unique_scopes
             if not any(
@@ -164,6 +170,9 @@ def set_matches(self, matches: list['PatternMatch']) -> 'SearchHit':
                 for other in unique_scopes
             )
         ]
+        self.match_scopes = [
+            PatternMatch(scope=scope, patterns=list(grouped_patterns_by_scope[scope])) for scope in most_specific_scopes
+        ]
         return self
 
 
@@ -594,7 +603,7 @@ async def search(
     2) config-based
     - Searches item configurations (JSON objects) by matching patterns against the configuration values ​​converted
     to a string, optionally narrowed by JSON path `scopes`.
-    - Returns also `match_scopes` with JSON paths in configuration where a pattern was found.
+    - Returns also `match_scopes` with JSON paths and matched patterns per scope.
 
     THIS IS THE PRIMARY DISCOVERY TOOL. Always use it BEFORE any get_* tool when you need to find items
     by name or specific configuration content. Do NOT enumerate items with get_buckets, get_tables, get_configs,
@@ -625,7 +634,7 @@ async def search(
     - Each result includes the item's ID, name, creation date, and relevant metadata
     - scopes (config-based) narrow matching to specific JSONPath areas within configurations; matching is performed
     against the stringified JSON node content in those areas.
-    - config-based always returns all matched paths per item in `match_scopes`
+    - config-based always returns all matched paths per item in `match_scopes` (including matched patterns)
 
     IMPORTANT:
     - Always use this tool when the user mentions a name but you don't have the exact ID
@@ -665,7 +674,7 @@ async def search(
     - user_input: "Find transformations/configs/components referencing table in.c-prod.customers"
         -> patterns=["in.c-prod.customers"], item_types=["transformation", "configuration"],
         search_type="config-based"
-        -> No scopes = search whole stringified config; result includes `match_scopes` with exact paths
+        -> No scopes = search whole stringified config; result includes `match_scopes` with exact paths + patterns
 
     - user_input: "Find configurations/transformations (etc.) using specific setting / id anywhere"
         -> patterns=["setting", "id"], item_types=["configuration", "transformations"], search_type="config-based",
diff --git a/tests/tools/test_search.py b/tests/tools/test_search.py
index f842e985..6ca83ea9 100644
--- a/tests/tools/test_search.py
+++ b/tests/tools/test_search.py
@@ -718,7 +718,15 @@ async def test_search_table_by_columns(
                         'rows': [],
                     }
                 ],
-                [('test-config', ['parameters.query', 'storage.input[0].source'])],
+                [
+                    (
+                        'test-config',
+                        [
+                            {'scope': 'parameters.query', 'patterns': ['alpha']},
+                            {'scope': 'storage.input[0].source', 'patterns': ['beta']},
+                        ],
+                    )
+                ],
             ),
             (
                 ['gamma'],
@@ -738,7 +746,15 @@ async def test_search_table_by_columns(
                         'rows': [],
                     }
                 ],
-                [('test-config', ['storage.input[1].source', 'storage.output[0].destination'])],
+                [
+                    (
+                        'test-config',
+                        [
+                            {'scope': 'storage.input[1].source', 'patterns': ['gamma']},
+                            {'scope': 'storage.output[0].destination', 'patterns': ['gamma']},
+                        ],
+                    )
+                ],
             ),
             (
                 ['alpha'],
@@ -755,7 +771,7 @@ async def test_search_table_by_columns(
                         'rows': [],
                     }
                 ],
-                [('test-config', ['parameters.query'])],
+                [('test-config', [{'scope': 'parameters.query', 'patterns': ['alpha']}])],
             ),
             (
                 ['alpha'],
@@ -772,7 +788,31 @@ async def test_search_table_by_columns(
                         'rows': [],
                     }
                 ],
-                [('test-config', ['authorization.#apiKey'])],
+                [('test-config', [{'scope': 'authorization.#apiKey', 'patterns': ['alpha']}])],
+            ),
+            (
+                ['alpha', 'beta'],
+                ('parameters',),
+                [
+                    {
+                        'id': 'test-config',
+                        'name': 'Test Config',
+                        'created': '2024-01-02T00:00:00Z',
+                        'configuration': {
+                            'parameters': {'query': 'alpha beta', 'query2': 'beta'},
+                        },
+                        'rows': [],
+                    }
+                ],
+                [
+                    (
+                        'test-config',
+                        [
+                            {'scope': 'parameters.query', 'patterns': ['alpha', 'beta']},
+                            {'scope': 'parameters.query2', 'patterns': ['beta']},
+                        ],
+                    )
+                ],
             ),
             (
                 ['alpha', 'gamma'],
@@ -808,8 +848,8 @@ async def test_search_table_by_columns(
                     },
                 ],
                 [
-                    ('test-config-b', ['storage.output[0].destination']),
-                    ('test-config-a', ['parameters.query']),
+                    ('test-config-b', [{'scope': 'storage.output[0].destination', 'patterns': ['gamma']}]),
+                    ('test-config-a', [{'scope': 'parameters.query', 'patterns': ['alpha']}]),
                 ],
             ),
         ],
@@ -818,6 +858,7 @@ async def test_search_table_by_columns(
             'most_specific_scope_only',
             'scope_constrains_same_value_in_other_path',
             'hash_prefixed_scope_key_in_search_tool',
+            'group_two_patterns_in_one_scope',
             'multiple_configurations_returned',
         ],
     )
@@ -828,7 +869,7 @@ async def test_search_config_based_match_scopes(
         patterns: list[str],
         scopes: tuple[str, ...],
         component_configurations: list[dict[str, Any]],
-        expected_hits: list[tuple[str, list[str]]],
+        expected_hits: list[tuple[str, list[dict[str, Any]]]],
     ):
         keboola_client = KeboolaClient.from_state(mcp_context_client.session.state)
 
@@ -857,7 +898,27 @@ async def test_search_config_based_match_scopes(
             scopes=scopes,
         )
 
-        assert [(hit.configuration_id, hit.match_scopes) for hit in result] == expected_hits
+        normalized_actual = [
+            (
+                hit.configuration_id,
+                sorted(
+                    ({'scope': m.scope, 'patterns': sorted(m.patterns)} for m in hit.match_scopes),
+                    key=lambda x: x['scope'] or '',
+                ),
+            )
+            for hit in result
+        ]
+        normalized_expected = [
+            (
+                config_id,
+                sorted(
+                    ({'scope': m['scope'], 'patterns': sorted(m['patterns'])} for m in matches),
+                    key=lambda x: x['scope'] or '',
+                ),
+            )
+            for config_id, matches in expected_hits
+        ]
+        assert normalized_actual == normalized_expected
 
 
 @pytest.mark.parametrize(

From 26e8869f8cdfe2452d6872be54df007c6725ce28 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 23 Feb 2026 15:39:10 +0100
Subject: [PATCH 26/29] AI-2161 feat: adapt usage to grouped match scopes

---
 src/keboola_mcp_server/tools/search.py        | 14 ++++++--------
 src/keboola_mcp_server/tools/storage/usage.py |  2 +-
 tests/tools/test_search.py                    |  2 +-
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/keboola_mcp_server/tools/search.py b/src/keboola_mcp_server/tools/search.py
index 77437fb0..901ed97f 100644
--- a/src/keboola_mcp_server/tools/search.py
+++ b/src/keboola_mcp_server/tools/search.py
@@ -117,12 +117,11 @@ class SearchHit(BaseModel):
     name: str | None = Field(default=None, description='Name of the item.')
     display_name: str | None = Field(default=None, description='Display name of the item.')
     description: str | None = Field(default=None, description='Description of the item.')
-    match_scopes: list[PatternMatch] = Field(
+    matches: list[PatternMatch] = Field(
         default_factory=list,
         description='Most specific JSONPath scopes with grouped matched patterns ' '(config-based search only).',
     )
     links: list[Link] = Field(default_factory=list, description='Links to the item.')
-    _matches: list[PatternMatch] = PrivateAttr(default_factory=list)
 
     def __eq__(self, other: object) -> bool:
         if isinstance(other, SearchHit):
@@ -154,14 +153,13 @@ def check_id_fields(self) -> 'SearchHit':
 
     def set_matches(self, matches: list['PatternMatch']) -> 'SearchHit':
         """Assign pattern matches to this search hit and return self for chaining."""
-        self._matches = matches
-        grouped_patterns_by_scope: dict[str, set[str]] = defaultdict(set)
+        patterns_by_scope: dict[str, set[str]] = defaultdict(set)
         for match in matches:
             if not match.scope:
                 continue
-            grouped_patterns_by_scope[match.scope].update(match.patterns)
+            patterns_by_scope[match.scope].update(match.patterns)
 
-        unique_scopes = list(grouped_patterns_by_scope)
+        unique_scopes = list(patterns_by_scope)
         most_specific_scopes = [
             scope
             for scope in unique_scopes
@@ -170,8 +168,8 @@ def set_matches(self, matches: list['PatternMatch']) -> 'SearchHit':
                 for other in unique_scopes
             )
         ]
-        self.match_scopes = [
-            PatternMatch(scope=scope, patterns=list(grouped_patterns_by_scope[scope])) for scope in most_specific_scopes
+        self.matches = [
+            PatternMatch(scope=scope, patterns=list(patterns_by_scope[scope])) for scope in most_specific_scopes
         ]
         return self
 
diff --git a/src/keboola_mcp_server/tools/storage/usage.py b/src/keboola_mcp_server/tools/storage/usage.py
index 83e2e201..01fb399c 100644
--- a/src/keboola_mcp_server/tools/storage/usage.py
+++ b/src/keboola_mcp_server/tools/storage/usage.py
@@ -67,7 +67,7 @@ async def find_id_usage(
     # group usage references by pattern = target_id
     output: dict[str, list[ComponentUsageReference]] = defaultdict(list)
     for search_hit in search_hits:
-        for match in search_hit._matches:
+        for match in search_hit.matches:
             for target_id in match.patterns:
                 output[target_id].append(
                     # TODO: Consider whether adding configuration description is useful, it could overload context.
diff --git a/tests/tools/test_search.py b/tests/tools/test_search.py
index 6ca83ea9..91d6b52c 100644
--- a/tests/tools/test_search.py
+++ b/tests/tools/test_search.py
@@ -902,7 +902,7 @@ async def test_search_config_based_match_scopes(
             (
                 hit.configuration_id,
                 sorted(
-                    ({'scope': m.scope, 'patterns': sorted(m.patterns)} for m in hit.match_scopes),
+                    ({'scope': m.scope, 'patterns': sorted(m.patterns)} for m in hit.matches),
                     key=lambda x: x['scope'] or '',
                 ),
             )

From a25546cb4f8fce57d0dd2511849655beccfaad7b Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 23 Feb 2026 15:41:17 +0100
Subject: [PATCH 27/29] AI-2161 docs: update TOOLS reference

---
 TOOLS.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/TOOLS.md b/TOOLS.md
index d1335643..e866239d 100644
--- a/TOOLS.md
+++ b/TOOLS.md
@@ -2631,7 +2631,7 @@ This tool supports two complementary search types:
 2) config-based
 - Searches item configurations (JSON objects) by matching patterns against the configuration values ​​converted
 to a string, optionally narrowed by JSON path `scopes`.
-- Returns also `match_scopes` with JSON paths in configuration where a pattern was found.
+- Also returns `matches`: the JSON paths (scopes) where patterns were found, with the matched patterns per scope.
 
 THIS IS THE PRIMARY DISCOVERY TOOL. Always use it BEFORE any get_* tool when you need to find items
 by name or specific configuration content. Do NOT enumerate items with get_buckets, get_tables, get_configs,
@@ -2662,7 +2662,7 @@ HOW IT WORKS:
 - Each result includes the item's ID, name, creation date, and relevant metadata
 - scopes (config-based) narrow matching to specific JSONPath areas within configurations; matching is performed
 against the stringified JSON node content in those areas.
-- config-based always returns all matched paths per item in `match_scopes`
+- config-based search always returns all matched paths per item in `matches` (including the matched patterns)
 
 IMPORTANT:
 - Always use this tool when the user mentions a name but you don't have the exact ID
@@ -2702,7 +2702,7 @@ USAGE EXAMPLES:
 - user_input: "Find transformations/configs/components referencing table in.c-prod.customers"
     -> patterns=["in.c-prod.customers"], item_types=["transformation", "configuration"],
     search_type="config-based"
-    -> No scopes = search whole stringified config; result includes `match_scopes` with exact paths
+    -> No scopes = search whole stringified config; result includes `matches` with exact paths + patterns
 
 - user_input: "Find configurations/transformations (etc.) using specific setting / id anywhere"
     -> patterns=["setting", "id"], item_types=["configuration", "transformations"], search_type="config-based",
@@ -2722,7 +2722,8 @@ scopes=["storage"]
 - user_input: "Find components/transformations using my_bucket in input or output mappings"
     -> patterns=["my_bucket"], item_types=["configuration", "transformation"], search_type="config-based",
     scopes=["storage.input", "storage.output"]
-    -> Returns matches with paths like `storage.input[0].source` or `storage.output[0].target`
+    -> Returns matches with paths like `storage.input.tables[0].source`, `storage.input.files[0].source`,
+    or `storage.output.tables[0].destination`
 
 - user_input: "Find flows using configuration ID 01k9cz233cvd1rga3zzx40g8qj"
     -> patterns=["01k9cz233cvd1rga3zzx40g8qj"], item_types=["flow"], search_type="config-based",
@@ -2730,7 +2731,7 @@ scopes=["storage"]
 
 - user_input: "Find transformations using this table / column / specific code in its script"
     -> patterns=["element"], item_types=["transformation"], search_type="config-based",
-    scopes=["parameters"]
+    scopes=["parameters", "storage"]
 
 - user_input: "Find data apps using something in its config / python code / setting"
     -> patterns=["something"], item_types=["data-app"], search_type="config-based"

From 42eac29d1bfe7a78f3f9988a07316600c8f89805 Mon Sep 17 00:00:00 2001
From: mariankrotil <mariankrotil34@gmail.com>
Date: Mon, 23 Feb 2026 15:57:36 +0100
Subject: [PATCH 28/29] AI-2161 style: apply flake

---
 src/keboola_mcp_server/tools/search.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/keboola_mcp_server/tools/search.py b/src/keboola_mcp_server/tools/search.py
index 901ed97f..59cd4211 100644
--- a/src/keboola_mcp_server/tools/search.py
+++ b/src/keboola_mcp_server/tools/search.py
@@ -269,7 +269,8 @@ def match_patterns(self, value: str | JsonDict | None) -> list[str]:
     def _find_matches_for_expr(
         self, configuration: JsonDict, parsed_expr: JSONPath, scalar_only: bool = False
     ) -> list[PatternMatch]:
-        """Find pattern matches on JSON nodes matched by a JSONPath expression. If scalar_only is True, only scalar nodes are matched."""
+        """Find pattern matches on JSON nodes matched by a JSONPath expression. If scalar_only is True, only scalar
+        nodes are matched."""
         matches: list[PatternMatch] = []
         for jpath_match in parsed_expr.find(configuration):
             value = jpath_match.value

From 5ceae6b11edfea15c3c95664735472cc893ff710 Mon Sep 17 00:00:00 2001
From: Marian Krotil <154078172+mariankrotil@users.noreply.github.com>
Date: Tue, 24 Feb 2026 14:55:57 +0100
Subject: [PATCH 29/29] Update src/keboola_mcp_server/tools/search.py

---
 src/keboola_mcp_server/tools/search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/keboola_mcp_server/tools/search.py b/src/keboola_mcp_server/tools/search.py
index 59cd4211..ab7af0b1 100644
--- a/src/keboola_mcp_server/tools/search.py
+++ b/src/keboola_mcp_server/tools/search.py
@@ -119,7 +119,7 @@ class SearchHit(BaseModel):
     description: str | None = Field(default=None, description='Description of the item.')
     matches: list[PatternMatch] = Field(
         default_factory=list,
-        description='Most specific JSONPath scopes with grouped matched patterns ' '(config-based search only).',
+        description='Most specific JSONPath scopes with grouped matched patterns (config-based search only).',
     )
     links: list[Link] = Field(default_factory=list, description='Links to the item.')