keboola · jordanrburger · Feb 21, 2026 · Feb 21, 2026 · Feb 21, 2026 · Feb 21, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -147,6 +147,14 @@ jobs:
           path: ./integtest-results.xml
           reporter: 'java-junit'
 
+  kaibench:
+    name: KaiBench Evaluation
+    # Gate on push events in keboola/mcp-server itself; fork PRs have no access to the secrets.
+    if: ${{ github.event_name == 'push' && github.repository == 'keboola/mcp-server' }}
+    needs: [build]
+    uses: ./.github/workflows/kaibench.yml
+    secrets: inherit
+
   deploy_to_pypi:
     name: Deploy to pypi.org
     needs:

diff --git a/.github/workflows/kaibench.yml b/.github/workflows/kaibench.yml
@@ -0,0 +1,64 @@
+name: KaiBench Evaluation
+
+# Thin trigger that dispatches the full evaluation workflow in the KaiBench repo.
+# All infrastructure (kai-assistant build, eval framework, secrets) lives there.
+# Results are posted back as a commit status on this repo.
+
+permissions:
+  contents: read
+
+on:
+  workflow_dispatch:
+    inputs:
+      question_types:
+        description: 'Question types (comma-separated, or "all")'
+        default: 'Data Analysis Query,Configuration Reasoning,Storage Object Reasoning,MCP Tool Validation'
+        type: string
+      regression_only:
+        description: 'Only regression-flagged questions'
+        type: boolean
+        default: false
+      kai_assistant_image_tag:
+        description: 'Pre-built kai-assistant image tag (leave empty to build from UI main)'
+        required: false
+        type: string
+        default: ''
+  # Reusable-workflow entry point. No inputs are declared for it here, so the
+  # `||` fallbacks in the job's env block supply the values for such runs.
+  workflow_call:
+
+jobs:
+  trigger:
+    name: Trigger KaiBench evaluation
+    runs-on: ubuntu-latest
+    steps:
+      - name: Dispatch evaluation to KaiBench repo
+        # Expression values reach the script through environment variables
+        # instead of being interpolated into it, so a value containing quotes,
+        # `$`, or backticks cannot break or inject into the shell command.
+        env:
+          GH_TOKEN: ${{ secrets.KAIBENCH_REPO_TOKEN }}
+          MCP_SERVER_REPO: ${{ github.repository }}
+          MCP_SERVER_SHA: ${{ github.sha }}
+          QUESTION_TYPES: ${{ inputs.question_types || 'Data Analysis Query,Configuration Reasoning,Storage Object Reasoning,MCP Tool Validation' }}
+          REGRESSION_ONLY: ${{ inputs.regression_only || 'false' }}
+          KAI_ASSISTANT_IMAGE_TAG: ${{ inputs.kai_assistant_image_tag || '' }}
+        run: |
+          if [ -z "${GH_TOKEN}" ]; then
+            echo "::error::KAIBENCH_REPO_TOKEN secret is not available; cannot dispatch KaiBench evaluation"
+            exit 1
+          fi
+          # --raw-field sends each value verbatim; --field would treat a value
+          # starting with "@" as the name of a file to read.
+          gh workflow run evaluate.yml \
+            --repo keboola-rnd/KaiBench \
+            --raw-field mcp_server_repo="${MCP_SERVER_REPO}" \
+            --raw-field mcp_server_ref="${MCP_SERVER_SHA}" \
+            --raw-field callback_repo="${MCP_SERVER_REPO}" \
+            --raw-field callback_sha="${MCP_SERVER_SHA}" \
+            --raw-field question_types="${QUESTION_TYPES}" \
+            --raw-field regression_only="${REGRESSION_ONLY}" \
+            --raw-field kai_assistant_image_tag="${KAI_ASSISTANT_IMAGE_TAG}"
+          echo "Dispatched KaiBench evaluation"
+          echo "Results will appear as a commit status on ${MCP_SERVER_SHA}"
+          echo "Monitor at: https://github.com/keboola-rnd/KaiBench/actions"
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,6 +24,7 @@ dependencies = [
     "sqlglot ~= 28.5",
     "toon-format ~= 0.9.0b1",
     "pyyaml ~= 6.0",
+    "requests ~= 2.32",
 ]
 [project.optional-dependencies]
 codestyle = [

diff --git a/uv.lock b/uv.lock