Skip to content
Open
8 changes: 8 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,14 @@ jobs:
path: ./integtest-results.xml
reporter: 'java-junit'

kaibench:
  # Calls the local reusable KaiBench workflow after the `build` job,
  # forwarding all of this workflow's secrets to it.
  name: KaiBench Evaluation
  # Gate: push events on keboola/mcp-server only (fork PRs lack secrets).
  if: ${{ github.event_name == 'push' && github.repository == 'keboola/mcp-server' }}
  needs: build
  uses: ./.github/workflows/kaibench.yml
  secrets: inherit

deploy_to_pypi:
name: Deploy to pypi.org
needs:
Expand Down
48 changes: 48 additions & 0 deletions .github/workflows/kaibench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
name: KaiBench Evaluation

# Thin trigger that dispatches the full evaluation workflow in the KaiBench repo.
# All infrastructure (kai-assistant build, eval framework, secrets) lives there.
# Results are posted back as a commit status on this repo.

permissions:
  contents: read

on:
  # Manual runs: every input is optional and has a default.
  workflow_dispatch:
    inputs:
      question_types:
        description: 'Question types (comma-separated, or "all")'
        default: 'Data Analysis Query,Configuration Reasoning,Storage Object Reasoning,MCP Tool Validation'
        type: string
      regression_only:
        description: 'Only regression-flagged questions'
        type: boolean
        default: false
      kai_assistant_image_tag:
        description: 'Pre-built kai-assistant image tag (leave empty to build from UI main)'
        required: false
        type: string
        default: ''
  # Reusable entry point. It declares no inputs, so the `||` fallbacks in the
  # dispatch step below supply the values when invoked this way.
  workflow_call:

jobs:
  trigger:
    name: Trigger KaiBench evaluation
    runs-on: ubuntu-latest
    steps:
      - name: Dispatch evaluation to KaiBench repo
        # Every expression is handed to the script through an environment
        # variable instead of being interpolated into the shell text. An input
        # containing quotes, `$(...)` or backticks is then passed to `gh` as
        # plain data and cannot break quoting or run commands on the runner.
        env:
          GH_TOKEN: ${{ secrets.KAIBENCH_REPO_TOKEN }}
          MCP_SERVER_REPO: ${{ github.repository }}
          MCP_SERVER_SHA: ${{ github.sha }}
          # Keep this fallback in sync with the `question_types` default above.
          QUESTION_TYPES: ${{ inputs.question_types || 'Data Analysis Query,Configuration Reasoning,Storage Object Reasoning,MCP Tool Validation' }}
          REGRESSION_ONLY: ${{ inputs.regression_only || 'false' }}
          KAI_ASSISTANT_IMAGE_TAG: ${{ inputs.kai_assistant_image_tag || '' }}
        run: |
          # Fail with a clear message when the dispatch token is not available.
          if [ -z "${GH_TOKEN}" ]; then
            echo "::error::KAIBENCH_REPO_TOKEN is not available; cannot dispatch the KaiBench evaluation."
            exit 1
          fi
          gh workflow run evaluate.yml \
            --repo keboola-rnd/KaiBench \
            --field mcp_server_repo="${MCP_SERVER_REPO}" \
            --field mcp_server_ref="${MCP_SERVER_SHA}" \
            --field callback_repo="${MCP_SERVER_REPO}" \
            --field callback_sha="${MCP_SERVER_SHA}" \
            --field question_types="${QUESTION_TYPES}" \
            --field regression_only="${REGRESSION_ONLY}" \
            --field kai_assistant_image_tag="${KAI_ASSISTANT_IMAGE_TAG}"
          echo "Dispatched KaiBench evaluation"
          echo "Results will appear as a commit status on ${MCP_SERVER_SHA}"
          echo "Monitor at: https://github.com/keboola-rnd/KaiBench/actions"
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies = [
"sqlglot ~= 28.5",
"toon-format ~= 0.9.0b1",
"pyyaml ~= 6.0",
"requests ~= 2.32",
]
[project.optional-dependencies]
codestyle = [
Expand Down
8 changes: 5 additions & 3 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading