From 0ea984d31d4b4477db4eec03920f3c19441467ef Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 5 Dec 2025 08:05:57 -0800 Subject: [PATCH 01/10] Improve MDX validation triggering Currently, the script can hang indefinitely if there are no mdx files and prevent the PR from merging. This improves the mdx detection and exits much earlier and cleanly if the test is irrelevant. --- .github/workflows/validate-mdx.yml | 2 ++ .../mdx-validation/validate-mdx-mintlify.sh | 23 ++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/.github/workflows/validate-mdx.yml b/.github/workflows/validate-mdx.yml index 673ecea0dc..a2c39957dc 100644 --- a/.github/workflows/validate-mdx.yml +++ b/.github/workflows/validate-mdx.yml @@ -9,6 +9,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 - uses: actions/setup-node@v6 with: node-version: '20' diff --git a/scripts/mdx-validation/validate-mdx-mintlify.sh b/scripts/mdx-validation/validate-mdx-mintlify.sh index ebaef91ba9..d909f23413 100755 --- a/scripts/mdx-validation/validate-mdx-mintlify.sh +++ b/scripts/mdx-validation/validate-mdx-mintlify.sh @@ -18,6 +18,28 @@ cleanup() { # Trap to ensure cleanup trap cleanup EXIT INT TERM +# Check if there are any MDX files in the changeset +if [ -n "$GITHUB_BASE_REF" ]; then + # In a PR context, check changed files + echo "Checking for changed MDX files in PR..." + + # Get the list of changed files + CHANGED_MDX=$(git diff --name-only "origin/$GITHUB_BASE_REF"...HEAD -- '*.mdx' 2>/dev/null | grep -E '\.mdx$' || true) + + if [ -z "$CHANGED_MDX" ]; then + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "NO MDX FILES CHANGED" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "No MDX files found in this PR. Skipping validation." 
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + exit 0 + fi + + echo "Found changed MDX files:" + echo "$CHANGED_MDX" | sed 's/^/ /' + echo "" +fi + echo "Starting Mintlify validation..." echo "" echo "Running: mint dev --no-open (will run for ${PARSE_TIME}s to parse all files)" @@ -54,4 +76,3 @@ else echo "No parsing errors detected by Mintlify" exit 0 fi - From 6fb29b164d99e205c7df2d7a700ce6272ecc9c78 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 5 Dec 2025 08:16:30 -0800 Subject: [PATCH 02/10] Add broken-links check to validate-mdx script - With the previous commit to improve the MDX file detection, we can fix the problem of the remote Mintlify link-rot test not working in forks by adding mint broken-links to the end of the MDX validation script. Then we can make the MDX validation script mandatory. --- .../mdx-validation/validate-mdx-mintlify.sh | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/scripts/mdx-validation/validate-mdx-mintlify.sh b/scripts/mdx-validation/validate-mdx-mintlify.sh index d909f23413..363a50bce9 100755 --- a/scripts/mdx-validation/validate-mdx-mintlify.sh +++ b/scripts/mdx-validation/validate-mdx-mintlify.sh @@ -69,10 +69,36 @@ if grep -q "parsing error" "$LOGFILE"; then echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo "" exit 1 -else +fi + +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "✅ MINTLIFY PARSING VALIDATION PASSED" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "No parsing errors detected by Mintlify" +echo "" + +# Run broken links check +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "CHECKING FOR BROKEN LINKS" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "Running: mint broken-links" +echo "" + +if mint broken-links; then + echo "" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - echo "✅ MINTLIFY 
VALIDATION PASSED" + echo "✅ ALL VALIDATION CHECKS PASSED" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - echo "No parsing errors detected by Mintlify" + echo "- No parsing errors" + echo "- No broken links" exit 0 +else + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "❌ BROKEN LINKS DETECTED" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "Please fix the broken links shown above." + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" + exit 1 fi From 68a7ba9045f20b67979f34a5e0295be676ca1556 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 5 Dec 2025 11:50:14 -0800 Subject: [PATCH 03/10] Fix script bugs --- .../mdx-validation/validate-mdx-mintlify.sh | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/scripts/mdx-validation/validate-mdx-mintlify.sh b/scripts/mdx-validation/validate-mdx-mintlify.sh index 363a50bce9..77966d12a5 100755 --- a/scripts/mdx-validation/validate-mdx-mintlify.sh +++ b/scripts/mdx-validation/validate-mdx-mintlify.sh @@ -46,7 +46,19 @@ echo "Running: mint dev --no-open (will run for ${PARSE_TIME}s to parse all file echo "" # Run mint dev with tee to force output writing, timeout after PARSE_TIME seconds -timeout --preserve-status ${PARSE_TIME}s mint dev --no-open 2>&1 | tee "$LOGFILE" > /dev/null || true +# Use timeout if available (Linux), otherwise use gtimeout (macOS with coreutils), or a background job stopped via sleep + kill as fallback +if command -v timeout > /dev/null 2>&1; then + timeout --preserve-status ${PARSE_TIME}s mint dev --no-open 2>&1 | tee "$LOGFILE" > /dev/null || true +elif command -v gtimeout > /dev/null 2>&1; then + gtimeout --preserve-status ${PARSE_TIME}s mint dev --no-open 2>&1 | tee "$LOGFILE" > /dev/null || true +else + # Fallback: run mint dev in background and kill after PARSE_TIME + mint dev --no-open 2>&1 | tee "$LOGFILE" > /dev/null & + PID=$! 
+ sleep ${PARSE_TIME} + kill "$PID" 2>/dev/null || true + wait "$PID" 2>/dev/null || true +fi echo "" echo "✓ Mintlify finished parsing" @@ -84,15 +96,11 @@ echo "━━━━━━━━━━━━━━━━━━━━━━━━ echo "Running: mint broken-links" echo "" -if mint broken-links; then - echo "" - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - echo "✅ ALL VALIDATION CHECKS PASSED" - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - echo "- No parsing errors" - echo "- No broken links" - exit 0 -else +LINKS_OUTPUT=$(mint broken-links 2>&1) +echo "$LINKS_OUTPUT" + +# Check if broken links were found (mint broken-links exits 0 even when it finds broken links) +if echo "$LINKS_OUTPUT" | grep -q "found.*broken links"; then echo "" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo "❌ BROKEN LINKS DETECTED" @@ -102,3 +110,11 @@ else echo "" exit 1 fi + +echo "" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "✅ ALL VALIDATION CHECKS PASSED" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "- No parsing errors" +echo "- No broken links" +exit 0 From 0c4547a2f0b8059ce584a08f6b5283a0eb1197b4 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 5 Dec 2025 14:24:47 -0800 Subject: [PATCH 04/10] Fix broken-links failure detection --- scripts/mdx-validation/validate-mdx-mintlify.sh | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/scripts/mdx-validation/validate-mdx-mintlify.sh b/scripts/mdx-validation/validate-mdx-mintlify.sh index 77966d12a5..3da0ca629f 100755 --- a/scripts/mdx-validation/validate-mdx-mintlify.sh +++ b/scripts/mdx-validation/validate-mdx-mintlify.sh @@ -96,20 +96,8 @@ echo "━━━━━━━━━━━━━━━━━━━━━━━━ echo "Running: mint broken-links" echo "" -LINKS_OUTPUT=$(mint broken-links 2>&1) -echo "$LINKS_OUTPUT" - -# Check if broken links were found (mint broken-links exits 0 even when it finds broken links) -if echo "$LINKS_OUTPUT" | grep -q 
"found.*broken links"; then - echo "" - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - echo "❌ BROKEN LINKS DETECTED" - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - echo "Please fix the broken links shown above." - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - echo "" - exit 1 -fi +# Run mint broken-links - it will exit with non-zero if broken links are found +mint broken-links echo "" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" From 1093e943e94269c72537388f67b1f30521eab68d Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 5 Dec 2025 08:17:52 -0800 Subject: [PATCH 05/10] Introduce a broken link --- models/launch/evaluate-hosted-model.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/launch/evaluate-hosted-model.mdx b/models/launch/evaluate-hosted-model.mdx index 0708dd808f..410f7e6fe6 100644 --- a/models/launch/evaluate-hosted-model.mdx +++ b/models/launch/evaluate-hosted-model.mdx @@ -42,7 +42,7 @@ Follow these steps to set up and launch an evaluation job: This example job runs the `simpleqa` benchmark against the OpenAI model `o4-mini`: -![Example hosted model evaluation job](/images/models/llm-evaluation-jobs/hosted-model-job-example.png) +![Example hosted model evaluation job](/images/models/llm-evaluation-jobs/hosted-model-job-example.pn) This example leaderboard visualizes the performance of several OpenAI models together: From 6af1474cc63d793a4c3dd3a38bf0c48f11b5515c Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 5 Dec 2025 08:18:18 -0800 Subject: [PATCH 06/10] Introduce a MDX validation error --- models/launch/evaluate-hosted-model.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/launch/evaluate-hosted-model.mdx b/models/launch/evaluate-hosted-model.mdx index 410f7e6fe6..b3b05bf725 100644 --- a/models/launch/evaluate-hosted-model.mdx +++ b/models/launch/evaluate-hosted-model.mdx @@ -45,7 +45,7 
@@ This example job runs the `simpleqa` benchmark against the OpenAI model `o4-mini ![Example hosted model evaluation job](/images/models/llm-evaluation-jobs/hosted-model-job-example.pn) -This example leaderboard visualizes the performance of several OpenAI models together: +This example leaderboard visualizes the performance of several OpenAI models together: {} ![Example leaderboard visualizing the performance of several hosted models](/images/models/llm-evaluation-jobs/hosted-model-leaderboard-example.png) From fda85912b595c7508bf5bd1b2fce542fcf33d6e7 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 5 Dec 2025 14:15:27 -0800 Subject: [PATCH 07/10] Break an actual link since the broken image doesn't trigger --- models/launch/evaluate-hosted-model.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/launch/evaluate-hosted-model.mdx b/models/launch/evaluate-hosted-model.mdx index b3b05bf725..2338467ea0 100644 --- a/models/launch/evaluate-hosted-model.mdx +++ b/models/launch/evaluate-hosted-model.mdx @@ -12,7 +12,7 @@ import PreviewLink from '/snippets/en/_includes/llm-eval-jobs/preview.mdx'; This page shows how to use [LLM Evaluation Jobs](/models/launch) to run a series of evaluation benchmarks on a hosted API model at a publicly-accessible URL, using infrastructure managed by CoreWeave. To evaluate a model checkpoint saved as an artifact in W&B Models, see [Evaluate a model checkpoint](/models/launch/evaluate-model-checkpoint) instead. ## Prerequisites -1. Review the [requirements and limitations](/models/launch#more-details) for LLM Evaluation Jobs. +1. Review the [requirements and limitations](/models/launc#more-details) for LLM Evaluation Jobs. 1. To run certain benchmarks, a team admin must add the required API keys as team-scoped secrets. Any team member can specify the secret when configuring an evaluation job. - An **OpenAPI API key**: Used by benchmarks that use OpenAI models for scoring. 
Required if the field **Scorer API key** appears after you select a benchmark. The secret must be named `OPENAI_API_KEY`. - A **Hugging Face user access token**: Required for certain benchmarks like `lingoly` and `lingoly2` that require access to one or more gated Hugging Face datasets. Required if the field **Hugging Face Token** appears after selecting a benchmark. The API key must have access to the relevant dataset. See the Hugging Face documentation for [User access tokens](https://huggingface.co/docs/hub/en/security-tokens) and [accessing gated datasets](https://huggingface.co/docs/hub/en/datasets-gated#access-gated-datasets-as-a-user). From 9a7c1a192ebad3266f55e4b683c16e33c2aee88a Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 5 Dec 2025 14:20:52 -0800 Subject: [PATCH 08/10] Introduce another parsing error --- models/launch/evaluate-hosted-model.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/launch/evaluate-hosted-model.mdx b/models/launch/evaluate-hosted-model.mdx index 2338467ea0..f14afe7edb 100644 --- a/models/launch/evaluate-hosted-model.mdx +++ b/models/launch/evaluate-hosted-model.mdx @@ -45,7 +45,7 @@ This example job runs the `simpleqa` benchmark against the OpenAI model `o4-mini ![Example hosted model evaluation job](/images/models/llm-evaluation-jobs/hosted-model-job-example.pn) -This example leaderboard visualizes the performance of several OpenAI models together: {} +This example leaderboard visualizes the performance of several OpenAI models together: { ![Example leaderboard visualizing the performance of several hosted models](/images/models/llm-evaluation-jobs/hosted-model-leaderboard-example.png) From 67139efbe0878fc2fdafe683ae1feac61d75c079 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 5 Dec 2025 14:33:32 -0800 Subject: [PATCH 09/10] Fix introduced parsing error --- models/launch/evaluate-hosted-model.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/models/launch/evaluate-hosted-model.mdx b/models/launch/evaluate-hosted-model.mdx index f14afe7edb..7db7844b50 100644 --- a/models/launch/evaluate-hosted-model.mdx +++ b/models/launch/evaluate-hosted-model.mdx @@ -45,7 +45,7 @@ This example job runs the `simpleqa` benchmark against the OpenAI model `o4-mini ![Example hosted model evaluation job](/images/models/llm-evaluation-jobs/hosted-model-job-example.pn) -This example leaderboard visualizes the performance of several OpenAI models together: { +This example leaderboard visualizes the performance of several OpenAI models together: ![Example leaderboard visualizing the performance of several hosted models](/images/models/llm-evaluation-jobs/hosted-model-leaderboard-example.png) From 5aecfa5cb92f09099c6956579d45a9a69f366335 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 5 Dec 2025 14:37:18 -0800 Subject: [PATCH 10/10] Fix introduced broken links --- models/launch/evaluate-hosted-model.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/launch/evaluate-hosted-model.mdx b/models/launch/evaluate-hosted-model.mdx index 7db7844b50..0708dd808f 100644 --- a/models/launch/evaluate-hosted-model.mdx +++ b/models/launch/evaluate-hosted-model.mdx @@ -12,7 +12,7 @@ import PreviewLink from '/snippets/en/_includes/llm-eval-jobs/preview.mdx'; This page shows how to use [LLM Evaluation Jobs](/models/launch) to run a series of evaluation benchmarks on a hosted API model at a publicly-accessible URL, using infrastructure managed by CoreWeave. To evaluate a model checkpoint saved as an artifact in W&B Models, see [Evaluate a model checkpoint](/models/launch/evaluate-model-checkpoint) instead. ## Prerequisites -1. Review the [requirements and limitations](/models/launc#more-details) for LLM Evaluation Jobs. +1. Review the [requirements and limitations](/models/launch#more-details) for LLM Evaluation Jobs. 1. 
To run certain benchmarks, a team admin must add the required API keys as team-scoped secrets. Any team member can specify the secret when configuring an evaluation job. - An **OpenAPI API key**: Used by benchmarks that use OpenAI models for scoring. Required if the field **Scorer API key** appears after you select a benchmark. The secret must be named `OPENAI_API_KEY`. - A **Hugging Face user access token**: Required for certain benchmarks like `lingoly` and `lingoly2` that require access to one or more gated Hugging Face datasets. Required if the field **Hugging Face Token** appears after selecting a benchmark. The API key must have access to the relevant dataset. See the Hugging Face documentation for [User access tokens](https://huggingface.co/docs/hub/en/security-tokens) and [accessing gated datasets](https://huggingface.co/docs/hub/en/datasets-gated#access-gated-datasets-as-a-user). @@ -42,7 +42,7 @@ Follow these steps to set up and launch an evaluation job: This example job runs the `simpleqa` benchmark against the OpenAI model `o4-mini`: -![Example hosted model evaluation job](/images/models/llm-evaluation-jobs/hosted-model-job-example.pn) +![Example hosted model evaluation job](/images/models/llm-evaluation-jobs/hosted-model-job-example.png) This example leaderboard visualizes the performance of several OpenAI models together: