From 8f050a9ba05526c71708b40a00092b9e3b4de51d Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Tue, 13 Jan 2026 16:13:34 -0800 Subject: [PATCH 1/5] Update Lychee link check actions to be recursive --- .github/workflows/linkcheck-pr.yml | 206 +++++++++++++++++++++++++-- .github/workflows/linkcheck-prod.yml | 30 ++-- lychee.toml | 53 +++++++ 3 files changed, 268 insertions(+), 21 deletions(-) create mode 100644 lychee.toml diff --git a/.github/workflows/linkcheck-pr.yml b/.github/workflows/linkcheck-pr.yml index b498e5973b..e6a473b4b6 100644 --- a/.github/workflows/linkcheck-pr.yml +++ b/.github/workflows/linkcheck-pr.yml @@ -1,33 +1,213 @@ -name: Lychee PR link checker +name: Link checker - PR changed files + +# Avoid collisions by ensuring only one run per ref +concurrency: + group: linkcheck-pr-${{ github.ref_name }} + cancel-in-progress: false on: + workflow_dispatch: deployment_status: + pull_request: + types: [opened, synchronize, reopened] + paths: + - '**.md' + - '**.mdx' permissions: contents: read deployments: read + pull-requests: write jobs: linkChecker: runs-on: ubuntu-latest - - # Only run when Mintlify PR deployment succeeds + + # Run on: manual trigger, successful Mintlify deployment, or PR events (for forks) if: | - github.event.deployment_status.state == 'success' && - github.event.deployment.environment == 'staging' && - contains(github.event.deployment_status.creator.login, 'mintlify') && - contains(github.event.deployment_status.environment_url, 'mintlify') + github.event_name == 'workflow_dispatch' || + github.event_name == 'pull_request' || + (github.event_name == 'deployment_status' && + github.event.deployment_status.state == 'success' && + github.event.deployment.environment == 'staging' && + contains(github.event.deployment_status.creator.login, 'mintlify') && + contains(github.event.deployment_status.environment_url, 'mintlify')) + steps: - # check URLs with Lychee - uses: actions/checkout@v6 + with: + # Needed to diff base..head for the 
associated PR + fetch-depth: 0 + + - name: Resolve PR and deployment URL + id: pr-context + if: github.event_name == 'deployment_status' + uses: actions/github-script@v7 + with: + script: | + const { owner, repo } = context.repo; + const sha = context.payload.deployment?.sha; + const deployUrl = + context.payload.deployment_status?.environment_url || + context.payload.deployment_status?.target_url || + ''; + + core.info(`Deployment SHA: ${sha}`); + core.info(`Deployment URL: ${deployUrl}`); + + // Find PR(s) associated with this deployment commit SHA + const prsResp = await github.rest.repos.listPullRequestsAssociatedWithCommit({ + owner, + repo, + commit_sha: sha, + }); + + const pr = prsResp.data?.[0]; + if (!pr) { + core.warning(`No PR associated with commit ${sha}. Skipping linkcheck + PR comment.`); + core.setOutput('pr_number', ''); + core.setOutput('deploy_url', deployUrl); + return; + } + + core.info(`Associated PR: #${pr.number} (${pr.html_url})`); + core.setOutput('pr_number', String(pr.number)); + core.setOutput('base_sha', pr.base.sha); + core.setOutput('head_sha', pr.head.sha); + core.setOutput('deploy_url', deployUrl); + + - name: Get changed documentation files + id: changed-files + if: steps.pr-context.outputs.pr_number != '' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' + uses: tj-actions/changed-files@v47.0.1 + with: + base_sha: ${{ steps.pr-context.outputs.base_sha || github.event.pull_request.base.sha }} + sha: ${{ steps.pr-context.outputs.head_sha || github.event.pull_request.head.sha }} + files: | + **/*.md + **/*.mdx - name: Link Checker id: lychee + if: steps.changed-files.outputs.any_changed == 'true' || github.event_name == 'workflow_dispatch' uses: lycheeverse/lychee-action@v2 with: - args: "--threads 5 --max-retries 5 --retry-wait-time 2 --include '^https?://' --include '^http?://' --base-url='${{ github.event.deployment_status.environment_url }}' '${{ github.event.deployment_status.environment_url 
}}'" - format: markdown fail: false - env: - # to be used in case rate limits are surpassed - GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + # Don't fail if no files to check + failIfEmpty: false + # Output format for reports + format: markdown + # GitHub token for API rate limiting + token: ${{ secrets.GITHUB_TOKEN }} + # Override base_url with deployment URL (if available) or use production + # For forks without Mintlify preview: checks against production site + args: >- + --base-url ${{ steps.pr-context.outputs.deploy_url || 'https://docs.wandb.ai' }} + ${{ steps.changed-files.outputs.all_changed_files || '.' }} + + - name: Comment PR with link check results + if: (steps.pr-context.outputs.pr_number != '' || github.event_name == 'pull_request') && (steps.changed-files.outputs.any_changed == 'true' || github.event_name == 'workflow_dispatch') + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const identifier = '<!-- lychee-link-checker-comment -->'; + + let commentBody = identifier + '\n'; + + const isFork = context.payload.pull_request?.head?.repo?.fork || false; + const deployUrl = '${{ steps.pr-context.outputs.deploy_url }}'; + + if (${{ steps.lychee.outputs.exit_code }} === 0) { + // Success - no broken links + commentBody += '## 🔗 Link Checker Results\n\n'; + commentBody += '✅ **All links are valid!**\n\n'; + commentBody += 'No broken links were detected in the changed files.\n'; + if (isFork && !deployUrl) { + commentBody += '\n_Note: Checked against production site (https://docs.wandb.ai) since preview deployments are not available for forks._\n'; + } + + // Check if there were redirects in the report + try { + const report = fs.readFileSync('./lychee/out.md', 'utf8'); + if (report.includes('Redirect') || report.includes('redirect')) { + commentBody += '\n\n> [!TIP]\n'; + commentBody += '> **Redirects detected**: If you see redirects for internal docs.wandb.ai links, check if they have trailing slashes.\n'; + commentBody += '> \n'; + commentBody += '> Mintlify
automatically removes trailing slashes, causing redirects like:\n'; + commentBody += '> - `https://docs.wandb.ai/models/` → `https://docs.wandb.ai/models`\n'; + commentBody += '> \n'; + commentBody += '> **Fix**: Remove trailing slashes from links to avoid unnecessary redirects.\n'; + } + } catch (e) { + // Ignore if report file doesn't exist + } + } else { + // Issues found - include report + const report = fs.readFileSync('./lychee/out.md', 'utf8'); + + commentBody += '## 🔗 Link Checker Results\n\n'; + commentBody += '> [!NOTE]\n'; + if (isFork && !deployUrl) { + commentBody += '> This PR is from a fork, so links were checked against the **production site** (https://docs.wandb.ai).\n'; + commentBody += '> \n'; + commentBody += '> Links to **newly created files** in this PR will be reported as broken until the PR is merged.\n'; + } else { + commentBody += '> Links to **newly created files** in this PR may be reported as broken because this checks links against the **preview deployment**.\n'; + } + commentBody += '> \n'; + commentBody += '> Warnings about **new** files in this PR can be safely ignored.\n\n'; + + // Add trailing slash tip if redirects are present + if (report.includes('Redirect') || report.includes('redirect')) { + commentBody += '> [!TIP]\n'; + commentBody += '> **Redirects detected**: If you see redirects for internal docs.wandb.ai links, check if they have trailing slashes.\n'; + commentBody += '> \n'; + commentBody += '> Mintlify automatically removes trailing slashes, causing redirects like:\n'; + commentBody += '> - `https://docs.wandb.ai/models/` → `https://docs.wandb.ai/models`\n'; + commentBody += '> - `/weave/quickstart/` → `/weave/quickstart`\n'; + commentBody += '> \n'; + commentBody += '> **Fix**: Remove trailing slashes from links to avoid unnecessary redirects.\n\n'; + } + + commentBody += '---\n\n'; + commentBody += report; + } + + // Determine PR number + const prNumber = Number('${{ steps.pr-context.outputs.pr_number }}') || + 
context.payload.pull_request?.number; + + if (!prNumber) { + core.info('No PR number available, skipping comment'); + return; + } + + // Find existing comment + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + }); + + const existingComment = comments.find(comment => + comment.body?.includes(identifier) && comment.user?.login === 'github-actions[bot]' + ); + + if (existingComment) { + // Update existing comment + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existingComment.id, + body: commentBody + }); + } else { + // Create new comment + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: commentBody + }); + } diff --git a/.github/workflows/linkcheck-prod.yml b/.github/workflows/linkcheck-prod.yml index a72662d688..62f0d2dfc1 100644 --- a/.github/workflows/linkcheck-prod.yml +++ b/.github/workflows/linkcheck-prod.yml @@ -1,30 +1,44 @@ -name: Lychee production link checker +name: Link checker - production site + on: repository_dispatch: workflow_dispatch: schedule: - - cron: "5 0 1 * *" # In UTC, currently 12:05 AM on the 1st of each month + - cron: "5 0 1 * *" # Monthly on the 1st at 12:05 AM UTC jobs: linkChecker: runs-on: ubuntu-latest permissions: - issues: write # required for peter-evans/create-issue-from-file + issues: write # Required for creating issues steps: - # check URLs with Lychee - uses: actions/checkout@v6 + - name: Download and parse sitemap + run: | + echo "Fetching sitemap from https://docs.wandb.ai/sitemap.xml..." 
+ curl -s https://docs.wandb.ai/sitemap.xml | \ + grep -o '<loc>[^<]*</loc>' | \ + sed 's/<loc>//g; s/<\/loc>//g' > urls.txt + + URL_COUNT=$(wc -l < urls.txt | tr -d ' ') + echo "Found ${URL_COUNT} URLs in sitemap" + + # Show first few URLs for verification + echo "Sample URLs:" + head -5 urls.txt + - name: Link Checker id: lychee uses: lycheeverse/lychee-action@v2 with: - args: "--threads 5 --max-retries 5 --retry-wait-time 2 --include '^https?://' --include '^http?://' --base-url='http://docs.wandb.ai' 'http://docs.wandb.ai'" + # Configuration is in lychee.toml + # Check all URLs from sitemap + args: "urls.txt" output: ./lychee-report.md format: markdown fail: false - env: - # to be used in case rate limits are surpassed - GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + token: ${{ secrets.GITHUB_TOKEN }} - name: Create Issue From File if: steps.lychee.outputs.exit_code != 0 diff --git a/lychee.toml b/lychee.toml new file mode 100644 index 0000000000..a62763ed06 --- /dev/null +++ b/lychee.toml @@ -0,0 +1,53 @@ +# Lychee link checker configuration +# See https://lychee.cli.rs/guides/config/ for full documentation + +# Base URL for resolving relative links +# This will be overridden at runtime by the workflows +base_url = 'https://docs.wandb.ai' + +# Don't check heading anchors (fragments) - Mintlify generates these dynamically +include_fragments = false + +# Suppress progress bars in CI for cleaner logs +no_progress = true + +# Exclude private/internal network links +exclude_all_private = true + +# Retry configuration for handling transient failures +max_retries = 5 +retry_wait_time = 2 + +# Accept these HTTP status codes as valid +accept = [ + 200, # OK + 429, # Too Many Requests (rate limit - treat as success) +] + +# Only check HTTP/HTTPS URLs +scheme = [ + "https", + "http", +] + +# Only check URLs matching these patterns (required when checking URL lists) +include = [ + '^https?://', # Match all http:// and https:// URLs +] + +# Logging verbosity +verbose = "info" + +# URL patterns
to exclude from checking +exclude = [ + # Exclude images - Mintlify rewrites paths during build + '\.(png|jpg|jpeg|gif|svg|webp|ico)$', +] + +# File/directory paths to exclude from checking +exclude_path = [ + # Exclude non-documentation directories + "scripts", + "node_modules", + ".github", +] From 42a012bab0523a5ea66522da58a0e19cc91a0909 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Tue, 13 Jan 2026 16:55:35 -0800 Subject: [PATCH 2/5] Fix broken links to /weave/guides/core-types/ pages by using absolute paths --- weave/cookbooks/source/dspy_prompt_optimization.ipynb | 4 ++-- weave/guides/integrations/notdiamond.mdx | 4 ++-- .../reference/generated_typescript_docs/intro-notebook.mdx | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/weave/cookbooks/source/dspy_prompt_optimization.ipynb b/weave/cookbooks/source/dspy_prompt_optimization.ipynb index 14afde3f63..8ce172ec72 100644 --- a/weave/cookbooks/source/dspy_prompt_optimization.ipynb +++ b/weave/cookbooks/source/dspy_prompt_optimization.ipynb @@ -127,7 +127,7 @@ "source": [ "## Load the BIG-Bench Hard Dataset\n", "\n", - "We will load this dataset from HuggingFace Hub, split into training and validation sets, and [publish](../../guides/core-types/datasets.md) them on Weave, this will let us version the datasets, and also use [`weave.Evaluation`](../../guides/core-types/evaluations.md) to evaluate our prompting strategy." + "We will load this dataset from HuggingFace Hub, split into training and validation sets, and [publish](/weave/guides/core-types/datasets) them on Weave, this will let us version the datasets, and also use [`weave.Evaluation`](/weave/guides/core-types/evaluations) to evaluate our prompting strategy." 
] }, { @@ -287,7 +287,7 @@ "source": [ "## Evaluating our DSPy Program\n", "\n", - "Now that we have a baseline prompting strategy, let's evaluate it on our validation set using [`weave.Evaluation`](../../guides/core-types/evaluations.md) on a simple metric that matches the predicted answer with the ground truth. Weave will take each example, pass it through your application and score the output on multiple custom scoring functions. By doing this, you'll have a view of the performance of your application, and a rich UI to drill into individual outputs and scores.\n", + "Now that we have a baseline prompting strategy, let's evaluate it on our validation set using [`weave.Evaluation`](/weave/guides/core-types/evaluations) on a simple metric that matches the predicted answer with the ground truth. Weave will take each example, pass it through your application and score the output on multiple custom scoring functions. By doing this, you'll have a view of the performance of your application, and a rich UI to drill into individual outputs and scores.\n", "\n", "First, we need to create a simple weave evaluation scoring function that tells whether the answer from the baseline module's output is the same as the ground truth answer or not. Scoring functions need to have a `model_output` keyword argument, but the other arguments are user defined and are taken from the dataset examples. It will only take the necessary keys by using a dictionary key based on the argument name." ] diff --git a/weave/guides/integrations/notdiamond.mdx b/weave/guides/integrations/notdiamond.mdx index 638b5c0c60..0fba392dfd 100644 --- a/weave/guides/integrations/notdiamond.mdx +++ b/weave/guides/integrations/notdiamond.mdx @@ -107,10 +107,10 @@ Visit the [docs] or [send us a message][support] for further support. 
[chat]: https://chat.notdiamond.ai [custom router]: https://docs.notdiamond.ai/docs/router-training-quickstart [docs]: https://docs.notdiamond.ai -[evals]: ../../guides/core-types/evaluations.mdx +[evals]: /weave/guides/core-types/evaluations [keys]: https://app.notdiamond.ai/keys [nd]: https://www.notdiamond.ai/ -[ops]: ../../guides/tracking/ops.mdx +[ops]: /weave/guides/tracking/ops [python]: https://github.com/Not-Diamond/notdiamond-python [quickstart guide]: https://docs.notdiamond.ai/docs/quickstart [support]: mailto:support@notdiamond.ai diff --git a/weave/reference/generated_typescript_docs/intro-notebook.mdx b/weave/reference/generated_typescript_docs/intro-notebook.mdx index e2fc3d080d..8fb47c5974 100644 --- a/weave/reference/generated_typescript_docs/intro-notebook.mdx +++ b/weave/reference/generated_typescript_docs/intro-notebook.mdx @@ -106,7 +106,7 @@ async function demonstrateNestedTracking() { ## Dataset management -You can create and manage datasets with Weave using the [`weave.Dataset`](../../guides/core-types/datasets.mdx) class. Similar to [Weave `Models`](../../guides/core-types/models.mdx), `weave.Dataset` helps: +You can create and manage datasets with Weave using the [`weave.Dataset`](/weave/guides/core-types/datasets) class. Similar to [Weave `Models`](/weave/guides/core-types/models), `weave.Dataset` helps: - Track and version your data - Organize test cases @@ -144,7 +144,7 @@ function createGrammarDataset(): weave.Dataset { ## Evaluation framework -Weave supports evaluation-driven development with the [`Evaluation` class](../../guides/core-types/evaluations.mdx). Evaluations help you reliably iterate on your GenAI application. The `Evaluation` class does the following: +Weave supports evaluation-driven development with the [`Evaluation` class](/weave/guides/core-types/evaluations). Evaluations help you reliably iterate on your GenAI application. 
The `Evaluation` class does the following: - Assesses `Model` performance on a `Dataset` - Applies custom scoring functions @@ -224,4 +224,4 @@ async function main() { console.error('Error running demonstrations:', error); } } -``` \ No newline at end of file +``` From d2f53e52cdf4ccf16262957c7a82d6df56462fd2 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Wed, 14 Jan 2026 14:19:35 -0800 Subject: [PATCH 3/5] Remove trailing slashes from main index --- index.mdx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/index.mdx b/index.mdx index b9b26147e9..7365da40a8 100644 --- a/index.mdx +++ b/index.mdx @@ -19,8 +19,8 @@ import {ProductCard} from "/snippets/ProductCard.jsx"; Use W&B Models to manage AI model development. Features include training, fine-tuning, reporting, automating hyperparameter sweeps, and utilizing the model registry for versioning and reproducibility.

- • Introduction
- • Quickstart
+ • Introduction
+ • Quickstart
• YouTube Tutorial
@@ -33,7 +33,7 @@ import {ProductCard} from "/snippets/ProductCard.jsx"; Use W&B Weave to manage AI models in your code. Features include tracing, output evaluation, cost estimates, and a hosted inference service and playground for comparing different large language models (LLMs) and settings.

- • Introduction
+ • Introduction
• Quickstart
• YouTube Demo
@@ -41,27 +41,27 @@ import {ProductCard} from "/snippets/ProductCard.jsx"; Use W&B Inference to access leading open-source foundation models through an OpenAI-compatible API. Features include multiple model options, usage tracking, and integration with Weave for tracing and evaluation.

- • Introduction
+ • Introduction
• Try in Playground
Now in public preview, use W&B Training to post-train large language models using serverless reinforcement learning (RL). Features include fully managed GPU infrastructure, integration with ART and RULER, and automatic scaling for multi-turn agentic tasks.

- • Introduction
- • Quickstart
+ • Introduction
+ • Quickstart
From 90afa0b67f421c950fcc6fc837df27cc10a351f5 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 16 Jan 2026 10:03:10 -0800 Subject: [PATCH 4/5] Move excludes to lychee.toml Also sort each list in the TOML alphabetically for maintenance --- .lycheeignore | 18 ------------------ lychee.toml | 25 ++++++++++++++++++++++--- 2 files changed, 22 insertions(+), 21 deletions(-) delete mode 100644 .lycheeignore diff --git a/.lycheeignore b/.lycheeignore deleted file mode 100644 index 1d024bf85b..0000000000 --- a/.lycheeignore +++ /dev/null @@ -1,18 +0,0 @@ -# W&B production URLs -^https://(api|app|deploy|docs|www)\.wandb\.(ai|com) -^https://wandb\.(ai|com) -^https://github\.com/wandb - -# Example URLs used in generated docs -https://kubeflow.mysite.com -https://my.domain.net -https://wiki.python.org/moin/UsingPickle -https://my-fake-url.com - -# Third party URLs -https://auth0.com/docs -https://cognito-idp.us-east-1.amazonaws.com - -# Social media -https://discord.com -https://x.com diff --git a/lychee.toml b/lychee.toml index a62763ed06..7d86e97d0a 100644 --- a/lychee.toml +++ b/lychee.toml @@ -26,8 +26,8 @@ accept = [ # Only check HTTP/HTTPS URLs scheme = [ - "https", "http", + "https", ] # Only check URLs matching these patterns (required when checking URL lists) @@ -42,12 +42,31 @@ verbose = "info" exclude = [ # Exclude images - Mintlify rewrites paths during build '\.(png|jpg|jpeg|gif|svg|webp|ico)$', + + # Example URLs used in generated docs + 'https://kubeflow.mysite.com', + 'https://my-fake-url.com', + 'https://my.domain.net', + 'https://wiki.python.org/moin/UsingPickle', + + # Social media + 'https://discord.com', + 'https://x.com', + + # Third party URLs + 'https://auth0.com/docs', + 'https://cognito-idp.us-east-1.amazonaws.com', + + # W&B production URLs + '^https://github\.com/wandb', + '^https://(api|app|deploy|docs|www)\.wandb\.(ai|com)', + '^https://wandb\.(ai|com)', ] # File/directory paths to exclude from checking exclude_path = [ # Exclude 
non-documentation directories - "scripts", - "node_modules", ".github", + "node_modules", + "scripts", ] From cf490a03af5fe043970774793e0df36b3ba864cd Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Fri, 16 Jan 2026 10:10:28 -0800 Subject: [PATCH 5/5] Fix notdiamond quickstart link --- weave/guides/integrations/notdiamond.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weave/guides/integrations/notdiamond.mdx b/weave/guides/integrations/notdiamond.mdx index 0fba392dfd..589a555662 100644 --- a/weave/guides/integrations/notdiamond.mdx +++ b/weave/guides/integrations/notdiamond.mdx @@ -112,5 +112,5 @@ Visit the [docs] or [send us a message][support] for further support. [nd]: https://www.notdiamond.ai/ [ops]: /weave/guides/tracking/ops [python]: https://github.com/Not-Diamond/notdiamond-python -[quickstart guide]: https://docs.notdiamond.ai/docs/quickstart +[quickstart guide]: https://docs.notdiamond.ai/docs/quickstart-routing [support]: mailto:support@notdiamond.ai