Skip to content

Commit

Permalink
Merge pull request #5950 from aws-amplify/update-link-script
Browse files Browse the repository at this point in the history
Add link checker workflow for PRs
  • Loading branch information
jacoblogan authored Oct 10, 2023
2 parents 7f577d0 + f1801bd commit 37d957d
Show file tree
Hide file tree
Showing 6 changed files with 375 additions and 30 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/check_for_broken_links.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:
with:
result-encoding: string
script: |
const { checkLinks } = require('./tasks/link-checker.js');
return await checkLinks();
const { checkProdLinks } = require('./tasks/link-checker.js');
return await checkProdLinks();
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # v2.2.0
with:
Expand Down
36 changes: 36 additions & 0 deletions .github/workflows/check_pr_for_broken_links.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Builds the docs site, serves the build output locally, and runs the link checker against it.
# Triggered for pull requests targeting main when they are opened or receive new commits (synchronize).
name: CheckPRLinks
on:
pull_request:
branches: [main]
types: [opened, synchronize]
jobs:
CheckPRLinks:
runs-on: ubuntu-latest
steps:
# Third-party actions below are pinned to full commit SHAs; the trailing comment on each records the release tag and commit URL.
- name: Checkout repository
uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 https://github.com/actions/checkout/commit/f43a0e5ff2bd294095638e18286ca9a3d1956744
- name: Setup Node.js 16.x
uses: actions/setup-node@e33196f7422957bea03ed53f6fbb155025ffc7b8 # v3.7.0 https://github.com/actions/setup-node/commit/e33196f7422957bea03ed53f6fbb155025ffc7b8
with:
node-version: 16.x
- name: Install Dependencies
run: yarn
# The build runs with Node's old-space heap limit raised to 4096 MB via NODE_OPTIONS.
- name: Run Build
run: yarn build
env:
NODE_OPTIONS: --max_old_space_size=4096
# Starts `serve` on the build output directory as a background process, then pauses 5 seconds
# before the next step (presumably to give the server time to start listening - confirm this is long enough).
- name: Run Server
run: |
node ./node_modules/.bin/serve client/www/next-build --no-request-logging &
sleep 5
# Calls checkDevLinks from tasks/link-checker.js; its return value becomes this step's `result` output, encoded as a string.
- name: Run Link Checker
id: checkLinks
uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410 # v6.4.1 https://github.com/actions/github-script/commit/d7906e4ad0b1822421a7e6a35d5ca353c962f410
with:
result-encoding: string
script: |
const { checkDevLinks } = require('./tasks/link-checker.js');
return await checkDevLinks();
# Fails the job when the link checker's `result` output is truthy (non-empty).
# NOTE(review): assumes checkDevLinks returns an empty string when no broken links are found - confirm against tasks/link-checker.js.
- name: Fail if broken links have been found
if: ${{ steps.checkLinks.outputs.result }}
run: exit 1
3 changes: 3 additions & 0 deletions generatePathMap.cjs.js
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ function generatePathMap(
'/sdk/q/platform/flutter': {
page: '/sdk/q/platform/[platform]'
},
'/sdk/q/platform/react-native': {
page: '/sdk/q/platform/[platform]'
},
'/console': {
page: '/console'
},
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
"puppeteer": "^20.8.2",
"rollup-plugin-node-polyfills": "^0.2.1",
"sass": "^1.54.8",
"serve": "^14.2.1",
"tiny-glob": "0.2.9",
"ts-jest": "^26.0.3",
"ts-node": "^8.5.0",
Expand Down
45 changes: 29 additions & 16 deletions tasks/link-checker.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ const puppeteer = require('puppeteer');
const axios = require('axios');

const SITEMAP_URL = 'https://docs.amplify.aws/sitemap.xml';
const DOMAIN = 'https://docs.amplify.aws';
const CRAWLER_EXCEPTIONS = [
'https://aaaaaaaaaa.execute-api.us-east-1.amazonaws.com/api',
'https://aaaaaaaaaaaaaaaaaaaaaaaaaa.appsync-api.us-east-1.amazonaws.com/graphql',
Expand All @@ -11,12 +12,13 @@ const GITHUB_CREATE_ISSUE_LINK =
'https://github.com/aws-amplify/docs/issues/new';
const GITHUB_EDIT_LINK = 'https://github.com/aws-amplify/docs/edit/';

const getSitemapUrls = async () => {
const getSitemapUrls = async (localDomain) => {
let browser = await puppeteer.launch({ headless: 'new' });

const page = await browser.newPage();

let response = await page.goto(SITEMAP_URL);
let siteMap = localDomain ? `${localDomain}/sitemap.xml` : SITEMAP_URL;
let response = await page.goto(siteMap);

const siteMapUrls = [];

Expand All @@ -33,6 +35,10 @@ const getSitemapUrls = async () => {
urlTags,
i
);
if (localDomain) {
// The sitemap is currently always generated with the production docs domain, so rewrite each URL to point at the local domain instead
url = url.replace(DOMAIN, localDomain);
}
siteMapUrls.push(url);
}
}
Expand All @@ -42,7 +48,7 @@ const getSitemapUrls = async () => {
return siteMapUrls;
};

const retrieveLinks = async (siteMapUrls, visitedLinks) => {
const retrieveLinks = async (siteMapUrls, visitedLinks, localDomain) => {
let browser = await puppeteer.launch({ headless: 'new' });

let page = await browser.newPage();
Expand All @@ -54,7 +60,7 @@ const retrieveLinks = async (siteMapUrls, visitedLinks) => {

try {
let response = await page.goto(url, { waitUntil: 'domcontentloaded' });
await page.waitForNetworkIdle();
await new Promise((r) => setTimeout(r, 100)); // localhost hangs on wait for idle so use a short timeout instead
if (response && response.status() && response.status() === 200) {
console.log(`successfully visited ${url} to retrieve links`);
visitedLinks[url] = true;
Expand All @@ -77,7 +83,10 @@ const retrieveLinks = async (siteMapUrls, visitedLinks) => {
}, url);

urlList.forEach((link) => {
if (!CRAWLER_EXCEPTIONS.includes(link.url)) {
if (
!CRAWLER_EXCEPTIONS.includes(link.url) &&
(!localDomain || link.url.startsWith(localDomain))
) {
urlsToVisit.push(link);
}
});
Expand Down Expand Up @@ -106,14 +115,18 @@ const formatString = (inputs) => {
return retString;
};

const linkChecker = async () => {
const linkChecker = async (localDomain) => {
const visitedLinks = {};
const statusCodes = {};
const brokenLinks = [];

const siteMapUrls = await getSitemapUrls();
const siteMapUrls = await getSitemapUrls(localDomain);

const urlsToVisit = await retrieveLinks(siteMapUrls, visitedLinks);
const urlsToVisit = await retrieveLinks(
siteMapUrls,
visitedLinks,
localDomain
);

let allPromises = [];

Expand All @@ -129,7 +142,9 @@ const linkChecker = async () => {
visitedLinks[href] = true;

let request = axios
.get(href)
.get(href, {
timeout: 5000
})
.then((response) => {
let statusCode = response.status;
if (statusCode && statusCode !== 200) {
Expand All @@ -144,12 +159,7 @@ const linkChecker = async () => {
statusCodes[statusCode].push(href);
}
if (statusCode === 404) {
// this regular expression is meant to filter out any of the platform selector pages. These are appearing in the result set
// because the crawler is seeing disabled platform dropdown links
const platformPages = /\/q\/(platform|integration|framework)\/(android|ios|flutter|js|react-native)/gm;
if (!platformPages.test(link.url)) {
brokenLinks.push(link);
}
brokenLinks.push(link);
}
});

Expand All @@ -165,7 +175,10 @@ const linkChecker = async () => {
};

module.exports = {
checkLinks: async () => {
checkProdLinks: async () => {
return await linkChecker();
},
checkDevLinks: async () => {
return await linkChecker('http://localhost:3000');
}
};
Loading

0 comments on commit 37d957d

Please sign in to comment.