From 9f6d586aed43fa685e9b9148f24f1206af74b827 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Rame=CC=81?=
Date: Fri, 22 Mar 2024 16:21:58 +0100
Subject: [PATCH] chore(seo): implement sitemap logic

---
 next.config.js                          |  4 +
 package-lock.json                       | 76 +++++++++++++++++--
 package.json                            |  1 +
 src/pages/api/robots.ts                 | 20 ++++-
 src/pages/api/sitemap/[sitemap].ts      | 97 +++++++++++++++++++++++++
 src/pages/assets/public/dev/robots.txt  |  3 -
 src/pages/assets/public/prod/robots.txt |  2 -
 src/utils/routes/list.ts                | 14 ++++
 8 files changed, 202 insertions(+), 15 deletions(-)
 create mode 100644 src/pages/api/sitemap/[sitemap].ts
 delete mode 100644 src/pages/assets/public/dev/robots.txt
 delete mode 100644 src/pages/assets/public/prod/robots.txt

diff --git a/next.config.js b/next.config.js
index 18f903c..8312243 100644
--- a/next.config.js
+++ b/next.config.js
@@ -82,6 +82,10 @@ const moduleExports = async () => {
           source: '/robots.txt',
           destination: '/api/robots',
         },
+        {
+          source: '/sitemap/:reference.xml',
+          destination: '/api/sitemap/:reference',
+        },
       ];
     },
     images: {
diff --git a/package-lock.json b/package-lock.json
index 4f2304b..22d3c50 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -81,6 +81,7 @@
         "schema-dts": "^1.1.2",
         "sharp": "^0.32.1",
         "simple-git": "^3.22.0",
+        "sitemap": "^7.1.1",
         "superjson": "^1.13.3",
         "tiktoken": "^1.0.10",
         "ts-custom-error": "^3.3.1",
@@ -14153,6 +14154,14 @@
       "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz",
       "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA=="
     },
+    "node_modules/@types/sax": {
+      "version": "1.2.7",
+      "resolved": "https://registry.npmjs.org/@types/sax/-/sax-1.2.7.tgz",
+      "integrity": "sha512-rO73L89PJxeYM3s3pPPjiPgVVcymqU490g0YO5n5By0k2Erzj6tay/4lr1CHAAU4JyOWd1rpQ8bCf6cZfHU96A==",
+      "dependencies": {
+        "@types/node": "*"
+      }
+    },
     "node_modules/@types/scheduler": {
       "version": "0.16.8",
       "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.8.tgz",
@@ -36194,9 +36203,7 @@
     "node_modules/sax": {
       "version": "1.3.0",
       "resolved": "https://registry.npmjs.org/sax/-/sax-1.3.0.tgz",
-      "integrity": "sha512-0s+oAmw9zLl1V1cS9BtZN7JAd0cW5e0QH4W3LWEK6a4LaLEA2OTpGYWDY+6XasBLtz6wkm3u1xRw95mRuJ59WA==",
-      "dev": true,
-      "optional": true
+      "integrity": "sha512-0s+oAmw9zLl1V1cS9BtZN7JAd0cW5e0QH4W3LWEK6a4LaLEA2OTpGYWDY+6XasBLtz6wkm3u1xRw95mRuJ59WA=="
     },
     "node_modules/saxes": {
       "version": "6.0.0",
@@ -36848,6 +36855,34 @@
       "integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==",
       "dev": true
     },
+    "node_modules/sitemap": {
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/sitemap/-/sitemap-7.1.1.tgz",
+      "integrity": "sha512-mK3aFtjz4VdJN0igpIJrinf3EO8U8mxOPsTBzSsy06UtjZQJ3YY3o3Xa7zSc5nMqcMrRwlChHZ18Kxg0caiPBg==",
+      "dependencies": {
+        "@types/node": "^17.0.5",
+        "@types/sax": "^1.2.1",
+        "arg": "^5.0.0",
+        "sax": "^1.2.4"
+      },
+      "bin": {
+        "sitemap": "dist/cli.js"
+      },
+      "engines": {
+        "node": ">=12.0.0",
+        "npm": ">=5.6.0"
+      }
+    },
+    "node_modules/sitemap/node_modules/@types/node": {
+      "version": "17.0.45",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.45.tgz",
+      "integrity": "sha512-w+tIMs3rq2afQdsPJlODhoUEKzFP1ayaoyl1CcnwtIlsVe7K7bA1NGm4s3PraqTLlXnbIN84zuBlxBWo1u9BLw=="
+    },
+    "node_modules/sitemap/node_modules/arg": {
+      "version": "5.0.2",
+      "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz",
+      "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg=="
+    },
     "node_modules/slash": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
@@ -51358,6 +51393,14 @@
       "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz",
       "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA=="
     },
+    "@types/sax": {
+      "version": "1.2.7",
+      "resolved": "https://registry.npmjs.org/@types/sax/-/sax-1.2.7.tgz",
+      "integrity": "sha512-rO73L89PJxeYM3s3pPPjiPgVVcymqU490g0YO5n5By0k2Erzj6tay/4lr1CHAAU4JyOWd1rpQ8bCf6cZfHU96A==",
+      "requires": {
+        "@types/node": "*"
+      }
+    },
     "@types/scheduler": {
       "version": "0.16.8",
       "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.8.tgz",
@@ -67549,9 +67592,7 @@
     "sax": {
       "version": "1.3.0",
       "resolved": "https://registry.npmjs.org/sax/-/sax-1.3.0.tgz",
-      "integrity": "sha512-0s+oAmw9zLl1V1cS9BtZN7JAd0cW5e0QH4W3LWEK6a4LaLEA2OTpGYWDY+6XasBLtz6wkm3u1xRw95mRuJ59WA==",
-      "dev": true,
-      "optional": true
+      "integrity": "sha512-0s+oAmw9zLl1V1cS9BtZN7JAd0cW5e0QH4W3LWEK6a4LaLEA2OTpGYWDY+6XasBLtz6wkm3u1xRw95mRuJ59WA=="
     },
     "saxes": {
       "version": "6.0.0",
@@ -68060,6 +68101,29 @@
       "integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==",
       "dev": true
     },
+    "sitemap": {
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/sitemap/-/sitemap-7.1.1.tgz",
+      "integrity": "sha512-mK3aFtjz4VdJN0igpIJrinf3EO8U8mxOPsTBzSsy06UtjZQJ3YY3o3Xa7zSc5nMqcMrRwlChHZ18Kxg0caiPBg==",
+      "requires": {
+        "@types/node": "^17.0.5",
+        "@types/sax": "^1.2.1",
+        "arg": "^5.0.0",
+        "sax": "^1.2.4"
+      },
+      "dependencies": {
+        "@types/node": {
+          "version": "17.0.45",
+          "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.45.tgz",
+          "integrity": "sha512-w+tIMs3rq2afQdsPJlODhoUEKzFP1ayaoyl1CcnwtIlsVe7K7bA1NGm4s3PraqTLlXnbIN84zuBlxBWo1u9BLw=="
+        },
+        "arg": {
+          "version": "5.0.2",
+          "resolved": "https://registry.npmjs.org/arg/-/arg-5.0.2.tgz",
+          "integrity": "sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg=="
+        }
+      }
+    },
     "slash": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
diff --git a/package.json b/package.json
index 8c85786..9ef92c1 100644
--- a/package.json
+++ b/package.json
@@ -150,6 +150,7 @@
     "schema-dts": "^1.1.2",
     "sharp": "^0.32.1",
     "simple-git": "^3.22.0",
+    "sitemap": "^7.1.1",
     "superjson": "^1.13.3",
     "tiktoken": "^1.0.10",
     "ts-custom-error": "^3.3.1",
diff --git a/src/pages/api/robots.ts b/src/pages/api/robots.ts
index db7aff2..252668a 100644
--- a/src/pages/api/robots.ts
+++ b/src/pages/api/robots.ts
@@ -1,18 +1,30 @@
 import { NextApiRequest, NextApiResponse } from 'next';
 import getConfig from 'next/config';
 
-import devRobotsFile from '@etabli/src/pages/assets/public/dev/robots.txt';
-import prodRobotsFile from '@etabli/src/pages/assets/public/prod/robots.txt';
 import { apiHandlerWrapper } from '@etabli/src/utils/api';
+import { linkRegistry } from '@etabli/src/utils/routes/registry';
 
 const { publicRuntimeConfig } = getConfig();
 
 export function handler(req: NextApiRequest, res: NextApiResponse) {
   // Only allow indexing in production
   if (publicRuntimeConfig.appMode === 'prod') {
-    res.send(prodRobotsFile);
+    // Note: sitemap URLs need to be absolute (ref: https://stackoverflow.com/a/14218476/3608410)
+    res.send(
+      `
+User-agent: *
+Allow: /
+Sitemap: ${linkRegistry.get('sitemapIndex', undefined, { absolute: true })}
+`.trim()
+    );
   } else {
-    res.send(devRobotsFile);
+    res.send(
+      `
+User-agent: *
+Disallow: /
+Allow: /.well-known/
+`.trim()
+    );
   }
 }
 
diff --git a/src/pages/api/sitemap/[sitemap].ts b/src/pages/api/sitemap/[sitemap].ts
new file mode 100644
index 0000000..abf7cf3
--- /dev/null
+++ b/src/pages/api/sitemap/[sitemap].ts
@@ -0,0 +1,97 @@
+import { NextApiRequest, NextApiResponse } from 'next';
+import { SitemapIndexStream, SitemapItemLoose, SitemapStream } from 'sitemap';
+import { pipeline } from 'stream/promises';
+import { createGzip } from 'zlib';
+import { z } from 'zod';
+
+import { prisma } from '@etabli/src/prisma/client';
+import { apiHandlerWrapper } from '@etabli/src/utils/api';
+import { linkRegistry } from '@etabli/src/utils/routes/registry';
+import { getBaseUrl } from '@etabli/src/utils/url';
+
+// Either `index` or a sitemap page number starting at 1 (`.int()` rejects values like `1.5` that cannot map to a page)
+const PathSchema = z.literal('index').or(z.coerce.number().int().positive());
+
+const chunkSize = 40_000; // The maximum allowed by Google is 50k URLs or 50MB
+
+/**
+ * Serves the gzipped sitemaps index (`index`) or one gzipped sitemap page (`1`, `2`...).
+ *
+ * The static routes are listed only in the first page, each page then lists up to `chunkSize` initiatives.
+ * A page number beyond the last page is answered with a 404.
+ */
+export async function handler(req: NextApiRequest, res: NextApiResponse) {
+  // Either to return all sitemaps index, or a specific sitemap
+  const value = PathSchema.parse(req.query.sitemap);
+
+  let stream: SitemapIndexStream | SitemapStream;
+
+  if (value === 'index') {
+    // Static routes are not counted since they are added on top of the first chunk, but there is always at least this first sitemap
+    const initiativesCount = await prisma.initiative.count({});
+    const sitemapsCount = Math.max(1, Math.ceil(initiativesCount / chunkSize));
+
+    stream = new SitemapIndexStream({});
+
+    for (let i = 1; i <= sitemapsCount; i++) {
+      // URLs must be absolute to be indexed
+      stream.write({ url: linkRegistry.get('sitemap', { sitemapId: i }, { absolute: true }) });
+    }
+  } else {
+    const page = value;
+
+    const initiatives = await prisma.initiative.findMany({
+      select: {
+        id: true,
+        updatedAt: true,
+      },
+      // The `id` is used as a tiebreaker so the pagination stays stable when initiatives share the same creation date
+      orderBy: [{ createdAt: 'asc' }, { id: 'asc' }],
+      skip: (page - 1) * chunkSize, // The static routes are limited so we are fine if the first chunk will be for example 40_013 length
+      take: chunkSize,
+    });
+
+    if (page > 1 && initiatives.length === 0) {
+      // This page is not referenced by the index, and an empty sitemap has no purpose
+      res.status(404).end();
+      return;
+    }
+
+    stream = new SitemapStream({ hostname: getBaseUrl() });
+
+    if (page === 1) {
+      // Listing static routes first, only once to not duplicate them across sitemaps
+      const routes: string[] = [
+        linkRegistry.get('assistant', undefined),
+        linkRegistry.get('explore', undefined),
+        linkRegistry.get('home', undefined),
+        linkRegistry.get('initiatives', undefined),
+      ];
+
+      for (const staticRoute of routes) {
+        stream.write({ url: staticRoute, changefreq: 'weekly', priority: 0.8 } as SitemapItemLoose);
+      }
+    }
+
+    for (const initiative of initiatives) {
+      stream.write({
+        url: linkRegistry.get('initiative', { initiativeId: initiative.id }),
+        lastmod: initiative.updatedAt.toISOString(),
+        changefreq: 'monthly',
+        priority: 0.5,
+      } as SitemapItemLoose);
+    }
+  }
+
+  stream.end();
+
+  // Headers are set only once the data has been fetched, so a failure above is not reported with a gzip encoding it does not have
+  res.setHeader('Content-Type', 'application/xml');
+  res.setHeader('Content-Encoding', 'gzip');
+
+  // Awaiting the pipeline keeps the handler pending until the response is sent, and turns any stream error into a rejection
+  // instead of throwing from an event listener (where no caller is able to catch it)
+  await pipeline(stream, createGzip(), res);
+}
+
+export default apiHandlerWrapper(handler);
diff --git a/src/pages/assets/public/dev/robots.txt b/src/pages/assets/public/dev/robots.txt
deleted file mode 100644
index c740259..0000000
--- a/src/pages/assets/public/dev/robots.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-User-agent: *
-Disallow: /
-Allow: /.well-known/
diff --git a/src/pages/assets/public/prod/robots.txt b/src/pages/assets/public/prod/robots.txt
deleted file mode 100644
index c2a49f4..0000000
--- a/src/pages/assets/public/prod/robots.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-User-agent: *
-Allow: /
diff --git a/src/utils/routes/list.ts b/src/utils/routes/list.ts
index 15c7d91..84a9462 100644
--- a/src/utils/routes/list.ts
+++ b/src/utils/routes/list.ts
@@ -35,6 +35,18 @@ export const localizedRoutes = {
       en: (p) => `/initiatives`,
     }
   ),
+  sitemap: defineLocalizedRoute(
+    { sitemapId: param.path.number },
+    {
+      en: (p) => `/sitemap/${p.sitemapId}.xml`,
+    }
+  ),
+  sitemapIndex: defineLocalizedRoute(
+    {},
+    {
+      en: (p) => `/sitemap/index.xml`,
+    }
+  ),
 };
 
 // function createLocalizedRouter(lang: Lang, localeRoutes: typeof localizedRoutes) {
@@ -75,5 +87,7 @@ export const routes = {
     home: defineRoute(localizedRoutes.home.params, localizedRoutes.home.paths.en),
     initiative: defineRoute(localizedRoutes.initiative.params, localizedRoutes.initiative.paths.en),
     initiatives: defineRoute(localizedRoutes.initiatives.params, localizedRoutes.initiatives.paths.en),
+    sitemap: defineRoute(localizedRoutes.sitemap.params, localizedRoutes.sitemap.paths.en),
+    sitemapIndex: defineRoute(localizedRoutes.sitemapIndex.params, localizedRoutes.sitemapIndex.paths.en),
   }).routes,
 };