Skip to content

Commit bf0b566

Browse files
authored
Merge pull request #35198 from github/repo-sync
Repo sync
2 parents ea09c2b + 629632f commit bf0b566

File tree

11 files changed

+192
-98
lines changed

11 files changed

+192
-98
lines changed

src/archives/middleware/archived-enterprise-versions-assets.ts

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import path from 'path'
2-
31
import got from 'got'
42
import type { Response, NextFunction } from 'express'
53

@@ -14,13 +12,7 @@ import type { ExtendedRequest } from '@/types'
1412

1513
// This module handles requests for the CSS and JS assets for
1614
// deprecated GitHub Enterprise versions by routing them to static content in
17-
// help-docs-archived-enterprise-versions
18-
//
19-
// Note that as of GHES 3.2, we no longer store assets for deprecated versions
20-
// in help-docs-archived-enterprise-versions. Instead, we store them in the
21-
// Azure blob storage `githubdocs` in the `enterprise` container. All HTML files
22-
// have been updated to use references to this blob storage for all assets.
23-
//
15+
// one of the docs-ghes-<release number> repos.
2416
// See also ./archived-enterprise-versions.js for non-CSS/JS paths
2517

2618
export default async function archivedEnterpriseVersionsAssets(
@@ -33,12 +25,13 @@ export default async function archivedEnterpriseVersionsAssets(
3325
// or /_next/static/foo.css
3426
if (!patterns.assetPaths.test(req.path)) return next()
3527

36-
// We now know the URL is either /enterprise/2.22/_next/static/foo.css
37-
// or the regular /_next/static/foo.css. But we're only going to
38-
// bother looking it up on https://github.github.com/help-docs-archived-enterprise-versions
39-
// if the URL has the enterprise bit in it, or if the path was
40-
// /_next/static/foo.css *and* its Referrer had the enterprise
41-
// bit in it.
28+
// The URL is either in the format
29+
// /enterprise/2.22/_next/static/foo.css,
30+
// /enterprise-server@<release>/_next/static/foo.css,
31+
// or /_next/static/foo.css.
32+
// If the URL is prefixed with the enterprise version and release number
33+
// or if the Referrer contains the enterprise version and release number,
34+
// then we'll fetch it from the docs-ghes-<release number> repo.
4235
if (
4336
!(
4437
patterns.getEnterpriseVersionNumber.test(req.path) ||
@@ -59,25 +52,28 @@ export default async function archivedEnterpriseVersionsAssets(
5952
const { isArchived, requestedVersion } = isArchivedVersion(req)
6053
if (!isArchived || !requestedVersion) return next()
6154

62-
const assetPath = req.path.replace(`/enterprise/${requestedVersion}`, '')
55+
// In all of the `docs-ghes-<release number>` repos, the asset directories
56+
// are at the root. This removes the version and release number from the
57+
// asset path so that we can proxy the request to the correct location.
58+
const newEnterprisePrefix = `/enterprise-server@${requestedVersion}`
59+
const legacyEnterprisePrefix = `/enterprise/${requestedVersion}`
60+
const assetPath = req.path.replace(newEnterprisePrefix, '').replace(legacyEnterprisePrefix, '')
6361

6462
// Just to be absolutely certain that the path can not contain
6563
// a URL that might trip up the GET we're about to make.
6664
if (
67-
assetPath.includes('..') ||
65+
assetPath.includes('../') ||
6866
assetPath.includes('://') ||
6967
(assetPath.includes(':') && assetPath.includes('@'))
7068
) {
7169
defaultCacheControl(res)
7270
return res.status(404).type('text/plain').send('Asset path not valid')
7371
}
7472

75-
const proxyPath = path.join('/', requestedVersion, assetPath)
76-
73+
const proxyPath = `https://github.github.com/docs-ghes-${requestedVersion}${assetPath}`
7774
try {
78-
const r = await got(
79-
`https://github.github.com/help-docs-archived-enterprise-versions${proxyPath}`,
80-
)
75+
const r = await got(proxyPath)
76+
8177
res.set('accept-ranges', 'bytes')
8278
res.set('content-type', r.headers['content-type'])
8379
res.set('content-length', r.headers['content-length'])

src/archives/middleware/archived-enterprise-versions.ts

Lines changed: 129 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
import path from 'path'
2-
31
import type { Response, NextFunction } from 'express'
4-
import slash from 'slash'
52
import got from 'got'
63

74
import statsd from '@/observability/lib/statsd.js'
@@ -25,18 +22,16 @@ import getRedirect, { splitPathByLanguage } from '@/redirects/lib/get-redirect.j
2522
import getRemoteJSON from '@/frame/lib/get-remote-json.js'
2623
import { ExtendedRequest } from '@/types'
2724

28-
const REMOTE_ENTERPRISE_STORAGE_URL = 'https://githubdocs.azureedge.net/enterprise'
29-
30-
function splitByLanguage(uri: string) {
31-
let language = null
32-
let withoutLanguage = uri
33-
const match = uri.match(languagePrefixPathRegex)
34-
if (match) {
35-
language = match[1]
36-
withoutLanguage = uri.replace(languagePrefixPathRegex, '/')
37-
}
38-
return [language, withoutLanguage]
39-
}
25+
const OLD_PUBLIC_AZURE_BLOB_URL = 'https://githubdocs.azureedge.net'
26+
// Old Azure Blob Storage `enterprise` container.
27+
const OLD_AZURE_BLOB_ENTERPRISE_DIR = `${OLD_PUBLIC_AZURE_BLOB_URL}/enterprise`
28+
// Old Azure Blob storage `github-images` container with
29+
// the root directory of 'enterprise'.
30+
const OLD_GITHUB_IMAGES_ENTERPRISE_DIR = `${OLD_PUBLIC_AZURE_BLOB_URL}/github-images/enterprise`
31+
const OLD_DEVELOPER_SITE_CONTAINER = `${OLD_PUBLIC_AZURE_BLOB_URL}/developer-site`
32+
// This is the new repo naming convention we use for each archived enterprise
33+
// version. E.g. https://github.github.com/docs-ghes-2.10
34+
const ENTERPRISE_GH_PAGES_URL_PREFIX = 'https://github.github.com/docs-ghes-'
4035

4136
type ArchivedRedirects = {
4237
[url: string]: string | null
@@ -93,7 +88,8 @@ const retryConfiguration = { limit: 3 }
9388
const timeoutConfiguration = { response: 1500 }
9489

9590
// This module handles requests for deprecated GitHub Enterprise versions
96-
// by routing them to static content in help-docs-archived-enterprise-versions
91+
// by routing them to static content in
92+
// one of the docs-ghes-<release number> repos.
9793

9894
export default async function archivedEnterpriseVersions(
9995
req: ExtendedRequest,
@@ -108,6 +104,7 @@ export default async function archivedEnterpriseVersions(
108104

109105
const redirectCode = pathLanguagePrefixed(req.path) ? 301 : 302
110106

107+
// Redirects for releases 3.0+
111108
if (deprecatedWithFunctionalRedirects.includes(requestedVersion)) {
112109
const redirectTo = getRedirect(req.path, req.context)
113110
if (redirectTo) {
@@ -138,8 +135,7 @@ export default async function archivedEnterpriseVersions(
138135
return res.redirect(redirectCode, `/${language}${newRedirectTo}`)
139136
}
140137
}
141-
// redirect language-prefixed URLs like /en/enterprise/2.10 -> /enterprise/2.10
142-
// (this only applies to versions <2.13)
138+
// For releases 2.13 and lower, redirect language-prefixed URLs like /en/enterprise/2.10 -> /enterprise/2.10
143139
if (
144140
req.path.startsWith('/en/') &&
145141
versionSatisfiesRange(requestedVersion, `<${firstVersionDeprecatedOnNewSite}`)
@@ -148,8 +144,7 @@ export default async function archivedEnterpriseVersions(
148144
return res.redirect(redirectCode, req.baseUrl + req.path.replace(/^\/en/, ''))
149145
}
150146

151-
// find redirects for versions between 2.13 and 2.17
152-
// starting with 2.18, we updated the archival script to create a redirects.json file
147+
// Redirects for releases 2.13 - 2.17
153148
if (
154149
versionSatisfiesRange(requestedVersion, `>=${firstVersionDeprecatedOnNewSite}`) &&
155150
versionSatisfiesRange(requestedVersion, `<=${lastVersionWithoutArchivedRedirectsFile}`)
@@ -173,7 +168,8 @@ export default async function archivedEnterpriseVersions(
173168
return res.redirect(redirectCode, redirect)
174169
}
175170
}
176-
171+
// Redirects for 2.18 - 3.0. Starting with 2.18, we updated the archival
172+
// script to create a redirects.json file
177173
if (
178174
versionSatisfiesRange(requestedVersion, `>${lastVersionWithoutArchivedRedirectsFile}`) &&
179175
!deprecatedWithFunctionalRedirects.includes(requestedVersion)
@@ -195,19 +191,25 @@ export default async function archivedEnterpriseVersions(
195191
return res.redirect(redirectCode, redirectJson[req.path])
196192
}
197193
}
198-
199-
const statsdTags = [`version:${requestedVersion}`]
194+
// Retrieve the page from the archived repo
200195
const doGet = () =>
201196
got(getProxyPath(req.path, requestedVersion), {
202197
throwHttpErrors: false,
203198
retry: retryConfiguration,
204199
timeout: timeoutConfiguration,
205200
})
201+
202+
const statsdTags = [`version:${requestedVersion}`]
206203
const r = await statsd.asyncTimer(doGet, 'archive_enterprise_proxy', [
207204
...statsdTags,
208205
`path:${req.path}`,
209206
])()
207+
210208
if (r.statusCode === 200) {
209+
const [, withoutLanguagePath] = splitByLanguage(req.path)
210+
const isDeveloperPage = withoutLanguagePath?.startsWith(
211+
`/enterprise/${requestedVersion}/developer`,
212+
)
211213
res.set('x-robots-tag', 'noindex')
212214

213215
// make stubbed redirect files (which exist in versions <2.13) redirect with a 301
@@ -221,11 +223,74 @@ export default async function archivedEnterpriseVersions(
221223

222224
cacheAggressively(res)
223225

226+
// Releases 3.2 and higher contain image asset paths with the
227+
// old Azure Blob Storage URL. These need to be rewritten to
228+
// the new archived enterprise repo URL.
229+
if (versionSatisfiesRange(requestedVersion, `>=${firstReleaseStoredInBlobStorage}`)) {
230+
r.body = r.body
231+
.replaceAll(
232+
`${OLD_AZURE_BLOB_ENTERPRISE_DIR}/${requestedVersion}/assets/cb-`,
233+
`${ENTERPRISE_GH_PAGES_URL_PREFIX}${requestedVersion}/assets/cb-`,
234+
)
235+
.replaceAll(
236+
`${OLD_AZURE_BLOB_ENTERPRISE_DIR}/${requestedVersion}/`,
237+
`${req.protocol}://${req.get('host')}/enterprise-server@${requestedVersion}/`,
238+
)
239+
}
240+
241+
// Releases 3.1 and lower were previously hosted in the
242+
// help-docs-archived-enterprise-versions repo. Only the images
243+
// were stored in the old Azure Blob Storage `github-images` container.
244+
// The image paths all need to be updated to reference the images in the
245+
// new archived enterprise repo's root assets directory.
246+
if (versionSatisfiesRange(requestedVersion, `<${firstReleaseStoredInBlobStorage}`)) {
247+
r.body = r.body.replaceAll(
248+
`${OLD_GITHUB_IMAGES_ENTERPRISE_DIR}/${requestedVersion}`,
249+
`${ENTERPRISE_GH_PAGES_URL_PREFIX}${requestedVersion}`,
250+
)
251+
if (versionSatisfiesRange(requestedVersion, '<=2.18') && isDeveloperPage) {
252+
r.body = r.body.replaceAll(
253+
`${OLD_DEVELOPER_SITE_CONTAINER}/${requestedVersion}`,
254+
`${ENTERPRISE_GH_PAGES_URL_PREFIX}${requestedVersion}/developer`,
255+
)
256+
// Update all hrefs to add /developer to the path
257+
r.body = r.body.replaceAll(
258+
`="/enterprise/${requestedVersion}`,
259+
`="/enterprise/${requestedVersion}/developer`,
260+
)
261+
// The changelog is the only thing remaining on developer.github.com
262+
r.body = r.body.replaceAll('href="/changes', 'href="https://developer.github.com/changes')
263+
}
264+
}
265+
266+
// In all releases, some assets were incorrectly scraped and contain
267+
// deep relative paths. For example, releases 3.4+ use the webp format
268+
// for images. The URLs for those images were never rewritten to pull
269+
// from the Azure Blob Storage container. This may be due to not
270+
// updating our scraping tool to handle the new image types. There
271+
// are additional images in older versions that also have a relative path.
272+
// We want to update the URLs in the format
273+
// "../../../../../../assets/" to prefix the assets directory with the
274+
// new archived enterprise repo URL.
275+
r.body = r.body.replaceAll(
276+
/="(\.\.\/)*assets/g,
277+
`="${ENTERPRISE_GH_PAGES_URL_PREFIX}${requestedVersion}/assets`,
278+
)
279+
280+
// Fix broken hrefs on the 2.16 landing page
281+
if (requestedVersion === '2.16' && req.path === '/en/enterprise/2.16') {
282+
r.body = r.body.replaceAll('ref="/en/enterprise', 'ref="/en/enterprise/2.16')
283+
}
284+
285+
// Remove the search results container from the page, which removes a white
286+
// box that prevents clicking on page links
287+
r.body = r.body.replaceAll('<div id="search-results-container"></div>', '')
288+
224289
return res.send(r.body)
225290
}
226-
227-
// from 2.13 to 2.17, we lost access to frontmatter redirects during the archival process
228-
// this workaround finds potentially relevant frontmatter redirects in currently supported pages
291+
// In releases 2.13 - 2.17, we lost access to frontmatter redirects
292+
// during the archival process. This workaround finds potentially
293+
// relevant frontmatter redirects in currently supported pages
229294
if (
230295
versionSatisfiesRange(requestedVersion, `>=${firstVersionDeprecatedOnNewSite}`) &&
231296
versionSatisfiesRange(requestedVersion, `<=${lastVersionWithoutArchivedRedirectsFile}`)
@@ -244,18 +309,35 @@ export default async function archivedEnterpriseVersions(
244309
return next()
245310
}
246311

247-
// paths are slightly different depending on the version
248-
// for >=2.13: /2.13/en/enterprise/2.13/user/articles/viewing-contributions-on-your-profile
249-
// for <2.13: /2.12/user/articles/viewing-contributions-on-your-profile
250312
function getProxyPath(reqPath: string, requestedVersion: string) {
251-
if (versionSatisfiesRange(requestedVersion, `>=${firstReleaseStoredInBlobStorage}`)) {
313+
const [, withoutLanguagePath] = splitByLanguage(reqPath)
314+
const isDeveloperPage = withoutLanguagePath?.startsWith(
315+
`/enterprise/${requestedVersion}/developer`,
316+
)
317+
318+
// This was the last release supported on developer.github.com
319+
if (isDeveloperPage) {
320+
const enterprisePath = `/enterprise/${requestedVersion}`
321+
const newReqPath = reqPath.replace(enterprisePath, '')
322+
return ENTERPRISE_GH_PAGES_URL_PREFIX + requestedVersion + newReqPath
323+
}
324+
325+
// Releases 2.18 and higher
326+
if (versionSatisfiesRange(requestedVersion, `>${lastVersionWithoutArchivedRedirectsFile}`)) {
252327
const newReqPath = reqPath.includes('redirects.json') ? `/${reqPath}` : reqPath + '/index.html'
253-
return `${REMOTE_ENTERPRISE_STORAGE_URL}/${requestedVersion}${newReqPath}`
328+
return ENTERPRISE_GH_PAGES_URL_PREFIX + requestedVersion + newReqPath
254329
}
255-
const proxyPath = versionSatisfiesRange(requestedVersion, `>=${firstVersionDeprecatedOnNewSite}`)
256-
? slash(path.join('/', requestedVersion, reqPath))
257-
: reqPath.replace(/^\/enterprise/, '')
258-
return `https://github.github.com/help-docs-archived-enterprise-versions${proxyPath}`
330+
331+
// Releases 2.13 - 2.17
332+
// redirects.json files don't exist for these versions
333+
if (versionSatisfiesRange(requestedVersion, `>=2.13`)) {
334+
return ENTERPRISE_GH_PAGES_URL_PREFIX + requestedVersion + reqPath + '/index.html'
335+
}
336+
337+
// Releases 2.12 and lower
338+
const enterprisePath = `/enterprise/${requestedVersion}`
339+
const newReqPath = reqPath.replace(enterprisePath, '')
340+
return ENTERPRISE_GH_PAGES_URL_PREFIX + requestedVersion + newReqPath
259341
}
260342

261343
// Module-level global cache object.
@@ -276,7 +358,7 @@ function getFallbackRedirect(req: ExtendedRequest) {
276358
//
277359
// The keys are valid URLs that it can redirect to. I.e. these are
278360
// URLs that we definitely know are valid and will be found
279-
// in https://github.com/github/help-docs-archived-enterprise-versions
361+
// in one of the docs-ghes-<release number> repos.
280362
// The array values are possible URLs we deem acceptable redirect
281363
// sources.
282364
// But to avoid an unnecessary, O(n), loop every time, we turn this
@@ -311,3 +393,14 @@ function getFallbackRedirect(req: ExtendedRequest) {
311393
return `/${language}${fallback}`
312394
}
313395
}
396+
397+
function splitByLanguage(uri: string) {
398+
let language = null
399+
let withoutLanguage = uri
400+
const match = uri.match(languagePrefixPathRegex)
401+
if (match) {
402+
language = match[1]
403+
withoutLanguage = uri.replace(languagePrefixPathRegex, '/')
404+
}
405+
return [language, withoutLanguage]
406+
}

src/archives/scripts/warmup-remotejson.js

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,7 @@ function version2url(version) {
4141
semver.coerce(version).raw,
4242
semver.coerce(firstReleaseStoredInBlobStorage).raw,
4343
)
44-
return inBlobStorage
45-
? `https://githubdocs.azureedge.net/enterprise/${version}/redirects.json`
46-
: `https://github.github.com/help-docs-archived-enterprise-versions/${version}/redirects.json`
44+
return `https://github.github.com/docs-ghes-${version}/redirects.json`
4745
}
4846

4947
function withArchivedRedirectsFile(version) {

src/archives/tests/deprecated-enterprise-versions.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ describe('recently deprecated redirects', () => {
122122
expect(res.headers.vary).toContain('accept-language')
123123
expect(res.headers.vary).toContain('x-user-language')
124124
// This is based on
125-
// https://github.com/github/help-docs-archived-enterprise-versions/blob/master/3.0/redirects.json
125+
// https://github.com/github/docs-ghes-3.0/blob/main/redirects.json
126126
expect(res.headers.location).toBe(
127127
'/en/enterprise-server@3.0/get-started/learning-about-github/githubs-products',
128128
)
@@ -309,8 +309,8 @@ describe('JS and CSS assets', () => {
309309
expect(result.headers['x-is-archived']).toBeUndefined()
310310
})
311311

312-
test('404 if the pathname contains URL characters (..)', async () => {
313-
const result = await get('/enterprise/2.18/dist/index..css', {
312+
test('404 if the pathname contains URL characters (../)', async () => {
313+
const result = await get('/enterprise/2.18/dist/index../css', {
314314
headers: {
315315
Referrer: '/en/enterprise/2.18',
316316
},

0 commit comments

Comments
 (0)