|
| 1 | +/* eslint-disable no-await-in-loop,no-inner-declarations */ |
| 2 | +/* |
| 3 | + External Resource Gatherer |
| 4 | +
|
| 5 | + Functions of this module are responsible for grabbing the various external files needed by CDTS. |
| 6 | + (mostly HTML snippets from esdc.prv for the GCintranet template) |
| 7 | +*/ |
| 8 | + |
| 9 | +const fs = require('fs'); |
| 10 | +//const path = require('path'); |
| 11 | +const axios = require('axios'); |
| 12 | + |
| 13 | +const { exceptionCDTSHTTPLinks } = require('./TestLinks'); |
| 14 | + |
| 15 | + |
// Resources processed when the caller does not supply its own list.
// Each entry is made of:
//   targetFilePath: local path (or array of paths) the downloaded content is written to
//   url:            location the content is downloaded from
//   sourcePageUrl:  page (or chain of pages) expected to still reference `url`
const defaultResourceList = [
    {
        targetFilePath: [
            './public/global/esdcmenu-eng.html',
            './public/global/esdcmenu1-eng.html',
        ],
        url: 'https://esdc.prv/_conf/assets/en/mega_menu/esdcmenu-eng.html',
        sourcePageUrl: [
            'https://esdc.prv/js/esdc-template_js_en.js',
            'https://esdc.prv/en/index.shtml',
        ],
    },
    {
        targetFilePath: [
            './public/global/esdcmenu-fra.html',
            './public/global/esdcmenu1-fra.html',
        ],
        url: 'https://esdc.prv/_conf/assets/fr/mega_menu/esdcmenu-fra.html',
        sourcePageUrl: [
            'https://esdc.prv/js/esdc-template_js_fr.js',
            'https://esdc.prv/fr/index.shtml',
        ],
    },
    {
        targetFilePath: './public/global/esdcfooter-eng.html',
        url: 'https://esdc.prv/_conf/assets/en/footer/esdcfooter-eng.html',
        sourcePageUrl: [
            'https://esdc.prv/js/esdc-template_js_en.js',
            'https://esdc.prv/en/index.shtml',
        ],
    },
    {
        targetFilePath: './public/global/esdcfooter-fra.html',
        url: 'https://esdc.prv/_conf/assets/fr/footer/esdcfooter-fra.html',
        sourcePageUrl: [
            'https://esdc.prv/js/esdc-template_js_fr.js',
            'https://esdc.prv/fr/index.shtml',
        ],
    },
    {
        targetFilePath: './public/gcintranet/ajax/sitemenu-eng.html',
        url: 'https://intranet.canada.ca/wet/sitemenu-eng.html',
        sourcePageUrl: 'https://intranet.canada.ca/index-eng.asp',
    },
    {
        targetFilePath: './public/gcintranet/ajax/sitemenu-fra.html',
        url: 'https://intranet.canada.ca/wet/sitemenu-fra.html',
        sourcePageUrl: 'https://intranet.canada.ca/index-fra.asp',
    },
];
| 24 | + |
| 25 | + |
/**
 * Verifies that the resource's parent page still contains a reference to the resource.
 * (Just a sanity check in case the source web site changes things without letting CDTS know)
 *
 * If sourcePageUrl is an array, it is validated as a chain
 * (ie resource.url must be found in resource.sourcePageUrl[0], which must itself be
 *  found in resource.sourcePageUrl[1], etc). The array is only read, never modified.
 *
 * A resource without a sourcePageUrl is not validated at all.
 *
 * @param {{url: string, sourcePageUrl?: (string|string[])}} resource Resource to validate.
 * @returns {Promise<void>} Resolves once every link of the chain was verified.
 * @throws {Error} If a source page answers with a status other than 200, or if the
 *                 expected URL (absolute or path-only) cannot be found in a source page.
 */
async function validateSourceProvenance(resource) {
    if (!resource.sourcePageUrl) return;

    if (Array.isArray(resource.sourcePageUrl)) {
        //---[ Validate the whole chain of ownership, one link at a time
        let resourceUrl = resource.url;
        for (const sourcePageUrl of resource.sourcePageUrl) {
            // Must be awaited: otherwise a failed link becomes an unhandled rejection
            // and the caller proceeds as if validation had succeeded.
            await validateSourceProvenance({ url: resourceUrl, sourcePageUrl });
            resourceUrl = sourcePageUrl;
        }

        return;
    }

    const response = await axios.get(resource.sourcePageUrl);
    if (response.status !== 200) throw new Error(`Invalid HTTP status received from [${resource.sourcePageUrl}]: ${response.status}`);

    //---[ Try to find the url in source page
    if (response.data.includes(resource.url)) return;

    //---[ OK, URL not found, try again without the host in case a relative URL is used
    const { pathname } = new URL(resource.url);
    if (!response.data.includes(pathname)) {
        throw new Error(`Reference source page [${resource.sourcePageUrl}] no longer seem to contain a reference to resource URL [${resource.url}]. Is that file still usable?`);
    }
}
| 64 | + |
/**
 * Transforms the specified resource content to add a comment and optionally fix some issues.
 * (Some resources we get are not directly usable, containing for example relative links
 *  or `http` links for resources accessible with `https`)
 *
 * Every step is enabled by default and is controlled by a flag on the resource:
 *   - skipComment: true           do not add the "retrieved from" comment (HTML resources only)
 *   - enforceAbsoluteLinks: false leave root-relative `href` values untouched
 *   - enforceHttps: false         leave `http://` `href` values untouched
 *   - enforceLineEndings: false   leave Windows line endings untouched
 *
 * NOTE: This implementation is a bit simplistic/naive but will do for our purposes
 *       (only quoted `href` attributes are looked at).
 *
 * @param {object} resource Resource descriptor; `resource.url` is where content came from.
 * @param {string} content Content as downloaded.
 * @returns {string} The transformed content.
 */
function transformResouceContent(resource, content) {
    let result = content;

    //Add a comment to indicate this file's origin (unless caller explicitly asked to skip it)
    if (resource.url.toLowerCase().endsWith('.html') && resource.skipComment !== true) {
        const htmlRegex = /<html.*?>/;
        const commentLine = `<!-- The contents of this file were retrieved from ${resource.url} -->\n`;
        if (htmlRegex.test(result)) {
            // Replacer function: keeps any `$` found in the URL from being read as a replacement pattern
            result = result.replace(htmlRegex, (htmlTag) => `${htmlTag}\n${commentLine}`);
        }
        else {
            result = commentLine + result;
        }
    }

    //Make relative links absolute
    if (resource.enforceAbsoluteLinks !== false) {
        const { origin } = new URL(resource.url);

        // The lookahead leaves protocol-relative links (href="//host/...") alone: they are not relative to our origin
        result = result.replace(/href=(["'])\/(?!\/)/g, (match, quote) => `href=${quote}${origin}/`);
    }

    //Make http links https
    if (resource.enforceHttps !== false) {
        const scheme = 'http://';

        result = result.replace(/href=(["'])http:\/\//g, (match, quote, offset, whole) => {
            // A link is left as-is when it starts with one of the accepted http links from TestLinks
            const linkStart = offset + match.length - scheme.length;
            const isAcceptedHttpLink = exceptionCDTSHTTPLinks.some(
                (accepted) => typeof accepted === 'string' && accepted.length > 0 && whole.startsWith(accepted, linkStart)
            );

            return isAcceptedHttpLink ? match : `href=${quote}https://`;
        });
    }

    //Convert line endings to Unix
    if (resource.enforceLineEndings !== false) {
        result = result.replaceAll('\r\n', '\n');
    }

    return result;
}
| 122 | + |
/**
 * Saves specified content to specified file path.
 *
 * The file is only (over)written when it does not exist yet or when its current
 * content differs from `content`; a line is logged to the console in both cases.
 *
 * @param {string} filePath Path of the file to create/update.
 * @param {string} content New content of the file (written as utf8).
 * @returns {Promise<boolean>} Whether or not the resource was changed.
 * @throws Any file system error other than the file not existing yet.
 */
async function saveFileContent(filePath, content) {
    //---[ Read current content (if any)
    let originalContent = null;
    try {
        originalContent = await fs.promises.readFile(filePath, { encoding: 'utf8' });
    }
    catch (err) {
        // Only a missing file means "new content"; anything else (permissions, path is
        // a directory, ...) is a real problem that must not be reported as a new file.
        if (err.code !== 'ENOENT') throw err;
    }

    //---[ Check if file exists/changed
    if (originalContent === null) {
        console.log(` ***** FILE [${filePath}] IS NEW!`);
    }
    else if (originalContent === content) {
        return false;
    }
    else {
        console.log(` ***** FILE [${filePath}] WAS MODIFIED!`);
    }

    //---[ (Over)write to file
    await fs.promises.writeFile(filePath, content, { encoding: 'utf8' });

    return true;
}
| 150 | + |
/**
 * Fetches one external resource, runs it through the content transformation and
 * stores the result at each of its target paths.
 *
 * A resource lacking either `url` or `targetFilePath` is ignored.
 *
 * @returns {boolean} Whether or not the resource was changed.
 */
async function downloadExternalResource(resource) {
    const { url, targetFilePath } = resource;
    if (!url || !targetFilePath) return false;

    //---[ Retrieve the remote file
    const response = await axios.get(url);
    if (response.status !== 200) {
        throw new Error(`Invalid HTTP status received from [${url}]: ${response.status}`);
    }

    //---[ Prepare the content to be stored
    const transformed = transformResouceContent(resource, response.data);

    //---[ Single destination
    if (!Array.isArray(targetFilePath)) {
        return saveFileContent(targetFilePath, transformed);
    }

    //---[ Multiple destinations: each one must be saved, even once a change was already detected
    let anyChanged = false;
    for (const destination of targetFilePath) {
        const changed = await saveFileContent(destination, transformed);
        anyChanged = anyChanged || changed;
    }

    return anyChanged;
}
| 180 | + |
/**
 * Downloads the specified external resources, saving them at their prescribed destination.
 * (Only updates files, does NOT do any commit or push)
 *
 * Resources are processed one after the other; entries without `url` or `targetFilePath`
 * are skipped. Processing stops at the first error, which is logged to the console
 * (it is NOT re-thrown: the returned promise still resolves).
 *
 * TLS certificate validation is turned off for the duration of the call and the
 * previous setting is put back afterwards.
 * NOTE: the setting is process-wide, so overlapping calls to this function (or other
 *       requests issued while it runs) share it.
 *
 * @param resourceList Array of resource objects ({url, targetFilePath, sourcePageUrl}) to be processed.
 * @returns {Promise<void>}
 */
module.exports.downloadExternalResources = async function downloadExternalResources(resourceList = defaultResourceList) {

    console.log('Gathering External Resources...');
    console.log();

    // Environment values are strings: use '0' rather than relying on implicit conversion of a number.
    const previousTlsSetting = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
    process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';

    try {
        let contentChanged = false;

        for (const resource of resourceList) {
            if (!resource.url || !resource.targetFilePath) continue;
            console.log(`Processing [${resource.targetFilePath}]...`);

            await validateSourceProvenance(resource);
            const resourceChanged = await downloadExternalResource(resource);
            contentChanged ||= resourceChanged; //must be done separately from function call otherwise Javascript can shortcut the call out
        }

        console.log();
        console.log('SUCCESS! All external files were re-downloaded.');
        if (contentChanged) {
            console.log('***** *** ONE OR MORE FILES WERE MODIFIED! ***');
            console.log('***** *** All links should now be re-tested (ie `npm run test-links`) ***');
            console.log('***** *** Changes can then be committed/pushed. ***');
        }
        else {
            console.log('No changes detected.');
        }
    }
    catch (err) {
        console.error(`ERROR: An error occured processing one of the resource files.`);
        console.error(`ERROR: ${err}`);
        console.error(` MAKE SURE TO FIX ALL ERRORS BEFORE PROCEEDING WITH COMMIT/PUSH.`);
    }
    finally {
        // Do not leave certificate validation disabled for the rest of the process.
        // (Assigning `undefined` would store the string 'undefined', hence the delete.)
        if (previousTlsSetting === undefined) {
            delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
        }
        else {
            process.env.NODE_TLS_REJECT_UNAUTHORIZED = previousTlsSetting;
        }
    }
};
| 223 | + |
| 224 | +//module.exports.downloadExternalResources(); |
0 commit comments