diff --git a/aws/iam/policy/NextstrainDotOrgServerInstanceDev.json b/aws/iam/policy/NextstrainDotOrgServerInstanceDev.json index 1c8798599..fa44a0d92 100644 --- a/aws/iam/policy/NextstrainDotOrgServerInstanceDev.json +++ b/aws/iam/policy/NextstrainDotOrgServerInstanceDev.json @@ -50,11 +50,11 @@ "arn:aws:s3:::nextstrain-groups" ], "Condition": { - "StringEquals": { + "StringLike": { "s3:prefix": [ - "blab/", - "test/", - "test-private/" + "blab/*", + "test/*", + "test-private/*" ] } } diff --git a/scripts/migrate-group b/scripts/migrate-group index 227a9ba38..647251b95 100755 --- a/scripts/migrate-group +++ b/scripts/migrate-group @@ -2,16 +2,13 @@ const {ArgumentParser} = require("argparse"); const IAM = require("@aws-sdk/client-iam"); const S3 = require("@aws-sdk/client-s3"); -const {spawn} = require("child_process"); -const {Console} = require("console"); const fs = require("fs"); const os = require("os"); const {basename, relative: relativePath, parse: parsePath} = require("path"); const process = require("process"); -const {Transform} = require("stream"); const {Group} = require("../src/groups"); -const {reportUnhandledRejectionsAtExit} = require("../src/utils/scripts"); +const {reportUnhandledRejectionsAtExit, run, setupConsole} = require("../src/utils/scripts"); const AWS_ACCOUNT_ID = process.env.AWS_ACCOUNT_ID; @@ -93,6 +90,58 @@ async function migrate({group, dryRun = true}) { async function syncData({dryRun = true, group}) { + console.group(`\nSyncing S3 data`); + + // Datasets + await s3Sync({ + dryRun, + group, + prefix: "datasets/", + filters: [ + "--exclude=*", + "--include=*.json", + ] + }); + + // Narratives + await s3Sync({ + dryRun, + group, + prefix: "narratives/", + filters: [ + "--exclude=*", + "--include=*.md", + "--exclude=group-overview.md", + ] + }); + + // Control/customization files + await s3Sync({ + dryRun, + group, + prefix: "", + filters: [ + "--exclude=*", + "--include=group-overview.md", + "--include=group-logo.png", + ] + }); + + // Discover files to consider for manual review + const unsynced = (await s3ListObjects({group})).filter( + key => !key.endsWith(".json") + && !key.endsWith(".md") + && key !== "group-overview.md" + && key !== "group-logo.png" + ); + + console.groupEnd(); + + return unsynced.map(key => `Investigate unsynced object s3://${group.bucket}/${key}`); +} + + +async function s3Sync({dryRun = true, group, prefix = "", filters = []}) { const argv = [ "aws", "s3", "sync", ...(dryRun @@ -100,7 +149,8 @@ async function syncData({dryRun = true, group}) { : []), "--delete", `s3://${group.bucket}/`, - `s3://nextstrain-groups/${group.name}/`, + `s3://nextstrain-groups/${group.name}/${prefix}`, + ...filters, ]; console.group(`\nRunning ${argv.join(" ")}`); await run(argv); @@ -108,6 +158,16 @@ async function syncData({dryRun = true, group}) { } +async function s3ListObjects({group}) { + const client = new S3.S3Client(); + + return await collate( + S3.paginateListObjectsV2({client}, {Bucket: group.bucket}), + page => page.Contents.map(object => object.Key), + ); +} + + async function updateServerPolicies({dryRun = true, oldBucket}) { const policyFiles = [ "aws/iam/policy/NextstrainDotOrgServerInstance.json", @@ -379,64 +439,6 @@ async function diff(...args) { } -/** - * Run a command with stdout and stderr sent to `console.log()` and - * `console.error()`. - * - * @param {string[]} argv - * @returns {{code, signal, argv}} - */ -async function run(argv) { - return new Promise((resolve, reject) => { - const proc = spawn(argv[0], argv.slice(1), {stdio: ["ignore", "pipe", "pipe"]}); - - proc.stdout.on("data", data => console.log(data.toString().replace(/\n$/, ""))); - proc.stderr.on("data", data => console.error(data.toString().replace(/\n$/, ""))); - - proc.on("close", (code, signal) => { - const result = code !== 0 || signal != null - ? reject - : resolve; - return result({code, signal, argv}); - }); - }); -} - - -/** - * Set up the global `console` object to prefix all output lines with an - * indication of the state of *dryRun*. - * - * @param {{dryRun: boolean}} - */ -function setupConsole({dryRun = true}) { - if (!dryRun) return; - - const LinePrefixer = class extends Transform { - constructor(prefix) { - super(); - this.prefix = prefix; - } - _transform(chunk, encoding, callback) { - // Prefix the beginning of the string and every internal newline, but not - // the last trailing newline. - this.push(chunk.toString().replace(/^|(?<=\n)(?!$)/g, this.prefix)); - callback(); - } - }; - - const stdout = new LinePrefixer("DRY RUN | "); - const stderr = new LinePrefixer("DRY RUN | "); - - stdout.pipe(process.stdout); - stderr.pipe(process.stderr); - - console = new Console({stdout, stderr}); - process.stdout = stdout; - process.stderr = stderr; -} - - /** * Read file at *path* as JSON. * diff --git a/scripts/migrate-groups-layout b/scripts/migrate-groups-layout new file mode 100755 index 000000000..ca16751a3 --- /dev/null +++ b/scripts/migrate-groups-layout @@ -0,0 +1,160 @@ +#!/usr/bin/env node +const {ArgumentParser} = require("argparse"); +const S3 = require("@aws-sdk/client-s3"); +const process = require("process"); + +const {reportUnhandledRejectionsAtExit, run, setupConsole} = require("../src/utils/scripts"); + +const BUCKET = "nextstrain-groups"; + + +function parseArgs() { + const argparser = new ArgumentParser({ + usage: `%(prog)s [--dry-run | --wet-run] [--delete-after-copy]`, + description: ` + Migrate layout of new multi-tenant bucket for Nextstrain Groups from old + layout to the new layout. + + This program is designed to be idempotent if run multiple times. In + practice, it likely only needs to be run once before deploy of the + layout change and once again (this time with --delete-after-copy) after + deploy. + `, + }); + + argparser.addArgument("--dry-run", { + help: "Go through the motions locally but don't actually make any changes on S3. This is the default.", + dest: "dryRun", + action: "storeTrue", + defaultValue: true, + }); + argparser.addArgument("--wet-run", { + help: "Actually make changes on S3.", + dest: "dryRun", + action: "storeFalse", + }); + + argparser.addArgument("--delete-after-copy", { + help: "Delete objects in the old layout after copying them to the new layout.", + dest: "deleteAfterCopy", + action: "storeTrue", + defaultValue: false, + }); + + return argparser.parseArgs(); +} + + +function main({dryRun = true, deleteAfterCopy = false}) { + setupConsole({dryRun}); + + console.log(`Migrating layout of multi-tenant bucket`); + + migrate({dryRun, deleteAfterCopy}) + .then(counts => { + console.log(`\nMigration complete: %o`, counts); + }) + .catch(error => { + console.error("\n\n%s\n", error); + console.error("Migration FAILED. See above for details. It's typically safe to re-run this program after fixing the issue."); + process.exitCode = 1; + }); +} + + +async function migrate({dryRun = true, deleteAfterCopy = false}) { + const s3 = new S3.S3Client(); + + console.log("\nDiscovering objects…"); + let objects = []; + + for await (const page of S3.paginateListObjectsV2({client: s3}, {Bucket: BUCKET})) { + objects = objects.concat(page.Contents); + } + + const existingKeys = new Map(objects.map(o => [o.Key, o])); + + console.group(`\n${deleteAfterCopy ? "Moving" : "Copying"} objects…`); + + const counts = {copied: 0, updated: 0, existed: 0}; + + for (const object of objects) { + const oldKey = object.Key; + const newKey = newKeyFor(oldKey); + + if (!newKey) continue; + + let status; + const existingCopy = existingKeys.get(newKey); + if (existingCopy) { + if (existingCopy.LastModified >= object.LastModified) { + status = "existed"; + } else { + status = "updated"; + } + } else { + status = "copied"; + } + + if (status !== "existed") { + console.log(`copying: ${oldKey} → ${newKey}`); + + if (!dryRun) { + await s3.send(new S3.CopyObjectCommand({ + CopySource: `${BUCKET}/${oldKey}`, + Bucket: BUCKET, + Key: newKey, + })); + } + } + + if (!dryRun && deleteAfterCopy) { + console.log(`deleting: ${oldKey}`); + + await s3.send(new S3.DeleteObjectCommand({ + Bucket: BUCKET, + Key: oldKey, + })); + } + + counts[status]++; + } + + console.groupEnd(); + + return counts; +} + + +function newKeyFor(key) { + const {groupName, subKey} = parseKey(key); + + if (!shouldCopy(subKey)) return; + + const subPrefix = + subKey.endsWith(".json") ? "datasets" : + subKey.endsWith(".md") ? "narratives" : + undefined ; + + if (!subPrefix) throw new Error(`unrecognized key: ${key}`); + + return `${groupName}/${subPrefix}/${subKey}`; +} + + +function parseKey(key) { + const [groupName, ...rest] = key.split("/"); + return {groupName, subKey: rest.join("/")}; +} + + +function shouldCopy(subKey) { + return !subKey.startsWith("datasets/") + && !subKey.startsWith("narratives/") + && subKey !== "group-overview.md" + && subKey !== "group-logo.png"; +} + + +reportUnhandledRejectionsAtExit(); +main(parseArgs()); diff --git a/src/sources/groups.js b/src/sources/groups.js index 706215027..af6f26500 100644 --- a/src/sources/groups.js +++ b/src/sources/groups.js @@ -7,7 +7,7 @@ const authz = require("../authz"); const {fetch} = require("../fetch"); const {Group} = require("../groups"); const utils = require("../utils"); -const {Source} = require("./models"); +const {Source, Dataset, Narrative} = require("./models"); const S3 = new AWS.S3(); @@ -62,6 +62,13 @@ class GroupSource extends Source { : ""; } + dataset(pathParts) { + return new GroupDataset(this, pathParts); + } + narrative(pathParts) { + return new GroupNarrative(this, pathParts); + } + async urlFor(path, method = 'GET', headers = {}) { const normalizedHeaders = utils.normalizeHeaders(headers); const action = { @@ -86,10 +93,12 @@ class GroupSource extends Source { ...action[method].params, }); } - async _listFiles() { + async _listFiles(listPrefix = "") { + const prefix = this.prefix + listPrefix; + return new Promise((resolve, reject) => { let files = []; - S3.listObjectsV2({Bucket: this.bucket, Prefix: this.prefix}).eachPage((err, data, done) => { + S3.listObjectsV2({Bucket: this.bucket, Prefix: prefix}).eachPage((err, data, done) => { if (err) { utils.warn(`Could not list S3 objects for group '${this.group.name}'\n${err.message}`); return reject(err); @@ -106,22 +115,27 @@ class GroupSource extends Source { files = files.concat( data.Contents .map(object => object.Key) - .filter(key => key.startsWith(this.prefix)) - .map(key => key.slice(this.prefix.length)) - .filter(subKey => !subKey.startsWith("datasets/") && !subKey.startsWith("narratives/")) + .filter(key => key.startsWith(prefix)) + .map(key => key.slice(prefix.length)) ); return done(); }); }); } async availableDatasets() { - const files = await this._listFiles(); + const prefix = this.bucket === MULTI_TENANT_BUCKET + ? "datasets/" + : ""; + const files = await this._listFiles(prefix); const pathnames = utils.getDatasetsFromListOfFilenames(files); return pathnames; } async availableNarratives() { // Walking logic borrowed from auspice's cli/server/getAvailable.js - const files = await this._listFiles(); + const prefix = this.bucket === MULTI_TENANT_BUCKET + ? "narratives/" + : ""; + const files = await this._listFiles(prefix); return files .filter((file) => file !== 'group-overview.md') .filter((file) => file.endsWith(".md")) @@ -223,6 +237,20 @@ class GroupSource extends Source { } +class GroupDataset extends Dataset { + get baseName() { + return `datasets/${super.baseName}`; + } +} + + +class GroupNarrative extends Narrative { + get baseName() { + return `narratives/${super.baseName}`; + } +} + + /** * Generate the authorization policy for a given Nextstrain Group. * diff --git a/src/utils/scripts.js b/src/utils/scripts.js index 8550854dc..f54a809eb 100644 --- a/src/utils/scripts.js +++ b/src/utils/scripts.js @@ -1,4 +1,41 @@ +const {Console} = require("console"); const process = require("process"); +const {spawn} = require("child_process"); +const {Transform} = require("stream"); + + +/** + * Set up the global `console` object to prefix all output lines with an + * indication of the state of *dryRun*. + * + * @param {{dryRun: boolean}} + */ +function setupConsole({dryRun = true}) { + if (!dryRun) return; + + const LinePrefixer = class extends Transform { + constructor(prefix) { + super(); + this.prefix = prefix; + } + _transform(chunk, encoding, callback) { + // Prefix the beginning of the string and every internal newline, but not + // the last trailing newline. + this.push(chunk.toString().replace(/^|(?<=\n)(?!$)/g, this.prefix)); + callback(); + } + }; + + const stdout = new LinePrefixer("DRY RUN | "); + const stderr = new LinePrefixer("DRY RUN | "); + + stdout.pipe(process.stdout); + stderr.pipe(process.stderr); + + console = new Console({stdout, stderr}); // eslint-disable-line no-global-assign + process.stdout = stdout; + process.stderr = stderr; +} /** @@ -40,6 +77,32 @@ function reportUnhandledRejectionsAtExit() { } +/** + * Run a command with stdout and stderr sent to `console.log()` and + * `console.error()`. + * + * @param {string[]} argv + * @returns {{code, signal, argv}} + */ +async function run(argv) { + return new Promise((resolve, reject) => { + const proc = spawn(argv[0], argv.slice(1), {stdio: ["ignore", "pipe", "pipe"]}); + + proc.stdout.on("data", data => console.log(data.toString().replace(/\n$/, ""))); + proc.stderr.on("data", data => console.error(data.toString().replace(/\n$/, ""))); + + proc.on("close", (code, signal) => { + const result = code !== 0 || signal != null + ? reject + : resolve; + return result({code, signal, argv}); + }); + }); +} + + module.exports = { + setupConsole, reportUnhandledRejectionsAtExit, + run, };