Skip to content

Commit

Permalink
Adjust layout of multi-tenant groups bucket
Browse files Browse the repository at this point in the history
The bucket now stores datasets and narratives separately under a fixed
prefix within each group's prefix so the user-provided names/paths of
the datasets and narratives can't conflict with control objects we store
at the root of the group's prefix.

Besides making security easier to reason about here with the
user-provided names, this will also, in time, allow us to eliminate a
few special-cased exclusions (like excluding group-overview.md from a
list of *.md narratives).

As part of deploying this, we'll need to perform a one-time manual
layout switch behind the scenes using scripts/migrate-groups-layout.
Though migrate-groups-layout is intended to be run only twice:

  1. Immediately before deploy of the layout change.
  2. Immediately after deploy of the layout change, with --delete-after-copy.

…it is designed to be safe to run repeatedly.  However, as it is a
single-use program, it's not designed to live past those two primary
invocations and we should delete it after it's served its purpose.
  • Loading branch information
tsibley committed Jun 22, 2022
1 parent 14bd907 commit 5ff47d9
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 13 deletions.
8 changes: 4 additions & 4 deletions aws/iam/policy/NextstrainDotOrgServerInstanceDev.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@
"arn:aws:s3:::nextstrain-groups"
],
"Condition": {
"StringEquals": {
"StringLike": {
"s3:prefix": [
"blab/",
"test/",
"test-private/"
"blab/*",
"test/*",
"test-private/*"
]
}
}
Expand Down
45 changes: 44 additions & 1 deletion scripts/migrate-group
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,57 @@ async function migrate({group, dryRun = true}) {


/**
 * Sync a group's S3 data into the multi-tenant layout in three passes, each
 * delegated to s3Sync() with its own destination sub-prefix and filters.
 *
 * The passes run one after another, in a fixed order: datasets, narratives,
 * then control/customization files.
 *
 * @param {object} options
 * @param {boolean} [options.dryRun=true] - passed through to every s3Sync() call
 * @param {object} options.group - passed through to every s3Sync() call
 * @returns {Promise<void>}
 */
async function syncData({dryRun = true, group}) {
  const passes = [
    // Datasets
    {
      prefix: "datasets/",
      filters: ["--exclude=*", "--include=*.json"],
    },

    // Narratives (the overview is a control file, not a narrative)
    {
      prefix: "narratives/",
      filters: ["--exclude=*", "--include=*.md", "--exclude=group-overview.md"],
    },

    // Control/customization files, kept at the root of the group's prefix
    {
      prefix: "",
      filters: ["--exclude=*", "--include=group-overview.md", "--include=group-logo.png"],
    },
  ];

  console.group(`\nSyncing S3 data`);

  for (const {prefix, filters} of passes) {
    await s3Sync({dryRun, group, prefix, filters});
  }

  console.groupEnd();
}


async function s3Sync({dryRun = true, group, prefix = "", filters = []}) {
const argv = [
"aws", "s3", "sync",
...(dryRun
? ["--dryrun"]
: []),
"--delete",
`s3://${group.bucket}/`,
`s3://nextstrain-groups/${group.name}/`,
`s3://nextstrain-groups/${group.name}/${prefix}`,
...filters,
];
console.group(`\nRunning ${argv.join(" ")}`);
await run(argv);
Expand Down
160 changes: 160 additions & 0 deletions scripts/migrate-groups-layout
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#!/usr/bin/env node
const {ArgumentParser} = require("argparse");
const S3 = require("@aws-sdk/client-s3");
const process = require("process");

const {reportUnhandledRejectionsAtExit, run, setupConsole} = require("../src/utils/scripts");

const BUCKET = "nextstrain-groups";


/**
 * Build the command-line parser for this program and parse the arguments.
 *
 * --dry-run and --wet-run both write to the same `dryRun` destination; the
 * declared default is true, so changes are only made when --wet-run is given.
 *
 * @returns {object} whatever the argument parser's parseArgs() returns
 */
function parseArgs() {
  const description = `
Migrate layout of new multi-tenant bucket for Nextstrain Groups from old
layout to the new layout.
This program is designed to be idempotent if run multiple times. In
practice, it likely only needs to be run once before deploy of the
layout change and once again (this time with --delete-after-copy) after
deploy.
`;

  const parser = new ArgumentParser({
    usage: `%(prog)s [--dry-run | --wet-run] [--delete-after-copy]`,
    description,
  });

  // Declared in the order they should be registered with the parser.
  const options = [
    ["--dry-run", {
      help: "Go through the motions locally but don't actually make any changes on S3. This is the default.",
      dest: "dryRun",
      action: "storeTrue",
      defaultValue: true,
    }],
    ["--wet-run", {
      help: "Actually make changes on S3.",
      dest: "dryRun",
      action: "storeFalse",
    }],
    ["--delete-after-copy", {
      help: "Delete objects in the old layout after copying them to the new layout.",
      dest: "deleteAfterCopy",
      action: "storeTrue",
      defaultValue: false,
    }],
  ];

  for (const [flag, spec] of options) {
    parser.addArgument(flag, spec);
  }

  return parser.parseArgs();
}


/**
 * Program entry point: set up console output, start the migration, and report
 * how it went.
 *
 * The migration runs asynchronously; this function returns immediately and
 * never rejects. On failure the error is printed and the process exit code is
 * set to 1.
 *
 * @param {object} options
 * @param {boolean} [options.dryRun=true]
 * @param {boolean} [options.deleteAfterCopy=false]
 */
function main({dryRun = true, deleteAfterCopy = false}) {
  const reportSuccess = (counts) => {
    console.log(`\nMigration complete: %o`, counts);
  };

  const reportFailure = (error) => {
    console.error("\n\n%s\n", error);
    console.error("Migration FAILED. See above for details. It's typically safe to re-run this program after fixing the issue.");
    process.exitCode = 1;
  };

  setupConsole({dryRun});

  console.log(`Migrating layout of multi-tenant bucket`);

  migrate({dryRun, deleteAfterCopy})
    .then(reportSuccess)
    .catch(reportFailure);
}


/**
 * Copy (or move) every object in BUCKET from the old layout to the key given
 * by newKeyFor().
 *
 * All objects are listed up front.  Objects for which newKeyFor() returns
 * nothing are left alone.  For the rest, the copy is skipped when an object
 * already exists at the new key with a LastModified at least as recent as the
 * source; this is what makes repeated runs cheap and safe.
 *
 * @param {object} options
 * @param {boolean} [options.dryRun=true] - log what would happen without
 *   sending any copy or delete requests
 * @param {boolean} [options.deleteAfterCopy=false] - delete each old-layout
 *   object once it is known to exist in the new layout
 * @returns {Promise<{copied: number, updated: number, existed: number}>}
 *   how many objects were newly copied, re-copied because the source was
 *   newer, or already up to date (in a dry run: how many would have been)
 * @throws whatever newKeyFor() or the S3 client throws; nothing is caught here
 */
async function migrate({dryRun = true, deleteAfterCopy = false}) {
  const s3 = new S3.S3Client();

  console.log("\nDiscovering objects…");
  const objects = [];

  for await (const page of S3.paginateListObjectsV2({client: s3}, {Bucket: BUCKET})) {
    // A page with no keys (e.g. an empty bucket) has no Contents at all.
    objects.push(...(page.Contents || []));
  }

  const existingKeys = new Map(objects.map(o => [o.Key, o]));

  console.group(`\n${deleteAfterCopy ? "Moving" : "Copying"} objects…`);

  const counts = {copied: 0, updated: 0, existed: 0};

  for (const object of objects) {
    const oldKey = object.Key;
    const newKey = newKeyFor(oldKey);

    if (!newKey) continue;

    let status;
    const existingCopy = existingKeys.get(newKey);
    if (existingCopy) {
      status = existingCopy.LastModified >= object.LastModified
        ? "existed"
        : "updated";
    } else {
      status = "copied";
    }

    if (status !== "existed") {
      console.log(`copying: ${oldKey} → ${newKey}`);

      if (!dryRun) {
        // S3 requires CopySource to be URL-encoded; encode each path segment
        // so keys containing spaces or other special characters still copy.
        const encodedOldKey = oldKey.split("/").map(encodeURIComponent).join("/");

        await s3.send(new S3.CopyObjectCommand({
          CopySource: `${BUCKET}/${encodedOldKey}`,
          Bucket: BUCKET,
          Key: newKey,
        }));
      }
    }

    if (deleteAfterCopy) {
      // Logged in dry runs too, like copies, so a dry run shows the full plan.
      console.log(`deleting: ${oldKey}`);

      if (!dryRun) {
        await s3.send(new S3.DeleteObjectCommand({
          Bucket: BUCKET,
          Key: oldKey,
        }));
      }
    }

    counts[status]++;
  }

  console.groupEnd();

  return counts;
}


/**
 * Work out where an old-layout key belongs in the new layout.
 *
 * @param {string} key - full object key, starting with the group name
 * @returns {string|undefined} the new key, or undefined when shouldCopy()
 *   says the object should stay where it is
 * @throws {Error} when the key should be copied but is neither a *.json nor a
 *   *.md file
 */
function newKeyFor(key) {
  const {groupName, subKey} = parseKey(key);

  if (!shouldCopy(subKey)) {
    return undefined;
  }

  let subPrefix;
  if (subKey.endsWith(".json")) {
    subPrefix = "datasets";
  } else if (subKey.endsWith(".md")) {
    subPrefix = "narratives";
  } else {
    throw new Error(`unrecognized key: ${key}`);
  }

  return `${groupName}/${subPrefix}/${subKey}`;
}


/**
 * Split an object key at its first "/" into the group name and the remainder.
 *
 * @param {string} key
 * @returns {{groupName: string, subKey: string}} subKey is "" when the key
 *   contains no "/"
 */
function parseKey(key) {
  const segments = key.split("/");
  const groupName = segments.shift();
  const subKey = segments.join("/");
  return {groupName, subKey};
}


/**
 * Decide whether an object (identified by its key within the group's prefix)
 * still needs to be copied into the new layout.
 *
 * @param {string} subKey
 * @returns {boolean} false for keys already under "datasets/" or
 *   "narratives/" and for the two group control files; true otherwise
 */
function shouldCopy(subKey) {
  const inNewLayout = ["datasets/", "narratives/"].some(prefix => subKey.startsWith(prefix));
  const isControlFile = ["group-overview.md", "group-logo.png"].includes(subKey);

  return !(inNewLayout || isControlFile);
}


// Script entry point.
// NOTE(review): reportUnhandledRejectionsAtExit comes from ../src/utils/scripts;
// presumably it arranges for unhandled promise rejections to be reported when
// the process exits — confirm against that helper.
reportUnhandledRejectionsAtExit();
// Parse the command line and hand the resulting options to main().
main(parseArgs());
44 changes: 36 additions & 8 deletions src/sources/groups.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ const authz = require("../authz");
const {fetch} = require("../fetch");
const {Group} = require("../groups");
const utils = require("../utils");
const {Source} = require("./models");
const {Source, Dataset, Narrative} = require("./models");

const S3 = new AWS.S3();

Expand Down Expand Up @@ -62,6 +62,13 @@ class GroupSource extends Source {
: "";
}

dataset(pathParts) {
return new GroupDataset(this, pathParts);
}
narrative(pathParts) {
return new GroupNarrative(this, pathParts);
}

async urlFor(path, method = 'GET', headers = {}) {
const normalizedHeaders = utils.normalizeHeaders(headers);
const action = {
Expand All @@ -86,10 +93,12 @@ class GroupSource extends Source {
...action[method].params,
});
}
async _listFiles() {
async _listFiles(listPrefix = "") {
const prefix = this.prefix + listPrefix;

return new Promise((resolve, reject) => {
let files = [];
S3.listObjectsV2({Bucket: this.bucket, Prefix: this.prefix}).eachPage((err, data, done) => {
S3.listObjectsV2({Bucket: this.bucket, Prefix: prefix}).eachPage((err, data, done) => {
if (err) {
utils.warn(`Could not list S3 objects for group '${this.group.name}'\n${err.message}`);
return reject(err);
Expand All @@ -106,22 +115,27 @@ class GroupSource extends Source {
files = files.concat(
data.Contents
.map(object => object.Key)
.filter(key => key.startsWith(this.prefix))
.map(key => key.slice(this.prefix.length))
.filter(subKey => !subKey.startsWith("datasets/") && !subKey.startsWith("narratives/"))
.filter(key => key.startsWith(prefix))
.map(key => key.slice(prefix.length))
);
return done();
});
});
}
async availableDatasets() {
const files = await this._listFiles();
const prefix = this.bucket === MULTI_TENANT_BUCKET
? "datasets/"
: "";
const files = await this._listFiles(prefix);
const pathnames = utils.getDatasetsFromListOfFilenames(files);
return pathnames;
}
async availableNarratives() {
// Walking logic borrowed from auspice's cli/server/getAvailable.js
const files = await this._listFiles();
const prefix = this.bucket === MULTI_TENANT_BUCKET
? "narratives/"
: "";
const files = await this._listFiles(prefix);
return files
.filter((file) => file !== 'group-overview.md')
.filter((file) => file.endsWith(".md"))
Expand Down Expand Up @@ -223,6 +237,20 @@ class GroupSource extends Source {
}


/**
 * Dataset whose base name is the inherited base name placed under the fixed
 * "datasets/" prefix.
 */
class GroupDataset extends Dataset {
  get baseName() {
    const inheritedBaseName = super.baseName;
    return `datasets/${inheritedBaseName}`;
  }
}


/**
 * Narrative whose base name is the inherited base name placed under the fixed
 * "narratives/" prefix.
 */
class GroupNarrative extends Narrative {
  get baseName() {
    const inheritedBaseName = super.baseName;
    return `narratives/${inheritedBaseName}`;
  }
}


/**
* Generate the authorization policy for a given Nextstrain Group.
*
Expand Down

0 comments on commit 5ff47d9

Please sign in to comment.