Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENG-2740 Use elapsed time for SSH retry timeout #124

Merged
merged 12 commits into from
Oct 4, 2024
8 changes: 2 additions & 6 deletions src/plugins/aws/ssh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,7 @@ import {
AwsSshRoleRequest,
} from "./types";

/** Maximum number of attempts to start an SSH session
*
* Each attempt consumes ~ 1 s.
*/
const MAX_SSH_RETRIES = 6;
const PROPAGATION_TIMEOUT_LIMIT_MS = 30 * 1000;

/** The name of the SessionManager port forwarding document. This document is managed by AWS. */
const START_SSH_SESSION_DOCUMENT_NAME = "AWS-StartSSHSession";
Expand Down Expand Up @@ -84,7 +80,7 @@ export const awsSshProvider: SshProvider<

friendlyName: "AWS",

maxRetries: MAX_SSH_RETRIES,
propagationTimeoutMs: PROPAGATION_TIMEOUT_LIMIT_MS,

preTestAccessPropagationArgs: () => undefined,

Expand Down
9 changes: 3 additions & 6 deletions src/plugins/google/ssh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,8 @@ import { ensureGcpSshInstall } from "./install";
import { importSshKey } from "./ssh-key";
import { GcpSshPermissionSpec, GcpSshRequest } from "./types";

/** Maximum number of attempts to start an SSH session
*
* The length of each attempt varies based on the type of error from a few seconds to < 1s
*/
const MAX_SSH_RETRIES = 24;
// It typically takes < 1 minute for access to propagate on GCP, so set the time limit to 2 minutes.
const PROPAGATION_TIMEOUT_LIMIT_MS = 2 * 60 * 1000;

/**
* There are 7 cases of unprovisioned access in Google Cloud.
Expand Down Expand Up @@ -74,7 +71,7 @@ export const gcpSshProvider: SshProvider<

loginRequiredPattern: /You do not currently have an active account selected/,

maxRetries: MAX_SSH_RETRIES,
propagationTimeoutMs: PROPAGATION_TIMEOUT_LIMIT_MS,

preTestAccessPropagationArgs: (cmdArgs) => {
if (isSudoCommand(cmdArgs)) {
Expand Down
39 changes: 17 additions & 22 deletions src/plugins/ssh/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ const accessPropagationGuard = (
) => {
let isEphemeralAccessDeniedException = false;
let isLoginException = false;
const beforeStart = Date.now();

child.stderr.on("data", (chunk) => {
const chunkString: string = chunk.toString("utf-8");
Expand All @@ -63,11 +62,7 @@ const accessPropagationGuard = (
chunkString.match(message.pattern)
);

if (
match &&
Date.now() <=
beforeStart + (match.validationWindowMs || DEFAULT_VALIDATION_WINDOW_MS)
) {
if (match) {
isEphemeralAccessDeniedException = true;
}

Expand Down Expand Up @@ -136,7 +131,7 @@ type SpawnSshNodeOptions = {
credential?: AwsCredentials;
command: string;
args: string[];
attemptsRemaining: number;
endTime: number;
abortController?: AbortController;
detached?: boolean;
stdio: [StdioNull, StdioNull, StdioPipe];
Expand All @@ -151,13 +146,15 @@ async function spawnSshNode(
return new Promise((resolve, reject) => {
const provider = SSH_PROVIDERS[options.provider];

const attemptsRemaining = options.attemptsRemaining;
if (options.debug) {
const gerund = options.isAccessPropagationPreTest
? "Pre-testing"
: "Trying";
const remainingSeconds = ((options.endTime - Date.now()) / 1e3).toFixed(
1
);
print2(
`Waiting for access to propagate. ${gerund} SSH session... (remaining attempts: ${attemptsRemaining})`
`Waiting for access to propagate. ${gerund} SSH session... (will wait up to ${remainingSeconds} seconds)`
);
}

Expand All @@ -180,23 +177,17 @@ async function spawnSshNode(
// In the case of ephemeral AccessDenied exceptions due to unpropagated
// permissions, continually retry access until success
if (!isAccessPropagated()) {
if (attemptsRemaining <= 0) {
if (options.endTime < Date.now()) {
reject(
`Access did not propagate through ${provider.friendlyName} before max retry attempts were exceeded. Please contact support@p0.dev for assistance.`
`Access did not propagate through ${provider.friendlyName} in time. Please contact support@p0.dev for assistance.`
);
return;
}

delay(RETRY_DELAY_MS)
.then(() =>
spawnSshNode({
...options,
attemptsRemaining: attemptsRemaining - 1,
})
)
.then(() => spawnSshNode(options))
.then((code) => resolve(code))
.catch(reject);

return;
} else if (isLoginException()) {
reject(
Expand Down Expand Up @@ -352,7 +343,8 @@ const preTestAccessPropagationIfNeeded = async <
proxyCommand: string[],
credential: P extends SshProvider<infer _PR, infer _O, infer _SR, infer C>
? C
: undefined
: undefined,
endTime: number
) => {
const testCmdArgs = sshProvider.preTestAccessPropagationArgs(cmdArgs);

Expand All @@ -369,7 +361,7 @@ const preTestAccessPropagationIfNeeded = async <
stdio: ["inherit", "inherit", "pipe"],
debug: cmdArgs.debug,
provider: request.type,
attemptsRemaining: sshProvider.maxRetries,
endTime: endTime,
isAccessPropagationPreTest: true,
});
}
Expand Down Expand Up @@ -413,12 +405,15 @@ export const sshOrScp = async (args: {
}
}

const endTime = Date.now() + sshProvider.propagationTimeoutMs;

const exitCode = await preTestAccessPropagationIfNeeded(
sshProvider,
request,
cmdArgs,
proxyCommand,
credential
credential,
endTime
);

// Only exit if there was an error when pre-testing
Expand All @@ -434,6 +429,6 @@ export const sshOrScp = async (args: {
stdio: ["inherit", "inherit", "pipe"],
debug: cmdArgs.debug,
provider: request.type,
attemptsRemaining: sshProvider.maxRetries,
endTime: endTime,
});
};
4 changes: 2 additions & 2 deletions src/types/ssh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ export type SshProvider<
/** Regex match for error string indicating that CSP login is required */
loginRequiredPattern?: RegExp;

/** Maximum number of times to call this provider's CLI SSH command */
maxRetries: number;
/** Amount of time, in ms, to wait between granting access and giving up on attempting an SSH connection */
propagationTimeoutMs: number;

/** Arguments for a pre-test command to verify access propagation prior
* to actually logging in the user to the ssh session.
Expand Down
Loading