Skip to content

Commit

Permalink
ENG-2740 Use elapsed time for SSH retry timeout (#124)
Browse files Browse the repository at this point in the history
Use elapsed time to determine the SSH retry timeout instead of the
number of retries.
  • Loading branch information
fabgo authored Oct 4, 2024
1 parent a18fac0 commit d0e583c
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 36 deletions.
8 changes: 2 additions & 6 deletions src/plugins/aws/ssh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,7 @@ import {
AwsSshRoleRequest,
} from "./types";

/** Maximum number of attempts to start an SSH session
*
* Each attempt consumes ~ 1 s.
*/
const MAX_SSH_RETRIES = 6;
const PROPAGATION_TIMEOUT_LIMIT_MS = 30 * 1000;

/** The name of the SessionManager port forwarding document. This document is managed by AWS. */
const START_SSH_SESSION_DOCUMENT_NAME = "AWS-StartSSHSession";
Expand Down Expand Up @@ -84,7 +80,7 @@ export const awsSshProvider: SshProvider<

friendlyName: "AWS",

maxRetries: MAX_SSH_RETRIES,
propagationTimeoutMs: PROPAGATION_TIMEOUT_LIMIT_MS,

preTestAccessPropagationArgs: () => undefined,

Expand Down
9 changes: 3 additions & 6 deletions src/plugins/google/ssh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,8 @@ import { ensureGcpSshInstall } from "./install";
import { importSshKey } from "./ssh-key";
import { GcpSshPermissionSpec, GcpSshRequest } from "./types";

/** Maximum number of attempts to start an SSH session
*
* The length of each attempt varies based on the type of error from a few seconds to < 1s
*/
const MAX_SSH_RETRIES = 24;
// It typically takes < 1 minute for access to propagate on GCP, so set the time limit to 2 minutes.
const PROPAGATION_TIMEOUT_LIMIT_MS = 2 * 60 * 1000;

/**
* There are 7 cases of unprovisioned access in Google Cloud.
Expand Down Expand Up @@ -74,7 +71,7 @@ export const gcpSshProvider: SshProvider<

loginRequiredPattern: /You do not currently have an active account selected/,

maxRetries: MAX_SSH_RETRIES,
propagationTimeoutMs: PROPAGATION_TIMEOUT_LIMIT_MS,

preTestAccessPropagationArgs: (cmdArgs) => {
if (isSudoCommand(cmdArgs)) {
Expand Down
39 changes: 17 additions & 22 deletions src/plugins/ssh/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ const accessPropagationGuard = (
) => {
let isEphemeralAccessDeniedException = false;
let isLoginException = false;
const beforeStart = Date.now();

child.stderr.on("data", (chunk) => {
const chunkString: string = chunk.toString("utf-8");
Expand All @@ -63,11 +62,7 @@ const accessPropagationGuard = (
chunkString.match(message.pattern)
);

if (
match &&
Date.now() <=
beforeStart + (match.validationWindowMs || DEFAULT_VALIDATION_WINDOW_MS)
) {
if (match) {
isEphemeralAccessDeniedException = true;
}

Expand Down Expand Up @@ -136,7 +131,7 @@ type SpawnSshNodeOptions = {
credential?: AwsCredentials;
command: string;
args: string[];
attemptsRemaining: number;
endTime: number;
abortController?: AbortController;
detached?: boolean;
stdio: [StdioNull, StdioNull, StdioPipe];
Expand All @@ -151,13 +146,15 @@ async function spawnSshNode(
return new Promise((resolve, reject) => {
const provider = SSH_PROVIDERS[options.provider];

const attemptsRemaining = options.attemptsRemaining;
if (options.debug) {
const gerund = options.isAccessPropagationPreTest
? "Pre-testing"
: "Trying";
const remainingSeconds = ((options.endTime - Date.now()) / 1e3).toFixed(
1
);
print2(
`Waiting for access to propagate. ${gerund} SSH session... (remaining attempts: ${attemptsRemaining})`
`Waiting for access to propagate. ${gerund} SSH session... (will wait up to ${remainingSeconds} seconds)`
);
}

Expand All @@ -180,23 +177,17 @@ async function spawnSshNode(
// In the case of ephemeral AccessDenied exceptions due to unpropagated
// permissions, continually retry access until success
if (!isAccessPropagated()) {
if (attemptsRemaining <= 0) {
if (options.endTime < Date.now()) {
reject(
`Access did not propagate through ${provider.friendlyName} before max retry attempts were exceeded. Please contact support@p0.dev for assistance.`
`Access did not propagate through ${provider.friendlyName} in time. Please contact support@p0.dev for assistance.`
);
return;
}

delay(RETRY_DELAY_MS)
.then(() =>
spawnSshNode({
...options,
attemptsRemaining: attemptsRemaining - 1,
})
)
.then(() => spawnSshNode(options))
.then((code) => resolve(code))
.catch(reject);

return;
} else if (isLoginException()) {
reject(
Expand Down Expand Up @@ -352,7 +343,8 @@ const preTestAccessPropagationIfNeeded = async <
proxyCommand: string[],
credential: P extends SshProvider<infer _PR, infer _O, infer _SR, infer C>
? C
: undefined
: undefined,
endTime: number
) => {
const testCmdArgs = sshProvider.preTestAccessPropagationArgs(cmdArgs);

Expand All @@ -369,7 +361,7 @@ const preTestAccessPropagationIfNeeded = async <
stdio: ["inherit", "inherit", "pipe"],
debug: cmdArgs.debug,
provider: request.type,
attemptsRemaining: sshProvider.maxRetries,
endTime: endTime,
isAccessPropagationPreTest: true,
});
}
Expand Down Expand Up @@ -413,12 +405,15 @@ export const sshOrScp = async (args: {
}
}

const endTime = Date.now() + sshProvider.propagationTimeoutMs;

const exitCode = await preTestAccessPropagationIfNeeded(
sshProvider,
request,
cmdArgs,
proxyCommand,
credential
credential,
endTime
);

// Only exit if there was an error when pre-testing
Expand All @@ -434,6 +429,6 @@ export const sshOrScp = async (args: {
stdio: ["inherit", "inherit", "pipe"],
debug: cmdArgs.debug,
provider: request.type,
attemptsRemaining: sshProvider.maxRetries,
endTime: endTime,
});
};
4 changes: 2 additions & 2 deletions src/types/ssh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ export type SshProvider<
/** Regex match for error string indicating that CSP login is required */
loginRequiredPattern?: RegExp;

/** Maximum number of times to call this provider's CLI SSH command */
maxRetries: number;
/** Amount of time, in ms, to wait between granting access and giving up on attempting an SSH connection */
propagationTimeoutMs: number;

/** Arguments for a pre-test command to verify access propagation prior
* to actually logging in the user to the ssh session.
Expand Down

0 comments on commit d0e583c

Please sign in to comment.