From c9ec581094c8aef2e28a095f4f84025dffa80cb4 Mon Sep 17 00:00:00 2001 From: Ruy Adorno Date: Fri, 22 Sep 2023 17:49:09 -0400 Subject: [PATCH 1/5] fix: silent refresh errors on active connection When an instance already have active connections, it should not throw errors when trying to refresh cloud instance certificates in the background. Otherwise these errors will bubble up and stop the end user application. This changeset fixes it by adding a more resilient system to the CloudSQLInstance refresh logic that silents errors occured during refresh and keeps valid certificate data that can be used if a refresh is still ongoing or if any error happens when during a refresh. Fixes: https://github.com/GoogleCloudPlatform/cloud-sql-nodejs-connector/issues/201 --- src/cloud-sql-instance.ts | 121 +++++++++++++++---- src/connector.ts | 3 + test/cloud-sql-instance.ts | 240 +++++++++++++++++++++++++++++++++---- 3 files changed, 318 insertions(+), 46 deletions(-) diff --git a/src/cloud-sql-instance.ts b/src/cloud-sql-instance.ts index 6b07c3bb..432de83b 100644 --- a/src/cloud-sql-instance.ts +++ b/src/cloud-sql-instance.ts @@ -43,6 +43,13 @@ interface CloudSQLInstanceOptions { sqlAdminFetcher: Fetcher; } +interface RefreshableValues { + ephemeralCert: SslCert; + host: string; + privateKey: string; + serverCaCert: SslCert; +} + export class CloudSQLInstance { static async getCloudSQLInstance( options: CloudSQLInstanceOptions @@ -56,8 +63,9 @@ export class CloudSQLInstance { private readonly authType: AuthTypes; private readonly sqlAdminFetcher: Fetcher; private readonly limitRateInterval: number; - private ongoingRefreshPromise?: Promise; - private scheduledRefreshID?: ReturnType; + private activeConnection: boolean = false; + private ongoingRefreshPromise?: Promise; + private scheduledRefreshID?: ReturnType | null = undefined; /* eslint-disable-next-line @typescript-eslint/no-explicit-any */ private throttle?: any; public readonly instanceInfo: InstanceConnectionInfo; @@ -107,51 +115,118 @@ export class CloudSQLInstance { } async refresh(): Promise { + const currentRefreshId = this.scheduledRefreshID; + // Since forceRefresh might be invoked during an ongoing refresh // we keep track of the ongoing promise in order to be able to await // for it in the forceRefresh method. // In case the throttle mechanism is already initialized, we add the // extra wait time `limitRateInterval` in order to limit the rate of // requests to Cloud SQL Admin APIs. - this.ongoingRefreshPromise = this.throttle - ? this.throttle(this._refresh).call(this) - : this._refresh(); - - // awaits for the ongoing promise to resolve, since the refresh is - // completed once the promise is resolved, we just free up the reference - // to the promise at this point, ensuring any new call to `forceRefresh` - // is able to trigger a new refresh - await this.ongoingRefreshPromise; - this.ongoingRefreshPromise = undefined; + this.ongoingRefreshPromise = ( + this.throttle && this.scheduledRefreshID + ? this.throttle(this.refreshValues).call(this) + : this.refreshValues() + ) + // These needs to be part of the chain of promise referenced in + // ongoingRefreshPromise in order to avoid race conditions + .then((nextValues: RefreshableValues) => { + // in case the id at the moment of starting this refresh cycle has + // changed, that means that it has been canceled + if (currentRefreshId !== this.scheduledRefreshID) { + return nextValues; + } + + // In case the refreshValues method succeeded + // then we go ahead and update values + this.updateValues(nextValues); - // Initializing the rate limiter at the end of the function so that the - // first refresh cycle is never rate-limited, ensuring there are 2 calls - // allowed prior to waiting a throttle interval. + this.scheduleRefresh(); + + // This is the end of the successful refresh chain, so now + // we release the reference to the ongoingRefreshPromise + this.ongoingRefreshPromise = undefined; + + return nextValues; + }) + .catch((err: unknown) => { + // In case there's already an active connection we won't throw + // refresh errors to the final user, scheduling a new refresh instead. + if (this.activeConnection) { + if (currentRefreshId === this.scheduledRefreshID) { + this.scheduleRefresh(); + } + } else { + throw err as Error; + } + + // This refresh cycle has failed, releases ref to ongoingRefreshPromise + this.ongoingRefreshPromise = undefined; + }); + + // The rate limiter needs to be initialized _after_ assigning a ref + // to ongoingRefreshPromise in order to avoid race conditions with + // the forceRefresh check that ensures a refresh cycle is not ongoing await this.initializeRateLimiter(); + + await this.ongoingRefreshPromise; } - private async _refresh(): Promise { + // The refreshValues method will perform all the necessary async steps + // in order to get a new set of values for an instance that can then be + // used to create new connections to a Cloud SQL instance. It throws in + // case any of the internal steps fails. + private async refreshValues(): Promise { const rsaKeys: RSAKeys = await generateKeys(); const metadata: InstanceMetadata = await this.sqlAdminFetcher.getInstanceMetadata(this.instanceInfo); - this.ephemeralCert = await this.sqlAdminFetcher.getEphemeralCertificate( + const ephemeralCert = await this.sqlAdminFetcher.getEphemeralCertificate( this.instanceInfo, rsaKeys.publicKey, this.authType ); - this.host = selectIpAddress(metadata.ipAddresses, this.ipType); - this.privateKey = rsaKeys.privateKey; - this.serverCaCert = metadata.serverCaCert; + const host = selectIpAddress(metadata.ipAddresses, this.ipType); + const privateKey = rsaKeys.privateKey; + const serverCaCert = metadata.serverCaCert; + + return { + ephemeralCert, + host, + privateKey, + serverCaCert, + }; + } + + private updateValues(nextValues: RefreshableValues): void { + const {ephemeralCert, host, privateKey, serverCaCert} = nextValues; + + this.ephemeralCert = ephemeralCert; + this.host = host; + this.privateKey = privateKey; + this.serverCaCert = serverCaCert; + } + + private scheduleRefresh(): void { + const refreshInterval = getRefreshInterval( + /* c8 ignore next */ + String(this.ephemeralCert?.expirationTime) + ); - this.scheduledRefreshID = setTimeout(() => { - this.refresh(); - }, getRefreshInterval(this.ephemeralCert.expirationTime)); + this.scheduledRefreshID = setTimeout(() => this.refresh(), refreshInterval); } cancelRefresh(): void { if (this.scheduledRefreshID) { clearTimeout(this.scheduledRefreshID); } + this.scheduledRefreshID = null; + } + + // Mark this instance as having an active connection. This is important to + // ensure any possible errors thrown during a future refresh cycle should + // not be thrown to the final user. + setActiveConnection(): void { + this.activeConnection = true; } } diff --git a/src/connector.ts b/src/connector.ts index e63c4778..2723a82a 100644 --- a/src/connector.ts +++ b/src/connector.ts @@ -216,6 +216,9 @@ export class Connector { tlsSocket.once('error', async () => { await cloudSqlInstance.forceRefresh(); }); + tlsSocket.once('secureConnect', async () => { + cloudSqlInstance.setActiveConnection(); + }); return tlsSocket; } diff --git a/test/cloud-sql-instance.ts b/test/cloud-sql-instance.ts index 4f9a76fe..8eb71a6f 100644 --- a/test/cloud-sql-instance.ts +++ b/test/cloud-sql-instance.ts @@ -41,7 +41,8 @@ t.test('CloudSQLInstance', async t => { }, }; - // mocks generateKeys module so that it can return a deterministic result + // mocks crypto module so that it can return a deterministic result + // and set a standard, fast static value for cert refresh interval const {CloudSQLInstance} = t.mock('../src/cloud-sql-instance', { '../src/crypto': { generateKeys: async () => ({ @@ -92,6 +93,28 @@ t.test('CloudSQLInstance', async t => { instance.cancelRefresh(); + t.test('initial refresh error', async t => { + const failedFetcher = { + ...fetcher, + async getInstanceMetadata() { + throw new Error('ERR'); + }, + }; + const instance = new CloudSQLInstance({ + ipType: IpAddressTypes.PUBLIC, + authType: AuthTypes.PASSWORD, + instanceConnectionName: 'my-project:us-east1:my-instance', + sqlAdminFetcher: failedFetcher, + limitRateInterval: 50, + }); + + t.rejects( + instance.refresh(), + /ERR/, + 'should raise the specific error to the end user' + ); + }); + t.test('refresh', t => { const start = Date.now(); let refreshCount = 0; @@ -100,33 +123,123 @@ t.test('CloudSQLInstance', async t => { authType: AuthTypes.PASSWORD, instanceConnectionName: 'my-project:us-east1:my-instance', sqlAdminFetcher: fetcher, + limitRateInterval: 50, }); - const refreshFn = instance.refresh; instance.refresh = () => { if (refreshCount === 2) { - instance.cancelRefresh(); const end = Date.now(); const duration = end - start; t.ok( duration >= 100, `should respect refresh delay time, ${duration}ms elapsed` ); + instance.cancelRefresh(); return t.end(); } refreshCount++; t.ok(refreshCount, `should refresh ${refreshCount} times`); - refreshFn.call(instance); + CloudSQLInstance.prototype.refresh.call(instance); }; // starts out refresh logic instance.refresh(); }); + t.test('refresh error', async t => { + let metadataCount = 0; + const failedFetcher = { + ...fetcher, + async getInstanceMetadata() { + if (metadataCount === 1) { + throw new Error('ERR'); + } + metadataCount++; + return fetcher.getInstanceMetadata(); + }, + }; + const instance = new CloudSQLInstance({ + ipType: IpAddressTypes.PUBLIC, + authType: AuthTypes.PASSWORD, + instanceConnectionName: 'my-project:us-east1:my-instance', + sqlAdminFetcher: failedFetcher, + limitRateInterval: 50, + }); + await (() => + new Promise((res): void => { + let refreshCount = 0; + instance.refresh = function mockRefresh() { + if (refreshCount === 3) { + t.ok('done refreshing 3 times'); + instance.cancelRefresh(); + return res(null); + } + refreshCount++; + t.ok(refreshCount, `should refresh ${refreshCount} times`); + return CloudSQLInstance.prototype.refresh.call(instance); + }; + // starts out refresh logic + instance.refresh(); + instance.setActiveConnection(); + }))(); + }); + + t.test('refresh error with expired cert', async t => { + const {CloudSQLInstance} = t.mock('../src/cloud-sql-instance', { + '../src/crypto': { + generateKeys: async () => ({ + publicKey: '-----BEGIN PUBLIC KEY-----', + privateKey: CLIENT_KEY, + }), + }, + '../src/time': { + getRefreshInterval() { + return 0; // an expired cert will want to reload right away + }, + }, + }); + let metadataCount = 0; + const failedFetcher = { + ...fetcher, + async getInstanceMetadata() { + if (metadataCount === 1) { + throw new Error('ERR'); + } + metadataCount++; + return fetcher.getInstanceMetadata(); + }, + }; + const instance = new CloudSQLInstance({ + ipType: IpAddressTypes.PUBLIC, + authType: AuthTypes.PASSWORD, + instanceConnectionName: 'my-project:us-east1:my-instance', + sqlAdminFetcher: failedFetcher, + limitRateInterval: 50, + }); + await (() => + new Promise((res): void => { + let refreshCount = 0; + instance.refresh = function mockRefresh() { + if (refreshCount === 3) { + t.ok('done refreshing 3 times'); + instance.cancelRefresh(); + return res(null); + } + refreshCount++; + t.ok(refreshCount, `should refresh ${refreshCount} times`); + return CloudSQLInstance.prototype.refresh.call(instance); + }; + // starts out refresh logic + instance.refresh(); + instance.setActiveConnection(); + }))(); + }); + t.test('forceRefresh', async t => { const instance = new CloudSQLInstance({ ipType: IpAddressTypes.PUBLIC, authType: AuthTypes.PASSWORD, instanceConnectionName: 'my-project:us-east1:my-instance', sqlAdminFetcher: fetcher, + limitRateInterval: 50, }); await instance.refresh(); @@ -134,18 +247,16 @@ t.test('CloudSQLInstance', async t => { let cancelRefreshCalled = false; let refreshCalled = false; - const cancelRefreshFn = instance.cancelRefresh; instance.cancelRefresh = () => { cancelRefreshCalled = true; - cancelRefreshFn.call(instance); - instance.cancelRefresh = cancelRefreshFn; + CloudSQLInstance.prototype.cancelRefresh.call(instance); + instance.cancelRefresh = CloudSQLInstance.prototype.cancelRefresh; }; - const refreshFn = instance.refresh; instance.refresh = async () => { refreshCalled = true; - await refreshFn.call(instance); - instance.refresh = refreshFn; + await CloudSQLInstance.prototype.refresh.call(instance); + instance.refresh = CloudSQLInstance.prototype.refresh; }; await instance.forceRefresh(); t.ok( @@ -162,6 +273,7 @@ t.test('CloudSQLInstance', async t => { authType: AuthTypes.PASSWORD, instanceConnectionName: 'my-project:us-east1:my-instance', sqlAdminFetcher: fetcher, + limitRateInterval: 50, }); let cancelRefreshCalled = false; @@ -169,18 +281,14 @@ t.test('CloudSQLInstance', async t => { const refreshPromise = instance.refresh(); - const cancelRefreshFn = instance.cancelRefresh; instance.cancelRefresh = () => { cancelRefreshCalled = true; - cancelRefreshFn.call(instance); - instance.cancelRefresh = cancelRefreshFn; + return CloudSQLInstance.prototype.cancelRefresh.call(instance); }; - const refreshFn = instance.refresh; - instance.refresh = async () => { + instance.refresh = () => { refreshCalled = true; - await refreshFn.call(instance); - instance.refresh = refreshFn; + return CloudSQLInstance.prototype.refresh.call(instance); }; const forceRefreshPromise = instance.forceRefresh(); @@ -197,7 +305,7 @@ t.test('CloudSQLInstance', async t => { ); t.ok(!refreshCalled, 'should not refresh if already happening'); - instance.cancelRefresh(); + CloudSQLInstance.prototype.cancelRefresh.call(instance); }); t.test('refresh post-forceRefresh', async t => { @@ -216,7 +324,6 @@ t.test('CloudSQLInstance', async t => { await (() => new Promise((res): void => { - const refreshFn = instance.refresh; instance.refresh = () => { if (refreshCount === 3) { const end = Date.now(); @@ -230,7 +337,7 @@ t.test('CloudSQLInstance', async t => { } refreshCount++; t.ok(refreshCount, `should refresh ${refreshCount} times`); - refreshFn.call(instance); + CloudSQLInstance.prototype.refresh.call(instance); }; instance.forceRefresh(); }))(); @@ -253,23 +360,110 @@ t.test('CloudSQLInstance', async t => { await (() => new Promise((res): void => { - const refreshFn = instance.refresh; instance.refresh = () => { if (refreshCount === 3) { - instance.cancelRefresh(); const end = Date.now(); const duration = end - start; t.ok( duration >= 150, `should respect refresh delay time + rate limit, ${duration}ms elapsed` ); + instance.cancelRefresh(); return res(null); } refreshCount++; t.ok(refreshCount, `should refresh ${refreshCount} times`); - refreshFn.call(instance); + CloudSQLInstance.prototype.refresh.call(instance); }; }))(); t.strictSame(refreshCount, 3, 'should have refreshed'); }); + + // The cancelRefresh methods should never hang, given the async and timer + // dependent nature of the refresh cycles, it's possible to get into really + // hard to debug race conditions. The set of cancelRefresh tests below just + // ensure that the tests runs and terminates as expected. + t.test('cancelRefresh first cycle', async t => { + const slowFetcher = { + ...fetcher, + async getInstanceMetadata() { + await (() => new Promise(res => setTimeout(res, 50)))(); + return fetcher.getInstanceMetadata(); + }, + }; + const instance = new CloudSQLInstance({ + ipType: IpAddressTypes.PUBLIC, + authType: AuthTypes.PASSWORD, + instanceConnectionName: 'my-project:us-east1:my-instance', + sqlAdminFetcher: slowFetcher, + limitRateInterval: 50, + }); + + // starts a new refresh cycle but do not await on it + instance.refresh(); + + // cancel refresh before the ongoing promise fulfills + instance.cancelRefresh(); + + t.ok('should not leave hanging setTimeout'); + }); + + t.test('cancelRefresh ongoing cycle', async t => { + const slowFetcher = { + ...fetcher, + async getInstanceMetadata() { + await (() => new Promise(res => setTimeout(res, 50)))(); + return fetcher.getInstanceMetadata(); + }, + }; + const instance = new CloudSQLInstance({ + ipType: IpAddressTypes.PUBLIC, + authType: AuthTypes.PASSWORD, + instanceConnectionName: 'my-project:us-east1:my-instance', + sqlAdminFetcher: slowFetcher, + limitRateInterval: 50, + }); + + // simulates an ongoing instance, already has data + await instance.refresh(); + + // starts a new refresh cycle but do not await on it + instance.refresh(); + + instance.cancelRefresh(); + + t.ok('should not leave hanging setTimeout'); + }); + + t.test('cancelRefresh active and ongoing failed cycle', async t => { + let metadataCount = 0; + const failAndSlowFetcher = { + ...fetcher, + async getInstanceMetadata() { + await (() => new Promise(res => setTimeout(res, 50)))(); + if (metadataCount === 1) { + throw new Error('ERR'); + } + metadataCount++; + return fetcher.getInstanceMetadata(); + }, + }; + const instance = new CloudSQLInstance({ + ipType: IpAddressTypes.PUBLIC, + authType: AuthTypes.PASSWORD, + instanceConnectionName: 'my-project:us-east1:my-instance', + sqlAdminFetcher: failAndSlowFetcher, + limitRateInterval: 50, + }); + + await instance.refresh(); + instance.setActiveConnection(); + + // starts a new refresh cycle but do not await on it + instance.refresh(); + + instance.cancelRefresh(); + + t.ok('should not leave hanging setTimeout'); + }); }); From 7204ef0afc8660aacaaecc3c6302d7ce2178fdc1 Mon Sep 17 00:00:00 2001 From: Ruy Adorno Date: Mon, 25 Sep 2023 17:20:17 -0400 Subject: [PATCH 2/5] fixup! fix: silent refresh errors on active connection --- src/cloud-sql-instance.ts | 22 +++-- test/cloud-sql-instance.ts | 176 +++++++++++++++++++------------------ 2 files changed, 101 insertions(+), 97 deletions(-) diff --git a/src/cloud-sql-instance.ts b/src/cloud-sql-instance.ts index 432de83b..6f991d20 100644 --- a/src/cloud-sql-instance.ts +++ b/src/cloud-sql-instance.ts @@ -43,7 +43,7 @@ interface CloudSQLInstanceOptions { sqlAdminFetcher: Fetcher; } -interface RefreshableValues { +interface RefreshResult { ephemeralCert: SslCert; host: string; privateKey: string; @@ -64,7 +64,7 @@ export class CloudSQLInstance { private readonly sqlAdminFetcher: Fetcher; private readonly limitRateInterval: number; private activeConnection: boolean = false; - private ongoingRefreshPromise?: Promise; + private ongoingRefreshPromise?: Promise; private scheduledRefreshID?: ReturnType | null = undefined; /* eslint-disable-next-line @typescript-eslint/no-explicit-any */ private throttle?: any; @@ -125,19 +125,19 @@ export class CloudSQLInstance { // requests to Cloud SQL Admin APIs. this.ongoingRefreshPromise = ( this.throttle && this.scheduledRefreshID - ? this.throttle(this.refreshValues).call(this) - : this.refreshValues() + ? this.throttle(this.performRefresh).call(this) + : this.performRefresh() ) // These needs to be part of the chain of promise referenced in // ongoingRefreshPromise in order to avoid race conditions - .then((nextValues: RefreshableValues) => { + .then((nextValues: RefreshResult) => { // in case the id at the moment of starting this refresh cycle has // changed, that means that it has been canceled if (currentRefreshId !== this.scheduledRefreshID) { - return nextValues; + return; } - // In case the refreshValues method succeeded + // In case the performRefresh method succeeded // then we go ahead and update values this.updateValues(nextValues); @@ -146,8 +146,6 @@ export class CloudSQLInstance { // This is the end of the successful refresh chain, so now // we release the reference to the ongoingRefreshPromise this.ongoingRefreshPromise = undefined; - - return nextValues; }) .catch((err: unknown) => { // In case there's already an active connection we won't throw @@ -172,11 +170,11 @@ export class CloudSQLInstance { await this.ongoingRefreshPromise; } - // The refreshValues method will perform all the necessary async steps + // The performRefresh method will perform all the necessary async steps // in order to get a new set of values for an instance that can then be // used to create new connections to a Cloud SQL instance. It throws in // case any of the internal steps fails. - private async refreshValues(): Promise { + private async performRefresh(): Promise { const rsaKeys: RSAKeys = await generateKeys(); const metadata: InstanceMetadata = await this.sqlAdminFetcher.getInstanceMetadata(this.instanceInfo); @@ -198,7 +196,7 @@ export class CloudSQLInstance { }; } - private updateValues(nextValues: RefreshableValues): void { + private updateValues(nextValues: RefreshResult): void { const {ephemeralCert, host, privateKey, serverCaCert} = nextValues; this.ephemeralCert = ephemeralCert; diff --git a/test/cloud-sql-instance.ts b/test/cloud-sql-instance.ts index 8eb71a6f..70a923b8 100644 --- a/test/cloud-sql-instance.ts +++ b/test/cloud-sql-instance.ts @@ -93,7 +93,7 @@ t.test('CloudSQLInstance', async t => { instance.cancelRefresh(); - t.test('initial refresh error', async t => { + t.test('initial refresh error should throw errors', async t => { const failedFetcher = { ...fetcher, async getInstanceMetadata() { @@ -144,94 +144,100 @@ t.test('CloudSQLInstance', async t => { instance.refresh(); }); - t.test('refresh error', async t => { - let metadataCount = 0; - const failedFetcher = { - ...fetcher, - async getInstanceMetadata() { - if (metadataCount === 1) { - throw new Error('ERR'); - } - metadataCount++; - return fetcher.getInstanceMetadata(); - }, - }; - const instance = new CloudSQLInstance({ - ipType: IpAddressTypes.PUBLIC, - authType: AuthTypes.PASSWORD, - instanceConnectionName: 'my-project:us-east1:my-instance', - sqlAdminFetcher: failedFetcher, - limitRateInterval: 50, - }); - await (() => - new Promise((res): void => { - let refreshCount = 0; - instance.refresh = function mockRefresh() { - if (refreshCount === 3) { - t.ok('done refreshing 3 times'); - instance.cancelRefresh(); - return res(null); + t.test( + 'refresh error should not throw any errors on active connection', + async t => { + let metadataCount = 0; + const failedFetcher = { + ...fetcher, + async getInstanceMetadata() { + if (metadataCount === 1) { + throw new Error('ERR'); } - refreshCount++; - t.ok(refreshCount, `should refresh ${refreshCount} times`); - return CloudSQLInstance.prototype.refresh.call(instance); - }; - // starts out refresh logic - instance.refresh(); - instance.setActiveConnection(); - }))(); - }); + metadataCount++; + return fetcher.getInstanceMetadata(); + }, + }; + const instance = new CloudSQLInstance({ + ipType: IpAddressTypes.PUBLIC, + authType: AuthTypes.PASSWORD, + instanceConnectionName: 'my-project:us-east1:my-instance', + sqlAdminFetcher: failedFetcher, + limitRateInterval: 50, + }); + await (() => + new Promise((res): void => { + let refreshCount = 0; + instance.refresh = function mockRefresh() { + if (refreshCount === 3) { + t.ok('done refreshing 3 times'); + instance.cancelRefresh(); + return res(null); + } + refreshCount++; + t.ok(refreshCount, `should refresh ${refreshCount} times`); + return CloudSQLInstance.prototype.refresh.call(instance); + }; + // starts out refresh logic + instance.refresh(); + instance.setActiveConnection(); + }))(); + } + ); - t.test('refresh error with expired cert', async t => { - const {CloudSQLInstance} = t.mock('../src/cloud-sql-instance', { - '../src/crypto': { - generateKeys: async () => ({ - publicKey: '-----BEGIN PUBLIC KEY-----', - privateKey: CLIENT_KEY, - }), - }, - '../src/time': { - getRefreshInterval() { - return 0; // an expired cert will want to reload right away + t.test( + 'refresh error with expired cert should not throw any errors on active connection', + async t => { + const {CloudSQLInstance} = t.mock('../src/cloud-sql-instance', { + '../src/crypto': { + generateKeys: async () => ({ + publicKey: '-----BEGIN PUBLIC KEY-----', + privateKey: CLIENT_KEY, + }), }, - }, - }); - let metadataCount = 0; - const failedFetcher = { - ...fetcher, - async getInstanceMetadata() { - if (metadataCount === 1) { - throw new Error('ERR'); - } - metadataCount++; - return fetcher.getInstanceMetadata(); - }, - }; - const instance = new CloudSQLInstance({ - ipType: IpAddressTypes.PUBLIC, - authType: AuthTypes.PASSWORD, - instanceConnectionName: 'my-project:us-east1:my-instance', - sqlAdminFetcher: failedFetcher, - limitRateInterval: 50, - }); - await (() => - new Promise((res): void => { - let refreshCount = 0; - instance.refresh = function mockRefresh() { - if (refreshCount === 3) { - t.ok('done refreshing 3 times'); - instance.cancelRefresh(); - return res(null); + '../src/time': { + getRefreshInterval() { + return 0; // an expired cert will want to reload right away + }, + }, + }); + let metadataCount = 0; + const failedFetcher = { + ...fetcher, + async getInstanceMetadata() { + if (metadataCount === 1) { + throw new Error('ERR'); } - refreshCount++; - t.ok(refreshCount, `should refresh ${refreshCount} times`); - return CloudSQLInstance.prototype.refresh.call(instance); - }; - // starts out refresh logic - instance.refresh(); - instance.setActiveConnection(); - }))(); - }); + metadataCount++; + return fetcher.getInstanceMetadata(); + }, + }; + const instance = new CloudSQLInstance({ + ipType: IpAddressTypes.PUBLIC, + authType: AuthTypes.PASSWORD, + instanceConnectionName: 'my-project:us-east1:my-instance', + sqlAdminFetcher: failedFetcher, + limitRateInterval: 50, + }); + await (() => + new Promise((res): void => { + let refreshCount = 0; + instance.refresh = function mockRefresh() { + if (refreshCount === 3) { + t.ok('done refreshing 3 times'); + instance.cancelRefresh(); + return res(null); + } + refreshCount++; + t.ok(refreshCount, `should refresh ${refreshCount} times`); + return CloudSQLInstance.prototype.refresh.call(instance); + }; + // starts out refresh logic + instance.refresh(); + instance.setActiveConnection(); + }))(); + } + ); t.test('forceRefresh', async t => { const instance = new CloudSQLInstance({ From 8bdf41b7ae4109b8f01915fc88606005a160716f Mon Sep 17 00:00:00 2001 From: Ruy Adorno Date: Tue, 26 Sep 2023 16:27:08 -0400 Subject: [PATCH 3/5] fixup! fix: silent refresh errors on active connection --- src/cloud-sql-instance.ts | 63 ++++++++--- src/connector.ts | 2 +- src/time.ts | 5 + test/cloud-sql-instance.ts | 214 +++++++++++++++++++++++++------------ test/time.ts | 21 +++- 5 files changed, 220 insertions(+), 85 deletions(-) diff --git a/src/cloud-sql-instance.ts b/src/cloud-sql-instance.ts index 6f991d20..1eab505b 100644 --- a/src/cloud-sql-instance.ts +++ b/src/cloud-sql-instance.ts @@ -19,7 +19,7 @@ import {InstanceMetadata} from './sqladmin-fetcher'; import {generateKeys} from './crypto'; import {RSAKeys} from './rsa-keys'; import {SslCert} from './ssl-cert'; -import {getRefreshInterval} from './time'; +import {getRefreshInterval, isExpirationTimeValid} from './time'; import {AuthTypes} from './auth-types'; interface Fetcher { @@ -63,8 +63,9 @@ export class CloudSQLInstance { private readonly authType: AuthTypes; private readonly sqlAdminFetcher: Fetcher; private readonly limitRateInterval: number; - private activeConnection: boolean = false; - private ongoingRefreshPromise?: Promise; + private stablishedConnection: boolean = false; + // The ongoing refresh promise is referenced by the `next` property + private next?: Promise; private scheduledRefreshID?: ReturnType | null = undefined; /* eslint-disable-next-line @typescript-eslint/no-explicit-any */ private throttle?: any; @@ -106,8 +107,8 @@ export class CloudSQLInstance { async forceRefresh(): Promise { // if a refresh is already ongoing, just await for its promise to fulfill // so that a new instance info is available before reconnecting - if (this.ongoingRefreshPromise) { - await this.ongoingRefreshPromise; + if (this.next) { + await this.next; return; } this.cancelRefresh(); @@ -123,13 +124,13 @@ export class CloudSQLInstance { // In case the throttle mechanism is already initialized, we add the // extra wait time `limitRateInterval` in order to limit the rate of // requests to Cloud SQL Admin APIs. - this.ongoingRefreshPromise = ( + this.next = ( this.throttle && this.scheduledRefreshID ? this.throttle(this.performRefresh).call(this) : this.performRefresh() ) // These needs to be part of the chain of promise referenced in - // ongoingRefreshPromise in order to avoid race conditions + // next in order to avoid race conditions .then((nextValues: RefreshResult) => { // in case the id at the moment of starting this refresh cycle has // changed, that means that it has been canceled @@ -144,13 +145,13 @@ export class CloudSQLInstance { this.scheduleRefresh(); // This is the end of the successful refresh chain, so now - // we release the reference to the ongoingRefreshPromise - this.ongoingRefreshPromise = undefined; + // we release the reference to the next + this.next = undefined; }) .catch((err: unknown) => { // In case there's already an active connection we won't throw // refresh errors to the final user, scheduling a new refresh instead. - if (this.activeConnection) { + if (this.stablishedConnection) { if (currentRefreshId === this.scheduledRefreshID) { this.scheduleRefresh(); } @@ -158,16 +159,16 @@ export class CloudSQLInstance { throw err as Error; } - // This refresh cycle has failed, releases ref to ongoingRefreshPromise - this.ongoingRefreshPromise = undefined; + // This refresh cycle has failed, releases ref to next + this.next = undefined; }); // The rate limiter needs to be initialized _after_ assigning a ref - // to ongoingRefreshPromise in order to avoid race conditions with + // to next in order to avoid race conditions with // the forceRefresh check that ensures a refresh cycle is not ongoing await this.initializeRateLimiter(); - await this.ongoingRefreshPromise; + await this.next; } // The performRefresh method will perform all the necessary async steps @@ -188,12 +189,40 @@ export class CloudSQLInstance { const privateKey = rsaKeys.privateKey; const serverCaCert = metadata.serverCaCert; - return { + const currentValues = { + ephemeralCert: this.ephemeralCert, + host: this.host, + privateKey: this.privateKey, + serverCaCert: this.serverCaCert, + }; + + const nextValues = { ephemeralCert, host, privateKey, serverCaCert, }; + + // In the rather odd case that the current ephemeral certificate is still + // valid while we get an invalid result from the API calls, then preserve + // the current metadata. + if (this.isValid(currentValues) && !this.isValid(nextValues)) { + return currentValues as RefreshResult; + } + + return nextValues; + } + + private isValid({ + ephemeralCert, + host, + privateKey, + serverCaCert, + }: Partial): boolean { + if (!ephemeralCert || !host || !privateKey || !serverCaCert) { + return false; + } + return isExpirationTimeValid(ephemeralCert.expirationTime); } private updateValues(nextValues: RefreshResult): void { @@ -224,7 +253,7 @@ export class CloudSQLInstance { // Mark this instance as having an active connection. This is important to // ensure any possible errors thrown during a future refresh cycle should // not be thrown to the final user. - setActiveConnection(): void { - this.activeConnection = true; + setStablishedConnection(): void { + this.stablishedConnection = true; } } diff --git a/src/connector.ts b/src/connector.ts index 2723a82a..6054ef64 100644 --- a/src/connector.ts +++ b/src/connector.ts @@ -217,7 +217,7 @@ export class Connector { await cloudSqlInstance.forceRefresh(); }); tlsSocket.once('secureConnect', async () => { - cloudSqlInstance.setActiveConnection(); + cloudSqlInstance.setStablishedConnection(); }); return tlsSocket; } diff --git a/src/time.ts b/src/time.ts index e30188f8..d3a7f16b 100644 --- a/src/time.ts +++ b/src/time.ts @@ -43,3 +43,8 @@ export function getNearestExpiration( } return new Date(certExp).toISOString(); } + +export function isExpirationTimeValid(isoTime: string): boolean { + const expirationTime = Date.parse(isoTime); + return Date.now() < expirationTime; +} diff --git a/test/cloud-sql-instance.ts b/test/cloud-sql-instance.ts index 70a923b8..8204ab0f 100644 --- a/test/cloud-sql-instance.ts +++ b/test/cloud-sql-instance.ts @@ -22,8 +22,8 @@ t.test('CloudSQLInstance', async t => { setupCredentials(t); // setup google-auth credentials mocks const fetcher = { - getInstanceMetadata() { - return Promise.resolve({ + async getInstanceMetadata() { + return { ipAddresses: { public: '127.0.0.1', }, @@ -31,13 +31,13 @@ t.test('CloudSQLInstance', async t => { cert: CA_CERT, expirationTime: '2033-01-06T10:00:00.232Z', }, - }); + }; }, - getEphemeralCertificate() { - return Promise.resolve({ + async getEphemeralCertificate() { + return { cert: CLIENT_CERT, expirationTime: '2033-01-06T10:00:00.232Z', - }); + }; }, }; @@ -54,44 +54,49 @@ t.test('CloudSQLInstance', async t => { getRefreshInterval() { return 50; // defaults to 50ms in unit tests }, + isExpirationTimeValid() { + return true; + }, }, }); - const instance = await CloudSQLInstance.getCloudSQLInstance({ - ipType: IpAddressTypes.PUBLIC, - authType: AuthTypes.PASSWORD, - instanceConnectionName: 'my-project:us-east1:my-instance', - sqlAdminFetcher: fetcher, - }); + t.test('assert basic instance usage and API', async t => { + const instance = await CloudSQLInstance.getCloudSQLInstance({ + ipType: IpAddressTypes.PUBLIC, + authType: AuthTypes.PASSWORD, + instanceConnectionName: 'my-project:us-east1:my-instance', + sqlAdminFetcher: fetcher, + }); - t.same( - instance.ephemeralCert.cert, - CLIENT_CERT, - 'should have expected privateKey' - ); + t.same( + instance.ephemeralCert.cert, + CLIENT_CERT, + 'should have expected privateKey' + ); - t.same( - instance.instanceInfo, - { - projectId: 'my-project', - regionId: 'us-east1', - instanceId: 'my-instance', - }, - 'should have expected connection info' - ); + t.same( + instance.instanceInfo, + { + projectId: 'my-project', + regionId: 'us-east1', + instanceId: 'my-instance', + }, + 'should have expected connection info' + ); - t.same(instance.privateKey, CLIENT_KEY, 'should have expected privateKey'); + t.same(instance.privateKey, CLIENT_KEY, 'should have expected privateKey'); - t.same(instance.host, '127.0.0.1', 'should have expected host'); - t.same(instance.port, 3307, 'should have expected port'); + t.same(instance.host, '127.0.0.1', 'should have expected host'); + t.same(instance.port, 3307, 'should have expected port'); - t.same( - instance.serverCaCert.cert, - CA_CERT, - 'should have expected serverCaCert' - ); + t.same( + instance.serverCaCert.cert, + CA_CERT, + 'should have expected serverCaCert' + ); - instance.cancelRefresh(); + instance.cancelRefresh(); + }); t.test('initial refresh error should throw errors', async t => { const failedFetcher = { @@ -145,7 +150,7 @@ t.test('CloudSQLInstance', async t => { }); t.test( - 'refresh error should not throw any errors on active connection', + 'refresh error should not throw any errors on stablished connection', async t => { let metadataCount = 0; const failedFetcher = { @@ -180,13 +185,13 @@ t.test('CloudSQLInstance', async t => { }; // starts out refresh logic instance.refresh(); - instance.setActiveConnection(); + instance.setStablishedConnection(); }))(); } ); t.test( - 'refresh error with expired cert should not throw any errors on active connection', + 'refresh error with expired cert should not throw any errors on stablished connection', async t => { const {CloudSQLInstance} = t.mock('../src/cloud-sql-instance', { '../src/crypto': { @@ -234,7 +239,7 @@ t.test('CloudSQLInstance', async t => { }; // starts out refresh logic instance.refresh(); - instance.setActiveConnection(); + instance.setStablishedConnection(); }))(); } ); @@ -441,35 +446,112 @@ t.test('CloudSQLInstance', async t => { t.ok('should not leave hanging setTimeout'); }); - t.test('cancelRefresh active and ongoing failed cycle', async t => { - let metadataCount = 0; - const failAndSlowFetcher = { - ...fetcher, - async getInstanceMetadata() { - await (() => new Promise(res => setTimeout(res, 50)))(); - if (metadataCount === 1) { - throw new Error('ERR'); - } - metadataCount++; - return fetcher.getInstanceMetadata(); - }, - }; - const instance = new CloudSQLInstance({ - ipType: IpAddressTypes.PUBLIC, - authType: AuthTypes.PASSWORD, - instanceConnectionName: 'my-project:us-east1:my-instance', - sqlAdminFetcher: failAndSlowFetcher, - limitRateInterval: 50, - }); + t.test( + 'cancelRefresh on stablished connection and ongoing failed cycle', + async t => { + let metadataCount = 0; + const failAndSlowFetcher = { + ...fetcher, + async getInstanceMetadata() { + await (() => new Promise(res => setTimeout(res, 50)))(); + if (metadataCount === 1) { + throw new Error('ERR'); + } + metadataCount++; + return fetcher.getInstanceMetadata(); + }, + }; + const instance = new CloudSQLInstance({ + ipType: IpAddressTypes.PUBLIC, + authType: AuthTypes.PASSWORD, + instanceConnectionName: 'my-project:us-east1:my-instance', + sqlAdminFetcher: failAndSlowFetcher, + limitRateInterval: 50, + }); - await instance.refresh(); - instance.setActiveConnection(); + await instance.refresh(); + instance.setStablishedConnection(); - // starts a new refresh cycle but do not await on it - instance.refresh(); + // starts a new refresh cycle but do not await on it + instance.refresh(); - instance.cancelRefresh(); + instance.cancelRefresh(); - t.ok('should not leave hanging setTimeout'); - }); + t.ok('should not leave hanging setTimeout'); + } + ); + + t.test( + 'get invalid certificate data while having a current valid', + async t => { + let checkedExpirationTimeCount = 0; + const {CloudSQLInstance} = t.mock('../src/cloud-sql-instance', { + '../src/crypto': { + generateKeys: async () => ({ + publicKey: '-----BEGIN PUBLIC KEY-----', + privateKey: CLIENT_KEY, + }), + }, + '../src/time': { + getRefreshInterval() { + return 50; + }, + // succeds first time and fails for next calls + isExpirationTimeValid() { + checkedExpirationTimeCount++; + return checkedExpirationTimeCount < 2; + }, + }, + }); + + // A fetcher mock that will return a new ip on every refresh + let metadataCount = 0; + const updateFetcher = { + ...fetcher, + async getInstanceMetadata() { + const instanceMetadata = await fetcher.getInstanceMetadata(); + const ips = ['127.0.0.1', '127.0.0.2']; + const ipAddresses = { + public: ips[metadataCount], + }; + metadataCount++; + return { + ...instanceMetadata, + ipAddresses, + }; + }, + }; + + const instance = new CloudSQLInstance({ + ipType: IpAddressTypes.PUBLIC, + authType: AuthTypes.PASSWORD, + instanceConnectionName: 'my-project:us-east1:my-instance', + sqlAdminFetcher: updateFetcher, + limitRateInterval: 0, + }); + await (() => + new Promise((res): void => { + let refreshCount = 0; + instance.refresh = function mockRefresh() { + if (refreshCount === 2) { + t.ok('done refreshing 2 times'); + // instance.host value will be 127.0.0.2 if + // isExpirationTimeValid does not work as expected + t.strictSame( + instance.host, + '127.0.0.1', + 'should not have updated values' + ); + instance.cancelRefresh(); + return res(null); + } + refreshCount++; + return CloudSQLInstance.prototype.refresh.call(instance); + }; + // starts out refresh logic + instance.refresh(); + instance.setStablishedConnection(); + }))(); + } + ); }); diff --git a/test/time.ts b/test/time.ts index ac7404b5..90a9a669 100644 --- a/test/time.ts +++ b/test/time.ts @@ -13,7 +13,11 @@ // limitations under the License. import t from 'tap'; -import {getRefreshInterval, getNearestExpiration} from '../src/time'; +import { + getRefreshInterval, + getNearestExpiration, + isExpirationTimeValid, +} from '../src/time'; const datenow = Date.now; Date.now = () => 1672567200000; // 2023-01-01T10:00:00.000Z @@ -122,4 +126,19 @@ t.same( 'should return cert exp' ); +t.ok( + !isExpirationTimeValid('2023-01-01T09:00:00.000Z'), + 'should return false on expired time' +); + +t.ok( + !isExpirationTimeValid('2023-01-01T10:00:00.000Z'), + 'should return false on same (expired) time' +); + +t.ok( + isExpirationTimeValid('2023-01-01T11:00:00.000Z'), + 'should return true on valid time' +); + Date.now = datenow; From 0447d6e91767eb0b1867fcb7b8dbfbcf054f9c8b Mon Sep 17 00:00:00 2001 From: Ruy Adorno Date: Tue, 26 Sep 2023 17:17:07 -0400 Subject: [PATCH 4/5] fixup! fix: silent refresh errors on active connection --- src/cloud-sql-instance.ts | 28 ++++++++++++++-------------- src/connector.ts | 2 +- test/cloud-sql-instance.ts | 8 ++++---- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/cloud-sql-instance.ts b/src/cloud-sql-instance.ts index 1eab505b..e87acfd7 100644 --- a/src/cloud-sql-instance.ts +++ b/src/cloud-sql-instance.ts @@ -63,7 +63,7 @@ export class CloudSQLInstance { private readonly authType: AuthTypes; private readonly sqlAdminFetcher: Fetcher; private readonly limitRateInterval: number; - private stablishedConnection: boolean = false; + private establishedConnection: boolean = false; // The ongoing refresh promise is referenced by the `next` property private next?: Promise; private scheduledRefreshID?: ReturnType | null = undefined; @@ -142,7 +142,11 @@ export class CloudSQLInstance { // then we go ahead and update values this.updateValues(nextValues); - this.scheduleRefresh(); + const refreshInterval = getRefreshInterval( + /* c8 ignore next */ + String(this.ephemeralCert?.expirationTime) + ); + this.scheduleRefresh(refreshInterval); // This is the end of the successful refresh chain, so now // we release the reference to the next @@ -150,10 +154,11 @@ export class CloudSQLInstance { }) .catch((err: unknown) => { // In case there's already an active connection we won't throw - // refresh errors to the final user, scheduling a new refresh instead. - if (this.stablishedConnection) { + // refresh errors to the final user, scheduling a new + // immediate refresh instead. + if (this.establishedConnection) { if (currentRefreshId === this.scheduledRefreshID) { - this.scheduleRefresh(); + this.scheduleRefresh(0); } } else { throw err as Error; @@ -234,13 +239,8 @@ export class CloudSQLInstance { this.serverCaCert = serverCaCert; } - private scheduleRefresh(): void { - const refreshInterval = getRefreshInterval( - /* c8 ignore next */ - String(this.ephemeralCert?.expirationTime) - ); - - this.scheduledRefreshID = setTimeout(() => this.refresh(), refreshInterval); + private scheduleRefresh(delay: number): void { + this.scheduledRefreshID = setTimeout(() => this.refresh(), delay); } cancelRefresh(): void { @@ -253,7 +253,7 @@ export class CloudSQLInstance { // Mark this instance as having an active connection. This is important to // ensure any possible errors thrown during a future refresh cycle should // not be thrown to the final user. - setStablishedConnection(): void { - this.stablishedConnection = true; + setEstablishedConnection(): void { + this.establishedConnection = true; } } diff --git a/src/connector.ts b/src/connector.ts index 6054ef64..ec680a7f 100644 --- a/src/connector.ts +++ b/src/connector.ts @@ -217,7 +217,7 @@ export class Connector { await cloudSqlInstance.forceRefresh(); }); tlsSocket.once('secureConnect', async () => { - cloudSqlInstance.setStablishedConnection(); + cloudSqlInstance.setEstablishedConnection(); }); return tlsSocket; } diff --git a/test/cloud-sql-instance.ts b/test/cloud-sql-instance.ts index 8204ab0f..c17d0a60 100644 --- a/test/cloud-sql-instance.ts +++ b/test/cloud-sql-instance.ts @@ -185,7 +185,7 @@ t.test('CloudSQLInstance', async t => { }; // starts out refresh logic instance.refresh(); - instance.setStablishedConnection(); + instance.setEstablishedConnection(); }))(); } ); @@ -239,7 +239,7 @@ t.test('CloudSQLInstance', async t => { }; // starts out refresh logic instance.refresh(); - instance.setStablishedConnection(); + instance.setEstablishedConnection(); }))(); } ); @@ -470,7 +470,7 @@ t.test('CloudSQLInstance', async t => { }); await instance.refresh(); - instance.setStablishedConnection(); + instance.setEstablishedConnection(); // starts a new refresh cycle but do not await on it instance.refresh(); @@ -550,7 +550,7 @@ t.test('CloudSQLInstance', async t => { }; // starts out refresh logic instance.refresh(); - instance.setStablishedConnection(); + instance.setEstablishedConnection(); }))(); } ); From da6805197773a0092cd5551028b1c495186b0db4 Mon Sep 17 00:00:00 2001 From: Ruy Adorno Date: Tue, 26 Sep 2023 17:38:30 -0400 Subject: [PATCH 5/5] fix typos in test file Co-authored-by: Jack Wotherspoon --- test/cloud-sql-instance.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/cloud-sql-instance.ts b/test/cloud-sql-instance.ts index c17d0a60..e83a25e7 100644 --- a/test/cloud-sql-instance.ts +++ b/test/cloud-sql-instance.ts @@ -150,7 +150,7 @@ t.test('CloudSQLInstance', async t => { }); t.test( - 'refresh error should not throw any errors on stablished connection', + 'refresh error should not throw any errors on established connection', async t => { let metadataCount = 0; const failedFetcher = { @@ -191,7 +191,7 @@ t.test('CloudSQLInstance', async t => { ); t.test( - 'refresh error with expired cert should not throw any errors on stablished connection', + 'refresh error with expired cert should not throw any errors on established connection', async t => { const {CloudSQLInstance} = t.mock('../src/cloud-sql-instance', { '../src/crypto': { @@ -447,7 +447,7 @@ t.test('CloudSQLInstance', async t => { }); t.test( - 'cancelRefresh on stablished connection and ongoing failed cycle', + 'cancelRefresh on established connection and ongoing failed cycle', async t => { let metadataCount = 0; const failAndSlowFetcher = {