Skip to content

Commit 3e43e70

Browse files
authored
feat(elasticache): monitor Redis engine CPU utilization (#442)
Monitor engine utilization by default for Redis ElastiCache clusters. Optionally add alarms based on this metric. Redis is single-threaded and the Redis thread's utilization is available via the EngineCPUUtilization metric. This metric provides visibility into the load of the Redis process itself. --- _By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license_
1 parent d273f84 commit 3e43e70

File tree

7 files changed

+268
-19
lines changed

7 files changed

+268
-19
lines changed

API.md

Lines changed: 110 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/common/monitoring/alarms/UsageAlarmFactory.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,16 @@ export class UsageAlarmFactory {
6060
percentMetric: MetricWithAlarmSupport,
6161
props: UsageThreshold,
6262
disambiguator?: string,
63-
usageType?: UsageType
63+
usageType?: UsageType,
64+
additionalAlarmNameSuffix?: string
6465
) {
65-
const alarmNameSuffix: string =
66-
usageType === undefined ? "CPU-Usage" : `${usageType}-CPU-Usage`;
66+
const alarmNameSuffix: string = [
67+
usageType,
68+
"CPU-Usage",
69+
additionalAlarmNameSuffix,
70+
]
71+
.filter((i) => i !== undefined)
72+
.join("-");
6773
return this.alarmFactory.addAlarm(percentMetric, {
6874
treatMissingData:
6975
props.treatMissingDataOverride ?? TreatMissingData.MISSING,

lib/monitoring/aws-elasticache/ElastiCacheClusterMetricFactory.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,25 @@ export class ElastiCacheClusterMetricFactory {
112112
);
113113
}
114114

115+
/**
116+
* Because Redis is single-threaded, you can use this metric to analyze the load of the Redis process itself.
117+
* Note that you may want to monitor both Engine CPU Utilization as well as CPU Utilization as background
118+
* processes can take up a significant portion of the CPU workload. This is especially important for
119+
* hosts with 2 vCPUs or fewer.
120+
*
121+
* @see https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/CacheMetrics.Redis.html
122+
*/
123+
metricMaxRedisEngineCpuUtilizationInPercent() {
124+
return this.metricFactory.createMetric(
125+
"EngineCPUUtilization",
126+
MetricStatistic.MAX,
127+
"Cluster Engine CPU Utilization",
128+
this.dimensionsMap,
129+
undefined,
130+
Namespace
131+
);
132+
}
133+
115134
metricAverageConnections() {
116135
return this.metricFactory.createMetric(
117136
"CurrConnections",

lib/monitoring/aws-elasticache/ElastiCacheClusterMonitoring.ts

Lines changed: 79 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import {
2+
Column,
23
GraphWidget,
34
HorizontalAnnotation,
45
IWidget,
@@ -13,6 +14,7 @@ import {
1314
BaseMonitoringProps,
1415
CountAxisFromZero,
1516
DefaultGraphWidgetHeight,
17+
DefaultTwoLinerGraphWidgetHalfHeight,
1618
DefaultSummaryWidgetHeight,
1719
ElastiCacheAlarmFactory,
1820
MaxItemsCountThreshold,
@@ -42,10 +44,18 @@ export interface ElastiCacheClusterMonitoringOptions
4244
readonly clusterType: ElastiCacheClusterType;
4345

4446
/**
45-
* Add CPU usage alarm
47+
* Add CPU usage alarm (useful for all clusterTypes including Redis)
4648
*/
4749
readonly addCpuUsageAlarm?: Record<string, UsageThreshold>;
4850

51+
/**
52+
* Add Redis engine CPU usage alarm.
53+
*
54+
* It is recommended to monitor CPU utilization with `addCpuUsageAlarm`
55+
* as well for hosts with two vCPUs or fewer.
56+
*/
57+
readonly addRedisEngineCpuUsageAlarm?: Record<string, UsageThreshold>;
58+
4959
/**
5060
* Add alarm on total number of items
5161
*/
@@ -81,9 +91,11 @@ export interface ElastiCacheClusterMonitoringProps
8191
export class ElastiCacheClusterMonitoring extends Monitoring {
8292
readonly title: string;
8393
readonly clusterUrl?: string;
94+
readonly clusterType: ElastiCacheClusterType;
8495

8596
readonly connectionsMetric: MetricWithAlarmSupport;
8697
readonly cpuUsageMetric: MetricWithAlarmSupport;
98+
readonly redisEngineCpuUsageMetric: MetricWithAlarmSupport;
8799
readonly freeableMemoryMetric: MetricWithAlarmSupport;
88100
readonly unusedMemoryMetric: MetricWithAlarmSupport;
89101
readonly swapMemoryMetric: MetricWithAlarmSupport;
@@ -94,6 +106,7 @@ export class ElastiCacheClusterMonitoring extends Monitoring {
94106
readonly usageAlarmFactory: UsageAlarmFactory;
95107
readonly elastiCacheAlarmFactory: ElastiCacheAlarmFactory;
96108
readonly cpuUsageAnnotations: HorizontalAnnotation[];
109+
readonly redisEngineCpuUsageAnnotations: HorizontalAnnotation[];
97110
readonly itemsCountAnnotations: HorizontalAnnotation[];
98111
readonly evictedItemsCountAnnotations: HorizontalAnnotation[];
99112
readonly memoryUsageAnnotations: HorizontalAnnotation[];
@@ -104,6 +117,8 @@ export class ElastiCacheClusterMonitoring extends Monitoring {
104117
) {
105118
super(scope, props);
106119

120+
this.clusterType = props.clusterType;
121+
107122
const clusterType = capitalizeFirstLetterOnly(
108123
ElastiCacheClusterType[props.clusterType]
109124
);
@@ -127,6 +142,8 @@ export class ElastiCacheClusterMonitoring extends Monitoring {
127142
);
128143
this.connectionsMetric = metricFactory.metricAverageConnections();
129144
this.cpuUsageMetric = metricFactory.metricMaxCpuUtilizationInPercent();
145+
this.redisEngineCpuUsageMetric =
146+
metricFactory.metricMaxRedisEngineCpuUtilizationInPercent();
130147
this.freeableMemoryMetric =
131148
metricFactory.metricAverageFreeableMemoryInBytes();
132149
this.unusedMemoryMetric = metricFactory.metricAverageUnusedMemoryInBytes();
@@ -137,6 +154,7 @@ export class ElastiCacheClusterMonitoring extends Monitoring {
137154
this.itemsEvictedMetrics = metricFactory.metricEvictions();
138155

139156
this.cpuUsageAnnotations = [];
157+
this.redisEngineCpuUsageAnnotations = [];
140158
this.itemsCountAnnotations = [];
141159
this.evictedItemsCountAnnotations = [];
142160
this.memoryUsageAnnotations = [];
@@ -157,6 +175,29 @@ export class ElastiCacheClusterMonitoring extends Monitoring {
157175
this.cpuUsageAnnotations.push(createdAlarm.annotation);
158176
this.addAlarm(createdAlarm);
159177
}
178+
179+
if (
180+
props.addRedisEngineCpuUsageAlarm !== undefined &&
181+
props.clusterType !== ElastiCacheClusterType.REDIS
182+
) {
183+
throw new Error(
184+
"It is only possible to alarm on Redis Engine CPU Usage for Redis clusters"
185+
);
186+
}
187+
188+
for (const disambiguator in props.addRedisEngineCpuUsageAlarm) {
189+
const alarmProps = props.addRedisEngineCpuUsageAlarm[disambiguator];
190+
const createdAlarm = this.usageAlarmFactory.addMaxCpuUsagePercentAlarm(
191+
this.redisEngineCpuUsageMetric,
192+
alarmProps,
193+
disambiguator,
194+
undefined,
195+
"RedisEngine"
196+
);
197+
this.redisEngineCpuUsageAnnotations.push(createdAlarm.annotation);
198+
this.addAlarm(createdAlarm);
199+
}
200+
160201
for (const disambiguator in props.addMaxItemsCountAlarm) {
161202
const alarmProps = props.addMaxItemsCountAlarm[disambiguator];
162203
const createdAlarm = this.elastiCacheAlarmFactory.addMaxItemsCountAlarm(
@@ -214,13 +255,32 @@ export class ElastiCacheClusterMonitoring extends Monitoring {
214255
}
215256

216257
widgets(): IWidget[] {
217-
return [
218-
this.createTitleWidget(),
219-
this.createCpuUsageWidget(QuarterWidth, DefaultGraphWidgetHeight),
220-
this.createMemoryUsageWidget(QuarterWidth, DefaultGraphWidgetHeight),
221-
this.createConnectionsWidget(QuarterWidth, DefaultGraphWidgetHeight),
222-
this.createItemCountWidget(QuarterWidth, DefaultGraphWidgetHeight),
223-
];
258+
if (this.clusterType === ElastiCacheClusterType.REDIS) {
259+
return [
260+
this.createTitleWidget(),
261+
new Column(
262+
this.createCpuUsageWidget(
263+
QuarterWidth,
264+
DefaultTwoLinerGraphWidgetHalfHeight
265+
),
266+
this.createRedisEngineCpuUsageWidget(
267+
QuarterWidth,
268+
DefaultTwoLinerGraphWidgetHalfHeight
269+
)
270+
),
271+
this.createMemoryUsageWidget(QuarterWidth, DefaultGraphWidgetHeight),
272+
this.createConnectionsWidget(QuarterWidth, DefaultGraphWidgetHeight),
273+
this.createItemCountWidget(QuarterWidth, DefaultGraphWidgetHeight),
274+
];
275+
} else {
276+
return [
277+
this.createTitleWidget(),
278+
this.createCpuUsageWidget(QuarterWidth, DefaultGraphWidgetHeight),
279+
this.createMemoryUsageWidget(QuarterWidth, DefaultGraphWidgetHeight),
280+
this.createConnectionsWidget(QuarterWidth, DefaultGraphWidgetHeight),
281+
this.createItemCountWidget(QuarterWidth, DefaultGraphWidgetHeight),
282+
];
283+
}
224284
}
225285

226286
createTitleWidget() {
@@ -242,6 +302,17 @@ export class ElastiCacheClusterMonitoring extends Monitoring {
242302
});
243303
}
244304

305+
createRedisEngineCpuUsageWidget(width: number, height: number) {
306+
return new GraphWidget({
307+
width,
308+
height,
309+
title: "Engine CPU Utilization",
310+
left: [this.redisEngineCpuUsageMetric],
311+
leftYAxis: PercentageAxisFromZeroToHundred,
312+
leftAnnotations: this.redisEngineCpuUsageAnnotations,
313+
});
314+
}
315+
245316
createMemoryUsageWidget(width: number, height: number) {
246317
return new GraphWidget({
247318
width,

test/facade/__snapshots__/MonitoringAspect.test.ts.snap

Lines changed: 6 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/monitoring/aws-elasticache/ElastiCacheClusterMonitoring.test.ts

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ test("snapshot test: all alarms", () => {
5252
new ElastiCacheClusterMonitoring(scope, {
5353
clusterType: ElastiCacheClusterType.REDIS,
5454
addCpuUsageAlarm: { Warning: { maxUsagePercent: 11 } },
55+
addRedisEngineCpuUsageAlarm: { Warning: { maxUsagePercent: 10 } },
5556
addMaxItemsCountAlarm: { Warning: { maxItemsCount: 21 } },
5657
addMaxEvictedItemsCountAlarm: { Warning: { maxItemsCount: 31 } },
5758
addMinFreeableMemoryAlarm: { Warning: { minFreeableMemoryInBytes: 41 } },
@@ -64,7 +65,7 @@ test("snapshot test: all alarms", () => {
6465
});
6566

6667
expect(numAlarmsCreatedForMemcached).toStrictEqual(5);
67-
expect(numAlarmsCreatedForRedis).toStrictEqual(5);
68+
expect(numAlarmsCreatedForRedis).toStrictEqual(6);
6869
expect(Template.fromStack(stack)).toMatchSnapshot();
6970
});
7071

@@ -89,3 +90,18 @@ test("snapshot test: cluster ID specified", () => {
8990

9091
expect(Template.fromStack(stack)).toMatchSnapshot();
9192
});
93+
94+
test("validation test: redisEngineCpuUsageAlarm added for non-redis cluster ", () => {
95+
const stack = new Stack();
96+
const scope = new TestMonitoringScope(stack, "Scope");
97+
98+
expect(
99+
() =>
100+
new ElastiCacheClusterMonitoring(scope, {
101+
clusterType: ElastiCacheClusterType.MEMCACHED,
102+
addRedisEngineCpuUsageAlarm: { Warning: { maxUsagePercent: 10 } },
103+
})
104+
).toThrowError(
105+
"It is only possible to alarm on Redis Engine CPU Usage for Redis clusters"
106+
);
107+
});

test/monitoring/aws-elasticache/__snapshots__/ElastiCacheClusterMonitoring.test.ts.snap

Lines changed: 28 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)