Skip to content

Commit ab103ae

Browse files
fix(aws): CleanupAlarmsAgent cycle to catch exceptions (spinnaker#6333)
1 parent bbc923b commit ab103ae

File tree

2 files changed

+32
-27
lines changed

2 files changed

+32
-27
lines changed

clouddriver-aws/src/main/groovy/com/netflix/spinnaker/clouddriver/aws/agent/CleanupAlarmsAgent.groovy

Lines changed: 28 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -86,36 +86,38 @@ class CleanupAlarmsAgent implements RunnableAgent, CustomScheduledAgent {
8686
getAccounts().each { NetflixAmazonCredentials credentials ->
8787
credentials.regions.each { AmazonCredentials.AWSRegion region ->
8888
log.info("Looking for alarms to delete")
89-
90-
def cloudWatch = amazonClientProvider.getCloudWatch(credentials, region.name)
91-
Set<String> attachedAlarms = getAttachedAlarms(amazonClientProvider.getAutoScaling(credentials, region.name))
92-
def describeAlarmsRequest = new DescribeAlarmsRequest().withStateValue(StateValue.INSUFFICIENT_DATA)
93-
94-
while (true) {
95-
def result = cloudWatch.describeAlarms(describeAlarmsRequest)
96-
97-
List<MetricAlarm> alarmsToDelete = result.metricAlarms.findAll {
98-
it.stateUpdatedTimestamp.before(DateTime.now().minusDays(daysToLeave).toDate()) &&
99-
!attachedAlarms.contains(it.alarmName) &&
100-
ALARM_NAME_PATTERN.matcher(it.alarmName).matches()
101-
}
102-
103-
if (alarmsToDelete) {
104-
// terminate up to 20 alarms at a time (avoids any AWS limits on # of concurrent deletes)
105-
alarmsToDelete.collate(20).each {
106-
log.info("Deleting ${it.size()} alarms in ${credentials.name}/${region.name} " +
107-
"(alarms: ${it.alarmName.join(", ")})")
108-
cloudWatch.deleteAlarms(new DeleteAlarmsRequest().withAlarmNames(it.alarmName))
109-
Thread.sleep(500)
89+
try {
90+
def cloudWatch = amazonClientProvider.getCloudWatch(credentials, region.name)
91+
Set<String> attachedAlarms = getAttachedAlarms(amazonClientProvider.getAutoScaling(credentials, region.name))
92+
def describeAlarmsRequest = new DescribeAlarmsRequest().withStateValue(StateValue.INSUFFICIENT_DATA)
93+
94+
while (true) {
95+
def result = cloudWatch.describeAlarms(describeAlarmsRequest)
96+
97+
List<MetricAlarm> alarmsToDelete = result.metricAlarms.findAll {
98+
it.stateUpdatedTimestamp.before(DateTime.now().minusDays(daysToLeave).toDate()) &&
99+
!attachedAlarms.contains(it.alarmName) &&
100+
ALARM_NAME_PATTERN.matcher(it.alarmName).matches()
110101
}
111102

112-
}
103+
if (alarmsToDelete) {
104+
// terminate up to 20 alarms at a time (avoids any AWS limits on # of concurrent deletes)
105+
alarmsToDelete.collate(20).each {
106+
log.info("Deleting ${it.size()} alarms in ${credentials.name}/${region.name} " +
107+
"(alarms: ${it.alarmName.join(", ")})")
108+
cloudWatch.deleteAlarms(new DeleteAlarmsRequest().withAlarmNames(it.alarmName))
109+
Thread.sleep(500)
110+
}
111+
}
113112

114-
if (result.nextToken) {
115-
describeAlarmsRequest.withNextToken(result.nextToken)
116-
} else {
117-
break
113+
if (result.nextToken) {
114+
describeAlarmsRequest.withNextToken(result.nextToken)
115+
} else {
116+
break
117+
}
118118
}
119+
} catch (Exception e) {
120+
log.error("Error occurred while processing alarms for ${credentials.name}/${region.name}: ${e.message}", e)
119121
}
120122
}
121123
}

clouddriver-aws/src/main/groovy/com/netflix/spinnaker/clouddriver/aws/agent/CleanupDetachedInstancesAgent.groovy

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ class CleanupDetachedInstancesAgent implements RunnableAgent, CustomScheduledAge
7474
getAccounts().each { NetflixAmazonCredentials credentials ->
7575
credentials.regions.each { AmazonCredentials.AWSRegion region ->
7676
log.info("Looking for instances pending termination in ${credentials.name}:${region.name}")
77-
77+
try {
7878
def amazonEC2 = amazonClientProvider.getAmazonEC2(credentials, region.name, true)
7979
def describeInstancesRequest = new DescribeInstancesRequest().withFilters(
8080
new Filter("tag-key", [DetachInstancesAtomicOperation.TAG_PENDING_TERMINATION])
@@ -103,6 +103,9 @@ class CleanupDetachedInstancesAgent implements RunnableAgent, CustomScheduledAge
103103
break
104104
}
105105
}
106+
} catch (Exception e) {
107+
log.error("Error occurred while processing instances pending termination for ${credentials.name}/${region.name}: ${e.message}", e)
108+
}
106109
}
107110
}
108111
}

0 commit comments

Comments
 (0)