Skip to content
This repository has been archived by the owner on Nov 14, 2024. It is now read-only.

Commit

Permalink
Merge pull request #28 from TechNative-B-V/feature/update-alarm-creator
Browse files Browse the repository at this point in the history
Updated Readme with a known error. Updated Lambda alarm creator to be able to loop through priorities instead of only running once
  • Loading branch information
AndrNgg authored Aug 30, 2024
2 parents 05f5f46 + 6add13b commit bd192b9
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 59 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ module "observability_sender" {
}
```

## Put exceeded error when running alarm creator Lambda

On the first run you might hit a put exceeded error, because the Lambda tries to create too many alarms at once.

Rerun the Lambda alarm creator a few times, possibly with a shorter list, so that each run stays below the maximum threshold set by AWS.

You also need to clean up the SQS queue in the observability hub account, as the error might remain in the queue even after the problem is resolved.

<!-- BEGIN_TF_DOCS -->
## Providers

Expand Down
118 changes: 59 additions & 59 deletions alarm_creator/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ def AWS_Alarms():
instances = GetRunningDBInstances()
elif service == "CWAgent":
instances = GetRunningInstances()
# elif service == "ECS":
# instances = GetRunningClusters()
# elif service == "ElastiCache":
# instances = GetRunningCacheClusters()
elif service == "ECS":
instances = GetRunningClusters()
elif service == "ElastiCache":
instances = GetRunningCacheClusters()

for alarm in alarms[service]:
# Query the namespaces in CloudWatch Metrics
Expand All @@ -59,63 +59,63 @@ def AWS_Alarms():
else:
cw_threshold = int(threshold)

# Handling dimensions
for instance in instances:

instanceDimensions = {
"Name": f"{alarms[service][alarm]['Dimensions']}",
"Value": instance
}

#Add any additional disk-related dimensions if present
if 'ExtraDimensions' in alarms[service][alarm]:
dimensionlist.extend(alarms[service][alarm]['ExtraDimensions'])

for dimension in dimensionlist:
if dimension["Name"] == "path" and dimension["Value"] == "/":
# Query the namespaces in CloudWatch Metrics
# Find the correct device dimension for the root volume
response_2 = CWclient.list_metrics(Namespace=f"{alarms[service][alarm]['Namespace']}", RecentlyActive='PT3H',
Dimensions=[instanceDimensions, {'Name': 'path', 'Value': '/'}]
# Handling dimensions
for instance in instances:

instanceDimensions = {
"Name": f"{alarms[service][alarm]['Dimensions']}",
"Value": instance
}

#Add any additional disk-related dimensions if present
if 'ExtraDimensions' in alarms[service][alarm]:
dimensionlist.extend(alarms[service][alarm]['ExtraDimensions'])

for dimension in dimensionlist:
if dimension["Name"] == "path" and dimension["Value"] == "/":
# Query the namespaces in CloudWatch Metrics
# Find the correct device dimension for the root volume
response_2 = CWclient.list_metrics(Namespace=f"{alarms[service][alarm]['Namespace']}", RecentlyActive='PT3H',
Dimensions=[instanceDimensions, {'Name': 'path', 'Value': '/'}]
)

for metrics in response_2["Metrics"]:
for dimension in metrics["Dimensions"]:
if dimension['Name'] == "device":

dimensionlist = [
instanceDimensions,
{
"Name": "device",
"Value": f"{dimension['Value']}"
}
]
dimensionlist.extend(alarms[service][alarm]['ExtraDimensions'])
else:
continue
else:
#Clean up dimensionlist if not extra dimensions are present and only add the instance dimension
dimensionlist = []
dimensionlist = [instanceDimensions]


# Create the alarms
CWclient.put_metric_alarm(
AlarmName=f"{instance}-{alarm} {alarms[service][alarm]['Description']['Operatorsymbol']} {threshold} {alarms[service][alarm]['Description']['ThresholdUnit']}",
ComparisonOperator=alarms[service][alarm]['ComparisonOperator'],
EvaluationPeriods=alarms[service][alarm]['EvaluationPeriods'],
MetricName=alarms[service][alarm]['MetricName'],
Namespace=alarms[service][alarm]['Namespace'],
Period=alarms[service][alarm]['Period'],
Statistic=alarms[service][alarm]['Statistic'],
Threshold=cw_threshold,
ActionsEnabled=True,
TreatMissingData=alarms[service][alarm]['TreatMissingData'],
AlarmDescription=f"{priority}",
Dimensions=dimensionlist,
Tags=[{"Key": "CreatedbyLambda", "Value": "True"}],
)

for metrics in response_2["Metrics"]:
for dimension in metrics["Dimensions"]:
if dimension['Name'] == "device":

dimensionlist = [
instanceDimensions,
{
"Name": "device",
"Value": f"{dimension['Value']}"
}
]
dimensionlist.extend(alarms[service][alarm]['ExtraDimensions'])
else:
continue
else:
#Clean up dimensionlist if not extra dimensions are present and only add the instance dimension
dimensionlist = []
dimensionlist = [instanceDimensions]


# Create the alarms
CWclient.put_metric_alarm(
AlarmName=f"{instance}-{alarm} {alarms[service][alarm]['Description']['Operatorsymbol']} {threshold} {alarms[service][alarm]['Description']['ThresholdUnit']}",
ComparisonOperator=alarms[service][alarm]['ComparisonOperator'],
EvaluationPeriods=alarms[service][alarm]['EvaluationPeriods'],
MetricName=alarms[service][alarm]['MetricName'],
Namespace=alarms[service][alarm]['Namespace'],
Period=alarms[service][alarm]['Period'],
Statistic=alarms[service][alarm]['Statistic'],
Threshold=cw_threshold,
ActionsEnabled=True,
TreatMissingData=alarms[service][alarm]['TreatMissingData'],
AlarmDescription=f"{priority}",
Dimensions=dimensionlist,
Tags=[{"Key": "CreatedbyLambda", "Value": "True"}],
)



def GetRunningInstances():
Expand Down

0 comments on commit bd192b9

Please sign in to comment.