description |
---|
Create CloudWatch Alarms for ASG, ALB, Synthetics, CIS Alarms. In this repository, you will find files and detailed guides for creating alarms in AWS CloudWatch for your cloud resources. The setup includes creating alarms for Auto Scaling Groups (ASG) that scale in response to CPU utilization, alarms for Application Load Balancers (ALB) that alert on 4xx errors in traffic, and CIS (Center for Internet Security) alarms that help you adhere to security best practices. Additionally, a Heart Beat Monitor is implemented using AWS CloudWatch Synthetics, allowing you to monitor the availability and response of your critical applications. |
- Create the following Alarms using CloudWatch with the end-to-end use case we have built so far
- AWS Application Load Balancer Alarms
- AWS Autoscaling Group Alarms
- AWS CIS Alarms (Center for Internet Security)
- AWS CloudWatch Synthetics
- Implement a Heart Beat Monitor
- Copy all the files from
15-Autoscaling-with-Launch-Templates\terraform-manifests
- Change the DNS name as per your demo content
name = "cloudwatch1.devopsincloud.com"
- Create a placeholder file to define CloudWatch Variables
# CloudWatch alarms for the Auto Scaling Group
# Scale-out policy that the high-CPU alarm invokes
resource "aws_autoscaling_policy" "high_cpu" {
  name                   = "high-cpu"
  autoscaling_group_name = aws_autoscaling_group.my_asg.name

  # Change capacity by +4 per trigger, with a 300-second cooldown between scaling activities
  adjustment_type    = "ChangeInCapacity"
  scaling_adjustment = 4
  cooldown           = 300
}
# CloudWatch alarm that invokes the high_cpu scaling policy when the average
# CPU utilization of the Auto Scaling Group is at or above 80%.
# It also sends a notification to the SNS topic on ALARM and on return to OK.
resource "aws_cloudwatch_metric_alarm" "app1_asg_cwa_cpu" {
  alarm_name        = "App1-ASG-CWA-CPUUtilization"
  alarm_description = "This metric monitors ec2 cpu utilization and triggers the ASG Scaling policy to scale-out if CPU is above 80%"

  # Metric being watched: average CPU across the instances in the ASG
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"
  statistic   = "Average"
  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.my_asg.name
  }

  # Evaluation window: 2 periods of 120 seconds each, compared with >= 80
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "80"
  period              = "120"
  evaluation_periods  = "2"

  # ALARM: scale out and notify; OK: notify only
  alarm_actions = [aws_autoscaling_policy.high_cpu.arn, aws_sns_topic.myasg_sns_topic.arn]
  ok_actions    = [aws_sns_topic.myasg_sns_topic.arn]
}
# Define CloudWatch Alarms for ALB
# Alert (via SNS) when the number of HTTP 4xx responses returned by the
# targets behind the load balancer exceeds the threshold.
resource "aws_cloudwatch_metric_alarm" "alb_4xx_errors" {
  alarm_name = "App1-ALB-HTTP-4xx-errors"
  # The description intentionally does not hard-code the threshold value, so it
  # stays accurate when the demo threshold below is changed.
  alarm_description = "This metric monitors ALB HTTP 4xx errors and if they are above the configured threshold in specified interval, it is going to send a notification email"

  # Metric being watched: sum of target-generated 4xx responses for this ALB
  namespace   = "AWS/ApplicationELB"
  metric_name = "HTTPCode_Target_4XX_Count"
  statistic   = "Sum"
  dimensions = {
    LoadBalancer = module.alb.lb_arn_suffix
  }

  # Alarm when 2 of the last 3 periods (120 seconds each) are above the threshold
  comparison_operator = "GreaterThanThreshold"
  threshold           = "5" # Demo value; update to a real-world value like 100, 200 etc
  period              = "120"
  datapoints_to_alarm = "2"
  evaluation_periods  = "3"
  treat_missing_data  = "missing"

  # Notify the SNS topic on ALARM and on return to OK
  ok_actions    = [aws_sns_topic.myasg_sns_topic.arn]
  alarm_actions = [aws_sns_topic.myasg_sns_topic.arn]
}
# Per AppELB Metrics
## - HTTPCode_ELB_5XX_Count
## - HTTPCode_ELB_502_Count
## - TargetResponseTime
# Per AppELB, per TG Metrics
## - UnHealthyHostCount
## - HealthyHostCount
## - HTTPCode_Target_4XX_Count
## - TargetResponseTime
# Log group that the CIS alarms read from; the random_pet suffix is appended to the name
resource "aws_cloudwatch_log_group" "cis_log_group" {
  name = "cis-log-group-${random_pet.this.id}"
}
# CIS alarms, created by the cis-alarms submodule of terraform-aws-modules/cloudwatch
module "all_cis_alarms" {
  source  = "terraform-aws-modules/cloudwatch/aws//modules/cis-alarms"
  version = "2.0.0"

  # Log group to watch and SNS topic to notify
  log_group_name = aws_cloudwatch_log_group.cis_log_group.name
  alarm_actions  = [aws_sns_topic.myasg_sns_topic.arn]

  # Controls excluded from alarm creation
  disabled_controls = ["DisableOrDeleteCMK", "VPCChanges"]

  tags = local.common_tags
}
- Understand AWS CloudWatch Synthetics
- Create CloudWatch Synthetics using AWS management console and explore more about it
- Review the following files
- File-1:
sswebsite2\nodejs\node_modules\sswebsite2.js
- File-2: sswebsite2v1.zip
nodejs\node_modules\
- Use
Heart Beat Monitor
sample from AWS Management Console - AWS CloudWatch Synthetics Service - Update your Application DNS Name
# Before
const urls = ['https://google.com'];
# After
const urls = ['https://yourapp.com'];
cd sswebsite2
zip -r sswebsite2v1.zip nodejs
# IAM policy attached to the canary execution role
resource "aws_iam_policy" "cw_canary_iam_policy" {
  name        = "cw-canary-iam-policy"
  path        = "/"
  description = "CloudWatch Canary Synthetic IAM Policy"

  # The policy document is written as a native HCL object and rendered to
  # JSON by jsonencode.
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      # Publish metrics, restricted to the CloudWatchSynthetics namespace
      {
        Sid      = "VisualEditor0"
        Effect   = "Allow"
        Action   = "cloudwatch:PutMetricData"
        Resource = "*"
        Condition = {
          StringEquals = {
            "cloudwatch:namespace" = "CloudWatchSynthetics"
          }
        }
      },
      # S3, CloudWatch Logs and X-Ray actions allowed on all resources
      {
        Sid    = "VisualEditor1"
        Effect = "Allow"
        Action = [
          "s3:PutObject",
          "logs:CreateLogStream",
          "s3:ListAllMyBuckets",
          "logs:CreateLogGroup",
          "logs:PutLogEvents",
          "s3:GetBucketLocation",
          "xray:PutTraceSegments",
        ]
        Resource = "*"
      },
    ]
  })
}
# AWS IAM Role
# Execution role for the Synthetics canary, with the canary IAM policy attached.
resource "aws_iam_role" "cw_canary_iam_role" {
  name        = "cw-canary-iam-role"
  description = "CloudWatch Synthetics lambda execution role for running canaries"
  path        = "/service-role/"

  # Trust policy: lets the Lambda service (lambda.amazonaws.com) assume this role.
  # Built with jsonencode instead of a hand-escaped JSON string so it is
  # readable and cannot be broken by a missed escape character.
  #assume_role_policy = data.aws_iam_policy_document.instance_assume_role_policy.json # (not shown)
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Principal = {
          Service = "lambda.amazonaws.com"
        }
        Action = "sts:AssumeRole"
      },
    ]
  })

  managed_policy_arns = [aws_iam_policy.cw_canary_iam_policy.arn]
}
# S3 bucket referenced by the canary's artifact_s3_location
resource "aws_s3_bucket" "cw_canary_bucket" {
  bucket = "cw-canary-bucket-${random_pet.this.id}"

  # NOTE(review): the inline `acl` argument is deprecated on AWS provider v4+
  # in favor of aws_s3_bucket_acl; confirm the provider version pinned by this project.
  acl = "private"

  # Lets `terraform destroy` remove the bucket even when it still contains objects
  force_destroy = true

  tags = {
    Name        = "My bucket"
    Environment = "Dev"
  }
}
# CloudWatch Synthetics canary (Heart Beat Monitor), scheduled once per minute
resource "aws_synthetics_canary" "sswebsite2" {
  name         = "sswebsite2"
  start_canary = true

  # Canary code: zip archive and the handler entry point inside it
  zip_file = "sswebsite2/sswebsite2v1.zip"
  handler  = "sswebsite2.handler"
  # NOTE(review): confirm this runtime version is still supported by CloudWatch Synthetics
  runtime_version = "syn-nodejs-puppeteer-3.1"

  # Execution role and the S3 location for run artifacts
  execution_role_arn   = aws_iam_role.cw_canary_iam_role.arn
  artifact_s3_location = "s3://${aws_s3_bucket.cw_canary_bucket.id}/sswebsite2"

  schedule {
    expression = "rate(1 minute)"
  }

  run_config {
    active_tracing     = true
    memory_in_mb       = 960
    timeout_in_seconds = 60
  }
}
Step-09-07: c14-05-cloudwatch-synthetics.tf - Create AWS CloudWatch Metric Alarm for Canary Resource
# CloudWatch alarm for the Synthetics Heart Beat Monitor: goes into ALARM when the
# canary's average SuccessPercent is below 90
resource "aws_cloudwatch_metric_alarm" "synthetics_alarm_app1" {
  alarm_name        = "Synthetics-Alarm-App1"
  alarm_description = "Synthetics alarm metric: SuccessPercent LessThanThreshold 90"

  # Metric being watched: success percentage of the sswebsite2 canary
  namespace   = "CloudWatchSynthetics"
  metric_name = "SuccessPercent"
  statistic   = "Average"
  dimensions = {
    CanaryName = aws_synthetics_canary.sswebsite2.id
  }

  # Evaluation: a single 300-second period below 90 is enough to alarm
  comparison_operator = "LessThanThreshold"
  threshold           = "90"
  period              = "300"
  datapoints_to_alarm = "1" # alternative: "2"
  evaluation_periods  = "1" # alternative: "3"
  treat_missing_data  = "breaching" # You can also use "missing"

  # Notify the SNS topic on ALARM and on return to OK
  ok_actions    = [aws_sns_topic.myasg_sns_topic.arn]
  alarm_actions = [aws_sns_topic.myasg_sns_topic.arn]
}
# Terraform Initialize
terraform init
# Terraform Validate
terraform validate
# Terraform Plan
terraform plan
# Terraform Apply
terraform apply -auto-approve
- Confirm SNS Subscription in your email
- Verify EC2 Instances
- Verify Launch Templates (High Level)
- Verify Autoscaling Group (High Level)
- Verify Load Balancer
- Verify Load Balancer Target Group - Health Checks
- Cloud Watch
- ALB Alarm
- ASG Alarm
- CIS Alarms
- Synthetics
- Access and Test
# Access and Test
http://cloudwatch1.devopsincloud.com
http://cloudwatch1.devopsincloud.com/app1/index.html
http://cloudwatch1.devopsincloud.com/app1/metadata.html
# Delete Resources
terraform destroy -auto-approve
# Delete Files
rm -rf .terraform*
rm -rf terraform.tfstate*
terraform import aws_cloudwatch_metric_alarm.test alarm-12345
terraform import aws_cloudwatch_metric_alarm.temp1 alb-4xx-temp-1