Skip to content

Commit

Permalink
feat: add docker autoscaler executor (#1118)
Browse files Browse the repository at this point in the history
## Description
 
Provides a new executor using the new GitLab autoscaler executor. I've
been using the fleeting plugin for AWS only.

Prerequisite: Docker must already be installed on the AMI used by worker
machines (the Docker autoscaler does not install it, unlike the Docker
machine). Additionally, the user used to connect to the workers must
also be added to the Docker group.

Related to issue
#624

## Verification

Built an AMI with Docker based on Amazon Linux 2023. Set up the new
executor according to the example. Works!

---------

Co-authored-by: Matthias Kay <matthias.kay@hlag.com>
  • Loading branch information
mmoutama09 and kayman-mk authored Aug 3, 2024
1 parent 8e92bba commit 8aaad0c
Show file tree
Hide file tree
Showing 30 changed files with 1,002 additions and 410 deletions.
8 changes: 8 additions & 0 deletions .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,22 @@
"awscli",
"boto",
"botocore",
"buildx",
"certdir",
"checkmarx",
"concat",
"containerd",
"devskim",
"dind",
"dpkg",
"endfor",
"filesha",
"formatlist",
"glrt",
"glrunners",
"hmarr",
"instancelifecycle",
"keyrings",
"kics",
"joho",
"jsonencode",
Expand Down Expand Up @@ -57,22 +61,26 @@
"tonumber",
"trimprefix",
"trivy",
"usermod",
"userns",
"xanzy",
"xvda"
],
"words": [
"aquasecurity",
"automerge",
"autoscaler",
"backports",
"blockquotes",
"bluegreen",
"codeowners",
"cpu",
"cpus",
"cpuset",
"gitter",
"Niek",
"oxsecurity",
"rebalance",
"signoff",
"typecheck",
"userdata",
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
[
"runner-default",
"runner-docker",
"runner-pre-registered",
"runner-fleeting-plugin",
"runner-public",
"runner-certificates",
]
Expand Down
16 changes: 16 additions & 0 deletions data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,19 @@ data "aws_ami" "docker-machine" {

owners = var.runner_worker_docker_machine_ami_owners
}

# Looks up the AMI used for the Docker autoscaler worker instances. Only evaluated when the
# worker type is "docker-autoscaler"; the most recent image matching the filters is selected.
data "aws_ami" "docker-autoscaler" {
  count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0

  # boolean literal instead of the string "true", which relied on implicit type conversion
  most_recent = true

  # one filter block per entry: the entry's key is the filter name, its value the accepted values
  dynamic "filter" {
    for_each = var.runner_worker_docker_autoscaler_ami_filter
    content {
      name   = filter.key
      values = filter.value
    }
  }

  owners = var.runner_worker_docker_autoscaler_ami_owners
}
198 changes: 198 additions & 0 deletions docker_autoscaler.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
#
# This file is responsible for creating the resources needed to run the docker autoscaler plugin from GitLab. It replaces the
# outdated docker+machine driver. The docker+machine driver is a legacy driver that is no longer maintained by GitLab.
#

# Security group for the Docker autoscaler worker instances. It is only created when the
# worker type is "docker-autoscaler"; its rules are declared as separate rule resources.
resource "aws_security_group" "docker_autoscaler" {
  count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0

  name        = "${local.name_sg}-docker-autoscaler"
  description = "Docker autoscaler security group"
  vpc_id      = var.vpc_id

  # module-wide tags plus a "Name" tag
  tags = merge(local.tags, { "Name" = local.name_sg })
}

# Allows the autoscaler workers to open connections to any IPv4 destination
# (all protocols, all ports).
resource "aws_security_group_rule" "autoscaler_egress" {
  count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0

  description = "All egress traffic docker autoscaler"
  type        = "egress"
  from_port   = 0
  to_port     = 0
  protocol    = "-1"
  cidr_blocks = ["0.0.0.0/0"]

  # The rule and the security group share the same count condition, so index 0 exists whenever
  # this rule does. Direct indexing replaces the legacy join("", splat) workaround and matches
  # how the launch template references the group.
  security_group_id = aws_security_group.docker_autoscaler[0].id
}

# Allows all traffic (any protocol, any port) from the runner security group to the
# autoscaler workers.
resource "aws_security_group_rule" "autoscaler_ingress" {
  count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0

  description              = "All ingress traffic from runner security group"
  type                     = "ingress"
  from_port                = 0
  to_port                  = 0
  protocol                 = "-1"
  source_security_group_id = aws_security_group.runner.id

  # The rule and the security group share the same count condition, so index 0 exists whenever
  # this rule does. Direct indexing replaces the legacy join("", splat) workaround and matches
  # how the launch template references the group.
  security_group_id = aws_security_group.docker_autoscaler[0].id
}

# Additional, user-defined ingress rules for the autoscaler workers: one rule per entry of
# runner_worker_docker_autoscaler_asg.sg_ingresses, addressed by list position.
resource "aws_security_group_rule" "extra_autoscaler_ingress" {
  count = var.runner_worker.type == "docker-autoscaler" ? length(var.runner_worker_docker_autoscaler_asg.sg_ingresses) : 0

  description = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].description
  type        = "ingress"
  from_port   = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].from_port
  to_port     = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].to_port
  protocol    = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].protocol
  cidr_blocks = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].cidr_blocks

  # These rules only exist for worker type "docker-autoscaler", i.e. exactly when the security
  # group has one instance. Direct indexing replaces the legacy join("", splat) workaround and
  # matches how the launch template references the group.
  security_group_id = aws_security_group.docker_autoscaler[0].id
}

####################################
###### Launch template Workers #####
####################################
# Launch template describing one Docker autoscaler worker instance. It is only created when
# the worker type is "docker-autoscaler" and is consumed by the worker Auto Scaling group.
resource "aws_launch_template" "this" {
  count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0

  name     = "${local.name_runner_agent_instance}-worker-launch-template"
  image_id = data.aws_ami.docker-autoscaler[0].id
  key_name = aws_key_pair.autoscaler[0].key_name

  # the first configured type is the template's instance type
  instance_type = var.runner_worker_docker_autoscaler_asg.types[0]
  ebs_optimized = var.runner_worker_docker_autoscaler_instance.ebs_optimized

  # the start script is passed gzip-compressed and base64-encoded
  user_data = base64gzip(var.runner_worker_docker_autoscaler_instance.start_script)

  iam_instance_profile {
    name = aws_iam_instance_profile.docker_autoscaler[0].name
  }

  monitoring {
    enabled = var.runner_worker_docker_autoscaler_instance.monitoring
  }

  metadata_options {
    http_tokens                 = var.runner_worker_docker_autoscaler_instance.http_tokens
    http_put_response_hop_limit = var.runner_worker_docker_autoscaler_instance.http_put_response_hop_limit
    instance_metadata_tags      = "enabled"
  }

  network_interfaces {
    # a public IP is associated unless the workers are configured as private-only
    associate_public_ip_address = !var.runner_worker_docker_autoscaler_instance.private_address_only
    security_groups             = [aws_security_group.docker_autoscaler[0].id]
  }

  block_device_mappings {
    device_name = var.runner_worker_docker_autoscaler_instance.root_device_name

    ebs {
      volume_type = var.runner_worker_docker_autoscaler_instance.volume_type
      volume_size = var.runner_worker_docker_autoscaler_instance.root_size
      # iops is only passed for gp3/io1/io2 volumes, throughput only for gp3 volumes
      iops       = contains(["gp3", "io1", "io2"], var.runner_worker_docker_autoscaler_instance.volume_type) ? var.runner_worker_docker_autoscaler_instance.volume_iops : null
      throughput = var.runner_worker_docker_autoscaler_instance.volume_type == "gp3" ? var.runner_worker_docker_autoscaler_instance.volume_throughput : null
    }
  }

  # the same tags are applied to the launched instances and to their volumes
  dynamic "tag_specifications" {
    for_each = ["instance", "volume"]
    content {
      resource_type = tag_specifications.value
      tags          = local.tags
    }
  }

  # tags of the launch template itself
  tags = local.tags

  lifecycle {
    create_before_destroy = true
  }
}

#########################################
# Autoscaling group with launch template
#########################################
# false positive, tags are created with "dynamic" block
# kics-scan ignore-line
resource "aws_autoscaling_group" "autoscaler" {
  # Auto Scaling group holding the Docker autoscaler worker instances. It is only created when
  # the worker type is "docker-autoscaler".
  count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0

  name                = "${local.name_runner_agent_instance}-asg"
  vpc_zone_identifier = var.runner_worker_docker_autoscaler_asg.subnet_ids

  min_size         = 0
  max_size         = var.runner_worker.max_jobs
  desired_capacity = 0 # managed by the fleeting plugin

  capacity_rebalance    = false
  protect_from_scale_in = true
  force_delete          = true

  health_check_type         = var.runner_worker_docker_autoscaler_asg.health_check_type
  health_check_grace_period = var.runner_worker_docker_autoscaler_asg.health_check_grace_period

  # Exactly one of the next two blocks is rendered: the plain launch template when the mixed
  # instances policy is disabled, the mixed instances policy otherwise.
  dynamic "launch_template" {
    for_each = !var.runner_worker_docker_autoscaler_asg.enable_mixed_instances_policy ? [1] : []
    content {
      id      = aws_launch_template.this[0].id
      version = aws_launch_template.this[0].latest_version
    }
  }

  dynamic "mixed_instances_policy" {
    for_each = var.runner_worker_docker_autoscaler_asg.enable_mixed_instances_policy ? [1] : []

    content {
      launch_template {
        launch_template_specification {
          launch_template_id = aws_launch_template.this[0].id
          version            = aws_launch_template.this[0].latest_version
        }

        # one override per configured instance type
        dynamic "override" {
          for_each = var.runner_worker_docker_autoscaler_asg.types
          iterator = worker_type
          content {
            instance_type = worker_type.value
          }
        }
      }

      instances_distribution {
        on_demand_base_capacity                  = var.runner_worker_docker_autoscaler_asg.on_demand_base_capacity
        on_demand_percentage_above_base_capacity = var.runner_worker_docker_autoscaler_asg.on_demand_percentage_above_base_capacity
        spot_allocation_strategy                 = var.runner_worker_docker_autoscaler_asg.spot_allocation_strategy
        spot_instance_pools                      = var.runner_worker_docker_autoscaler_asg.spot_instance_pools
      }
    }
  }

  # instance refresh is only configured for the "rolling" upgrade strategy
  dynamic "instance_refresh" {
    for_each = var.runner_worker_docker_autoscaler_asg.upgrade_strategy == "rolling" ? [1] : []
    content {
      strategy = "Rolling"
      triggers = var.runner_worker_docker_autoscaler_asg.instance_refresh_triggers

      preferences {
        min_healthy_percentage = var.runner_worker_docker_autoscaler_asg.instance_refresh_min_healthy_percentage
      }
    }
  }

  # every module tag is set on the group and propagated to the launched instances
  dynamic "tag" {
    for_each = local.tags
    iterator = entry
    content {
      key                 = entry.key
      value               = entry.value
      propagate_at_launch = true
    }
  }

  lifecycle {
    # do not change these values as we would immediately scale up/down, which is not wanted
    ignore_changes = [
      desired_capacity,
      min_size,
      max_size
    ]
  }
}
73 changes: 73 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,79 @@ module "runner" {
}
```

### Scenario: Use of Docker autoscaler

Docker Machine is no longer maintained by Docker, so GitLab developed the Docker Autoscaler executor (still in beta) to replace it. Set the runner worker type to `docker-autoscaler` to try it out.

Tested with amazon-linux-2-x86 as the runner manager and ubuntu-server-22-lts-x86 as the runner worker. The following commands were added to the original runner worker AMI so that the Docker autoscaler works correctly:

```bash
# Install Docker Engine from Docker's official apt repository on the Ubuntu worker AMI.
# NOTE(review): no command uses sudo — presumably this runs as root while the AMI is built; confirm.
# Add Docker's official GPG key:
apt-get update
apt-get install -y ca-certificates curl
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
chmod a+r /etc/apt/keyrings/docker.asc

# Add the repository to Apt sources (architecture and release codename are detected at run time):
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
tee /etc/apt/sources.list.d/docker.list > /dev/null
apt-get update

# Install the Docker engine, CLI, containerd and the buildx/compose plugins
apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
# Add the "ubuntu" user (the user used to connect to the workers) to the docker group
usermod -aG docker ubuntu
```

#### Configuration example

```hcl
# Example: GitLab runner whose workers are provided by the Docker autoscaler executor.
module "runner" {
  # https://registry.terraform.io/modules/cattle-ops/gitlab-runner/aws/
  source = "cattle-ops/gitlab-runner/aws"

  vpc_id    = module.vpc.vpc_id
  subnet_id = element(module.vpc.private_subnets, 0)

  runner_gitlab = {
    tag_list                                      = "runner_worker"
    type                                          = "instance"
    url                                           = "https://gitlab.com"
    preregistered_runner_token_ssm_parameter_name = "my-gitlab-runner-token-ssm-parameter-name"
  }

  runner_manager = {
    maximum_concurrent_jobs = 5
  }

  runner_worker = {
    max_jobs            = 5
    request_concurrency = 5
    # must be spelled with a hyphen: the module compares the type against "docker-autoscaler"
    type = "docker-autoscaler"
  }

  # settings of the worker Auto Scaling group
  runner_worker_docker_autoscaler_asg = {
    on_demand_percentage_above_base_capacity = 0
    enable_mixed_instances_policy            = true
    idle_time                                = 600
    subnet_ids                               = module.vpc.private_subnets
    types                                    = ["t3a.medium", "t3.medium"]
  }

  # settings of the individual worker instances (read by the worker launch template)
  runner_worker_docker_autoscaler_instance = {
    volume_type          = "gp3"
    private_address_only = true
    ebs_optimized        = true
    root_size            = 40
  }

  runner_worker_docker_autoscaler = {
    connector_config_user = "ubuntu"
  }
}
```

## Examples

A few [examples](https://github.com/cattle-ops/terraform-aws-gitlab-runner/tree/main/examples/) are provided. Use the
Expand Down
6 changes: 4 additions & 2 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

Please see the README of each example for more details:

- [runner-certificates](runner-certificates/) Example showing how to add custom TLS certificates to the runner
- [runner-default](runner-default/) The default setup, private subnet, auto register, runner on spot instances.
- [runner-public](runner-public/) Runner in a public subnet, auto register, runner on spot instances.
- [runner-docker](runner-docker/) Runners run on the same instance as the agent.
- [runner-pre-registered](runner-pre-registered) Runner needs to be preregistered, old setup DEPRECATED.
- [runner-fleeting-plugin](runner-fleeting-plugin/) Runners using the AWS fleeting plugin.
- [runner-public](runner-public/) Runner in a public subnet, auto register, runner on spot instances.

2 changes: 1 addition & 1 deletion examples/runner-certificates/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Create a PEM-encoded `.crt` file containing the public certificate of your Gitla
```hcl
module {
# ...
# Public cert of my companys gitlab instance
# Public cert of my company's gitlab instance
runner_gitlab = {
certificate = file("${path.module}/my_gitlab_instance_cert.crt")
}
Expand Down
Empty file.
Loading

0 comments on commit 8aaad0c

Please sign in to comment.