Skip to content

Commit

Permalink
E2E: use a self-hosted Consul for easier WI testing
Browse files Browse the repository at this point in the history
Our `consulcompat` tests exercise both the Workload Identity and legacy Consul
token workflow, but they are limited to running single node tests. The E2E
cluster is network isolated, so using our HCP Consul cluster runs into a
problem validating WI tokens because it can't reach the JWKS endpoint. In real
production environments, you'd solve this with a CNAME pointing to a public IP
pointing to a proxy with a real domain name. But that's logistically
impractical for our ephemeral nightly cluster.

Migrate the HCP Consul to a single-node Consul cluster on AWS EC2 alongside our
Nomad cluster. Bootstrap TLS and ACLs in Terraform and ensure all nodes can
reach each other. This will allow us to update our Consul tests so they can use
Workload Identity, in a separate PR.

Ref: #19698
  • Loading branch information
tgross committed Apr 2, 2024
1 parent 31f53ce commit 3e0affe
Show file tree
Hide file tree
Showing 24 changed files with 557 additions and 175 deletions.
12 changes: 6 additions & 6 deletions e2e/terraform/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ This folder contains Terraform resources for provisioning a Nomad
cluster on EC2 instances on AWS to use as the target of end-to-end
tests.

Terraform provisions the AWS infrastructure assuming that EC2 AMIs
have already been built via Packer and HCP Consul and HCP Vault
clusters are already running. It deploys a build of Nomad from your
local machine along with configuration files.
Terraform provisions the AWS infrastructure assuming that EC2 AMIs have already
been built via Packer and an HCP Vault cluster is already running. It deploys a
build of Nomad from your local machine along with configuration files, as well
as a single-node Consul server cluster.

## Setup

Expand All @@ -30,8 +30,6 @@ team's vault under `nomad-e2e`.
```
export HCP_CLIENT_ID=
export HCP_CLIENT_SECRET=
export CONSUL_HTTP_TOKEN=
export CONSUL_HTTP_ADDR=
```

The Vault admin token will expire after 6 hours. If you haven't
Expand All @@ -57,6 +55,8 @@ client_count_ubuntu_jammy_amd64 = "4"
client_count_windows_2016_amd64 = "1"
```

You will also need a Consul Enterprise license file.

Optionally, edit the `nomad_local_binary` variable in the
`terraform.tfvars` file to change the path to the local binary of
Nomad you'd like to upload.
Expand Down
17 changes: 17 additions & 0 deletions e2e/terraform/compute.tf
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,23 @@ resource "aws_instance" "client_windows_2016_amd64" {
}
}

# EC2 instance that runs the Consul server for the E2E cluster.
resource "aws_instance" "consul_server" {
  # Image and size
  ami           = data.aws_ami.ubuntu_jammy_amd64.image_id
  instance_type = var.instance_type

  # Placement and access
  availability_zone      = var.availability_zone
  vpc_security_group_ids = [aws_security_group.consul_server.id]
  key_name               = module.keys.key_name
  iam_instance_profile   = data.aws_iam_instance_profile.nomad_e2e_cluster.name

  tags = {
    # ConsulAutoJoin uses the same "auto-join-<random_name>" value that the
    # rendered Consul configs receive as autojoin_value for retry_join.
    ConsulAutoJoin = "auto-join-${local.random_name}"
    Name           = "${local.random_name}-consul-server-ubuntu-jammy-amd64"
    User           = data.aws_caller_identity.current.arn
  }
}


data "external" "packer_sha" {
program = ["/bin/sh", "-c", <<EOT
sha=$(git log -n 1 --pretty=format:%H packer)
Expand Down
81 changes: 81 additions & 0 deletions e2e/terraform/consul-clients.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# consul-clients.tf produces the TLS certificates and configuration files for
# the Consul agents running on the Nomad server and client nodes

# TLS certs for the Consul agents

# Private key for the certificate issued to the Consul agents.
resource "tls_private_key" "consul_agents" {
  ecdsa_curve = "P384"
  algorithm   = "ECDSA"
}

# Certificate signing request for the Consul agents, built from the agent
# private key.
resource "tls_cert_request" "consul_agents" {
  subject {
    common_name = "${local.random_name} Consul agent"
  }

  private_key_pem = tls_private_key.consul_agents.private_key_pem
}

# Consul agent certificate, signed with the `ca` key/cert pair (those
# resources are referenced here but not defined in this file).
resource "tls_locally_signed_cert" "consul_agents" {
  # What gets signed
  cert_request_pem = tls_cert_request.consul_agents.cert_request_pem

  # Who signs it
  ca_cert_pem        = tls_self_signed_cert.ca.cert_pem
  ca_private_key_pem = tls_private_key.ca.private_key_pem

  # 720 hours = 30 days
  validity_period_hours = 720

  # No "server_auth" here, unlike the Consul server certificate.
  allowed_uses = [
    "key_encipherment",
    "digital_signature",
    "client_auth",
  ]
}

# Writes the Consul agent private key into the shared upload directory.
resource "local_sensitive_file" "consul_agents_key" {
  filename = "uploads/shared/consul.d/agent_cert.key.pem"
  content  = tls_private_key.consul_agents.private_key_pem
}

# Writes the signed Consul agent certificate into the shared upload directory.
resource "local_sensitive_file" "consul_agents_cert" {
  filename = "uploads/shared/consul.d/agent_cert.pem"
  content  = tls_locally_signed_cert.consul_agents.cert_pem
}

# Consul tokens for the Consul agents

# Random UUID rendered as the `token` value in the Consul client and server
# config templates.
resource "random_uuid" "consul_agent_token" {
}

# Renders the etc/consul.d/clients.hcl template into the shared upload
# directory, filling in the agent token and the auto-join tag value.
resource "local_sensitive_file" "consul_agent_config_file" {
  filename        = "uploads/shared/consul.d/clients.hcl"
  file_permission = "0600"

  content = templatefile("etc/consul.d/clients.hcl", {
    autojoin_value = "auto-join-${local.random_name}"
    token          = random_uuid.consul_agent_token.result
  })
}

# Consul tokens for the Nomad agents

# Random UUID rendered as the `token` value in the Nomad client and server
# Consul config templates, and as `nomad_token` in the Consul server config.
resource "random_uuid" "consul_token_for_nomad" {
}

# Renders the etc/nomad.d/client-consul.hcl template into the shared upload
# directory with the Nomad token and the per-cluster service names.
resource "local_sensitive_file" "nomad_client_config_for_consul" {
  filename        = "uploads/shared/nomad.d/client-consul.hcl"
  file_permission = "0600"

  content = templatefile("etc/nomad.d/client-consul.hcl", {
    server_service_name = "server-${local.random_name}"
    client_service_name = "client-${local.random_name}"
    token               = random_uuid.consul_token_for_nomad.result
  })
}

# Renders the etc/nomad.d/server-consul.hcl template into the shared upload
# directory; it takes the same template variables as the client variant.
resource "local_sensitive_file" "nomad_server_config_for_consul" {
  filename        = "uploads/shared/nomad.d/server-consul.hcl"
  file_permission = "0600"

  content = templatefile("etc/nomad.d/server-consul.hcl", {
    server_service_name = "server-${local.random_name}"
    client_service_name = "client-${local.random_name}"
    token               = random_uuid.consul_token_for_nomad.result
  })
}
178 changes: 178 additions & 0 deletions e2e/terraform/consul-servers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# consul-servers.tf produces the TLS certificates and configuration files for
# the single-node Consul server cluster

# Consul token for bootstrapping the Consul server

# Random UUID rendered as `management_token` in the Consul server config and
# exported as CONSUL_HTTP_TOKEN to the ACL bootstrap script.
resource "random_uuid" "consul_initial_management_token" {
}

# Saves the management token under keys/ on the machine running Terraform.
resource "local_sensitive_file" "consul_initial_management_token" {
  filename        = "keys/consul_initial_management_token"
  file_permission = "0600"
  content         = random_uuid.consul_initial_management_token.result
}

# Renders the etc/consul.d/servers.hcl template into the shared upload
# directory with all three generated tokens and the auto-join tag value.
resource "local_sensitive_file" "consul_server_config_file" {
  filename        = "uploads/shared/consul.d/servers.hcl"
  file_permission = "0600"

  content = templatefile("etc/consul.d/servers.hcl", {
    autojoin_value   = "auto-join-${local.random_name}"
    management_token = random_uuid.consul_initial_management_token.result
    nomad_token      = random_uuid.consul_token_for_nomad.result
    token            = random_uuid.consul_agent_token.result
  })
}

# TLS cert for the Consul server

# Private key for the Consul server certificate.
resource "tls_private_key" "consul_server" {
  ecdsa_curve = "P384"
  algorithm   = "ECDSA"
}

# Certificate signing request for the Consul server. The SANs cover the
# instance's public and private IPs, loopback, and one DNS name.
resource "tls_cert_request" "consul_server" {
  subject {
    common_name = "${local.random_name} Consul server"
  }

  private_key_pem = tls_private_key.consul_server.private_key_pem

  # NOTE(review): Consul's own tooling names server certs
  # server.<datacenter>.<domain>; confirm "server.consul.global" is the name
  # the server and agent configuration expect.
  dns_names = ["server.consul.global"]

  ip_addresses = [
    aws_instance.consul_server.public_ip,
    aws_instance.consul_server.private_ip,
    "127.0.0.1",
  ]
}

# Consul server certificate, signed with the `ca` key/cert pair (those
# resources are referenced here but not defined in this file).
resource "tls_locally_signed_cert" "consul_server" {
  # What gets signed
  cert_request_pem = tls_cert_request.consul_server.cert_request_pem

  # Who signs it
  ca_cert_pem        = tls_self_signed_cert.ca.cert_pem
  ca_private_key_pem = tls_private_key.ca.private_key_pem

  # 720 hours = 30 days
  validity_period_hours = 720

  # The server certificate is allowed for both client and server auth.
  allowed_uses = [
    "key_encipherment",
    "digital_signature",
    "client_auth",
    "server_auth",
  ]
}

# Writes the Consul server private key into the shared upload directory.
resource "local_sensitive_file" "consul_server_key" {
  filename = "uploads/shared/consul.d/server_cert.key.pem"
  content  = tls_private_key.consul_server.private_key_pem
}

# Writes the signed Consul server certificate into the shared upload directory.
resource "local_sensitive_file" "consul_server_cert" {
  filename = "uploads/shared/consul.d/server_cert.pem"
  content  = tls_locally_signed_cert.consul_server.cert_pem
}

# Renders the etc/consul.d/.environment template with the Consul license.
# If consul_license is unset, the result is a harmless empty license file.
resource "local_sensitive_file" "consul_environment" {
  filename        = "uploads/shared/consul.d/.environment"
  file_permission = "0600"

  content = templatefile("etc/consul.d/.environment", {
    license = var.consul_license
  })
}

# Copies the CA cert, environment file, server cert and key, server config,
# and systemd unit to /tmp on the Consul server over SSH. Moving them into
# place is left to null_resource.install_consul_server_configs.
resource "null_resource" "upload_consul_server_configs" {

  # Every locally rendered file must exist before it can be uploaded.
  depends_on = [
    local_sensitive_file.ca_cert,
    local_sensitive_file.consul_environment,
    local_sensitive_file.consul_server_cert,
    local_sensitive_file.consul_server_key,
    local_sensitive_file.consul_server_config_file,
  ]

  connection {
    type            = "ssh"
    target_platform = "unix"
    host            = aws_instance.consul_server.public_ip
    port            = 22
    user            = "ubuntu"
    private_key     = file("${path.root}/keys/${local.random_name}.pem")
    timeout         = "15m"
  }

  # CA certificate
  provisioner "file" {
    destination = "/tmp/consul_ca.pem"
    source      = "keys/tls_ca.crt"
  }

  # Environment file carrying the license
  provisioner "file" {
    destination = "/tmp/.consul_environment"
    source      = "uploads/shared/consul.d/.environment"
  }

  # Server certificate
  provisioner "file" {
    destination = "/tmp/consul_cert.pem"
    source      = "uploads/shared/consul.d/server_cert.pem"
  }

  # Server private key
  provisioner "file" {
    destination = "/tmp/consul_cert.key.pem"
    source      = "uploads/shared/consul.d/server_cert.key.pem"
  }

  # Server agent configuration
  provisioner "file" {
    destination = "/tmp/consul_server.hcl"
    source      = "uploads/shared/consul.d/servers.hcl"
  }

  # systemd unit
  provisioner "file" {
    destination = "/tmp/consul.service"
    source      = "etc/consul.d/consul-server.service"
  }
}

# Moves the files staged in /tmp by null_resource.upload_consul_server_configs
# into /etc/consul.d and /etc/systemd/system on the Consul server, then
# enables and restarts the consul systemd unit.
resource "null_resource" "install_consul_server_configs" {

  depends_on = [
    null_resource.upload_consul_server_configs,
  ]

  connection {
    type            = "ssh"
    user            = "ubuntu"
    host            = aws_instance.consul_server.public_ip
    port            = 22
    private_key     = file("${path.root}/keys/${local.random_name}.pem")
    target_platform = "unix"
    timeout         = "15m"
  }

  provisioner "remote-exec" {
    inline = [
      # The inline commands run as one shell script. Without errexit a failed
      # `mv` (for example a missing upload) is ignored and only the last
      # command's exit status reaches Terraform.
      "set -e",
      "sudo rm -rf /etc/consul.d/*",
      "sudo mkdir -p /etc/consul.d/bootstrap",
      "sudo mv /tmp/consul_ca.pem /etc/consul.d/ca.pem",
      "sudo mv /tmp/consul_cert.pem /etc/consul.d/cert.pem",
      "sudo mv /tmp/consul_cert.key.pem /etc/consul.d/cert.key.pem",
      "sudo mv /tmp/consul_server.hcl /etc/consul.d/consul.hcl",
      "sudo mv /tmp/consul.service /etc/systemd/system/consul.service",
      "sudo mv /tmp/.consul_environment /etc/consul.d/.environment",
      "sudo systemctl daemon-reload",
      "sudo systemctl enable consul",
      "sudo systemctl restart consul",
    ]
  }
}

# Bootstrapping Consul ACLs:
#
# We can't both bootstrap the ACLs and use the Consul TF provider's
# resource.consul_acl_token in the same Terraform run, because there's no way to
# get the management token into the provider's environment after we bootstrap,
# and we want to pass various tokens in the Nomad and Consul configuration
# files. So we run a bootstrapping script that uses tokens we generate randomly.
locals {
  # Space-separated VAR=value pairs that are prefixed to the bootstrap script
  # invocation: the server's HTTPS address, the CA cert path, and the three
  # generated tokens.
  consul_env = join(" ", [
    "CONSUL_HTTP_ADDR=https://${aws_instance.consul_server.public_ip}:8501",
    "CONSUL_CACERT=keys/tls_ca.crt",
    "CONSUL_HTTP_TOKEN=${random_uuid.consul_initial_management_token.result}",
    "CONSUL_AGENT_TOKEN=${random_uuid.consul_agent_token.result}",
    "NOMAD_CLUSTER_CONSUL_TOKEN=${random_uuid.consul_token_for_nomad.result}",
  ])
}

# Runs scripts/bootstrap-consul.sh on the machine running Terraform, after the
# Consul server configuration has been installed.
resource "null_resource" "bootstrap_consul_acls" {
  depends_on = [null_resource.install_consul_server_configs]

  # Re-run the bootstrap whenever the server address or any token changes.
  # The previous ternary fell back to an "echo ... skipping bootstrap" string,
  # but a trigger value is never executed and the provisioner ran regardless,
  # so that branch guarded nothing. The `command` key is kept so the trigger
  # value is unchanged for a server that has a public IP.
  triggers = {
    command = local.consul_env
  }

  provisioner "local-exec" {
    command = "./scripts/bootstrap-consul.sh"

    # Pass the same variables as local.consul_env through the provisioner
    # environment, so the tokens are not part of the command line that
    # Terraform logs when it executes the provisioner.
    environment = {
      CONSUL_HTTP_ADDR           = "https://${aws_instance.consul_server.public_ip}:8501"
      CONSUL_CACERT              = "keys/tls_ca.crt"
      CONSUL_HTTP_TOKEN          = random_uuid.consul_initial_management_token.result
      CONSUL_AGENT_TOKEN         = random_uuid.consul_agent_token.result
      NOMAD_CLUSTER_CONSUL_TOKEN = random_uuid.consul_token_for_nomad.result
    }
  }
}
1 change: 1 addition & 0 deletions e2e/terraform/etc/consul.d/.environment
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
CONSUL_LICENSE=${license}
8 changes: 0 additions & 8 deletions e2e/terraform/etc/consul.d/client_acl.json

This file was deleted.

41 changes: 41 additions & 0 deletions e2e/terraform/etc/consul.d/clients.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Consul agent configuration template. The ${token} and ${autojoin_value}
# placeholders are filled in by Terraform's templatefile() call.

# Run as a client agent, not a server
server = false

# Logging and local state
log_level = "DEBUG"
data_dir  = "/opt/consul/data"

# Addresses: bind and advertise on the GetPrivateIP template result, and
# accept client connections on all interfaces
bind_addr      = "{{ GetPrivateIP }}"
advertise_addr = "{{ GetPrivateIP }}"
client_addr    = "0.0.0.0"

ports {
  dns      = 8600
  grpc_tls = 8502
}

# Join via AWS instances carrying the ConsulAutoJoin tag
retry_join = ["provider=aws tag_key=ConsulAutoJoin tag_value=${autojoin_value}"]

# The same token is used as both the agent token and the default token
acl {
  enabled = true

  tokens {
    default = "${token}"
    agent   = "${token}"
  }
}

tls {
  defaults {
    ca_file   = "/etc/consul.d/ca.pem"
    cert_file = "/etc/consul.d/cert.pem"
    key_file  = "/etc/consul.d/cert.key.pem"
  }
}

connect {
  enabled = true
}

service {
  name = "consul"
}
12 changes: 0 additions & 12 deletions e2e/terraform/etc/consul.d/clients.json

This file was deleted.

Loading

0 comments on commit 3e0affe

Please sign in to comment.