
Commit

MIGRATIONS-963 Added CloudWatch Agent to Demo HAProxy Primary (opensearch-project#111)

* MIGRATIONS-963 Pipe AWS Creds through to container

* The HAProxy container needs AWS Credentials and other context
  in order to offload the logs.  When the containers are run in an
  AWS-context, these will be provided by Fargate/ECS/whatever
  natively.  For our demo, we need to pipe them in manually but
  preferably in a manner similar to how they will be available in
  an AWS-context.

Signed-off-by: Chris Helma <chelma+github@amazon.com>

* MIGRATIONS-963 Added CloudWatch Agent to Demo HAProxy Primary

Signed-off-by: Chris Helma <chelma+github@amazon.com>

---------

Signed-off-by: Chris Helma <chelma+github@amazon.com>
chelma committed Feb 22, 2023
1 parent 146b304 commit 0a40c7d
Showing 11 changed files with 246 additions and 71 deletions.
@@ -14,17 +14,18 @@


 def gen_docker_run(image: str, name: str, network: str, ports: Dict[str, str], volumes: Dict[str, Dict[str, str]],
-                   ulimits: List[Ulimit], detach: bool, environment: Dict[str, str],
-                   extra_hosts: Dict[str, str]) -> str:
+                   ulimits: List[Ulimit], detach: bool, environment: List[str],
+                   extra_hosts: Dict[str, str], entrypoint: List[str]) -> str:
     prefix = "docker run"
     name_section = f"--name {name}"
     network_section = f"--network {network}"
     publish_strs = [f"--publish {host_port}:{container_port}" for container_port, host_port in ports.items()]
     publish_section = " ".join(publish_strs)
     volumes_section = " ".join([f"--volume {k}:{v['bind']}:{v['mode']}" for k, v in volumes.items()])
     ulimits_section = " ".join([f"--ulimit {u.name}={u.soft}:{u.hard}" for u in ulimits])
-    environment_section = " ".join([f"--env {k}='{v}'" for k, v in environment.items()])
+    environment_section = " ".join([f"--env {entry}" for entry in environment])
     extra_hosts_section = " ".join([f"--add-host {k}:{v}" for k, v in extra_hosts.items()])
+    entrypoint_section = " ".join([f"--entrypoint {cmd}" for cmd in entrypoint])
     detach_section = "--detach" if detach else ""
     image_section = image

@@ -37,6 +38,7 @@ def gen_docker_run(image: str, name: str, network: str, ports: Dict[str, str], v
         ulimits_section,
         environment_section,
         extra_hosts_section,
+        entrypoint_section,
         detach_section,
         image_section  # Needs to be last
     ]
@@ -142,20 +142,47 @@ def remove_volume(self, volume: Volume):
         self.logger.debug(f"Removed volume {volume.name}")
 
     def create_container(self, image: str, container_name: str, network: Network, ports: List[PortMapping],
-                         volumes: List[DockerVolume], ulimits: List[Ulimit], env_variables: Dict[str, str] = None,
-                         extra_hosts: Dict[str, str] = None, detach: bool = True
-                         ) -> Container:
+                         volumes: List[DockerVolume], ulimits: List[Ulimit], env_kv: Dict[str, str] = None,
+                         env_passthrough: List[str] = None, extra_hosts: Dict[str, str] = None, detach: bool = True,
+                         entrypoint: List[str] = None) -> Container:
+        """
+        image: the name of the Docker image to spin up
+        container_name: the name/tag you want assigned to the created container
+        network: the Docker network to connect the container to
+        ports: list of port mappings you want configured between the container and the local host (e.g. --publish)
+        volumes: list of Docker volumes you want mounted into the container
+        ulimits: list of resource constraints to apply to the container
+        env_kv: dict of key/value pairs of ENV variables that should be present in the container
+        env_passthrough: list of export'd ENV variable names to pipe through from the invoking context to the container
+        extra_hosts: dict of hostname mappings to add to the container (e.g. --add-host)
+        detach: whether to detach the container from the current process
+        entrypoint: list of strings to supply during the run as entrypoint commands; overrides default in Dockerfile
+        """
 
         # TODO - need to handle if container already exists (name collision)
         # TODO - need to handle if we exceed the resource allocation for Docker
         # TODO - need to handle port contention
 
-        if not env_variables:
-            env_variables = {}
+        # Initialize optional, mutable containers
+        if not entrypoint:
+            entrypoint = []
+        if not env_kv:
+            env_kv = {}
+        if not env_passthrough:
+            env_passthrough = []
         if not extra_hosts:
             extra_hosts = {}
 
+        # Environment variables can be specified to a Docker container in a few ways. One is to provide explicit
+        # key/val pairs as part of the "run" command; another is to provide just the key name of an existing
+        # environment variable in the user's shell context (e.g. passthrough). Assuming the passthrough variable has
+        # been export'd, Docker will pipe it through to the container. This is useful for things like security
+        # credentials. However, we need to assemble a single list of variables to pass to the Docker SDK, which is
+        # what this code does.
+        environment_combined = []
+        environment_combined.extend([f"{k}={v}" for k, v in env_kv.items()])
+        environment_combined.extend(env_passthrough)
+
         # It doesn't appear you can just pass in a list of Volumes to the client, so we have to make this wonky mapping
         port_mapping = {str(pair.container_port): str(pair.host_port) for pair in ports}
         volume_mapping = {}
@@ -181,22 +208,30 @@ def create_container(self, image: str, container_name: str, network: Network, po
             volumes=volume_mapping,
             ulimits=ulimits,
             detach=detach,
-            environment=env_variables,
-            extra_hosts=extra_hosts
+            environment=environment_combined,
+            extra_hosts=extra_hosts,
+            entrypoint=entrypoint
         )
         self.logger.debug(f"Created container {container_name}")
         return container
 
     def _log_and_execute_command_run(self, image: str, name: str, network: str, ports: Dict[str, str],
                                      volumes: Dict[str, Dict[str, str]], ulimits: List[Ulimit], detach: bool,
-                                     environment: Dict[str, str], extra_hosts: Dict[str, str]) -> Container:
+                                     environment: List[str], extra_hosts: Dict[str, str],
+                                     entrypoint: List[str]) -> Container:
 
         args = {"image": image, "name": name, "network": network, "ports": ports, "volumes": volumes,
-                "ulimits": ulimits, "detach": detach, "environment": environment, "extra_hosts": extra_hosts}
+                "ulimits": ulimits, "detach": detach, "environment": environment, "extra_hosts": extra_hosts,
+                "entrypoint": entrypoint}
 
         run_command = dcg.gen_docker_run(**args)
         self.logger.debug(f"Predicted command being run by the Docker SDK: {run_command}")
 
+        # Annoyingly, the Docker SDK breaks if you pass in an empty list, so we have to manually look for and handle
+        # this case.
+        if not ("entrypoint" in args.keys() and args.get("entrypoint")):  # We don't actually have entrypoint cmds
+            args.pop("entrypoint", None)
+
         container = self._docker_client.containers.run(**args)
         return container

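An aside on the passthrough mechanism the new create_container code relies on: when `docker run` is given `--env NAME` with no value, Docker copies that variable from the invoking shell's exported environment into the container. A minimal shell illustration, with arbitrary variable names and image:

```
# Exported in the invoking shell; Docker picks it up by name.
export DEMO_SECRET="example-value"

# "--env DEMO_SECRET" pipes the exported value through by name;
# "--env DEMO_REGION=us-east-2" sets an explicit key/value pair.
docker run --rm \
    --env DEMO_SECRET \
    --env DEMO_REGION=us-east-2 \
    alpine:3 env | grep DEMO
```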
@@ -18,22 +18,26 @@ def test_WHEN_gen_run_command_THEN_as_expected():
             Ulimit(name='limit', soft=1, hard=2)
         ],
         "detach": True,
-        "environment": {
-            "a": "b",
-            "c": "d"
-        },
+        "environment": [
+            "a=b",
+            "c"
+        ],
         "extra_hosts": {
             "name": "host"
-        }
+        },
+        "entrypoint": [
+            "cmd 1",
+            "cmd 2"
+        ]
     }
 
     # Run our test
     generated_command = dcg.gen_docker_run(**test_args)
 
     # Check our results
     expected_command = ("docker run --name container_name --network network_name --publish 80:9200 --publish 42:6160"
-                        " --volume /mydir1:/path:ro --volume volume1:/path2:rw --ulimit limit=1:2 --env a='b'"
-                        " --env c='d' --add-host name:host --detach image")
+                        " --volume /mydir1:/path:ro --volume volume1:/path2:rw --ulimit limit=1:2 --env a=b"
+                        " --env c --add-host name:host --entrypoint cmd 1 --entrypoint cmd 2 --detach image")
     assert expected_command == generated_command


@@ -49,17 +53,18 @@ def test_WHEN_gen_run_command_2_THEN_as_expected():
             Ulimit(name='limit', soft=1, hard=2)
         ],
         "detach": False,
-        "environment": {
-            "a": "b",
-            "c": "d"
-        },
-        "extra_hosts": {}
+        "environment": [
+            "a=b",
+            "c"
+        ],
+        "extra_hosts": {},
+        "entrypoint": []
     }
 
     # Run our test
     generated_command = dcg.gen_docker_run(**test_args)
 
     # Check our results
     expected_command = ("docker run --name container_name --network network_name --publish 80:9200 --publish 42:6160"
-                        " --ulimit limit=1:2 --env a='b' --env c='d' image")
+                        " --ulimit limit=1:2 --env a=b --env c image")
     assert expected_command == generated_command
@@ -181,20 +181,24 @@ def test_WHEN_create_container_called_THEN_executes_normally():
     mock_volume_2.attrs = {"Name": "volume2"}
     mock_docker_volume_2 = dfc.DockerVolume("/mount/point2", mock_volume_2, host_mount_point="/host/")
     mock_ulimit = mock.Mock()
-    test_env_vars = {"key": "value"}
+    test_env_vars = {"env1": "value"}
+    test_env_passthrough = ["env2"]
     test_extra_hosts = {"tag": "hostname"}
+    test_entrypoint = ["cmd 1", "cmd 2"]
 
     # Run our test
     test_client = dfc.DockerFrameworkClient(docker_client=mock_inner_client)
     test_client.create_container(
-        test_image,
-        test_container_name,
-        mock_network,
-        test_ports,
-        [mock_docker_volume_1, mock_docker_volume_2],
-        [mock_ulimit],
-        test_env_vars,
-        extra_hosts=test_extra_hosts
+        image=test_image,
+        container_name=test_container_name,
+        network=mock_network,
+        ports=test_ports,
+        volumes=[mock_docker_volume_1, mock_docker_volume_2],
+        ulimits=[mock_ulimit],
+        env_kv=test_env_vars,
+        env_passthrough=test_env_passthrough,
+        extra_hosts=test_extra_hosts,
+        entrypoint=test_entrypoint
     )
 
     # Check our results
@@ -213,8 +217,9 @@ def test_WHEN_create_container_called_THEN_executes_normally():
             },
             ulimits=[mock_ulimit],
             detach=True,
-            environment=test_env_vars,
-            extra_hosts=test_extra_hosts
+            environment=["env1=value", "env2"],
+            extra_hosts=test_extra_hosts,
+            entrypoint=test_entrypoint
         )
     ]
     assert expected_calls == mock_inner_client.containers.run.call_args_list
30 changes: 27 additions & 3 deletions cluster_traffic_capture/README.md
@@ -95,7 +95,31 @@

 Learn more about venv [here](https://docs.python.org/3/library/venv.html).
 
-### Step 2 - Start the demo
+#### Step 2 - Set up AWS credentials
+
+This demo offloads captured traffic logs from the HAProxy Primary container to AWS CloudWatch using the CloudWatch Agent. The CloudWatch Agent is not the only way to offload the logs, just one approach.
+
+In order to use the Agent, we need access to AWS Credentials. This can be done a number of ways, including [setting up a dedicated IAM User (not recommended)](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html) or [using the AWS CLI STS Commands to assume a Role](https://aws.amazon.com/premiumsupport/knowledge-center/iam-assume-role-cli/) (an example invocation is sketched after this hunk). Whichever way you choose to create AWS Credentials, they should have read/write permissions to CloudWatch Logs.
+
+Export the credentials in your terminal session like so:
+```
+export AWS_ACCESS_KEY_ID=<access key ID> \
+&& export AWS_SECRET_ACCESS_KEY=<secret access key>
+```
+
+If you assumed a Role, you'll have a session token as well, which you also need to export:
+```
+export AWS_SESSION_TOKEN=<session token>
+```
+
+Finally, export the AWS Region ID you want the captured logs exported to:
+```
+export AWS_REGION=us-east-2
+```
+
+The demo script uses these ENV variables to safely construct an AWS credentials file inside the HAProxy Primary container. The file's lifespan is tied to the container's, so it goes away when the container does.
+
+#### Step 3 - Start the demo
 
 You should now be able to invoke the demo script, like so:

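For the Role-assumption path mentioned in the new Step 2, here is a hedged example of minting temporary credentials with the AWS CLI; the role ARN and session name below are placeholders, not values from this repository:

```
# Placeholders only: substitute your own role ARN and session name.
aws sts assume-role \
    --role-arn arn:aws:iam::123456789012:role/demo-cloudwatch-role \
    --role-session-name haproxy-demo \
    --query 'Credentials.[AccessKeyId,SecretAccessKey,SessionToken]' \
    --output text
```

Export the three values it prints as AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_SESSION_TOKEN, as Step 2 describes.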
@@ -147,7 +171,7 @@
 c198216efc27 docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.2 "/tini
 c688ef30cd52 docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.2 "/tini -- /usr/local…" 51 seconds ago Up 50 seconds 0.0.0.0:9200->9200/tcp, 9300/tcp primary-cluster-node-1
 
-### Step 3 - Test the setup with traffic
+#### Step 4 - Test the setup with traffic
 
 Now, you can send traffic to the HAProxy Primary on localhost:80 and see it replicated to the Shadow Cluster.
 
@@ -233,7 +257,7 @@
 Feb 10 19:04:20 localhost haproxy[23]: message repeated 4 times: [ Request-URI:
 Feb 10 19:04:22 localhost haproxy[23]: Request-URI: /nyc_taxis/_search#012Request-Method: GET#012Request-Body: -#012Response-Body: -
 
-### Step 4 - Clean up the demo setup
+#### Step 5 - Clean up the demo setup
 
 As the terminal output suggests, you can spin down the demo setup and clean up all created resources by hitting RETURN in the original terminal:
 
2 changes: 1 addition & 1 deletion cluster_traffic_capture/build_docker_images.py
@@ -152,7 +152,7 @@ def main():

     # Build the Docker images for the Primary HAProxy
     print("Building HAProxy Docker image for Primary Cluster...")
-    haproxy_image_primary = docker_client.build_image(str(workspace), primary_image, "haproxy-w-mirror")
+    haproxy_image_primary = docker_client.build_image(str(workspace), primary_image, "haproxy-w-mirror-aws")
     print(f"Primary HAProxy image available locally w/ tag: {haproxy_image_primary.tag}")
 
     # Build the Docker images for the Shadow HAProxy
64 changes: 62 additions & 2 deletions cluster_traffic_capture/demo_haproxy.py
@@ -47,6 +47,38 @@ def main():
     if verbose:
         logging.root.setLevel(logging.DEBUG)
 
+    # =================================================================================================================
+    # Pull Necessary State
+    # =================================================================================================================
+    print("Pulling AWS credentials from ENV variables...")
+
+    print("Pulling ENV variable: AWS_ACCESS_KEY_ID")
+    aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID")
+    if not aws_access_key_id:
+        print("ENV variable 'AWS_ACCESS_KEY_ID' not available; please make sure it is exported.")
+
+    print("Pulling ENV variable: AWS_SECRET_ACCESS_KEY")
+    aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
+    if not aws_secret_access_key:
+        print("ENV variable 'AWS_SECRET_ACCESS_KEY' not available; please make sure it is exported.")
+
+    print("Pulling ENV variable: AWS_SESSION_TOKEN")
+    aws_session_token = os.environ.get("AWS_SESSION_TOKEN")
+    if not aws_session_token:
+        print("ENV variable 'AWS_SESSION_TOKEN' not available; this will cause problems if using temporary creds")
+
+    if not aws_access_key_id or not aws_secret_access_key:
+        message = ("The AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are required for the demo containers to function"
+                   " properly. Please ensure they are correct and exported in your current shell environment.")
+        raise RuntimeError(message)
+
+    print("Pulling the AWS Region from the ENV variable AWS_REGION...")
+    aws_region = os.environ.get("AWS_REGION")
+    if not aws_region:
+        message = ("The AWS_REGION ENV variable is required for the demo containers to function properly. Please"
+                   " ensure it is correct and exported in your current shell environment.")
+        raise RuntimeError(message)
+
     # =================================================================================================================
     # Setup Clusters
     # =================================================================================================================
Expand Down Expand Up @@ -105,6 +137,27 @@ def main():
     # Set Python's working directory to something predictable (this file's directory)
     demo_dir = os.path.dirname(__file__)
     os.chdir(demo_dir)
+
+    # We need to construct an AWS Config file in order to execute the demo. This is required because the CloudWatch
+    # Agent checks to see whether it is running in ECS or On-Prem when it starts up. The On-Prem path requires
+    # this file to supply the AWS Creds and AWS Region to post to. Spoofing the check and getting it to think it's
+    # running in ECS is possible but requires faking the local metadata service. For our purposes, making and
+    # installing the configuration file is an easier way to get the demo working on a laptop than faking the
+    # metadata service, and keeps the Docker image(s) we're generating generic to On-Prem vs. ECS.
+    #
+    # There are a number of approaches we could take to make this configuration file available inside the container.
+    # Unfortunately, the easy routes either require writing a real file to disk and mounting it, which presents risks,
+    # or embedding it into the Dockerfile, which violates the boundary between the demo and "production" code.
+    # Therefore, we instead do it in a bit of a janky way: passing the credentials into the container using ENV
+    # variables and using an ENTRYPOINT script override to read them in-container and write the AWS Config file
+    # during launch.
+    #
+    # Why aren't we writing the Config to a Python temporary file we mount into the container? Good question! Two
+    # problems with that. First, reading from temporary files is wonky and their behavior is platform-specific.
+    # Second, we still have to handle cleanup, and certain situations (e.g. SIGKILL) will still result in the file
+    # not getting cleaned up.
+    demo_config_dir_host = os.path.join(demo_dir, "docker_config_demo")
+    demo_config_dir_container = "/docker_config_demo"
+    demo_entrypoint = os.path.join(demo_config_dir_container, "demo_entrypoint.sh")
 
     # Build the Docker images for the Primary and Shadow HAProxy containers
     print("Building HAProxy Docker images for Primary and Shadow HAProxy containers...")
@@ -157,9 +210,16 @@ def subshell_print(message: str):
         container_name=primary_image,
         network=haproxy_network_primary,
         ports=[dfc.PortMapping(HAPROXY_INTERNAL_PORT, HAPROXY_PRIMARY_PORT)],
-        volumes=[],
+        volumes=[dfc.DockerVolume(demo_config_dir_container, None, demo_config_dir_host)],
         ulimits=[Ulimit(name='memlock', soft=-1, hard=-1)],
-        extra_hosts={TAG_DOCKER_HOST: "host-gateway"}
+        env_kv={
+            "AWS_REGION": aws_region,
+            "AWS_ACCESS_KEY_ID": aws_access_key_id,
+            "AWS_SECRET_ACCESS_KEY": aws_secret_access_key,
+            "AWS_SESSION_TOKEN": aws_session_token
+        },
+        extra_hosts={TAG_DOCKER_HOST: "host-gateway"},
+        entrypoint=[demo_entrypoint]
     )
 
     # =================================================================================================================
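The diff above mounts docker_config_demo/ into the container and overrides the ENTRYPOINT with demo_entrypoint.sh, which is not shown in this excerpt. A plausible sketch of what such a script does, based solely on the comments in the diff — the file paths, agent config name, and HAProxy launch command below are assumptions, not the committed script:

```
#!/bin/bash
# Hypothetical sketch of demo_entrypoint.sh -- the committed script may differ.
set -e

# Write the AWS credentials/config files the CloudWatch Agent's "On-Prem"
# startup path expects, using the ENV variables the demo script passed in.
mkdir -p /root/.aws
cat > /root/.aws/credentials <<EOF
[default]
aws_access_key_id = ${AWS_ACCESS_KEY_ID}
aws_secret_access_key = ${AWS_SECRET_ACCESS_KEY}
aws_session_token = ${AWS_SESSION_TOKEN}
EOF
cat > /root/.aws/config <<EOF
[default]
region = ${AWS_REGION}
EOF

# Start the CloudWatch Agent in on-premises mode (config path is assumed)...
/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl \
    -a fetch-config -m onPremise -c file:/cwagent-config.json -s

# ...then hand off to the HAProxy process the image would normally start.
exec haproxy -f /usr/local/etc/haproxy/haproxy.cfg
```

Because the credentials file is written inside the container's own filesystem at launch, it disappears when the container is removed, matching the lifespan behavior the README describes.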