diff --git a/.gitignore b/.gitignore index 5d289476..38986b80 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ tf/ # build artifacts version.go build/ +.idea diff --git a/Makefile b/Makefile index 820666c4..9d8509a3 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ BUILDDIR ?= $(abspath build) # docker doesn't allow "+" in image tags: https://github.com/docker/distribution/issues/1201 export ROBOTEST_DOCKER_VERSION ?= $(subst +,-,$(VERSION)) export ROBOTEST_DOCKER_TAG ?= -export ROBOTEST_DOCKER_ARGS ?= --pull +export ROBOTEST_DOCKER_ARGS ?= DOCKERFLAGS := --rm=true $(NOROOT) -v $(PWD):$(SRCDIR) -v $(BUILDDIR):$(SRCDIR)/build -w $(SRCDIR) BUILDBOX := robotest:buildbox BUILDBOX_IIDFILE := $(BUILDDIR)/.robotest-buildbox.iid diff --git a/assets/terraform/vsphere/bootstrap/redhat.sh b/assets/terraform/vsphere/bootstrap/redhat.sh new file mode 100644 index 00000000..1f9ffa98 --- /dev/null +++ b/assets/terraform/vsphere/bootstrap/redhat.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# +# Copyright 2020 Gravitational, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# +# VM bootstrap script for CentOS/RHEL +# + +sudo sysctl -w net.bridge.bridge-nf-call-iptables=1 +sudo sysctl -w net.bridge.bridge-nf-call-ip6tables=1 +echo 'net.bridge.bridge-nf-call-iptables = 1' | sudo tee -a /etc/sysctl.conf +echo 'net.bridge.bridge-nf-call-ip6tables = 1' | sudo tee -a /etc/sysctl.conf +sudo sysctl --system +echo "Done" diff --git a/assets/terraform/vsphere/config.tf b/assets/terraform/vsphere/config.tf new file mode 100644 index 00000000..93e06e17 --- /dev/null +++ b/assets/terraform/vsphere/config.tf @@ -0,0 +1,87 @@ +variable "vsphere_user" { + description = "vSphere username" + type = string +} + +variable "vsphere_password" { + description = "vSphere password" + type = string +} + +variable "vsphere_server" { + description = "vSphere server" + type = string +} + +variable "datacenter" { + description = "The vSphere datacenter where resources will be created" + type = string +} + +variable "cluster" { + description = "The vSphere cluster where resources will be created" + type = string +} + +variable "datastore" { + description = "The vSphere datastore where VM disks will be stored" + type = string +} + +variable "vm_folder" { + description = "The vSphere folder where VMs will be created" + type = string +} + +variable "network" { + description = "Network for VM NIC." + type = string +} + +variable "template" { + description = "Template to clone VMs from" + type = string +} + +variable "node_tag" { + description = "vSphere-friendly cluster name to use as a prefix for resources." + type = string +} + +variable "vm_type" { + description = "Type of VM to provision" + type = string +} + +variable "os_user" { + description = "SSH user to login onto nodes" + type = string +} + +variable "ssh_pub_key_path" { + description = "Path to the public SSH key." 
+ type = string +} + +variable "nodes" { + description = "Number of nodes to provision" + type = number + default = 1 +} + +provider "vsphere" { + user = var.vsphere_user + password = var.vsphere_password + vsphere_server = var.vsphere_server + + allow_unverified_ssl = true +} + +data "vsphere_datacenter" "dc" { + name = var.datacenter +} + +data "vsphere_compute_cluster" "cluster" { + name = var.cluster + datacenter_id = data.vsphere_datacenter.dc.id +} \ No newline at end of file diff --git a/assets/terraform/vsphere/network.tf b/assets/terraform/vsphere/network.tf new file mode 100644 index 00000000..1a75a480 --- /dev/null +++ b/assets/terraform/vsphere/network.tf @@ -0,0 +1,8 @@ +# +# Network +# + +data "vsphere_network" "network" { + name = var.network + datacenter_id = data.vsphere_datacenter.dc.id +} \ No newline at end of file diff --git a/assets/terraform/vsphere/node.tf b/assets/terraform/vsphere/node.tf new file mode 100644 index 00000000..1a99f2e6 --- /dev/null +++ b/assets/terraform/vsphere/node.tf @@ -0,0 +1,46 @@ +# +# Virtual Machine node +# + +resource "vsphere_virtual_machine" "node" { +#TODO: Move to vars!! 
+ guest_id = "rhel7_64Guest" + count = var.nodes + name = "${var.node_tag}-node-${count.index}" + folder = var.vm_folder + + resource_pool_id = data.vsphere_compute_cluster.cluster.resource_pool_id + + num_cpus = 4 + memory = 8192 + + network_interface { + network_id = data.vsphere_network.network.id + } + + disk { + label = "disk0" + size = 64 + thin_provisioned = false #TODO(ag): Update the template to true and change here + } + + clone { + template_uuid = data.vsphere_virtual_machine.template.id + + } + + extra_config = { + "guestinfo.ssh_user" = var.os_user + "guestinfo.ssh_public_key_data" = file(var.ssh_pub_key_path) + } +} + +data "vsphere_datastore" "datastore" { + name = var.datastore + datacenter_id = data.vsphere_datacenter.dc.id +} + +data "vsphere_virtual_machine" "template" { + name = var.template + datacenter_id = data.vsphere_datacenter.dc.id +} \ No newline at end of file diff --git a/assets/terraform/vsphere/os.tf b/assets/terraform/vsphere/os.tf new file mode 100644 index 00000000..a6d13da5 --- /dev/null +++ b/assets/terraform/vsphere/os.tf @@ -0,0 +1,23 @@ +# +# OS configuration +# + +variable "oss" { + description = "Map of supported Linux distributions" + type = "map" + + default = { + "redhat:7.9" = "toTemplate1" + "redhat7.9" = "toTemplate1" + } +} + +#variable "os" { +# description = "Map of supported Linux distributions" +# type = "map" +# +# default = { +# "redhat:7.9" = "toTemplate1" +# "redhat7.9" = "toTemplate1" +# } +#} \ No newline at end of file diff --git a/assets/terraform/vsphere/output.tf b/assets/terraform/vsphere/output.tf new file mode 100644 index 00000000..2ccd6e51 --- /dev/null +++ b/assets/terraform/vsphere/output.tf @@ -0,0 +1,11 @@ +# +# Output Variables +# + +output "private_ips" { + value = vsphere_virtual_machine.node.*.default_ip_address +} + +output "public_ips" { + value = vsphere_virtual_machine.node.*.default_ip_address +} \ No newline at end of file diff --git a/assets/terraform/vsphere/versions.tf 
b/assets/terraform/vsphere/versions.tf new file mode 100644 index 00000000..ee0f9705 --- /dev/null +++ b/assets/terraform/vsphere/versions.tf @@ -0,0 +1,3 @@ +terraform { + required_version = ">= 0.12" +} \ No newline at end of file diff --git a/docker/Makefile b/docker/Makefile index 20b3404c..9b638645 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -17,7 +17,7 @@ DOCKER_REPO := quay.io/gravitational ROBOTEST_DOCKER_VERSION ?= # An empty tag will be ignored ROBOTEST_DOCKER_TAG ?= -ROBOTEST_DOCKER_ARGS ?= --pull +ROBOTEST_DOCKER_ARGS ?= GRAVITY_VERSION := 5.5.50 TERRAFORM_VERSION := 0.12.9 @@ -27,9 +27,10 @@ TERRAFORM_PROVIDER_AZURERM_VERSION := 1.5.0 TERRAFORM_PROVIDER_GOOGLE_VERSION := 2.15.0 TERRAFORM_PROVIDER_RANDOM_VERSION := 2.2.0 TERRAFORM_PROVIDER_TEMPLATE_VERSION := 2.1.2 +TERRAFORM_PROVIDER_VSPHERE_VERSION := 1.14.0 export -providers := AZURERM AWS GOOGLE RANDOM TEMPLATE +providers := AZURERM AWS GOOGLE RANDOM TEMPLATE VSPHERE provider_args := $(foreach provider,$(providers),--build-arg TERRAFORM_PROVIDER_$(provider)_VERSION=$$TERRAFORM_PROVIDER_$(provider)_VERSION) BUILD_ARGS := \ diff --git a/docker/build/Dockerfile b/docker/build/Dockerfile index 77971f16..49bb757e 100644 --- a/docker/build/Dockerfile +++ b/docker/build/Dockerfile @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -FROM quay.io/gravitational/debian-venti:go1.16.6-buster +FROM quay.io/gravitational/debian-venti:go1.17.5-stretch ARG UID ARG GID diff --git a/docker/e2e/Dockerfile b/docker/e2e/Dockerfile index 1c7f7554..baffa17a 100644 --- a/docker/e2e/Dockerfile +++ b/docker/e2e/Dockerfile @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-FROM quay.io/gravitational/debian-grande:stretch +FROM quay.io/gravitational/debian-grande:buster ARG TERRAFORM_VERSION ARG CHROMEDRIVER_VERSION @@ -59,6 +59,10 @@ RUN mkdir -p /robotest WORKDIR /robotest COPY entrypoint.sh /entrypoint.sh COPY build/robotest-e2e /usr/bin/robotest-e2e +COPY polaris.pub /robotest/polaris.pub +COPY polaris.pem /robotest/polaris.pem +RUN chmod 0644 /robotest/polaris.pem +#TODO: Change the user on the template and the SSH keys. polaris is not meant for this RUN chmod +x /usr/bin/robotest-e2e && \ chmod +x /entrypoint.sh diff --git a/docker/suite/Dockerfile b/docker/suite/Dockerfile index 091067bb..cf06f106 100644 --- a/docker/suite/Dockerfile +++ b/docker/suite/Dockerfile @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -FROM quay.io/gravitational/debian-grande:stretch +FROM quay.io/gravitational/debian-grande:buster RUN apt-get update && \ apt-get install -y curl unzip gnupg2 dirmngr @@ -23,6 +23,7 @@ ARG TERRAFORM_PROVIDER_AWS_VERSION ARG TERRAFORM_PROVIDER_GOOGLE_VERSION ARG TERRAFORM_PROVIDER_TEMPLATE_VERSION ARG TERRAFORM_PROVIDER_RANDOM_VERSION +ARG TERRAFORM_PROVIDER_VSPHERE_VERSION ENV TF_TARBALL https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_linux_amd64.zip ENV TF_PLUGINS \ @@ -33,7 +34,10 @@ ENV TF_PLUGINS \ # Google Compute Engine https://releases.hashicorp.com/terraform-provider-google/${TERRAFORM_PROVIDER_GOOGLE_VERSION}/terraform-provider-google_${TERRAFORM_PROVIDER_GOOGLE_VERSION}_linux_amd64.zip \ https://releases.hashicorp.com/terraform-provider-template/${TERRAFORM_PROVIDER_TEMPLATE_VERSION}/terraform-provider-template_${TERRAFORM_PROVIDER_TEMPLATE_VERSION}_linux_amd64.zip \ - 
https://releases.hashicorp.com/terraform-provider-random/${TERRAFORM_PROVIDER_RANDOM_VERSION}/terraform-provider-random_${TERRAFORM_PROVIDER_RANDOM_VERSION}_linux_amd64.zip + https://releases.hashicorp.com/terraform-provider-random/${TERRAFORM_PROVIDER_RANDOM_VERSION}/terraform-provider-random_${TERRAFORM_PROVIDER_RANDOM_VERSION}_linux_amd64.zip \ + # vSphere + https://releases.hashicorp.com/terraform-provider-vsphere/${TERRAFORM_PROVIDER_VSPHERE_VERSION}/terraform-provider-vsphere_${TERRAFORM_PROVIDER_VSPHERE_VERSION}_linux_amd64.zip + + RUN curl ${TF_TARBALL} -o terraform.zip && \ unzip terraform.zip -d /usr/bin && \ @@ -59,5 +63,9 @@ WORKDIR /robotest COPY build/robotest-suite /usr/bin/robotest-suite COPY terraform /robotest/terraform COPY run_suite.sh /usr/bin/run_suite.sh +COPY polaris.pub /robotest/polaris.pub +COPY polaris.pem /robotest/polaris.pem +RUN chmod 0660 /robotest/polaris.pem +#TODO: Change the user on the template and the SSH keys. polaris is not meant for this RUN chmod +x /usr/bin/robotest-suite diff --git a/infra/gravity/cluster_install.go b/infra/gravity/cluster_install.go index b925a3ce..7c861d1d 100644 --- a/infra/gravity/cluster_install.go +++ b/infra/gravity/cluster_install.go @@ -87,7 +87,21 @@ func (c *TestContext) OfflineInstall(nodes []Gravity, param InstallParam) error defer cancel() param.CloudProvider = c.provisionerCfg.CloudProvider - master := nodes[0].(*gravity) + + // Before the typecast, log the details of `nodes` + c.Logger().Infof("Nodes slice: %v", nodes) + for i, node := range nodes { + c.Logger().Infof("Node[%d]: type=%T, value=%v", i, node, node) + } + + // Attempt the typecast + master, ok := nodes[0].(*gravity) + if !ok { + c.Logger().Fatalf("Failed to typecast nodes[0] to *gravity. Actual type: %T", nodes[0]) + } + + // If the typecast is successful, proceed with the rest of your code + c.Logger().Infof("Typecast successful. 
master: %v", master) if param.Token == "" { param.Token = "ROBOTEST" } diff --git a/infra/gravity/config.go b/infra/gravity/config.go index e1553890..47f485aa 100644 --- a/infra/gravity/config.go +++ b/infra/gravity/config.go @@ -30,12 +30,13 @@ import ( "github.com/gravitational/robotest/infra/providers/azure" "github.com/gravitational/robotest/infra/providers/gce" "github.com/gravitational/robotest/infra/providers/ops" + "github.com/gravitational/robotest/infra/providers/vsphere" "github.com/gravitational/robotest/lib/constants" "github.com/gravitational/trace" "github.com/stretchr/testify/require" - "gopkg.in/go-playground/validator.v9" - "gopkg.in/yaml.v2" + validator "gopkg.in/go-playground/validator.v9" + yaml "gopkg.in/yaml.v2" ) // OS represents OS vendor/version @@ -46,11 +47,14 @@ type OS struct { // UnmarshalText interprets b as an OS vendor with a version. // I.e. given: // -// "vendor:version", it populates this OS instance accordingly +// "vendor:version", it populates this OS instance accordingly func (os *OS) UnmarshalText(b []byte) error { split := bytes.Split(b, []byte(":")) if len(split) != 2 { - return trace.BadParameter("OS should be in format vendor:version, got %q", b) + return trace.BadParameter( + "OS should be in format vendor:version, got %q", + b, + ) } os.Vendor = string(split[0]) os.Version = string(split[1]) @@ -85,7 +89,7 @@ func (drv StorageDriver) Driver() string { // CloudProvider, AWS, Azure, ScriptPath and InstallerURL type ProvisionerConfig struct { // DeployTo defines cloud to deploy to - CloudProvider string `yaml:"cloud" validate:"required,eq=aws|eq=azure|eq=gce|eq=ops"` + CloudProvider string `yaml:"cloud" validate:"required,eq=aws|eq=azure|eq=gce|eq=ops|eq=vsphere"` // AWS defines AWS connection parameters AWS *aws.Config `yaml:"aws"` // Azure defines Azure connection parameters @@ -94,6 +98,8 @@ type ProvisionerConfig struct { GCE *gce.Config `yaml:"gce"` // Ops defines Ops Center connection parameters Ops *ops.Config 
`yaml:"ops"` + // Vsphere defines Vcenter connection parameters + Vsphere *vsphere.Config `yaml:"vsphere"` // ScriptPath is the path to the terraform script or directory for provisioning ScriptPath string `yaml:"script_path" validate:"required"` @@ -125,6 +131,7 @@ type ProvisionerConfig struct { // LoadConfig loads essential parameters from YAML func LoadConfig(t *testing.T, configBytes []byte) (cfg ProvisionerConfig) { + fmt.Println(string(configBytes)) err := yaml.Unmarshal(configBytes, &cfg) require.NoError(t, err, string(configBytes)) @@ -139,6 +146,8 @@ func LoadConfig(t *testing.T, configBytes []byte) (cfg ProvisionerConfig) { case constants.GCE: require.NotNil(t, cfg.GCE) cfg.cloudRegions = newCloudRegions(strings.Split(cfg.GCE.Region, ",")) + case constants.Vsphere: + require.NotNil(t, cfg.Vsphere) case constants.Ops: require.NotNil(t, cfg.Ops) // set AWS environment variables to be used by subsequent commands @@ -191,7 +200,10 @@ func (config ProvisionerConfig) WithOS(os OS) ProvisionerConfig { cfg := config cfg.os = os cfg.tag = fmt.Sprintf("%s-%s%s", cfg.tag, os.Vendor, os.Version) - cfg.StateDir = filepath.Join(cfg.StateDir, fmt.Sprintf("%s%s", os.Vendor, os.Version)) + cfg.StateDir = filepath.Join( + cfg.StateDir, + fmt.Sprintf("%s%s", os.Vendor, os.Version), + ) return cfg } @@ -214,7 +226,7 @@ func (config ProvisionerConfig) WithStorageDriver(storageDriver StorageDriver) P // validateConfig checks that key parameters are present func validateConfig(config ProvisionerConfig) error { switch config.CloudProvider { - case constants.AWS, constants.Azure, constants.GCE, constants.Ops: + case constants.AWS, constants.Azure, constants.GCE, constants.Ops, constants.Vsphere: default: return trace.BadParameter("unknown cloud provider %s", config.CloudProvider) } @@ -227,9 +239,13 @@ func validateConfig(config ProvisionerConfig) error { var errs []error if validationErrors, ok := err.(validator.ValidationErrors); ok { for _, fieldError := range validationErrors 
{ - errs = append(errs, - trace.BadParameter(` * %s="%v" fails "%s"`, - fieldError.Field(), fieldError.Value(), fieldError.Tag())) + errs = append( + errs, + trace.BadParameter( + ` * %s="%v" fails "%s"`, + fieldError.Field(), fieldError.Value(), fieldError.Tag(), + ), + ) } } return trace.NewAggregate(errs...) diff --git a/infra/gravity/node_commands.go b/infra/gravity/node_commands.go index 31412661..e748cbce 100644 --- a/infra/gravity/node_commands.go +++ b/infra/gravity/node_commands.go @@ -251,11 +251,12 @@ var installCmdTemplate = template.Must( --cloud-provider=generic --state-dir={{.StateDir}} \ --httpprofile=localhost:6061 \ {{if .Cluster}}--cluster={{.Cluster}}{{end}} \ - {{if .OpsAdvertiseAddr}}--ops-advertise-addr={{.OpsAdvertiseAddr}}{{end}}\ {{if .ServiceUID}}--service-uid={{.ServiceUID}}{{end}}\ {{if .ServiceGID}}--service-gid={{.ServiceGID}}{{end}}\ `)) +// Removed the above: {{if .OpsAdvertiseAddr}}--ops-advertise-addr={{.OpsAdvertiseAddr}}{{end}}\ + // Status queries cluster status func (g *gravity) Status(ctx context.Context) (*GravityStatus, error) { cmd := fmt.Sprintf("sudo gravity status --output=json --system-log-file=%v", diff --git a/infra/gravity/provision.go b/infra/gravity/provision.go index 1a7cdbce..2f462486 100644 --- a/infra/gravity/provision.go +++ b/infra/gravity/provision.go @@ -40,7 +40,7 @@ import ( "github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ec2" - "github.com/dustin/go-humanize" + humanize "github.com/dustin/go-humanize" "github.com/gravitational/trace" uuid "github.com/satori/go.uuid" "github.com/sirupsen/logrus" @@ -55,7 +55,12 @@ type cloudDynamicParams struct { env map[string]string } -func configureVMs(baseCtx context.Context, log logrus.FieldLogger, params cloudDynamicParams, nodes []*gravity) error { +func configureVMs( + baseCtx context.Context, + log logrus.FieldLogger, + params cloudDynamicParams, + nodes []*gravity, +) error { errChan := 
make(chan error, len(nodes)) ctx, cancel := context.WithCancel(baseCtx) @@ -77,7 +82,10 @@ func configureVMs(baseCtx context.Context, log logrus.FieldLogger, params cloudD } // Provision will attempt to provision the requested cluster -func (c *TestContext) Provision(cfg ProvisionerConfig) (cluster Cluster, err error) { +func (c *TestContext) Provision(cfg ProvisionerConfig) ( + cluster Cluster, + err error, +) { // store the configuration used for provisioning c.provisionerCfg = cfg @@ -90,6 +98,11 @@ func (c *TestContext) Provision(cfg ProvisionerConfig) (cluster Cluster, err err } case constants.Ops: cluster, err = c.provisionOps(cfg) + case constants.Vsphere: + cluster, _, err = c.provisionCloud(cfg) + if err != nil { + return Cluster{}, err + } default: err = trace.BadParameter("unkown cloud provider: %q", cfg.CloudProvider) } @@ -109,20 +122,36 @@ func (c *TestContext) Provision(cfg ProvisionerConfig) (cluster Cluster, err err } // provisionOps utilizes an ops center installation flow to complete cluster installation -func (c *TestContext) provisionOps(cfg ProvisionerConfig) (cluster Cluster, err error) { +func (c *TestContext) provisionOps(cfg ProvisionerConfig) ( + cluster Cluster, + err error, +) { c.Logger().WithField("config", cfg).Debug("Provisioning via Ops Center") // verify connection before starting provisioning c.Logger().Debug("attempting to connect to AWS api") - sess, err := session.NewSession(&aws.Config{ - Region: aws.String(cfg.Ops.EC2Region), - Credentials: credentials.NewStaticCredentials(cfg.Ops.EC2AccessKey, cfg.Ops.EC2SecretKey, ""), - }) + sess, err := session.NewSession( + &aws.Config{ + Region: aws.String(cfg.Ops.EC2Region), + Credentials: credentials.NewStaticCredentials( + cfg.Ops.EC2AccessKey, + cfg.Ops.EC2SecretKey, + "", + ), + }, + ) if err != nil { return cluster, trace.Wrap(err) } c.Logger().Debug("logging into the ops center") - out, err := exec.Command("tele", "login", "-o", cfg.Ops.URL, "--key", 
cfg.Ops.OpsKey).CombinedOutput() + out, err := exec.Command( + "tele", + "login", + "-o", + cfg.Ops.URL, + "--key", + cfg.Ops.OpsKey, + ).CombinedOutput() if err != nil { return cluster, trace.WrapWithMessage(err, string(out)) } @@ -189,7 +218,10 @@ Loop: // the cluster install completed, we can continue the install process break Loop default: - return cluster, trace.BadParameter("unexpected cluster status: %v", status) + return cluster, trace.BadParameter( + "unexpected cluster status: %v", + status, + ) } } } @@ -197,7 +229,11 @@ Loop: // now that the requested cluster has been created, we have to build the []Gravity slice of nodes c.Logger().Debug("Attempting to get a listing of instances from AWS for the cluster.") ec2svc := ec2.New(sess) - gravityNodes, err := c.getAWSNodes(ec2svc, "tag:KubernetesCluster", clusterName) + gravityNodes, err := c.getAWSNodes( + ec2svc, + "tag:KubernetesCluster", + clusterName, + ) if err != nil { return cluster, trace.Wrap(err) } @@ -208,7 +244,12 @@ Loop: return cluster, trace.Wrap(err) } - err = validateDiskSpeed(c.Context(), gravityNodes, cfg.dockerDevice, c.Logger()) + err = validateDiskSpeed( + c.Context(), + gravityNodes, + cfg.dockerDevice, + c.Logger(), + ) if err != nil { return cluster, trace.Wrap(err) } @@ -219,8 +260,12 @@ Loop: } // provisionCloud gets VMs up, running and ready to use -func (c *TestContext) provisionCloud(cfg ProvisionerConfig) (cluster Cluster, config *terraform.Config, err error) { - log := c.Logger().WithField("config", cfg) +func (c *TestContext) provisionCloud(cfg ProvisionerConfig) ( + cluster Cluster, + config *terraform.Config, + err error, +) { + log := c.Logger().WithField("vsphere-config", cfg.Vsphere) log.Debug("Provisioning VMs.") err = validateConfig(cfg) @@ -248,16 +293,22 @@ func (c *TestContext) provisionCloud(cfg ProvisionerConfig) (cluster Cluster, co gravityNodes, err := connectVMs(ctx, c.Logger(), infra.params, infra.nodes) if err != nil { log.WithError(err).Error("Some nodes 
failed to connect, tear down as unusable.") - return cluster, nil, trace.NewAggregate(err, destroyResource(infra.destroyFn)) + return cluster, nil, trace.NewAggregate( + err, + destroyResource(infra.destroyFn), + ) } - // Start streaming logs as soon as connected - c.streamLogs(gravityNodes) + //// Start streaming logs as soon as connected + //c.streamLogs(gravityNodes) log.Debug("Configuring VMs.") err = configureVMs(ctx, c.Logger(), infra.params, gravityNodes) if err != nil { log.WithError(err).Error("Some nodes failed to initialize, tear down as non-usable.") - return cluster, nil, trace.NewAggregate(err, destroyResource(infra.destroyFn)) + return cluster, nil, trace.NewAggregate( + err, + destroyResource(infra.destroyFn), + ) } err = c.postProvision(gravityNodes) @@ -267,7 +318,12 @@ func (c *TestContext) provisionCloud(cfg ProvisionerConfig) (cluster Cluster, co } if cfg.CloudProvider == constants.Azure { - err = validateDiskSpeed(c.Context(), gravityNodes, cfg.dockerDevice, c.Logger()) + err = validateDiskSpeed( + c.Context(), + gravityNodes, + cfg.dockerDevice, + c.Logger(), + ) if err != nil { return cluster, nil, trace.Wrap(err) } @@ -282,33 +338,33 @@ func (c *TestContext) provisionCloud(cfg ProvisionerConfig) (cluster Cluster, co return cluster, &infra.params.terraform, nil } -func (c *TestContext) streamLogs(gravityNodes []*gravity) { - c.Logger().Debug("Streaming logs.") - for _, node := range gravityNodes { - go func(node *gravity) { - err := node.streamStartupLogs(c.monitorCtx) - if err != nil && !utils.IsContextCancelledError(err) { - c.Logger().Warnf("Failed to stream startup script logs: %v.", err) - } - }(node) - go func(node *gravity) { - if err := node.streamLogs(c.monitorCtx); err != nil { - switch { - case sshutil.IsExitMissingError(err): - if c.Context().Err() != nil { - // This test has already been cancelled / has timed out - return - } - c.markPreempted(node) - case utils.IsContextCancelledError(err): - // Ignore - default: - 
c.Logger().Warnf("Failed to stream logs: %v.", err) - } - } - }(node) - } -} +//func (c *TestContext) streamLogs(gravityNodes []*gravity) { +// c.Logger().Debug("Streaming logs.") +// for _, node := range gravityNodes { +// go func(node *gravity) { +// err := node.streamStartupLogs(c.monitorCtx) +// if err != nil && !utils.IsContextCancelledError(err) { +// c.Logger().Warnf("Failed to stream startup script logs: %v.", err) +// } +// }(node) +// go func(node *gravity) { +// if err := node.streamLogs(c.monitorCtx); err != nil { +// switch { +// case sshutil.IsExitMissingError(err): +// if c.Context().Err() != nil { +// // This test has already been cancelled / has timed out +// return +// } +// c.markPreempted(node) +// case utils.IsContextCancelledError(err): +// // Ignore +// default: +// c.Logger().Warnf("Failed to stream logs: %v.", err) +// } +// } +// }(node) +// } +//} // postProvision runs common tasks for both ops and cloud provisioners once the VMs have been setup and are running func (c *TestContext) postProvision(gravityNodes []*gravity) error { @@ -318,7 +374,10 @@ func (c *TestContext) postProvision(gravityNodes []*gravity) error { c.Logger().Debug("synchronizing clocks") var timeNodes []sshutil.SshNode for _, node := range gravityNodes { - timeNodes = append(timeNodes, sshutil.SshNode{Client: node.Client(), Log: node.Logger()}) + timeNodes = append( + timeNodes, + sshutil.SshNode{Client: node.Client(), Log: node.Logger()}, + ) } if err := sshutil.WaitTimeSync(ctx, timeNodes); err != nil { return trace.Wrap(err) @@ -336,14 +395,26 @@ const ( ) // bootstrapAzure workarounds some issues with Azure platform init -func bootstrapAzure(ctx context.Context, g *gravity, param cloudDynamicParams) (err error) { - err = sshutil.WaitForFile(ctx, g.Client(), g.Logger(), - waagentProvisionFile, sshutil.TestRegularFile) +func bootstrapAzure( + ctx context.Context, + g *gravity, + param cloudDynamicParams, +) (err error) { + err = sshutil.WaitForFile( + ctx, 
g.Client(), g.Logger(), + waagentProvisionFile, sshutil.TestRegularFile, + ) if err != nil { return trace.Wrap(err) } - err = sshutil.TestFile(ctx, g.Client(), g.Logger(), cloudInitCompleteFile, sshutil.TestRegularFile) + err = sshutil.TestFile( + ctx, + g.Client(), + g.Logger(), + cloudInitCompleteFile, + sshutil.TestRegularFile, + ) if err == nil { g.Logger().Debug("node already bootstrapped") return nil @@ -352,56 +423,99 @@ func bootstrapAzure(ctx context.Context, g *gravity, param cloudDynamicParams) ( return trace.Wrap(err) } - err = sshutil.TestFile(ctx, g.Client(), g.Logger(), cloudInitSupportedFile, sshutil.TestRegularFile) + err = sshutil.TestFile( + ctx, + g.Client(), + g.Logger(), + cloudInitSupportedFile, + sshutil.TestRegularFile, + ) if err == nil { g.Logger().Debug("cloud-init underway") - return sshutil.WaitForFile(ctx, g.Client(), g.Logger(), cloudInitCompleteFile, sshutil.TestRegularFile) + return sshutil.WaitForFile( + ctx, + g.Client(), + g.Logger(), + cloudInitCompleteFile, + sshutil.TestRegularFile, + ) } if !trace.IsNotFound(err) { return trace.Wrap(err) } // apparently cloud-init scripts are not supported for given OS - err = sshutil.RunScript(ctx, g.Client(), g.Logger(), - filepath.Join(param.ScriptPath, "bootstrap", fmt.Sprintf("%s.sh", param.os.Vendor)), - sshutil.SUDO) + err = sshutil.RunScript( + ctx, g.Client(), g.Logger(), + filepath.Join( + param.ScriptPath, + "bootstrap", + fmt.Sprintf("%s.sh", param.os.Vendor), + ), + sshutil.SUDO, + ) return trace.Wrap(err) } // bootstrapCloud is a simple workflow to wait for cloud-init to complete -func bootstrapCloud(ctx context.Context, g *gravity, param cloudDynamicParams) (err error) { +func bootstrapCloud( + ctx context.Context, + g *gravity, + param cloudDynamicParams, +) (err error) { return trace.Wrap(g.waitForBootstrapScript(ctx)) } func (g *gravity) waitForBootstrapScript(ctx context.Context) error { - cmd := fmt.Sprintf(`sudo bash -c "[[ -f %v ]] && exit 2 || [[ -f %v ]] && exit 0 || 
exit 1"`, - cloudInitFailedFile, cloudInitCompleteFile) - err := wait.Retry(ctx, func() error { - err := sshutil.RunAndParse(ctx, g.Client(), g.Logger(), cmd, nil, sshutil.ParseDiscard) - if err == nil { - return nil - } - if exitError, ok := trace.Unwrap(err).(sshutil.ExitStatusError); ok { - /* - 0 bootstrap successful file found - 2 bootstrap failed file found - 1 no bootstrap file found - */ - switch exitError.ExitStatus() { - case 0: + cmd := fmt.Sprintf( + `sudo bash -c "[[ -f %v ]] && exit 2 || [[ -f %v ]] && exit 0 || exit 1"`, + cloudInitFailedFile, cloudInitCompleteFile, + ) + err := wait.Retry( + ctx, func() error { + err := sshutil.RunAndParse( + ctx, + g.Client(), + g.Logger(), + cmd, + nil, + sshutil.ParseDiscard, + ) + if err == nil { return nil - case 2: - return wait.Abort(trace.Errorf("bootstrap script failed")) - case 1: - return wait.Continue("bootstrap status file not found") } - } - return wait.Abort(trace.Wrap(err, "waiting for bootstrap script to complete")) - }) + if exitError, ok := trace.Unwrap(err).(sshutil.ExitStatusError); ok { + /* + 0 bootstrap successful file found + 2 bootstrap failed file found + 1 no bootstrap file found + */ + switch exitError.ExitStatus() { + case 0: + return nil + case 2: + return wait.Abort(trace.Errorf("bootstrap script failed")) + case 1: + return wait.Continue("bootstrap status file not found") + } + } + return wait.Abort( + trace.Wrap( + err, + "waiting for bootstrap script to complete", + ), + ) + }, + ) return trace.Wrap(err) } -func connectVMs(ctx context.Context, log logrus.FieldLogger, params cloudDynamicParams, nodes []infra.Node) (out []*gravity, err error) { +func connectVMs( + ctx context.Context, + log logrus.FieldLogger, + params cloudDynamicParams, + nodes []infra.Node, +) (out []*gravity, err error) { errC := make(chan error, len(nodes)) nodeC := make(chan interface{}, len(nodes)) @@ -425,22 +539,31 @@ func connectVMs(ctx context.Context, log logrus.FieldLogger, params cloudDynamic out = 
append(out, node.(*gravity)) } - sort.Slice(out, func(i, j int) bool { - return out[i].Node().PrivateAddr() < out[j].Node().PrivateAddr() - }) + sort.Slice( + out, func(i, j int) bool { + return out[i].Node().PrivateAddr() < out[j].Node().PrivateAddr() + }, + ) return out, nil } -func connectVM(ctx context.Context, log logrus.FieldLogger, node infra.Node, param cloudDynamicParams) (*gravity, error) { +func connectVM( + ctx context.Context, + log logrus.FieldLogger, + node infra.Node, + param cloudDynamicParams, +) (*gravity, error) { g := &gravity{ node: node, param: param, ts: time.Now(), - log: log.WithFields(logrus.Fields{ - "ip": node.PrivateAddr(), - "public_ip": node.Addr(), - }), + log: log.WithFields( + logrus.Fields{ + "ip": node.PrivateAddr(), + "public_ip": node.Addr(), + }, + ), } client, err := sshClient(ctx, g.node, g.log) @@ -457,7 +580,12 @@ func connectVM(ctx context.Context, log logrus.FieldLogger, node infra.Node, par // 2. (TODO) run bootstrap scripts - as Azure doesn't support them for RHEL/CentOS, will migrate here // 2. - i.e. run bootstrap commands, load installer, etc. 
// TODO: migrate bootstrap scripts here as well; -func configureVM(ctx context.Context, log logrus.FieldLogger, node *gravity, param cloudDynamicParams) (err error) { +func configureVM( + ctx context.Context, + log logrus.FieldLogger, + node *gravity, + param cloudDynamicParams, +) (err error) { switch param.CloudProvider { case constants.AWS: err = bootstrapCloud(ctx, node, param) @@ -465,10 +593,13 @@ func configureVM(ctx context.Context, log logrus.FieldLogger, node *gravity, par err = bootstrapAzure(ctx, node, param) case constants.GCE: err = bootstrapCloud(ctx, node, param) - case constants.Ops: - // For ops installs the installer is not needed + case constants.Ops, constants.Vsphere: + // For ops and vsphere installs the installer is not needed default: - return trace.BadParameter("unsupported cloud provider %s", param.CloudProvider) + return trace.BadParameter( + "unsupported cloud provider %s", + param.CloudProvider, + ) } if err != nil { return trace.Wrap(err) @@ -477,13 +608,21 @@ func configureVM(ctx context.Context, log logrus.FieldLogger, node *gravity, par return nil } -func validateDiskSpeed(ctx context.Context, nodes []*gravity, device string, logger logrus.FieldLogger) error { +func validateDiskSpeed( + ctx context.Context, + nodes []*gravity, + device string, + logger logrus.FieldLogger, +) error { logger.Debug("Ensuring disk speed is adequate across nodes.") ctx, cancel := context.WithTimeout(ctx, diskWaitTimeout) defer cancel() err := waitDisks(ctx, nodes, []string{"/iotest", device}, logger) if err != nil { - err = trace.Wrap(err, "VM disks did not meet performance requirements, tear down as non-usable") + err = trace.Wrap( + err, + "VM disks did not meet performance requirements, tear down as non-usable", + ) logger.WithError(err).Error("VM disks did not meet performance requirements, tear down as non-usable.") return trace.Wrap(err) } @@ -492,7 +631,12 @@ func validateDiskSpeed(ctx context.Context, nodes []*gravity, device string, log // 
waitDisks is a necessary workaround for Azure VMs to wait until their disk initialization processes are complete // otherwise it'll fail telekube pre-install checks -func waitDisks(ctx context.Context, nodes []*gravity, paths []string, logger logrus.FieldLogger) error { +func waitDisks( + ctx context.Context, + nodes []*gravity, + paths []string, + logger logrus.FieldLogger, +) error { errs := make(chan error, len(nodes)) for _, node := range nodes { @@ -505,36 +649,53 @@ func waitDisks(ctx context.Context, nodes []*gravity, paths []string, logger log } // waitDisk will wait specific disk performance to report OK -func waitDisk(ctx context.Context, node *gravity, paths []string, minSpeed uint64, logger logrus.FieldLogger) error { - err := wait.Retry(ctx, func() error { - for _, path := range paths { - if !strings.HasPrefix(path, "/dev") { - defer func() { - errRemove := sshutil.Run(ctx, node.Client(), node.Logger(), - fmt.Sprintf("sudo /bin/rm -f %s", path), nil) - if errRemove != nil { - logger.Warnf("Failed to remove path: %v.", errRemove) - } - }() - } - var out string - err := sshutil.RunAndParse(ctx, node.Client(), node.Logger(), - fmt.Sprintf("sudo dd if=/dev/zero of=%s bs=100K count=1024 conv=fdatasync 2>&1", path), - nil, sshutil.ParseAsString(&out)) - if err != nil { - return wait.Abort(trace.Wrap(err)) - } - speed, err := ParseDDOutput(out) - if err != nil { - return wait.Abort(trace.Wrap(err)) - } - if speed < minSpeed { - return wait.Continue("%s has %v/s < minimum of %v/s", - path, humanize.Bytes(speed), humanize.Bytes(minSpeed)) +func waitDisk( + ctx context.Context, + node *gravity, + paths []string, + minSpeed uint64, + logger logrus.FieldLogger, +) error { + err := wait.Retry( + ctx, func() error { + for _, path := range paths { + if !strings.HasPrefix(path, "/dev") { + defer func() { + errRemove := sshutil.Run( + ctx, node.Client(), node.Logger(), + fmt.Sprintf("sudo /bin/rm -f %s", path), nil, + ) + if errRemove != nil { + logger.Warnf("Failed to 
remove path: %v.", errRemove) + } + }() + } + var out string + err := sshutil.RunAndParse( + ctx, node.Client(), node.Logger(), + fmt.Sprintf( + "sudo dd if=/dev/zero of=%s bs=100K count=1024 conv=fdatasync 2>&1", + path, + ), + nil, sshutil.ParseAsString(&out), + ) + if err != nil { + return wait.Abort(trace.Wrap(err)) + } + speed, err := ParseDDOutput(out) + if err != nil { + return wait.Abort(trace.Wrap(err)) + } + if speed < minSpeed { + return wait.Continue( + "%s has %v/s < minimum of %v/s", + path, humanize.Bytes(speed), humanize.Bytes(minSpeed), + ) + } } - } - return nil - }) + return nil + }, + ) return trace.Wrap(err) } diff --git a/infra/gravity/terraform.go b/infra/gravity/terraform.go index a6760a6c..68a3cb17 100644 --- a/infra/gravity/terraform.go +++ b/infra/gravity/terraform.go @@ -202,6 +202,12 @@ func makeDynamicParams(baseConfig ProvisionerConfig) (*cloudDynamicParams, error "redhat": "redhat", "centos": "centos", }, + constants.Vsphere: { + "ubuntu": "ubuntu", + "debian": "admin", + "redhat": "polaris", + "centos": "centos", + }, constants.Ops: { "centos": "centos", }, @@ -254,6 +260,10 @@ func makeDynamicParams(baseConfig ProvisionerConfig) (*cloudDynamicParams, error param.terraform.GCE.Region = baseConfig.cloudRegions.Next() param.terraform.GCE.NodeTag = gce.TranslateClusterName(baseConfig.tag) param.terraform.VarFilePath = baseConfig.GCE.VarFilePath + case baseConfig.Vsphere != nil: + config := *baseConfig.Vsphere + param.terraform.Vsphere = &config + param.terraform.Vsphere.SSHUser = param.user } return ¶m, nil diff --git a/infra/providers/azure/azure.go b/infra/providers/azure/azure.go index de0d96d0..e3a9bbb0 100644 --- a/infra/providers/azure/azure.go +++ b/infra/providers/azure/azure.go @@ -37,8 +37,8 @@ type Token struct { } const ( - tokenURL = "https://login.microsoftonline.com/%s/oauth2/token" - managementURL = "https://management.azure.com/subscriptions/%s/resourcegroups/%s?api-version=2016-09-01" + tokenURL = 
"https://login.microsoftonline.us/%s/oauth2/token" + managementURL = "https://management.usgovcloudapi.net/subscriptions/%s/resourcegroups/%s?api-version=2016-09-01" ) // GetAuthToken retrieves OAuth token for an application diff --git a/infra/providers/vsphere/config.go b/infra/providers/vsphere/config.go new file mode 100644 index 00000000..49f98264 --- /dev/null +++ b/infra/providers/vsphere/config.go @@ -0,0 +1,22 @@ +package vsphere + +// Config specifies the vcenter specific parameters +type Config struct { + VsphereUser string `json:"vsphere_user" yaml:"vsphere_user"` + VspherePassword string `json:"vsphere_password" yaml:"vsphere_password"` + VsphereServer string `json:"vsphere_server" yaml:"vsphere_server"` + Datacenter string `json:"datacenter" yaml:"datacenter"` + Cluster string `json:"cluster" yaml:"cluster"` + VmFolder string `json:"vm_folder" yaml:"vm_folder"` + Network string `json:"network" yaml:"network"` + NodeTag string `json:"node_tag" yaml:"node_tag"` + VmType string `json:"vm_type" yaml:"vm_type"` + Datastore string `json:"datastore" yaml:"datastore"` + ResourcePool string `json:"resource_pool" yaml:"resource_pool"` + SSHUser string `json:"os_user" yaml:"os_user"` + // SSHKeyPath specifies the location of the SSH private key for remote access + SSHKeyPath string `json:"-" yaml:"ssh_key_path" validate:"required"` + // SSHPublicKeyPath specifies the location of the public SSH key + SSHPublicKeyPath string `json:"ssh_pub_key_path" yaml:"ssh_pub_key_path" validate:"required"` + Template string `json:"template" yaml:"template"` // TODO:Move this? 
+} diff --git a/infra/providers/vsphere/vsphere.go b/infra/providers/vsphere/vsphere.go new file mode 100644 index 00000000..61904924 --- /dev/null +++ b/infra/providers/vsphere/vsphere.go @@ -0,0 +1 @@ +package vsphere diff --git a/infra/terraform/config.go b/infra/terraform/config.go index d62c983b..427768a4 100644 --- a/infra/terraform/config.go +++ b/infra/terraform/config.go @@ -21,6 +21,7 @@ import ( "github.com/gravitational/robotest/infra/providers/aws" "github.com/gravitational/robotest/infra/providers/azure" "github.com/gravitational/robotest/infra/providers/gce" + "github.com/gravitational/robotest/infra/providers/vsphere" "github.com/gravitational/robotest/lib/constants" "github.com/gravitational/trace" @@ -73,6 +74,8 @@ func (c Config) SSHConfig() (user, keypath string) { return c.Azure.SSHUser, c.Azure.SSHKeyPath case constants.GCE: return c.GCE.SSHUser, c.GCE.SSHKeyPath + case constants.Vsphere: + return c.Vsphere.SSHUser, c.Vsphere.SSHKeyPath default: return "", "" } @@ -90,6 +93,8 @@ type Config struct { Azure *azure.Config // GCE defines Google Compute Engine connection parameters GCE *gce.Config + // Vsphere defines Vcenter connection parameters + Vsphere *vsphere.Config // OS specified the OS distribution OS string `json:"os" yaml:"os" validate:"required,eq=ubuntu|eq=redhat|eq=centos|eq=debian|eq=sles|eq=suse"` // ScriptPath is the path to the terraform script or directory for provisioning diff --git a/infra/terraform/terraform.go b/infra/terraform/terraform.go index 6d6cae31..904ecb95 100644 --- a/infra/terraform/terraform.go +++ b/infra/terraform/terraform.go @@ -417,6 +417,21 @@ func configToTerraformVars(cfg Config) (tfvars map[string]interface{}, err error if cfg.GCE.Zone != "" { tfvars["zone"] = cfg.GCE.Zone } + case constants.Vsphere: + tfvars["vsphere_user"] = cfg.Vsphere.VsphereUser + tfvars["vsphere_password"] = cfg.Vsphere.VspherePassword + tfvars["vsphere_server"] = cfg.Vsphere.VsphereServer + tfvars["datacenter"] = 
cfg.Vsphere.Datacenter + tfvars["cluster"] = cfg.Vsphere.Cluster + tfvars["datastore"] = cfg.Vsphere.Datastore + tfvars["vm_folder"] = cfg.Vsphere.VmFolder + tfvars["network"] = cfg.Vsphere.Network + tfvars["template"] = cfg.Vsphere.Template + tfvars["node_tag"] = cfg.Vsphere.NodeTag + tfvars["vm_type"] = cfg.Vsphere.VmType + tfvars["os_user"] = cfg.Vsphere.SSHUser + tfvars["ssh_key_path"] = cfg.Vsphere.SSHKeyPath + tfvars["ssh_pub_key_path"] = cfg.Vsphere.SSHPublicKeyPath default: return nil, trace.BadParameter("invalid cloud provider: %v", cfg.CloudProvider) } diff --git a/lib/constants/constants.go b/lib/constants/constants.go index 4348f738..c2afe5f4 100644 --- a/lib/constants/constants.go +++ b/lib/constants/constants.go @@ -69,4 +69,6 @@ const ( GCE = "gce" // Ops specifies a special cloud provider - a telekube Ops Center Ops = "ops" + // Vsphere specifies a local vSphere cluster as the cloud provider + Vsphere = "vsphere" )