diff --git a/aws/infrastructure.tf b/aws/infrastructure.tf
index b3649ad7..2c04af8b 100644
--- a/aws/infrastructure.tf
+++ b/aws/infrastructure.tf
@@ -193,10 +193,11 @@ locals {
       prefix = values.prefix
       tags   = values.tags
       specs = {
-        cpus = data.aws_ec2_instance_type.instance_type[values.prefix].default_vcpus
-        ram  = data.aws_ec2_instance_type.instance_type[values.prefix].memory_size
-        gpus = try(one(data.aws_ec2_instance_type.instance_type[values.prefix].gpus).count, 0)
-        mig  = lookup(values, "mig", null)
+        cpus  = data.aws_ec2_instance_type.instance_type[values.prefix].default_vcpus
+        ram   = data.aws_ec2_instance_type.instance_type[values.prefix].memory_size
+        gpus  = try(one(data.aws_ec2_instance_type.instance_type[values.prefix].gpus).count, 0)
+        mig   = lookup(values, "mig", null)
+        shard = lookup(values, "shard", null)
       }
       volumes = contains(keys(module.design.volume_per_instance), x) ? {
         for pv_key, pv_values in var.volumes:
diff --git a/azure/infrastructure.tf b/azure/infrastructure.tf
index 615a7023..44f84e5b 100644
--- a/azure/infrastructure.tf
+++ b/azure/infrastructure.tf
@@ -156,10 +156,11 @@ locals {
       prefix = values.prefix
       tags   = values.tags
       specs = {
-        cpus = local.vmsizes[values.type].vcpus
-        ram  = local.vmsizes[values.type].ram
-        gpus = local.vmsizes[values.type].gpus
-        mig  = lookup(values, "mig", null)
+        cpus  = local.vmsizes[values.type].vcpus
+        ram   = local.vmsizes[values.type].ram
+        gpus  = local.vmsizes[values.type].gpus
+        mig   = lookup(values, "mig", null)
+        shard = lookup(values, "shard", null)
       }
       volumes = contains(keys(module.design.volume_per_instance), x) ? {
         for pv_key, pv_values in var.volumes:
diff --git a/docs/README.md b/docs/README.md
index b9e2d581..00c04c4f 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -540,6 +540,7 @@ the operating system and service software
    ```
    This is only functional with [MIG supported GPUs](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#supported-gpus),
    and with x86-64 processors (see [NVIDIA/mig-parted issue #30](https://github.com/NVIDIA/mig-parted/issues/30)).
+6. `shard`: total number of [shards](https://slurm.schedmd.com/gres.html#Sharding) on the node. Sharding allows multiple jobs to share the same GPU. The total number of shards is distributed evenly across all GPUs on the node.
 
 For some cloud providers, it possible to define additional attributes.
 The following sections present the available attributes per provider.
diff --git a/gcp/infrastructure.tf b/gcp/infrastructure.tf
index c621a439..8dc7f9d5 100644
--- a/gcp/infrastructure.tf
+++ b/gcp/infrastructure.tf
@@ -166,10 +166,11 @@ locals {
       prefix = values.prefix
       tags   = values.tags
       specs = {
-        cpus = data.external.machine_type[values["prefix"]].result["vcpus"]
-        ram  = data.external.machine_type[values["prefix"]].result["ram"]
-        gpus = try(data.external.machine_type[values["prefix"]].result["gpus"], lookup(values, "gpu_count", 0))
-        mig  = lookup(values, "mig", null)
+        cpus  = data.external.machine_type[values["prefix"]].result["vcpus"]
+        ram   = data.external.machine_type[values["prefix"]].result["ram"]
+        gpus  = try(data.external.machine_type[values["prefix"]].result["gpus"], lookup(values, "gpu_count", 0))
+        mig   = lookup(values, "mig", null)
+        shard = lookup(values, "shard", null)
       }
       volumes = contains(keys(module.design.volume_per_instance), x) ? {
         for pv_key, pv_values in var.volumes:
diff --git a/openstack/infrastructure.tf b/openstack/infrastructure.tf
index ec40ebd3..ecd79399 100644
--- a/openstack/infrastructure.tf
+++ b/openstack/infrastructure.tf
@@ -121,13 +121,14 @@ locals {
       prefix = values.prefix
       tags   = values.tags
       specs = {
-        cpus = data.openstack_compute_flavor_v2.flavors[values.prefix].vcpus
-        ram  = data.openstack_compute_flavor_v2.flavors[values.prefix].ram
-        gpus = sum([
+        cpus  = data.openstack_compute_flavor_v2.flavors[values.prefix].vcpus
+        ram   = data.openstack_compute_flavor_v2.flavors[values.prefix].ram
+        gpus  = sum([
           parseint(lookup(data.openstack_compute_flavor_v2.flavors[values.prefix].extra_specs, "resources:VGPU", "0"), 10),
           parseint(split(":", lookup(data.openstack_compute_flavor_v2.flavors[values.prefix].extra_specs, "pci_passthrough:alias", "gpu:0"))[1], 10)
         ])
-        mig  = lookup(values, "mig", null)
+        mig   = lookup(values, "mig", null)
+        shard = lookup(values, "shard", null)
       }
       volumes = contains(keys(module.design.volume_per_instance), x) ? {
         for pv_key, pv_values in var.volumes:
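
Taken together, these hunks thread an optional `shard` count from the instance definition into each node's `specs`, alongside the existing `mig` attribute. As a reviewer aid, here is a minimal sketch of how the new attribute would be set in a cluster's `main.tf` instance map, assuming the `type`/`tags`/`count` instance syntax already documented in docs/README.md; the instance names, flavors, and counts below are placeholders, not values from this diff:

```hcl
# Hypothetical main.tf excerpt; flavors and counts are illustrative only.
instances = {
  mgmt  = { type = "p4-6gb", count = 1, tags = ["puppet", "mgmt", "nfs"] }
  login = { type = "p2-3gb", count = 1, tags = ["login", "public", "proxy"] }
  # "shard" is the attribute added by this diff. Each provider's
  # infrastructure.tf reads it with lookup(values, "shard", null),
  # so omitting it preserves the previous behaviour (no sharding).
  gpu   = { type = "gpu2.large", count = 1, tags = ["node"], shard = 8 }
}
```

On a flavor exposing two GPUs, `shard = 8` would advertise four shards per GPU, matching the even distribution described in the README addition.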