Skip to content

Commit

Permalink
Update for sharding
Browse files Browse the repository at this point in the history
  • Loading branch information
Etienne authored and etiennedub committed Apr 12, 2024
1 parent 25a9cad commit c747de3
Show file tree
Hide file tree
Showing 6 changed files with 7 additions and 2 deletions.
1 change: 1 addition & 0 deletions aws/infrastructure.tf
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ locals {
ram = data.aws_ec2_instance_type.instance_type[values.prefix].memory_size
gpus = try(one(data.aws_ec2_instance_type.instance_type[values.prefix].gpus).count, 0)
mig = lookup(values, "mig", null)
shard = lookup(values, "shard", null)
}
}
}
Expand Down
1 change: 1 addition & 0 deletions azure/infrastructure.tf
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ locals {
ram = local.vmsizes[values.type].ram
gpus = local.vmsizes[values.type].gpus
mig = lookup(values, "mig", null)
shard = lookup(values, "shard", null)
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion common/configuration/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -167,4 +167,4 @@ output "bastions" {
for host, values in var.inventory: host => values
if contains(values.tags, var.bastion_tag) && contains(values.tags, "public") && (!contains(values.tags, "pool"))
}
}
}
1 change: 1 addition & 0 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,7 @@ the operating system and service software
```
This is only functional with [MIG supported GPUs](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#supported-gpus),
and with x86-64 processors (see [NVIDIA/mig-parted issue #30](https://github.com/NVIDIA/mig-parted/issues/30)).
6. `shard`: total number of [shards](https://slurm.schedmd.com/gres.html#Sharding) on the node. Sharding allows multiple jobs to share the same GPU. The total number of shards is evenly distributed across all GPUs on the node.
For some cloud providers, it is possible to define additional attributes.
The following sections present the available attributes per provider.
Expand Down
1 change: 1 addition & 0 deletions gcp/infrastructure.tf
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ locals {
ram = data.external.machine_type[values["prefix"]].result["ram"]
gpus = try(data.external.machine_type[values["prefix"]].result["gpus"], lookup(values, "gpu_count", 0))
mig = lookup(values, "mig", null)
shard = lookup(values, "shard", null)
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion openstack/infrastructure.tf
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ locals {
parseint(split(":", lookup(data.openstack_compute_flavor_v2.flavors[values.prefix].extra_specs, "pci_passthrough:alias", "gpu:0"))[1], 10)
])
mig = lookup(values, "mig", null)
shard = lookup(values, "shard", null)
}
}
}
Expand All @@ -146,4 +147,4 @@ locals {
host => merge(module.configuration.inventory[host], {id=openstack_compute_instance_v2.instances[host].id})
if contains(module.configuration.inventory[host].tags, "public")
}
}
}

0 comments on commit c747de3

Please sign in to comment.