Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update new cluster templates to match current nodegroup setup #5180

Merged
merged 6 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ repos:
rev: v3.0.0
hooks:
- id: terraform-fmt
exclude: terraform/aws/projects/template.tfvars

# Prevent unencrypted files from being committed
- repo: https://github.com/yuvipanda/pre-commit-hook-ensure-sops
Expand Down
14 changes: 8 additions & 6 deletions config/clusters/templates/aws/cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ hubs: []
# Uncomment the lines below once the support infrastructure has been deployed
# and you are ready to add the first hub

# - name: <hub_name>
{% for hub in hubs %}
# - name: {{ hub }}
# # Tip: consider changing this to something more human friendly
# display_name: "{{ cluster_name }} - <hub_name>"
# domain: <hub_name>.{{ cluster_name }}.2i2c.cloud
# helm_chart: {{ hub_type }}
# display_name: "{{ cluster_name }} - {{ hub }}"
# domain: {{ hub }}.{{ cluster_name }}.2i2c.cloud
# helm_chart: basehub
# helm_chart_values_files:
# - common.values.yaml
# - <hub_name>.values.yaml
# - enc-<hub_name>.secret.values.yaml
# - {{ hub }}.values.yaml
# - enc-{{ hub }}.secret.values.yaml
{% endfor %}
2 changes: 1 addition & 1 deletion config/clusters/templates/common/cluster-entry.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ hubs:
- name: {{ hub_name }}
display_name: {{ cluster_name }} {{ hub_name }}
domain: {{ hub_name }}.{{ cluster_name }}.2i2c.cloud
helm_chart: {{ hub_type }}
helm_chart: "basehub"
helm_chart_values_files:
- common.values.yaml
- {{ hub_name }}.values.yaml
Expand Down
2 changes: 1 addition & 1 deletion config/clusters/templates/gcp/cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ hubs: []
# # Tip: consider changing this to something more human friendly
# display_name: "{{ cluster_name }} - <hub_name>"
# domain: <hub_name>.{{ cluster_name }}.2i2c.cloud
# helm_chart: {{ hub_type }}
# helm_chart: basehub
# helm_chart_values_files:
# - common.values.yaml
# - <hub_name>.values.yaml
Expand Down
1 change: 0 additions & 1 deletion deployer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,6 @@ for a GCP cluster.
- `cluster_name` - the name of the cluster
- `cluster_region` - the region where the cluster will be deployed
- `project_id` - the project ID of the GCP project
- `hub_type` (basehub/daskhub) - whether the hub deployed there would need dask or not
- `hub_name` - the name of the first hub which will be deployed in the cluster (usually `staging`)

The templates have a set of default features and define some opinionated characteristics for the cluster.
Expand Down
13 changes: 12 additions & 1 deletion deployer/commands/generate/dedicated_cluster/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,14 @@ def aws(
...,
prompt="The AWS account id or alias. Declare 2i2c for 2i2c's SSO based accounts and paid_by_us=true",
),
hubs: str = typer.Option(
"staging",
prompt="The list of hubs that will be deployed in the cluster separated by a comma. Example: staging, prod.",
),
dask_nodes: bool = typer.Option(
False,
prompt='If this cluster needs dask nodes, please type "y", otherwise hit ENTER.',
),
force: bool = typer.Option(
False,
"--force",
Expand All @@ -134,9 +142,12 @@ def aws(
# Also store the provider, as it's useful for some jinja templates
# to differentiate between them when rendering the configuration
"provider": "aws",
"hub_type": "basehub",
"dask_nodes": dask_nodes,
"cluster_name": cluster_name,
"cluster_region": cluster_region,
"hubs": hubs.replace(
",", " "
).split(), # Convert the comma separated string to a list
"sign_in_url": sign_in_url,
"paid_by_us": str(paid_by_us).lower(),
}
Expand Down
6 changes: 5 additions & 1 deletion deployer/commands/generate/dedicated_cluster/gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ def gcp(
project_id: str = typer.Option(
..., prompt="Please insert the Project ID of the GCP project"
),
dask_nodes: bool = typer.Option(
False,
prompt='If this cluster needs dask nodes, please type "y", otherwise hit ENTER.',
),
force: bool = typer.Option(
False,
"--force",
Expand All @@ -79,7 +83,7 @@ def gcp(
# Also store the provider, as it's useful for some jinja templates
# to differentiate between them when rendering the configuration
"provider": "gcp",
"hub_type": "basehub",
"dask_nodes": dask_nodes,
"cluster_name": cluster_name,
"cluster_region": cluster_region,
"project_id": project_id,
Expand Down
1 change: 0 additions & 1 deletion deployer/commands/generate/hub_asset/cluster_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def cluster_entry(
"""

vars = {
"hub_type": "basehub",
"cluster_name": cluster_name,
"hub_name": hub_name,
}
Expand Down
45 changes: 40 additions & 5 deletions eksctl/template.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,30 @@ local nodeAz = "<< cluster_region >>a";
// A `node.kubernetes.io/instance-type` label is added, so pods
// can request a particular kind of node with a nodeSelector
local notebookNodes = [
{ instanceType: "r5.xlarge" },
{ instanceType: "r5.4xlarge" },
{ instanceType: "r5.16xlarge" },
<% for hub in hubs %>
// << hub >>
{
instanceType: "r5.xlarge",
namePrefix: "nb-<< hub >>",
labels+: { "2i2c/hub-name": "<< hub >>" },
tags+: { "2i2c:hub-name": "<< hub >>" },
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-<< hub >>",
labels+: { "2i2c/hub-name": "<< hub >>" },
tags+: { "2i2c:hub-name": "<< hub >>" },
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-<< hub >>",
labels+: { "2i2c/hub-name": "<< hub >>" },
tags+: { "2i2c:hub-name": "<< hub >>" },
},
<% endfor %>
];
<% if hub_type == "daskhub" %>

<% if dask_nodes %>
local daskNodes = [
// Node definitions for dask worker nodes. Config here is merged
// with our dask worker node definition, which uses spot instances.
Expand All @@ -52,7 +71,14 @@ local daskNodes = [
// A not yet fully established policy is being developed about using a single
// node pool, see https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
<% for hub in hubs %>
{
namePrefix: "dask-<< hub >>",
labels+: { "2i2c/hub-name": "<< hub >>" },
tags+: { "2i2c:hub-name": "<< hub >>" },
instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }
},
<% endfor %>
];
<% else %>
local daskNodes = [];
Expand Down Expand Up @@ -145,6 +171,9 @@ local daskNodes = [];
"hub.jupyter.org/node-purpose": "core",
"k8s.dask.org/node-purpose": "core",
},
tags+: {
"2i2c:node-purpose": "core"
},
},
] + [
ng + {
Expand All @@ -164,6 +193,9 @@ local daskNodes = [];
"hub.jupyter.org_dedicated": "user:NoSchedule",
"hub.jupyter.org/dedicated": "user:NoSchedule",
},
tags+: {
"2i2c:node-purpose": "user"
},
} + n for n in notebookNodes
] + ( if daskNodes != null then
[
Expand All @@ -182,6 +214,9 @@ local daskNodes = [];
"k8s.dask.org_dedicated" : "worker:NoSchedule",
"k8s.dask.org/dedicated" : "worker:NoSchedule",
},
tags+: {
"2i2c:node-purpose": "worker"
},
instancesDistribution+: {
onDemandBaseCapacity: 0,
onDemandPercentageAboveBaseCapacity: 0,
Expand Down
29 changes: 14 additions & 15 deletions terraform/aws/projects/template.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,25 @@ cluster_nodes_location = "{{ cluster_region }}a"

enable_aws_ce_grafana_backend_iam = true

# Tip: uncomment and fill the missing info in the lines below if you want
# Tip: uncomment and verify any missing info in the lines below if you want
# to set up scratch buckets for the hubs on this cluster.
#
#user_buckets = {
# "scratch-staging" : {
# "delete_after" : 7,
# "tags" : { "2i2c:hub-name" : "staging" },
# },
# # Tip: add more scratch buckets below, if this cluster will be multi-tenant
#}

# Tip: uncomment and fill the missing info in the lines below if you want
{% for hub in hubs %}
# "scratch-{{ hub }}" : {
# "delete_after" : 7,
# "tags" : { "2i2c:hub-name" : "{{ hub }}" },
# },
{% endfor %}

# Tip: uncomment and verify any missing info in the lines below if you want
# to set up specific cloud permissions for the buckets in this cluster.
#
#hub_cloud_permissions = {
# "staging" : {
# hub_cloud_permissions = {
{% for hub in hubs %}
# "{{ hub }}" : {
# "user-sa" : {
# bucket_admin_access : ["scratch-staging"],
# bucket_admin_access : ["scratch-{{ hub }}"],
# },
# },
# # Tip: add more namespaces below, if this cluster will be multi-tenant
#}

{% endfor %}
2 changes: 1 addition & 1 deletion terraform/gcp/projects/cluster.tfvars.template
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ notebook_nodes = {
}
}

{% if hub_type == "daskhub" %}
{% if dask_nodes %}
dask_nodes = {
# A not yet fully established policy is being developed about using a single
# node pool, see https://github.com/2i2c-org/infrastructure/issues/2687.
Expand Down