Skip to content

Commit

Permalink
Set maxLayers to 1000
Browse files Browse the repository at this point in the history
  • Loading branch information
Atry committed Nov 16, 2023
1 parent 5f7cc8e commit 6145091
Showing 1 changed file with 54 additions and 52 deletions.
106 changes: 54 additions & 52 deletions examples/poetry-docker-job-azure/flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -10,63 +10,65 @@
};
inputs = unlockedInputs // lockedInputs;
in
inputs.nix-ml-ops.lib.mkFlake
{
inherit inputs;
}
{
debug = true;
inputs.nix-ml-ops.lib.mkFlake { inherit inputs; } {
debug = true;

imports = [
inputs.nix-ml-ops.flakeModules.cuda
inputs.nix-ml-ops.flakeModules.devcontainer
inputs.nix-ml-ops.flakeModules.nixIde
inputs.nix-ml-ops.flakeModules.nixLd
inputs.nix-ml-ops.flakeModules.pythonEnvsPoetry
inputs.nix-ml-ops.flakeModules.kubernetesJob
inputs.nix-ml-ops.flakeModules.aksCredential
inputs.nix-ml-ops.flakeModules.devcontainerAzureCliTools
inputs.nix-ml-ops.flakeModules.devenvPythonWithLibstdcxx
];
imports = [
inputs.nix-ml-ops.flakeModules.cuda
inputs.nix-ml-ops.flakeModules.devcontainer
inputs.nix-ml-ops.flakeModules.nixIde
inputs.nix-ml-ops.flakeModules.nixLd
inputs.nix-ml-ops.flakeModules.pythonEnvsPoetry
inputs.nix-ml-ops.flakeModules.kubernetesJob
inputs.nix-ml-ops.flakeModules.aksCredential
inputs.nix-ml-ops.flakeModules.devcontainerAzureCliTools
inputs.nix-ml-ops.flakeModules.devenvPythonWithLibstdcxx
];

perSystem = { pkgs, config, lib, ... }: {
ml-ops.common.pythonPackage.base-package = pkgs.python310;
ml-ops.jobs.my-job.launchers.my-launcher.kubernetes = {
aks = { };
imageRegistry = { };
helmTemplates.job.spec.template.spec = {
tolerations = [
{
key = "sku";
operator = "Equal";
value = "gpu";
effect = "NoSchedule";
}
{
key = "kubernetes.azure.com/scalesetpriority";
operator = "Equal";
value = "spot";
effect = "NoSchedule";
}
perSystem = perSystem@{ pkgs, inputs', ... }: {
ml-ops.common.pythonPackage.base-package = pkgs.python310;
ml-ops.jobs.my-job.launchers.my-launcher.kubernetes = {
aks = { };
imageRegistry = { };
helmTemplates.job.spec.template.spec = {
tolerations = [
{
key = "sku";
operator = "Equal";
value = "gpu";
effect = "NoSchedule";
}
{
key = "kubernetes.azure.com/scalesetpriority";
operator = "Equal";
value = "spot";
effect = "NoSchedule";
}
];
containers.master-node = {
resources.limits."nvidia.com/gpu" = 1;
args = [
"python"
"-c"
''
import sys
import torch
print("sys.version =", sys.version)
print("torch.cuda.is_available() = ", torch.cuda.is_available())
''
];
containers.master-node = {
resources.limits."nvidia.com/gpu" = 1;
args = [
"python"
"-c"
''
import sys
import torch
print("sys.version =", sys.version)
print("torch.cuda.is_available() = ", torch.cuda.is_available())
''
];
};
};
};
ml-ops.devcontainer.devenvShellModule.packages = [
pkgs.kubectl
];
};

# Set maxLayers to a large number to reuse layers from previous builds.
# Docker limits an image to 127 layers by default, whereas containerd has no such limit.
# See https://grahamc.com/blog/nix-and-layered-docker-images/
ml-ops.common.devenvShellModule.containers.processes.maxLayers = 1000;

ml-ops.devcontainer.devenvShellModule.packages = [
pkgs.kubectl
];
};
};
}

0 comments on commit 6145091

Please sign in to comment.