Skip to content

Commit

Permalink
Merge branch 'master' into parambole/fixit
Browse files Browse the repository at this point in the history
  • Loading branch information
parambole authored Jan 9, 2025
2 parents 8c44532 + d628936 commit c70659f
Show file tree
Hide file tree
Showing 13 changed files with 372 additions and 24 deletions.
11 changes: 11 additions & 0 deletions dags/legacy_test/tests/pytorch/common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,15 @@ local volumes = import 'templates/volumes.libsonnet';
},
},
},

// Pinned versions of Python libraries that are known to work with the
// Hugging Face models and workflows exercised in PyTorch/XLA CI.
// The value is a single string with one `name==version` pin per line,
// each line terminated by a newline.
HuggingfacePipVersionConstraints:: std.lines([
  'accelerate==1.2.1',
  'datasets==3.2.0',
  'evaluate==0.4.3',
  'huggingface-hub==0.27.1',
  'safetensors==0.5.0',
  'tokenizers==0.19.1',
]),
}
31 changes: 31 additions & 0 deletions dags/legacy_test/tests/pytorch/nightly/accelerate-smoke.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
local experimental = import '../experimental.libsonnet';
local common = import 'common.libsonnet';
local tpus = import 'templates/tpus.libsonnet';

{
  // Base test definition: runs the `accelerate test` CLI command.
  accelerate:: common.PyTorchTest + common.Functional {
    modelName: 'accelerate',
    mode: 'smoke',
    command: ['accelerate', 'test'],
  },

  // Runtime mixin: common.PyTorchTpuVmMixin combined with common.Accelerate.
  pjrt:: common.PyTorchTpuVmMixin + common.Accelerate,

  // Accelerator selections.
  v2_8:: { accelerator: tpus.v2_8 },
  v4_8:: { accelerator: tpus.v4_8 },

  // Aliases are read through `self`, so an override of any hidden field
  // above is picked up when `configs` is evaluated.
  local baseTest = self.accelerate,
  local runtime = self.pjrt,
  local accelerators = [self.v2_8, self.v4_8],

  // One config per accelerator, in the order listed in `accelerators`.
  configs: [
    baseTest + accelerator + runtime
    for accelerator in accelerators
  ],
}
42 changes: 42 additions & 0 deletions dags/legacy_test/tests/pytorch/nightly/common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,48 @@ local volumes = import 'templates/volumes.libsonnet';
},
},


// Mixin that sets up a TPU VM for Hugging Face Accelerate tests.
//
// tpuVmExports (appended to the inherited value): puts ~/.local/bin on PATH.
// tpuVmExtraSetup (replaces any inherited value):
//   1. prepends $HOME/.local/bin to PATH when that directory exists;
//   2. writes the shared pin list to ~/hf-constraints.txt (the `%s`
//      placeholder below is filled with common.HuggingfacePipVersionConstraints);
//   3. installs pytest and accelerate with `-c ~/hf-constraints.txt`;
//   4. writes ~/.cache/huggingface/accelerate/default_config.yaml;
//   5. runs `accelerate env`.
// Both heredoc delimiters are quoted, so the shell copies the heredoc bodies
// verbatim without expanding `$` or backticks.
Accelerate:: {
  tpuSettings+: {
    tpuVmExports+: |||
      export PATH=~/.local/bin:$PATH
    |||,
    tpuVmExtraSetup: |||
      if [ -d "$HOME/.local/bin" ] ; then
      export PATH="$HOME/.local/bin:$PATH"
      fi
      cat > ~/hf-constraints.txt << 'HF_CONSTRAINTS_EOF'
      %s
      HF_CONSTRAINTS_EOF
      pip install pytest accelerate -c ~/hf-constraints.txt
      mkdir -p ~/.cache/huggingface/accelerate/
      cat > ~/.cache/huggingface/accelerate/default_config.yaml << 'HF_CONFIG_EOF'
      compute_environment: LOCAL_MACHINE
      distributed_type: XLA
      downcast_bf16: 'no'
      machine_rank: 0
      main_training_function: main
      mixed_precision: 'no'
      num_machines: 1
      num_processes: null
      rdzv_backend: static
      same_network: true
      tpu_env: []
      tpu_use_cluster: false
      tpu_use_sudo: false
      use_cpu: false
      HF_CONFIG_EOF
      accelerate env
    ||| % common.HuggingfacePipVersionConstraints,
  },
},

// Re-export of the pin list from the imported `common`, so that files
// importing this file can read it as a field of this object. The field is
// hidden (`::`), so it does not appear in manifested output.
HuggingfacePipVersionConstraints:: common.HuggingfacePipVersionConstraints,

// DEPRECATED: Use PyTorchTpuVmMixin instead
tpu_vm_nightly_install: self.PyTorchTpuVmMixin.tpuSettings.tpuVmPytorchSetup,
}
67 changes: 67 additions & 0 deletions dags/legacy_test/tests/pytorch/nightly/hf-bert.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

local experimental = import '../experimental.libsonnet';
local common = import 'common.libsonnet';
local timeouts = import 'templates/timeouts.libsonnet';
local tpus = import 'templates/tpus.libsonnet';

{
  // Base test: runs test_train_hf_transformer.py and logs to $(MODEL_DIR).
  bert:: common.PyTorchTest {
    modelName: 'hf-bert',
    volumeMap+: { datasets: common.datasetsVolume },
    command: [
      'python3',
      'pytorch/xla/test/pjrt/test_train_hf_transformer.py',
      '--logdir=$(MODEL_DIR)',
    ],
  },

  // Test modes. The functional mode appends `--short_data` to the command.
  functional:: common.Functional {
    command+: ['--short_data'],
  },
  convergence:: common.Convergence,

  // Accelerator selection.
  v4_8:: { accelerator: tpus.v4_8 },

  // Runtime mixin: suffixes the model name with `-pjrt` and sets the extra
  // setup script. The script writes the shared pin list (substituted for
  // `%s`) to ~/hf-constraints.txt and passes that file to both pip installs.
  pjrt:: common.PyTorchTpuVmMixin {
    modelName+: '-pjrt',
    tpuSettings+: {
      tpuVmExtraSetup: std.format(
        |||
          cat > ~/hf-constraints.txt << 'HF_CONSTRAINTS_EOF'
          %s
          HF_CONSTRAINTS_EOF
          pip install pytest accelerate -c ~/hf-constraints.txt
          pip install tensorboardX google-cloud-storage transformers evaluate scikit-learn -c ~/hf-constraints.txt
        |||,
        common.HuggingfacePipVersionConstraints
      ),
    },
  },

  // Aliases are read through `self`, so overrides of the hidden fields
  // above are picked up when `configs` is evaluated.
  local baseTest = self.bert,
  local accelerator = self.v4_8,
  local runtime = self.pjrt,
  local variants = [
    { mode: self.functional, hours: 2 },
    { mode: self.convergence, hours: 12 },
  ],

  // One config per variant, each with its own timeout.
  configs: [
    baseTest + variant.mode + accelerator + runtime + timeouts.Hours(variant.hours)
    for variant in variants
  ],
}
13 changes: 7 additions & 6 deletions dags/legacy_test/tests/pytorch/nightly/llama2-model.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -108,25 +108,26 @@ local utils = import 'templates/utils.libsonnet';
export TPU_MEGACORE=megacore_dense
|||,
tpuVmExtraSetup: |||
cat > ~/hf-constraints.txt << 'HF_CONSTRAINTS_EOF'
%s
HF_CONSTRAINTS_EOF
# install tokenizer model
gsutil cp gs://tpu-pytorch/lsiyuan-experiment/llama/spiece.model .
# git clone and build transformers ### llama/transformers/
git clone -b llama2-google-next-training https://github.com/pytorch-tpu/transformers.git
cd transformers
sudo pip3 uninstall transformers
sudo pip3 install -e .
pip3 install datasets
pip3 install evaluate
pip3 install scikit-learn
pip3 install accelerate
sudo pip3 install -e . -c ~/hf-constraints.txt
pip3 install datasets evaluate scikit-learn accelerate -c ~/hf-constraints.txt
cd
# 7B config
mkdir 7B
cd 7B/
gsutil cp gs://manfei_public_experimental/2B.json .
|||,
||| % common.HuggingfacePipVersionConstraints,
},
},

Expand Down
6 changes: 5 additions & 1 deletion dags/legacy_test/tests/pytorch/nightly/targets.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.

local accelerate = import 'accelerate-smoke.libsonnet';
local ci = import 'ci.libsonnet';
local hfBert = import 'hf-bert.libsonnet';
local llama2 = import 'llama2-model.libsonnet';
local mnist = import 'mnist.libsonnet';
local resnet50_mp = import 'resnet50-mp.libsonnet';

// Add new models here.
// Each imported suite contributes its `configs` array exactly once:
// std.flattenArrays concatenates the arrays, so listing a suite twice would
// emit every one of its test configs twice.
std.flattenArrays([
  accelerate.configs,
  ci.configs,
  llama2.configs,
  hfBert.configs,
  mnist.configs,
  resnet50_mp.configs,
])
31 changes: 31 additions & 0 deletions dags/legacy_test/tests/pytorch/r2.6/accelerate-smoke.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
local experimental = import '../experimental.libsonnet';
local common = import 'common.libsonnet';
local tpus = import 'templates/tpus.libsonnet';

{
  // Base test definition: runs the `accelerate test` CLI command.
  accelerate:: common.PyTorchTest + common.Functional {
    modelName: 'accelerate',
    mode: 'smoke',
    command: ['accelerate', 'test'],
  },

  // Runtime mixin: common.PyTorchTpuVmMixin combined with common.Accelerate.
  pjrt:: common.PyTorchTpuVmMixin + common.Accelerate,

  // Accelerator selections.
  v2_8:: { accelerator: tpus.v2_8 },
  v4_8:: { accelerator: tpus.v4_8 },

  // Aliases are read through `self`, so an override of any hidden field
  // above is picked up when `configs` is evaluated.
  local baseTest = self.accelerate,
  local runtime = self.pjrt,
  local accelerators = [self.v2_8, self.v4_8],

  // One config per accelerator, in the order listed in `accelerators`.
  configs: [
    baseTest + accelerator + runtime
    for accelerator in accelerators
  ],
}
41 changes: 41 additions & 0 deletions dags/legacy_test/tests/pytorch/r2.6/common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,47 @@ local rcVersion = 'rc3';
},


// Mixin that sets up a TPU VM for Hugging Face Accelerate tests.
//
// tpuVmExports (appended to the inherited value): puts ~/.local/bin on PATH.
// tpuVmExtraSetup (replaces any inherited value):
//   1. prepends $HOME/.local/bin to PATH when that directory exists;
//   2. writes the shared pin list to ~/hf-constraints.txt (the `%s`
//      placeholder below is filled with common.HuggingfacePipVersionConstraints);
//   3. installs pytest and accelerate with `-c ~/hf-constraints.txt`;
//   4. writes ~/.cache/huggingface/accelerate/default_config.yaml;
//   5. runs `accelerate env`.
// Both heredoc delimiters are quoted, so the shell copies the heredoc bodies
// verbatim without expanding `$` or backticks.
Accelerate:: {
  tpuSettings+: {
    tpuVmExports+: |||
      export PATH=~/.local/bin:$PATH
    |||,
    tpuVmExtraSetup: |||
      if [ -d "$HOME/.local/bin" ] ; then
      export PATH="$HOME/.local/bin:$PATH"
      fi
      cat > ~/hf-constraints.txt << 'HF_CONSTRAINTS_EOF'
      %s
      HF_CONSTRAINTS_EOF
      pip install pytest accelerate -c ~/hf-constraints.txt
      mkdir -p ~/.cache/huggingface/accelerate/
      cat > ~/.cache/huggingface/accelerate/default_config.yaml << 'HF_CONFIG_EOF'
      compute_environment: LOCAL_MACHINE
      distributed_type: XLA
      downcast_bf16: 'no'
      machine_rank: 0
      main_training_function: main
      mixed_precision: 'no'
      num_machines: 1
      num_processes: null
      rdzv_backend: static
      same_network: true
      tpu_env: []
      tpu_use_cluster: false
      tpu_use_sudo: false
      use_cpu: false
      HF_CONFIG_EOF
      accelerate env
    ||| % common.HuggingfacePipVersionConstraints,
  },
},

// Re-export of the pin list from the imported `common`, so that files
// importing this file can read it as a field of this object. The field is
// hidden (`::`), so it does not appear in manifested output.
HuggingfacePipVersionConstraints:: common.HuggingfacePipVersionConstraints,

// DEPRECATED: Use PyTorchTpuVmMixin instead
tpu_vm_r2_6_install: self.PyTorchTpuVmMixin.tpuSettings.tpuVmPytorchSetup,
}
67 changes: 67 additions & 0 deletions dags/legacy_test/tests/pytorch/r2.6/hf-bert.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

local experimental = import '../experimental.libsonnet';
local common = import 'common.libsonnet';
local timeouts = import 'templates/timeouts.libsonnet';
local tpus = import 'templates/tpus.libsonnet';

{
  // Base test: runs test_train_hf_transformer.py and logs to $(MODEL_DIR).
  bert:: common.PyTorchTest {
    modelName: 'hf-bert',
    volumeMap+: { datasets: common.datasetsVolume },
    command: [
      'python3',
      'pytorch/xla/test/pjrt/test_train_hf_transformer.py',
      '--logdir=$(MODEL_DIR)',
    ],
  },

  // Test modes. The functional mode appends `--short_data` to the command.
  functional:: common.Functional {
    command+: ['--short_data'],
  },
  convergence:: common.Convergence,

  // Accelerator selection.
  v4_8:: { accelerator: tpus.v4_8 },

  // Runtime mixin: suffixes the model name with `-pjrt` and sets the extra
  // setup script. The script writes the shared pin list (substituted for
  // `%s`) to ~/hf-constraints.txt and passes that file to both pip installs.
  pjrt:: common.PyTorchTpuVmMixin {
    modelName+: '-pjrt',
    tpuSettings+: {
      tpuVmExtraSetup: std.format(
        |||
          cat > ~/hf-constraints.txt << 'HF_CONSTRAINTS_EOF'
          %s
          HF_CONSTRAINTS_EOF
          pip install pytest accelerate -c ~/hf-constraints.txt
          pip install tensorboardX google-cloud-storage transformers evaluate scikit-learn -c ~/hf-constraints.txt
        |||,
        common.HuggingfacePipVersionConstraints
      ),
    },
  },

  // Aliases are read through `self`, so overrides of the hidden fields
  // above are picked up when `configs` is evaluated.
  local baseTest = self.bert,
  local accelerator = self.v4_8,
  local runtime = self.pjrt,
  local variants = [
    { mode: self.functional, hours: 2 },
    { mode: self.convergence, hours: 12 },
  ],

  // One config per variant, each with its own timeout.
  configs: [
    baseTest + variant.mode + accelerator + runtime + timeouts.Hours(variant.hours)
    for variant in variants
  ],
}
Loading

0 comments on commit c70659f

Please sign in to comment.