Skip to content

Commit

Permalink
Add back most Huggingface tests (#531)
Browse files Browse the repository at this point in the history
* Revert "Remove HuggingFace tests from PyTorch/XLA CI (#513)"

This reverts commit 313e33a.

* Add back most Huggingface tests

As proposed in pytorch/xla#8542, this change
adds back the accelerate smoke test and the BERT training example to the 2.6
and nightly CI.

Additionally, the llama2 training, accelerate smoke test, and BERT training
jobs are modified to install their Huggingface dependencies subject to a pip
constraints file.
  • Loading branch information
tengyifei authored Jan 9, 2025
1 parent 738dc43 commit d628936
Show file tree
Hide file tree
Showing 13 changed files with 372 additions and 24 deletions.
11 changes: 11 additions & 0 deletions dags/legacy_test/tests/pytorch/common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,15 @@ local volumes = import 'templates/volumes.libsonnet';
},
},
},

// Pinned versions of the Python libraries that are known to work with
// the Huggingface models and workflows exercised in PyTorch/XLA CI.
// The value is a newline-terminated string with one `name==version`
// requirement per line.
HuggingfacePipVersionConstraints:: std.lines([
  'accelerate==1.2.1',
  'datasets==3.2.0',
  'evaluate==0.4.3',
  'huggingface-hub==0.27.1',
  'safetensors==0.5.0',
  'tokenizers==0.19.1',
]),
}
31 changes: 31 additions & 0 deletions dags/legacy_test/tests/pytorch/nightly/accelerate-smoke.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
local experimental = import '../experimental.libsonnet';
local common = import 'common.libsonnet';
local tpus = import 'templates/tpus.libsonnet';

// Smoke test that runs `accelerate test` on TPU VMs.
{
  // Accelerator choices, kept as hidden fields so they can be mixed in.
  v2_8:: { accelerator: tpus.v2_8 },
  v4_8:: { accelerator: tpus.v4_8 },

  // TPU VM mixin combined with the Accelerate setup from the common library.
  pjrt:: common.PyTorchTpuVmMixin + common.Accelerate,

  // Base test: starts from PyTorchTest + Functional, then sets the mode to
  // 'smoke' and the command to `accelerate test`.
  accelerate:: common.PyTorchTest + common.Functional {
    modelName: 'accelerate',
    mode: 'smoke',
    command: ['accelerate', 'test'],
  },

  // One config per accelerator, in the order v2-8 then v4-8.
  configs: [
    self.accelerate + accel + self.pjrt
    for accel in [self.v2_8, self.v4_8]
  ],
}
42 changes: 42 additions & 0 deletions dags/legacy_test/tests/pytorch/nightly/common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,48 @@ local volumes = import 'templates/volumes.libsonnet';
},
},


// Mixin that sets up a TPU VM for the Huggingface Accelerate tests. The setup
// script writes the pinned pip constraints to ~/hf-constraints.txt, installs
// pytest and accelerate against them, writes a default Accelerate config, and
// finally prints `accelerate env`.
Accelerate:: {
  // Script template; the single %s is replaced by the constraint list.
  local setupTemplate = |||
    if [ -d "$HOME/.local/bin" ] ; then
    export PATH="$HOME/.local/bin:$PATH"
    fi
    cat > ~/hf-constraints.txt << 'HF_CONSTRAINTS_EOF'
    %s
    HF_CONSTRAINTS_EOF
    pip install pytest accelerate -c ~/hf-constraints.txt
    mkdir -p ~/.cache/huggingface/accelerate/
    cat > ~/.cache/huggingface/accelerate/default_config.yaml << 'HF_CONFIG_EOF'
    compute_environment: LOCAL_MACHINE
    distributed_type: XLA
    downcast_bf16: 'no'
    machine_rank: 0
    main_training_function: main
    mixed_precision: 'no'
    num_machines: 1
    num_processes: null
    rdzv_backend: static
    same_network: true
    tpu_env: []
    tpu_use_cluster: false
    tpu_use_sudo: false
    use_cpu: false
    HF_CONFIG_EOF
    accelerate env
  |||,

  tpuSettings+: {
    // Appended to the existing exports so ~/.local/bin is on PATH.
    tpuVmExports+: |||
      export PATH=~/.local/bin:$PATH
    |||,
    tpuVmExtraSetup: std.format(setupTemplate, [common.HuggingfacePipVersionConstraints]),
  },
},

// Forwards the constraint list from the imported `common` library as a hidden
// field of this object, so files that import this file can read it from here.
HuggingfacePipVersionConstraints:: common.HuggingfacePipVersionConstraints,

// DEPRECATED: Use PyTorchTpuVmMixin instead
tpu_vm_nightly_install: self.PyTorchTpuVmMixin.tpuSettings.tpuVmPytorchSetup,
}
67 changes: 67 additions & 0 deletions dags/legacy_test/tests/pytorch/nightly/hf-bert.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

local experimental = import '../experimental.libsonnet';
local common = import 'common.libsonnet';
local timeouts = import 'templates/timeouts.libsonnet';
local tpus = import 'templates/tpus.libsonnet';

// Huggingface BERT training test, driven by
// pytorch/xla/test/pjrt/test_train_hf_transformer.py.
{
  // Accelerator used by every config in this file.
  v4_8:: { accelerator: tpus.v4_8 },

  // Test modes. The functional variant appends --short_data to the command.
  convergence:: common.Convergence,
  functional:: common.Functional {
    command+: ['--short_data'],
  },

  // Base test definition shared by both modes.
  bert:: common.PyTorchTest {
    modelName: 'hf-bert',
    volumeMap+: { datasets: common.datasetsVolume },
    command: [
      'python3',
      'pytorch/xla/test/pjrt/test_train_hf_transformer.py',
      '--logdir=$(MODEL_DIR)',
    ],
  },

  // Setup script template; the single %s is replaced by the pinned pip
  // constraints, which every install below is run against.
  local setupTemplate = |||
    cat > ~/hf-constraints.txt << 'HF_CONSTRAINTS_EOF'
    %s
    HF_CONSTRAINTS_EOF
    pip install pytest accelerate -c ~/hf-constraints.txt
    pip install tensorboardX google-cloud-storage transformers evaluate scikit-learn -c ~/hf-constraints.txt
  |||,

  pjrt:: common.PyTorchTpuVmMixin {
    modelName+: '-pjrt',
    tpuSettings+: {
      tpuVmExtraSetup: std.format(setupTemplate, [common.HuggingfacePipVersionConstraints]),
    },
  },

  // Composes one runnable config for the given mode and timeout in hours.
  local variant(mode, hours) =
    self.bert + mode + self.v4_8 + self.pjrt + timeouts.Hours(hours),

  configs: [
    variant(self.functional, 2),
    variant(self.convergence, 12),
  ],
}
13 changes: 7 additions & 6 deletions dags/legacy_test/tests/pytorch/nightly/llama2-model.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -108,25 +108,26 @@ local utils = import 'templates/utils.libsonnet';
export TPU_MEGACORE=megacore_dense
|||,
// Extra VM setup for the llama2 test. It writes the pinned Huggingface pip
// constraints to ~/hf-constraints.txt, fetches the tokenizer model, installs
// the pytorch-tpu transformers fork in editable mode, installs the remaining
// Python dependencies, and downloads the model config into ~/7B.
//
// Every pip install is run against the constraints file; the earlier
// unconstrained install commands are intentionally not kept alongside them.
// `pip3 uninstall` gets -y because this script runs unattended and pip would
// otherwise ask for confirmation when transformers is already installed.
tpuVmExtraSetup: |||
  cat > ~/hf-constraints.txt << 'HF_CONSTRAINTS_EOF'
  %s
  HF_CONSTRAINTS_EOF
  # install tokenizer model
  gsutil cp gs://tpu-pytorch/lsiyuan-experiment/llama/spiece.model .
  # git clone and build transformers ### llama/transformers/
  git clone -b llama2-google-next-training https://github.com/pytorch-tpu/transformers.git
  cd transformers
  sudo pip3 uninstall -y transformers
  sudo pip3 install -e . -c ~/hf-constraints.txt
  pip3 install datasets evaluate scikit-learn accelerate -c ~/hf-constraints.txt
  cd
  # 7B config
  mkdir 7B
  cd 7B/
  gsutil cp gs://manfei_public_experimental/2B.json .
||| % common.HuggingfacePipVersionConstraints,
},
},

Expand Down
6 changes: 5 additions & 1 deletion dags/legacy_test/tests/pytorch/nightly/targets.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.

local accelerate = import 'accelerate-smoke.libsonnet';
local ci = import 'ci.libsonnet';
local hfBert = import 'hf-bert.libsonnet';
local llama2 = import 'llama2-model.libsonnet';
local mnist = import 'mnist.libsonnet';
local resnet50_mp = import 'resnet50-mp.libsonnet';

// Add new models here.
// The result is the concatenation of every listed `configs` array, so each
// model file must appear exactly once; a repeated entry would emit its test
// configs twice.
std.flattenArrays([
  accelerate.configs,
  ci.configs,
  llama2.configs,
  hfBert.configs,
  mnist.configs,
  resnet50_mp.configs,
])
31 changes: 31 additions & 0 deletions dags/legacy_test/tests/pytorch/r2.6/accelerate-smoke.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
local experimental = import '../experimental.libsonnet';
local common = import 'common.libsonnet';
local tpus = import 'templates/tpus.libsonnet';

// Smoke test that runs `accelerate test` on TPU VMs.
{
  // Accelerator choices, kept as hidden fields so they can be mixed in.
  v2_8:: { accelerator: tpus.v2_8 },
  v4_8:: { accelerator: tpus.v4_8 },

  // TPU VM mixin combined with the Accelerate setup from the common library.
  pjrt:: common.PyTorchTpuVmMixin + common.Accelerate,

  // Base test: starts from PyTorchTest + Functional, then sets the mode to
  // 'smoke' and the command to `accelerate test`.
  accelerate:: common.PyTorchTest + common.Functional {
    modelName: 'accelerate',
    mode: 'smoke',
    command: ['accelerate', 'test'],
  },

  // One config per accelerator, in the order v2-8 then v4-8.
  configs: [
    self.accelerate + accel + self.pjrt
    for accel in [self.v2_8, self.v4_8]
  ],
}
41 changes: 41 additions & 0 deletions dags/legacy_test/tests/pytorch/r2.6/common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,47 @@ local rcVersion = 'rc3';
},


// Mixin that sets up a TPU VM for the Huggingface Accelerate tests. The setup
// script writes the pinned pip constraints to ~/hf-constraints.txt, installs
// pytest and accelerate against them, writes a default Accelerate config, and
// finally prints `accelerate env`.
Accelerate:: {
  // Script template; the single %s is replaced by the constraint list.
  local setupTemplate = |||
    if [ -d "$HOME/.local/bin" ] ; then
    export PATH="$HOME/.local/bin:$PATH"
    fi
    cat > ~/hf-constraints.txt << 'HF_CONSTRAINTS_EOF'
    %s
    HF_CONSTRAINTS_EOF
    pip install pytest accelerate -c ~/hf-constraints.txt
    mkdir -p ~/.cache/huggingface/accelerate/
    cat > ~/.cache/huggingface/accelerate/default_config.yaml << 'HF_CONFIG_EOF'
    compute_environment: LOCAL_MACHINE
    distributed_type: XLA
    downcast_bf16: 'no'
    machine_rank: 0
    main_training_function: main
    mixed_precision: 'no'
    num_machines: 1
    num_processes: null
    rdzv_backend: static
    same_network: true
    tpu_env: []
    tpu_use_cluster: false
    tpu_use_sudo: false
    use_cpu: false
    HF_CONFIG_EOF
    accelerate env
  |||,

  tpuSettings+: {
    // Appended to the existing exports so ~/.local/bin is on PATH.
    tpuVmExports+: |||
      export PATH=~/.local/bin:$PATH
    |||,
    tpuVmExtraSetup: std.format(setupTemplate, [common.HuggingfacePipVersionConstraints]),
  },
},

// Forwards the constraint list from the imported `common` library as a hidden
// field of this object, so files that import this file can read it from here.
HuggingfacePipVersionConstraints:: common.HuggingfacePipVersionConstraints,

// DEPRECATED: Use PyTorchTpuVmMixin instead
tpu_vm_r2_6_install: self.PyTorchTpuVmMixin.tpuSettings.tpuVmPytorchSetup,
}
67 changes: 67 additions & 0 deletions dags/legacy_test/tests/pytorch/r2.6/hf-bert.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

local experimental = import '../experimental.libsonnet';
local common = import 'common.libsonnet';
local timeouts = import 'templates/timeouts.libsonnet';
local tpus = import 'templates/tpus.libsonnet';

// Huggingface BERT training test, driven by
// pytorch/xla/test/pjrt/test_train_hf_transformer.py.
{
  // Accelerator used by every config in this file.
  v4_8:: { accelerator: tpus.v4_8 },

  // Test modes. The functional variant appends --short_data to the command.
  convergence:: common.Convergence,
  functional:: common.Functional {
    command+: ['--short_data'],
  },

  // Base test definition shared by both modes.
  bert:: common.PyTorchTest {
    modelName: 'hf-bert',
    volumeMap+: { datasets: common.datasetsVolume },
    command: [
      'python3',
      'pytorch/xla/test/pjrt/test_train_hf_transformer.py',
      '--logdir=$(MODEL_DIR)',
    ],
  },

  // Setup script template; the single %s is replaced by the pinned pip
  // constraints, which every install below is run against.
  local setupTemplate = |||
    cat > ~/hf-constraints.txt << 'HF_CONSTRAINTS_EOF'
    %s
    HF_CONSTRAINTS_EOF
    pip install pytest accelerate -c ~/hf-constraints.txt
    pip install tensorboardX google-cloud-storage transformers evaluate scikit-learn -c ~/hf-constraints.txt
  |||,

  pjrt:: common.PyTorchTpuVmMixin {
    modelName+: '-pjrt',
    tpuSettings+: {
      tpuVmExtraSetup: std.format(setupTemplate, [common.HuggingfacePipVersionConstraints]),
    },
  },

  // Composes one runnable config for the given mode and timeout in hours.
  local variant(mode, hours) =
    self.bert + mode + self.v4_8 + self.pjrt + timeouts.Hours(hours),

  configs: [
    variant(self.functional, 2),
    variant(self.convergence, 12),
  ],
}
Loading

0 comments on commit d628936

Please sign in to comment.