add tensor parallel embedding test #1
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name, triggers, and run de-duplication policy.
name: nvidia-rtx-3090 tests

on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  # Allow other workflows to invoke this one as a reusable workflow.
  workflow_call:

concurrency:
  # One group per ref plus pull-request number; when the event carries no
  # pull request, the run number is used instead, so each such run gets
  # its own group.
  group: unit_tests-${{github.ref}}-${{github.event.pull_request.number || github.run_number}}
  # A newer run in the same group cancels the one still in progress.
  cancel-in-progress: true
jobs:
  # Runs the ViT test under MPI once per matrix value of `ginter`.
  inter-layer:
    runs-on: [nvidia]
    strategy:
      matrix:
        ginter: [1, 2]
    steps:
      - uses: actions/checkout@v3
      - name: Install AxoNN
        run: |
          pip install -e .
      - name: Download dataset
        run: |
          python -c "import torchvision; torchvision.datasets.MNIST(root=\"./axonn/tests\", download=True, train=True)"
      - name: Train
        # G_inter comes from the matrix; G_data is derived so that
        # G_inter * G_data equals the 2 MPI ranks launched below.
        # NOTE(review): presumably the test reads G_inter / G_data from the
        # environment -- confirm against axonn/tests/test_vit.py.
        run: |
          export G_inter=${{ matrix.ginter }}
          export G_data=$(( 2 / G_inter ))
          echo "training with G_inter = ${G_inter}, G_data = ${G_data}"
          mpirun -mca orte_allowed_exit_without_sync 1 -n 2 pytest --with-mpi ./axonn/tests/test_vit.py
      - name: Uninstall AxoNN
        run: |
          pip uninstall --yes axonn

  # Runs the intra-layer FC, Conv and Embedding test files, each under
  # 2 MPI ranks.
  intra-layer:
    runs-on: [nvidia]
    steps:
      - uses: actions/checkout@v3
      - name: Install AxoNN
        run: |
          pip install -e .
      - name: Run intra-layer FC unit tests
        run: |
          mpirun -mca orte_allowed_exit_without_sync 1 -n 2 pytest --with-mpi ./axonn/tests/test_intra_layer_fc.py
      - name: Run intra-layer Conv unit tests
        run: |
          mpirun -mca orte_allowed_exit_without_sync 1 -n 2 pytest --with-mpi ./axonn/tests/test_intra_layer_conv.py
      - name: Run intra-layer Embedding unit tests
        # The backward-pass and forward-pass tests are selected with `-k` and
        # launched as two separate mpirun invocations.
        run: |
          mpirun -mca orte_allowed_exit_without_sync 1 -n 2 pytest --with-mpi ./axonn/tests/test_intra_layer_emb.py -k bw_pass
          mpirun -mca orte_allowed_exit_without_sync 1 -n 2 pytest --with-mpi ./axonn/tests/test_intra_layer_emb.py -k fw_pass
      - name: Uninstall AxoNN
        run: |
          pip uninstall --yes axonn