# nv-a6000-fastgen
#
# Runs the pytest suite under ./tests inside an NVIDIA PyTorch container on a
# self-hosted runner carrying the labels `self-hosted`, `nvidia` and `a6000`.
# transformers and DeepSpeed are installed from a fresh shallow clone of their
# default branches, so every run tests against their current HEAD.

name: nv-a6000-fastgen

on:
  # Manual runs from the Actions UI / API.
  workflow_dispatch:
  # Nightly run (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 0 * * *"
  # Pull requests, except those that only touch the legacy paths listed below.
  pull_request:
    paths-ignore:
      - 'mii/legacy/**'
      - 'tests/legacy/**'
      - '.github/workflows/nv-v100-legacy.yml'

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  unit-tests:
    runs-on: [self-hosted, nvidia, a6000]
    container:
      image: nvcr.io/nvidia/pytorch:24.03-py3
      ports:
        - 80
      # Passed to `docker create`: expose all GPUs and use 8G of shared memory.
      options: --gpus all --shm-size "8G"

    steps:
      - uses: actions/checkout@v4

      # Log toolchain, driver and torch details for debugging failed runs.
      - name: Check container state
        run: |
          ldd --version
          nvcc --version
          nvidia-smi
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

      # Shallow clone of the default branch; the short SHA is printed so the
      # exact commit under test is visible in the log.
      - name: Install transformers
        run: |
          git clone --depth=1 https://github.com/huggingface/transformers
          cd transformers
          git rev-parse --short HEAD
          python -m pip install .

      # Shallow clone of the default branch, followed by `ds_report`.
      - name: Install deepspeed
        run: |
          git clone --depth=1 https://github.com/microsoft/DeepSpeed
          cd DeepSpeed
          python -m pip install .
          ds_report

      # Installs the checked-out repository itself with its `dev` extra.
      - name: Install MII
        run: |
          pip install .[dev]

      # Record the resolved package versions in the job log.
      - name: Python environment
        run: |
          python -m pip list

      - name: Unit tests
        run: |
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
          cd tests
          python -m pytest --color=yes --durations=0 --verbose -rF ./