From d618baca0d0dc211d2d3edd81f8ed23e313b45f4 Mon Sep 17 00:00:00 2001 From: nikitaved Date: Wed, 3 Apr 2024 07:42:29 -0400 Subject: [PATCH] test_populate_grads for nanogpt - use a smaller model to avoid OOMs in CI --- thunder/tests/test_grad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thunder/tests/test_grad.py b/thunder/tests/test_grad.py index fb81364740..4c22c45149 100644 --- a/thunder/tests/test_grad.py +++ b/thunder/tests/test_grad.py @@ -1395,7 +1395,7 @@ def test_populate_grads_nanogpt(executor, device, dtype): from thunder.benchmarks import NanoGPTBenchmark, NanoGPTConfig # NOTE Currently setting dropout to zero for reproducibility, other settings taken from gpt2 config - config = NanoGPTConfig(dropout=0, n_layer=12, n_head=12, n_embd=768) + config = NanoGPTConfig(dropout=0, n_layer=12, n_head=12, n_embd=384) bench = NanoGPTBenchmark(config=config, requires_grad=True, device=device, dtype=dtype) model = bench.fn()