Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/lightning/pytorch/utilities/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,17 @@ def _dataloader_init_kwargs_resolve_sampler(
"batch_size": 1,
"drop_last": False,
}
if batch_sampler is not None and batch_sampler_cls is BatchSampler:
# This is a PyTorch `BatchSampler` but maybe created by user, so batch_size and drop_last should be preserved
batch_size = batch_sampler.batch_size
drop_last = batch_sampler.drop_last if not is_predicting else False
return {
"sampler": sampler,
"shuffle": False,
"batch_sampler": None,
"batch_size": batch_size,
"drop_last": drop_last,
}

return {"sampler": sampler, "shuffle": False, "batch_sampler": None}

Expand Down
36 changes: 35 additions & 1 deletion tests/tests_pytorch/trainer/test_dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import pytest
import torch
from lightning_utilities.test.warning import no_warning_call
from torch.utils.data import RandomSampler
from torch.utils.data import BatchSampler, RandomSampler
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import Dataset, IterableDataset
from torch.utils.data.distributed import DistributedSampler
Expand Down Expand Up @@ -814,6 +814,40 @@ def test_dataloader_distributed_sampler(tmp_path):
trainer.test(model)


class DistribBatchSamplerCallback(Callback):
    """Asserts, at the start of training, that the resolved train dataloader uses a
    ``DistributedSampler`` while keeping the expected batch settings."""

    def __init__(self, expected_batch_size, expected_drop_last):
        # Values the resolved dataloader is expected to expose after the
        # strategy has re-instantiated it for distributed training.
        self.expected_batch_size = expected_batch_size
        self.expected_drop_last = expected_drop_last

    def on_train_start(self, trainer, pl_module):
        loader = trainer.train_dataloader
        # The strategy must have swapped in a DistributedSampler ...
        assert isinstance(loader.sampler, DistributedSampler)
        # ... without discarding the user's batch_size / drop_last settings.
        assert loader.batch_size == self.expected_batch_size
        assert loader.drop_last == self.expected_drop_last


@pytest.mark.parametrize("batch_size", [1, 5])
@pytest.mark.parametrize("drop_last", [False, True])
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
def test_dataloader_distributed_batch_sampler(tmp_path, batch_size, drop_last):
    """Test that a user-provided ``BatchSampler``'s arguments survive the DDP backend.

    Runs a single optimization step on two GPUs and checks, via a callback, that the
    strategy injected a ``DistributedSampler`` while preserving ``batch_size`` and
    ``drop_last``.
    """
    seed_everything(123)
    dataset = RandomDataset(32, 64)
    dataloader = DataLoader(
        dataset,
        batch_sampler=BatchSampler(RandomSampler(dataset), batch_size=batch_size, drop_last=drop_last),
    )
    model = BoringModel()
    checker = DistribBatchSamplerCallback(expected_batch_size=batch_size, expected_drop_last=drop_last)
    trainer = Trainer(
        accelerator="gpu",
        devices=[0, 1],
        num_nodes=1,
        strategy="ddp",
        default_root_dir=tmp_path,
        max_steps=1,
        callbacks=[checker],
    )
    trainer.fit(model, train_dataloaders=dataloader)


class TestModelUniqueDDPSampling(BoringModel):
def __init__(self):
super().__init__()
Expand Down
Loading