diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 66cb4b3ce0cf..55d6505655ac 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -112,7 +112,7 @@ for library in LOADABLE_CLASSES: LIBRARIES.append(library) -SUPPORTED_DEVICE_MAP = ["balanced"] + [get_device()] +SUPPORTED_DEVICE_MAP = ["balanced"] + [get_device(), "cpu"] logger = logging.get_logger(__name__) @@ -468,8 +468,7 @@ def module_is_offloaded(module): pipeline_is_sequentially_offloaded = any( module_is_sequentially_offloaded(module) for _, module in self.components.items() ) - - is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1 + is_pipeline_device_mapped = self._is_pipeline_device_mapped() if is_pipeline_device_mapped: raise ValueError( "It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` to remove the existing device map from the pipeline." @@ -1188,7 +1187,7 @@ def enable_model_cpu_offload(self, gpu_id: int | None = None, device: torch.devi """ self._maybe_raise_error_if_group_offload_active(raise_error=True) - is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1 + is_pipeline_device_mapped = self._is_pipeline_device_mapped() if is_pipeline_device_mapped: raise ValueError( "It seems like you have activated a device mapping strategy on the pipeline so calling `enable_model_cpu_offload() isn't allowed. You can call `reset_device_map()` first and then call `enable_model_cpu_offload()`." 
@@ -1312,7 +1311,7 @@ def enable_sequential_cpu_offload(self, gpu_id: int | None = None, device: torch raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") self.remove_all_hooks() - is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1 + is_pipeline_device_mapped = self._is_pipeline_device_mapped() if is_pipeline_device_mapped: raise ValueError( "It seems like you have activated a device mapping strategy on the pipeline so calling `enable_sequential_cpu_offload() isn't allowed. You can call `reset_device_map()` first and then call `enable_sequential_cpu_offload()`." @@ -2228,6 +2227,21 @@ def _maybe_raise_error_if_group_offload_active( return True return False + def _is_pipeline_device_mapped(self): + # We support passing `device_map="cuda"`, for example. This is helpful in case + # users want to pass `device_map="cpu"` when initializing a pipeline. This explicit declaration is desirable + # in limited VRAM environments because quantized models often initialize directly on the accelerator.
+ device_map = self.hf_device_map + is_device_type_map = False + if isinstance(device_map, str): + try: + torch.device(device_map) + is_device_type_map = True + except RuntimeError: + pass + + return not is_device_type_map and isinstance(device_map, dict) and len(device_map) > 1 + class StableDiffusionMixin: r""" diff --git a/tests/models/testing_utils/quantization.py b/tests/models/testing_utils/quantization.py index 785c4a16ea4b..0f1fbde72485 100644 --- a/tests/models/testing_utils/quantization.py +++ b/tests/models/testing_utils/quantization.py @@ -628,6 +628,21 @@ def test_bnb_training(self): """Test that quantized models can be used for training with adapters.""" self._test_quantization_training(BitsAndBytesConfigMixin.BNB_CONFIGS["4bit_nf4"]) + @pytest.mark.parametrize( + "config_name", + list(BitsAndBytesConfigMixin.BNB_CONFIGS.keys()), + ids=list(BitsAndBytesConfigMixin.BNB_CONFIGS.keys()), + ) + def test_cpu_device_map(self, config_name): + config_kwargs = BitsAndBytesConfigMixin.BNB_CONFIGS[config_name] + model_quantized = self._create_quantized_model(config_kwargs, device_map="cpu") + + assert hasattr(model_quantized, "hf_device_map"), "Model should have hf_device_map attribute" + assert model_quantized.hf_device_map is not None, "hf_device_map should not be None" + assert model_quantized.device == torch.device("cpu"), ( + f"Model should be on CPU, but is on {model_quantized.device}" + ) + @is_quantization @is_quanto diff --git a/tests/pipelines/allegro/test_allegro.py b/tests/pipelines/allegro/test_allegro.py index b2e588de0647..c126a94ce10e 100644 --- a/tests/pipelines/allegro/test_allegro.py +++ b/tests/pipelines/allegro/test_allegro.py @@ -158,6 +158,10 @@ def test_save_load_local(self): def test_save_load_optional_components(self): pass + @unittest.skip("Decoding without tiling is not yet implemented") + def test_pipeline_with_accelerator_device_map(self): + pass + def test_inference(self): device = "cpu" diff --git 
a/tests/pipelines/kandinsky/test_kandinsky_combined.py b/tests/pipelines/kandinsky/test_kandinsky_combined.py index eba897659700..fdb36b433a94 100644 --- a/tests/pipelines/kandinsky/test_kandinsky_combined.py +++ b/tests/pipelines/kandinsky/test_kandinsky_combined.py @@ -34,9 +34,7 @@ class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase): pipeline_class = KandinskyCombinedPipeline - params = [ - "prompt", - ] + params = ["prompt"] batch_params = ["prompt", "negative_prompt"] required_optional_params = [ "generator", @@ -148,6 +146,10 @@ def test_float16_inference(self): def test_dict_tuple_outputs_equivalent(self): super().test_dict_tuple_outputs_equivalent(expected_max_difference=5e-4) + @unittest.skip("Test not supported.") + def test_pipeline_with_accelerator_device_map(self): + pass + class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase): pipeline_class = KandinskyImg2ImgCombinedPipeline @@ -264,6 +266,10 @@ def test_dict_tuple_outputs_equivalent(self): def test_save_load_optional_components(self): super().test_save_load_optional_components(expected_max_difference=5e-4) + @unittest.skip("Test not supported.") + def test_pipeline_with_accelerator_device_map(self): + pass + class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase): pipeline_class = KandinskyInpaintCombinedPipeline @@ -384,3 +390,7 @@ def test_save_load_optional_components(self): def test_save_load_local(self): super().test_save_load_local(expected_max_difference=5e-3) + + @unittest.skip("Test not supported.") + def test_pipeline_with_accelerator_device_map(self): + pass diff --git a/tests/pipelines/kandinsky2_2/test_kandinsky_combined.py b/tests/pipelines/kandinsky2_2/test_kandinsky_combined.py index 62f5853da9a5..30aa2896089b 100644 --- a/tests/pipelines/kandinsky2_2/test_kandinsky_combined.py +++ b/tests/pipelines/kandinsky2_2/test_kandinsky_combined.py @@ -36,9 +36,7 @@ class 
KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase): pipeline_class = KandinskyV22CombinedPipeline - params = [ - "prompt", - ] + params = ["prompt"] batch_params = ["prompt", "negative_prompt"] required_optional_params = [ "generator", @@ -70,12 +68,7 @@ def get_dummy_components(self): def get_dummy_inputs(self, device, seed=0): prior_dummy = PriorDummies() inputs = prior_dummy.get_dummy_inputs(device=device, seed=seed) - inputs.update( - { - "height": 64, - "width": 64, - } - ) + inputs.update({"height": 64, "width": 64}) return inputs def test_kandinsky(self): @@ -155,12 +148,18 @@ def test_save_load_local(self): def test_save_load_optional_components(self): super().test_save_load_optional_components(expected_max_difference=5e-3) + @unittest.skip("Test not supported.") def test_callback_inputs(self): pass + @unittest.skip("Test not supported.") def test_callback_cfg(self): pass + @unittest.skip("Test not supported.") + def test_pipeline_with_accelerator_device_map(self): + pass + class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase): pipeline_class = KandinskyV22Img2ImgCombinedPipeline @@ -279,12 +278,18 @@ def test_save_load_optional_components(self): def save_load_local(self): super().test_save_load_local(expected_max_difference=5e-3) + @unittest.skip("Test not supported.") def test_callback_inputs(self): pass + @unittest.skip("Test not supported.") def test_callback_cfg(self): pass + @unittest.skip("Test not supported.") + def test_pipeline_with_accelerator_device_map(self): + pass + class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase): pipeline_class = KandinskyV22InpaintCombinedPipeline @@ -411,3 +416,7 @@ def test_callback_inputs(self): def test_callback_cfg(self): pass + + @unittest.skip("`device_map` is not yet supported for connected pipelines.") + def test_pipeline_with_accelerator_device_map(self): + pass diff --git 
a/tests/pipelines/kandinsky2_2/test_kandinsky_inpaint.py b/tests/pipelines/kandinsky2_2/test_kandinsky_inpaint.py index df1dd2d9872c..5659699fc7aa 100644 --- a/tests/pipelines/kandinsky2_2/test_kandinsky_inpaint.py +++ b/tests/pipelines/kandinsky2_2/test_kandinsky_inpaint.py @@ -296,6 +296,9 @@ def callback_inputs_test(pipe, i, t, callback_kwargs): output = pipe(**inputs)[0] assert output.abs().sum() == 0 + def test_pipeline_with_accelerator_device_map(self): + super().test_pipeline_with_accelerator_device_map(expected_max_difference=5e-3) + @slow @require_torch_accelerator diff --git a/tests/pipelines/kandinsky3/test_kandinsky3_img2img.py b/tests/pipelines/kandinsky3/test_kandinsky3_img2img.py index d3bfa4b3082c..23368a18dc0d 100644 --- a/tests/pipelines/kandinsky3/test_kandinsky3_img2img.py +++ b/tests/pipelines/kandinsky3/test_kandinsky3_img2img.py @@ -194,6 +194,9 @@ def test_inference_batch_single_identical(self): def test_save_load_dduf(self): super().test_save_load_dduf(atol=1e-3, rtol=1e-3) + def test_pipeline_with_accelerator_device_map(self): + super().test_pipeline_with_accelerator_device_map(expected_max_difference=5e-3) + @slow @require_torch_accelerator diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 518df8f85398..9681285e94b4 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -2355,7 +2355,6 @@ def test_torch_dtype_dict(self): f"Component '{name}' has dtype {component.dtype} but expected {expected_dtype}", ) - @require_torch_accelerator def test_pipeline_with_accelerator_device_map(self, expected_max_difference=1e-4): components = self.get_dummy_components() pipe = self.pipeline_class(**components) diff --git a/tests/pipelines/visualcloze/test_pipeline_visualcloze_combined.py b/tests/pipelines/visualcloze/test_pipeline_visualcloze_combined.py index 00ae0441fe99..578cd1b8ec61 100644 --- 
a/tests/pipelines/visualcloze/test_pipeline_visualcloze_combined.py +++ b/tests/pipelines/visualcloze/test_pipeline_visualcloze_combined.py @@ -342,3 +342,7 @@ def test_save_load_float16(self, expected_max_diff=1e-2): self.assertLess( max_diff, expected_max_diff, "The output of the fp16 pipeline changed after saving and loading." ) + + @unittest.skip("Test not supported.") + def test_pipeline_with_accelerator_device_map(self): + pass diff --git a/tests/pipelines/visualcloze/test_pipeline_visualcloze_generation.py b/tests/pipelines/visualcloze/test_pipeline_visualcloze_generation.py index ab6b3ca5c587..c1b6ca076c4a 100644 --- a/tests/pipelines/visualcloze/test_pipeline_visualcloze_generation.py +++ b/tests/pipelines/visualcloze/test_pipeline_visualcloze_generation.py @@ -310,3 +310,7 @@ def test_save_load_float16(self, expected_max_diff=1e-2): @unittest.skip("Skipped due to missing layout_prompt. Needs further investigation.") def test_encode_prompt_works_in_isolation(self, extra_required_param_value_dict=None, atol=0.0001, rtol=0.0001): pass + + @unittest.skip("Needs to be revisited later.") + def test_pipeline_with_accelerator_device_map(self, expected_max_difference=0.0001): + pass