Fix is_sharded setting for loading quant model (#2094)
Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
Kaihui-intel authored Dec 24, 2024
1 parent 2186b0a commit 9da8f80
Showing 1 changed file with 3 additions and 2 deletions.
neural_compressor/torch/algorithms/weight_only/save_load.py (3 additions, 2 deletions)

@@ -576,7 +576,7 @@ def _get_loaded_state_dict(self, config):
             "_raise_exceptions_for_missing_entries": False,
             "_commit_hash": commit_hash,
         }
-        resolved_archive_file = self._get_resolved_archive_file(**kwargs)
+        resolved_archive_file, is_sharded = self._get_resolved_archive_file(**kwargs)

         self._model_local_dir = os.path.abspath(os.path.expanduser(os.path.dirname(resolved_archive_file)))
         # if hpu format tensor can be used directly, then update resolved_archive_file to the hpu format tensor file
@@ -640,6 +640,7 @@ def _get_resolved_archive_file(self, **kwargs):
         subfolder = kwargs.get("subfolder")

         resolved_archive_file = None
+        is_sharded = False
         is_local = os.path.isdir(self.model_name_or_path)
         if is_local:  # pragma: no cover
             # self.model_name_or_path is a local directory
@@ -787,7 +788,7 @@ def _get_resolved_archive_file(self, **kwargs):
         if is_local:
             resolved_archive_file = archive_file

-        return resolved_archive_file
+        return resolved_archive_file, is_sharded

     def _init_hf_model(self, model_class, config):
         from accelerate.big_modeling import init_empty_weights
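The pattern behind this fix: `_get_resolved_archive_file` now returns a tuple `(resolved_archive_file, is_sharded)` with `is_sharded` defaulting to `False`, and the caller in `_get_loaded_state_dict` unpacks both values instead of dropping the sharding flag. Below is a minimal standalone sketch of that pattern, not the actual neural_compressor implementation; the file names and the `get_resolved_archive_file` helper are hypothetical stand-ins.

```python
import os

# Hypothetical file names used only for this sketch.
SHARDED_INDEX_NAME = "model.safetensors.index.json"
WEIGHTS_NAME = "model.safetensors"


def get_resolved_archive_file(model_dir):
    """Return (archive_file, is_sharded) for a local checkpoint directory."""
    is_sharded = False  # default, mirroring the line added by this commit
    index_file = os.path.join(model_dir, SHARDED_INDEX_NAME)
    if os.path.isfile(index_file):
        # A shard index file means the weights are split across several files.
        archive_file = index_file
        is_sharded = True
    else:
        archive_file = os.path.join(model_dir, WEIGHTS_NAME)
    # Return both values so the caller can branch on sharding.
    return archive_file, is_sharded


# Caller side: unpack the tuple, as the patched _get_loaded_state_dict now does.
archive_file, is_sharded = get_resolved_archive_file("./quantized_model")
print(archive_file, is_sharded)
```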
