Merge branch 'prompt-mask' of github.com:neuralmagic/sparseml into prompt-mask

horheynm · horheynm · commit 6614ef1b04e3 · 2024-03-27T17:56:38.000Z
diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml
@@ -0,0 +1,58 @@
+name: Build PyPi Wheel
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    branches:
+      - main
+      - 'release/[0-9]+.[0-9]+'
+  push:
+    branches:
+      - main
+  release:
+    types: [created, published]
+  schedule:
+    - cron: '0 0 * * *'
+
+permissions:
+  id-token: write
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+# if neither DEV nor RELEASE is true, a nightly build is created
+# everything is pushed to internal unless the run was triggered by the nightly scheduled cron job or by a release event
+# TODO: the release tag workflow is missing and still needs to be added
+env:
+  INTERNAL: ${{ github.event_name != 'schedule' && github.event_name != 'release'}}
+  RELEASE: ${{ github.event_name =='release' || (startsWith(github.base_ref, 'release/') && github.event_name == 'pull_request')}}
+  DEV: ${{ github.base_ref == 'main' && github.event_name == 'pull_request'}}
+  NAME: ${{ github.event.number }} 
+
+jobs:
+  build_and_push:  # builds the wheel and uploads it to s3
+    runs-on: ubuntu-latest
+    outputs:
+      wheel: ${{ steps.push-wheel.outputs.wheel }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+      - name: Login to s3
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: ${{ secrets.AWS_WEBIDENTITY_FOR_GITHUB_ACTIONS }}
+          aws-region: us-east-1
+      - name: Build PyPi Wheel
+        id: build-wheel
+        uses: neuralmagic/nm-actions/actions/pypi_build@main
+        with:  # `with:` values are not shell-expanded, so read workflow env via the env context
+          dev: ${{ env.DEV }}
+          release: ${{ env.RELEASE }}
+          name: ${{ env.NAME }}
+      - name: Push to s3 bucket
+        id: push-wheel
+        uses: neuralmagic/nm-actions/actions/s3_push@main
+        with:  # same as above: resolve INTERNAL through the env context
+          filename: dist/*.whl
+          internal: ${{ env.INTERNAL }}
diff --git a/_scratch/mask.py b/_scratch/mask.py
diff --git a/setup.py b/setup.py
@@ -20,6 +20,7 @@
 
 # default variables to be overwritten by the version.py file
 is_release = None
+is_dev = None
 version = "unknown"
 version_major_minor = version
 
@@ -28,7 +29,12 @@
 print(f"loaded version {version} from src/sparseml/version.py")
 version_nm_deps = f"{version_major_minor}.0"
 
-_PACKAGE_NAME = "sparseml" if is_release else "sparseml-nightly"
+if is_release:
+    _PACKAGE_NAME = "sparseml"
+elif is_dev:
+    _PACKAGE_NAME = "sparseml-dev"
+else:
+    _PACKAGE_NAME = "sparseml-nightly"
 
 _deps = [
     "setuptools<=59.5.0",
diff --git a/src/sparseml/transformers/finetune/runner.py b/src/sparseml/transformers/finetune/runner.py
@@ -40,7 +40,11 @@
 )
 from sparseml.transformers.finetune.model_args import ModelArguments
 from sparseml.transformers.finetune.training_args import TrainingArguments
-from sparseml.utils.fsdp.helpers import is_fsdp_model, unwrap_and_export_model
+from sparseml.utils.fsdp.helpers import (
+    find_and_move_state_dicts_to_cpu,
+    is_fsdp_model,
+    unwrap_and_export_model,
+)
 
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
@@ -175,6 +179,15 @@ def one_shot(self, stage: Optional[str] = None):
                     output_dir=self._output_dir,
                     tokenizer=self.tokenizer,
                 )
+                # only allow the main process to move the state
+                # dicts to cpu
+                if self.trainer.accelerator.is_main_process:
+                    # assuming quantization is the last step
+                    # we no longer need the original model
+                    # and can safely delete it to save memory
+                    del self.trainer.model
+                    find_and_move_state_dicts_to_cpu(self._output_dir)
+
         else:
             save_model_and_recipe(
                 model=self.trainer.model,
diff --git a/src/sparseml/utils/fsdp/helpers.py b/src/sparseml/utils/fsdp/helpers.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 import operator
+from pathlib import Path
 from typing import Optional, Union
 
 
@@ -25,6 +27,7 @@
 except ImportError:
     FullyShardedDataParallel = None
 
+import torch
 from torch.nn import Module
 
 from sparseml.core.model import ModifiableModel
@@ -39,8 +42,11 @@
     "unwrap_and_export_model",
     "save_pretrained_fsdp",
     "get_fsdp_parent",
+    "find_and_move_state_dicts_to_cpu",
 ]
 
+_LOGGER = logging.getLogger(__name__)
+
 
 def is_fsdp_model(model: Module) -> bool:
     """
@@ -113,6 +119,27 @@ def unwrap_and_export_model(model, accelerator, output_dir, tokenizer):
         )
 
 
+def find_and_move_state_dicts_to_cpu(output_dir: str):
+    """
+    Looks for state dicts in the output directory (recursively) and
+    overwrites them in place with cpu state dicts.
+
+    This is needed for quantized models trained with FSDP, as the state dict
+    contains device information which can cause issues when loading the model
+    using transformers AutoModel.from_pretrained(...).
+    Assumes the state dicts are named pytorch_model*.bin under output_dir.
+    """
+
+    for model_file in Path(output_dir).rglob("pytorch_model*.bin"):
+        # map_location avoids needing the original (gpu) device to deserialize
+        loaded_dict = torch.load(model_file, map_location="cpu")
+        for key, value in loaded_dict.items():
+            if isinstance(value, torch.Tensor):
+                loaded_dict[key] = value.cpu()
+        torch.save(loaded_dict, model_file)
+        _LOGGER.info(f"Moved state dict {model_file} to cpu")
+
+
 def save_pretrained_fsdp(model, accelerator, output_dir, save_safetensors: bool = True):
     full_state_dict_config = FullStateDictConfig(offload_to_cpu=True, rank0_only=True)
     """
diff --git a/src/sparseml/version.py b/src/sparseml/version.py
@@ -21,14 +21,17 @@
 
 version_base = "1.7.0"
 is_release = False  # change to True to set the generated version as a release version
+is_dev = False
+dev_number = None
 
 
 def _generate_version():
-    return (
-        version_base
-        if is_release
-        else f"{version_base}.{date.today().strftime('%Y%m%d')}"
-    )
+    if is_release:  # release: the bare base version
+        return version_base
+    elif is_dev:  # NOTE(review): dev_number defaults to None -- confirm build sets it
+        return f"{version_base}.dev{dev_number}"
+    else:  # otherwise (nightly): base version plus today's date as YYYYMMDD
+        return f"{version_base}.{date.today().strftime('%Y%m%d')}"
 
 
 __all__ = [