neuralmagic
diff --git a/‎.github/workflows/build-container.yml
Lines changed: 80 additions & 0 deletions b/‎.github/workflows/build-container.yml
Lines changed: 80 additions & 0 deletions
diff --git a/‎docker/containers/docker_dev/Dockerfile
Lines changed: 25 additions & 0 deletions b/‎docker/containers/docker_dev/Dockerfile
Lines changed: 25 additions & 0 deletions
diff --git a/‎docker/containers/docker_nightly/Dockerfile
Lines changed: 21 additions & 0 deletions b/‎docker/containers/docker_nightly/Dockerfile
Lines changed: 21 additions & 0 deletions
diff --git a/‎docker/containers/docker_release/Dockerfile
Lines changed: 24 additions & 0 deletions b/‎docker/containers/docker_release/Dockerfile
Lines changed: 24 additions & 0 deletions
diff --git a/‎integrations/huggingface-transformers/finetuning/example_fsdp_config.yaml
Lines changed: 1 addition & 1 deletion b/‎integrations/huggingface-transformers/finetuning/example_fsdp_config.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎integrations/huggingface-transformers/tutorials/sparse-transfer-learning-bert-python.md
Lines changed: 1 addition & 1 deletion b/‎integrations/huggingface-transformers/tutorials/sparse-transfer-learning-bert-python.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎pyproject.toml
Lines changed: 7 additions & 1 deletion b/‎pyproject.toml
Lines changed: 7 additions & 1 deletion
diff --git a/‎setup.py
Lines changed: 4 additions & 4 deletions b/‎setup.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎src/sparseml/evaluation/integrations/perplexity.py
Lines changed: 1 addition & 2 deletions b/‎src/sparseml/evaluation/integrations/perplexity.py
Lines changed: 1 addition & 2 deletions
diff --git a/‎src/sparseml/export/validators.py
Lines changed: 26 additions & 2 deletions b/‎src/sparseml/export/validators.py
Lines changed: 26 additions & 2 deletions
diff --git a/‎src/sparseml/modifiers/distillation/utils/pytorch/model_wrapper.py
Lines changed: 7 additions & 1 deletion b/‎src/sparseml/modifiers/distillation/utils/pytorch/model_wrapper.py
Lines changed: 7 additions & 1 deletion
diff --git a/‎src/sparseml/pytorch/model_load/helpers.py
Lines changed: 28 additions & 4 deletions b/‎src/sparseml/pytorch/model_load/helpers.py
Lines changed: 28 additions & 4 deletions
@@ -0,0 +1,80 @@
+name: Build Docker Container
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    branches:
+      - main
+      - 'release/[0-9]+.[0-9]+'
+  push:
+    branches:
+      - 'main'
+  release:
+    types: [created, published]
+  schedule:
+    - cron: '0 2 * * *'
+
+# TODO: Docker containers created through a release cut vs. a PR to the release branch
+# will be pushed to different locations (i.e., one to sparseml, the other to test-sparseml).
+# These containers rely on the new internal PyPI server being enabled. Once it is enabled,
+# this workflow can be expanded to make this distinction.
+env:
+  RELEASE: ${{ github.event_name =='release' || (startsWith(github.base_ref, 'release/') && github.event_name == 'pull_request')}}
+  DEV: ${{ github.base_ref == 'main' && github.event_name == 'pull_request'}}
+  NAME: ${{ github.event.number }} 
+
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  build-container:
+    name: Build sparseml container
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 1
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v2
+        with:
+          buildkitd-flags: --debug
+      # Expose today's date (YYYYMMDD) as the `date` step output; it is used to tag
+      # the nightly image.
+      - name: Get current date
+        id: date
+        # `::set-output` is deprecated; step outputs are appended to the $GITHUB_OUTPUT file.
+        run: echo "date=$(date +'%Y%m%d')" >> "$GITHUB_OUTPUT"
+      # Derive the version from the PR's base branch: characters 9-15 of the ref,
+      # i.e. what follows the 8-character `release/` prefix.
+      # NOTE(review): github.base_ref is only populated for pull_request events, so on a
+      # `release` event this output is presumably empty -- confirm before relying on it.
+      - name: Get the current version
+        if: ${{ env.RELEASE == 'true' }}
+        id: version
+        run: echo "version=$(echo ${{ github.base_ref }} | cut -c 9-15)" >> "$GITHUB_OUTPUT"
+      - name: Login to Github Packages
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build Dev Docker Container
+        if: ${{ env.DEV == 'true' }}
+        uses: docker/build-push-action@v4
+        with: 
+          context: ./docker/containers/docker_dev
+          build-args: |
+            BRANCH=${{github.head_ref}}
+          push: true
+          tags: ghcr.io/neuralmagic/sparseml-dev:${{ env.NAME }}
+      - name: Build Release Docker Container
+        if: ${{ env.RELEASE == 'true' }}
+        uses: docker/build-push-action@v4
+        with: 
+          context: ./docker/containers/docker_release
+          build-args: |
+            VERSION=${{ steps.version.outputs.version }}
+          push: true
+          tags: ghcr.io/neuralmagic/test-sparseml:latest, ghcr.io/neuralmagic/test-sparseml:${{ steps.version.outputs.version }}
+      - name: Build Nightly Docker Container
+        if: ${{ env.DEV == 'false' && env.RELEASE == 'false'}}
+        uses: docker/build-push-action@v4
+        with:
+          context: ./docker/containers/docker_nightly
+          push: true
+          tags: ghcr.io/neuralmagic/test-sparseml-nightly:latest, ghcr.io/neuralmagic/test-sparseml-nightly:${{ steps.date.outputs.date }}
@@ -0,0 +1,25 @@
+# Development image: installs sparseml in editable mode from a git branch.
+# Base image; can be overridden with --build-arg SOURCE=...
+ARG SOURCE=ghcr.io/neuralmagic/cuda-python3.10
+
+# ARGs declared before FROM are only visible to FROM itself. Each one that is used
+# by a later instruction is re-declared (without a value) after FROM, which brings
+# its default back into scope for the build stage.
+ARG TORCH_VERSION=2.1.2
+ARG TORCHVISION_VERSION=0.16.2
+ARG CUDA=121
+# Branch of neuralmagic/sparseml to clone; no default value is set here.
+ARG BRANCH
+
+FROM $SOURCE
+
+ARG BRANCH
+
+# Upgrade the packaging tools before installing anything else.
+RUN python3.10 -m pip install --upgrade pip \
+    && python3.10 -m pip install --upgrade setuptools
+
+ARG CUDA
+ARG TORCH_VERSION
+ARG TORCHVISION_VERSION
+
+# Install the pinned torch/torchvision builds carrying the +cu${CUDA} local version
+# tag, then shallow-clone the requested branch and install it in editable mode
+# with the "dev" extra.
+RUN python3.10 -m pip install torch==${TORCH_VERSION}+cu${CUDA} torchvision==${TORCHVISION_VERSION}+cu${CUDA} -f https://download.pytorch.org/whl/torch_stable.html \
+    && git clone https://github.com/neuralmagic/sparseml.git --depth 1 --single-branch -b ${BRANCH} \
+    && python3.10 -m pip install -e "./sparseml[dev]"
+
+# The container is reported healthy when sparseml can be imported.
+HEALTHCHECK CMD python3.10 -c 'import sparseml'
+# Build-time sanity check: grep exits non-zero (failing the build) if no installed
+# package name matches "sparseml".
+RUN python3.10 -m pip list | grep sparseml
+CMD bash
@@ -0,0 +1,21 @@
+# Nightly image: installs the sparseml-nightly package with its integration extras.
+# Base image; can be overridden with --build-arg SOURCE=...
+ARG SOURCE=ghcr.io/neuralmagic/cuda-python3.10
+
+# ARGs declared before FROM are only visible to FROM itself. They are re-declared
+# (without a value) after FROM to bring their defaults into scope for the build stage.
+ARG TORCH_VERSION=2.1.2
+ARG TORCHVISION_VERSION=0.16.2
+ARG CUDA=121
+
+FROM $SOURCE
+
+# Upgrade the packaging tools before installing anything else.
+RUN python3.10 -m pip install --upgrade pip \
+    && python3.10 -m pip install --upgrade setuptools
+
+ARG CUDA 
+ARG TORCH_VERSION
+ARG TORCHVISION_VERSION
+
+# Install the pinned torch/torchvision builds carrying the +cu${CUDA} local version
+# tag, then sparseml-nightly with the listed extras (no version pin, no pip cache).
+RUN python3.10 -m pip install torch==${TORCH_VERSION}+cu${CUDA} torchvision==${TORCHVISION_VERSION}+cu${CUDA} -f https://download.pytorch.org/whl/torch_stable.html \
+    && python3.10 -m pip install --no-cache-dir "sparseml-nightly[onnxruntime,torchvision,transformers,yolov5,ultralytics]" 
+
+# The container is reported healthy when sparseml can be imported.
+HEALTHCHECK CMD python3.10 -c 'import sparseml'
+# Build-time sanity check: grep exits non-zero (failing the build) if no installed
+# package name matches "sparseml".
+RUN python3.10 -m pip list | grep sparseml
+CMD bash
@@ -0,0 +1,24 @@
+# Release image: installs a pinned sparseml release with its integration extras.
+# Base image; can be overridden with --build-arg SOURCE=...
+ARG SOURCE=ghcr.io/neuralmagic/cuda-python3.10
+
+# ARGs declared before FROM are only visible to FROM itself. They are re-declared
+# (without a value) after FROM to bring their defaults into scope for the build stage.
+ARG TORCH_VERSION=2.1.2
+ARG TORCHVISION_VERSION=0.16.2
+ARG CUDA=121
+# sparseml version to install; no default value is set here.
+ARG VERSION
+
+FROM $SOURCE
+
+ARG VERSION
+
+ARG CUDA
+ARG TORCH_VERSION
+ARG TORCHVISION_VERSION
+
+# Upgrade the packaging tools before installing anything else.
+RUN python3.10 -m pip install --upgrade pip \
+    && python3.10 -m pip install --upgrade setuptools
+
+# Install the pinned torch/torchvision builds carrying the +cu${CUDA} local version
+# tag, then sparseml pinned to exactly $VERSION with the listed extras.
+RUN python3.10 -m pip install torch==${TORCH_VERSION}+cu${CUDA} torchvision==${TORCHVISION_VERSION}+cu${CUDA} -f https://download.pytorch.org/whl/torch_stable.html \
+    && python3.10 -m pip install --no-cache-dir "sparseml[onnxruntime,torchvision,transformers,yolov5,ultralytics]==$VERSION" 
+
+# The container is reported healthy when sparseml can be imported.
+HEALTHCHECK CMD python3.10 -c 'import sparseml'
+# Build-time sanity check: grep exits non-zero (failing the build) if no installed
+# package name matches "sparseml".
+RUN python3.10 -m pip list | grep sparseml
+CMD bash
@@ -15,7 +15,7 @@ fsdp_config:
 machine_rank: 0
 main_training_function: main
 num_machines: 1
-num_processes: 2
+num_processes: 4
 rdzv_backend: static
 same_network: true
 tpu_env: []
 
@@ -77,7 +77,7 @@ With the models downloaded, we will set up the Hugging Face `tokenizer`, `config
 We instantiate these classes by passing the local path to the directory containing the `pytorch_model.bin`, `tokenizer.json`, and `config.json` files from the SparseZoo download.
 
 ```python
-from sparseml.transformers.utils import SparseAutoModel
+from sparseml.transformers import SparseAutoModel
 from transformers import AutoModelForSequenceClassification, AutoConfig, AutoTokenizer
 
 NUM_LABELS = 2
 
@@ -3,4 +3,10 @@ line-length = 88
 target-version = ['py36']
 
 [tool.pytest.ini_options]
-tmp_path_retention_policy = "none"
+tmp_path_retention_policy = "none"
+markers = [
+    "integration: integration tests",
+    "unit: unit tests",
+    "custom: custom integration tests",
+    "smoke: smoke tests"
+]
@@ -39,7 +39,7 @@
 _deps = [
     "setuptools<=59.5.0",
     "pyyaml>=5.0.0",
-    "numpy>=1.0.0",
+    "numpy>=1.17.0",
     "matplotlib>=3.0.0",
     "merge-args>=0.1.0",
     "onnx>=1.5.0,<1.15.0",
@@ -79,8 +79,7 @@
     "opencv-python<=4.6.0.66",
 ]
 _transformers_deps = _pytorch_deps + [
-    f"{'nm-transformers' if is_release else 'nm-transformers-nightly'}"
-    f"~={version_nm_deps}",
+    "transformers<4.35.0",
     "datasets<=2.14.6",
     "dvc",
     "scikit-learn",
@@ -92,7 +91,7 @@
 ]
 _llm_deps = _transformers_deps + ["sentencepiece"]
 _yolov5_deps = _pytorch_vision_deps + [
-    f"{'nm-yolov5' if is_release else 'nm-yolov5-nightly'}~={version_nm_deps}"
+    f"{'nm-yolov5' if is_release else 'nm-yolov5-nightly'}<={version_nm_deps}"
 ]
 _notebook_deps = [
     "jupyter>=1.0.0",
@@ -120,6 +119,7 @@
     "tensorboard>=1.0,<2.9",
     "tensorboardX>=1.0",
     "evaluate>=0.4.1",
+    "parameterized",
 ]
 
 _docs_deps = [
 
@@ -14,8 +14,7 @@
 
 from typing import List, Optional, Union
 
-from sparseml.transformers.utils.sparse_model import SparseAutoModelForCausalLM
-from sparseml.transformers.utils.sparse_tokenizer import SparseAutoTokenizer
+from sparseml.transformers import SparseAutoModelForCausalLM, SparseAutoTokenizer
 
 
 try:
 
@@ -18,8 +18,10 @@
 from collections import OrderedDict
 from pathlib import Path
 from typing import Callable, List, Optional, Union
+from typing import OrderedDict as OrderedDictType
 
 import numpy
+import onnx
 
 from sparseml.export.export_data import InputsNames, LabelNames, OutputsNames
 from sparseml.export.helpers import ONNX_MODEL_NAME, onnx_data_files
@@ -164,8 +166,11 @@ def validate_correctness(
 
     sample_inputs_files = sorted(glob.glob(os.path.join(sample_inputs_path, "*")))
     sample_outputs_files = sorted(glob.glob(os.path.join(sample_outputs_path, "*")))
-
-    session = ort.InferenceSession(os.path.join(directory, onnx_model_name))
+    model_path = os.path.join(directory, onnx_model_name)
+    expected_input_names = [
+        inp.name for inp in onnx.load(model_path, load_external_data=False).graph.input
+    ]
+    session = ort.InferenceSession(model_path)
 
     validations = (
         []
@@ -180,6 +185,11 @@ def validate_correctness(
         sample_input_with_batch_dim = OrderedDict(
             (key, numpy.expand_dims(value, 0)) for key, value in sample_input.items()
         )
+
+        sample_input_with_batch_dim = _potentially_rename_input(
+            sample_input_with_batch_dim, expected_input_names
+        )
+
         outputs = session.run(None, sample_input_with_batch_dim)
         if isinstance(outputs, list):
             validations_sample = []
@@ -205,3 +215,17 @@ def validate_correctness(
         f"Successfully validated the exported model on all {len(validations)} samples."
     )
     return True
+
+
+def _potentially_rename_input(
+    sample_input_with_batch_dim: OrderedDictType[str, numpy.ndarray],
+    expected_input_names: List[str],
+) -> OrderedDictType[str, numpy.ndarray]:
+    """
+    Align the keys of a sample input with the input names the model expects
+
+    :param sample_input_with_batch_dim: ordered mapping of input name to array
+    :param expected_input_names: input names to use when the key set of the
+        sample differs from them
+    :return: the sample unchanged when its key set equals the expected names,
+        otherwise a new OrderedDict pairing the expected names with the
+        values of the sample by position
+    """
+    # if required, rename the input names of the sample to match
+    # the input names of the model
+    input_names = list(sample_input_with_batch_dim.keys())
+    # names are compared as sets, so a sample that has the right names in a
+    # different order is returned as-is
+    if set(input_names) != set(expected_input_names):
+        # NOTE(review): zip stops at the shorter sequence, so surplus names or
+        # surplus values are dropped silently when the counts differ
+        return OrderedDict(
+            zip(expected_input_names, sample_input_with_batch_dim.values())
+        )
+    return sample_input_with_batch_dim
@@ -49,7 +49,6 @@ def _clear_missing_keys(module, incompatible_keys):
         self.register_load_state_dict_post_hook(_clear_missing_keys)
 
     def forward(self, *args, **kwargs):
-        self.teacher_model.eval()
         if not self.kd_enabled:
             return self.student_model(*args, **kwargs)
 
@@ -118,6 +117,13 @@ def named_modules(
             memo=memo, prefix=prefix, remove_duplicate=remove_duplicate
         )
 
+    def named_children(self):
+        """
+        :return: the result of self.student_model.named_children(); only the
+            student model is consulted here
+        """
+        return self.student_model.named_children()
+
+    def train(self, mode: bool = True):
+        """
+        Forward the requested mode to the student model
+
+        :param mode: flag passed through to self.student_model.train
+        :return: this wrapper instance
+        """
+        # only student_model.train is called; this method does not touch the
+        # teacher model
+        self.student_model.train(mode)
+        return self
+
     def __getattr__(self, name: str) -> Any:
         try:
             return super().__getattr__(name)
 
@@ -22,6 +22,7 @@
 from torch.nn import Module
 
 import sparseml.core.session as session_manager
+from safetensors import safe_open
 from sparseml.core.framework import Framework
 from sparseml.pytorch.sparsification.quantization.helpers import (
     initialize_channel_wise_scale_zp,
@@ -143,7 +144,8 @@ def reload_model_state(
     weight_files = [
         os.path.join(load_path, os.path.basename(f))
         for f in files
-        if f.startswith("pytorch_model") and f.endswith("bin")
+        if (f.startswith("pytorch_model") and f.endswith("bin"))
+        or (f.endswith("safetensors"))
     ]
     if not weight_files:
         _LOGGER.warning(
@@ -168,7 +170,10 @@ def reload_model_state(
     # change in keys due to architecture changes, reload statedict
     loaded_state_dict = {}
     for f in weight_files:
-        dd = torch.load(f, map_location="cpu")
+        if f.endswith("safetensors"):
+            dd = load_safetensors_state_dict(file_path=f)
+        else:
+            dd = torch.load(f, map_location="cpu")
         loaded_state_dict.update(dd)
 
     _, missing, unexpected, mismatched, _, _ = model._load_pretrained_model(
@@ -229,17 +234,25 @@ def reload_model_from_checkpoint(model: Module, checkpoint: Optional[str] = None
 
 
 def save_model_and_recipe(
-    model: Module, save_path: str, tokenizer: Optional[Any] = None
+    model: Module,
+    save_path: str,
+    tokenizer: Optional[Any] = None,
+    save_safetensors: bool = False,
+    save_compressed: bool = False,
 ):
     """
     Save a model, tokenizer and the currently loaded recipe to file
 
     :param model: pytorch model to save
     :param save_path: path to save output to
     :param tokenizer: model tokenizer to save
+    :param save_safetensors: whether to save as safetensors or pickle (bin)
+    :param save_compressed: whether to compress sparse weights on disk
     """
 
-    model.save_pretrained(save_path)
+    model.save_pretrained(
+        save_path, save_compressed=save_compressed, safe_serialization=save_safetensors
+    )
 
     if tokenizer is not None:
         tokenizer.save_pretrained(save_path)
@@ -326,3 +339,14 @@ def save_completed_stages(checkpoint_dir: str, completed_stages: List[str]):
     stage_path = os.path.join(checkpoint_dir, COMPLETED_STAGES_FILENAME)
     with open(stage_path, "w") as out_file:
         json.dump({"completed": completed_stages}, out_file)
+
+
+def load_safetensors_state_dict(file_path: str) -> Dict[str, torch.Tensor]:
+    """
+    Load every tensor stored in a safetensors file from disk
+
+    :param file_path: path to the safetensors file
+    :return: dictionary mapping each key in the file to its tensor
+    """
+    # tensors are requested as pytorch ("pt") objects on the cpu
+    with safe_open(file_path, framework="pt", device="cpu") as f:
+        return {key: f.get_tensor(key) for key in f.keys()}