add flex_olmo model

iugoood · iugoood · commit 5df431c290ba · 2025-11-26T10:31:34.000+08:00
diff --git a/mindone/transformers/__init__.py b/mindone/transformers/__init__.py
@@ -539,6 +539,7 @@
     FlavaProcessor,
     FlavaTextModel,
 )
+from .models.flex_olmo import FlexOlmoForCausalLM, FlexOlmoModel, FlexOlmoPreTrainedModel
 from .models.fnet import (
     FNetForMaskedLM,
     FNetForMultipleChoice,
diff --git a/mindone/transformers/models/__init__.py b/mindone/transformers/models/__init__.py
@@ -79,6 +79,7 @@
     fastspeech2_conformer,
     flaubert,
     flava,
+    flex_olmo,
     fnet,
     focalnet,
     fsmt,
diff --git a/mindone/transformers/models/auto/configuration_auto.py b/mindone/transformers/models/auto/configuration_auto.py
@@ -100,6 +100,7 @@
         ("falcon_mamba", "FalconMambaConfig"),
         ("fastspeech2_conformer", "FastSpeech2ConformerConfig"),
         ("flava", "FlavaConfig"),
+        ("flex_olmo", "FlexOlmoConfig"),
         ("fnet", "FNetConfig"),
         ("focalnet", "FocalNetConfig"),
         ("fsmt", "FSMTConfig"),
@@ -368,6 +369,7 @@
         ("falcon_mamba", "FalconMamba"),
         ("fastspeech2_conformer", "FastSpeech2Conformer"),
         ("flava", "FLAVA"),
+        ("flex_olmo", "FlexOlmo"),
         ("fnet", "FNet"),
         ("focalnet", "FocalNet"),
         ("fsmt", "FairSeq Machine-Translation"),
diff --git a/mindone/transformers/models/auto/modeling_auto.py b/mindone/transformers/models/auto/modeling_auto.py
@@ -95,6 +95,7 @@
         ("falcon_mamba", "FalconMambaModel"),
         ("fastspeech2_conformer", "FastSpeech2ConformerModel"),
         ("flava", "FlavaModel"),
+        ("flex_olmo", "FlexOlmoModel"),
         ("fnet", "FNetModel"),
         ("focalnet", "FocalNetModel"),
         ("fsmt", "FSMTModel"),
@@ -433,6 +434,7 @@
         ("falcon", "FalconForCausalLM"),
         ("fuyu", "FuyuForCausalLM"),
         ("falcon_mamba", "FalconMambaForCausalLM"),
+        ("flex_olmo", "FlexOlmoForCausalLM"),
         ("gemma", "GemmaForCausalLM"),
         ("gemma2", "Gemma2ForCausalLM"),
         ("gemma3", "Gemma3ForCausalLM"),
diff --git a/mindone/transformers/models/flex_olmo/__init__.py b/mindone/transformers/models/flex_olmo/__init__.py
@@ -0,0 +1,18 @@
+# coding=utf-8
+# Copyright 2025 the HuggingFace Team. All rights reserved.
+#
+# This code is adapted from https://github.com/huggingface/transformers
+# with modifications to run transformers on mindspore.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .modeling_flex_olmo import *
diff --git a/mindone/transformers/models/flex_olmo/modeling_flex_olmo.py b/mindone/transformers/models/flex_olmo/modeling_flex_olmo.py
diff --git a/tests/transformers_tests/causal_lm_tester.py b/tests/transformers_tests/causal_lm_tester.py
@@ -39,7 +39,6 @@ def all_model_classes(self):
 
     def __init__(
         self,
-        parent,
         batch_size=13,
         seq_length=7,
         is_training=True,
@@ -80,7 +79,6 @@ def __init__(
         mamba_chunk_size=16,
     ):
         self._verify_model_attributes()
-        self.parent = parent
         self.batch_size = batch_size
         self.seq_length = seq_length
         self.is_training = is_training
diff --git a/tests/transformers_tests/models/flex_olmo/__init__.py b/tests/transformers_tests/models/flex_olmo/__init__.py
diff --git a/tests/transformers_tests/models/flex_olmo/test_modeling_flex_olmo.py b/tests/transformers_tests/models/flex_olmo/test_modeling_flex_olmo.py
@@ -0,0 +1,140 @@
+# coding=utf-8
+# Copyright 2025 the HuggingFace Team. All rights reserved.
+#
+# This code is adapted from https://github.com/huggingface/transformers
+# with modifications to run transformers on mindspore.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Testing suite for the Mindspore FlexOlmo model."""
+
+import numpy as np
+import pytest
+import torch
+from transformers.models.flex_olmo.configuration_flex_olmo import FlexOlmoConfig
+
+import mindspore as ms
+
+from mindone.transformers.models.flex_olmo import FlexOlmoModel
+from tests.modeling_test_utils import compute_diffs, generalized_parse_args, get_modules
+
+from ...causal_lm_tester import CausalLMModelTester
+
+DTYPE_AND_THRESHOLDS = {"fp32": 5e-4, "fp16": 5e-3, "bf16": 5e-2}
+MODES = [1]  # PyNative mode (1) only; graph mode (0) is not supported yet
+
+
+class FlexOlmoModelTester(CausalLMModelTester):
+    base_model_class = FlexOlmoModel
+    config_class = FlexOlmoConfig
+
+
+model_tester = FlexOlmoModelTester()
+(
+    config,
+    input_ids,
+    token_type_ids,
+    input_mask,
+    sequence_labels,
+    token_labels,
+    choice_labels,
+) = model_tester.prepare_config_and_inputs()
+
+
+FLEXOLMO_CASES = [
+    [
+        "FlexOlmoModel",
+        "transformers.FlexOlmoModel",
+        "mindone.transformers.FlexOlmoModel",
+        (config,),
+        {},
+        (),
+        {
+            "input_ids": input_ids,
+            "attention_mask": input_mask,
+        },
+        {
+            "last_hidden_state": 0,
+        },
+    ],
+]
+
+
+# NOTE(review): requires a transformers release that ships FlexOlmo; the old ">= 4.41.2" note looks stale - confirm
+@pytest.mark.parametrize(
+    "name,pt_module,ms_module,init_args,init_kwargs,inputs_args,inputs_kwargs,outputs_map,dtype,mode",
+    [
+        case
+        + [
+            dtype,
+        ]
+        + [
+            mode,
+        ]
+        for case in FLEXOLMO_CASES
+        for dtype in DTYPE_AND_THRESHOLDS.keys()
+        for mode in MODES
+    ],
+)
+def test_named_modules(
+    name,
+    pt_module,
+    ms_module,
+    init_args,
+    init_kwargs,
+    inputs_args,
+    inputs_kwargs,
+    outputs_map,
+    dtype,
+    mode,
+):
+    ms.set_context(mode=mode)
+
+    (
+        pt_model,
+        ms_model,
+        pt_dtype,
+        ms_dtype,
+    ) = get_modules(pt_module, ms_module, dtype, *init_args, **init_kwargs)
+    pt_inputs_args, pt_inputs_kwargs, ms_inputs_args, ms_inputs_kwargs = generalized_parse_args(
+        pt_dtype, ms_dtype, *inputs_args, **inputs_kwargs
+    )
+
+    # Set `hidden_dtype` if required: some modules always compute in float
+    # precision and need a specific `hidden_dtype` to cast to before returning.
+    with torch.no_grad():
+        pt_outputs = pt_model(*pt_inputs_args, **pt_inputs_kwargs)
+    ms_outputs = ms_model(*ms_inputs_args, **ms_inputs_kwargs)
+    # print("ms:", ms_outputs)
+    # print("pt:", pt_outputs)
+    if outputs_map:
+        pt_outputs_n = []
+        ms_outputs_n = []
+        for pt_key, ms_idx in outputs_map.items():
+            # print("===map", pt_key, ms_idx)
+            pt_output = getattr(pt_outputs, pt_key)
+            ms_output = ms_outputs[ms_idx]
+            if isinstance(pt_output, (list, tuple)):
+                pt_outputs_n += list(pt_output)
+                ms_outputs_n += list(ms_output)
+            else:
+                pt_outputs_n.append(pt_output)
+                ms_outputs_n.append(ms_output)
+        diffs = compute_diffs(pt_outputs_n, ms_outputs_n)
+    else:
+        diffs = compute_diffs(pt_outputs, ms_outputs)
+
+    THRESHOLD = DTYPE_AND_THRESHOLDS[ms_dtype]
+    assert (np.array(diffs) < THRESHOLD).all(), (
+        f"ms_dtype: {ms_dtype}, pt_type:{pt_dtype}, "
+        f"Outputs({np.array(diffs).tolist()}) has diff bigger than {THRESHOLD}"
+    )

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -539,6 +539,7 @@` |
| `539` | `539` | `FlavaProcessor,` |
| `540` | `540` | `FlavaTextModel,` |
| `541` | `541` | `)` |
| | `542` | `+from .models.flex_olmo import FlexOlmoForCausalLM, FlexOlmoModel, FlexOlmoPreTrainedModel` |
| `542` | `543` | `from .models.fnet import (` |
| `543` | `544` | `FNetForMaskedLM,` |
| `544` | `545` | `FNetForMultipleChoice,` |