Skip to content

Commit

Permalink
fix (api): 为rms norm加上fp32强转,为load param加类型转换
Browse files Browse the repository at this point in the history
  • Loading branch information
PanZezhong1725 authored and YdrMaster committed Jan 31, 2024
1 parent 1f0fd6e commit 75a0f87
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
9 changes: 5 additions & 4 deletions src/09python_ffi/src/refactor_graph/frontend/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,19 +489,20 @@ def make_onnx(
return onnx.helper.make_model(graph)

def load_params(self, data: Dict[str, np.ndarray]):
    """Load parameter values from ``data`` into this model's parameter table.

    Each entry replaces the stored parameter of the same name. Mismatches are
    tolerated with warnings: a value with the wrong dtype is cast to the
    parameter's expected dtype before being stored; a wrong shape is stored
    as-is after a warning; names missing from ``data`` keep their old value.

    Args:
        data: Mapping from parameter name to its new numpy value.
    """
    # Warn when the provided set of names does not exactly match the model's.
    if len(self._parameters) != len(data) or set(data.keys()) != set(
        self._parameters.keys()
    ):
        print(
            "Warning: the number or name of loaded params does not match current model."
        )
    for name in self._parameters:
        new_data = data.get(name)
        if new_data is None:
            # Parameter not supplied: keep the existing value, warn only.
            print(f"Warning: Value for {name} is not provided for loading.")
            continue
        if self._parameters[name].shape != new_data.shape:
            print(
                f"Warning: Shape mismatch for {name}, expecting {self._parameters[name].shape} but get {new_data.shape}."
            )
        if self._parameters[name].dtype != new_data.dtype:
            print(
                f"Warning: Type mismatch for {name}. Casting to {self._parameters[name].dtype} from {new_data.dtype}."
            )
            # Cast so the stored value always keeps the declared dtype.
            new_data = new_data.astype(self._parameters[name].dtype)
        self._parameters[name] = new_data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ def __init__(self, hidden_size, eps: float = 1e-6, dtype=DTYPE.F32, **kwargs):
self.hidden_size = hidden_size
self.dtype = dtype
self.weight = self.parameter(
np.ones(self.hidden_size, dtype=self.dtype.np_type()), "weight"
np.ones(self.hidden_size, dtype=DTYPE.F32.np_type()), "weight"
)

def __call__(self, hidden_states):
    """Apply RMS normalization to ``hidden_states`` and return the result.

    The input is cast to fp32 before the norm and cast back to the layer's
    configured dtype afterwards — presumably to keep the variance/rsqrt
    computation in full precision (TODO confirm against `rms_norm` semantics).
    """
    super().__call__([hidden_states])
    # Up-cast to fp32 so the normalization itself runs in full precision.
    hidden_states = self.cast(hidden_states, DTYPE.F32)
    hidden_states = self.rms_norm(hidden_states, self.weight, self.eps)
    # Restore the layer's declared dtype so downstream ops see the expected type.
    hidden_states = self.cast(hidden_states, self.dtype)
    self.outputs = [hidden_states]
    return hidden_states

0 comments on commit 75a0f87

Please sign in to comment.