Skip to content

Commit

Permalink
Implement apply_clip argument to quantize() (#427)
Browse files — browse the repository at this point in the history
  • Loading branch information
casper-hansen authored Apr 6, 2024
1 parent c780d65 commit b5db7fc
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 7 deletions.
7 changes: 7 additions & 0 deletions awq/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,12 @@ def quantize(
"This argument avoids real quantization by only applying the scales without quantizing down to FP16."
),
] = False,
apply_clip: Annotated[
bool,
Doc(
"Whether to apply clipping to the model during quantization. Some models may perform better with this set to False."
),
] = True,
):
"""
The main quantization function that you can use to quantize your model.
Expand Down Expand Up @@ -173,6 +179,7 @@ def quantize(
duo_scaling,
modules_to_not_convert=self.quant_config.modules_to_not_convert,
export_compatible=export_compatible,
apply_clip=apply_clip,
)
self.quantizer.quantize()

Expand Down
17 changes: 10 additions & 7 deletions awq/quantize/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def __init__(
duo_scaling,
modules_to_not_convert=None,
export_compatible=False,
apply_clip=True,
) -> None:
self.awq_model = awq_model
self.model = model
Expand All @@ -53,6 +54,7 @@ def __init__(
self.text_column = text_column
self.duo_scaling = duo_scaling
self.export_compatible = export_compatible
self.apply_clip = apply_clip
self.modules_to_not_convert = (
modules_to_not_convert if modules_to_not_convert is not None else []
)
Expand Down Expand Up @@ -161,13 +163,14 @@ def quantize(self):
)

# [STEP 3]: Compute and apply clipping list
clip_list = self._search_best_clip(
self.modules[i], named_linears, input_feat
)
apply_clip(self.modules[i], clip_list)
clip_list = append_str_prefix(
clip_list, get_op_name(self.model, self.modules[i]) + "."
)
if self.apply_clip:
clip_list = self._search_best_clip(
self.modules[i], named_linears, input_feat
)
apply_clip(self.modules[i], clip_list)
clip_list = append_str_prefix(
clip_list, get_op_name(self.model, self.modules[i]) + "."
)

# [STEP 4]: Quantize weights
if not self.export_compatible:
Expand Down

0 comments on commit b5db7fc

Please sign in to comment.