Skip to content

Commit

Permalink
#396 Add custom & auto quantization (#397)
Browse files Browse the repository at this point in the history
  • Loading branch information
Only-bottle authored Nov 14, 2024
1 parent 057e643 commit ddc36e8
Show file tree
Hide file tree
Showing 35 changed files with 1,295 additions and 215 deletions.
81 changes: 81 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,87 @@ from netspresso import NetsPresso
netspresso = NetsPresso(email="YOUR_EMAIL", password="YOUR_PASSWORD")
```

### ⭐⭐⭐ (New Feature) Quantizer ⭐⭐⭐

#### Automatic quantization

To start quantizing a model, enter the model path, dataset path, and the desired quantization precision.

The quantized model will be saved to the specified output directory (`output_dir`).

```python
from netspresso.enums import QuantizationPrecision, SimilarityMetric

# 1. Declare quantizer
quantizer = netspresso.quantizer()

# 2. Run automatic quantization
quantization_result = quantizer.automatic_quantization(
input_model_path="./examples/sample_models/test.onnx",
output_dir="./outputs/quantized/automatic_quantization",
dataset_path="./examples/sample_datasets/pickle_calibration_dataset_128x128.npy",
weight_precision=QuantizationPrecision.INT8,
activation_precision=QuantizationPrecision.INT8,
threshold=0,
)
```

#### Custom precision quantization by layer name

This method enables you to apply precision settings tailored to each layer, based on the recommendations, to optimize the model.

Alternatively, you can change the recommended precisions to your desired values and then run the quantization.

```python
from netspresso.enums import QuantizationPrecision

# 1. Declare quantizer
quantizer = netspresso.quantizer()

# 2. Recommendation precision
metadata = quantizer.get_recommendation_precision(
input_model_path="./examples/sample_models/test.onnx",
output_dir="./outputs/quantized/recommendation",
dataset_path="./examples/sample_datasets/pickle_calibration_dataset_128x128.npy",
weight_precision=QuantizationPrecision.INT8,
activation_precision=QuantizationPrecision.INT8,
threshold=0,
)
recommendation_precisions = quantizer.load_recommendation_precision_result(metadata.recommendation_result_path)

# 3. Run quantization by layer name
quantization_result = quantizer.custom_precision_quantization_by_layer_name(
input_model_path="./examples/sample_models/test.onnx",
output_dir="./outputs/quantized/custom_precision_quantization_by_layer_name",
precision_by_layer_name=recommendation_precisions.layers,
dataset_path="./examples/sample_datasets/pickle_calibration_dataset_128x128.npy",
)
```

#### Automatic quantization

To start quantizing a model, enter the model path, dataset path, and the desired quantization precision.

The quantized model will be saved to the specified output directory (`output_dir`).

```python
from netspresso.enums import QuantizationPrecision, SimilarityMetric

# 1. Declare quantizer
quantizer = netspresso.quantizer()

# 2. Run automatic quantization
compression_result = quantizer.automatic_quantization(
input_model_path="./examples/sample_models/test.onnx",
output_dir="./outputs/quantized/automatic_quantization",
dataset_path="./examples/sample_datasets/pickle_calibration_dataset_128x128.npy",
weight_precision=QuantizationPrecision.INT8,
activation_precision=QuantizationPrecision.INT8,
threshold=0,
)
```


### Trainer

#### Train
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
project = "NetsPresso"
copyright = "2023-2024, Nota Inc"
author = "Nota AI"
release = "1.7.0"
release = "1.13.0"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
1 change: 1 addition & 0 deletions docs/description.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ API Description

description/api/trainer/trainer
description/api/compressor/compressor
description/api/quantizer/quantizer
description/api/converter/converter
description/api/benchmarker/benchmarker
description/api/enums/enums
9 changes: 9 additions & 0 deletions docs/description/api/enums/enum/onnx_operator.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
OnnxOperator
############

.. autoclass:: netspresso.enums.__init__.OnnxOperator
:members:
:undoc-members:
:member-order: bysource
:exclude-members: create_literal

9 changes: 9 additions & 0 deletions docs/description/api/enums/enum/quantization_mode.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
QuantizationMode
################

.. autoclass:: netspresso.enums.__init__.QuantizationMode
:members:
:undoc-members:
:member-order: bysource
:exclude-members: create_literal

9 changes: 9 additions & 0 deletions docs/description/api/enums/enum/quantization_precision.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
QuantizationPrecision
#####################

.. autoclass:: netspresso.enums.__init__.QuantizationPrecision
:members:
:undoc-members:
:member-order: bysource
:exclude-members: create_literal

9 changes: 9 additions & 0 deletions docs/description/api/enums/enum/similarity_metric.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
SimilarityMetric
################

.. autoclass:: netspresso.enums.__init__.SimilarityMetric
:members:
:undoc-members:
:member-order: bysource
:exclude-members: create_literal

6 changes: 5 additions & 1 deletion docs/description/api/enums/enums.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,8 @@ Enums
enum/software_version
enum/hardware_type
enum/task_status
enum/data_type
enum/data_type
enum/quantization_mode
enum/quantization_precision
enum/similarity_metric
enum/onnx_operator
26 changes: 26 additions & 0 deletions docs/description/api/quantizer/quantize/automatic_quantization.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Automatic Quantization
======================

.. autofunction:: netspresso.quantizer.__init__.Quantizer.automatic_quantization


Example
-------

.. code-block:: python
from netspresso import NetsPresso
from netspresso.enums import QuantizationPrecision
netspresso = NetsPresso(email="YOUR_EMAIL", password="YOUR_PASSWORD")
quantizer = netspresso.quantizer()
quantization_result = quantizer.automatic_quantization(
input_model_path="./examples/sample_models/test.onnx",
output_dir="./outputs/quantized/automatic_quantization",
dataset_path="./examples/sample_datasets/pickle_calibration_dataset_128x128.npy",
weight_precision=QuantizationPrecision.INT8,
activation_precision=QuantizationPrecision.INT8,
threshold=0,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Custom Precision Quantization by Layer Name
===========================================

.. autofunction:: netspresso.quantizer.__init__.Quantizer.custom_precision_quantization_by_layer_name


Example
-------

.. code-block:: python
from netspresso import NetsPresso
from netspresso.enums import QuantizationPrecision
netspresso = NetsPresso(email="YOUR_EMAIL", password="YOUR_PASSWORD")
quantizer = netspresso.quantizer()
recommendation_metadata = quantizer.get_recommendation_precision(
input_model_path="./examples/sample_models/test.onnx",
output_dir="./outputs/quantized/automatic_quantization",
dataset_path="./examples/sample_datasets/pickle_calibration_dataset_128x128.npy",
weight_precision=QuantizationPrecision.INT8,
activation_precision=QuantizationPrecision.INT8,
threshold=0,
)
recommendation_precisions = quantizer.load_recommendation_precision_result(recommendation_metadata.recommendation_result_path)
quantization_result = quantizer.custom_precision_quantization_by_layer_name(
input_model_path="./examples/sample_models/test.onnx",
output_dir="./outputs/quantized/custom_precision_quantization_by_layer_name",
dataset_path="./examples/sample_datasets/pickle_calibration_dataset_128x128.npy",
precision_by_layer_name=recommendation_precisions.layers,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Custom Precision Quantization by Operator Type
==============================================

.. autofunction:: netspresso.quantizer.__init__.Quantizer.custom_precision_quantization_by_operator_type


Example
-------

.. code-block:: python
from netspresso import NetsPresso
from netspresso.enums import QuantizationPrecision
netspresso = NetsPresso(email="YOUR_EMAIL", password="YOUR_PASSWORD")
quantizer = netspresso.quantizer()
recommendation_metadata = quantizer.get_recommendation_precision(
input_model_path="./examples/sample_models/test.onnx",
output_dir="./outputs/quantized/automatic_quantization",
dataset_path="./examples/sample_datasets/pickle_calibration_dataset_128x128.npy",
weight_precision=QuantizationPrecision.INT8,
activation_precision=QuantizationPrecision.INT8,
threshold=0,
)
recommendation_precisions = quantizer.load_recommendation_precision_result(recommendation_metadata.recommendation_result_path)
quantization_result = quantizer.custom_precision_quantization_by_operator_type(
input_model_path="./examples/sample_models/test.onnx",
output_dir="./outputs/quantized/custom_precision_quantization_by_operator_type",
dataset_path="./examples/sample_datasets/pickle_calibration_dataset_128x128.npy",
precision_by_operator_type=recommendation_precisions.operators,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Recommendation Precision
========================

.. autofunction:: netspresso.quantizer.__init__.Quantizer.get_recommendation_precision


Example
-------

.. code-block:: python
from netspresso import NetsPresso
from netspresso.enums import QuantizationPrecision
netspresso = NetsPresso(email="YOUR_EMAIL", password="YOUR_PASSWORD")
quantizer = netspresso.quantizer()
recommendation_metadata = quantizer.get_recommendation_precision(
input_model_path="./examples/sample_models/test.onnx",
output_dir="./outputs/quantized/automatic_quantization",
dataset_path="./examples/sample_datasets/pickle_calibration_dataset_128x128.npy",
weight_precision=QuantizationPrecision.INT8,
activation_precision=QuantizationPrecision.INT8,
threshold=0,
)
recommendation_precisions = quantizer.load_recommendation_precision_result(recommendation_metadata.recommendation_result_path)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Uniform Precision Quantization
==============================

.. autofunction:: netspresso.quantizer.__init__.Quantizer.uniform_precision_quantization


Example
-------

.. code-block:: python
from netspresso import NetsPresso
from netspresso.enums import QuantizationPrecision, SimilarityMetric
netspresso = NetsPresso(email="YOUR_EMAIL", password="YOUR_PASSWORD")
quantizer = netspresso.quantizer()
quantization_result = quantizer.uniform_precision_quantization(
input_model_path="./examples/sample_models/test.onnx",
output_dir="./outputs/quantized/uniform_precision_quantization",
dataset_path="./examples/sample_datasets/pickle_calibration_dataset_128x128.npy",
metric=SimilarityMetric.SNR,
weight_precision=QuantizationPrecision.INT8,
activation_precision=QuantizationPrecision.INT8,
)
15 changes: 15 additions & 0 deletions docs/description/api/quantizer/quantizer.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Quantizer
==========


Quantize
--------

.. toctree::
:maxdepth: 1

quantize/uniform_precision_quantization
quantize/automatic_quantization
quantize/recommendation_precision
quantize/custom_precision_quantization_by_layer_name
quantize/custom_precision_quantization_by_operator_type
29 changes: 29 additions & 0 deletions examples/quantizer/automatic_quantization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pathlib import Path

from netspresso import NetsPresso
from netspresso.enums import QuantizationPrecision, SimilarityMetric


# Account credentials -- replace the placeholders before running this example.
EMAIL = "YOUR_EMAIL"
PASSWORD = "YOUR_PASSWORD"

# Paths used by the quantization call below.
input_model = "./examples/sample_models/test.onnx"
OUTPUT_DIR = "./outputs/quantized/automatic_quantization"
CALIBRATION_DATASET_PATH = "./examples/sample_datasets/pickle_calibration_dataset_128x128.npy"

# Step 1: create the NetsPresso client with the credentials and get a quantizer.
netspresso = NetsPresso(email=EMAIL, password=PASSWORD)
quantizer = netspresso.quantizer()

# Step 2: collect the arguments for automatic quantization.
# The output directory is OUTPUT_DIR plus the model file name without its extension.
quantization_options = {
    "input_model_path": input_model,
    "output_dir": f"{OUTPUT_DIR}/{Path(input_model).stem}",
    "dataset_path": CALIBRATION_DATASET_PATH,
    "weight_precision": QuantizationPrecision.INT8,
    "activation_precision": QuantizationPrecision.INT8,
    "metric": SimilarityMetric.SNR,
    "threshold": 0,
}

# Step 3: run automatic quantization.
quantization_result = quantizer.automatic_quantization(**quantization_options)
38 changes: 38 additions & 0 deletions examples/quantizer/custom_precision_quantization_by_layer_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from pathlib import Path

from netspresso import NetsPresso
from netspresso.enums import QuantizationPrecision


# Account credentials -- replace the placeholders before running this example.
EMAIL = "YOUR_EMAIL"
PASSWORD = "YOUR_PASSWORD"

netspresso = NetsPresso(email=EMAIL, password=PASSWORD)


# 1. Declare quantizer
quantizer = netspresso.quantizer()

# 2. Set variables for quantize
input_model = "./examples/sample_models/test.onnx"
# Output directory is named after this example so its results are easy to
# tell apart from those of the other quantizer examples.
OUTPUT_DIR = "./outputs/quantized/custom_precision_quantization_by_layer_name"
CALIBRATION_DATASET_PATH = "./examples/sample_datasets/pickle_calibration_dataset_128x128.npy"

# 3-1. Recommendation precision
# Request precision recommendations, then load the result file referenced by
# the returned metadata.
recommendation_metadata = quantizer.get_recommendation_precision(
    input_model_path=input_model,
    output_dir=f"{OUTPUT_DIR}/{Path(input_model).stem}",
    dataset_path=CALIBRATION_DATASET_PATH,
    weight_precision=QuantizationPrecision.INT8,
    activation_precision=QuantizationPrecision.INT8,
    threshold=0,
)
recommendation_precisions = quantizer.load_recommendation_precision_result(recommendation_metadata.recommendation_result_path)

# 3-2. Custom Quantization
# Pass the loaded per-layer entries as the precision for each layer name.
quantization_result = quantizer.custom_precision_quantization_by_layer_name(
    input_model_path=input_model,
    output_dir=f"{OUTPUT_DIR}/{Path(input_model).stem}",
    precision_by_layer_name=recommendation_precisions.layers,
    dataset_path=CALIBRATION_DATASET_PATH,
)
Loading

0 comments on commit ddc36e8

Please sign in to comment.