Skip to content

Commit

Permalink
Update sparsity preserving noise API
Browse files Browse the repository at this point in the history
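Updated the API to take the fraction of the privacy budget used for partition selection, instead of the ratio of partition selection noise to gradient noise, as the fraction is less confusing.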

PiperOrigin-RevId: 700097661
  • Loading branch information
tensorflower-gardener committed Nov 25, 2024
1 parent 637f17e commit 41f4416
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 26 deletions.
7 changes: 3 additions & 4 deletions tensorflow_privacy/privacy/keras_models/dp_keras_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@
class SparsityPreservingDPSGDConfig:
"""Config for adding sparsity preserving noise to the gradients."""

# The ratio of how the noise is split between partition selection and gradient
# noise.
sparse_selection_ratio: float = 0.0
# The fraction of the privacy budget to use for partition selection.
sparse_selection_privacy_budget_fraction: float = 0.0
# The threshold to use for private partition selection.
sparse_selection_threshold: int = 100
# A `LayerRegistry` instance containing functions that help compute
Expand Down Expand Up @@ -364,7 +363,7 @@ def train_step(self, data):
noise_multiplier_sparse, noise_multiplier = (
sparse_noise_utils.split_noise_multiplier(
noise_multiplier,
self._sparsity_preserving_dpsgd_config.sparse_selection_ratio,
self._sparsity_preserving_dpsgd_config.sparse_selection_privacy_budget_fraction,
contribution_counts,
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

def split_noise_multiplier(
noise_multiplier: float,
sparse_selection_ratio: float,
sparse_selection_privacy_budget_fraction: float,
sparse_selection_contribution_counts: Sequence[Optional[tf.SparseTensor]],
) -> tuple[float, float]:
"""Splits noise multiplier between partition selection and gradient noise.
Expand All @@ -40,8 +40,8 @@ def split_noise_multiplier(
Args:
noise_multiplier: The original noise multiplier.
sparse_selection_ratio: The ratio of partition selection noise and gradient
noise.
sparse_selection_privacy_budget_fraction: The fraction of privacy budget to
use for partition selection.
sparse_selection_contribution_counts: The contribution counts for each
sparse selection variable. If a sparse selection count is None, it will be
ignored.
Expand All @@ -54,14 +54,22 @@ def split_noise_multiplier(
sparse selection contribution counts is None, or if there are no sparse
selection contribution counts.
"""
if sparse_selection_ratio <= 0.0 or sparse_selection_ratio >= 1.0:
raise ValueError('Sparse selection ratio must be between 0 and 1.')
if (
sparse_selection_privacy_budget_fraction <= 0.0
or sparse_selection_privacy_budget_fraction >= 1.0
):
raise ValueError(
'Sparse selection privacy budget fraction must be between 0 and 1.'
)
num_sparse_selections = sum(
1 for c in sparse_selection_contribution_counts if c is not None
)
if num_sparse_selections == 0:
raise ValueError('No sparse selections contribution counts found.')

sparse_selection_ratio = sparse_selection_privacy_budget_fraction / (
1.0 - sparse_selection_privacy_budget_fraction
)
ratio = (1.0 + sparse_selection_ratio**2.0) ** 0.5
total_noise_multiplier_sparse = noise_multiplier * ratio
noise_multiplier_partition_selection = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class SparseNoiseUtilsTest(tf.test.TestCase, parameterized.TestCase):
dict(
testcase_name='one_sparse_layer',
noise_multiplier=1.0,
sparse_selection_ratio=0.8,
sparse_selection_privacy_budget_fraction=0.1,
sparse_selection_contribution_counts=[
tf.SparseTensor(
indices=[[0]],
Expand All @@ -39,7 +39,7 @@ class SparseNoiseUtilsTest(tf.test.TestCase, parameterized.TestCase):
dict(
testcase_name='multiple_sparse_layer',
noise_multiplier=1.0,
sparse_selection_ratio=0.1,
sparse_selection_privacy_budget_fraction=0.1,
sparse_selection_contribution_counts=[
tf.SparseTensor(
indices=[[0]],
Expand All @@ -62,29 +62,34 @@ class SparseNoiseUtilsTest(tf.test.TestCase, parameterized.TestCase):
def test_split_noise_multiplier(
self,
noise_multiplier,
sparse_selection_ratio,
sparse_selection_privacy_budget_fraction,
sparse_selection_contribution_counts,
):
noise_multiplier_sparse, noise_multiplier_dense = (
sparse_selection_ratio = sparse_selection_privacy_budget_fraction / (
1.0 - sparse_selection_privacy_budget_fraction
)
noise_multiplier_partition_selection, noise_multiplier_dense = (
sparse_noise_utils.split_noise_multiplier(
noise_multiplier,
sparse_selection_ratio,
sparse_selection_privacy_budget_fraction,
sparse_selection_contribution_counts,
)
)
num_sparse_layers = len(sparse_selection_contribution_counts)

total_noise_multiplier_sparse = (
noise_multiplier_sparse / num_sparse_layers**0.5
total_noise_multiplier_partition_selection = (
noise_multiplier_partition_selection / num_sparse_layers**0.5
)
print('partition selection: ', total_noise_multiplier_partition_selection)
print('dense: ', noise_multiplier_dense)
self.assertAlmostEqual(
total_noise_multiplier_sparse,
total_noise_multiplier_partition_selection,
sparse_selection_ratio * noise_multiplier_dense,
)
total_noise_multiplier = (
1.0
/ (
1.0 / total_noise_multiplier_sparse**2
1.0 / total_noise_multiplier_partition_selection**2
+ 1.0 / noise_multiplier_dense**2
)
** 0.5
Expand All @@ -95,55 +100,61 @@ def test_split_noise_multiplier(
dict(
testcase_name='no_sparse_layers',
noise_multiplier=1.0,
sparse_selection_ratio=0.5,
sparse_selection_privacy_budget_fraction=0.5,
sparse_selection_contribution_counts=[],
error_message='No sparse selections contribution counts found.',
),
dict(
testcase_name='sparse_layers_none',
noise_multiplier=1.0,
sparse_selection_ratio=0.5,
sparse_selection_privacy_budget_fraction=0.5,
sparse_selection_contribution_counts=[None],
error_message='No sparse selections contribution counts found.',
),
dict(
testcase_name='zero_ratio',
noise_multiplier=1.0,
sparse_selection_ratio=0.0,
sparse_selection_privacy_budget_fraction=0.0,
sparse_selection_contribution_counts=[
tf.SparseTensor(
indices=[[0]],
values=[1],
dense_shape=[3],
)
],
error_message='Sparse selection ratio must be between 0 and 1.',
error_message=(
'Sparse selection privacy budget fraction must be between 0'
' and 1.'
),
),
dict(
testcase_name='one_ratio',
noise_multiplier=1.0,
sparse_selection_ratio=1.0,
sparse_selection_privacy_budget_fraction=1.0,
sparse_selection_contribution_counts=[
tf.SparseTensor(
indices=[[0]],
values=[1],
dense_shape=[3],
)
],
error_message='Sparse selection ratio must be between 0 and 1.',
error_message=(
'Sparse selection privacy budget fraction must be between 0'
' and 1.'
),
),
)
def test_split_noise_multiplier_errors(
self,
noise_multiplier,
sparse_selection_ratio,
sparse_selection_privacy_budget_fraction,
sparse_selection_contribution_counts,
error_message,
):
with self.assertRaisesRegex(ValueError, error_message):
sparse_noise_utils.split_noise_multiplier(
noise_multiplier,
sparse_selection_ratio,
sparse_selection_privacy_budget_fraction,
sparse_selection_contribution_counts,
)

Expand Down

0 comments on commit 41f4416

Please sign in to comment.