diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py
index 572799b38..e6673442d 100644
--- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py
+++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py
@@ -18,20 +18,20 @@ Sampled Gaussian Mechanism.
 
 The mechanism's parameters are controlled by flags.
 
 Example:
-  compute_dp_sgd_privacy
+  compute_dp_sgd_privacy \
     --N=60000 \
     --batch_size=256 \
     --noise_multiplier=1.12 \
     --epochs=60 \
-    --delta=1e-5
+    --delta=1e-5 \
+    --accountant_type=RDP
 
-The output states that DP-SGD with these parameters satisfies (2.92, 1e-5)-DP.
+Prints the privacy statement corresponding to the above parameters.
 """
 
 from absl import app
 from absl import flags
-
-from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy_statement
+from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib
 
 
 _NUM_EXAMPLES = flags.DEFINE_integer(
@@ -70,6 +70,9 @@
         'user-level DP guarantee.'
     ),
 )
+_ACCOUNTANT_TYPE = flags.DEFINE_enum(
+    'accountant_type', 'RDP', ['RDP', 'PLD'], 'DP accountant to use.'
+)
 
 flags.mark_flags_as_required(['N', 'batch_size', 'noise_multiplier', 'epochs'])
 
@@ -77,7 +80,7 @@
 
 def main(argv):
   del argv  # argv is not used.
-  statement = compute_dp_sgd_privacy_statement(
+  statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
       _NUM_EXAMPLES.value,
       _BATCH_SIZE.value,
       _NUM_EPOCHS.value,
@@ -85,6 +88,7 @@ def main(argv):
       _DELTA.value,
       _USED_MICROBATCHING.value,
       _MAX_EXAMPLES_PER_USER.value,
+      compute_dp_sgd_privacy_lib.AccountantType(_ACCOUNTANT_TYPE.value),
   )
   print(statement)
diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py
index 99b135f78..28851b52f 100644
--- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py
+++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py
@@ -14,6 +14,7 @@
 # ==============================================================================
 """Library for computing privacy values for DP-SGD."""
 
+import enum
 import functools
 import math
 import textwrap
@@ -34,6 +35,28 @@ def _logexpm1(x: float) -> float:
   return x + math.log(-math.expm1(-x))
 
 
+class AccountantType(enum.Enum):
+  """Accountant to use for privacy accounting."""
+
+  RDP = 'RDP'
+  PLD = 'PLD'
+
+  def get_accountant(self) -> dp_accounting.PrivacyAccountant:
+    if self == AccountantType.RDP:
+      rdp_orders = (
+          [1 + x / 10.0 for x in range(1, 100)]
+          + list(range(11, 64))
+          + [128, 256, 512, 1024]
+      )
+      return dp_accounting.rdp.RdpAccountant(rdp_orders)
+    if self == AccountantType.PLD:
+      pld_discretization = 1e-4
+      return dp_accounting.pld.PLDAccountant(
+          value_discretization_interval=pld_discretization
+      )
+    raise ValueError(f'Unsupported AccountantType: {self}')
+
+
 def _compute_dp_sgd_user_privacy(
     num_epochs: float,
     noise_multiplier: float,
@@ -41,6 +64,7 @@
     max_examples_per_user: int,
     used_microbatching: bool = True,
     poisson_subsampling_probability: Optional[float] = None,
+    accountant_type: AccountantType = AccountantType.RDP,
 ) -> float:
   """Computes add-or-remove-one-user DP epsilon using group privacy.
@@ -63,6 +87,7 @@
     used_microbatching: If true, increases sensitivity by a factor of two.
     poisson_subsampling_probability: If not None, gives the probability that
       each record is chosen in a batch. If None, assumes no subsampling.
+    accountant_type: The privacy accountant for computing epsilon.
 
   Returns:
     The add-or-remove-one-user DP epsilon value using group privacy.
@@ -92,6 +117,7 @@
         user_delta,
         used_microbatching,
         poisson_subsampling_probability,
+        accountant_type,
     )
 
   # The computation below to estimate user_eps works as follows.
@@ -188,6 +214,7 @@ def _compute_dp_sgd_example_privacy(
     example_delta: float,
     used_microbatching: bool = True,
     poisson_subsampling_probability: Optional[float] = None,
+    accountant_type: AccountantType = AccountantType.RDP,
 ) -> float:
   """Computes add-or-remove-one-example DP epsilon.
@@ -201,6 +228,7 @@
     used_microbatching: If true, increases sensitivity by a factor of two.
     poisson_subsampling_probability: If not None, gives the probability that
       each record is chosen in a batch. If None, assumes no subsampling.
+    accountant_type: The privacy accountant for computing epsilon.
 
   Returns:
     The epsilon value.
@@ -229,10 +257,10 @@
     event_ = dp_accounting.SelfComposedDpEvent(count=count, event=event_)
 
   return (
-      dp_accounting.rdp.RdpAccountant()
+      accountant_type.get_accountant()
       .compose(event_)
       .get_epsilon(example_delta)
-  )  # TODO(b/271341062)
+  )
 
 
 def compute_dp_sgd_privacy_statement(
@@ -243,6 +271,7 @@
     delta: float,
     used_microbatching: bool = True,
     max_examples_per_user: Optional[int] = None,
+    accountant_type: AccountantType = AccountantType.RDP,
 ) -> str:
   """Produces a privacy report summarizing the DP guarantee.
@@ -267,6 +296,7 @@
     max_examples_per_user: If the data set is constructed to cap the maximum
       number of examples each user contributes, provide this argument to also
       print a user-level DP guarantee.
+    accountant_type: The privacy accountant for computing epsilon.
 
   Returns:
     A str precisely articulating the privacy guarantee.
@@ -296,12 +326,16 @@
   paragraph = textwrap.fill(
       f"""\
 Example-level DP with add-or-remove-one adjacency at delta = {delta} computed \
-with RDP accounting:""",
+with {accountant_type.value} accounting:""",
       width=80,
   )
 
   example_eps_no_subsampling = _compute_dp_sgd_example_privacy(
-      num_epochs, noise_multiplier, delta, used_microbatching
+      num_epochs,
+      noise_multiplier,
+      delta,
+      used_microbatching,
+      accountant_type=accountant_type,
   )
   example_eps_subsampling = _compute_dp_sgd_example_privacy(
       num_epochs,
@@ -309,6 +343,7 @@
       delta,
       used_microbatching,
       poisson_subsampling_probability=batch_size / number_of_examples,
+      accountant_type=accountant_type,
   )
 
   paragraph += f"""
@@ -320,13 +355,33 @@
   paragraphs.append(paragraph)
 
   inf_user_eps = False
-  if max_examples_per_user is not None:
+  if max_examples_per_user is None:
+    paragraphs.append(
+        textwrap.fill(
+            """\
+No user-level privacy guarantee is possible without a bound on the number of \
+examples per user.""",
+            width=80,
+        )
+    )
+  elif accountant_type == AccountantType.PLD:
+    # TODO(b/271341062): Add user-level DP support for PLD.
+    paragraphs.append(
+        textwrap.fill(
+            """\
+User-level DP epsilon computation is not supported for PLD accounting at this \
+time. Use RDP accounting to obtain user-level DP guarantees.""",
+            width=80,
+        )
+    )
+  else:  # Case: max_examples_per_user is not None and accountant_type is RDP
     user_eps_no_subsampling = _compute_dp_sgd_user_privacy(
         num_epochs,
         noise_multiplier,
         delta,
         max_examples_per_user,
         used_microbatching,
+        accountant_type=accountant_type,
     )
     user_eps_subsampling = _compute_dp_sgd_user_privacy(
         num_epochs,
@@ -335,6 +390,7 @@
         max_examples_per_user,
         used_microbatching,
         poisson_subsampling_probability=batch_size / number_of_examples,
+        accountant_type=accountant_type,
     )
 
     if math.isinf(user_eps_no_subsampling):
       user_eps_no_subsampling_str = ' inf (**)'
@@ -350,7 +406,7 @@
     paragraph = textwrap.fill(
         f"""\
 User-level DP with add-or-remove-one adjacency at delta = {delta} computed \
-using RDP accounting and group privacy:""",
+using {accountant_type.value} accounting and group privacy:""",
         width=80,
     )
     paragraph += f"""
@@ -360,15 +416,6 @@
 {user_eps_subsampling_str}"""
     paragraphs.append(paragraph)
-  else:
-    paragraphs.append(
-        textwrap.fill(
-            """\
-No user-level privacy guarantee is possible without a bound on the number of \
-examples per user.""",
-            width=80,
-        )
-    )
 
   paragraphs.append(
       textwrap.fill(
diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py
index f1fdf15d7..94050c915 100644
--- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py
+++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py
@@ -23,6 +23,8 @@
 
 _example_privacy = compute_dp_sgd_privacy_lib._compute_dp_sgd_example_privacy
 _user_privacy = compute_dp_sgd_privacy_lib._compute_dp_sgd_user_privacy
+_RDP = compute_dp_sgd_privacy_lib.AccountantType.RDP
+_PLD = compute_dp_sgd_privacy_lib.AccountantType.PLD
 
 
 DP_SGD_STATEMENT_KWARGS = dict(
@@ -81,13 +83,21 @@ def test_compute_dp_sgd_example_privacy_bad_args(self, override_args):
       _example_privacy(**args)
 
   @parameterized.named_parameters(
-      ('no_microbatching_no_subsampling', False, None, 10.8602036),
-      ('microbatching_no_subsampling', True, None, 26.2880374),
-      ('no_microbatching_with_subsampling', False, 1e-2, 3.2391922),
-      ('microbatching_with_subsampling', True, 1e-2, 22.5970358),
+      ('no_microbatching_no_subsampling_rdp', False, None, _RDP, 10.8602036),
+      ('microbatching_no_subsampling_rdp', True, None, _RDP, 26.2880374),
+      ('no_microbatching_with_subsampling_rdp', False, 1e-2, _RDP, 3.2391922),
+      ('microbatching_with_subsampling_rdp', True, 1e-2, _RDP, 22.5970358),
+      ('no_microbatching_no_subsampling_pld', False, None, _PLD, 10.1224946),
+      ('microbatching_no_subsampling_pld', True, None, _PLD, 24.7160779),
+      ('no_microbatching_with_subsampling_pld', False, 1e-2, _PLD, 2.4612381),
+      ('microbatching_with_subsampling_pld', True, 1e-2, _PLD, 18.6977407),
   )
   def test_compute_dp_sgd_example_privacy(
-      self, used_microbatching, poisson_subsampling_probability, expected_eps
+      self,
+      used_microbatching,
+      poisson_subsampling_probability,
+      accountant_type,
+      expected_eps,
   ):
     num_epochs = 1.2
     noise_multiplier = 0.7
@@ -98,6 +108,7 @@ def test_compute_dp_sgd_example_privacy(
         example_delta,
         used_microbatching,
         poisson_subsampling_probability,
+        accountant_type,
     )
     self.assertAlmostEqual(eps, expected_eps)
@@ -119,17 +130,21 @@ def test_compute_dp_sgd_user_privacy_bad_args(self, override_args):
     with self.assertRaises(ValueError):
       _user_privacy(**args)
 
-  def test_user_privacy_one_example_per_user(self):
+  @parameterized.named_parameters(('RDP', _RDP), ('PLD', _PLD))
+  def test_user_privacy_one_example_per_user(self, accountant_type):
     num_epochs = 1.2
     noise_multiplier = 0.7
     delta = 1e-5
-    example_eps = _example_privacy(num_epochs, noise_multiplier, delta)
+    example_eps = _example_privacy(
+        num_epochs, noise_multiplier, delta, accountant_type=accountant_type
+    )
     user_eps = _user_privacy(
         num_epochs,
         noise_multiplier,
         delta,
         max_examples_per_user=1,
+        accountant_type=accountant_type,
     )
     self.assertEqual(user_eps, example_eps)
@@ -146,6 +161,7 @@ def test_user_privacy_epsilon_delta_consistency(
         noise_multiplier=noise_multiplier,
         example_delta=example_delta,
         poisson_subsampling_probability=q,
+        accountant_type=_RDP,
     )
 
     user_delta = math.exp(
@@ -161,12 +177,14 @@ def test_user_privacy_epsilon_delta_consistency(
         user_delta=user_delta,
         max_examples_per_user=max_examples_per_user,
         poisson_subsampling_probability=q,
+        accountant_type=_RDP,
     )
 
     self.assertAlmostEqual(user_eps, example_eps * max_examples_per_user)
 
-  def test_dp_sgd_privacy_statement_no_user_dp(self):
+  def test_dp_sgd_privacy_statement_no_user_dp_with_rdp(self):
     statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
         **DP_SGD_STATEMENT_KWARGS,
+        accountant_type=_RDP,
     )
     expected_statement = """\
 DP-SGD performed over 10000 examples with 64 examples per iteration, noise
@@ -191,10 +209,11 @@ def test_dp_sgd_privacy_statement_no_user_dp(self):
 """
     self.assertEqual(statement, expected_statement)
 
-  def test_dp_sgd_privacy_statement_user_dp(self):
+  def test_dp_sgd_privacy_statement_user_dp_with_rdp(self):
     statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
         **DP_SGD_STATEMENT_KWARGS,
         max_examples_per_user=3,
+        accountant_type=_RDP,
     )
     expected_statement = """\
 DP-SGD performed over 10000 examples with 64 examples per iteration, noise
@@ -221,10 +240,11 @@ def test_dp_sgd_privacy_statement_user_dp(self):
 """
     self.assertEqual(statement, expected_statement)
 
-  def test_dp_sgd_privacy_statement_user_dp_infinite(self):
+  def test_dp_sgd_privacy_statement_user_dp_infinite_with_rdp(self):
     statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
         **DP_SGD_STATEMENT_KWARGS,
         max_examples_per_user=10,
+        accountant_type=_RDP,
     )
     expected_statement = """\
 DP-SGD performed over 10000 examples with 64 examples per iteration, noise
@@ -253,6 +273,63 @@ def test_dp_sgd_privacy_statement_user_dp_infinite(self):
 `max_examples_per_user`, but because conversion from example-level to
 user-level DP is not exact, it is possible for the upper bound on the
 user-level epsilon to still be infinite.
+"""
+    self.assertEqual(statement, expected_statement)
+
+  def test_dp_sgd_privacy_statement_no_user_dp_with_pld(self):
+    statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
+        **DP_SGD_STATEMENT_KWARGS,
+        accountant_type=_PLD,
+    )
+    expected_statement = """\
+DP-SGD performed over 10000 examples with 64 examples per iteration, noise
+multiplier 2.0 for 5.0 epochs with microbatching, and no bound on number of
+examples per user.
+
+This privacy guarantee protects the release of all model checkpoints in addition
+to the final model.
+
+Example-level DP with add-or-remove-one adjacency at delta = 1e-06 computed with
+PLD accounting:
+    Epsilon with each example occurring once per epoch:    12.595
+    Epsilon assuming Poisson sampling (*):                  1.199
+
+No user-level privacy guarantee is possible without a bound on the number of
+examples per user.
+
+(*) Poisson sampling is not usually done in training pipelines, but assuming
+that the data was randomly shuffled, it is believed the actual epsilon should be
+closer to this value than the conservative assumption of an arbitrary data
+order.
+"""
+    self.assertEqual(statement, expected_statement)
+
+  def test_dp_sgd_privacy_statement_user_dp_with_pld(self):
+    statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
+        **DP_SGD_STATEMENT_KWARGS,
+        max_examples_per_user=3,
+        accountant_type=_PLD,
+    )
+    expected_statement = """\
+DP-SGD performed over 10000 examples with 64 examples per iteration, noise
+multiplier 2.0 for 5.0 epochs with microbatching, and at most 3 examples per
+user.
+
+This privacy guarantee protects the release of all model checkpoints in addition
+to the final model.
+
+Example-level DP with add-or-remove-one adjacency at delta = 1e-06 computed with
+PLD accounting:
+    Epsilon with each example occurring once per epoch:    12.595
+    Epsilon assuming Poisson sampling (*):                  1.199
+
+User-level DP epsilon computation is not supported for PLD accounting at this
+time. Use RDP accounting to obtain user-level DP guarantees.
+
+(*) Poisson sampling is not usually done in training pipelines, but assuming
+that the data was randomly shuffled, it is believed the actual epsilon should be
+closer to this value than the conservative assumption of an arbitrary data
+order.
 """
     self.assertEqual(statement, expected_statement)
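
Usage note (not part of the diff): the sketch below shows how the new
accountant_type argument added above might be exercised end to end from Python.
It is a minimal example under assumptions: the keyword names come from the
signatures in this change, while the parameter values merely mirror the
expected statements in the tests (10000 examples, batch size 64, noise
multiplier 2.0, 5.0 epochs, delta 1e-6) rather than anything prescribed here.

    from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib

    # Select the PLD accountant introduced in this change; AccountantType.RDP
    # remains the default and matches the previous behavior.
    statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
        number_of_examples=10000,
        batch_size=64,
        num_epochs=5.0,
        noise_multiplier=2.0,
        delta=1e-6,
        used_microbatching=True,
        max_examples_per_user=3,
        accountant_type=compute_dp_sgd_privacy_lib.AccountantType.PLD,
    )
    print(statement)

With AccountantType.PLD the statement reports example-level epsilons only: as
implemented above, the user-level section is replaced by an explanatory
paragraph, so AccountantType.RDP is required for a user-level guarantee. The
same choice is exposed on the command line via --accountant_type (RDP or PLD).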