Commit 139a123d authored by Zheng Xu, committed by tensorflow-copybara

Upstream tree aggregation factory to tff.aggregators.

The aggregator can give us differential privacy without relying on sampling for amplification. For more details, see "Practical and Private (Deep) Learning without Sampling or Shuffling" (https://arxiv.org/abs/2103.00039).

PiperOrigin-RevId: 391409199
Parent ae79a8bd
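For context, a minimal usage sketch of the new factory through the public `tff.aggregators` surface this change targets. The shapes, argument values, and the `tff.TensorType`/`tf.TensorSpec` plumbing here are illustrative assumptions, not part of the commit; the tests in the diff below exercise the same flow through the internal module paths.

# Hedged sketch: build the tree-aggregation DP factory and run a few rounds of
# aggregation. Values and shapes are illustrative only.
import tensorflow as tf
import tensorflow_federated as tff

dp_factory = tff.aggregators.DifferentiallyPrivateFactory.tree_aggregation(
    noise_multiplier=0.3,
    clients_per_round=100.,
    l2_norm_clip=1.,
    record_specs=tf.TensorSpec([10], tf.float32),
    use_efficient=True)

value_type = tff.TensorType(tf.float32, [10])
process = dp_factory.create(value_type)
state = process.initialize()

client_updates = [tf.ones([10]) for _ in range(100)]  # one update per client
for _ in range(3):  # one `next` call per federated round
  output = process.next(state, client_updates)
  state = output.state
  noisy_average = output.result  # clipped mean plus tree-aggregation residual noise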
@@ -14,9 +14,10 @@
"""Factory for aggregations parameterized by tensorflow_privacy DPQueries."""
import collections
from typing import Optional, Tuple
from typing import Collection, Optional, Tuple
import warnings
import tensorflow as tf
import tensorflow_privacy as tfp
from tensorflow_federated.python.aggregators import factory
@@ -217,6 +218,64 @@ class DifferentiallyPrivateFactory(factory.UnweightedAggregationFactory):
    return cls(query)

  @classmethod
  def tree_aggregation(
      cls,
      noise_multiplier: float,
      clients_per_round: float,
      l2_norm_clip: float,
      record_specs: Collection[tf.TensorSpec],
      noise_seed: Optional[int] = None,
      use_efficient: bool = True,
  ) -> factory.UnweightedAggregationFactory:
    """`DifferentiallyPrivateFactory` with tree aggregation noise.

    Performs clipping on the clients, averages client records, and adds noise
    for differential privacy. The noise is estimated based on tree aggregation
    of the cumulative sum over rounds, and the residual between the current
    round and the previous round is then taken. Combining this aggregator with
    an SGD optimizer on the server can be used to implement the DP-FTRL
    algorithm in "Practical and Private (Deep) Learning without Sampling or
    Shuffling" (https://arxiv.org/abs/2103.00039).

    The standard deviation of the Gaussian noise added at each tree node is
    `l2_norm_clip * noise_multiplier`. Note that noise is added during the
    summation of client model updates per round, *before* normalization (the
    noise will be scaled down when dividing by `clients_per_round`). Thus
    `noise_multiplier` can be used to compute the (epsilon, delta) privacy
    guarantee as described in the paper.

    Args:
      noise_multiplier: Noise multiplier for the Gaussian noise in tree
        aggregation. Must be non-negative; zero means no noise is applied.
      clients_per_round: A positive number specifying the expected number of
        clients per round.
      l2_norm_clip: The value of the clipping norm. Must be positive.
      record_specs: The specs of client results to be aggregated.
      noise_seed: Random seed for the Gaussian noise generator. If `None`, a
        nondeterministic seed based on system time will be generated.
      use_efficient: If true, use the efficient tree aggregation algorithm
        based on the paper "Efficient Use of Differentially Private Binary
        Trees".

    Returns:
      A `DifferentiallyPrivateFactory` with Gaussian noise by tree aggregation.
    """
    if isinstance(clients_per_round, int):
      clients_per_round = float(clients_per_round)

    _check_float_nonnegative(noise_multiplier, 'noise_multiplier')
    _check_float_positive(clients_per_round, 'clients_per_round')
    _check_float_positive(l2_norm_clip, 'l2_norm_clip')

    sum_query = tfp.TreeResidualSumQuery.build_l2_gaussian_query(
        l2_norm_clip,
        noise_multiplier,
        record_specs,
        noise_seed=noise_seed,
        use_efficient=use_efficient)
    mean_query = tfp.NormalizedQuery(sum_query, denominator=clients_per_round)
    return cls(mean_query)
  def __init__(self,
               query: tfp.DPQuery,
               record_aggregation_factory: Optional[
......
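The noise-scale paragraph in the docstring above can be made concrete with a couple of lines of arithmetic (the example numbers are assumed here, not taken from the commit):

# Illustrative only: how the per-node noise relates to the noise seen in the
# normalized (averaged) update, per the docstring above.
l2_norm_clip = 1.0
noise_multiplier = 0.3
clients_per_round = 100.0

per_node_noise_std = l2_norm_clip * noise_multiplier        # 0.3, added before normalization
noise_std_in_mean = per_node_noise_std / clients_per_round  # 0.003 per contributing tree node

Because the noise is injected before the division by `clients_per_round`, the `noise_multiplier` (together with the tree structure) is what enters the (epsilon, delta) accounting described in the paper, independent of the averaging step.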
@@ -237,6 +237,57 @@ class DPFactoryExecutionTest(test_case.TestCase, parameterized.TestCase):
    # Check if the specified value is kept.
    self.assertEqual(new_clip_count_stddev, clip_count_stddev)

  @parameterized.named_parameters(
      ('total5_std2', 5, 8., 2., False),
      ('total6_std0d5', 6, 0.5, 0.5, False),
      ('total7_std1', 7, 3., 1., False),
      ('total8_std1', 8, 1., 1., False),
      ('total3_std1_eff', 3, 1. + 2. / 3., 1., True),
      ('total4_std1_eff', 4, 4. / 7., 1., True),
  )
  def test_tree_aggregation_factory(self, total_steps, expected_variance,
                                    noise_std, use_efficient):
    variable_shape, tolerance = [10000], 0.05
    record = tf.zeros(variable_shape, tf.float32)
    record_shape = tf.nest.map_structure(lambda t: t.shape, record)
    record_type = computation_types.to_type((tf.float32, variable_shape))
    specs = tf.nest.map_structure(tf.TensorSpec, record_shape)

    tree_factory = differential_privacy.DifferentiallyPrivateFactory.tree_aggregation(
        noise_multiplier=noise_std,
        l2_norm_clip=1.,
        record_specs=specs,
        clients_per_round=1.,
        noise_seed=1,
        use_efficient=use_efficient,
    )
    process = tree_factory.create(record_type)
    state = process.initialize()

    client_data = [record]
    cumsum_result = tf.zeros(variable_shape, tf.float32)
    for _ in range(total_steps):
      output = process.next(state, client_data)
      state = output.state
      cumsum_result += output.result
    self.assertAllClose(
        np.sqrt(expected_variance), np.std(cumsum_result), rtol=tolerance)

  @parameterized.named_parameters(
      ('negative_clip', -1., 0.),
      ('zero_clip', 0., 0.),
      ('negative_noise', 1., -1.),
  )
  def test_tree_aggregation_factory_raise(self, clip_norm, noise_multiplier):
    with self.assertRaisesRegex(ValueError, 'must be'):
      differential_privacy.DifferentiallyPrivateFactory.tree_aggregation(
          noise_multiplier=noise_multiplier,
          l2_norm_clip=clip_norm,
          record_specs=tf.TensorSpec([]),
          clients_per_round=1.,
          noise_seed=1)


if __name__ == '__main__':
  execution_contexts.set_local_execution_context()
......
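For the non-efficient cases, the `expected_variance` values in `test_tree_aggregation_factory` follow from the standard binary-tree prefix-sum argument: the noisy cumulative sum after `t` rounds is assembled from popcount(t) tree nodes, each carrying independent Gaussian noise of variance `noise_std**2`. A small sanity check of this reading (mine, not part of the commit):

# Reproduces the non-efficient expected_variance values in the test above.
def expected_cumsum_variance(total_steps, noise_std):
  num_nodes = bin(total_steps).count('1')  # popcount of the round index
  return num_nodes * noise_std**2

assert expected_cumsum_variance(5, 2.) == 8.    # 'total5_std2'
assert expected_cumsum_variance(6, 0.5) == 0.5  # 'total6_std0d5'
assert expected_cumsum_variance(7, 1.) == 3.    # 'total7_std1'
assert expected_cumsum_variance(8, 1.) == 1.    # 'total8_std1'

The `use_efficient=True` cases (1 + 2/3 at t=3 and 4/7 at t=4) come out smaller because the algorithm from "Efficient Use of Differentially Private Binary Trees" combines redundant node estimates to reduce variance; the test simply hard-codes the resulting values.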