Module dp_policy.titlei.mechanisms
Expand source code Browse git
import pandas as pd
from typing import Tuple
import numpy as np
from diffprivlib.mechanisms import Laplace as LaplaceMech
from diffprivlib.mechanisms import GaussianAnalytic as GaussianMech
from diffprivlib.accountant import BudgetAccountant
class Mechanism:
    """Base class for the mechanisms used to compute poverty estimates.

    Holds the shared configuration and the rounding/clipping step;
    subclasses supply `poverty_estimates`.
    """

    def __init__(
        self, sensitivity=2.0, round=False, clip=True, noise_total=False
    ):
        """Store the configuration; no validation is performed here.

        Args:
            sensitivity: Stored as `self.sensitivity`. Defaults to 2.0.
            round: If truthy, `post_processing` rounds counts.
            clip: If truthy, `post_processing` clips counts below at 0.
            noise_total: Flag read by subclasses when deciding whether to
                also noise the total-children counts.
        """
        self.noise_total = noise_total
        self.clip = clip
        self.round = round
        self.sensitivity = sensitivity

    def poverty_estimates(
        self,
        pop_total: pd.Series,
        children_total: pd.Series,
        children_poverty: pd.Series
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Return estimates for total population, total children and
        children in poverty, indexed by district ID.

        This base implementation is only a placeholder for subclasses.

        Args:
            pop_total (pd.Series): Total population in each district.
            children_total (pd.Series): Total children in each district.
            children_poverty (pd.Series): Children in poverty in each
                district.

        Returns:
            Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: Noised versions
                of the inputs.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def post_processing(self, count: pd.Series) -> pd.Series:
        """Apply the configured rounding and clipping to a noised count.

        Rounding (when enabled) happens before clipping (when enabled).

        Args:
            count (pd.Series): Noised count to process.

        Returns:
            pd.Series: Processed count.
        """
        processed = count
        if self.round:
            processed = np.round(processed)
        if not self.clip:
            return processed
        # Lower bound of 0, no upper bound.
        return np.clip(processed, 0, None)
class GroundTruth(Mechanism):
    """No randomization: the input counts are returned unchanged.
    """

    def __init__(self, *args, **kwargs):
        """Initialize with the base-class default settings.

        Any positional or keyword arguments are accepted and ignored,
        exactly as before.
        """
        # Run the base initializer (with its defaults) so the inherited
        # attributes -- sensitivity, round, clip, noise_total -- exist.
        # Without this, inherited methods such as `post_processing` fail
        # with AttributeError on a GroundTruth instance.
        super().__init__()

    def poverty_estimates(
        self, pop_total, children_total, children_poverty
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Return the three inputs as-is, in the same order.

        Args:
            pop_total: Total population in each district.
            children_total: Total children in each district.
            children_poverty: Children in poverty in each district.

        Returns:
            The tuple `(pop_total, children_total, children_poverty)`,
            with no noise and no post-processing applied.
        """
        return pop_total, children_total, children_poverty
class DummyMechanism:
    """No randomization.

    Exposes a `randomise` method that is the identity function.
    """

    def randomise(self, x):
        """Return `x` exactly as given."""
        return x
class DiffPriv(Mechanism):
    """Differentially private mechanisms wrapping `diffprivlib`.

    This class stores the privacy parameters and applies
    `self.mechanism.randomise` to each count; `self.mechanism` starts as
    None and must be set (e.g. by a subclass) before estimating.
    """

    def __init__(
        self, epsilon, delta, *args, **kwargs
    ):
        """Store `epsilon` and `delta` and set up a budget accountant.

        Remaining positional/keyword arguments are forwarded to
        `Mechanism.__init__`.
        """
        super().__init__(*args, **kwargs)
        self.epsilon, self.delta = epsilon, delta
        # Placeholder until a concrete mechanism is assigned.
        self.mechanism = None
        # for advanced composition
        self.accountant = BudgetAccountant(delta=self.delta)
        self.accountant.set_default()

    def poverty_estimates(
        self, pop_total, children_total, children_poverty
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Noise the poverty counts (and optionally the child totals).

        Args:
            pop_total: Total population in each district; never noised.
            children_total: Total children in each district; noised only
                when `self.noise_total` is truthy.
            children_poverty: Children in poverty in each district; always
                noised.

        Returns:
            Tuple of post-processed `(pop_total, children_total,
            children_poverty)`.

        Raises:
            NotImplementedError: If `self.mechanism` is still None.
        """
        if self.mechanism is None:
            raise NotImplementedError

        add_noise = self.mechanism.randomise
        # NOTE: as of 3/21, by default only the poverty estimate is noised
        # (for consistency with sampling, where est. var. is unavailable)
        noised_poverty = children_poverty.apply(add_noise)
        if self.noise_total:
            noised_total = children_total.apply(add_noise)
        else:
            noised_total = children_total

        # Post-processing: rounding and/or clipping to non-negative values,
        # as configured on the instance.
        return tuple(
            self.post_processing(series)
            for series in (pop_total, noised_total, noised_poverty)
        )
class Laplace(DiffPriv):
    """`DiffPriv` whose `mechanism` is diffprivlib's `Laplace`."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `DiffPriv`, then build the mechanism."""
        super().__init__(*args, **kwargs)
        # These values were stored on the instance by the parent
        # initializers.
        settings = {
            "epsilon": self.epsilon,
            "delta": self.delta,
            "sensitivity": self.sensitivity,
        }
        self.mechanism = LaplaceMech(**settings)
class Gaussian(DiffPriv):
    """`DiffPriv` whose `mechanism` is diffprivlib's `GaussianAnalytic`."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `DiffPriv`, then build the mechanism."""
        super().__init__(*args, **kwargs)
        # These values were stored on the instance by the parent
        # initializers.
        settings = {
            "epsilon": self.epsilon,
            "delta": self.delta,
            "sensitivity": self.sensitivity,
        }
        self.mechanism = GaussianMech(**settings)
class Sampled(Mechanism):
    """Mechanism for simulating sampling errors.
    """

    def __init__(
        self,
        *args,
        multiplier: float = 1.0,
        distribution: str = "gaussian",
        **kwargs
    ):
        """
        Args:
            multiplier (float, optional): Scales sampling noise by a constant.
                Defaults to 1.0.
            distribution (str, optional): Distribution of sampling noise.
                Supported options are 'gaussian' and 'laplace'. Defaults to
                "gaussian".
        """
        super().__init__(*args, **kwargs)
        # these are fixed, because sampling error
        # is theoretically immutable by algo means.
        # reported estimates are non-negative integers.
        self.clip = True
        self.round = True
        self.multiplier = multiplier
        self.distribution = distribution

    def poverty_estimates(
        self, pop_total, children_total, children_poverty, cv
    ):
        """Add simulated sampling noise to the poverty counts.

        Args:
            pop_total: Total population in each district; never noised.
            children_total: Total children in each district; noised only
                when `self.noise_total` is truthy.
            children_poverty: Children in poverty in each district; always
                noised.
            cv: Coefficients of variation used to scale the noise.

        Returns:
            Tuple of post-processed `(pop_total, children_total,
            children_poverty)`.

        Raises:
            ValueError: If `self.distribution` is not a supported option.
        """
        children_poverty = self._noise(children_poverty, cv)
        if self.noise_total:
            # NOTE: assuming CVs are same for total children.
            # This is beyond Census guidance.
            children_total = self._noise(children_total, cv)
        return self.post_processing(pop_total), \
            self.post_processing(children_total), \
            self.post_processing(children_poverty)

    def _noise(self, count: pd.Series, cv: pd.Series) -> pd.Series:
        """Add sampling noise.

        Args:
            count (pd.Series): Count to add sampling noise to.
            cv (pd.Series): Coefficients of variation to use for sampling
                variance.

        Returns:
            pd.Series: Noised counts, clipped below at 0. When `count` is a
                Series the result carries its index and name; otherwise the
                NumPy result is returned as-is.

        Raises:
            ValueError: If `self.distribution` is not 'gaussian' or
                'laplace'.
        """
        if self.distribution == "gaussian":
            noised = np.random.normal(
                count,  # mean
                count * cv * self.multiplier  # stderr
            )
        elif self.distribution == "laplace":
            noised = np.random.laplace(
                # mean
                count,
                # variance is 2b^2 = (count * cv)^2
                # b = count * cv * sqrt(1/2)
                np.sqrt(0.5) * count * cv * self.multiplier
            )
        else:
            raise ValueError(
                f"{self.distribution} is not a valid distribution."
            )
        noised = np.clip(noised, 0, None)
        # The NumPy samplers return plain arrays, which drops the pandas
        # index. Restore it so the result matches the documented return
        # type and stays aligned with the un-noised series.
        if isinstance(count, pd.Series):
            return pd.Series(noised, index=count.index, name=count.name)
        return noised
Classes
class DiffPriv (epsilon, delta, *args, **kwargs)
-
Differentially private mechanisms wrapping `diffprivlib`.
Expand source code Browse git
class DiffPriv(Mechanism):
    """Differentially private mechanisms wrapping `diffprivlib`.

    This class stores the privacy parameters and applies
    `self.mechanism.randomise` to each count; `self.mechanism` starts as
    None and must be set (e.g. by a subclass) before estimating.
    """

    def __init__(
        self, epsilon, delta, *args, **kwargs
    ):
        """Store `epsilon` and `delta` and set up a budget accountant.

        Remaining positional/keyword arguments are forwarded to
        `Mechanism.__init__`.
        """
        super().__init__(*args, **kwargs)
        self.epsilon, self.delta = epsilon, delta
        # Placeholder until a concrete mechanism is assigned.
        self.mechanism = None
        # for advanced composition
        self.accountant = BudgetAccountant(delta=self.delta)
        self.accountant.set_default()

    def poverty_estimates(
        self, pop_total, children_total, children_poverty
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Noise the poverty counts (and optionally the child totals).

        Args:
            pop_total: Total population in each district; never noised.
            children_total: Total children in each district; noised only
                when `self.noise_total` is truthy.
            children_poverty: Children in poverty in each district; always
                noised.

        Returns:
            Tuple of post-processed `(pop_total, children_total,
            children_poverty)`.

        Raises:
            NotImplementedError: If `self.mechanism` is still None.
        """
        if self.mechanism is None:
            raise NotImplementedError

        add_noise = self.mechanism.randomise
        # NOTE: as of 3/21, by default only the poverty estimate is noised
        # (for consistency with sampling, where est. var. is unavailable)
        noised_poverty = children_poverty.apply(add_noise)
        if self.noise_total:
            noised_total = children_total.apply(add_noise)
        else:
            noised_total = children_total

        # Post-processing: rounding and/or clipping to non-negative values,
        # as configured on the instance.
        return tuple(
            self.post_processing(series)
            for series in (pop_total, noised_total, noised_poverty)
        )
Ancestors
Subclasses
Inherited members
class DummyMechanism
-
No randomization.
Expand source code Browse git
class DummyMechanism:
    """No randomization.

    Exposes a `randomise` method that is the identity function.
    """

    def randomise(self, x):
        """Return `x` exactly as given."""
        return x
Methods
def randomise(self, x)
-
Expand source code Browse git
def randomise(self, x):
    """Identity "randomisation": hand back `x` exactly as given."""
    return x
class Gaussian (*args, **kwargs)
-
Differentially private mechanisms wrapping `diffprivlib`.
Expand source code Browse git
class Gaussian(DiffPriv):
    """`DiffPriv` whose `mechanism` is diffprivlib's `GaussianAnalytic`."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `DiffPriv`, then build the mechanism."""
        super().__init__(*args, **kwargs)
        # These values were stored on the instance by the parent
        # initializers.
        settings = {
            "epsilon": self.epsilon,
            "delta": self.delta,
            "sensitivity": self.sensitivity,
        }
        self.mechanism = GaussianMech(**settings)
Ancestors
Inherited members
class GroundTruth (*args, **kwargs)
-
No randomization.
Expand source code Browse git
class GroundTruth(Mechanism):
    """No randomization: the input counts are returned unchanged.
    """

    def __init__(self, *args, **kwargs):
        """Initialize with the base-class default settings.

        Any positional or keyword arguments are accepted and ignored,
        exactly as before.
        """
        # Run the base initializer (with its defaults) so the inherited
        # attributes -- sensitivity, round, clip, noise_total -- exist.
        # Without this, inherited methods such as `post_processing` fail
        # with AttributeError on a GroundTruth instance.
        super().__init__()

    def poverty_estimates(
        self, pop_total, children_total, children_poverty
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Return the three inputs as-is, in the same order.

        Args:
            pop_total: Total population in each district.
            children_total: Total children in each district.
            children_poverty: Children in poverty in each district.

        Returns:
            The tuple `(pop_total, children_total, children_poverty)`,
            with no noise and no post-processing applied.
        """
        return pop_total, children_total, children_poverty
Ancestors
Inherited members
class Laplace (*args, **kwargs)
-
Differentially private mechanisms wrapping `diffprivlib`.
Expand source code Browse git
class Laplace(DiffPriv):
    """`DiffPriv` whose `mechanism` is diffprivlib's `Laplace`."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `DiffPriv`, then build the mechanism."""
        super().__init__(*args, **kwargs)
        # These values were stored on the instance by the parent
        # initializers.
        settings = {
            "epsilon": self.epsilon,
            "delta": self.delta,
            "sensitivity": self.sensitivity,
        }
        self.mechanism = LaplaceMech(**settings)
Ancestors
Inherited members
class Mechanism (sensitivity=2.0, round=False, clip=True, noise_total=False)
-
A class for the different privacy mechanisms we employ to compute poverty estimates.
Expand source code Browse git
class Mechanism:
    """Base class for the mechanisms used to compute poverty estimates.

    Holds the shared configuration and the rounding/clipping step;
    subclasses supply `poverty_estimates`.
    """

    def __init__(
        self, sensitivity=2.0, round=False, clip=True, noise_total=False
    ):
        """Store the configuration; no validation is performed here.

        Args:
            sensitivity: Stored as `self.sensitivity`. Defaults to 2.0.
            round: If truthy, `post_processing` rounds counts.
            clip: If truthy, `post_processing` clips counts below at 0.
            noise_total: Flag read by subclasses when deciding whether to
                also noise the total-children counts.
        """
        self.noise_total = noise_total
        self.clip = clip
        self.round = round
        self.sensitivity = sensitivity

    def poverty_estimates(
        self,
        pop_total: pd.Series,
        children_total: pd.Series,
        children_poverty: pd.Series
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Return estimates for total population, total children and
        children in poverty, indexed by district ID.

        This base implementation is only a placeholder for subclasses.

        Args:
            pop_total (pd.Series): Total population in each district.
            children_total (pd.Series): Total children in each district.
            children_poverty (pd.Series): Children in poverty in each
                district.

        Returns:
            Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: Noised versions
                of the inputs.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def post_processing(self, count: pd.Series) -> pd.Series:
        """Apply the configured rounding and clipping to a noised count.

        Rounding (when enabled) happens before clipping (when enabled).

        Args:
            count (pd.Series): Noised count to process.

        Returns:
            pd.Series: Processed count.
        """
        processed = count
        if self.round:
            processed = np.round(processed)
        if not self.clip:
            return processed
        # Lower bound of 0, no upper bound.
        return np.clip(processed, 0, None)
Subclasses
Methods
def post_processing(self, count: pandas.core.series.Series) ‑> pandas.core.series.Series
-
Post processing methods for noised counts. (Rounding or clipping.)
Args
count
:pd.Series
- Noised count to process.
Returns
pd.Series
- Processed count.
Expand source code Browse git
def post_processing(self, count: pd.Series) -> pd.Series:
    """Apply the configured rounding and clipping to a noised count.

    Rounding (when `self.round` is truthy) happens before clipping (when
    `self.clip` is truthy).

    Args:
        count (pd.Series): Noised count to process.

    Returns:
        pd.Series: Processed count.
    """
    processed = count
    if self.round:
        processed = np.round(processed)
    if not self.clip:
        return processed
    # Lower bound of 0, no upper bound.
    return np.clip(processed, 0, None)
def poverty_estimates(self, pop_total: pandas.core.series.Series, children_total: pandas.core.series.Series, children_poverty: pandas.core.series.Series) ‑> Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]
-
Returns dataframe for children in poverty, children total, and total population indexed by district ID.
Args
pop_total
:pd.Series
- Total population in each district.
children_total
:pd.Series
- Total children in each district.
children_poverty
:pd.Series
- Children in poverty in each district.
Returns
Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]
- Noised versions of the input tuples.
Expand source code Browse git
def poverty_estimates(
    self,
    pop_total: pd.Series,
    children_total: pd.Series,
    children_poverty: pd.Series
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Return estimates for total population, total children and children
    in poverty, indexed by district ID.

    This implementation is only a placeholder and always raises.

    Args:
        pop_total (pd.Series): Total population in each district.
        children_total (pd.Series): Total children in each district.
        children_poverty (pd.Series): Children in poverty in each district.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: Noised versions
            of the inputs.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
class Sampled (*args, multiplier: float = 1.0, distribution: str = 'gaussian', **kwargs)
-
Mechanism for simulating sampling errors.
Args
multiplier : float, optional - Scales sampling noise by a constant. Defaults to 1.0.
distribution : str, optional - Distribution of sampling noise. Supported options are 'gaussian' and 'laplace'. Defaults to "gaussian".
Expand source code Browse git
class Sampled(Mechanism):
    """Mechanism for simulating sampling errors.
    """

    def __init__(
        self,
        *args,
        multiplier: float = 1.0,
        distribution: str = "gaussian",
        **kwargs
    ):
        """
        Args:
            multiplier (float, optional): Scales sampling noise by a constant.
                Defaults to 1.0.
            distribution (str, optional): Distribution of sampling noise.
                Supported options are 'gaussian' and 'laplace'. Defaults to
                "gaussian".
        """
        super().__init__(*args, **kwargs)
        # these are fixed, because sampling error
        # is theoretically immutable by algo means.
        # reported estimates are non-negative integers.
        self.clip = True
        self.round = True
        self.multiplier = multiplier
        self.distribution = distribution

    def poverty_estimates(
        self, pop_total, children_total, children_poverty, cv
    ):
        """Add simulated sampling noise to the poverty counts.

        Args:
            pop_total: Total population in each district; never noised.
            children_total: Total children in each district; noised only
                when `self.noise_total` is truthy.
            children_poverty: Children in poverty in each district; always
                noised.
            cv: Coefficients of variation used to scale the noise.

        Returns:
            Tuple of post-processed `(pop_total, children_total,
            children_poverty)`.

        Raises:
            ValueError: If `self.distribution` is not a supported option.
        """
        children_poverty = self._noise(children_poverty, cv)
        if self.noise_total:
            # NOTE: assuming CVs are same for total children.
            # This is beyond Census guidance.
            children_total = self._noise(children_total, cv)
        return self.post_processing(pop_total), \
            self.post_processing(children_total), \
            self.post_processing(children_poverty)

    def _noise(self, count: pd.Series, cv: pd.Series) -> pd.Series:
        """Add sampling noise.

        Args:
            count (pd.Series): Count to add sampling noise to.
            cv (pd.Series): Coefficients of variation to use for sampling
                variance.

        Returns:
            pd.Series: Noised counts, clipped below at 0. When `count` is a
                Series the result carries its index and name; otherwise the
                NumPy result is returned as-is.

        Raises:
            ValueError: If `self.distribution` is not 'gaussian' or
                'laplace'.
        """
        if self.distribution == "gaussian":
            noised = np.random.normal(
                count,  # mean
                count * cv * self.multiplier  # stderr
            )
        elif self.distribution == "laplace":
            noised = np.random.laplace(
                # mean
                count,
                # variance is 2b^2 = (count * cv)^2
                # b = count * cv * sqrt(1/2)
                np.sqrt(0.5) * count * cv * self.multiplier
            )
        else:
            raise ValueError(
                f"{self.distribution} is not a valid distribution."
            )
        noised = np.clip(noised, 0, None)
        # The NumPy samplers return plain arrays, which drops the pandas
        # index. Restore it so the result matches the documented return
        # type and stays aligned with the un-noised series.
        if isinstance(count, pd.Series):
            return pd.Series(noised, index=count.index, name=count.name)
        return noised
Ancestors
Inherited members