Module dp_policy.titlei.mechanisms

Expand source code Browse git
import pandas as pd
from typing import Tuple
import numpy as np
from diffprivlib.mechanisms import Laplace as LaplaceMech
from diffprivlib.mechanisms import GaussianAnalytic as GaussianMech
from diffprivlib.accountant import BudgetAccountant


class Mechanism:
    """
    Base class for the mechanisms used to produce poverty estimates.

    Subclasses override `poverty_estimates`; `post_processing` provides the
    shared rounding/clipping step applied to counts.
    """
    def __init__(
        self, sensitivity=2.0, round=False, clip=True, noise_total=False
    ):
        # Configuration is stored verbatim; nothing is validated here.
        self.sensitivity, self.round = sensitivity, round
        self.clip, self.noise_total = clip, noise_total

    def poverty_estimates(
        self,
        pop_total: pd.Series,
        children_total: pd.Series,
        children_poverty: pd.Series
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Produce estimates of total population, total children, and
        children in poverty, indexed by district ID.

        This base implementation is a placeholder that always raises.

        Args:
            pop_total (pd.Series): Total population in each district.
            children_total (pd.Series): Total children in each district.
            children_poverty (pd.Series): Children in poverty in each district.

        Returns:
            Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: Noised versions
                of the inputs. (The annotation says DataFrame although the
                parameters are documented as Series.)

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def post_processing(self, count: pd.Series) -> pd.Series:
        """Apply the configured rounding and/or clipping to a noised count.

        Args:
            count (pd.Series): Noised count to process.

        Returns:
            pd.Series: `count` rounded when `self.round` is set, then clipped
                below at zero when `self.clip` is set.
        """
        # Rounding happens first so that clipping sees the rounded values.
        processed = np.round(count) if self.round else count
        return np.clip(processed, 0, None) if self.clip else processed


class GroundTruth(Mechanism):
    """No randomization.

    `poverty_estimates` hands the inputs back unchanged.
    """
    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments.

        Positional and keyword arguments are discarded (presumably so this
        class can be constructed with the same call as the other
        mechanisms). The base initializer is still run with its defaults so
        that inherited attributes such as `round` and `clip` exist and the
        inherited `post_processing` is usable on this instance.
        """
        super().__init__()

    def poverty_estimates(
        self, pop_total, children_total, children_poverty
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Return the three inputs untouched, in the order received.

        Args:
            pop_total: Total population in each district.
            children_total: Total children in each district.
            children_poverty: Children in poverty in each district.

        Returns:
            The same three objects that were passed in; no noise and no
            post-processing is applied.
        """
        return pop_total, children_total, children_poverty


class DummyMechanism:
    """No randomization.

    Exposes a `randomise` method that leaves its argument unchanged.
    """
    def randomise(self, x):
        """Return `x` unchanged."""
        return x


class DiffPriv(Mechanism):
    """Differentially private mechanisms wrapping `diffprivlib`.

    Subclasses are expected to set `self.mechanism` to an object exposing
    `randomise`; until then `poverty_estimates` raises.
    """
    def __init__(
        self, epsilon, delta, *args, **kwargs
    ):
        """Store the privacy parameters and register a budget accountant.

        Args:
            epsilon: Privacy parameter epsilon, stored on the instance.
            delta: Privacy parameter delta, stored on the instance and
                passed to the `BudgetAccountant`.
            *args: Forwarded to `Mechanism.__init__`.
            **kwargs: Forwarded to `Mechanism.__init__`.
        """
        super().__init__(*args, **kwargs)
        self.epsilon, self.delta = epsilon, delta
        # Concrete subclasses replace this placeholder.
        self.mechanism = None
        # for advanced composition
        accountant = BudgetAccountant(delta=self.delta)
        self.accountant = accountant
        accountant.set_default()

    def poverty_estimates(
        self, pop_total, children_total, children_poverty
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Noise the poverty count (and optionally the children total).

        Args:
            pop_total: Total population in each district; never noised.
            children_total: Total children in each district; noised only
                when `self.noise_total` is set.
            children_poverty: Children in poverty in each district; always
                noised.

        Returns:
            The post-processed population total, children total, and
            children-in-poverty counts, in that order.

        Raises:
            NotImplementedError: If no mechanism has been configured.
        """
        mech = self.mechanism
        if mech is None:
            raise NotImplementedError

        # NOTE: as of 3/21, by default only adding noise to poverty estimate
        # (for consistency with sampling, where est. var. is unavailable)
        noisy_poverty = children_poverty.apply(mech.randomise)
        noisy_total = (
            children_total.apply(mech.randomise)
            if self.noise_total
            else children_total
        )

        # Post-processing (clipping negatives, optional rounding) is applied
        # to all three outputs, including the un-noised population total.
        return (
            self.post_processing(pop_total),
            self.post_processing(noisy_total),
            self.post_processing(noisy_poverty),
        )


class Laplace(DiffPriv):
    """`DiffPriv` whose noise source is `LaplaceMech`."""
    def __init__(self, *args, **kwargs):
        """Forward all arguments to `DiffPriv`, then build the mechanism."""
        super().__init__(*args, **kwargs)
        # These attributes were stored by the parent initializers.
        mechanism_params = dict(
            epsilon=self.epsilon,
            delta=self.delta,
            sensitivity=self.sensitivity,
        )
        self.mechanism = LaplaceMech(**mechanism_params)


class Gaussian(DiffPriv):
    """`DiffPriv` whose noise source is `GaussianMech`."""
    def __init__(self, *args, **kwargs):
        """Forward all arguments to `DiffPriv`, then build the mechanism."""
        super().__init__(*args, **kwargs)
        # These attributes were stored by the parent initializers.
        mechanism_params = dict(
            epsilon=self.epsilon,
            delta=self.delta,
            sensitivity=self.sensitivity,
        )
        self.mechanism = GaussianMech(**mechanism_params)


class Sampled(Mechanism):
    """Mechanism for simulating sampling errors.
    """
    def __init__(
        self,
        *args,
        multiplier: float = 1.0,
        distribution: str = "gaussian",
        **kwargs
    ):
        """
        Args:
            multiplier (float, optional): Scales sampling noise by a constant.
                Defaults to 1.0.
            distribution (str, optional): Distribution of sampling noise.
                Supported options are 'gaussian' and 'laplace'. Defaults to
                "gaussian".
            *args: Forwarded to `Mechanism.__init__`.
            **kwargs: Forwarded to `Mechanism.__init__`.
        """
        super().__init__(*args, **kwargs)
        # these are fixed, because sampling error
        # is theoretically immutable by algo means.
        # reported estimates are non-negative integers.
        self.clip = True
        self.round = True
        self.multiplier = multiplier
        self.distribution = distribution

    def poverty_estimates(
        self, pop_total, children_total, children_poverty, cv
    ):
        """Add simulated sampling noise to the poverty count (and optionally
        to the children total).

        Args:
            pop_total: Total population in each district; never noised.
            children_total: Total children in each district; noised only
                when `self.noise_total` is set.
            children_poverty: Children in poverty in each district; always
                noised.
            cv: Coefficients of variation used to scale the noise.

        Returns:
            The post-processed population total, children total, and
            children-in-poverty counts, in that order.

        Raises:
            ValueError: If `self.distribution` is not a supported option.
        """
        children_poverty = self._noise(children_poverty, cv)
        if self.noise_total:
            # NOTE: assuming CVs are same for total children.
            # This is beyond Census guidance.
            children_total = self._noise(children_total, cv)

        return (
            self.post_processing(pop_total),
            self.post_processing(children_total),
            self.post_processing(children_poverty),
        )

    def _noise(self, count: pd.Series, cv: pd.Series) -> pd.Series:
        """Add sampling noise.

        Args:
            count (pd.Series): Count to add sampling noise to.
            cv (pd.Series): Coefficients of variation to use for sampling
                variance.

        Returns:
            pd.Series: Noised counts, clipped below at zero. When `count` is
                a Series the result carries the same index and name; for
                other inputs the NumPy result is returned as is.

        Raises:
            ValueError: If `self.distribution` is not 'gaussian' or
                'laplace'.
        """
        if self.distribution == "gaussian":
            noised = np.random.normal(
                count,  # mean
                count * cv * self.multiplier  # stderr
            )
        elif self.distribution == "laplace":
            noised = np.random.laplace(
                # mean
                count,
                # variance is 2b^2 = (count * cv)^2
                # b = count * cv * sqrt(1/2)
                np.sqrt(0.5) * count * cv * self.multiplier
            )
        else:
            raise ValueError(
                f"{self.distribution} is not a valid distribution."
            )
        noised = np.clip(noised, 0, None)
        # NumPy's samplers return bare arrays, which would drop the district
        # index; restore it so the output lines up with the un-noised series.
        if isinstance(count, pd.Series):
            noised = pd.Series(noised, index=count.index, name=count.name)
        return noised

Classes

class DiffPriv (epsilon, delta, *args, **kwargs)

Differentially private mechanisms wrapping diffprivlib.

Expand source code Browse git
class DiffPriv(Mechanism):
    """Differentially private mechanisms wrapping `diffprivlib`.

    Subclasses are expected to set `self.mechanism` to an object exposing
    `randomise`; until then `poverty_estimates` raises.
    """
    def __init__(
        self, epsilon, delta, *args, **kwargs
    ):
        """Store the privacy parameters and register a budget accountant.

        Args:
            epsilon: Privacy parameter epsilon, stored on the instance.
            delta: Privacy parameter delta, stored on the instance and
                passed to the `BudgetAccountant`.
            *args: Forwarded to `Mechanism.__init__`.
            **kwargs: Forwarded to `Mechanism.__init__`.
        """
        super().__init__(*args, **kwargs)
        self.epsilon, self.delta = epsilon, delta
        # Concrete subclasses replace this placeholder.
        self.mechanism = None
        # for advanced composition
        accountant = BudgetAccountant(delta=self.delta)
        self.accountant = accountant
        accountant.set_default()

    def poverty_estimates(
        self, pop_total, children_total, children_poverty
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Noise the poverty count (and optionally the children total).

        Args:
            pop_total: Total population in each district; never noised.
            children_total: Total children in each district; noised only
                when `self.noise_total` is set.
            children_poverty: Children in poverty in each district; always
                noised.

        Returns:
            The post-processed population total, children total, and
            children-in-poverty counts, in that order.

        Raises:
            NotImplementedError: If no mechanism has been configured.
        """
        mech = self.mechanism
        if mech is None:
            raise NotImplementedError

        # NOTE: as of 3/21, by default only adding noise to poverty estimate
        # (for consistency with sampling, where est. var. is unavailable)
        noisy_poverty = children_poverty.apply(mech.randomise)
        noisy_total = (
            children_total.apply(mech.randomise)
            if self.noise_total
            else children_total
        )

        # Post-processing (clipping negatives, optional rounding) is applied
        # to all three outputs, including the un-noised population total.
        return (
            self.post_processing(pop_total),
            self.post_processing(noisy_total),
            self.post_processing(noisy_poverty),
        )

Ancestors

Subclasses

Inherited members

class DummyMechanism

No randomization.

Expand source code Browse git
class DummyMechanism:
    """No randomization.

    Exposes a `randomise` method that leaves its argument unchanged.
    """
    def randomise(self, x):
        """Return `x` unchanged."""
        return x

Methods

def randomise(self, x)
Expand source code Browse git
def randomise(self, x):
    """Return `x` unchanged; no noise is added."""
    return x
class Gaussian (*args, **kwargs)

Differentially private mechanisms wrapping diffprivlib.

Expand source code Browse git
class Gaussian(DiffPriv):
    """`DiffPriv` whose noise source is `GaussianMech`."""
    def __init__(self, *args, **kwargs):
        """Forward all arguments to `DiffPriv`, then build the mechanism."""
        super().__init__(*args, **kwargs)
        # These attributes were stored by the parent initializers.
        mechanism_params = dict(
            epsilon=self.epsilon,
            delta=self.delta,
            sensitivity=self.sensitivity,
        )
        self.mechanism = GaussianMech(**mechanism_params)

Ancestors

Inherited members

class GroundTruth (*args, **kwargs)

No randomization.

Expand source code Browse git
class GroundTruth(Mechanism):
    """No randomization.

    `poverty_estimates` hands the inputs back unchanged.
    """
    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments.

        Positional and keyword arguments are discarded (presumably so this
        class can be constructed with the same call as the other
        mechanisms). The base initializer is still run with its defaults so
        that inherited attributes such as `round` and `clip` exist and the
        inherited `post_processing` is usable on this instance.
        """
        super().__init__()

    def poverty_estimates(
        self, pop_total, children_total, children_poverty
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Return the three inputs untouched, in the order received.

        Args:
            pop_total: Total population in each district.
            children_total: Total children in each district.
            children_poverty: Children in poverty in each district.

        Returns:
            The same three objects that were passed in; no noise and no
            post-processing is applied.
        """
        return pop_total, children_total, children_poverty

Ancestors

Inherited members

class Laplace (*args, **kwargs)

Differentially private mechanisms wrapping diffprivlib.

Expand source code Browse git
class Laplace(DiffPriv):
    """`DiffPriv` whose noise source is `LaplaceMech`."""
    def __init__(self, *args, **kwargs):
        """Forward all arguments to `DiffPriv`, then build the mechanism."""
        super().__init__(*args, **kwargs)
        # These attributes were stored by the parent initializers.
        mechanism_params = dict(
            epsilon=self.epsilon,
            delta=self.delta,
            sensitivity=self.sensitivity,
        )
        self.mechanism = LaplaceMech(**mechanism_params)

Ancestors

Inherited members

class Mechanism (sensitivity=2.0, round=False, clip=True, noise_total=False)

A class for the different privacy mechanisms we employ to compute poverty estimates.

Expand source code Browse git
class Mechanism:
    """
    Base class for the mechanisms used to produce poverty estimates.

    Subclasses override `poverty_estimates`; `post_processing` provides the
    shared rounding/clipping step applied to counts.
    """
    def __init__(
        self, sensitivity=2.0, round=False, clip=True, noise_total=False
    ):
        # Configuration is stored verbatim; nothing is validated here.
        self.sensitivity, self.round = sensitivity, round
        self.clip, self.noise_total = clip, noise_total

    def poverty_estimates(
        self,
        pop_total: pd.Series,
        children_total: pd.Series,
        children_poverty: pd.Series
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Produce estimates of total population, total children, and
        children in poverty, indexed by district ID.

        This base implementation is a placeholder that always raises.

        Args:
            pop_total (pd.Series): Total population in each district.
            children_total (pd.Series): Total children in each district.
            children_poverty (pd.Series): Children in poverty in each district.

        Returns:
            Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: Noised versions
                of the inputs. (The annotation says DataFrame although the
                parameters are documented as Series.)

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def post_processing(self, count: pd.Series) -> pd.Series:
        """Apply the configured rounding and/or clipping to a noised count.

        Args:
            count (pd.Series): Noised count to process.

        Returns:
            pd.Series: `count` rounded when `self.round` is set, then clipped
                below at zero when `self.clip` is set.
        """
        # Rounding happens first so that clipping sees the rounded values.
        processed = np.round(count) if self.round else count
        return np.clip(processed, 0, None) if self.clip else processed

Subclasses

Methods

def post_processing(self, count: pandas.core.series.Series) ‑> pandas.core.series.Series

Post-processing for noised counts: optional rounding followed by optional clipping at zero.

Args

count : pd.Series
Noised count to process.

Returns

pd.Series
Processed count.
Expand source code Browse git
def post_processing(self, count: pd.Series) -> pd.Series:
    """Apply the configured rounding and/or clipping to a noised count.

    Args:
        count (pd.Series): Noised count to process.

    Returns:
        pd.Series: `count` rounded when `self.round` is set, then clipped
            below at zero when `self.clip` is set.
    """
    # Rounding happens first so that clipping sees the rounded values.
    processed = np.round(count) if self.round else count
    return np.clip(processed, 0, None) if self.clip else processed
def poverty_estimates(self, pop_total: pandas.core.series.Series, children_total: pandas.core.series.Series, children_poverty: pandas.core.series.Series) ‑> Tuple[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]

Returns estimates of total population, total children, and children in poverty, each indexed by district ID.

Args

pop_total : pd.Series
Total population in each district.
children_total : pd.Series
Total children in each district.
children_poverty : pd.Series
Children in poverty in each district.

Returns

Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]
Noised versions of the three inputs.
Expand source code Browse git
def poverty_estimates(
    self,
    pop_total: pd.Series,
    children_total: pd.Series,
    children_poverty: pd.Series
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Produce estimates of total population, total children, and
    children in poverty, indexed by district ID.

    This implementation is a placeholder that always raises.

    Args:
        pop_total (pd.Series): Total population in each district.
        children_total (pd.Series): Total children in each district.
        children_poverty (pd.Series): Children in poverty in each district.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: Noised versions
            of the inputs. (The annotation says DataFrame although the
            parameters are documented as Series.)

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
class Sampled (*args, multiplier: float = 1.0, distribution: str = 'gaussian', **kwargs)

Mechanism for simulating sampling errors.

Args

multiplier : float, optional
Scales sampling noise by a constant. Defaults to 1.0.
distribution : str, optional
Distribution of sampling noise. Supported options are 'gaussian' and 'laplace'. Defaults to "gaussian".
Expand source code Browse git
class Sampled(Mechanism):
    """Mechanism for simulating sampling errors.
    """
    def __init__(
        self,
        *args,
        multiplier: float = 1.0,
        distribution: str = "gaussian",
        **kwargs
    ):
        """
        Args:
            multiplier (float, optional): Scales sampling noise by a constant.
                Defaults to 1.0.
            distribution (str, optional): Distribution of sampling noise.
                Supported options are 'gaussian' and 'laplace'. Defaults to
                "gaussian".
            *args: Forwarded to `Mechanism.__init__`.
            **kwargs: Forwarded to `Mechanism.__init__`.
        """
        super().__init__(*args, **kwargs)
        # these are fixed, because sampling error
        # is theoretically immutable by algo means.
        # reported estimates are non-negative integers.
        self.clip = True
        self.round = True
        self.multiplier = multiplier
        self.distribution = distribution

    def poverty_estimates(
        self, pop_total, children_total, children_poverty, cv
    ):
        """Add simulated sampling noise to the poverty count (and optionally
        to the children total).

        Args:
            pop_total: Total population in each district; never noised.
            children_total: Total children in each district; noised only
                when `self.noise_total` is set.
            children_poverty: Children in poverty in each district; always
                noised.
            cv: Coefficients of variation used to scale the noise.

        Returns:
            The post-processed population total, children total, and
            children-in-poverty counts, in that order.

        Raises:
            ValueError: If `self.distribution` is not a supported option.
        """
        children_poverty = self._noise(children_poverty, cv)
        if self.noise_total:
            # NOTE: assuming CVs are same for total children.
            # This is beyond Census guidance.
            children_total = self._noise(children_total, cv)

        return (
            self.post_processing(pop_total),
            self.post_processing(children_total),
            self.post_processing(children_poverty),
        )

    def _noise(self, count: pd.Series, cv: pd.Series) -> pd.Series:
        """Add sampling noise.

        Args:
            count (pd.Series): Count to add sampling noise to.
            cv (pd.Series): Coefficients of variation to use for sampling
                variance.

        Returns:
            pd.Series: Noised counts, clipped below at zero. When `count` is
                a Series the result carries the same index and name; for
                other inputs the NumPy result is returned as is.

        Raises:
            ValueError: If `self.distribution` is not 'gaussian' or
                'laplace'.
        """
        if self.distribution == "gaussian":
            noised = np.random.normal(
                count,  # mean
                count * cv * self.multiplier  # stderr
            )
        elif self.distribution == "laplace":
            noised = np.random.laplace(
                # mean
                count,
                # variance is 2b^2 = (count * cv)^2
                # b = count * cv * sqrt(1/2)
                np.sqrt(0.5) * count * cv * self.multiplier
            )
        else:
            raise ValueError(
                f"{self.distribution} is not a valid distribution."
            )
        noised = np.clip(noised, 0, None)
        # NumPy's samplers return bare arrays, which would drop the district
        # index; restore it so the output lines up with the un-noised series.
        if isinstance(count, pd.Series):
            noised = pd.Series(noised, index=count.index, name=count.name)
        return noised

Ancestors

Inherited members