Module dp_policy.experiments
import os
import pandas as pd
from tqdm import tqdm
import numpy as np
import itertools
import matplotlib.pyplot as plt
import seaborn as sns
import dp_policy.config as config
from dp_policy.titlei.evaluation import \
discrimination_treatments_join, save_treatments, load_treatments, \
match_true, compare_treatments, misalloc_statistics
from dp_policy.titlei.mechanisms import Mechanism, Laplace
from dp_policy.titlei.utils import get_inputs, data, get_sppe
from dp_policy.titlei.allocators import Allocator, SonnenbergAuthorizer
from dp_policy.titlei.thresholders import \
AverageThresholder, RepeatThresholder, DummyThresholder, MOEThresholder, \
PastThresholder
from typing import List, Union
def titlei_funding(
allocator: Allocator,
inputs: pd.DataFrame,
mechanism: Mechanism,
sppe: pd.DataFrame,
normalize: bool = True,
allocator_kwargs: dict = {},
sampling_kwargs: dict = {},
**grants_kwargs
):
"""Allocate Title I funding.
Args:
allocator (Allocator): The allocator to use.
inputs (pd.DataFrame): The necessary inputs for the allocator -
usually the SAIPE - by district.
mechanism (Mechanism): The randomization mechanism to use.
sppe (pd.DataFrame): State per-pupil education data.
normalize (bool, optional): Whether to normalize the authorization
amounts. Defaults to True.
allocator_kwargs (dict, optional): Parameters for the allocator.
Defaults to {}.
sampling_kwargs (dict, optional): Parameters for the sampling
mechanism. Defaults to {}.
Returns:
pd.DataFrame: Returns input dataframe with grant allocations under
ground truth, sampling, and privacy randomization.
"""
alloc = allocator(
data(
inputs, mechanism, sppe,
sampling_kwargs=sampling_kwargs,
**grants_kwargs
),
**allocator_kwargs
)
return alloc.allocations(normalize=normalize)
def titlei_grid(
inputs: pd.DataFrame,
mech: Mechanism,
eps: list = list(np.logspace(-3, 10, num=10)) + [2.52],
delta: list = [0.0],
trials: int = 100,
mech_kwargs: dict = {},
auth: bool = False,
allocator_kwargs: dict = {},
sampling_kwargs: dict = {},
verbose: bool = True,
print_results: list = [2.52, 0.1],
plot_results: bool = False,
noise_to_plot: str = "children_eligible",
alpha: float = 0.05,
results: pd.DataFrame = None
):
"""Run many trials of Title I process under different privacy parameters.
Args:
inputs (pd.DataFrame): The necessary inputs for the allocator -
usually the SAIPE - by district.
mech (Mechanism): The randomization mechanism to use.
eps (list, optional): Values of epsilon to try. Defaults to
`list(np.logspace(-3, 10, num=10))+[2.52]`.
delta (list, optional): Values of delta to try. Defaults to [0.0].
trials (int, optional): Number of trials to run for each parameter
combo. Defaults to 100.
mech_kwargs (dict, optional): Parameters for the randomization
mechanism. Defaults to {}.
auth (bool, optional): Whether to use the raw authorization amounts.
Defaults to False.
allocator_kwargs (dict, optional): Parameters for the allocator.
Defaults to {}.
sampling_kwargs (dict, optional): Parameters for the sampling
mechanism. Defaults to {}.
        verbose (bool, optional): Whether to print progress information.
            Defaults to True.
print_results (list, optional): Which values of epsilon to print
results for. Defaults to [2.52, 0.1].
plot_results (bool, optional): Whether to plot results. Defaults to
False.
noise_to_plot (str, optional): Which variable to plot noise for.
Defaults to children_eligible.
        alpha (float, optional): Significance level for the confidence
            bands (0.05 gives 95% bands). Defaults to 0.05.
results (pd.DataFrame, optional): Pre-computed results to plot. If
provided, will skip grid calculation. Defaults to None.
Returns:
pd.DataFrame: Results grid.
"""
if results is None:
allocations = []
thresholder = allocator_kwargs.get('thresholder')
if verbose:
print(f"{len(eps)*len(delta)*trials} iters:")
for _ in tqdm(range(trials), desc='trial', disable=(not verbose)):
for d in tqdm(delta, desc='delta', leave=False, disable=True):
for e in tqdm(eps, desc='eps', leave=False, disable=True):
mechanism = mech(e, d, **mech_kwargs)
sppe = get_sppe(os.path.join(
config.root,
"data/sppe18.xlsx"
))
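                    # give past-year thresholders prior estimates
                    # generated with the same mechanism and SPPE data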
if thresholder is not None and isinstance(
thresholder, PastThresholder
):
thresholder.set_prior_estimates(
mechanism,
sppe,
verbose=False
)
allocations.append(titlei_funding(
SonnenbergAuthorizer,
inputs,
mechanism,
sppe,
allocator_kwargs=allocator_kwargs,
sampling_kwargs=sampling_kwargs,
verbose=False, # too noisy for a grid search
normalize=(not auth)
))
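        # stack the per-iteration allocations into one frame indexed by
        # (trial, delta, epsilon) plus the original district index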
results = pd.concat(
allocations, axis=0,
keys=itertools.product(range(trials), delta, eps),
names=[
"trial", "delta", "epsilon"
] + list(allocations[-1].index.names)
)
if print_results:
prefixes = ["est", "dp", "dpest"]
if plot_results:
for e, alloc in results.groupby("epsilon"):
if e in print_results:
print(f"--- eps={e} ---")
data_error = alloc[f"est_{noise_to_plot}"] \
- alloc[f"true_{noise_to_plot}"]
dp_error = alloc[f"dpest_{noise_to_plot}"] \
- alloc[f"est_{noise_to_plot}"]
s = 1
# plt.scatter(
# np.log(alloc["true_children_eligible"]), data_error,
# s, label="Data deviations"
# )
# plt.scatter(
# np.log(alloc["true_children_eligible"]), dp_error,
# s, label="Privacy deviations"
# )
df = pd.melt(
pd.DataFrame({
"Privacy": dp_error,
"Data": data_error,
"Log count":
np.log(alloc[f"true_{noise_to_plot}"])
}).reset_index(),
id_vars=[
"trial",
"State FIPS Code",
"District ID",
"Log count"
],
value_vars=["Data", "Privacy"],
var_name="Deviation",
value_name="Noise"
)
sns.scatterplot(
data=df,
x="Log count",
y="Noise",
hue="Deviation",
s=s,
alpha=0.5,
palette=('#5D3A9B', "#E66100"),
rasterized=True
)
plt.legend()
plt.xlabel("Log count")
plt.ylabel("Noise")
plt.savefig(os.path.join(
config.root,
f"plots/robustness/noise_poverty_eps={e}.pdf"
if noise_to_plot == "children_eligible" else
"plots/robustness/noise_poverty_eps={}_{}.pdf".format(
e,
noise_to_plot
)
), transparent=True, dpi=100)
plt.show()
# plt.scatter(
# alloc["true_children_eligible"],
# data_error/alloc["true_children_eligible"],
# s, label="data"
# )
# plt.scatter(
# alloc["true_children_eligible"],
# dp_error/alloc["true_children_eligible"],
# s, label="dp"
# )
# plt.legend()
# plt.xlabel("# children in poverty")
# plt.ylabel("Noise per child in poverty")
# plt.show()
r = (
min(
data_error.quantile(0.05),
dp_error.quantile(0.05)
),
max(
data_error.quantile(0.95),
dp_error.quantile(0.95)
)
)
alpha = 0.25
sns.histplot(
df,
x="Noise",
hue="Deviation",
bins=75,
binrange=r,
alpha=alpha,
# kde=True,
# stat="density",
# linewidth=0,
# kde_kws=dict(
# clip=range
# ),
palette=('#5D3A9B', "#E66100")
# label="Data deviations",
# color='#5D3A9B'
)
# plt.legend()
plt.xlabel("Added noise")
plt.savefig(
os.path.join(
config.root,
f"plots/robustness/noise_eps={e}.pdf"
if noise_to_plot == "children_eligible" else
"plots/robustness/noise_eps={}_{}.pdf".format(
e,
noise_to_plot
)
),
transparent=True
)
plt.show()
for prefix in prefixes:
print("##", prefix)
for e, alloc in results.groupby("epsilon"):
for grant_type in (
"basic", "concentration", "targeted", "total"
):
if e in print_results:
error = alloc[f"{prefix}_grant_{grant_type}"] \
- alloc[f"true_grant_{grant_type}"]
print(f"## {grant_type} grants - eps={e} ##")
misalloc_statistics(
error,
allocations=alloc,
grant_type=grant_type
)
if plot_results and len(eps) > 2:
trials = len(np.unique(
results.index.get_level_values("trial")
))
grant_type = "total"
error = results[f"{prefix}_grant_{grant_type}"] \
- results[f"true_grant_{grant_type}"]
print(error.shape)
rmse = np.sqrt(error.pow(2).groupby(
["epsilon", "trial"]
).mean())
print(rmse)
mse = {
'mean': rmse.groupby("epsilon").mean(),
'lower': rmse.groupby("epsilon").quantile(alpha/2),
'upper': rmse.groupby("epsilon").quantile(1-alpha/2)
}
eps = mse['mean'].index
plt.plot(eps, mse['mean'])
plt.fill_between(
eps, mse['lower'], mse['upper'],
color='gray', alpha=0.25
)
ax = plt.gca()
ax.set_xscale('log')
plt.xlabel("Epsilon")
plt.ylabel(
f"Avg. RMSE in {grant_type} grants over {trials} trials"
)
plt.savefig(
os.path.join(
config.root,
"plots/robustness/eps_sensitivity_frontier.pdf"
),
dpi=300
)
plt.show()
return results
class Experiment:
"""Handles running and setting experiments.
"""
def __init__(
self,
name: str,
baseline: Union[str, pd.DataFrame] = "cached",
year: int = 2021,
trials: int = 1000,
eps: List[float] = [0.1],
delta: List[float] = [0.0],
no_match_true: bool = False
):
"""Initialize experiment and set baseline.
Args:
name (str): Experiment name.
baseline (Union[str, pd.DataFrame], optional): What baseline
(control) results to use. Defaults to "cached".
year (int, optional): What year to get inputs for. Defaults to
2021.
trials (int, optional): Number of trials to run for each condition.
Defaults to 1000.
eps (List[float], optional): Values of epsilon to try. Defaults to
[0.1].
delta (List[float], optional): Values of delta to try. Defaults to
[0.0].
            no_match_true (bool, optional): Whether to use a different
                baseline for policies that modify the allocation procedure.
                Defaults to False (the same baseline is used for all
                conditions).
"""
self.name = name if not no_match_true else f"{name}_unmatched"
self.trials = trials
self.eps = eps
self.delta = delta
self.match_true = not no_match_true
self.saipe = get_inputs(year)
if str(baseline) == "cached":
print("Using cached baseline...")
try:
self.baseline = load_treatments("baseline")['baseline']
except FileNotFoundError:
print("[WARN] Could not find cached baseline, generating.")
self._generate_baseline()
elif baseline is None:
print("Generating baseline...")
self._generate_baseline()
else:
print("Using given baseline...")
self.baseline = baseline
def _generate_baseline(self):
"""Generate the control condition. Defaults to normal Laplace
mechanism with no special allocation parameters.
"""
self.baseline = titlei_grid(
self.saipe, Laplace,
eps=self.eps, delta=self.delta,
trials=self.trials,
print_results=False,
allocator_kwargs={'verbose': False}
)
save_treatments({'baseline': self.baseline}, "baseline")
discrimination_treatments_join("baseline", include_moes=True)
def run(self):
"""Run the experiment, save results, and create joined covariate file.
"""
treatments = self._get_treatments()
save_treatments(treatments, self.name)
self.discrimination_join()
def _get_treatments(self):
"""Generate the treatment results.
"""
raise NotImplementedError
def discrimination_join(self, **join_kwargs):
"""Join the results to demographic covariates and save.
"""
discrimination_treatments_join(self.name, **join_kwargs)
def plot(self, **kwargs):
"""Print and plot the results, comparing treatments.
"""
treatments = load_treatments(self.name)
compare_treatments(treatments, experiment_name=self.name, **kwargs)
@staticmethod
def get_experiment(
name: str,
*args, **kwargs
):
"""Factory class for generating experiment object from name.
Args:
name (str): Experiment name to fetch. Options include "baseline",
"hold_harmless", "post_processing", "thresholds", "epsilon",
"moving_average", "budget", "sampling", "vary_total_children".
Returns:
Experiment: The experiment object.
"""
experiments = {
'baseline': Baseline,
'hold_harmless': HoldHarmless,
'post_processing': PostProcessing,
'thresholds': Thresholds,
'epsilon': Epsilon,
'moving_average': MovingAverage,
'budget': Budget,
'sampling': Sampling,
'vary_total_children': VaryTotalChildren
}
Exp = experiments.get(name)
if Exp is None:
raise ValueError(f"{name} not a supported experiment name.")
return Exp(name, *args, **kwargs)
class Baseline(Experiment):
"""Baseline condition only.
"""
def _get_treatments(self):
return {
'baseline': self.baseline
}
class HoldHarmless(Experiment):
"""Hold harmless and state minimum provisions.
"""
def _get_treatments(self):
hold_harmless = titlei_grid(
self.saipe, Laplace,
eps=self.eps, delta=self.delta,
trials=self.trials, print_results=False,
allocator_kwargs={'hold_harmless': True}
)
state_minimum = titlei_grid(
self.saipe, Laplace,
eps=self.eps, delta=self.delta,
trials=self.trials, print_results=False,
allocator_kwargs={'state_minimum': True}
)
both = titlei_grid(
self.saipe, Laplace,
eps=self.eps, delta=self.delta,
trials=self.trials, print_results=False,
allocator_kwargs={
'hold_harmless': True,
'state_minimum': True
}
)
if self.match_true:
# ground truths are the same
match_true(self.baseline, [hold_harmless, state_minimum, both])
# save treatments to file for later
return {
'No provisions (baseline)': self.baseline,
'Hold harmless only': hold_harmless,
'State minimum only': state_minimum,
'Both provisions': both
}
class PostProcessing(Experiment):
"""Post-processing variations: none, clipping, and clipping + rounding.
"""
def _get_treatments(self):
none = titlei_grid(
self.saipe, Laplace,
eps=self.eps,
delta=self.delta,
trials=self.trials,
mech_kwargs=dict(
clip=False,
round=False
),
print_results=False
)
clipping = self.baseline
rounding = titlei_grid(
self.saipe, Laplace,
eps=self.eps,
delta=self.delta,
trials=self.trials,
mech_kwargs=dict(
clip=True,
round=True
),
print_results=False
)
if self.match_true:
match_true(self.baseline, [none, rounding])
return {
'None': none,
'Clipping (baseline)': clipping,
'Clipping + Rounding': rounding
}
class MovingAverage(Experiment):
"""Moving average inputs, with multiple lag conditions.
"""
def __init__(
self,
name: str,
truth: str = "average",
**kwargs
):
"""
Args:
truth (str, optional): Baseline strategy to use. "average" uses
the 5-year average as the baseline. Defaults to "average".
Raises:
            Exception: If `truth` is anything other than "average".
"""
if truth != "average":
raise Exception(
"Not supporting truth values other than `average`."
)
self.truth = truth
super().__init__(f"{name}_truth={truth}", **kwargs)
def _get_treatments(self):
single_year = self.baseline
averaged = [
titlei_grid(
get_inputs(2021, avg_lag=i+1),
Laplace,
eps=self.eps,
delta=self.delta,
trials=self.trials,
print_results=False
)
for i in range(4)
]
if self.match_true:
match_true(averaged[-1], [single_year] + averaged[:-1])
return {
'Lag 0': single_year,
**{f"Lag {i+1}": a for i, a in enumerate(averaged)}
}
class Thresholds(Experiment):
"""Thresholding experiments.
"""
def _get_treatments(self):
# hard threshold
hard = self.baseline
# use average
averaged = titlei_grid(
self.saipe,
Laplace,
eps=self.eps,
delta=self.delta,
trials=self.trials,
allocator_kwargs={
'thresholder': AverageThresholder(2021, 4)
},
print_results=False
)
# must be ineligible 2x in a row
repeat2 = titlei_grid(
self.saipe,
Laplace,
eps=self.eps,
delta=self.delta,
trials=self.trials,
allocator_kwargs={
'thresholder': RepeatThresholder(2021, 1)
},
print_results=False
)
# must be ineligible 3x in a row
repeat3 = titlei_grid(
self.saipe,
Laplace,
eps=self.eps,
delta=self.delta,
trials=self.trials,
allocator_kwargs={
'thresholder': RepeatThresholder(2021, 2)
},
print_results=False
)
# no threshold
none = titlei_grid(
self.saipe,
Laplace,
eps=self.eps,
delta=self.delta,
trials=self.trials,
allocator_kwargs={
'thresholder': DummyThresholder()
},
print_results=False
)
# moe
moe_01 = titlei_grid(
self.saipe,
Laplace,
eps=self.eps,
delta=self.delta,
trials=self.trials,
allocator_kwargs={
'thresholder': MOEThresholder(alpha=0.1)
},
print_results=False
)
if self.match_true:
match_true(hard, [averaged, repeat2, repeat3, none, moe_01])
return {
'None': none,
'Hard (baseline)': hard,
'Averaged': averaged,
'Repeated years (2)': repeat2,
'Repeated years (3)': repeat3,
'Margin of error (90%)': moe_01
}
class Budget(Experiment):
"""Budget experiments.
"""
def _get_treatments(self):
if len(self.eps) > 0:
print(
"[WARN] Using first value of epsilon "
"{} for budget calcs.".format(
self.baseline.index.get_level_values("epsilon")[0]
)
)
e = self.eps[0]
alpha = 0.05
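        # restrict the baseline to delta=0 and the chosen epsilon;
        # the remaining index levels identify trials and districts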
test = self.baseline.loc[pd.IndexSlice[:, 0.0, e, :, :], :]
budgets = {
"Biden proposal": 2e10
}
for name, prefixes in {
"+ loss": ("dpest", "true"),
# "+ marginal loss (DP)": ("dpest", "est"),
# "+ loss (data error)": ("est", "true")
}.items():
error = test[f"{prefixes[1]}_grant_total"] \
- test[f"{prefixes[0]}_grant_total"]
err_grouped = error.groupby(
["State FIPS Code", "District ID"]
)
exp_error = err_grouped.mean()
budgets[name] = exp_error[exp_error < 0].abs().sum()
if prefixes == ("dpest", "true"):
# how much money to remove alpha quantile loss?
quantile = err_grouped.quantile(alpha)
budgets[f"+ {(alpha)*100}% quant. loss"] = \
quantile[quantile < 0].abs().sum()
# how much money to cover all misallocated dollars?
# budgets[f"+ exp. misalloc."] = exp_error.abs().sum()
budgets = {"{} (${:.1e})".format(k, v): v for k, v in budgets.items()}
usual_budget = self.saipe["official_total_alloc"]\
.groupby(["State FIPS Code", "District ID"]).first().sum()
print("baseline budget: {:.1e}".format(usual_budget))
print(budgets)
treatments = {
name: titlei_grid(
self.saipe, Laplace,
eps=[e], delta=self.delta,
trials=self.trials, print_results=False,
allocator_kwargs={
'appropriation': round(budget + usual_budget, 2)
}
) for name, budget in budgets.items()
}
if self.match_true:
match_true(self.baseline, list(treatments.values()))
treatments["FY2019 appropriation (baseline)"] = test
return treatments
class Epsilon(Experiment):
"""Privacy parameter experiments.
"""
def _get_treatments(self):
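        # reuse the cached baseline for epsilon values it already covers
        # and run fresh trials for the rest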
return {
e: titlei_grid(
self.saipe, Laplace,
eps=[e], delta=self.delta,
trials=self.trials, print_results=False
) if e not in self.eps
else self.baseline.loc[pd.IndexSlice[:, 0.0, e, :, :], :].copy()
for e in [1e-3, 1e-2, 1e-1, 1, 10, 30]
}
def discrimination_join(self):
return super().discrimination_join(epsilon=None)
def plot(self):
return super().plot(epsilon=None)
class Sampling(Experiment):
"""Sampling error experiments.
"""
def _get_treatments(self):
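        # vary the simulated sampling-error distribution and its scale
        # (multiplier) while keeping the privacy mechanism fixed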
gaussian = {
f"Gaussian ({m})": titlei_grid(
self.saipe, Laplace,
eps=self.eps, delta=self.delta,
trials=self.trials, print_results=False,
sampling_kwargs=dict(
multiplier=m,
distribution="gaussian"
)
)
for m in [0.5, 0.75, 1, 1.5]
}
laplace = {
f"Laplace ({m})": titlei_grid(
self.saipe, Laplace,
eps=self.eps, delta=self.delta,
trials=self.trials, print_results=False,
sampling_kwargs=dict(
multiplier=m,
distribution="laplace"
)
)
for m in [0.5, 1, 1.5]
}
return {
**gaussian,
**laplace
}
class VaryTotalChildren(Experiment):
"""Robustness check: what happens if the # of total children is not fixed?
"""
def _get_treatments(self):
varies = titlei_grid(
self.saipe, Laplace,
eps=self.eps, delta=self.delta,
trials=self.trials, print_results=False,
mech_kwargs=dict(
noise_total=True
),
sampling_kwargs=dict(
noise_total=True
)
)
return {
'Total number of children is fixed (baseline)': self.baseline,
'Total number of children varies': varies
}
Functions
def titlei_funding(allocator: Allocator, inputs: pandas.core.frame.DataFrame, mechanism: Mechanism, sppe: pandas.core.frame.DataFrame, normalize: bool = True, allocator_kwargs: dict = {}, sampling_kwargs: dict = {}, **grants_kwargs)
Allocate Title I funding.

Args:
    allocator (Allocator): The allocator to use.
    inputs (pd.DataFrame): The necessary inputs for the allocator - usually the SAIPE - by district.
    mechanism (Mechanism): The randomization mechanism to use.
    sppe (pd.DataFrame): State per-pupil education data.
    normalize (bool, optional): Whether to normalize the authorization amounts. Defaults to True.
    allocator_kwargs (dict, optional): Parameters for the allocator. Defaults to {}.
    sampling_kwargs (dict, optional): Parameters for the sampling mechanism. Defaults to {}.

Returns:
    pd.DataFrame: The input dataframe with grant allocations under ground truth, sampling, and privacy randomization.
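For example, a single allocation run might look like the following sketch (illustrative only: epsilon and delta are arbitrary, and it assumes the SAIPE inputs and the SPPE spreadsheet are available under config.root):

import os
import dp_policy.config as config
from dp_policy.experiments import titlei_funding
from dp_policy.titlei.allocators import SonnenbergAuthorizer
from dp_policy.titlei.mechanisms import Laplace
from dp_policy.titlei.utils import get_inputs, get_sppe

saipe = get_inputs(2021)
sppe = get_sppe(os.path.join(config.root, "data/sppe18.xlsx"))
mechanism = Laplace(0.1, 0.0)  # epsilon=0.1, delta=0 (illustrative)

allocations = titlei_funding(
    SonnenbergAuthorizer,
    saipe,
    mechanism,
    sppe,
    normalize=True
)
print(allocations.filter(like="grant_total").head())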
def titlei_grid(inputs: pandas.core.frame.DataFrame, mech: Mechanism, eps: list = [0.001, 0.027825594022071243, 0.774263682681127, 21.54434690031882, 599.4842503189409, 16681.005372000593, 464158.8833612772, 12915496.650148828, 359381366.3804626, 10000000000.0, 2.52], delta: list = [0.0], trials: int = 100, mech_kwargs: dict = {}, auth: bool = False, allocator_kwargs: dict = {}, sampling_kwargs: dict = {}, verbose: bool = True, print_results: list = [2.52, 0.1], plot_results: bool = False, noise_to_plot: str = 'children_eligible', alpha: float = 0.05, results: pandas.core.frame.DataFrame = None)
Run many trials of the Title I process under different privacy parameters.

Args:
    inputs (pd.DataFrame): The necessary inputs for the allocator - usually the SAIPE - by district.
    mech (Mechanism): The randomization mechanism to use.
    eps (list, optional): Values of epsilon to try. Defaults to list(np.logspace(-3, 10, num=10)) + [2.52].
    delta (list, optional): Values of delta to try. Defaults to [0.0].
    trials (int, optional): Number of trials to run for each parameter combo. Defaults to 100.
    mech_kwargs (dict, optional): Parameters for the randomization mechanism. Defaults to {}.
    auth (bool, optional): Whether to use the raw authorization amounts. Defaults to False.
    allocator_kwargs (dict, optional): Parameters for the allocator. Defaults to {}.
    sampling_kwargs (dict, optional): Parameters for the sampling mechanism. Defaults to {}.
    verbose (bool, optional): Whether to print progress information. Defaults to True.
    print_results (list, optional): Which values of epsilon to print results for. Defaults to [2.52, 0.1].
    plot_results (bool, optional): Whether to plot results. Defaults to False.
    noise_to_plot (str, optional): Which variable to plot noise for. Defaults to "children_eligible".
    alpha (float, optional): Significance level for the confidence bands (0.05 gives 95% bands). Defaults to 0.05.
    results (pd.DataFrame, optional): Pre-computed results to plot. If provided, the grid calculation is skipped. Defaults to None.

Returns:
    pd.DataFrame: Results grid, indexed by trial, delta, epsilon, and district.
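A small grid run and a quick error summary might look like this sketch (trials and epsilon values are illustrative; the column and index names follow the ones used in the source above):

from dp_policy.experiments import titlei_grid
from dp_policy.titlei.mechanisms import Laplace
from dp_policy.titlei.utils import get_inputs

saipe = get_inputs(2021)
results = titlei_grid(
    saipe, Laplace,
    eps=[0.1, 1.0, 10.0], delta=[0.0],
    trials=10,
    print_results=False
)
# results is indexed by (trial, delta, epsilon) plus the district index
error = results["dpest_grant_total"] - results["true_grant_total"]
rmse_by_eps = error.pow(2).groupby("epsilon").mean().pow(0.5)
print(rmse_by_eps)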
Classes
class Baseline (name: str, baseline: Union[str, pandas.core.frame.DataFrame] = 'cached', year: int = 2021, trials: int = 1000, eps: List[float] = [0.1], delta: List[float] = [0.0], no_match_true: bool = False)
Baseline condition only.

Initialize experiment and set baseline.

Args:
    name (str): Experiment name.
    baseline (Union[str, pd.DataFrame], optional): What baseline (control) results to use. Defaults to "cached".
    year (int, optional): What year to get inputs for. Defaults to 2021.
    trials (int, optional): Number of trials to run for each condition. Defaults to 1000.
    eps (List[float], optional): Values of epsilon to try. Defaults to [0.1].
    delta (List[float], optional): Values of delta to try. Defaults to [0.0].
    no_match_true (bool, optional): Whether to use a different baseline for policies that modify the allocation procedure. Defaults to False (the same baseline is used for all conditions).
class Budget (name: str, baseline: Union[str, pandas.core.frame.DataFrame] = 'cached', year: int = 2021, trials: int = 1000, eps: List[float] = [0.1], delta: List[float] = [0.0], no_match_true: bool = False)
Budget experiments.

Initialize experiment and set baseline.

Args:
    name (str): Experiment name.
    baseline (Union[str, pd.DataFrame], optional): What baseline (control) results to use. Defaults to "cached".
    year (int, optional): What year to get inputs for. Defaults to 2021.
    trials (int, optional): Number of trials to run for each condition. Defaults to 1000.
    eps (List[float], optional): Values of epsilon to try. Defaults to [0.1].
    delta (List[float], optional): Values of delta to try. Defaults to [0.0].
    no_match_true (bool, optional): Whether to use a different baseline for policies that modify the allocation procedure. Defaults to False (the same baseline is used for all conditions).
class Epsilon (name: str, baseline: Union[str, pandas.core.frame.DataFrame] = 'cached', year: int = 2021, trials: int = 1000, eps: List[float] = [0.1], delta: List[float] = [0.0], no_match_true: bool = False)
Privacy parameter experiments.

Initialize experiment and set baseline.

Args:
    name (str): Experiment name.
    baseline (Union[str, pd.DataFrame], optional): What baseline (control) results to use. Defaults to "cached".
    year (int, optional): What year to get inputs for. Defaults to 2021.
    trials (int, optional): Number of trials to run for each condition. Defaults to 1000.
    eps (List[float], optional): Values of epsilon to try. Defaults to [0.1].
    delta (List[float], optional): Values of delta to try. Defaults to [0.0].
    no_match_true (bool, optional): Whether to use a different baseline for policies that modify the allocation procedure. Defaults to False (the same baseline is used for all conditions).
class Experiment (name: str, baseline: Union[str, pandas.core.frame.DataFrame] = 'cached', year: int = 2021, trials: int = 1000, eps: List[float] = [0.1], delta: List[float] = [0.0], no_match_true: bool = False)
Handles setting up and running experiments.

Initialize experiment and set baseline.

Args:
    name (str): Experiment name.
    baseline (Union[str, pd.DataFrame], optional): What baseline (control) results to use. Defaults to "cached".
    year (int, optional): What year to get inputs for. Defaults to 2021.
    trials (int, optional): Number of trials to run for each condition. Defaults to 1000.
    eps (List[float], optional): Values of epsilon to try. Defaults to [0.1].
    delta (List[float], optional): Values of delta to try. Defaults to [0.0].
    no_match_true (bool, optional): Whether to use a different baseline for policies that modify the allocation procedure. Defaults to False (the same baseline is used for all conditions).
Subclasses
- Baseline
- Budget
- Epsilon
- HoldHarmless
- MovingAverage
- PostProcessing
- Sampling
- Thresholds
- VaryTotalChildren
Static methods
def get_experiment(name: str, *args, **kwargs)
Factory method for creating an experiment object from its name.

Args:
    name (str): Experiment name to fetch. Options include "baseline", "hold_harmless", "post_processing", "thresholds", "epsilon", "moving_average", "budget", "sampling", "vary_total_children".

Returns:
    Experiment: The experiment object.
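A typical end-to-end invocation might look like this sketch (arguments are illustrative; it assumes the required input data, and optionally a cached baseline, are available):

from dp_policy.experiments import Experiment

experiment = Experiment.get_experiment(
    "hold_harmless",
    trials=100,  # illustrative; the default is 1000
    eps=[0.1]
)
experiment.run()   # run treatments, save them, and join covariates
experiment.plot()  # print and plot treatment comparisons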
Methods
def discrimination_join(self, **join_kwargs)
Join the results to demographic covariates and save.
def plot(self, **kwargs)
Print and plot the results, comparing treatments.
def run(self)
Run the experiment, save results, and create joined covariate file.
class HoldHarmless (name: str, baseline: Union[str, pandas.core.frame.DataFrame] = 'cached', year: int = 2021, trials: int = 1000, eps: List[float] = [0.1], delta: List[float] = [0.0], no_match_true: bool = False)
Hold harmless and state minimum provisions.

Initialize experiment and set baseline.

Args:
    name (str): Experiment name.
    baseline (Union[str, pd.DataFrame], optional): What baseline (control) results to use. Defaults to "cached".
    year (int, optional): What year to get inputs for. Defaults to 2021.
    trials (int, optional): Number of trials to run for each condition. Defaults to 1000.
    eps (List[float], optional): Values of epsilon to try. Defaults to [0.1].
    delta (List[float], optional): Values of delta to try. Defaults to [0.0].
    no_match_true (bool, optional): Whether to use a different baseline for policies that modify the allocation procedure. Defaults to False (the same baseline is used for all conditions).
class MovingAverage (name: str, truth: str = 'average', **kwargs)
Moving average inputs, with multiple lag conditions.

Args:
    truth (str, optional): Baseline strategy to use. "average" uses the 5-year average as the baseline. Defaults to "average".

Raises:
    Exception: If `truth` is anything other than "average".
class PostProcessing (name: str, baseline: Union[str, pandas.core.frame.DataFrame] = 'cached', year: int = 2021, trials: int = 1000, eps: List[float] = [0.1], delta: List[float] = [0.0], no_match_true: bool = False)
Post-processing variations: none, clipping, and clipping + rounding.

Initialize experiment and set baseline.

Args:
    name (str): Experiment name.
    baseline (Union[str, pd.DataFrame], optional): What baseline (control) results to use. Defaults to "cached".
    year (int, optional): What year to get inputs for. Defaults to 2021.
    trials (int, optional): Number of trials to run for each condition. Defaults to 1000.
    eps (List[float], optional): Values of epsilon to try. Defaults to [0.1].
    delta (List[float], optional): Values of delta to try. Defaults to [0.0].
    no_match_true (bool, optional): Whether to use a different baseline for policies that modify the allocation procedure. Defaults to False (the same baseline is used for all conditions).
class Sampling (name: str, baseline: Union[str, pandas.core.frame.DataFrame] = 'cached', year: int = 2021, trials: int = 1000, eps: List[float] = [0.1], delta: List[float] = [0.0], no_match_true: bool = False)
Sampling error experiments.

Initialize experiment and set baseline.

Args:
    name (str): Experiment name.
    baseline (Union[str, pd.DataFrame], optional): What baseline (control) results to use. Defaults to "cached".
    year (int, optional): What year to get inputs for. Defaults to 2021.
    trials (int, optional): Number of trials to run for each condition. Defaults to 1000.
    eps (List[float], optional): Values of epsilon to try. Defaults to [0.1].
    delta (List[float], optional): Values of delta to try. Defaults to [0.0].
    no_match_true (bool, optional): Whether to use a different baseline for policies that modify the allocation procedure. Defaults to False (the same baseline is used for all conditions).
class Thresholds (name: str, baseline: Union[str, pandas.core.frame.DataFrame] = 'cached', year: int = 2021, trials: int = 1000, eps: List[float] = [0.1], delta: List[float] = [0.0], no_match_true: bool = False)
Thresholding experiments.

Initialize experiment and set baseline.

Args:
    name (str): Experiment name.
    baseline (Union[str, pd.DataFrame], optional): What baseline (control) results to use. Defaults to "cached".
    year (int, optional): What year to get inputs for. Defaults to 2021.
    trials (int, optional): Number of trials to run for each condition. Defaults to 1000.
    eps (List[float], optional): Values of epsilon to try. Defaults to [0.1].
    delta (List[float], optional): Values of delta to try. Defaults to [0.0].
    no_match_true (bool, optional): Whether to use a different baseline for policies that modify the allocation procedure. Defaults to False (the same baseline is used for all conditions).
class VaryTotalChildren (name: str, baseline: Union[str, pandas.core.frame.DataFrame] = 'cached', year: int = 2021, trials: int = 1000, eps: List[float] = [0.1], delta: List[float] = [0.0], no_match_true: bool = False)
Robustness check: what happens if the # of total children is not fixed?

Initialize experiment and set baseline.

Args:
    name (str): Experiment name.
    baseline (Union[str, pd.DataFrame], optional): What baseline (control) results to use. Defaults to "cached".
    year (int, optional): What year to get inputs for. Defaults to 2021.
    trials (int, optional): Number of trials to run for each condition. Defaults to 1000.
    eps (List[float], optional): Values of epsilon to try. Defaults to [0.1].
    delta (List[float], optional): Values of delta to try. Defaults to [0.0].
    no_match_true (bool, optional): Whether to use a different baseline for policies that modify the allocation procedure. Defaults to False (the same baseline is used for all conditions).