from copy import deepcopy
import numpy as np
import pandas as pd
from binney.data.data import LRSpecs
from binney.model.model import BinomialModel
from anml.bootstrap.bootstrap import Bootstrap
class BinneyBootstrap(Bootstrap):
    """
    Shared base for the bootstrap classes defined in this module.

    Stores the data frame that replicates are drawn from and, once
    attached, this object's own copy of the specs. Concrete subclasses
    must override ``_process``.
    """

    def __init__(self, model: BinomialModel, df: pd.DataFrame, **kwargs):
        """
        Parameters
        ----------
        model
            Model forwarded to the parent ``Bootstrap`` constructor.
        df
            Data frame to re-sample from. It is stored by reference,
            not copied.
        **kwargs
            Additional keyword arguments forwarded to the parent
            ``Bootstrap`` constructor.
        """
        super().__init__(model=model, **kwargs)
        # No specs are available until attach_specs() is called.
        self.lr_specs = None
        self.df = df

    def attach_specs(self, lr_specs: LRSpecs):
        """
        Store a deep copy of ``lr_specs`` on this object.

        Because a deep copy is kept, later operations on
        ``self.lr_specs`` act on this object's own copy rather than on
        the instance the caller passed in.
        """
        private_specs = deepcopy(lr_specs)
        self.lr_specs = private_specs

    def detach_specs(self):
        """Forget the currently attached specs (resets them to ``None``)."""
        self.lr_specs = None

    def _process(self, **kwargs):
        """
        Hook for running a single bootstrap replicate.

        Raises
        ------
        NotImplementedError
            Always; subclasses provide the implementation.
        """
        raise NotImplementedError
class BinomialBootstrap(BinneyBootstrap):
    """
    Bootstrap implementation for the BinomRun modeling process.

    Every replicate keeps each row's total and re-draws the row's
    observed count from a binomial distribution whose success
    probability is the row's observed proportion.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the parent constructor."""
        super().__init__(**kwargs)

    @staticmethod
    def _sample(df: pd.DataFrame, col_obs: str, col_total: str) -> pd.DataFrame:
        """
        Creates a new data frame by sampling from the binomial distribution
        with p = k / n and n = n from the original data, where n is the sample
        size and k is the number of successes.

        Parameters
        ----------
        df
            Original data; it is copied, never modified.
        col_obs
            Name of the column holding the number of successes (k).
        col_total
            Name of the column holding the sample size (n).

        Returns
        -------
        data frame with re-sampled observations

        Raises
        ------
        ValueError
            Raised by ``np.random.binomial`` when a row's proportion is
            not a valid probability (for example k > n, or k > 0 with
            n == 0).
        """
        sample_df = df.copy()
        successes = sample_df[col_obs]
        totals = sample_df[col_total]

        # A row with k == 0 and n == 0 gives p = 0 / 0 = NaN, which
        # np.random.binomial rejects. With zero trials the only possible
        # draw is 0 whatever p is, so use p = 0 for exactly those rows.
        # Inconsistent rows (k > 0 with n == 0) are left untouched so they
        # still fail loudly.
        no_trials = (totals == 0) & (successes == 0)
        p = (successes / totals).mask(no_trials, 0.0)

        sample_df[col_obs] = np.random.binomial(n=totals, p=p)
        return sample_df

    def _process(self, fit_callable, **kwargs):
        """
        Run one bootstrap replicate.

        Draws a re-sampled data frame from ``self.df``, configures the
        attached specs with it, swaps the specs on the model, and then
        invokes ``fit_callable``.

        ``attach_specs`` must have been called beforehand; otherwise
        ``self.lr_specs`` is ``None`` and the attribute access below
        raises ``AttributeError``.

        Parameters
        ----------
        fit_callable
            Callable invoked as
            ``fit_callable(solver=..., data=..., **kwargs)``.
        **kwargs
            Extra keyword arguments passed through to ``fit_callable``.
        """
        new_df = self._sample(
            df=self.df,
            col_obs=self.lr_specs.data_specs.col_obs,
            col_total=self.lr_specs.data_specs.col_total,
        )
        self.lr_specs.configure_data(df=new_df)
        # Replace whatever specs the model currently holds with the
        # freshly configured ones.
        self.model.detach_specs()
        self.model.attach_specs(self.lr_specs)
        # NOTE(review): self.solver is not set in this file; presumably it
        # is provided by the Bootstrap base class -- confirm.
        fit_callable(solver=self.solver, data=self.lr_specs.data, **kwargs)
class BernoulliBootstrap(BinneyBootstrap):
    """
    Non-parametric bootstrap implementation for a dataset with 1's and 0's
    in a logistic regression modeling process.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the parent constructor."""
        super().__init__(**kwargs)

    @staticmethod
    def _sample(df: pd.DataFrame) -> pd.DataFrame:
        """
        Creates a new data frame by drawing ``len(df)`` rows from ``df``
        uniformly at random, with replacement.

        The input frame is not modified. Drawn rows keep their original
        index labels, so the result may contain repeated labels.

        Parameters
        ----------
        df
            Original data, one row per observation.

        Returns
        -------
        data frame with re-sampled rows
        """
        # DataFrame.sample already returns a new object, so no defensive
        # copy of the input is needed.
        return df.sample(n=len(df), replace=True)

    def _process(self, fit_callable, **kwargs):
        """
        Run one bootstrap replicate.

        Draws a re-sampled data frame from ``self.df``, configures the
        attached specs with it, swaps the specs on the model, and then
        invokes ``fit_callable``.

        ``attach_specs`` must have been called beforehand; otherwise
        ``self.lr_specs`` is ``None`` and the attribute access below
        raises ``AttributeError``.

        Parameters
        ----------
        fit_callable
            Callable invoked as
            ``fit_callable(solver=..., data=..., **kwargs)``.
        **kwargs
            Extra keyword arguments passed through to ``fit_callable``.
        """
        new_df = self._sample(df=self.df)
        self.lr_specs.configure_data(df=new_df)
        # Replace whatever specs the model currently holds with the
        # freshly configured ones.
        self.model.detach_specs()
        self.model.attach_specs(self.lr_specs)
        # NOTE(review): self.solver is not set in this file; presumably it
        # is provided by the Bootstrap base class -- confirm.
        fit_callable(solver=self.solver, data=self.lr_specs.data, **kwargs)
class BernoulliStratifiedBootstrap(BernoulliBootstrap):
    """
    Non-parametric bootstrap implementation for bernoulli (1's and 0's)
    data with stratified re-sampling: rows are re-sampled with
    replacement separately within each group, so every group keeps its
    original number of rows.
    """

    def __init__(self, col_group, **kwargs):
        """
        Parameters
        ----------
        col_group
            Column name (or anything else accepted by
            ``DataFrame.groupby``) that defines the strata.
        **kwargs
            Keyword arguments forwarded to the parent constructor.
        """
        super().__init__(**kwargs)
        self.col_group = col_group

    def _sample(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Creates a new data frame by re-sampling the rows of ``df`` with
        replacement within each group of ``self.col_group``.

        This overrides the parent's static ``_sample`` with an instance
        method because it needs ``self.col_group``.

        Rows whose group key is missing are not part of any group under
        ``groupby``'s defaults and therefore do not appear in the result.

        Parameters
        ----------
        df
            Original data, one row per observation.

        Returns
        -------
        data frame with re-sampled rows, all original columns retained
        """
        # GroupBy.sample is used instead of
        # groupby(...).apply(lambda g: g.sample(...)): apply() operating on
        # the grouping columns is deprecated in recent pandas, and once
        # removed the grouping column would vanish from the result.
        # frac=1 with replace=True draws len(group) rows per group.
        return df.groupby(self.col_group, group_keys=False).sample(
            frac=1, replace=True
        )