Source code for binney.run.bootstrap

from copy import deepcopy

import numpy as np
import pandas as pd
from binney.data.data import LRSpecs
from binney.model.model import BinomialModel

from anml.bootstrap.bootstrap import Bootstrap


class BinneyBootstrap(Bootstrap):
    """
    Common base for the bootstrap variants in this module.

    Keeps the data frame that is re-sampled and a private copy of the
    specs object; the re-sampling step itself is supplied by subclasses
    through ``_process``.
    """

    def __init__(self, model: BinomialModel, df: pd.DataFrame, **kwargs):
        """
        Parameters
        ----------
        model
            Model handed to the parent ``Bootstrap`` constructor.
        df
            Data frame stored on the instance as ``self.df``.
        **kwargs
            Remaining keyword arguments, forwarded unchanged to the
            parent constructor.
        """
        super().__init__(model=model, **kwargs)
        # Specs stay unset until ``attach_specs`` is called.
        self.df, self.lr_specs = df, None

    def attach_specs(self, lr_specs: LRSpecs):
        """Store a deep copy of ``lr_specs`` so the caller's object is never mutated."""
        private_specs = deepcopy(lr_specs)
        self.lr_specs = private_specs

    def detach_specs(self):
        """Drop the stored specs copy."""
        self.lr_specs = None

    def _process(self, **kwargs):
        """Placeholder for the per-replicate step; always raises here."""
        raise NotImplementedError


class BinomialBootstrap(BinneyBootstrap):
    """
    Non-parametric bootstrap implementation for the BinomRun modeling process.

    Every replicate re-draws the success counts row by row and then re-fits
    the model on the re-drawn data. Drawing from Binomial(n, k / n) is
    equivalent to re-sampling the n individual trials of a row with
    replacement.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @staticmethod
    def _sample(df: pd.DataFrame, col_obs: str, col_total: str) -> pd.DataFrame:
        """
        Creates a new data frame by sampling from the binomial distribution
        with p = k / n and n = n from the original data, where n is
        the sample size and k is the number of successes.

        Parameters
        ----------
        df
            Original data; it is copied, never modified.
        col_obs
            Name of the column holding the number of successes (k).
        col_total
            Name of the column holding the number of trials (n).

        Returns
        -------
        data frame with re-sampled observations
        """
        sample_df = df.copy()
        totals = sample_df[col_total]
        # A row with zero trials gives 0 / 0 = NaN, which is not a valid
        # probability for the binomial draw. Such a row can only ever yield
        # zero successes, so p is pinned to 0 there.
        p = (sample_df[col_obs] / totals).where(totals > 0, 0.0)
        sample_df[col_obs] = np.random.binomial(n=totals, p=p)
        return sample_df

    def _process(self, fit_callable, **kwargs):
        """
        Run one bootstrap replicate: re-sample, re-configure the specs and
        re-fit.

        ``self.lr_specs`` is dereferenced directly, so specs must have been
        attached before this is called.

        Parameters
        ----------
        fit_callable
            Callable invoked as
            ``fit_callable(solver=self.solver, data=self.lr_specs.data, **kwargs)``.
        **kwargs
            Extra keyword arguments forwarded to ``fit_callable``.
        """
        data_specs = self.lr_specs.data_specs
        new_df = self._sample(
            df=self.df,
            col_obs=data_specs.col_obs,
            col_total=data_specs.col_total,
        )
        self.lr_specs.configure_data(df=new_df)
        # Re-attach so the model sees the specs configured with the new data.
        self.model.detach_specs()
        self.model.attach_specs(self.lr_specs)
        fit_callable(solver=self.solver, data=self.lr_specs.data, **kwargs)
class BernoulliBootstrap(BinneyBootstrap):
    """
    Non-parametric bootstrap implementation for a dataset with 1's and 0's
    in a logistic regression modeling process.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @staticmethod
    def _sample(df: pd.DataFrame) -> pd.DataFrame:
        """
        Creates a new data frame by drawing ``len(df)`` rows from ``df``
        with replacement.

        Parameters
        ----------
        df
            Original data; it is not modified.

        Returns
        -------
        data frame with re-sampled rows
        """
        # ``DataFrame.sample`` already returns a new object, so no
        # defensive copy of ``df`` is needed beforehand.
        return df.sample(n=len(df), replace=True)

    def _process(self, fit_callable, **kwargs):
        """
        Run one bootstrap replicate: re-sample, re-configure the specs and
        re-fit.

        ``self.lr_specs`` is dereferenced directly, so specs must have been
        attached before this is called.

        Parameters
        ----------
        fit_callable
            Callable invoked as
            ``fit_callable(solver=self.solver, data=self.lr_specs.data, **kwargs)``.
        **kwargs
            Extra keyword arguments forwarded to ``fit_callable``.
        """
        new_df = self._sample(df=self.df)
        self.lr_specs.configure_data(df=new_df)
        # Re-attach so the model sees the specs configured with the new data.
        self.model.detach_specs()
        self.model.attach_specs(self.lr_specs)
        fit_callable(solver=self.solver, data=self.lr_specs.data, **kwargs)
class BernoulliStratifiedBootstrap(BernoulliBootstrap):
    """
    Non-parametric bootstrap implementation for the BinomRun modeling process,
    but with stratified re-sampling for groups when there is bernoulli data.

    Rows are re-sampled with replacement inside each group, so every group
    keeps its original number of rows in each replicate.
    """

    def __init__(self, col_group, **kwargs):
        """
        Parameters
        ----------
        col_group
            Grouping key passed to ``DataFrame.groupby`` to define the strata.
        **kwargs
            Forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)
        self.col_group = col_group

    def _sample(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Creates a new data frame by re-sampling the rows of each group with
        replacement, keeping each group's size.

        Parameters
        ----------
        df
            Original data; it is not modified.

        Returns
        -------
        data frame with re-sampled rows, all original columns retained
        """
        # ``GroupBy.sample`` (pandas >= 1.1) is used instead of
        # ``groupby(...).apply(lambda g: g.sample(...))``: newer pandas
        # deprecates ``apply`` operating on the grouping column and will
        # exclude it, which would drop that column from the result.
        return df.groupby(self.col_group).sample(frac=1.0, replace=True)