📚 The CoCalc Library - books, templates and other resources
License: OTHER
# other strats
# TODO: UCB strat, epsilon-greedy

import scipy.stats as stats
import numpy as np

# Module-level aliases used by Bandits.pull and ucb_bayes below.
rand = np.random.rand
beta = stats.beta


class GeneralBanditStrat(object):

    """
    Implements an online, learning strategy to solve
    the Multi-Armed Bandit problem.

    parameters:
        bandits: a Bandit class with .pull method
        choice_function: accepts a self argument (which gives access to all
            the variables), and returns an int between 0 and n-1
    methods:
        sample_bandits(n): sample and train on n pulls.

    attributes:
        N: the cumulative number of samples
        choices: the historical choices as a (N,) array
        score: the historical score as a (N,) array
    """

    def __init__(self, bandits, choice_function):
        self.bandits = bandits
        n_bandits = len(self.bandits)
        # Per-arm sufficient statistics for the Beta posterior:
        # wins[i] = observed successes, trials[i] = pulls of arm i.
        self.wins = np.zeros(n_bandits)
        self.trials = np.zeros(n_bandits)
        self.N = 0
        self.choices = []
        self.score = []
        self.choice_function = choice_function

    def sample_bandits(self, n=1):
        """Perform `n` pulls, letting `choice_function` select each arm.

        Updates the per-arm win/trial counts, appends to the historical
        `choices` and `score` arrays, and increments the cumulative
        sample count `N`.  Returns None.
        """
        score = np.zeros(n)
        choices = np.zeros(n)

        for k in range(n):
            # Sample from the bandits' priors, and select the largest sample.
            choice = self.choice_function(self)

            # Sample the chosen bandit.
            result = self.bandits.pull(choice)

            # Update priors and score.
            self.wins[choice] += result
            self.trials[choice] += 1
            score[k] = result
            self.N += 1
            choices[k] = choice

        self.score = np.r_[self.score, score]
        self.choices = np.r_[self.choices, choices]
        return


# --- choice functions -------------------------------------------------------
# Each accepts the strategy instance as `self` (see GeneralBanditStrat's
# docstring) and returns the index of the arm to pull next.

def bayesian_bandit_choice(self):
    """Thompson sampling: draw one sample from each arm's Beta(1+wins,
    1+losses) posterior and play the arm with the largest draw."""
    return np.argmax(np.random.beta(1 + self.wins,
                                    1 + self.trials - self.wins))


def max_mean(self):
    """Pick the bandit with the current best observed proportion of winning.

    The +1 in the denominator keeps untried arms (trials == 0) from
    causing a division by zero."""
    return np.argmax(self.wins / (self.trials + 1))


def lower_credible_choice(self):
    """Pick the bandit with the best LOWER BOUND of its ~95% credible
    interval.  See chapter 5."""
    def lb(a, b):
        # Normal approximation: posterior mean minus 1.65 posterior std.
        return a / (a + b) - 1.65 * np.sqrt((a * b) / ((a + b) ** 2 * (a + b + 1)))
    a = self.wins + 1
    b = self.trials - self.wins + 1
    return np.argmax(lb(a, b))


def upper_credible_choice(self):
    """Pick the bandit with the best UPPER BOUND of its ~95% credible
    interval.  See chapter 5."""
    def ub(a, b):
        # Normal approximation: posterior mean plus 1.65 posterior std.
        return a / (a + b) + 1.65 * np.sqrt((a * b) / ((a + b) ** 2 * (a + b + 1)))
    a = self.wins + 1
    b = self.trials - self.wins + 1
    return np.argmax(ub(a, b))


def random_choice(self):
    """Pick an arm uniformly at random."""
    return np.random.randint(0, len(self.wins))


def ucb_bayes(self):
    """Bayes-UCB: play the arm whose Beta posterior has the largest
    (1 - 1/(N+1))-quantile, so the quantile level rises toward 1 as the
    total number of pulls N grows."""
    alpha = 1 - 1. / (self.N + 1)
    return np.argmax(beta.ppf(alpha,
                              1 + self.wins,
                              1 + self.trials - self.wins))


class Bandits(object):
    """
    This class represents N bandit machines.

    parameters:
        p_array: a (n,) Numpy array of probabilities >0, <1.

    methods:
        pull( i ): return the results, 0 or 1, of pulling
                   the ith bandit.
    """
    def __init__(self, p_array):
        self.p = p_array
        # Index of the arm with the highest true win probability
        # (useful for computing regret).
        self.optimal = np.argmax(p_array)

    def pull(self, i):
        # i is which arm to pull; returns True (counts as 1) with
        # probability p[i].
        return rand() < self.p[i]

    def __len__(self):
        return len(self.p)