# DataGenerator.py
import numpy as np


class DataGenerator:
    """
    Generate bandit data.

    Defaults:
        K=2 arms
        D=2 features/arm
    """

    def __init__(self, K=2, D=2, feature_type='binary', reward_type='binary'):
        self.D = D  # dimension of the feature vector
        self.K = K  # number of bandits
        self.reward_type = reward_type
        self.feature_type = feature_type
        self.means = np.random.normal(size=self.K)
        self.stds = 1 + 2 * np.random.rand(self.K)
        # generate the weight vectors: one D-dimensional weight vector per
        # arm, encoding the true importance of each of that arm's features
        self.generate_weight_vectors()

    def generate_weight_vectors(self, loc=0.0, scale=1.0):
        self.W = np.random.normal(loc=loc, scale=scale, size=(self.K, self.D))
        #self.W = np.ones((self.K, self.D))

    def generate_samples(self, n=1000):
        # sample the feature vectors X: binary or small-integer entries,
        # depending on feature_type
        if self.feature_type == 'binary':
            X = np.random.randint(0, 2, size=(n, self.D))
        elif self.feature_type == 'integer':
            X = np.random.randint(0, 5, size=(n, self.D))
        else:
            raise ValueError("feature_type must be 'binary' or 'integer'")
        # the rewards are functions of the inner products of the
        # feature vectors with the true weight vectors
        IP = np.dot(X, self.W.T)
        # now get the rewards
        if self.reward_type == 'binary':
            R = ((np.sign(np.random.normal(self.means + IP, self.stds)) + 1) / 2).astype(int)
        elif self.reward_type == 'positive':
            #R = np.random.lognormal(self.means + IP, self.stds)
            R = np.abs(np.random.normal(self.means + IP, self.stds))
        elif self.reward_type == 'mixed':
            R = (np.sign(np.random.normal(self.means + IP, self.stds)) + 1) / 2
            R *= np.random.lognormal(self.means + IP, self.stds)
        else:
            raise ValueError("reward_type must be 'binary', 'positive', or 'mixed'")
        return X, R
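
    # Example usage (illustrative):
    #   gen = DataGenerator(K=3, D=4, feature_type='integer', reward_type='positive')
    #   X, R = gen.generate_samples(n=500)
    # yields features X of shape (500, 4) and rewards R of shape (500, 3).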
    # generate all Bernoulli rewards ahead of time
    def generate_bernoulli_bandit_data(self, num_samples):
        # the true click-through rates (CTRs) that generate the data,
        # one per arm, tiled across samples
        CTRs_that_generated_data = np.tile(np.random.rand(self.K), (num_samples, 1))
        # did the trial succeed?
        true_rewards = np.random.rand(num_samples, self.K) < CTRs_that_generated_data
        return true_rewards, CTRs_that_generated_data

    # Thompson Sampling
    # basic idea: draw one sample from each arm's Beta posterior and play the
    # arm with the largest sampled value, rather than comparing point estimates
    # http://www.economics.uci.edu/~ivan/asmb.874.pdf
    # http://camdp.com/blogs/multi-armed-bandits
    def thompson_sampling(self, observed_data):
        return np.argmax(np.random.beta(observed_data[:, 0], observed_data[:, 1]))
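
    # An illustrative baseline choice function (an addition, not part of the
    # original file): ignore the observed data and pull an arm uniformly at
    # random. Any function with this signature can be passed as `choice_func`
    # to run_bandit_alg below.
    def random_choice(self, observed_data):
        return np.random.randint(observed_data.shape[0])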
    # the bandit algorithm
    def run_bandit_alg(self, true_rewards, CTRs_that_generated_data, choice_func):
        num_samples, K = true_rewards.shape
        # observed_data[k] holds the Beta parameters (successes, failures) of arm k
        observed_data = np.zeros((K, 2))
        # seed the estimated params with a uniform Beta(1, 1) prior
        prior_a = 1.  # aka successes
        prior_b = 1.  # aka failures
        observed_data[:, 0] += prior_a  # allocating the initial conditions
        observed_data[:, 1] += prior_b
        regret = np.zeros(num_samples)
        for i in range(num_samples):
            # pull a lever and update observed_data
            this_choice = choice_func(observed_data)
            # update parameters
            if true_rewards[i, this_choice] == 1:
                update_ind = 0
            else:
                update_ind = 1
            observed_data[this_choice, update_ind] += 1
            # update the expected regret
            regret[i] = np.max(CTRs_that_generated_data[i, :]) - CTRs_that_generated_data[i, this_choice]
        cum_regret = np.cumsum(regret)
        return cum_regret
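

# A minimal usage sketch (an illustrative addition, not part of the original
# file): generate Bernoulli bandit data, run Thompson Sampling against the
# random baseline above, and print the final cumulative regrets.
if __name__ == "__main__":
    gen = DataGenerator(K=5)
    true_rewards, CTRs = gen.generate_bernoulli_bandit_data(num_samples=10000)
    regret_ts = gen.run_bandit_alg(true_rewards, CTRs, gen.thompson_sampling)
    regret_rand = gen.run_bandit_alg(true_rewards, CTRs, gen.random_choice)
    print("final cumulative regret, Thompson Sampling: %.2f" % regret_ts[-1])
    print("final cumulative regret, random choice:     %.2f" % regret_rand[-1])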