GitHub Repository: jxareas/Machine-Learning-Notebooks
Path: blob/master/1_Supervised_Machine_Learning/Week 3. Classification/lab_utils_common.py
"""
lab_utils_common
Contains common routines and variable definitions used by all the labs in this week.
By contrast, specific, large plotting routines live in separate files and are
generally imported into the week where they are used; those files import this file.
"""
import copy
import math
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch
from ipywidgets import Output

np.set_printoptions(precision=2)

dlc = dict(dlblue='#0096ff', dlorange='#FF9300', dldarkred='#C00000', dlmagenta='#FF40FF', dlpurple='#7030A0')
dlblue = '#0096ff'; dlorange = '#FF9300'; dldarkred = '#C00000'; dlmagenta = '#FF40FF'; dlpurple = '#7030A0'
dlcolors = [dlblue, dlorange, dldarkred, dlmagenta, dlpurple]
plt.style.use('./deeplearning.mplstyle')

def sigmoid(z):
    """
    Compute the sigmoid of z

    Parameters
    ----------
    z : array_like
        A scalar or numpy array of any size.

    Returns
    -------
    g : array_like
        sigmoid(z)
    """
    z = np.clip(z, -500, 500)  # protect against overflow
    g = 1.0 / (1.0 + np.exp(-z))

    return g
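
# check our work: a hedged sketch of sigmoid on a few sample values (expected
# outputs are approximate, shown to 2 decimals per np.set_printoptions above)
# sigmoid(np.array([-500., 0., 500.]))   # -> ~array([0., 0.5, 1.])
# sigmoid(0)                             # -> 0.5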

##########################################################
# Regression Routines
##########################################################

def predict_logistic(X, w, b):
    """ performs prediction """
    return sigmoid(X @ w + b)

def predict_linear(X, w, b):
    """ performs prediction """
    return X @ w + b
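
# Example (hedged sketch; X_tmp, w_tmp, b_tmp are illustrative names, not lab data):
# X_tmp = np.array([[1., 2.], [3., 4.]]); w_tmp = np.array([1., -1.]); b_tmp = 0.5
# predict_linear(X_tmp, w_tmp, b_tmp)     # -> array([-0.5, -0.5])
# predict_logistic(X_tmp, w_tmp, b_tmp)   # -> sigmoid of the above, ~array([0.38, 0.38])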

def compute_cost_logistic(X, y, w, b, lambda_=0, safe=False):
    """
    Computes cost using logistic loss, non-matrix version

    Args:
      X (ndarray): Shape (m,n)  matrix of examples with n features
      y (ndarray): Shape (m,)   target values
      w (ndarray): Shape (n,)   parameters for prediction
      b (scalar):               parameter for prediction
      lambda_ (scalar, float):  controls amount of regularization, 0 = no regularization
      safe (boolean):           True selects an under/overflow-safe algorithm
    Returns:
      cost (scalar): cost
    """

    m, n = X.shape
    cost = 0.0
    for i in range(m):
        z_i = np.dot(X[i], w) + b                                             # (n,)(n,) + scalar = scalar
        if safe:  # avoids overflow
            cost += -(y[i] * z_i) + log_1pexp(z_i)
        else:
            f_wb_i = sigmoid(z_i)                                             # scalar
            cost += -y[i] * np.log(f_wb_i) - (1 - y[i]) * np.log(1 - f_wb_i)  # scalar
    cost = cost / m

    reg_cost = 0
    if lambda_ != 0:
        for j in range(n):
            reg_cost += (w[j]**2)                                             # scalar
        reg_cost = (lambda_ / (2 * m)) * reg_cost

    return cost + reg_cost
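
# check our work (hedged sketch): with w = 0 and b = 0 the model predicts 0.5 for
# every example, so the unregularized cost should be -log(0.5) = log(2) ~= 0.693.
# X_tmp and y_tmp are illustrative names, not lab data.
# X_tmp = np.array([[0.5, 1.5], [1., 1.], [1.5, 0.5], [3., 0.5]])
# y_tmp = np.array([0, 0, 1, 1])
# compute_cost_logistic(X_tmp, y_tmp, np.zeros(2), 0.)   # -> ~0.693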

def log_1pexp(x, maximum=20):
    """ approximate log(1 + exp(x))
    https://stats.stackexchange.com/questions/475589/numerical-computation-of-cross-entropy-in-practice
    Args:
      x   : (ndarray Shape (n,1) or (n,))  input
    Returns:
      out : (ndarray, shape matches x)     output ~= np.log(1 + np.exp(x))
    """

    out = np.zeros_like(x, dtype=float)
    i = x <= maximum
    ni = np.logical_not(i)

    out[i] = np.log(1 + np.exp(x[i]))
    out[ni] = x[ni]
    return out
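
# check our work (hedged sketch): for small x this matches np.log(1 + np.exp(x));
# above `maximum` it returns x itself, since log(1 + exp(x)) -> x as x grows.
# log_1pexp(np.array([-2., 0., 2., 50.]))   # -> ~array([0.13, 0.69, 2.13, 50.])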

def compute_cost_matrix(X, y, w, b, logistic=False, lambda_=0, safe=True):
    """
    Computes the cost using matrices
    Args:
      X : (ndarray, Shape (m,n))          matrix of examples
      y : (ndarray  Shape (m,) or (m,1))  target value of each example
      w : (ndarray  Shape (n,) or (n,1))  values of parameter(s) of the model
      b : (scalar)                        value of parameter of the model
      logistic : (boolean)                linear if false, logistic if true
      lambda_  : (float)                  applies regularization if non-zero
      safe     : (boolean)                True selects an under/overflow-safe algorithm
    Returns:
      total_cost : (scalar) cost
    """
    m = X.shape[0]
    y = y.reshape(-1, 1)  # ensure 2D
    w = w.reshape(-1, 1)  # ensure 2D
    if logistic:
        if safe:  # safe from overflow
            z = X @ w + b                                         # (m,n)(n,1) = (m,1)
            cost = -(y * z) + log_1pexp(z)
            cost = np.sum(cost) / m                               # scalar
        else:
            f = sigmoid(X @ w + b)                                # (m,n)(n,1) = (m,1)
            cost = (1 / m) * (np.dot(-y.T, np.log(f)) - np.dot((1 - y).T, np.log(1 - f)))  # (1,m)(m,1) = (1,1)
            cost = cost[0, 0]                                     # scalar
    else:
        f = X @ w + b                                             # (m,n)(n,1) = (m,1)
        cost = (1 / (2 * m)) * np.sum((f - y)**2)                 # scalar

    reg_cost = (lambda_ / (2 * m)) * np.sum(w**2)                 # scalar

    total_cost = cost + reg_cost                                  # scalar

    return total_cost                                             # scalar
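
# check our work (hedged sketch): the matrix version should agree with the loop
# version above on the illustrative X_tmp/y_tmp data, for either code path.
# w_tmp = np.array([1., -1.]); b_tmp = -1.
# compute_cost_matrix(X_tmp, y_tmp, w_tmp, b_tmp, logistic=True, safe=True)
# compute_cost_logistic(X_tmp, y_tmp, w_tmp, b_tmp, safe=True)   # should match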

def compute_gradient_matrix(X, y, w, b, logistic=False, lambda_=0):
    """
    Computes the gradient using matrices

    Args:
      X : (ndarray, Shape (m,n))          matrix of examples
      y : (ndarray  Shape (m,) or (m,1))  target value of each example
      w : (ndarray  Shape (n,) or (n,1))  values of parameters of the model
      b : (scalar)                        value of parameter of the model
      logistic : (boolean)                linear if false, logistic if true
      lambda_  : (float)                  applies regularization if non-zero
    Returns:
      dj_db : (scalar)                    gradient of the cost w.r.t. the parameter b
      dj_dw : (array_like Shape (n,1))    gradient of the cost w.r.t. the parameters w
    """
    m = X.shape[0]
    y = y.reshape(-1, 1)  # ensure 2D
    w = w.reshape(-1, 1)  # ensure 2D

    f_wb = sigmoid(X @ w + b) if logistic else X @ w + b  # (m,n)(n,1) = (m,1)
    err = f_wb - y                                        # (m,1)
    dj_dw = (1 / m) * (X.T @ err)                         # (n,m)(m,1) = (n,1)
    dj_db = (1 / m) * np.sum(err)                         # scalar

    dj_dw += (lambda_ / m) * w                            # regularize  # (n,1)

    return dj_db, dj_dw                                   # scalar, (n,1)
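
# check our work (hedged sketch): compare the analytic dj_db with a central-difference
# estimate (J(b+eps) - J(b-eps)) / (2*eps), reusing the illustrative names above.
# eps = 1e-4
# num_db = (compute_cost_matrix(X_tmp, y_tmp, w_tmp, b_tmp + eps, logistic=True)
#           - compute_cost_matrix(X_tmp, y_tmp, w_tmp, b_tmp - eps, logistic=True)) / (2 * eps)
# dj_db, dj_dw = compute_gradient_matrix(X_tmp, y_tmp, w_tmp, b_tmp, logistic=True)
# dj_db, num_db   # the two values should closely agree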

def gradient_descent(X, y, w_in, b_in, alpha, num_iters, logistic=False, lambda_=0, verbose=True):
    """
    Performs batch gradient descent to learn w and b, taking
    num_iters gradient steps with learning rate alpha

    Args:
      X (ndarray):    Shape (m,n)          matrix of examples
      y (ndarray):    Shape (m,) or (m,1)  target value of each example
      w_in (ndarray): Shape (n,) or (n,1)  initial values of parameters of the model
      b_in (scalar):                       initial value of parameter of the model
      alpha (float):                       learning rate
      num_iters (int):                     number of iterations to run gradient descent
      logistic: (boolean)                  linear if false, logistic if true
      lambda_:  (float)                    applies regularization if non-zero

    Returns:
      w (ndarray): Shape (n,) or (n,1)     updated values of parameters; matches incoming shape
      b (scalar):                          updated value of parameter
      J_history (list):                    cost J at each iteration
    """
    # An array to store cost J at each iteration, primarily for graphing later
    J_history = []
    w = copy.deepcopy(w_in)  # avoid modifying global w within function
    b = b_in
    w = w.reshape(-1, 1)     # prep for matrix operations
    y = y.reshape(-1, 1)

    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db, dj_dw = compute_gradient_matrix(X, y, w, b, logistic, lambda_)

        # Update parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Save cost J at each iteration
        if i < 100000:  # prevent resource exhaustion
            J_history.append(compute_cost_matrix(X, y, w, b, logistic, lambda_))

        # Print cost at intervals: 10 times over the run, or every iteration if num_iters < 10
        if i % math.ceil(num_iters / 10) == 0:
            if verbose: print(f"Iteration {i:4d}: Cost {J_history[-1]}   ")

    return w.reshape(w_in.shape), b, J_history  # return final w, b and J history for graphing
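
# Example (hedged sketch): a short logistic fit on the illustrative data above.
# w_out, b_out, hist = gradient_descent(X_tmp, y_tmp, np.zeros(2), 0., alpha=0.1,
#                                       num_iters=1000, logistic=True, verbose=False)
# predict_logistic(X_tmp, w_out, b_out) > 0.5   # class decision for each example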

def zscore_normalize_features(X):
    """
    computes X, z-score normalized by column

    Args:
      X (ndarray): Shape (m,n) input data, m examples, n features

    Returns:
      X_norm (ndarray): Shape (m,n)  input normalized by column
      mu (ndarray):     Shape (n,)   mean of each feature
      sigma (ndarray):  Shape (n,)   standard deviation of each feature
    """
    # find the mean of each column/feature
    mu = np.mean(X, axis=0)                 # mu will have shape (n,)
    # find the standard deviation of each column/feature
    sigma = np.std(X, axis=0)               # sigma will have shape (n,)
    # element-wise, subtract mu for that column from each example, divide by std for that column
    X_norm = (X - mu) / sigma

    return X_norm, mu, sigma

# check our work
# from sklearn.preprocessing import scale
# scale(X_orig, axis=0, with_mean=True, with_std=True, copy=True)
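
# Example (hedged sketch): normalized columns should have ~zero mean and unit std.
# X_norm_tmp, mu_tmp, sigma_tmp = zscore_normalize_features(X_tmp)
# np.mean(X_norm_tmp, axis=0)   # -> ~array([0., 0.])
# np.std(X_norm_tmp, axis=0)    # -> ~array([1., 1.])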

######################################################
# Common Plotting Routines
######################################################

def plot_data(X, y, ax, pos_label="y=1", neg_label="y=0", s=80, loc='best'):
    """ plots two-feature logistic data on a single axis """
    # Find indices of positive and negative examples
    pos = y == 1
    neg = y == 0
    pos = pos.reshape(-1,)  # work with 1D or 2D y vectors
    neg = neg.reshape(-1,)

    # Plot examples
    ax.scatter(X[pos, 0], X[pos, 1], marker='x', s=s, c='red', label=pos_label)
    ax.scatter(X[neg, 0], X[neg, 1], marker='o', s=s, label=neg_label, facecolors='none', edgecolors=dlblue, lw=3)
    ax.legend(loc=loc)

    ax.figure.canvas.toolbar_visible = False
    ax.figure.canvas.header_visible = False
    ax.figure.canvas.footer_visible = False
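
# Example (hedged sketch; X_tmp/y_tmp are the illustrative names used above):
# fig, ax = plt.subplots(1, 1, figsize=(4, 4))
# plot_data(X_tmp, y_tmp, ax)
# plt.show()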

def plt_tumor_data(x, y, ax):
    """ plots tumor data on one axis """
    pos = y == 1
    neg = y == 0

    ax.scatter(x[pos], y[pos], marker='x', s=80, c='red', label="malignant")
    ax.scatter(x[neg], y[neg], marker='o', s=100, label="benign", facecolors='none', edgecolors=dlblue, lw=3)
    ax.set_ylim(-0.175, 1.1)
    ax.set_ylabel('y')
    ax.set_xlabel('Tumor Size')
    ax.set_title("Logistic Regression on Categorical Data")

    ax.figure.canvas.toolbar_visible = False
    ax.figure.canvas.header_visible = False
    ax.figure.canvas.footer_visible = False

# Draws a threshold at x, shading the decision regions on either side
def draw_vthresh(ax, x):
    """ draws a threshold """
    ylim = ax.get_ylim()
    xlim = ax.get_xlim()
    ax.fill_between([xlim[0], x], [ylim[1], ylim[1]], alpha=0.2, color=dlblue)
    ax.fill_between([x, xlim[1]], [ylim[1], ylim[1]], alpha=0.2, color=dldarkred)
    ax.annotate("z >= 0", xy=[x, 0.5], xycoords='data',
                xytext=[30, 5], textcoords='offset points')
    d = FancyArrowPatch(
        posA=(x, 0.5), posB=(x + 3, 0.5), color=dldarkred,
        arrowstyle='simple, head_width=5, head_length=10, tail_width=0.0',
    )
    ax.add_artist(d)
    ax.annotate("z < 0", xy=[x, 0.5], xycoords='data',
                xytext=[-50, 5], textcoords='offset points', ha='left')
    f = FancyArrowPatch(
        posA=(x, 0.5), posB=(x - 3, 0.5), color=dlblue,
        arrowstyle='simple, head_width=5, head_length=10, tail_width=0.0',
    )
    ax.add_artist(f)
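
# Example (hedged sketch): typical use of the plotting helpers above inside a lab.
# fig, ax = plt.subplots(1, 1, figsize=(5, 4))
# plt_tumor_data(np.array([0., 1., 2., 3., 4., 5.]), np.array([0, 0, 0, 1, 1, 1]), ax)
# draw_vthresh(ax, 2.5)   # shade the two decision regions around x = 2.5
# plt.show()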