CoCalc -- lab_utils

GitHub Repository: jxareas/Machine-Learning-Notebooks
Path: blob/master/1_Supervised_Machine_Learning/Week 3. Classification/lab_utils_common.py
²⁸²⁶ views
1
"""
2
lab_utils_common
3
   contains common routines and variable definitions
4
   used by all the labs in this week.
5
   by contrast, specific, large plotting routines will be in separate files
6
   and are generally imported into the week where they are used.
7
   those files will import this file
8
"""
9
import copy
10
import math
11
import numpy as np
12
import matplotlib.pyplot as plt
13
from matplotlib.patches import FancyArrowPatch
14
from ipywidgets import Output
15

16
np.set_printoptions(precision=2)
17

18
dlc = dict(dlblue = '#0096ff', dlorange = '#FF9300', dldarkred='#C00000', dlmagenta='#FF40FF', dlpurple='#7030A0')
19
dlblue = '#0096ff'; dlorange = '#FF9300'; dldarkred='#C00000'; dlmagenta='#FF40FF'; dlpurple='#7030A0'
20
dlcolors = [dlblue, dlorange, dldarkred, dlmagenta, dlpurple]
21
plt.style.use('./deeplearning.mplstyle')
22

23
def sigmoid(z):
24
    """
25
    Compute the sigmoid of z
26

27
    Parameters
28
    ----------
29
    z : array_like
30
        A scalar or numpy array of any size.
31

32
    Returns
33
    -------
34
     g : array_like
35
         sigmoid(z)
36
    """
37
    z = np.clip( z, -500, 500 )           # protect against overflow
38
    g = 1.0/(1.0+np.exp(-z))
39

40
    return g
41

42
##########################################################
43
# Regression Routines
44
##########################################################
45

46
def predict_logistic(X, w, b):
47
    """ performs prediction """
48
    return sigmoid(X @ w + b)
49

50
def predict_linear(X, w, b):
51
    """ performs prediction """
52
    return X @ w + b
53

54
def compute_cost_logistic(X, y, w, b, lambda_=0, safe=False):
55
    """
56
    Computes cost using logistic loss, non-matrix version
57

58
    Args:
59
      X (ndarray): Shape (m,n)  matrix of examples with n features
60
      y (ndarray): Shape (m,)   target values
61
      w (ndarray): Shape (n,)   parameters for prediction
62
      b (scalar):               parameter  for prediction
63
      lambda_ : (scalar, float) Controls amount of regularization, 0 = no regularization
64
      safe : (boolean)          True-selects under/overflow safe algorithm
65
    Returns:
66
      cost (scalar): cost
67
    """
68

69
    m,n = X.shape
70
    cost = 0.0
71
    for i in range(m):
72
        z_i    = np.dot(X[i],w) + b                                             #(n,)(n,) or (n,) ()
73
        if safe:  #avoids overflows
74
            cost += -(y[i] * z_i ) + log_1pexp(z_i)
75
        else:
76
            f_wb_i = sigmoid(z_i)                                                   #(n,)
77
            cost  += -y[i] * np.log(f_wb_i) - (1 - y[i]) * np.log(1 - f_wb_i)       # scalar
78
    cost = cost/m
79

80
    reg_cost = 0
81
    if lambda_ != 0:
82
        for j in range(n):
83
            reg_cost += (w[j]**2)                                               # scalar
84
        reg_cost = (lambda_/(2*m))*reg_cost
85

86
    return cost + reg_cost
87

88

89
def log_1pexp(x, maximum=20):
90
    ''' approximate log(1+exp^x)
91
        https://stats.stackexchange.com/questions/475589/numerical-computation-of-cross-entropy-in-practice
92
    Args:
93
    x   : (ndarray Shape (n,1) or (n,)  input
94
    out : (ndarray Shape matches x      output ~= np.log(1+exp(x))
95
    '''
96

97
    out  = np.zeros_like(x,dtype=float)
98
    i    = x <= maximum
99
    ni   = np.logical_not(i)
100

101
    out[i]  = np.log(1 + np.exp(x[i]))
102
    out[ni] = x[ni]
103
    return out
104

105

106
def compute_cost_matrix(X, y, w, b, logistic=False, lambda_=0, safe=True):
107
    """
108
    Computes the cost using  using matrices
109
    Args:
110
      X : (ndarray, Shape (m,n))          matrix of examples
111
      y : (ndarray  Shape (m,) or (m,1))  target value of each example
112
      w : (ndarray  Shape (n,) or (n,1))  Values of parameter(s) of the model
113
      b : (scalar )                       Values of parameter of the model
114
      verbose : (Boolean) If true, print out intermediate value f_wb
115
    Returns:
116
      total_cost: (scalar)                cost
117
    """
118
    m = X.shape[0]
119
    y = y.reshape(-1,1)             # ensure 2D
120
    w = w.reshape(-1,1)             # ensure 2D
121
    if logistic:
122
        if safe:  #safe from overflow
123
            z = X @ w + b                                                           #(m,n)(n,1)=(m,1)
124
            cost = -(y * z) + log_1pexp(z)
125
            cost = np.sum(cost)/m                                                   # (scalar)
126
        else:
127
            f    = sigmoid(X @ w + b)                                               # (m,n)(n,1) = (m,1)
128
            cost = (1/m)*(np.dot(-y.T, np.log(f)) - np.dot((1-y).T, np.log(1-f)))   # (1,m)(m,1) = (1,1)
129
            cost = cost[0,0]                                                        # scalar
130
    else:
131
        f    = X @ w + b                                                        # (m,n)(n,1) = (m,1)
132
        cost = (1/(2*m)) * np.sum((f - y)**2)                                   # scalar
133

134
    reg_cost = (lambda_/(2*m)) * np.sum(w**2)                                   # scalar
135

136
    total_cost = cost + reg_cost                                                # scalar
137

138
    return total_cost                                                           # scalar
139

140
def compute_gradient_matrix(X, y, w, b, logistic=False, lambda_=0):
141
    """
142
    Computes the gradient using matrices
143

144
    Args:
145
      X : (ndarray, Shape (m,n))          matrix of examples
146
      y : (ndarray  Shape (m,) or (m,1))  target value of each example
147
      w : (ndarray  Shape (n,) or (n,1))  Values of parameters of the model
148
      b : (scalar )                       Values of parameter of the model
149
      logistic: (boolean)                 linear if false, logistic if true
150
      lambda_:  (float)                   applies regularization if non-zero
151
    Returns
152
      dj_dw: (array_like Shape (n,1))     The gradient of the cost w.r.t. the parameters w
153
      dj_db: (scalar)                     The gradient of the cost w.r.t. the parameter b
154
    """
155
    m = X.shape[0]
156
    y = y.reshape(-1,1)             # ensure 2D
157
    w = w.reshape(-1,1)             # ensure 2D
158

159
    f_wb  = sigmoid( X @ w + b ) if logistic else  X @ w + b      # (m,n)(n,1) = (m,1)
160
    err   = f_wb - y                                              # (m,1)
161
    dj_dw = (1/m) * (X.T @ err)                                   # (n,m)(m,1) = (n,1)
162
    dj_db = (1/m) * np.sum(err)                                   # scalar
163

164
    dj_dw += (lambda_/m) * w        # regularize                  # (n,1)
165

166
    return dj_db, dj_dw                                           # scalar, (n,1)
167

168
def gradient_descent(X, y, w_in, b_in, alpha, num_iters, logistic=False, lambda_=0, verbose=True):
169
    """
170
    Performs batch gradient descent to learn theta. Updates theta by taking
171
    num_iters gradient steps with learning rate alpha
172

173
    Args:
174
      X (ndarray):    Shape (m,n)         matrix of examples
175
      y (ndarray):    Shape (m,) or (m,1) target value of each example
176
      w_in (ndarray): Shape (n,) or (n,1) Initial values of parameters of the model
177
      b_in (scalar):                      Initial value of parameter of the model
178
      logistic: (boolean)                 linear if false, logistic if true
179
      lambda_:  (float)                   applies regularization if non-zero
180
      alpha (float):                      Learning rate
181
      num_iters (int):                    number of iterations to run gradient descent
182

183
    Returns:
184
      w (ndarray): Shape (n,) or (n,1)    Updated values of parameters; matches incoming shape
185
      b (scalar):                         Updated value of parameter
186
    """
187
    # An array to store cost J and w's at each iteration primarily for graphing later
188
    J_history = []
189
    w = copy.deepcopy(w_in)  #avoid modifying global w within function
190
    b = b_in
191
    w = w.reshape(-1,1)      #prep for matrix operations
192
    y = y.reshape(-1,1)
193

194
    for i in range(num_iters):
195

196
        # Calculate the gradient and update the parameters
197
        dj_db,dj_dw = compute_gradient_matrix(X, y, w, b, logistic, lambda_)
198

199
        # Update Parameters using w, b, alpha and gradient
200
        w = w - alpha * dj_dw
201
        b = b - alpha * dj_db
202

203
        # Save cost J at each iteration
204
        if i<100000:      # prevent resource exhaustion
205
            J_history.append( compute_cost_matrix(X, y, w, b, logistic, lambda_) )
206

207
        # Print cost every at intervals 10 times or as many iterations if < 10
208
        if i% math.ceil(num_iters / 10) == 0:
209
            if verbose: print(f"Iteration {i:4d}: Cost {J_history[-1]}   ")
210

211
    return w.reshape(w_in.shape), b, J_history  #return final w,b and J history for graphing
212

213
def zscore_normalize_features(X):
214
    """
215
    computes  X, zcore normalized by column
216

217
    Args:
218
      X (ndarray): Shape (m,n) input data, m examples, n features
219

220
    Returns:
221
      X_norm (ndarray): Shape (m,n)  input normalized by column
222
      mu (ndarray):     Shape (n,)   mean of each feature
223
      sigma (ndarray):  Shape (n,)   standard deviation of each feature
224
    """
225
    # find the mean of each column/feature
226
    mu     = np.mean(X, axis=0)                 # mu will have shape (n,)
227
    # find the standard deviation of each column/feature
228
    sigma  = np.std(X, axis=0)                  # sigma will have shape (n,)
229
    # element-wise, subtract mu for that column from each example, divide by std for that column
230
    X_norm = (X - mu) / sigma
231

232
    return X_norm, mu, sigma
233

234
#check our work
235
#from sklearn.preprocessing import scale
236
#scale(X_orig, axis=0, with_mean=True, with_std=True, copy=True)
237

238
######################################################
239
# Common Plotting Routines
240
######################################################
241

242

243
def plot_data(X, y, ax, pos_label="y=1", neg_label="y=0", s=80, loc='best' ):
244
    """ plots logistic data with two axis """
245
    # Find Indices of Positive and Negative Examples
246
    pos = y == 1
247
    neg = y == 0
248
    pos = pos.reshape(-1,)  #work with 1D or 1D y vectors
249
    neg = neg.reshape(-1,)
250

251
    # Plot examples
252
    ax.scatter(X[pos, 0], X[pos, 1], marker='x', s=s, c = 'red', label=pos_label)
253
    ax.scatter(X[neg, 0], X[neg, 1], marker='o', s=s, label=neg_label, facecolors='none', edgecolors=dlblue, lw=3)
254
    ax.legend(loc=loc)
255

256
    ax.figure.canvas.toolbar_visible = False
257
    ax.figure.canvas.header_visible = False
258
    ax.figure.canvas.footer_visible = False
259

260
def plt_tumor_data(x, y, ax):
261
    """ plots tumor data on one axis """
262
    pos = y == 1
263
    neg = y == 0
264

265
    ax.scatter(x[pos], y[pos], marker='x', s=80, c = 'red', label="malignant")
266
    ax.scatter(x[neg], y[neg], marker='o', s=100, label="benign", facecolors='none', edgecolors=dlblue,lw=3)
267
    ax.set_ylim(-0.175,1.1)
268
    ax.set_ylabel('y')
269
    ax.set_xlabel('Tumor Size')
270
    ax.set_title("Logistic Regression on Categorical Data")
271

272
    ax.figure.canvas.toolbar_visible = False
273
    ax.figure.canvas.header_visible = False
274
    ax.figure.canvas.footer_visible = False
275

276
# Draws a threshold at 0.5
277
def draw_vthresh(ax,x):
278
    """ draws a threshold """
279
    ylim = ax.get_ylim()
280
    xlim = ax.get_xlim()
281
    ax.fill_between([xlim[0], x], [ylim[1], ylim[1]], alpha=0.2, color=dlblue)
282
    ax.fill_between([x, xlim[1]], [ylim[1], ylim[1]], alpha=0.2, color=dldarkred)
283
    ax.annotate("z >= 0", xy= [x,0.5], xycoords='data',
284
                xytext=[30,5],textcoords='offset points')
285
    d = FancyArrowPatch(
286
        posA=(x, 0.5), posB=(x+3, 0.5), color=dldarkred,
287
        arrowstyle='simple, head_width=5, head_length=10, tail_width=0.0',
288
    )
289
    ax.add_artist(d)
290
    ax.annotate("z < 0", xy= [x,0.5], xycoords='data',
291
                 xytext=[-50,5],textcoords='offset points', ha='left')
292
    f = FancyArrowPatch(
293
        posA=(x, 0.5), posB=(x-3, 0.5), color=dlblue,
294
        arrowstyle='simple, head_width=5, head_length=10, tail_width=0.0',
295
    )
296
    ax.add_artist(f)
297

298
Product

Resources

Company