Path: blob/master/1_Supervised_Machine_Learning/Week 3. Classification/lab_utils_common.py
"""1lab_utils_common2contains common routines and variable definitions3used by all the labs in this week.4by contrast, specific, large plotting routines will be in separate files5and are generally imported into the week where they are used.6those files will import this file7"""8import copy9import math10import numpy as np11import matplotlib.pyplot as plt12from matplotlib.patches import FancyArrowPatch13from ipywidgets import Output1415np.set_printoptions(precision=2)1617dlc = dict(dlblue = '#0096ff', dlorange = '#FF9300', dldarkred='#C00000', dlmagenta='#FF40FF', dlpurple='#7030A0')18dlblue = '#0096ff'; dlorange = '#FF9300'; dldarkred='#C00000'; dlmagenta='#FF40FF'; dlpurple='#7030A0'19dlcolors = [dlblue, dlorange, dldarkred, dlmagenta, dlpurple]20plt.style.use('./deeplearning.mplstyle')2122def sigmoid(z):23"""24Compute the sigmoid of z2526Parameters27----------28z : array_like29A scalar or numpy array of any size.3031Returns32-------33g : array_like34sigmoid(z)35"""36z = np.clip( z, -500, 500 ) # protect against overflow37g = 1.0/(1.0+np.exp(-z))3839return g4041##########################################################42# Regression Routines43##########################################################4445def predict_logistic(X, w, b):46""" performs prediction """47return sigmoid(X @ w + b)4849def predict_linear(X, w, b):50""" performs prediction """51return X @ w + b5253def compute_cost_logistic(X, y, w, b, lambda_=0, safe=False):54"""55Computes cost using logistic loss, non-matrix version5657Args:58X (ndarray): Shape (m,n) matrix of examples with n features59y (ndarray): Shape (m,) target values60w (ndarray): Shape (n,) parameters for prediction61b (scalar): parameter for prediction62lambda_ : (scalar, float) Controls amount of regularization, 0 = no regularization63safe : (boolean) True-selects under/overflow safe algorithm64Returns:65cost (scalar): cost66"""6768m,n = X.shape69cost = 0.070for i in range(m):71z_i = np.dot(X[i],w) + b #(n,)(n,) or (n,) ()72if safe: #avoids overflows73cost += -(y[i] * z_i ) + log_1pexp(z_i)74else:75f_wb_i = sigmoid(z_i) #(n,)76cost += -y[i] * np.log(f_wb_i) - (1 - y[i]) * np.log(1 - f_wb_i) # scalar77cost = cost/m7879reg_cost = 080if lambda_ != 0:81for j in range(n):82reg_cost += (w[j]**2) # scalar83reg_cost = (lambda_/(2*m))*reg_cost8485return cost + reg_cost868788def log_1pexp(x, maximum=20):89''' approximate log(1+exp^x)90https://stats.stackexchange.com/questions/475589/numerical-computation-of-cross-entropy-in-practice91Args:92x : (ndarray Shape (n,1) or (n,) input93out : (ndarray Shape matches x output ~= np.log(1+exp(x))94'''9596out = np.zeros_like(x,dtype=float)97i = x <= maximum98ni = np.logical_not(i)99100out[i] = np.log(1 + np.exp(x[i]))101out[ni] = x[ni]102return out103104105def compute_cost_matrix(X, y, w, b, logistic=False, lambda_=0, safe=True):106"""107Computes the cost using using matrices108Args:109X : (ndarray, Shape (m,n)) matrix of examples110y : (ndarray Shape (m,) or (m,1)) target value of each example111w : (ndarray Shape (n,) or (n,1)) Values of parameter(s) of the model112b : (scalar ) Values of parameter of the model113verbose : (Boolean) If true, print out intermediate value f_wb114Returns:115total_cost: (scalar) cost116"""117m = X.shape[0]118y = y.reshape(-1,1) # ensure 2D119w = w.reshape(-1,1) # ensure 2D120if logistic:121if safe: #safe from overflow122z = X @ w + b #(m,n)(n,1)=(m,1)123cost = -(y * z) + log_1pexp(z)124cost = np.sum(cost)/m # (scalar)125else:126f = sigmoid(X @ w + b) # (m,n)(n,1) = (m,1)127cost = 
def compute_cost_matrix(X, y, w, b, logistic=False, lambda_=0, safe=True):
    """
    Computes the cost using matrices
    Args:
      X : (ndarray, Shape (m,n))          matrix of examples
      y : (ndarray  Shape (m,) or (m,1))  target value of each example
      w : (ndarray  Shape (n,) or (n,1))  Values of parameter(s) of the model
      b : (scalar )                       Values of parameter of the model
      logistic: (boolean)                 linear if false, logistic if true
      lambda_:  (float)                   applies regularization if non-zero
      safe:     (boolean)                 True selects the under/overflow-safe algorithm
    Returns:
      total_cost: (scalar)                cost
    """
    m = X.shape[0]
    y = y.reshape(-1,1)             # ensure 2D
    w = w.reshape(-1,1)             # ensure 2D
    if logistic:
        if safe:  # safe from overflow
            z = X @ w + b                                                           # (m,n)(n,1)=(m,1)
            cost = -(y * z) + log_1pexp(z)
            cost = np.sum(cost)/m                                                   # (scalar)
        else:
            f    = sigmoid(X @ w + b)                                               # (m,n)(n,1) = (m,1)
            cost = (1/m)*(np.dot(-y.T, np.log(f)) - np.dot((1-y).T, np.log(1-f)))   # (1,m)(m,1) = (1,1)
            cost = cost[0,0]                                                        # scalar
    else:
        f    = X @ w + b                                                            # (m,n)(n,1) = (m,1)
        cost = (1/(2*m)) * np.sum((f - y)**2)                                       # scalar

    reg_cost = (lambda_/(2*m)) * np.sum(w**2)                                       # scalar

    total_cost = cost + reg_cost                                                    # scalar

    return total_cost                                                               # scalar

def compute_gradient_matrix(X, y, w, b, logistic=False, lambda_=0):
    """
    Computes the gradient using matrices

    Args:
      X : (ndarray, Shape (m,n))          matrix of examples
      y : (ndarray  Shape (m,) or (m,1))  target value of each example
      w : (ndarray  Shape (n,) or (n,1))  Values of parameters of the model
      b : (scalar )                       Values of parameter of the model
      logistic: (boolean)                 linear if false, logistic if true
      lambda_:  (float)                   applies regularization if non-zero
    Returns:
      dj_dw: (array_like Shape (n,1))     The gradient of the cost w.r.t. the parameters w
      dj_db: (scalar)                     The gradient of the cost w.r.t. the parameter b
    """
    m = X.shape[0]
    y = y.reshape(-1,1)             # ensure 2D
    w = w.reshape(-1,1)             # ensure 2D

    f_wb  = sigmoid(X @ w + b) if logistic else X @ w + b         # (m,n)(n,1) = (m,1)
    err   = f_wb - y                                              # (m,1)
    dj_dw = (1/m) * (X.T @ err)                                   # (n,m)(m,1) = (n,1)
    dj_db = (1/m) * np.sum(err)                                   # scalar

    dj_dw += (lambda_/m) * w        # regularize                  # (n,1)

    return dj_db, dj_dw             # scalar, (n,1)

def gradient_descent(X, y, w_in, b_in, alpha, num_iters, logistic=False, lambda_=0, verbose=True):
    """
    Performs batch gradient descent to learn w and b. Updates w and b by taking
    num_iters gradient steps with learning rate alpha.

    Args:
      X (ndarray):      Shape (m,n)          matrix of examples
      y (ndarray):      Shape (m,) or (m,1)  target value of each example
      w_in (ndarray):   Shape (n,) or (n,1)  Initial values of parameters of the model
      b_in (scalar):    Initial value of parameter of the model
      alpha (float):    Learning rate
      num_iters (int):  number of iterations to run gradient descent
      logistic: (boolean)                    linear if false, logistic if true
      lambda_:  (float)                      applies regularization if non-zero
      verbose:  (boolean)                    if true, prints the cost at intervals

    Returns:
      w (ndarray): Shape (n,) or (n,1)       Updated values of parameters; matches incoming shape
      b (scalar):                            Updated value of parameter
      J_history (list):                      Cost recorded at each iteration, for graphing
    """
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    w = copy.deepcopy(w_in)  # avoid modifying global w within function
    b = b_in
    w = w.reshape(-1,1)      # prep for matrix operations
    y = y.reshape(-1,1)

    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db, dj_dw = compute_gradient_matrix(X, y, w, b, logistic, lambda_)

        # Update Parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Save cost J at each iteration
        if i < 100000:       # prevent resource exhaustion
            J_history.append(compute_cost_matrix(X, y, w, b, logistic, lambda_))

        # Print cost 10 times over the run, or on every iteration if num_iters < 10
        if i % math.ceil(num_iters/10) == 0:
            if verbose: print(f"Iteration {i:4d}: Cost {J_history[-1]}   ")

    return w.reshape(w_in.shape), b, J_history  # return final w,b and J history for graphing
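
# Usage sketch (added for illustration, not part of the original lab file; the
# helper name _demo_gradient_descent and the data values are made up). Fits a
# tiny one-feature logistic model end to end with the routines above.
def _demo_gradient_descent():
    X = np.array([[0.], [1.], [2.], [3.], [4.], [5.]])   # (m,1) single feature
    y = np.array([0, 0, 0, 1, 1, 1])                     # (m,)  binary targets
    w0 = np.zeros((1,))                                  # initial parameters
    b0 = 0.0
    w, b, J_hist = gradient_descent(X, y, w0, b0, alpha=0.1, num_iters=1000,
                                    logistic=True, lambda_=0, verbose=False)
    print(f"w: {w}, b: {b:0.2f}, final cost: {J_hist[-1]:0.4f}")
    print("predictions:", predict_logistic(X, w, b))     # probabilities in (0,1)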
def zscore_normalize_features(X):
    """
    computes X, z-score normalized by column

    Args:
      X (ndarray): Shape (m,n) input data, m examples, n features

    Returns:
      X_norm (ndarray): Shape (m,n)  input normalized by column
      mu (ndarray):     Shape (n,)   mean of each feature
      sigma (ndarray):  Shape (n,)   standard deviation of each feature
    """
    # find the mean of each column/feature
    mu     = np.mean(X, axis=0)                 # mu will have shape (n,)
    # find the standard deviation of each column/feature
    sigma  = np.std(X, axis=0)                  # sigma will have shape (n,)
    # element-wise, subtract mu for that column from each example, divide by std for that column
    X_norm = (X - mu) / sigma

    return X_norm, mu, sigma

#check our work
#from sklearn.preprocessing import scale
#scale(X_orig, axis=0, with_mean=True, with_std=True, copy=True)

######################################################
# Common Plotting Routines
######################################################


def plot_data(X, y, ax, pos_label="y=1", neg_label="y=0", s=80, loc='best'):
    """ plots logistic data with two axes """
    # Find Indices of Positive and Negative Examples
    pos = y == 1
    neg = y == 0
    pos = pos.reshape(-1,)  # work with 1D or 2D y vectors
    neg = neg.reshape(-1,)

    # Plot examples
    ax.scatter(X[pos, 0], X[pos, 1], marker='x', s=s, c='red', label=pos_label)
    ax.scatter(X[neg, 0], X[neg, 1], marker='o', s=s, label=neg_label, facecolors='none', edgecolors=dlblue, lw=3)
    ax.legend(loc=loc)

    ax.figure.canvas.toolbar_visible = False
    ax.figure.canvas.header_visible = False
    ax.figure.canvas.footer_visible = False

def plt_tumor_data(x, y, ax):
    """ plots tumor data on one axis """
    pos = y == 1
    neg = y == 0

    ax.scatter(x[pos], y[pos], marker='x', s=80, c='red', label="malignant")
    ax.scatter(x[neg], y[neg], marker='o', s=100, label="benign", facecolors='none', edgecolors=dlblue, lw=3)
    ax.set_ylim(-0.175, 1.1)
    ax.set_ylabel('y')
    ax.set_xlabel('Tumor Size')
    ax.set_title("Logistic Regression on Categorical Data")

    ax.figure.canvas.toolbar_visible = False
    ax.figure.canvas.header_visible = False
    ax.figure.canvas.footer_visible = False

# Draws a threshold at 0.5
def draw_vthresh(ax, x):
    """ draws a threshold """
    ylim = ax.get_ylim()
    xlim = ax.get_xlim()
    ax.fill_between([xlim[0], x], [ylim[1], ylim[1]], alpha=0.2, color=dlblue)
    ax.fill_between([x, xlim[1]], [ylim[1], ylim[1]], alpha=0.2, color=dldarkred)
    ax.annotate("z >= 0", xy=[x, 0.5], xycoords='data',
                xytext=[30, 5], textcoords='offset points')
    d = FancyArrowPatch(
        posA=(x, 0.5), posB=(x+3, 0.5), color=dldarkred,
        arrowstyle='simple, head_width=5, head_length=10, tail_width=0.0',
    )
    ax.add_artist(d)
    ax.annotate("z < 0", xy=[x, 0.5], xycoords='data',
                xytext=[-50, 5], textcoords='offset points', ha='left')
    f = FancyArrowPatch(
        posA=(x, 0.5), posB=(x-3, 0.5), color=dlblue,
        arrowstyle='simple, head_width=5, head_length=10, tail_width=0.0',
    )
    ax.add_artist(f)
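
# Illustrative check (added sketch, not part of the original lab file; the
# helper name _demo_zscore and the data values are hypothetical). Verifies that
# z-score normalized columns have mean ~0 and std ~1, and that the returned
# mu/sigma can normalize new examples the same way.
def _demo_zscore():
    X = np.array([[100., 5.], [200., 7.], [300., 9.]])
    X_norm, mu, sigma = zscore_normalize_features(X)
    print("column means:", np.mean(X_norm, axis=0))   # ~[0. 0.]
    print("column stds: ", np.std(X_norm, axis=0))    # ~[1. 1.]
    X_new = np.array([[150., 6.]])
    print("normalized new example:", (X_new - mu) / sigma)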