📚 The CoCalc Library - books, templates and other resources
License: OTHER
from sklearn.decomposition import PCA1import matplotlib.pyplot as plt2import numpy as np34from joblib import Memory56memory = Memory(cachedir="cache")789def plot_pca_illustration():10rnd = np.random.RandomState(5)11X_ = rnd.normal(size=(300, 2))12X_blob = np.dot(X_, rnd.normal(size=(2, 2))) + rnd.normal(size=2)1314pca = PCA()15pca.fit(X_blob)16X_pca = pca.transform(X_blob)1718S = X_pca.std(axis=0)1920fig, axes = plt.subplots(2, 2, figsize=(10, 10))21axes = axes.ravel()2223axes[0].set_title("Original data")24axes[0].scatter(X_blob[:, 0], X_blob[:, 1], c=X_pca[:, 0], linewidths=0,25s=60, cmap='viridis')26axes[0].set_xlabel("feature 1")27axes[0].set_ylabel("feature 2")28axes[0].arrow(pca.mean_[0], pca.mean_[1], S[0] * pca.components_[0, 0],29S[0] * pca.components_[0, 1], width=.1, head_width=.3,30color='k')31axes[0].arrow(pca.mean_[0], pca.mean_[1], S[1] * pca.components_[1, 0],32S[1] * pca.components_[1, 1], width=.1, head_width=.3,33color='k')34axes[0].text(-1.5, -.5, "Component 2", size=14)35axes[0].text(-4, -4, "Component 1", size=14)36axes[0].set_aspect('equal')3738axes[1].set_title("Transformed data")39axes[1].scatter(X_pca[:, 0], X_pca[:, 1], c=X_pca[:, 0], linewidths=0,40s=60, cmap='viridis')41axes[1].set_xlabel("First principal component")42axes[1].set_ylabel("Second principal component")43axes[1].set_aspect('equal')44axes[1].set_ylim(-8, 8)4546pca = PCA(n_components=1)47pca.fit(X_blob)48X_inverse = pca.inverse_transform(pca.transform(X_blob))4950axes[2].set_title("Transformed data w/ second component dropped")51axes[2].scatter(X_pca[:, 0], np.zeros(X_pca.shape[0]), c=X_pca[:, 0],52linewidths=0, s=60, cmap='viridis')53axes[2].set_xlabel("First principal component")54axes[2].set_aspect('equal')55axes[2].set_ylim(-8, 8)5657axes[3].set_title("Back-rotation using only first component")58axes[3].scatter(X_inverse[:, 0], X_inverse[:, 1], c=X_pca[:, 0],59linewidths=0, s=60, cmap='viridis')60axes[3].set_xlabel("feature 1")61axes[3].set_ylabel("feature 2")62axes[3].set_aspect('equal')63axes[3].set_xlim(-8, 4)64axes[3].set_ylim(-8, 4)656667def plot_pca_whitening():68rnd = np.random.RandomState(5)69X_ = rnd.normal(size=(300, 2))70X_blob = np.dot(X_, rnd.normal(size=(2, 2))) + rnd.normal(size=2)7172pca = PCA(whiten=True)73pca.fit(X_blob)74X_pca = pca.transform(X_blob)7576fig, axes = plt.subplots(1, 2, figsize=(10, 10))77axes = axes.ravel()7879axes[0].set_title("Original data")80axes[0].scatter(X_blob[:, 0], X_blob[:, 1], c=X_pca[:, 0], linewidths=0, s=60, cmap='viridis')81axes[0].set_xlabel("feature 1")82axes[0].set_ylabel("feature 2")83axes[0].set_aspect('equal')8485axes[1].set_title("Whitened data")86axes[1].scatter(X_pca[:, 0], X_pca[:, 1], c=X_pca[:, 0], linewidths=0, s=60, cmap='viridis')87axes[1].set_xlabel("First principal component")88axes[1].set_ylabel("Second principal component")89axes[1].set_aspect('equal')90axes[1].set_xlim(-3, 4)919293@memory.cache94def pca_faces(X_train, X_test):95# copy and pasted from nmf. refactor?96# Build NMF models with 10, 50, 100, 500 components97# this list will hold the back-transformd test-data98reduced_images = []99for n_components in [10, 50, 100, 500]:100# build the NMF model101pca = PCA(n_components=n_components)102pca.fit(X_train)103# transform the test data (afterwards has n_components many dimensions)104X_test_pca = pca.transform(X_test)105# back-transform the transformed test-data106# (afterwards it's in the original space again)107X_test_back = pca.inverse_transform(X_test_pca)108reduced_images.append(X_test_back)109return reduced_images110111112def plot_pca_faces(X_train, X_test, image_shape):113reduced_images = pca_faces(X_train, X_test)114115# plot the first three images in the test set:116fix, axes = plt.subplots(3, 5, figsize=(15, 12),117subplot_kw={'xticks': (), 'yticks': ()})118for i, ax in enumerate(axes):119# plot original image120ax[0].imshow(X_test[i].reshape(image_shape),121vmin=0, vmax=1)122# plot the four back-transformed images123for a, X_test_back in zip(ax[1:], reduced_images):124a.imshow(X_test_back[i].reshape(image_shape), vmin=0, vmax=1)125126# label the top row127axes[0, 0].set_title("original image")128for ax, n_components in zip(axes[0, 1:], [10, 50, 100, 500]):129ax.set_title("%d components" % n_components)130131132