Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

📚 The CoCalc Library - books, templates and other resources

132928 views
License: OTHER
1
from sklearn.decomposition import PCA
2
import matplotlib.pyplot as plt
3
import numpy as np
4
5
from joblib import Memory
6
7
memory = Memory(cachedir="cache")
8
9
10
def plot_pca_illustration():
11
rnd = np.random.RandomState(5)
12
X_ = rnd.normal(size=(300, 2))
13
X_blob = np.dot(X_, rnd.normal(size=(2, 2))) + rnd.normal(size=2)
14
15
pca = PCA()
16
pca.fit(X_blob)
17
X_pca = pca.transform(X_blob)
18
19
S = X_pca.std(axis=0)
20
21
fig, axes = plt.subplots(2, 2, figsize=(10, 10))
22
axes = axes.ravel()
23
24
axes[0].set_title("Original data")
25
axes[0].scatter(X_blob[:, 0], X_blob[:, 1], c=X_pca[:, 0], linewidths=0,
26
s=60, cmap='viridis')
27
axes[0].set_xlabel("feature 1")
28
axes[0].set_ylabel("feature 2")
29
axes[0].arrow(pca.mean_[0], pca.mean_[1], S[0] * pca.components_[0, 0],
30
S[0] * pca.components_[0, 1], width=.1, head_width=.3,
31
color='k')
32
axes[0].arrow(pca.mean_[0], pca.mean_[1], S[1] * pca.components_[1, 0],
33
S[1] * pca.components_[1, 1], width=.1, head_width=.3,
34
color='k')
35
axes[0].text(-1.5, -.5, "Component 2", size=14)
36
axes[0].text(-4, -4, "Component 1", size=14)
37
axes[0].set_aspect('equal')
38
39
axes[1].set_title("Transformed data")
40
axes[1].scatter(X_pca[:, 0], X_pca[:, 1], c=X_pca[:, 0], linewidths=0,
41
s=60, cmap='viridis')
42
axes[1].set_xlabel("First principal component")
43
axes[1].set_ylabel("Second principal component")
44
axes[1].set_aspect('equal')
45
axes[1].set_ylim(-8, 8)
46
47
pca = PCA(n_components=1)
48
pca.fit(X_blob)
49
X_inverse = pca.inverse_transform(pca.transform(X_blob))
50
51
axes[2].set_title("Transformed data w/ second component dropped")
52
axes[2].scatter(X_pca[:, 0], np.zeros(X_pca.shape[0]), c=X_pca[:, 0],
53
linewidths=0, s=60, cmap='viridis')
54
axes[2].set_xlabel("First principal component")
55
axes[2].set_aspect('equal')
56
axes[2].set_ylim(-8, 8)
57
58
axes[3].set_title("Back-rotation using only first component")
59
axes[3].scatter(X_inverse[:, 0], X_inverse[:, 1], c=X_pca[:, 0],
60
linewidths=0, s=60, cmap='viridis')
61
axes[3].set_xlabel("feature 1")
62
axes[3].set_ylabel("feature 2")
63
axes[3].set_aspect('equal')
64
axes[3].set_xlim(-8, 4)
65
axes[3].set_ylim(-8, 4)
66
67
68
def plot_pca_whitening():
69
rnd = np.random.RandomState(5)
70
X_ = rnd.normal(size=(300, 2))
71
X_blob = np.dot(X_, rnd.normal(size=(2, 2))) + rnd.normal(size=2)
72
73
pca = PCA(whiten=True)
74
pca.fit(X_blob)
75
X_pca = pca.transform(X_blob)
76
77
fig, axes = plt.subplots(1, 2, figsize=(10, 10))
78
axes = axes.ravel()
79
80
axes[0].set_title("Original data")
81
axes[0].scatter(X_blob[:, 0], X_blob[:, 1], c=X_pca[:, 0], linewidths=0, s=60, cmap='viridis')
82
axes[0].set_xlabel("feature 1")
83
axes[0].set_ylabel("feature 2")
84
axes[0].set_aspect('equal')
85
86
axes[1].set_title("Whitened data")
87
axes[1].scatter(X_pca[:, 0], X_pca[:, 1], c=X_pca[:, 0], linewidths=0, s=60, cmap='viridis')
88
axes[1].set_xlabel("First principal component")
89
axes[1].set_ylabel("Second principal component")
90
axes[1].set_aspect('equal')
91
axes[1].set_xlim(-3, 4)
92
93
94
@memory.cache
95
def pca_faces(X_train, X_test):
96
# copy and pasted from nmf. refactor?
97
# Build NMF models with 10, 50, 100, 500 components
98
# this list will hold the back-transformd test-data
99
reduced_images = []
100
for n_components in [10, 50, 100, 500]:
101
# build the NMF model
102
pca = PCA(n_components=n_components)
103
pca.fit(X_train)
104
# transform the test data (afterwards has n_components many dimensions)
105
X_test_pca = pca.transform(X_test)
106
# back-transform the transformed test-data
107
# (afterwards it's in the original space again)
108
X_test_back = pca.inverse_transform(X_test_pca)
109
reduced_images.append(X_test_back)
110
return reduced_images
111
112
113
def plot_pca_faces(X_train, X_test, image_shape):
114
reduced_images = pca_faces(X_train, X_test)
115
116
# plot the first three images in the test set:
117
fix, axes = plt.subplots(3, 5, figsize=(15, 12),
118
subplot_kw={'xticks': (), 'yticks': ()})
119
for i, ax in enumerate(axes):
120
# plot original image
121
ax[0].imshow(X_test[i].reshape(image_shape),
122
vmin=0, vmax=1)
123
# plot the four back-transformed images
124
for a, X_test_back in zip(ax[1:], reduced_images):
125
a.imshow(X_test_back[i].reshape(image_shape), vmin=0, vmax=1)
126
127
# label the top row
128
axes[0, 0].set_title("original image")
129
for ax, n_components in zip(axes[0, 1:], [10, 50, 100, 500]):
130
ax.set_title("%d components" % n_components)
131
132