Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
Download

📚 The CoCalc Library - books, templates and other resources

132928 views
License: OTHER
1
import numpy as np
2
import matplotlib.pyplot as plt
3
import warnings
4
5
6
def plot_venn_diagram():
    """Draw a two-set Venn diagram inside a rectangular universe.

    A new figure with a frameless, tick-free axes is created through
    ``plt.subplots``.  Two half-transparent overlapping circles (red and
    blue) are labelled ``$x$`` and ``$y$``; a black rectangle outline is
    labelled ``$I$``.  The function returns nothing.
    """
    axes_options = dict(frameon=False, xticks=[], yticks=[])
    _, ax = plt.subplots(subplot_kw=axes_options)

    # The two sets: equal-radius discs shifted along x so that they overlap.
    for centre, colour in (((0.3, 0.3), 'red'), ((0.6, 0.3), 'blue')):
        ax.add_patch(plt.Circle(centre, 0.3, fc=colour, alpha=0.5))

    # Outline only (no fill) for the enclosing universe.
    ax.add_patch(plt.Rectangle((-0.1, -0.1), 1.1, 0.8, fc='none', ec='black'))

    # Set labels are centred on their anchor point; the universe label keeps
    # matplotlib's default alignment.
    for x_pos, label in ((0.2, '$x$'), (0.7, '$y$')):
        ax.text(x_pos, 0.3, label, size=30, ha='center', va='center')
    ax.text(0.0, 0.6, '$I$', size=30)

    # Equal data scaling keeps the circles round.
    ax.axis('equal')
15
16
17
def plot_example_decision_tree():
    """Draw a hand-laid-out example decision tree for classifying animals.

    Creates a 10x4 inch figure with a single frameless, tick-free axes,
    places the question nodes and the faded answer labels as rounded text
    boxes, connects them with solid lines, and draws dashed stubs below the
    bottom row of questions.  The function returns nothing.
    """
    fig = plt.figure(figsize=(10, 4))
    ax = fig.add_axes([0, 0, 0.8, 1], frameon=False, xticks=[], yticks=[])
    ax.set_title('Example Decision Tree: Animal Classification', size=24)

    def boxed(x, y, label, size, **extra):
        # Centred text inside a white, black-edged rounded box.
        ax.text(x, y, label,
                ha='center', va='center', size=size,
                bbox=dict(boxstyle='round', ec='k', fc='w'), **extra)

    # (x, y, text, font size) for every question node, root first.
    questions = (
        (0.5, 0.9, "How big is\nthe animal?", 20),
        (0.3, 0.6, "Does the animal\nhave horns?", 18),
        (0.7, 0.6, "Does the animal\nhave two legs?", 18),
        (0.12, 0.3, "Are the horns\nlonger than 10cm?", 14),
        (0.38, 0.3, "Is the animal\nwearing a collar?", 14),
        (0.62, 0.3, "Does the animal\nhave wings?", 14),
        (0.88, 0.3, "Does the animal\nhave a tail?", 14),
    )
    for x, y, label, size in questions:
        boxed(x, y, label, size)

    # (x, y, text) for the faded answer labels sitting on the branches.
    answers = (
        (0.4, 0.75, "> 1m"),
        (0.6, 0.75, "< 1m"),
        (0.21, 0.45, "yes"),
        (0.34, 0.45, "no"),
        (0.66, 0.45, "yes"),
        (0.79, 0.45, "no"),
    )
    for x, y, label in answers:
        boxed(x, y, label, 12, alpha=0.4)

    # (xs, ys, line format): solid lines join question nodes, dashed lines
    # run from the bottom row of questions down to y = 0.
    branches = (
        ([0.3, 0.5, 0.7], [0.6, 0.9, 0.6], '-k'),
        ([0.12, 0.3, 0.38], [0.3, 0.6, 0.3], '-k'),
        ([0.62, 0.7, 0.88], [0.3, 0.6, 0.3], '-k'),
        ([0.0, 0.12, 0.20], [0.0, 0.3, 0.0], '--k'),
        ([0.28, 0.38, 0.48], [0.0, 0.3, 0.0], '--k'),
        ([0.52, 0.62, 0.72], [0.0, 0.3, 0.0], '--k'),
        ([0.8, 0.88, 1.0], [0.0, 0.3, 0.0], '--k'),
    )
    for xs, ys, fmt in branches:
        ax.plot(xs, ys, fmt)

    ax.axis([0, 1, 0, 1])
52
53
54
def visualize_tree(estimator, X, y, boundaries=True,
                   xlim=None, ylim=None):
    """Fit ``estimator`` on ``(X, y)`` and plot its predictions over the plane.

    The first two columns of ``X`` are used as plot coordinates.  Predictions
    on a 100x100 grid are drawn as a faint colour mesh in a new figure, the
    training points are scattered on top coloured by ``y``, and the axes are
    switched off.

    Parameters
    ----------
    estimator : object providing ``fit`` and ``predict``; when ``boundaries``
        is true it must also expose ``tree_`` with ``feature``, ``threshold``,
        ``children_left`` and ``children_right``.
    X : array indexed as ``X[:, 0]`` and ``X[:, 1]``.
    y : array providing ``min()`` and ``max()``; used for colours and the
        colour limits.
    boundaries : if true, draw a black line for every split of the tree.
    xlim, ylim : optional ``(low, high)`` pairs; default to the data range of
        the corresponding column padded by 0.1 on each side.
    """
    estimator.fit(X, y)

    pad = 0.1
    if xlim is None:
        xlim = (X[:, 0].min() - pad, X[:, 0].max() + pad)
    if ylim is None:
        ylim = (X[:, 1].min() - pad, X[:, 1].max() + pad)

    x_min, x_max = xlim
    y_min, y_max = ylim

    # Evaluate the fitted estimator on a regular grid covering the view.
    grid_x, grid_y = np.meshgrid(np.linspace(x_min, x_max, 100),
                                 np.linspace(y_min, y_max, 100))
    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
    predicted = estimator.predict(grid_points).reshape(grid_x.shape)

    # Background: predictions as a faint colour mesh.
    plt.figure()
    plt.pcolormesh(grid_x, grid_y, predicted, alpha=0.2, cmap='rainbow')
    plt.clim(y.min(), y.max())

    # Foreground: the training points.
    plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow')
    plt.axis('off')

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    # Second call: the scatter is now the current mappable, so give it the
    # same colour limits as the mesh.
    plt.clim(y.min(), y.max())

    if not boundaries:
        return

    # Walk the tree in pre-order (node, left subtree, right subtree) with an
    # explicit stack; each entry carries the rectangle the node is confined to.
    tree = estimator.tree_
    pending = [(0, plt.xlim(), plt.ylim())]
    while pending:
        node, x_range, y_range = pending.pop()
        if node < 0:
            # Negative child index: nothing below this point.
            continue

        if tree.feature[node] == 0:
            # Split on the first feature: vertical line across y_range.
            cut = tree.threshold[node]
            plt.plot([cut, cut], y_range, '-k')
            # Push right first so that the left child is handled first.
            pending.append((tree.children_right[node],
                            [cut, x_range[1]], y_range))
            pending.append((tree.children_left[node],
                            [x_range[0], cut], y_range))
        elif tree.feature[node] == 1:
            # Split on the second feature: horizontal line across x_range.
            cut = tree.threshold[node]
            plt.plot(x_range, [cut, cut], '-k')
            pending.append((tree.children_right[node],
                            x_range, [cut, y_range[1]]))
            pending.append((tree.children_left[node],
                            x_range, [y_range[0], cut]))
106
107
108
def plot_tree_interactive(X, y):
    """Show an interactive decision-tree plot with a depth slider.

    Each time the ``depth`` control changes, a ``DecisionTreeClassifier``
    with that ``max_depth`` (and ``random_state=0``) is fitted and drawn by
    ``visualize_tree``.

    Parameters
    ----------
    X, y : training data, passed unchanged to ``visualize_tree``.

    Returns
    -------
    Whatever ``interact`` returns for the wrapped callback.
    """
    from sklearn.tree import DecisionTreeClassifier

    try:
        from ipywidgets import interact
    except ImportError:
        # Legacy location of the widgets in old IPython releases.
        from IPython.html.widgets import interact

    def interactive_tree(depth=1):
        clf = DecisionTreeClassifier(max_depth=depth, random_state=0)
        visualize_tree(clf, X, y)

    # A (min, max) tuple is the slider abbreviation; a two-element list is
    # treated by ipywidgets as a dropdown offering only those two values.
    return interact(interactive_tree, depth=(1, 5))
117
118
119
def plot_kmeans_interactive(min_clusters=1, max_clusters=6):
    """Step through k-means on a synthetic blob data set with two sliders.

    The data are 300 points from ``make_blobs`` (4 centres, ``random_state=0``,
    ``cluster_std=0.60``).  The ``frame`` slider (0..50) selects how far the
    algorithm has run: ``frame // 3`` complete iterations, followed by a
    partial one chosen by ``frame % 3``:

    * 0 -- state before the next reassignment,
    * 1 -- points reassigned to their nearest centre,
    * 2 -- centres additionally moved to the cluster means, with arrows from
      the old to the new positions.

    Parameters
    ----------
    min_clusters, max_clusters : bounds of the ``n_clusters`` slider.

    Returns
    -------
    Whatever ``interact`` returns for the wrapped callback.
    """
    from sklearn.metrics.pairwise import euclidean_distances
    # ``make_blobs`` is exported by ``sklearn.datasets`` itself; the
    # ``samples_generator`` submodule no longer exists in current releases.
    from sklearn.datasets import make_blobs

    try:
        from ipywidgets import interact
    except ImportError:
        # Legacy location of the widgets in old IPython releases.
        from IPython.html.widgets import interact

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        X, _ = make_blobs(n_samples=300, centers=4,
                          random_state=0, cluster_std=0.60)

    def _kmeans_step(frame=0, n_clusters=4):
        # Fixed seed: every frame replays the same run from the same start.
        rng = np.random.RandomState(2)
        labels = np.zeros(X.shape[0])
        centers = rng.randn(n_clusters, 2)

        nsteps, phase = divmod(frame, 3)

        for i in range(nsteps + 1):
            old_centers = centers
            if i < nsteps or phase > 0:
                dist = euclidean_distances(X, centers)
                labels = dist.argmin(1)

            if i < nsteps or phase > 1:
                # The mean of an empty cluster is NaN and triggers a numpy
                # warning.  Silence it here, inside the callback: a filter
                # installed by the enclosing function is already gone when
                # the widget calls this function on a slider move.
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore')
                    centers = np.array([X[labels == j].mean(0)
                                        for j in range(n_clusters)])
                # Empty clusters keep their previous centre.
                nans = np.isnan(centers)
                centers[nans] = old_centers[nans]

        # plot the data and cluster centers
        plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='rainbow',
                    vmin=0, vmax=n_clusters - 1)
        plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o',
                    c=np.arange(n_clusters),
                    s=200, cmap='rainbow')
        plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o',
                    c='black', s=50)

        # plot new centers if third frame
        if phase == 2:
            for k in range(n_clusters):
                plt.annotate('', centers[k], old_centers[k],
                             arrowprops=dict(arrowstyle='->', linewidth=1))
            plt.scatter(centers[:, 0], centers[:, 1], marker='o',
                        c=np.arange(n_clusters),
                        s=200, cmap='rainbow')
            plt.scatter(centers[:, 0], centers[:, 1], marker='o',
                        c='black', s=50)

        plt.xlim(-4, 4)
        plt.ylim(-2, 10)

        if phase == 1:
            plt.text(3.8, 9.5, "1. Reassign points to nearest centroid",
                     ha='right', va='top', size=14)
        elif phase == 2:
            plt.text(3.8, 9.5, "2. Update centroids to cluster means",
                     ha='right', va='top', size=14)

    # (min, max) tuples are the slider abbreviation; two-element lists are
    # treated by ipywidgets as dropdowns offering only those two values.
    return interact(_kmeans_step, frame=(0, 50),
                    n_clusters=(min_clusters, max_clusters))
183
184
185
def plot_image_components(x, coefficients=None, mean=0, components=None,
                          imshape=(8, 8), n_components=6, fontsize=12):
    """Plot an image next to its step-by-step reconstruction from components.

    The figure is a two-row grid.  On the left the original ``x`` is shown
    ("True"); then the mean; then, for each component ``i``, the component
    itself (top row) and the running sum
    ``mean + sum(coefficients[k] * components[k] for k <= i)`` (bottom row);
    on the right the final sum ("Approx").

    Parameters
    ----------
    x : flat array that can be reshaped to ``imshape``.
    coefficients : per-component weights; defaults to ``x`` itself.
    mean : value or array added to zeros shaped like ``x``.
    components : indexable collection of flat arrays; defaults to
        ``np.eye(len(coefficients), len(x))``.
    imshape : shape every image is reshaped to before display.
    n_components : number of terms to draw; reduced to the number of
        coefficients/components supplied if that is smaller.
    fontsize : font size of the titles and of the ``+`` signs.
    """
    if coefficients is None:
        coefficients = x

    if components is None:
        components = np.eye(len(coefficients), len(x))

    # Never index past the terms that were actually supplied.
    n_components = min(n_components, len(coefficients), len(components))

    mean = np.zeros_like(x) + mean

    fig = plt.figure(figsize=(1.2 * (5 + n_components), 1.2 * 2))
    g = plt.GridSpec(2, 5 + n_components, hspace=0.3)

    def show(i, j, image, title=None):
        # Draw one image in grid cell(s) [i, j]; return the axes used.
        ax = fig.add_subplot(g[i, j], xticks=[], yticks=[])
        ax.imshow(image.reshape(imshape), interpolation='nearest')
        if title:
            ax.set_title(title, fontsize=fontsize)
        return ax

    show(slice(2), slice(2), x, "True")

    approx = mean.copy()
    show(0, 2, mean, r'$\mu$')
    show(1, 2, approx, r'$1 \cdot \mu$')

    for i in range(n_components):
        approx = approx + coefficients[i] * components[i]
        # Brace the index so that two-digit subscripts render as one
        # subscript (``c_{10}``) instead of ``c_1`` followed by ``0``.
        show(0, i + 3, components[i], r'$c_{{{0}}}$'.format(i + 1))
        ax = show(1, i + 3, approx,
                  r"${0:.2f} \cdot c_{{{1}}}$".format(coefficients[i], i + 1))
        # "+" just left of the title of the running-sum panel.
        ax.text(0, 1.05, '$+$', ha='right', va='bottom',
                transform=ax.transAxes, fontsize=fontsize)

    show(slice(2), slice(-2, None), approx, "Approx")
220
221
222
def plot_pca_interactive(data, n_components=6):
    """Interactively show the PCA reconstruction of each row of ``data``.

    A ``PCA`` with ``n_components`` components is fitted to ``data``; a
    slider over the row index then calls ``plot_image_components`` with that
    row, its projection, the PCA mean and the PCA components.

    Parameters
    ----------
    data : 2-D array; ``data.shape[0]`` bounds the slider and ``data[i]`` is
        the image shown.
    n_components : passed to ``PCA``.

    Returns
    -------
    None
    """
    from sklearn.decomposition import PCA

    try:
        from ipywidgets import interact
    except ImportError:
        # Legacy location of the widgets in old IPython releases.
        from IPython.html.widgets import interact

    pca = PCA(n_components=n_components)
    Xproj = pca.fit_transform(data)

    # Draw exactly as many terms as were fitted.  Without this the plotting
    # helper falls back to its own default of 6, which over-indexes the
    # projection when fewer components were requested and silently drops
    # the rest when more were requested.
    n_fitted = pca.components_.shape[0]

    def show_decomp(i=0):
        plot_image_components(data[i], Xproj[i],
                              pca.mean_, pca.components_,
                              n_components=n_fitted)

    interact(show_decomp, i=(0, data.shape[0] - 1))
234
235