Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
Download

📚 The CoCalc Library - books, templates and other resources

132928 views
License: OTHER
1
import numpy as np
2
3
from sklearn.datasets import make_blobs
4
from sklearn.cluster import KMeans
5
from sklearn.metrics import pairwise_distances
6
import matplotlib.pyplot as plt
7
import matplotlib as mpl
8
from cycler import cycler
9
10
from .tools import discrete_scatter
11
from .plot_2d_separator import plot_2d_classification
12
from .plot_helpers import cm3
13
14
15
def _draw_kmeans_step(ax, title, X, labels, centers):
    """Draw one panel of the k-means walkthrough.

    Sets ``title`` on ``ax``, scatters the rows of ``X`` with the given
    per-point ``labels`` using circle markers, then overlays the three
    ``centers`` as triangles labelled 0, 1 and 2.

    Returns whatever ``discrete_scatter`` returns for the centers; the
    caller passes that value to ``Axes.legend`` as the legend handles.
    """
    ax.set_title(title)
    discrete_scatter(X[:, 0], X[:, 1], labels, markers=['o'], ax=ax)
    return discrete_scatter(centers[:, 0], centers[:, 1], [0, 1, 2],
                            ax=ax, markers=['^'], markeredgewidth=2)


def plot_kmeans_algorithm():
    """Illustrate the first three k-means iterations on a 3x3 grid of axes.

    The panels show, in order: the raw data, the initial centers (the first
    three samples), and then alternating "assign points" / "recompute
    centers" steps. The last axes only carries the legend.
    """
    X, _ = make_blobs(random_state=1)
    init = X[:3, :]

    # The explicit ``init`` array with n_init=1 makes every fit start from the
    # same centers, so one fit per max_iter value is enough (the original code
    # fitted the max_iter=1 model twice with identical arguments).
    km_1, km_2, km_3 = (
        KMeans(n_clusters=3, init=init, max_iter=n_iter, n_init=1).fit(X)
        for n_iter in (1, 2, 3)
    )

    # The first assignment has to be computed by hand: per the original
    # author's note, scikit-learn performs two e-steps for max_iter=1, so
    # km_1.labels_ already holds the *re*-assignment shown in panel 4.
    first_labels = np.argmin(pairwise_distances(init, X), axis=0)

    # we don't want cyan in there
    with mpl.rc_context(rc={'axes.prop_cycle': cycler('color', ['#0000aa',
                                                                '#ff2020',
                                                                '#50ff50'])}):
        _, axes = plt.subplots(3, 3, figsize=(10, 8),
                               subplot_kw={'xticks': (), 'yticks': ()})
        axes = axes.ravel()

        axes[0].set_title("Input data")
        discrete_scatter(X[:, 0], X[:, 1], ax=axes[0], markers=['o'], c='w')

        axes[1].set_title("Initialization")
        discrete_scatter(X[:, 0], X[:, 1], ax=axes[1], markers=['o'], c='w')
        discrete_scatter(init[:, 0], init[:, 1], [0, 1, 2], ax=axes[1],
                         markers=['^'], markeredgewidth=2)

        _draw_kmeans_step(axes[2], "Assign Points (1)",
                          X, first_labels, init)
        _draw_kmeans_step(axes[3], "Recompute Centers (1)",
                          X, first_labels, km_1.cluster_centers_)
        _draw_kmeans_step(axes[4], "Reassign Points (2)",
                          X, km_1.labels_, km_1.cluster_centers_)
        _draw_kmeans_step(axes[5], "Recompute Centers (2)",
                          X, km_1.labels_, km_2.cluster_centers_)
        # Keep the center artists of this panel to use as legend handles.
        legend_handles = _draw_kmeans_step(axes[6], "Reassign Points (3)",
                                           X, km_2.labels_,
                                           km_2.cluster_centers_)
        _draw_kmeans_step(axes[7], "Recompute Centers (3)",
                          X, km_2.labels_, km_3.cluster_centers_)

        axes[8].set_axis_off()
        axes[8].legend(legend_handles, ["Cluster 0", "Cluster 1", "Cluster 2"],
                       loc='best')
83
84
85
def plot_kmeans_boundaries():
    """Plot a two-iteration k-means fit on blob data.

    Fits three clusters starting from the first three samples, scatters the
    points by assigned label and the centers as triangles, then hands the
    fitted model to ``plot_2d_classification`` (presumably to shade each
    cluster's region -- see that helper for details).
    """
    points, _ = make_blobs(random_state=1)
    seeds = points[:3, :]

    model = KMeans(n_clusters=3, init=seeds, max_iter=2, n_init=1)
    model.fit(points)
    centers = model.cluster_centers_

    discrete_scatter(points[:, 0], points[:, 1], model.labels_, markers=['o'])
    discrete_scatter(centers[:, 0], centers[:, 1], [0, 1, 2],
                     markers=['^'], markeredgewidth=2)
    plot_2d_classification(model, points, cm=cm3, alpha=.4)
93
94
95
def plot_kmeans_faces(km, pca, X_pca, X_people, y_people, target_names):
    """Show each cluster center next to its nearest and farthest members.

    Draws a 10 x 11 grid: column 0 is the cluster center mapped back through
    ``pca.inverse_transform``, columns 1-5 are the five member images with
    the smallest squared distance to the center in ``X_pca`` space, and
    columns 6-10 the five members with the largest. Each image is titled
    with the last whitespace-separated token of its target name.

    The row count (10) and image shape (87, 65) are fixed in the code, so
    ``km`` is expected to have at least ten clusters.
    """
    n_clusters = 10
    image_shape = (87, 65)
    _, grid = plt.subplots(n_clusters, 11,
                           subplot_kw={'xticks': (), 'yticks': ()},
                           figsize=(10, 15), gridspec_kw={"hspace": .3})

    for row in range(n_clusters):
        centroid = km.cluster_centers_[row]
        outside = ~(km.labels_ == row)
        sq_dists = np.sum((X_pca - centroid) ** 2, axis=1)

        # Push non-members to +inf so the smallest distances are members ...
        sq_dists[outside] = np.inf
        nearest = np.argsort(sq_dists)[:5]
        # ... then to -inf so the largest distances are members as well.
        sq_dists[outside] = -np.inf
        farthest = np.argsort(sq_dists)[-5:]
        picks = np.r_[nearest, farthest]

        center_image = pca.inverse_transform(centroid).reshape(image_shape)
        grid[row, 0].imshow(center_image, vmin=0, vmax=1)

        members = zip(X_people[picks], y_people[picks],
                      km.labels_[picks], grid[row, 1:])
        for face, person, _, ax in members:
            ax.imshow(face.reshape(image_shape), vmin=0, vmax=1)
            ax.set_title("%s" % (target_names[person].split()[-1]),
                         fontdict={'fontsize': 9})

    # add some boxes to illustrate which are similar and which dissimilar
    # (column in the top row, box width, caption)
    boxes = (
        (0, 73, "Center"),
        (1, 385, "Close to center"),
        (6, 385, "Far from center"),
    )
    for column, width, caption in boxes:
        anchor_ax = grid[0, column]
        frame = plt.Rectangle([-5, -30], width, 1295, fill=False, lw=2)
        frame = anchor_ax.add_patch(frame)
        # The frame extends far beyond its own axes, so clipping must be off.
        frame.set_clip_on(False)
        anchor_ax.text(0, -40, caption)
130
131