Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
Download

📚 The CoCalc Library - books, templates and other resources

132928 views
License: OTHER
1
import matplotlib.pyplot as plt
2
import numpy as np
3
from sklearn.datasets import make_blobs
4
from sklearn.cluster import AgglomerativeClustering
5
from sklearn.neighbors import KernelDensity
6
7
8
def plot_agglomerative_algorithm():
    """Plot successive agglomerative clusterings, one panel per step.

    Generates a blob dataset (``n_samples=12``, ``random_state=0``) and
    creates a figure with ``n_samples // 5`` rows of 5 panels. Panel ``i``
    refits the clustering with ``n_samples - i`` clusters, scatters all
    points in grey and outlines every cluster that has more than one member
    with a single kernel-density contour line. The first panel is retitled
    "Initialization".

    The function takes no arguments and returns nothing; its only effect is
    drawing on a newly created matplotlib figure.
    """
    # generate synthetic two-dimensional data
    X, _ = make_blobs(random_state=0, n_samples=12)
    n_samples = X.shape[0]

    agg = AgglomerativeClustering(n_clusters=n_samples, compute_full_tree=True)
    agg.fit(X)

    tickless = {'xticks': (), 'yticks': ()}
    _fig, axes = plt.subplots(n_samples // 5, 5, subplot_kw=tickless,
                              figsize=(20, 8))

    # Pad the plotted region by half of the overall standard deviation.
    margin = X.std() / 2
    x_min, y_min = X.min(axis=0) - margin
    x_max, y_max = X.max(axis=0) + margin

    # 100 x 100 grid on which the cluster densities are evaluated.
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    gridpoints = np.c_[xx.ravel(), yy.ravel()]

    for step, ax in enumerate(axes.ravel()):
        ax.set_xlim(x_min, x_max)
        ax.set_ylim(y_min, y_max)

        # One cluster fewer than in the previous panel.
        agg.n_clusters = n_samples - step
        agg.fit(X)
        labels = agg.labels_

        ax.set_title("Step %d" % step)
        ax.scatter(X[:, 0], X[:, 1], s=60, c='grey')

        sizes = np.bincount(labels)
        for cluster in range(agg.n_clusters):
            if sizes[cluster] <= 1:
                # Single-point clusters get no outline.
                continue

            in_cluster = labels == cluster
            members = X[in_cluster]
            outsiders = X[~in_cluster]

            kde = KernelDensity(bandwidth=.5).fit(members)
            grid_scores = kde.score_samples(gridpoints)
            lowest_inside = np.min(kde.score_samples(members))
            highest_outside = np.max(kde.score_samples(outsiders))
            # Contour level: weighted mix of the lowest member score and the
            # highest non-member score (presumably so the line encloses the
            # members while staying clear of the other points).
            level = .8 * lowest_inside + .2 * highest_outside
            ax.contour(xx, yy, grid_scores.reshape(xx.shape), levels=[level],
                       colors='k', linestyles='solid', linewidths=2)

    axes[0, 0].set_title("Initialization")
48
49
50
def plot_agglomerative():
    """Draw all agglomerative merge steps as nested outlines on one axes.

    Generates a blob dataset (``n_samples=12``, ``random_state=0``), labels
    each point with its row index on the current matplotlib axes, scatters
    the points in grey, and then, for 11 successive cluster counts
    (``n_samples`` down to ``n_samples - 10``), outlines every cluster that
    has more than one member with a single kernel-density contour line.

    The function takes no arguments and returns nothing; its only effect is
    drawing on the current axes (``plt.gca()``).
    """
    X, _ = make_blobs(random_state=0, n_samples=12)
    n_samples = X.shape[0]
    # n_clusters is overwritten before every fit below.
    agg = AgglomerativeClustering(n_clusters=3)

    # Pad the plotted region by half of the overall standard deviation.
    margin = X.std() / 2.
    x_min, y_min = X.min(axis=0) - margin
    x_max, y_max = X.max(axis=0) + margin

    # 100 x 100 grid on which the cluster densities are evaluated.
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    gridpoints = np.c_[xx.ravel(), yy.ravel()]

    ax = plt.gca()
    # Write each point's index slightly to the right of the point.
    for index, point in enumerate(X):
        ax.text(point[0] + .1, point[1], "%d" % index,
                horizontalalignment='left', verticalalignment='center')

    ax.scatter(X[:, 0], X[:, 1], s=60, c='grey')
    ax.set_xticks(())
    ax.set_yticks(())

    for step in range(11):
        # One cluster fewer than in the previous iteration.
        agg.n_clusters = n_samples - step
        agg.fit(X)
        labels = agg.labels_

        sizes = np.bincount(labels)
        for cluster in range(agg.n_clusters):
            if sizes[cluster] <= 1:
                # Single-point clusters get no outline.
                continue

            in_cluster = labels == cluster
            members = X[in_cluster]
            outsiders = X[~in_cluster]

            kde = KernelDensity(bandwidth=.5).fit(members)
            grid_scores = kde.score_samples(gridpoints)
            lowest_inside = np.min(kde.score_samples(members))
            highest_outside = np.max(kde.score_samples(outsiders))
            # Contour level: weighted mix of the lowest member score and the
            # highest non-member score (presumably so the line encloses the
            # members while staying clear of the other points).
            level = .8 * lowest_inside + .2 * highest_outside
            ax.contour(xx, yy, grid_scores.reshape(xx.shape), levels=[level],
                       colors='k', linestyles='solid', linewidths=1)

    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
90
91