# 📚 The CoCalc Library - books, templates and other resources
# License: OTHER
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering
from sklearn.neighbors import KernelDensity

# Number of grid points per axis on which the density estimates are evaluated.
_GRID_SIZE = 100


def _make_grid(X):
    """Build the padded plotting window and evaluation grid for ``X``.

    Parameters
    ----------
    X : ndarray
        Data points; column 0 is used as the x coordinate and column 1 as
        the y coordinate.

    Returns
    -------
    x_lim, y_lim : tuple of float
        ``(min, max)`` of each coordinate, padded by half the standard
        deviation of all values in ``X``.
    xx, yy : ndarray
        ``np.meshgrid`` coordinate matrices with ``_GRID_SIZE`` points per
        axis spanning the padded window.
    gridpoints : ndarray
        The grid flattened into one ``(x, y)`` row per grid point.
    """
    eps = X.std() / 2

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps

    xx, yy = np.meshgrid(np.linspace(x_min, x_max, _GRID_SIZE),
                         np.linspace(y_min, y_max, _GRID_SIZE))
    gridpoints = np.c_[xx.ravel(), yy.ravel()]
    return (x_min, x_max), (y_min, y_max), xx, yy, gridpoints


def _outline_clusters(ax, X, labels, xx, yy, gridpoints, linewidths):
    """Draw one contour line around every cluster that has more than one point.

    For each such cluster a kernel density estimate is fitted on its members
    and a single contour of the log-density is drawn on ``ax``.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw the contours on.
    X : ndarray
        The clustered data points.
    labels : ndarray of int
        Cluster label of every row of ``X``.
    xx, yy, gridpoints : ndarray
        Evaluation grid as returned by ``_make_grid``.
    linewidths : float
        Line width passed to ``ax.contour``.
    """
    cluster_sizes = np.bincount(labels)
    for cluster in np.flatnonzero(cluster_sizes > 1):
        members = labels == cluster
        points = X[members]
        other_points = X[~members]

        kde = KernelDensity(bandwidth=.5).fit(points)
        scores = kde.score_samples(gridpoints)
        # Lowest log-density among the cluster's own points and highest
        # log-density among all remaining points.
        score_inside = np.min(kde.score_samples(points))
        score_outside = np.max(kde.score_samples(other_points))
        # Contour level: weighted mix of the two, biased towards the inside.
        level = .8 * score_inside + .2 * score_outside
        ax.contour(xx, yy, scores.reshape(xx.shape), levels=[level],
                   colors='k', linestyles='solid', linewidths=linewidths)


def plot_agglomerative_algorithm():
    """Plot successive steps of agglomerative clustering, one subplot per step.

    Twelve synthetic two-dimensional points are generated with
    ``make_blobs``.  Subplot ``i`` shows the clustering with ``12 - i``
    clusters; clusters with more than one member are outlined.  The first
    subplot is titled "Initialization", the others "Step <i>".
    """
    # generate synthetic two-dimensional data
    X, _ = make_blobs(random_state=0, n_samples=12)
    n_samples = X.shape[0]

    agg = AgglomerativeClustering(n_clusters=n_samples,
                                  compute_full_tree=True).fit(X)

    fig, axes = plt.subplots(n_samples // 5, 5,
                             subplot_kw={'xticks': (), 'yticks': ()},
                             figsize=(20, 8))

    x_lim, y_lim, xx, yy, gridpoints = _make_grid(X)

    for i, ax in enumerate(axes.ravel()):
        ax.set_xlim(*x_lim)
        ax.set_ylim(*y_lim)
        # Refit with one cluster fewer than in the previous subplot.
        agg.n_clusters = n_samples - i
        agg.fit(X)
        ax.set_title("Step %d" % i)
        ax.scatter(X[:, 0], X[:, 1], s=60, c='grey')
        _outline_clusters(ax, X, agg.labels_, xx, yy, gridpoints,
                          linewidths=2)

    axes[0, 0].set_title("Initialization")


def plot_agglomerative():
    """Plot the nested clusters of agglomerative clustering on the current axes.

    Twelve synthetic two-dimensional points are generated with
    ``make_blobs``, drawn and labelled with their index.  The clustering is
    then refitted for 12 down to 2 clusters and the outlines of all clusters
    with more than one member are overlaid on the same axes.
    """
    X, _ = make_blobs(random_state=0, n_samples=12)
    # n_clusters is overwritten before every fit in the loop below.
    agg = AgglomerativeClustering(n_clusters=3)

    x_lim, y_lim, xx, yy, gridpoints = _make_grid(X)

    ax = plt.gca()
    for i, x in enumerate(X):
        ax.text(x[0] + .1, x[1], "%d" % i,
                horizontalalignment='left', verticalalignment='center')

    ax.scatter(X[:, 0], X[:, 1], s=60, c='grey')
    ax.set_xticks(())
    ax.set_yticks(())

    for i in range(11):
        agg.n_clusters = X.shape[0] - i
        agg.fit(X)
        _outline_clusters(ax, X, agg.labels_, xx, yy, gridpoints,
                          linewidths=1)

    ax.set_xlim(*x_lim)
    ax.set_ylim(*y_lim)