📚 The CoCalc Library - books, templates and other resources
License: OTHER
import numbers

import numpy as np

from sklearn.utils import check_array, check_random_state
from sklearn.utils import shuffle as shuffle_
from sklearn.utils.deprecation import deprecated


@deprecated("Please import make_blobs directly from scikit-learn")
def make_blobs(n_samples=100, n_features=2, centers=2, cluster_std=1.0,
               center_box=(-10.0, 10.0), shuffle=True, random_state=None):
    """Generate isotropic Gaussian blobs for clustering.

    Read more in the :ref:`User Guide <sample_generators>`.

    Parameters
    ----------
    n_samples : int, or tuple, optional (default=100)
        If int, the total number of points, divided as equally as possible
        among clusters (the first ``n_samples % n_centers`` clusters receive
        one extra point). If a sequence, the number of points to draw for
        each cluster; its length must equal the number of centers.

    n_features : int, optional (default=2)
        The number of features for each sample. Ignored when `centers` is
        an array, in which case it is taken from ``centers.shape[1]``.

    centers : int or array of shape [n_centers, n_features], optional
        (default=2)
        The number of centers to generate, or the fixed center locations.

    cluster_std : float or sequence of floats, optional (default=1.0)
        The standard deviation of the clusters. A sequence must provide one
        entry per center.

    center_box : pair of floats (min, max), optional (default=(-10.0, 10.0))
        The bounding box for each cluster center when centers are
        generated at random.

    shuffle : boolean, optional (default=True)
        Shuffle the samples.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Returns
    -------
    X : array of shape [n_samples, n_features]
        The generated samples.

    y : array of shape [n_samples]
        The integer labels for cluster membership of each sample.

    Raises
    ------
    ValueError
        If `n_samples` or `cluster_std` is a sequence whose length differs
        from the number of centers.

    Examples
    --------
    >>> from sklearn.datasets.samples_generator import make_blobs
    >>> X, y = make_blobs(n_samples=10, centers=3, n_features=2,
    ...                   random_state=0)
    >>> print(X.shape)
    (10, 2)
    >>> y
    array([0, 0, 1, 0, 2, 2, 2, 1, 1, 0])

    See also
    --------
    make_classification: a more intricate variant
    """
    generator = check_random_state(random_state)

    # The centers are drawn before any other random numbers so that a given
    # seed keeps producing the same data set.
    if isinstance(centers, numbers.Integral):
        centers = generator.uniform(center_box[0], center_box[1],
                                    size=(centers, n_features))
    else:
        centers = check_array(centers)
        n_features = centers.shape[1]
    n_centers = centers.shape[0]

    if isinstance(cluster_std, numbers.Real):
        # Broadcast a scalar standard deviation to every center.
        cluster_std = np.ones(n_centers) * cluster_std
    else:
        cluster_std = list(cluster_std)
        if len(cluster_std) != n_centers:
            raise ValueError(
                "Length of `cluster_std` not consistent with number of "
                "centers. Got {} entries for {} centers."
                .format(len(cluster_std), n_centers))

    if isinstance(n_samples, numbers.Integral):
        # Split evenly; the remainder goes to the first clusters.
        n_samples_per_center = [int(n_samples // n_centers)] * n_centers
        for i in range(n_samples % n_centers):
            n_samples_per_center[i] += 1
    else:
        n_samples_per_center = list(n_samples)
        if len(n_samples_per_center) != n_centers:
            # Without this check zip() below would silently drop clusters
            # (too few counts) or index past the last center (too many).
            raise ValueError(
                "Length of `n_samples` not consistent with number of "
                "centers. Got {} entries for {} centers."
                .format(len(n_samples_per_center), n_centers))

    X = []
    y = []
    for i, (n, std) in enumerate(zip(n_samples_per_center, cluster_std)):
        X.append(centers[i] + generator.normal(scale=std,
                                               size=(n, n_features)))
        y += [i] * n

    X = np.concatenate(X)
    y = np.array(y)

    if shuffle:
        # Reuse the same generator so the whole call is reproducible.
        X, y = shuffle_(X, y, random_state=generator)

    return X, y