# CoCalc -- plot_2d_separator.py
#
# 📚 The CoCalc Library - books, templates and other resources
# cocalc-examples / introduction_to_ml_with_python / mglearn / plot_2d_separator.py
# 132923 views
# License: OTHER
1
import numpy as np
2
import matplotlib.pyplot as plt
3
from .plot_helpers import cm2, cm3, discrete_scatter
4

5
def _call_classifier_chunked(classifier_pred_or_decide, X):
6
    # The chunk_size is used to chunk the large arrays to work with x86
7
    # memory models that are restricted to < 2 GB in memory allocation. The
8
    # chunk_size value used here is based on a measurement with the
9
    # MLPClassifier using the following parameters:
10
    # MLPClassifier(solver='lbfgs', random_state=0,
11
    #               hidden_layer_sizes=[1000,1000,1000])
12
    # by reducing the value it is possible to trade in time for memory.
13
    # It is possible to chunk the array as the calculations are independent of
14
    # each other.
15
    # Note: an intermittent version made a distinction between
16
    # 32- and 64 bit architectures avoiding the chunking. Testing revealed
17
    # that even on 64 bit architectures the chunking increases the
18
    # performance by a factor of 3-5, largely due to the avoidance of memory
19
    # swapping.
20
    chunk_size = 10000
21

22
    # We use a list to collect all result chunks
23
    Y_result_chunks = []
24

25
    # Call the classifier in chunks.
26
    for x_chunk in np.array_split(X, np.arange(chunk_size, X.shape[0],
27
                                               chunk_size, dtype=np.int32),
28
                                  axis=0):
29
        Y_result_chunks.append(classifier_pred_or_decide(x_chunk))
30

31
    return np.concatenate(Y_result_chunks)
32

33

34
def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None,
                           alpha=1, cm=cm3):
    """Show a classifier's predictions over a 2D grid as an image.

    The plotted area is the range of the first two columns of ``X``,
    padded by ``eps`` on every side. ``classifier.predict`` is evaluated on
    a 1000 x 1000 grid covering that area and the result is drawn with
    ``ax.imshow``. Axis limits are set to the padded range and ticks are
    removed.

    Parameters
    ----------
    classifier : object
        Must provide a ``predict`` method accepting an ``(n, 2)`` array.
    X : ndarray
        Data indexed as ``X[:, 0]`` and ``X[:, 1]`` to determine the
        plotted area.
    fill : bool, default=False
        Not used by this function; kept so the signature stays unchanged.
    ax : matplotlib axes, optional
        Target axes; defaults to ``plt.gca()``.
    eps : float, optional
        Padding around the data range; defaults to ``X.std() / 2``.
    alpha : float, default=1
        Passed to ``imshow`` as ``alpha``.
    cm : colormap, default=cm3
        Passed to ``imshow`` as ``cmap``.
    """
    if eps is None:
        eps = X.std() / 2.

    if ax is None:
        ax = plt.gca()

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 1000)
    yy = np.linspace(y_min, y_max, 1000)

    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]

    # The grid has 1,000,000 rows; evaluate it in chunks, the same way
    # plot_2d_separator does, to keep peak memory bounded.
    predictions = _call_classifier_chunked(classifier.predict, X_grid)

    ax.imshow(predictions.reshape(X1.shape),
              extent=(x_min, x_max, y_min, y_max),
              aspect='auto', origin='lower', alpha=alpha, cmap=cm)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
58

59

60
def plot_2d_scores(classifier, X, ax=None, eps=None, alpha=1, cm="viridis",
                   function=None):
    """Show a classifier's scores over a 2D grid as an image.

    The plotted area is the range of the first two columns of ``X``,
    padded by ``eps`` on every side. A scoring method of ``classifier`` is
    evaluated on a 100 x 100 grid covering that area and drawn with
    ``ax.imshow``. Axis limits are set to the padded range and ticks are
    removed.

    Parameters
    ----------
    classifier : object
        Must provide the scoring method selected by ``function``.
    X : ndarray
        Data indexed as ``X[:, 0]`` and ``X[:, 1]`` to determine the
        plotted area.
    ax : matplotlib axes, optional
        Target axes; defaults to ``plt.gca()``.
    eps : float, optional
        Padding around the data range; defaults to ``X.std() / 2``.
    alpha : float, default=1
        Passed to ``imshow`` as ``alpha``.
    cm : colormap or str, default="viridis"
        Passed to ``imshow`` as ``cmap``.
    function : str, optional
        Name of the classifier method to call. When omitted,
        ``decision_function`` is used if the classifier has it, otherwise
        ``predict_proba``.

    Returns
    -------
    The object returned by ``ax.imshow``.

    Raises
    ------
    AttributeError
        If the requested (or any default) scoring method is missing.
    """
    if eps is None:
        eps = X.std() / 2.

    if ax is None:
        ax = plt.gca()

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 100)
    yy = np.linspace(y_min, y_max, 100)

    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]

    if function is None:
        # Look up predict_proba only when decision_function is absent, so
        # classifiers offering just decision_function do not fail.
        score_fn = getattr(classifier, "decision_function", None)
        if score_fn is None:
            score_fn = classifier.predict_proba
    else:
        score_fn = getattr(classifier, function)

    decision_values = score_fn(X_grid)
    if decision_values.ndim > 1 and decision_values.shape[1] > 1:
        # Multi-column output (e.g. predict_proba): plot column 1.
        decision_values = decision_values[:, 1]

    grr = ax.imshow(decision_values.reshape(X1.shape),
                    extent=(x_min, x_max, y_min, y_max), aspect='auto',
                    origin='lower', alpha=alpha, cmap=cm)

    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
    return grr
94

95

96
def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
                      cm=cm2, linewidth=None, threshold=None,
                      linestyle="solid"):
    """Draw the decision boundary of a classifier over a 2D grid.

    The plotted area is the range of the first two columns of ``X``,
    padded by ``eps`` on every side. The classifier is evaluated in chunks
    on a 1000 x 1000 grid covering that area, using ``decision_function``
    when available and column 1 of ``predict_proba`` otherwise. Axis limits
    are set to the padded range and ticks are removed.

    Parameters
    ----------
    classifier : object
        Must provide ``decision_function`` or ``predict_proba``.
    X : ndarray
        Data indexed as ``X[:, 0]`` and ``X[:, 1]`` to determine the
        plotted area.
    fill : bool, default=False
        If true, draw filled regions with ``ax.contourf``; otherwise draw
        the boundary as a black line with ``ax.contour``.
    ax : matplotlib axes, optional
        Target axes; defaults to ``plt.gca()``.
    eps : float, optional
        Padding around the data range; defaults to ``X.std() / 2``.
    alpha : float, default=1
        Passed to the contour call as ``alpha``.
    cm : colormap, default=cm2
        Passed to ``contourf`` as ``cmap`` (only used when ``fill`` is
        true).
    linewidth : optional
        Passed to ``contour`` as ``linewidths`` (only when ``fill`` is
        false).
    threshold : float, optional
        Level at which the boundary is drawn. Defaults to 0 for
        ``decision_function`` values and 0.5 for ``predict_proba`` values.
    linestyle : str, default="solid"
        Passed to ``contour`` as ``linestyles`` (only when ``fill`` is
        false).
    """
    if eps is None:
        eps = X.std() / 2.

    if ax is None:
        ax = plt.gca()

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 1000)
    yy = np.linspace(y_min, y_max, 1000)

    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]

    if hasattr(classifier, "decision_function"):
        decision_values = _call_classifier_chunked(
            classifier.decision_function, X_grid)
        levels = [0] if threshold is None else [threshold]
        # Filled regions span from the smallest to the largest score.
        fill_levels = ([decision_values.min()] + levels
                       + [decision_values.max()])
    else:
        # No decision_function: fall back to the second probability column.
        decision_values = _call_classifier_chunked(
            classifier.predict_proba, X_grid)[:, 1]
        levels = [.5] if threshold is None else [threshold]
        fill_levels = [0] + levels + [1]

    grid_values = decision_values.reshape(X1.shape)
    if fill:
        ax.contourf(X1, X2, grid_values,
                    levels=fill_levels, alpha=alpha, cmap=cm)
    else:
        ax.contour(X1, X2, grid_values, levels=levels,
                   colors="black", alpha=alpha, linewidths=linewidth,
                   linestyles=linestyle, zorder=5)

    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
137

138

139
if __name__ == '__main__':
    # Demo: fit a logistic regression on two blobs and plot the filled
    # decision regions together with the training points.
    from sklearn.datasets import make_blobs
    from sklearn.linear_model import LogisticRegression
    X, y = make_blobs(centers=2, random_state=42)
    clf = LogisticRegression(solver='lbfgs').fit(X, y)
    plot_2d_separator(clf, X, fill=True)
    discrete_scatter(X[:, 0], X[:, 1], y)
    plt.show()
147

148
# Product

# Resources

# Company