Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

Jupyter notebook s2.ipynb

6 views
Kernel: Python 3

Задача

Используя набор данных 'Wine':

  • Построить классификаторы по методу ближайших соседей по парам признаков (выбрать три пары признаков) и визуализировать, на какие области разбивается плоскость;

  • Построить классификаторы по методу ближайших соседей по наиболее информативным главным компонентам;

  • Оценить точность классификаторов.

Строить классификаторы по 3,5,7 ближайшим соседям.

import numpy as np import pandas as pd import matplotlib.pyplot as plt %matplotlib inline from sklearn import neighbors, metrics, decomposition
# Column names for the UCI Wine data set: the class label followed by the
# 13 chemical-analysis attributes, in file order.  An explicit list replaces
# the original triple-quoted string + split('\n'), which silently breaks if
# the string's line breaks are ever lost (as happens in copy/paste exports).
attrs = [
    "class_id",
    "Alcohol",
    "Malic acid",
    "Ash",
    "Alcalinity of ash",
    "Magnesium",
    "Total phenols",
    "Flavanoids",
    "Nonflavanoid phenols",
    "Proanthocyanins",
    "Color intensity",
    "Hue",
    "OD280/OD315 of diluted wines",
    "Proline",
]
# wine.data ships without a header row, so supply the column names explicitly.
wine_df = pd.read_csv("data/wine.data", sep=",", names=attrs)
print(wine_df)
class_id Alcohol Malic acid Ash Alcalinity of ash Magnesium \ 0 1 14.23 1.71 2.43 15.6 127 1 1 13.20 1.78 2.14 11.2 100 2 1 13.16 2.36 2.67 18.6 101 3 1 14.37 1.95 2.50 16.8 113 4 1 13.24 2.59 2.87 21.0 118 5 1 14.20 1.76 2.45 15.2 112 6 1 14.39 1.87 2.45 14.6 96 7 1 14.06 2.15 2.61 17.6 121 8 1 14.83 1.64 2.17 14.0 97 9 1 13.86 1.35 2.27 16.0 98 10 1 14.10 2.16 2.30 18.0 105 11 1 14.12 1.48 2.32 16.8 95 12 1 13.75 1.73 2.41 16.0 89 13 1 14.75 1.73 2.39 11.4 91 14 1 14.38 1.87 2.38 12.0 102 15 1 13.63 1.81 2.70 17.2 112 16 1 14.30 1.92 2.72 20.0 120 17 1 13.83 1.57 2.62 20.0 115 18 1 14.19 1.59 2.48 16.5 108 19 1 13.64 3.10 2.56 15.2 116 20 1 14.06 1.63 2.28 16.0 126 21 1 12.93 3.80 2.65 18.6 102 22 1 13.71 1.86 2.36 16.6 101 23 1 12.85 1.60 2.52 17.8 95 24 1 13.50 1.81 2.61 20.0 96 25 1 13.05 2.05 3.22 25.0 124 26 1 13.39 1.77 2.62 16.1 93 27 1 13.30 1.72 2.14 17.0 94 28 1 13.87 1.90 2.80 19.4 107 29 1 14.02 1.68 2.21 16.0 96 .. ... ... ... ... ... ... 148 3 13.32 3.24 2.38 21.5 92 149 3 13.08 3.90 2.36 21.5 113 150 3 13.50 3.12 2.62 24.0 123 151 3 12.79 2.67 2.48 22.0 112 152 3 13.11 1.90 2.75 25.5 116 153 3 13.23 3.30 2.28 18.5 98 154 3 12.58 1.29 2.10 20.0 103 155 3 13.17 5.19 2.32 22.0 93 156 3 13.84 4.12 2.38 19.5 89 157 3 12.45 3.03 2.64 27.0 97 158 3 14.34 1.68 2.70 25.0 98 159 3 13.48 1.67 2.64 22.5 89 160 3 12.36 3.83 2.38 21.0 88 161 3 13.69 3.26 2.54 20.0 107 162 3 12.85 3.27 2.58 22.0 106 163 3 12.96 3.45 2.35 18.5 106 164 3 13.78 2.76 2.30 22.0 90 165 3 13.73 4.36 2.26 22.5 88 166 3 13.45 3.70 2.60 23.0 111 167 3 12.82 3.37 2.30 19.5 88 168 3 13.58 2.58 2.69 24.5 105 169 3 13.40 4.60 2.86 25.0 112 170 3 12.20 3.03 2.32 19.0 96 171 3 12.77 2.39 2.28 19.5 86 172 3 14.16 2.51 2.48 20.0 91 173 3 13.71 5.65 2.45 20.5 95 174 3 13.40 3.91 2.48 23.0 102 175 3 13.27 4.28 2.26 20.0 120 176 3 13.17 2.59 2.37 20.0 120 177 3 14.13 4.10 2.74 24.5 96 Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins \ 0 2.80 3.06 0.28 2.29 1 2.65 2.76 0.26 1.28 2 2.80 
3.24 0.30 2.81 3 3.85 3.49 0.24 2.18 4 2.80 2.69 0.39 1.82 5 3.27 3.39 0.34 1.97 6 2.50 2.52 0.30 1.98 7 2.60 2.51 0.31 1.25 8 2.80 2.98 0.29 1.98 9 2.98 3.15 0.22 1.85 10 2.95 3.32 0.22 2.38 11 2.20 2.43 0.26 1.57 12 2.60 2.76 0.29 1.81 13 3.10 3.69 0.43 2.81 14 3.30 3.64 0.29 2.96 15 2.85 2.91 0.30 1.46 16 2.80 3.14 0.33 1.97 17 2.95 3.40 0.40 1.72 18 3.30 3.93 0.32 1.86 19 2.70 3.03 0.17 1.66 20 3.00 3.17 0.24 2.10 21 2.41 2.41 0.25 1.98 22 2.61 2.88 0.27 1.69 23 2.48 2.37 0.26 1.46 24 2.53 2.61 0.28 1.66 25 2.63 2.68 0.47 1.92 26 2.85 2.94 0.34 1.45 27 2.40 2.19 0.27 1.35 28 2.95 2.97 0.37 1.76 29 2.65 2.33 0.26 1.98 .. ... ... ... ... 148 1.93 0.76 0.45 1.25 149 1.41 1.39 0.34 1.14 150 1.40 1.57 0.22 1.25 151 1.48 1.36 0.24 1.26 152 2.20 1.28 0.26 1.56 153 1.80 0.83 0.61 1.87 154 1.48 0.58 0.53 1.40 155 1.74 0.63 0.61 1.55 156 1.80 0.83 0.48 1.56 157 1.90 0.58 0.63 1.14 158 2.80 1.31 0.53 2.70 159 2.60 1.10 0.52 2.29 160 2.30 0.92 0.50 1.04 161 1.83 0.56 0.50 0.80 162 1.65 0.60 0.60 0.96 163 1.39 0.70 0.40 0.94 164 1.35 0.68 0.41 1.03 165 1.28 0.47 0.52 1.15 166 1.70 0.92 0.43 1.46 167 1.48 0.66 0.40 0.97 168 1.55 0.84 0.39 1.54 169 1.98 0.96 0.27 1.11 170 1.25 0.49 0.40 0.73 171 1.39 0.51 0.48 0.64 172 1.68 0.70 0.44 1.24 173 1.68 0.61 0.52 1.06 174 1.80 0.75 0.43 1.41 175 1.59 0.69 0.43 1.35 176 1.65 0.68 0.53 1.46 177 2.05 0.76 0.56 1.35 Color intensity Hue OD280/OD315 of diluted wines Proline 0 5.640000 1.04 3.92 1065 1 4.380000 1.05 3.40 1050 2 5.680000 1.03 3.17 1185 3 7.800000 0.86 3.45 1480 4 4.320000 1.04 2.93 735 5 6.750000 1.05 2.85 1450 6 5.250000 1.02 3.58 1290 7 5.050000 1.06 3.58 1295 8 5.200000 1.08 2.85 1045 9 7.220000 1.01 3.55 1045 10 5.750000 1.25 3.17 1510 11 5.000000 1.17 2.82 1280 12 5.600000 1.15 2.90 1320 13 5.400000 1.25 2.73 1150 14 7.500000 1.20 3.00 1547 15 7.300000 1.28 2.88 1310 16 6.200000 1.07 2.65 1280 17 6.600000 1.13 2.57 1130 18 8.700000 1.23 2.82 1680 19 5.100000 0.96 3.36 845 20 5.650000 1.09 3.71 780 21 4.500000 1.03 
3.52 770 22 3.800000 1.11 4.00 1035 23 3.930000 1.09 3.63 1015 24 3.520000 1.12 3.82 845 25 3.580000 1.13 3.20 830 26 4.800000 0.92 3.22 1195 27 3.950000 1.02 2.77 1285 28 4.500000 1.25 3.40 915 29 4.700000 1.04 3.59 1035 .. ... ... ... ... 148 8.420000 0.55 1.62 650 149 9.400000 0.57 1.33 550 150 8.600000 0.59 1.30 500 151 10.800000 0.48 1.47 480 152 7.100000 0.61 1.33 425 153 10.520000 0.56 1.51 675 154 7.600000 0.58 1.55 640 155 7.900000 0.60 1.48 725 156 9.010000 0.57 1.64 480 157 7.500000 0.67 1.73 880 158 13.000000 0.57 1.96 660 159 11.750000 0.57 1.78 620 160 7.650000 0.56 1.58 520 161 5.880000 0.96 1.82 680 162 5.580000 0.87 2.11 570 163 5.280000 0.68 1.75 675 164 9.580000 0.70 1.68 615 165 6.620000 0.78 1.75 520 166 10.680000 0.85 1.56 695 167 10.260000 0.72 1.75 685 168 8.660000 0.74 1.80 750 169 8.500000 0.67 1.92 630 170 5.500000 0.66 1.83 510 171 9.899999 0.57 1.63 470 172 9.700000 0.62 1.71 660 173 7.700000 0.64 1.74 740 174 7.300000 0.70 1.56 750 175 10.200000 0.59 1.56 835 176 9.300000 0.60 1.62 840 177 9.200000 0.61 1.60 560 [178 rows x 14 columns]
# Two 7-NN classifiers that differ only in how neighbours vote:
# 'uniform' gives every neighbour an equal vote, 'distance' weights
# votes by inverse distance to the query point.
n_neighbors = 7
clf_u = neighbors.KNeighborsClassifier(weights='uniform', n_neighbors=n_neighbors)
clf_d = neighbors.KNeighborsClassifier(weights='distance', n_neighbors=n_neighbors)
# First pair of features — Alcohol (attrs[1]) and Malic acid (attrs[2]) —
# stacked into a two-column feature matrix; the wine class is the target.
X1 = wine_df[attrs[1]].values
X2 = wine_df[attrs[2]].values
X = np.column_stack((X1, X2))
Y = wine_df["class_id"].values
# Train both weighting variants on the same two-feature data.
for model in (clf_u, clf_d):
    model.fit(X, Y)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=7, p=2, weights='distance')
# Resubstitution predictions: classify the training points themselves.
Y_u, Y_d = clf_u.predict(X), clf_d.predict(X)
# Training-set accuracy per weighting scheme.  'distance' is trivially 1.0
# here: each training point is its own nearest neighbour at distance zero,
# so its own label receives infinite weight.
for label, predicted in (("uniform", Y_u), ("distance", Y_d)):
    print(label, metrics.accuracy_score(Y, predicted))
uniform 0.837078651685 distance 1.0
# Evaluate both classifiers on a 100x100 grid spanning the observed feature
# ranges; the reshaped label arrays are the decision-region maps.
x1_min, x1_max = X1.min(), X1.max()
x2_min, x2_max = X2.min(), X2.max()
x1 = np.linspace(x1_min, x1_max, 100)
x2 = np.linspace(x2_min, x2_max, 100)
xx1, xx2 = np.meshgrid(x1, x2)
XX = np.c_[xx1.ravel(), xx2.ravel()]
ZZ_u = clf_u.predict(XX).reshape(xx1.shape)
ZZ_d = clf_d.predict(XX).reshape(xx1.shape)
# Decision regions for both classifiers: left column shows the regions alone,
# right column overlays the training points.
# FIX: the original passed origin='bottom', which is not a valid matplotlib
# value (only 'upper'/'lower' are accepted); 'lower' is what was meant.
plt.figure(figsize=(16.0, 16.0))
plt.winter()
extent = [x1_min, x1_max, x2_min, x2_max]
panels = [
    (1, 'uniform', ZZ_u, False),
    (3, 'distance', ZZ_d, False),
    (2, 'uniform', ZZ_u, True),
    (4, 'distance', ZZ_d, True),
]
for pos, title, ZZ_map, overlay in panels:
    plt.subplot(2, 2, pos)
    plt.title(title)
    plt.imshow(ZZ_map, extent=extent, origin='lower', aspect='auto')
    if overlay:
        plt.scatter(X1, X2, s=36, c=Y)
        plt.xlabel(attrs[1])
        plt.ylabel(attrs[2])
    plt.grid(True)
Image in a Jupyter notebook
# Full 13-attribute feature matrix: every column except the class label.
# Selecting the columns directly from the DataFrame replaces the original
# tuple-of-arrays + np.c_ construction and yields the same (178, 13) array.
X = wine_df[attrs[1:]].values
Y = wine_df["class_id"].values
# Sanity check: 178 samples with 13 features, and one label per sample.
print("{} {}".format(X.shape, Y.shape))
(178, 13) (178,)
# Fit a full PCA (all 13 components kept) to inspect the variance spectrum.
pca = decomposition.PCA().fit(X)
PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, svd_solver='auto', tol=0.0, whiten=False)
# Scree plot: share of total variance explained by each principal component.
# Derive the bar count from the fitted PCA instead of hard-coding 13, so the
# cell keeps working if the feature set changes.
ratios = pca.explained_variance_ratio_
plt.figure(figsize=(10, 10))
plt.bar(range(len(ratios)), ratios)
plt.show()
print(ratios)
Image in a Jupyter notebook
[ 9.98091230e-01 1.73591562e-03 9.49589576e-05 5.02173562e-05 1.23636847e-05 8.46213034e-06 2.80681456e-06 1.52308053e-06 1.12783044e-06 7.21415811e-07 3.78060267e-07 2.12013755e-07 8.25392788e-08]
# Keep only the two leading components: the printed spectrum shows the first
# component alone explains ~99.8% of the variance (the data are not
# standardised, so the large-scale Proline feature dominates).
# Constructing a fresh PCA(n_components=2) replaces the original pattern of
# mutating the fitted estimator's n_components attribute before refitting,
# which relied on an implementation detail of scikit-learn.
pca = decomposition.PCA(n_components=2)
U = pca.fit_transform(X)
# Distance-weighted 5-NN classifier trained on the 2-D component scores
# (fit returns the estimator itself, so the chained call binds the same object).
clf = neighbors.KNeighborsClassifier(n_neighbors=5, weights='distance').fit(U, Y)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2, weights='distance')
# BUG FIX: the original reused the Alcohol/Malic-acid grid (xx1, xx2) to
# query the PCA-space classifier, so the decision map was computed over the
# wrong coordinate system.  Build the grid over the ranges of the two
# principal-component scores instead.
u1_min, u1_max = U[:, 0].min(), U[:, 0].max()
u2_min, u2_max = U[:, 1].min(), U[:, 1].max()
uu1, uu2 = np.meshgrid(np.linspace(u1_min, u1_max, 100),
                       np.linspace(u2_min, u2_max, 100))
XX = np.c_[uu1.ravel(), uu2.ravel()]
ZZ = clf.predict(XX)
ZZ = np.reshape(ZZ, uu1.shape)
# BUG FIX: the original displayed ZZ_d (the 7-NN map in Alcohol/Malic-acid
# space) and scattered X1/X2, instead of the PCA-space map ZZ with the
# component scores U.  Also origin='bottom' is not a valid matplotlib value
# ('upper'/'lower' only); 'lower' is what was meant.
plt.figure(figsize=(12.0, 8.0))
plt.winter()
plt.title('distance')
plt.imshow(ZZ,
           extent=[U[:, 0].min(), U[:, 0].max(), U[:, 1].min(), U[:, 1].max()],
           origin='lower', aspect='auto')
plt.scatter(U[:, 0], U[:, 1], s=35, c=Y)
plt.grid(True)
Image in a Jupyter notebook