Задача.
Загрузить набор данных Wine.
Первый атрибут -- номер класса.
Построить изображения проекций (на одном рисунке) набора данных на избранные пары значений признаков (выбрать некоторым образом 12 различных пар признаков).
Методом главных компонент построить сокращенный набор, содержащий наиболее существенные признаки (главные компоненты).
Построить изображения проекций (на одном рисунке) набора данных по всем парам выбранных новых признаков.
import random as rnd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Column names for the Wine data file, in file order: the class label first,
# followed by the thirteen measured attributes.
attrs = [
    'class_id',
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alcalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]
print(attrs)
['class_id', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline']
# Read the comma-separated Wine data file, labelling its columns with `attrs`
# (passing `names` makes pandas treat the file as having no header row).
wine_df = pd.read_csv(
    "data/wine.data",
    names=attrs,
    sep=",",
)
# Bare expression: displays the frame when it is the last statement of a
# notebook cell.
wine_df
| class_id | Alcohol | Malic acid | Ash | Alcalinity of ash | Magnesium | Total phenols | Flavanoids | Nonflavanoid phenols | Proanthocyanins | Color intensity | Hue | OD280/OD315 of diluted wines | Proline | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 14.23 | 1.71 | 2.43 | 15.6 | 127 | 2.80 | 3.06 | 0.28 | 2.29 | 5.640000 | 1.04 | 3.92 | 1065 |
| 1 | 1 | 13.20 | 1.78 | 2.14 | 11.2 | 100 | 2.65 | 2.76 | 0.26 | 1.28 | 4.380000 | 1.05 | 3.40 | 1050 |
| 2 | 1 | 13.16 | 2.36 | 2.67 | 18.6 | 101 | 2.80 | 3.24 | 0.30 | 2.81 | 5.680000 | 1.03 | 3.17 | 1185 |
| 3 | 1 | 14.37 | 1.95 | 2.50 | 16.8 | 113 | 3.85 | 3.49 | 0.24 | 2.18 | 7.800000 | 0.86 | 3.45 | 1480 |
| 4 | 1 | 13.24 | 2.59 | 2.87 | 21.0 | 118 | 2.80 | 2.69 | 0.39 | 1.82 | 4.320000 | 1.04 | 2.93 | 735 |
| 5 | 1 | 14.20 | 1.76 | 2.45 | 15.2 | 112 | 3.27 | 3.39 | 0.34 | 1.97 | 6.750000 | 1.05 | 2.85 | 1450 |
| 6 | 1 | 14.39 | 1.87 | 2.45 | 14.6 | 96 | 2.50 | 2.52 | 0.30 | 1.98 | 5.250000 | 1.02 | 3.58 | 1290 |
| 7 | 1 | 14.06 | 2.15 | 2.61 | 17.6 | 121 | 2.60 | 2.51 | 0.31 | 1.25 | 5.050000 | 1.06 | 3.58 | 1295 |
| 8 | 1 | 14.83 | 1.64 | 2.17 | 14.0 | 97 | 2.80 | 2.98 | 0.29 | 1.98 | 5.200000 | 1.08 | 2.85 | 1045 |
| 9 | 1 | 13.86 | 1.35 | 2.27 | 16.0 | 98 | 2.98 | 3.15 | 0.22 | 1.85 | 7.220000 | 1.01 | 3.55 | 1045 |
| 10 | 1 | 14.10 | 2.16 | 2.30 | 18.0 | 105 | 2.95 | 3.32 | 0.22 | 2.38 | 5.750000 | 1.25 | 3.17 | 1510 |
| 11 | 1 | 14.12 | 1.48 | 2.32 | 16.8 | 95 | 2.20 | 2.43 | 0.26 | 1.57 | 5.000000 | 1.17 | 2.82 | 1280 |
| 12 | 1 | 13.75 | 1.73 | 2.41 | 16.0 | 89 | 2.60 | 2.76 | 0.29 | 1.81 | 5.600000 | 1.15 | 2.90 | 1320 |
| 13 | 1 | 14.75 | 1.73 | 2.39 | 11.4 | 91 | 3.10 | 3.69 | 0.43 | 2.81 | 5.400000 | 1.25 | 2.73 | 1150 |
| 14 | 1 | 14.38 | 1.87 | 2.38 | 12.0 | 102 | 3.30 | 3.64 | 0.29 | 2.96 | 7.500000 | 1.20 | 3.00 | 1547 |
| 15 | 1 | 13.63 | 1.81 | 2.70 | 17.2 | 112 | 2.85 | 2.91 | 0.30 | 1.46 | 7.300000 | 1.28 | 2.88 | 1310 |
| 16 | 1 | 14.30 | 1.92 | 2.72 | 20.0 | 120 | 2.80 | 3.14 | 0.33 | 1.97 | 6.200000 | 1.07 | 2.65 | 1280 |
| 17 | 1 | 13.83 | 1.57 | 2.62 | 20.0 | 115 | 2.95 | 3.40 | 0.40 | 1.72 | 6.600000 | 1.13 | 2.57 | 1130 |
| 18 | 1 | 14.19 | 1.59 | 2.48 | 16.5 | 108 | 3.30 | 3.93 | 0.32 | 1.86 | 8.700000 | 1.23 | 2.82 | 1680 |
| 19 | 1 | 13.64 | 3.10 | 2.56 | 15.2 | 116 | 2.70 | 3.03 | 0.17 | 1.66 | 5.100000 | 0.96 | 3.36 | 845 |
| 20 | 1 | 14.06 | 1.63 | 2.28 | 16.0 | 126 | 3.00 | 3.17 | 0.24 | 2.10 | 5.650000 | 1.09 | 3.71 | 780 |
| 21 | 1 | 12.93 | 3.80 | 2.65 | 18.6 | 102 | 2.41 | 2.41 | 0.25 | 1.98 | 4.500000 | 1.03 | 3.52 | 770 |
| 22 | 1 | 13.71 | 1.86 | 2.36 | 16.6 | 101 | 2.61 | 2.88 | 0.27 | 1.69 | 3.800000 | 1.11 | 4.00 | 1035 |
| 23 | 1 | 12.85 | 1.60 | 2.52 | 17.8 | 95 | 2.48 | 2.37 | 0.26 | 1.46 | 3.930000 | 1.09 | 3.63 | 1015 |
| 24 | 1 | 13.50 | 1.81 | 2.61 | 20.0 | 96 | 2.53 | 2.61 | 0.28 | 1.66 | 3.520000 | 1.12 | 3.82 | 845 |
| 25 | 1 | 13.05 | 2.05 | 3.22 | 25.0 | 124 | 2.63 | 2.68 | 0.47 | 1.92 | 3.580000 | 1.13 | 3.20 | 830 |
| 26 | 1 | 13.39 | 1.77 | 2.62 | 16.1 | 93 | 2.85 | 2.94 | 0.34 | 1.45 | 4.800000 | 0.92 | 3.22 | 1195 |
| 27 | 1 | 13.30 | 1.72 | 2.14 | 17.0 | 94 | 2.40 | 2.19 | 0.27 | 1.35 | 3.950000 | 1.02 | 2.77 | 1285 |
| 28 | 1 | 13.87 | 1.90 | 2.80 | 19.4 | 107 | 2.95 | 2.97 | 0.37 | 1.76 | 4.500000 | 1.25 | 3.40 | 915 |
| 29 | 1 | 14.02 | 1.68 | 2.21 | 16.0 | 96 | 2.65 | 2.33 | 0.26 | 1.98 | 4.700000 | 1.04 | 3.59 | 1035 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 148 | 3 | 13.32 | 3.24 | 2.38 | 21.5 | 92 | 1.93 | 0.76 | 0.45 | 1.25 | 8.420000 | 0.55 | 1.62 | 650 |
| 149 | 3 | 13.08 | 3.90 | 2.36 | 21.5 | 113 | 1.41 | 1.39 | 0.34 | 1.14 | 9.400000 | 0.57 | 1.33 | 550 |
| 150 | 3 | 13.50 | 3.12 | 2.62 | 24.0 | 123 | 1.40 | 1.57 | 0.22 | 1.25 | 8.600000 | 0.59 | 1.30 | 500 |
| 151 | 3 | 12.79 | 2.67 | 2.48 | 22.0 | 112 | 1.48 | 1.36 | 0.24 | 1.26 | 10.800000 | 0.48 | 1.47 | 480 |
| 152 | 3 | 13.11 | 1.90 | 2.75 | 25.5 | 116 | 2.20 | 1.28 | 0.26 | 1.56 | 7.100000 | 0.61 | 1.33 | 425 |
| 153 | 3 | 13.23 | 3.30 | 2.28 | 18.5 | 98 | 1.80 | 0.83 | 0.61 | 1.87 | 10.520000 | 0.56 | 1.51 | 675 |
| 154 | 3 | 12.58 | 1.29 | 2.10 | 20.0 | 103 | 1.48 | 0.58 | 0.53 | 1.40 | 7.600000 | 0.58 | 1.55 | 640 |
| 155 | 3 | 13.17 | 5.19 | 2.32 | 22.0 | 93 | 1.74 | 0.63 | 0.61 | 1.55 | 7.900000 | 0.60 | 1.48 | 725 |
| 156 | 3 | 13.84 | 4.12 | 2.38 | 19.5 | 89 | 1.80 | 0.83 | 0.48 | 1.56 | 9.010000 | 0.57 | 1.64 | 480 |
| 157 | 3 | 12.45 | 3.03 | 2.64 | 27.0 | 97 | 1.90 | 0.58 | 0.63 | 1.14 | 7.500000 | 0.67 | 1.73 | 880 |
| 158 | 3 | 14.34 | 1.68 | 2.70 | 25.0 | 98 | 2.80 | 1.31 | 0.53 | 2.70 | 13.000000 | 0.57 | 1.96 | 660 |
| 159 | 3 | 13.48 | 1.67 | 2.64 | 22.5 | 89 | 2.60 | 1.10 | 0.52 | 2.29 | 11.750000 | 0.57 | 1.78 | 620 |
| 160 | 3 | 12.36 | 3.83 | 2.38 | 21.0 | 88 | 2.30 | 0.92 | 0.50 | 1.04 | 7.650000 | 0.56 | 1.58 | 520 |
| 161 | 3 | 13.69 | 3.26 | 2.54 | 20.0 | 107 | 1.83 | 0.56 | 0.50 | 0.80 | 5.880000 | 0.96 | 1.82 | 680 |
| 162 | 3 | 12.85 | 3.27 | 2.58 | 22.0 | 106 | 1.65 | 0.60 | 0.60 | 0.96 | 5.580000 | 0.87 | 2.11 | 570 |
| 163 | 3 | 12.96 | 3.45 | 2.35 | 18.5 | 106 | 1.39 | 0.70 | 0.40 | 0.94 | 5.280000 | 0.68 | 1.75 | 675 |
| 164 | 3 | 13.78 | 2.76 | 2.30 | 22.0 | 90 | 1.35 | 0.68 | 0.41 | 1.03 | 9.580000 | 0.70 | 1.68 | 615 |
| 165 | 3 | 13.73 | 4.36 | 2.26 | 22.5 | 88 | 1.28 | 0.47 | 0.52 | 1.15 | 6.620000 | 0.78 | 1.75 | 520 |
| 166 | 3 | 13.45 | 3.70 | 2.60 | 23.0 | 111 | 1.70 | 0.92 | 0.43 | 1.46 | 10.680000 | 0.85 | 1.56 | 695 |
| 167 | 3 | 12.82 | 3.37 | 2.30 | 19.5 | 88 | 1.48 | 0.66 | 0.40 | 0.97 | 10.260000 | 0.72 | 1.75 | 685 |
| 168 | 3 | 13.58 | 2.58 | 2.69 | 24.5 | 105 | 1.55 | 0.84 | 0.39 | 1.54 | 8.660000 | 0.74 | 1.80 | 750 |
| 169 | 3 | 13.40 | 4.60 | 2.86 | 25.0 | 112 | 1.98 | 0.96 | 0.27 | 1.11 | 8.500000 | 0.67 | 1.92 | 630 |
| 170 | 3 | 12.20 | 3.03 | 2.32 | 19.0 | 96 | 1.25 | 0.49 | 0.40 | 0.73 | 5.500000 | 0.66 | 1.83 | 510 |
| 171 | 3 | 12.77 | 2.39 | 2.28 | 19.5 | 86 | 1.39 | 0.51 | 0.48 | 0.64 | 9.899999 | 0.57 | 1.63 | 470 |
| 172 | 3 | 14.16 | 2.51 | 2.48 | 20.0 | 91 | 1.68 | 0.70 | 0.44 | 1.24 | 9.700000 | 0.62 | 1.71 | 660 |
| 173 | 3 | 13.71 | 5.65 | 2.45 | 20.5 | 95 | 1.68 | 0.61 | 0.52 | 1.06 | 7.700000 | 0.64 | 1.74 | 740 |
| 174 | 3 | 13.40 | 3.91 | 2.48 | 23.0 | 102 | 1.80 | 0.75 | 0.43 | 1.41 | 7.300000 | 0.70 | 1.56 | 750 |
| 175 | 3 | 13.27 | 4.28 | 2.26 | 20.0 | 120 | 1.59 | 0.69 | 0.43 | 1.35 | 10.200000 | 0.59 | 1.56 | 835 |
| 176 | 3 | 13.17 | 2.59 | 2.37 | 20.0 | 120 | 1.65 | 0.68 | 0.53 | 1.46 | 9.300000 | 0.60 | 1.62 | 840 |
| 177 | 3 | 14.13 | 4.10 | 2.74 | 24.5 | 96 | 2.05 | 0.76 | 0.56 | 1.35 | 9.200000 | 0.61 | 1.60 | 560 |
178 rows × 14 columns
# Choose 12 distinct feature pairs to plot.
#
# Indices are 1-based so they can be used directly with `attrs`, where
# index 0 is the class label and 1..13 are the measured features.
n_features = 13
n_pairs = 12

# Every unordered pair [i, j] with i < j, in the same order the nested
# loops produced them (78 pairs for 13 features).
List = [
    [i, j]
    for i in range(1, n_features + 1)
    for j in range(i + 1, n_features + 1)
]

# random.sample draws without replacement, so the 12 pairs are guaranteed to
# be distinct without a retry-until-unique loop.
pair = rnd.sample(List, n_pairs)
print(pair)
[[4, 7], [7, 13], [5, 11], [2, 8], [1, 10], [10, 13], [3, 8], [6, 7], [7, 11], [4, 10], [4, 11], [8, 11]]
# Draw the 12 selected feature-pair projections on a single figure, laid out
# as a 3x4 grid of scatter plots with points coloured by the class column.
plt.figure(figsize=(18.0, 10.0))
class_column = wine_df[attrs[0]]
for slot in range(12):
    plt.subplot(3, 4, slot + 1)
    # Each entry of `pair` holds two indices into `attrs`.
    x_name = attrs[pair[slot][0]]
    y_name = attrs[pair[slot][1]]
    plt.scatter(wine_df[x_name], wine_df[y_name], s=25, c=class_column)
    plt.xlabel(x_name)
    plt.ylabel(y_name)
    plt.grid(1)
plt.tight_layout()
plt.show()
# Pull every column out of the frame as a list of Series, in `attrs` order
# (class label first), and print them for inspection.
cols = [wine_df[column_name] for column_name in attrs]
print(cols)
[0 1
1 1
2 1
3 1
4 1
5 1
6 1
7 1
8 1
9 1
10 1
11 1
12 1
13 1
14 1
15 1
16 1
17 1
18 1
19 1
20 1
21 1
22 1
23 1
24 1
25 1
26 1
27 1
28 1
29 1
..
148 3
149 3
150 3
151 3
152 3
153 3
154 3
155 3
156 3
157 3
158 3
159 3
160 3
161 3
162 3
163 3
164 3
165 3
166 3
167 3
168 3
169 3
170 3
171 3
172 3
173 3
174 3
175 3
176 3
177 3
Name: class_id, dtype: int64, 0 14.23
1 13.20
2 13.16
3 14.37
4 13.24
5 14.20
6 14.39
7 14.06
8 14.83
9 13.86
10 14.10
11 14.12
12 13.75
13 14.75
14 14.38
15 13.63
16 14.30
17 13.83
18 14.19
19 13.64
20 14.06
21 12.93
22 13.71
23 12.85
24 13.50
25 13.05
26 13.39
27 13.30
28 13.87
29 14.02
...
148 13.32
149 13.08
150 13.50
151 12.79
152 13.11
153 13.23
154 12.58
155 13.17
156 13.84
157 12.45
158 14.34
159 13.48
160 12.36
161 13.69
162 12.85
163 12.96
164 13.78
165 13.73
166 13.45
167 12.82
168 13.58
169 13.40
170 12.20
171 12.77
172 14.16
173 13.71
174 13.40
175 13.27
176 13.17
177 14.13
Name: Alcohol, dtype: float64, 0 1.71
1 1.78
2 2.36
3 1.95
4 2.59
5 1.76
6 1.87
7 2.15
8 1.64
9 1.35
10 2.16
11 1.48
12 1.73
13 1.73
14 1.87
15 1.81
16 1.92
17 1.57
18 1.59
19 3.10
20 1.63
21 3.80
22 1.86
23 1.60
24 1.81
25 2.05
26 1.77
27 1.72
28 1.90
29 1.68
...
148 3.24
149 3.90
150 3.12
151 2.67
152 1.90
153 3.30
154 1.29
155 5.19
156 4.12
157 3.03
158 1.68
159 1.67
160 3.83
161 3.26
162 3.27
163 3.45
164 2.76
165 4.36
166 3.70
167 3.37
168 2.58
169 4.60
170 3.03
171 2.39
172 2.51
173 5.65
174 3.91
175 4.28
176 2.59
177 4.10
Name: Malic acid, dtype: float64, 0 2.43
1 2.14
2 2.67
3 2.50
4 2.87
5 2.45
6 2.45
7 2.61
8 2.17
9 2.27
10 2.30
11 2.32
12 2.41
13 2.39
14 2.38
15 2.70
16 2.72
17 2.62
18 2.48
19 2.56
20 2.28
21 2.65
22 2.36
23 2.52
24 2.61
25 3.22
26 2.62
27 2.14
28 2.80
29 2.21
...
148 2.38
149 2.36
150 2.62
151 2.48
152 2.75
153 2.28
154 2.10
155 2.32
156 2.38
157 2.64
158 2.70
159 2.64
160 2.38
161 2.54
162 2.58
163 2.35
164 2.30
165 2.26
166 2.60
167 2.30
168 2.69
169 2.86
170 2.32
171 2.28
172 2.48
173 2.45
174 2.48
175 2.26
176 2.37
177 2.74
Name: Ash, dtype: float64, 0 15.6
1 11.2
2 18.6
3 16.8
4 21.0
5 15.2
6 14.6
7 17.6
8 14.0
9 16.0
10 18.0
11 16.8
12 16.0
13 11.4
14 12.0
15 17.2
16 20.0
17 20.0
18 16.5
19 15.2
20 16.0
21 18.6
22 16.6
23 17.8
24 20.0
25 25.0
26 16.1
27 17.0
28 19.4
29 16.0
...
148 21.5
149 21.5
150 24.0
151 22.0
152 25.5
153 18.5
154 20.0
155 22.0
156 19.5
157 27.0
158 25.0
159 22.5
160 21.0
161 20.0
162 22.0
163 18.5
164 22.0
165 22.5
166 23.0
167 19.5
168 24.5
169 25.0
170 19.0
171 19.5
172 20.0
173 20.5
174 23.0
175 20.0
176 20.0
177 24.5
Name: Alcalinity of ash, dtype: float64, 0 127
1 100
2 101
3 113
4 118
5 112
6 96
7 121
8 97
9 98
10 105
11 95
12 89
13 91
14 102
15 112
16 120
17 115
18 108
19 116
20 126
21 102
22 101
23 95
24 96
25 124
26 93
27 94
28 107
29 96
...
148 92
149 113
150 123
151 112
152 116
153 98
154 103
155 93
156 89
157 97
158 98
159 89
160 88
161 107
162 106
163 106
164 90
165 88
166 111
167 88
168 105
169 112
170 96
171 86
172 91
173 95
174 102
175 120
176 120
177 96
Name: Magnesium, dtype: int64, 0 2.80
1 2.65
2 2.80
3 3.85
4 2.80
5 3.27
6 2.50
7 2.60
8 2.80
9 2.98
10 2.95
11 2.20
12 2.60
13 3.10
14 3.30
15 2.85
16 2.80
17 2.95
18 3.30
19 2.70
20 3.00
21 2.41
22 2.61
23 2.48
24 2.53
25 2.63
26 2.85
27 2.40
28 2.95
29 2.65
...
148 1.93
149 1.41
150 1.40
151 1.48
152 2.20
153 1.80
154 1.48
155 1.74
156 1.80
157 1.90
158 2.80
159 2.60
160 2.30
161 1.83
162 1.65
163 1.39
164 1.35
165 1.28
166 1.70
167 1.48
168 1.55
169 1.98
170 1.25
171 1.39
172 1.68
173 1.68
174 1.80
175 1.59
176 1.65
177 2.05
Name: Total phenols, dtype: float64, 0 3.06
1 2.76
2 3.24
3 3.49
4 2.69
5 3.39
6 2.52
7 2.51
8 2.98
9 3.15
10 3.32
11 2.43
12 2.76
13 3.69
14 3.64
15 2.91
16 3.14
17 3.40
18 3.93
19 3.03
20 3.17
21 2.41
22 2.88
23 2.37
24 2.61
25 2.68
26 2.94
27 2.19
28 2.97
29 2.33
...
148 0.76
149 1.39
150 1.57
151 1.36
152 1.28
153 0.83
154 0.58
155 0.63
156 0.83
157 0.58
158 1.31
159 1.10
160 0.92
161 0.56
162 0.60
163 0.70
164 0.68
165 0.47
166 0.92
167 0.66
168 0.84
169 0.96
170 0.49
171 0.51
172 0.70
173 0.61
174 0.75
175 0.69
176 0.68
177 0.76
Name: Flavanoids, dtype: float64, 0 0.28
1 0.26
2 0.30
3 0.24
4 0.39
5 0.34
6 0.30
7 0.31
8 0.29
9 0.22
10 0.22
11 0.26
12 0.29
13 0.43
14 0.29
15 0.30
16 0.33
17 0.40
18 0.32
19 0.17
20 0.24
21 0.25
22 0.27
23 0.26
24 0.28
25 0.47
26 0.34
27 0.27
28 0.37
29 0.26
...
148 0.45
149 0.34
150 0.22
151 0.24
152 0.26
153 0.61
154 0.53
155 0.61
156 0.48
157 0.63
158 0.53
159 0.52
160 0.50
161 0.50
162 0.60
163 0.40
164 0.41
165 0.52
166 0.43
167 0.40
168 0.39
169 0.27
170 0.40
171 0.48
172 0.44
173 0.52
174 0.43
175 0.43
176 0.53
177 0.56
Name: Nonflavanoid phenols, dtype: float64, 0 2.29
1 1.28
2 2.81
3 2.18
4 1.82
5 1.97
6 1.98
7 1.25
8 1.98
9 1.85
10 2.38
11 1.57
12 1.81
13 2.81
14 2.96
15 1.46
16 1.97
17 1.72
18 1.86
19 1.66
20 2.10
21 1.98
22 1.69
23 1.46
24 1.66
25 1.92
26 1.45
27 1.35
28 1.76
29 1.98
...
148 1.25
149 1.14
150 1.25
151 1.26
152 1.56
153 1.87
154 1.40
155 1.55
156 1.56
157 1.14
158 2.70
159 2.29
160 1.04
161 0.80
162 0.96
163 0.94
164 1.03
165 1.15
166 1.46
167 0.97
168 1.54
169 1.11
170 0.73
171 0.64
172 1.24
173 1.06
174 1.41
175 1.35
176 1.46
177 1.35
Name: Proanthocyanins, dtype: float64, 0 5.640000
1 4.380000
2 5.680000
3 7.800000
4 4.320000
5 6.750000
6 5.250000
7 5.050000
8 5.200000
9 7.220000
10 5.750000
11 5.000000
12 5.600000
13 5.400000
14 7.500000
15 7.300000
16 6.200000
17 6.600000
18 8.700000
19 5.100000
20 5.650000
21 4.500000
22 3.800000
23 3.930000
24 3.520000
25 3.580000
26 4.800000
27 3.950000
28 4.500000
29 4.700000
...
148 8.420000
149 9.400000
150 8.600000
151 10.800000
152 7.100000
153 10.520000
154 7.600000
155 7.900000
156 9.010000
157 7.500000
158 13.000000
159 11.750000
160 7.650000
161 5.880000
162 5.580000
163 5.280000
164 9.580000
165 6.620000
166 10.680000
167 10.260000
168 8.660000
169 8.500000
170 5.500000
171 9.899999
172 9.700000
173 7.700000
174 7.300000
175 10.200000
176 9.300000
177 9.200000
Name: Color intensity, dtype: float64, 0 1.04
1 1.05
2 1.03
3 0.86
4 1.04
5 1.05
6 1.02
7 1.06
8 1.08
9 1.01
10 1.25
11 1.17
12 1.15
13 1.25
14 1.20
15 1.28
16 1.07
17 1.13
18 1.23
19 0.96
20 1.09
21 1.03
22 1.11
23 1.09
24 1.12
25 1.13
26 0.92
27 1.02
28 1.25
29 1.04
...
148 0.55
149 0.57
150 0.59
151 0.48
152 0.61
153 0.56
154 0.58
155 0.60
156 0.57
157 0.67
158 0.57
159 0.57
160 0.56
161 0.96
162 0.87
163 0.68
164 0.70
165 0.78
166 0.85
167 0.72
168 0.74
169 0.67
170 0.66
171 0.57
172 0.62
173 0.64
174 0.70
175 0.59
176 0.60
177 0.61
Name: Hue, dtype: float64, 0 3.92
1 3.40
2 3.17
3 3.45
4 2.93
5 2.85
6 3.58
7 3.58
8 2.85
9 3.55
10 3.17
11 2.82
12 2.90
13 2.73
14 3.00
15 2.88
16 2.65
17 2.57
18 2.82
19 3.36
20 3.71
21 3.52
22 4.00
23 3.63
24 3.82
25 3.20
26 3.22
27 2.77
28 3.40
29 3.59
...
148 1.62
149 1.33
150 1.30
151 1.47
152 1.33
153 1.51
154 1.55
155 1.48
156 1.64
157 1.73
158 1.96
159 1.78
160 1.58
161 1.82
162 2.11
163 1.75
164 1.68
165 1.75
166 1.56
167 1.75
168 1.80
169 1.92
170 1.83
171 1.63
172 1.71
173 1.74
174 1.56
175 1.56
176 1.62
177 1.60
Name: OD280/OD315 of diluted wines, dtype: float64, 0 1065
1 1050
2 1185
3 1480
4 735
5 1450
6 1290
7 1295
8 1045
9 1045
10 1510
11 1280
12 1320
13 1150
14 1547
15 1310
16 1280
17 1130
18 1680
19 845
20 780
21 770
22 1035
23 1015
24 845
25 830
26 1195
27 1285
28 915
29 1035
...
148 650
149 550
150 500
151 480
152 425
153 675
154 640
155 725
156 480
157 880
158 660
159 620
160 520
161 680
162 570
163 675
164 615
165 520
166 695
167 685
168 750
169 630
170 510
171 470
172 660
173 740
174 750
175 835
176 840
177 560
Name: Proline, dtype: int64]
# Feature matrix: every column after the class label, stacked side by side
# in one call. Growing the array with np.c_ once per column re-copies the
# accumulated matrix on each iteration and hard-codes the column count.
x = np.column_stack([col.values for col in cols[1:]])
# Target vector: the class label column.
y = cols[0].values
print(x)
print(y)
[[ 1.42300000e+01 1.71000000e+00 2.43000000e+00 ..., 1.04000000e+00
3.92000000e+00 1.06500000e+03]
[ 1.32000000e+01 1.78000000e+00 2.14000000e+00 ..., 1.05000000e+00
3.40000000e+00 1.05000000e+03]
[ 1.31600000e+01 2.36000000e+00 2.67000000e+00 ..., 1.03000000e+00
3.17000000e+00 1.18500000e+03]
...,
[ 1.32700000e+01 4.28000000e+00 2.26000000e+00 ..., 5.90000000e-01
1.56000000e+00 8.35000000e+02]
[ 1.31700000e+01 2.59000000e+00 2.37000000e+00 ..., 6.00000000e-01
1.62000000e+00 8.40000000e+02]
[ 1.41300000e+01 4.10000000e+00 2.74000000e+00 ..., 6.10000000e-01
1.60000000e+00 5.60000000e+02]]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3]
import sklearn.decomposition as decomposition
# Fit PCA on the feature matrix with default settings (n_components is left
# unset), so the explained variance of every component can be inspected.
# NOTE(review): x is passed in unscaled. The explained-variance output
# recorded in this file puts ~99.8% on the first component, which may only
# reflect the column with the largest numeric range -- consider standardising
# the features first. TODO confirm.
pca = decomposition.PCA()
pca.fit(x)
PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, svd_solver='auto', tol=0.0, whiten=False)
# Bare expression: shows the per-component explained-variance ratios of the
# fitted PCA when run as the last statement of a notebook cell.
pca.explained_variance_ratio_
array([ 9.98091230e-01, 1.73591562e-03, 9.49589576e-05,
5.02173562e-05, 1.23636847e-05, 8.46213034e-06,
2.80681456e-06, 1.52308053e-06, 1.12783044e-06,
7.21415811e-07, 3.78060267e-07, 2.12013755e-07,
8.25392788e-08])
# Bar chart of the explained-variance ratio of each principal component.
# The x positions are derived from the number of ratios instead of a
# hard-coded 13, so the chart still draws if `pca` has been refitted with a
# different n_components before this code is re-run.
ratios = pca.explained_variance_ratio_
plt.figure(figsize=(10, 8))
plt.bar(range(1, len(ratios) + 1), ratios)
plt.grid(True)
plt.minorticks_on()
plt.show()
# Restrict the estimator to two components, refit it on x and project the
# samples onto those components.
pca.set_params(n_components=2)
U = pca.fit_transform(x)
# Report the shape of the projected data and the estimator's noise variance.
print(U.shape)
print(pca.noise_variance_)
(178, 2) 1.55306269038
# Scatter the samples in the plane of the two retained principal components,
# colouring each point by its class label.
plt.figure(figsize=(5.0, 10.0))
first_component = U[:, 0]
second_component = U[:, 1]
plt.scatter(first_component, second_component, s=25, c=y)
plt.xlabel("Компонент 1")
plt.ylabel("Компонент 2")
plt.grid(1)
plt.show()