Задача
Используем набор данных Wine. Используем первые две главные компоненты в качестве признаков.
Строим классификатор SVC для каждого типа ядер (linear, rbf, poly).
import numpy as np
import pandas as pd
from sklearn import svm, metrics
import matplotlib.pyplot as plt
%matplotlib inline
# Column labels applied to wine.data: the class label first, followed by the
# thirteen measured attributes.
attrs = [
    'class_id',
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alcalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]
print(attrs)

# Comma-separated file; names= supplies the column labels listed above.
wine_df = pd.read_csv("wine.data", names=attrs, sep=",")
wine_df

# Feature matrix: one column per attribute, in the order given by attrs,
# with the class label (attrs[0]) left out. The label becomes the target y.
cols = tuple(wine_df[name].values for name in attrs[1:])
x = np.column_stack(cols)
y = wine_df['class_id']
print(x, y)
import sklearn.decomposition as decomposition

# First fit a PCA that keeps every component, to inspect how much of the
# variance each component explains.
pca = decomposition.PCA()
pca.fit(x)
pca.explained_variance_ratio_

plt.figure(figsize=(15, 10))
# Number the bars from 1; take the component count from the fitted model so
# the chart stays correct if the number of feature columns changes.
n_fitted = len(pca.explained_variance_ratio_)
plt.bar(range(1, n_fitted + 1), pca.explained_variance_ratio_)
plt.grid(True)
plt.minorticks_on()
plt.show()

# Refit keeping only the first two components; X12 holds the projected data
# that the SVC experiments below use as features.
pca.set_params(n_components=2)
X12 = pca.fit_transform(x)
print(pca.noise_variance_)

# Scatter of the data in the plane of the two components, coloured by class.
plt.figure(figsize=(8.0, 10.0))
plt.scatter(X12[:, 0], X12[:, 1], s=25, c=y)
plt.xlabel("Компонент 1")
plt.ylabel("Компонент 2")
plt.grid(True)
plt.show()
def plot_map2d(clf, XX, n_points=200, interpolation="nearest"):
    """Draw the classifier's predictions over the bounding box of ``XX``.

    A regular ``n_points`` x ``n_points`` grid is laid over the range spanned
    by the first two columns of ``XX``; ``clf.predict`` is evaluated at every
    grid node and the result is shown as an image on the current axes.

    Args:
        clf: Fitted estimator exposing ``predict`` for two-column input.
        XX: 2-D array; column 0 gives the horizontal range and column 1 the
            vertical range of the map.
        n_points: Number of grid nodes along each axis.
        interpolation: Passed to ``plt.imshow``. The default is ``"nearest"``
            because the image holds class labels: a smoothing mode such as
            ``"bilinear"`` blends neighbouring labels into intermediate
            values, which paints a band of a different class colour along the
            border between two regions.
    """
    x_min, x_max = XX[:, 0].min(), XX[:, 0].max()
    y_min, y_max = XX[:, 1].min(), XX[:, 1].max()

    xx, yy = np.meshgrid(
        np.linspace(x_min, x_max, n_points),
        np.linspace(y_min, y_max, n_points),
    )

    # Flatten the grid into (n_points * n_points, 2) samples for predict(),
    # then fold the predictions back into the grid's shape.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = clf.predict(grid_points).reshape(xx.shape)

    # origin="lower" puts row 0 (y_min) at the bottom so the image lines up
    # with scatter plots drawn in the same data coordinates.
    plt.imshow(
        Z,
        extent=(x_min, x_max, y_min, y_max),
        aspect="auto",
        interpolation=interpolation,
        origin="lower",
    )
def myPlot(kname, X, **kw):
    """Fit an SVC with the given kernel on ``X`` and plot its decision map.

    Prints the accuracy on the data the model was fitted on, then draws the
    prediction map, the support vectors (large white markers) and the samples
    coloured by class.

    Args:
        kname: Kernel name passed to ``svm.SVC(kernel=...)``; also used as the
            plot title.
        X: 2-D array with two feature columns. Rows must correspond to the
            module-level labels ``y``.
        **kw: Extra keyword arguments forwarded to ``svm.SVC``.
    """
    plt.figure(figsize=(8.0, 10.0))

    clf = svm.SVC(kernel=kname, **kw)
    clf.fit(X, y)

    # Score on the X that was passed in, not on the global X12; otherwise the
    # reported accuracy is wrong whenever the function is called with other data.
    print('Точность', kname, metrics.accuracy_score(y, clf.predict(X)))

    plt.title(kname)
    plot_map2d(clf, X)
    # Support vectors go first as big white discs so the class-coloured
    # samples drawn afterwards sit on top of them.
    plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=144, c='w')
    plt.scatter(X[:, 0], X[:, 1], s=25, c=y)
    plt.xlabel('komp1')
    plt.ylabel('komp2')
    plt.show()
# One experiment per kernel, run in this order; each entry pairs the kernel
# name with the extra SVC keyword arguments used for it.
kernel_runs = (
    ('linear', {}),
    ('rbf', {'gamma': 0.001}),
    ('poly', {}),
)
for kernel_name, svc_options in kernel_runs:
    myPlot(kernel_name, X12, **svc_options)