Kernel SVM
Importing the libraries
In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
Importing the dataset
In [2]:
# Load the ads dataset from the working directory, then preview its first ten rows.
df = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')
df.head(n=10)
Out[2]:
| | Age | EstimatedSalary | Purchased |
|---|---|---|---|
| 0 | 19 | 19000 | 0 |
| 1 | 35 | 20000 | 0 |
| 2 | 26 | 43000 | 0 |
| 3 | 27 | 57000 | 0 |
| 4 | 19 | 76000 | 0 |
| 5 | 27 | 58000 | 0 |
| 6 | 27 | 84000 | 0 |
| 7 | 32 | 150000 | 1 |
| 8 | 25 | 33000 | 0 |
| 9 | 35 | 65000 | 0 |
In [3]:
# Feature matrix: every column except the last (Age and EstimatedSalary in the preview above).
X = df.iloc[:, :-1].to_numpy()
# Target vector: the last column (Purchased).
y = df.iloc[:, -1].to_numpy()
Splitting the dataset into the Training set and Test set
In [4]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for evaluation; the fixed seed makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)
In [5]:
# Peek at the first three unscaled training rows.
print(X_train[:3])
[[ 44 39000] [ 32 120000] [ 38 50000]]
In [6]:
# Peek at the labels that belong to those first three training rows.
print(y_train[:3])
[0 1 0]
Feature Scaling
In [7]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics leak into training.
# NOTE: X_train / X_test are overwritten with their scaled versions here, so
# re-running this cell without re-running the split cell would fit the scaler
# on already-scaled data.
sc = StandardScaler()
sc.fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)
In [8]:
# First three training rows after standardisation.
print(X_train[:3])
[[ 0.58164944 -0.88670699] [-0.60673761 1.46173768] [-0.01254409 -0.5677824 ]]
In [9]:
# First three test rows after applying the training-fitted scaler.
print(X_test[:3])
[[-0.80480212 0.50496393] [-0.01254409 -0.5677824 ] [-0.30964085 0.1570462 ]]
Training the Kernel SVM model on the Training set
In [10]:
from sklearn.svm import SVC

# RBF-kernel support vector classifier; all other hyper-parameters stay at
# their defaults. Per the scikit-learn docs, random_state only influences the
# probability estimates, which are disabled by default (probability=False).
classifier = SVC(
    kernel='rbf',
    random_state=0,
)
# Kept as the cell's last expression so the fitted estimator is displayed.
classifier.fit(X_train, y_train)
Out[10]:
SVC(random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Parameters
| C | 1.0 | |
| kernel | 'rbf' | |
| degree | 3 | |
| gamma | 'scale' | |
| coef0 | 0.0 | |
| shrinking | True | |
| probability | False | |
| tol | 0.001 | |
| cache_size | 200 | |
| class_weight | None | |
| verbose | False | |
| max_iter | -1 | |
| decision_function_shape | 'ovr' | |
| break_ties | False | |
| random_state | 0 |
Predicting a new result
In [11]:
# One hypothetical customer given as [Age, EstimatedSalary]. The model was
# trained on standardised features, so the sample goes through the same scaler
# before prediction.
new_sample_scaled = sc.transform([[30, 87000]])
print(classifier.predict(new_sample_scaled))
[0]
Predicting the Test set results
In [12]:
# Predict a class label for every row of the (already scaled) test split.
y_pred = classifier.predict(X_test)
# Optional debugging aid, left disabled: prints predicted and true labels side by side.
#print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))
Making the Confusion Matrix
In [13]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Rows are the true classes and columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
[[64 4] [ 3 29]]
In [14]:
# Fraction of test rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)
Out[14]:
0.93
Visualising the Training set results
In [15]:
# Plot palette shared by the visualisation cells: entry i is used for the
# i-th class label in sorted order (see the enumerate(np.unique(...)) loops).
colors = [
    "#FA8072",
    "#1E90FF",
]
In [16]:
from matplotlib.colors import ListedColormap

# Draw the classifier's decision regions in original (unscaled) units and
# overlay the training observations.
X_set, y_set = sc.inverse_transform(X_train), y_train

# Each axis gets a step suited to its own scale. Sampling the salary axis at
# 0.25 (as the age axis is) would create a grid of roughly 10^8 points, which
# needs gigabytes of memory and makes classifier.predict extremely slow,
# while adding nothing visible to the figure.
age_step = 0.25
salary_step = 250
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 10, stop=X_set[:, 0].max() + 10, step=age_step),
    np.arange(start=X_set[:, 1].min() - 1000, stop=X_set[:, 1].max() + 1000, step=salary_step),
)

# The model was trained on standardised features, so the grid is scaled with
# the same fitted scaler before prediction.
grid_points = np.column_stack((X1.ravel(), X2.ravel()))
Z = classifier.predict(sc.transform(grid_points)).reshape(X1.shape)

# Build the colormap once; colors[i] is the i-th entry of that same palette.
region_cmap = ListedColormap(colors)
plt.contourf(X1, X2, Z, alpha=0.75, cmap=region_cmap)
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# One scatter series per class so each gets its own colour and legend entry.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], color=colors[i], label=j)

plt.title('Kernel SVM (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
Visualising the Test set results
In [17]:
from matplotlib.colors import ListedColormap

# Draw the classifier's decision regions in original (unscaled) units and
# overlay the test observations.
X_set, y_set = sc.inverse_transform(X_test), y_test

# Create a grid of points. Each axis gets a step suited to its own scale:
# sampling the salary axis at 0.25 would create a grid of roughly 10^8 points,
# which needs gigabytes of memory and makes classifier.predict extremely slow.
# Padding matches the training-set plot (10 years / 1000 salary units) so the
# two figures are framed the same way.
age_step = 0.25
salary_step = 250
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 10, stop=X_set[:, 0].max() + 10, step=age_step),
    np.arange(start=X_set[:, 1].min() - 1000, stop=X_set[:, 1].max() + 1000, step=salary_step),
)

# Predict for each point on the grid; the grid is scaled first because the
# model was trained on standardised features.
grid_points = np.column_stack((X1.ravel(), X2.ravel()))
Z = classifier.predict(sc.transform(grid_points)).reshape(X1.shape)

# Plot the decision boundary with the shared palette, so the regions and the
# scatter points below are guaranteed to use the same colours.
plt.contourf(X1, X2, Z, alpha=0.75, cmap=ListedColormap(colors))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# Plot the test set points, one scatter series per class.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(
        X_set[y_set == j, 0], X_set[y_set == j, 1],
        color=colors[i], label=j
    )

# Add titles and labels
plt.title('Kernel SVM (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()