import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
from io import StringIO
data_string = """
x1, x2, y
0, 1, 1
0, 2, 1
2, 1, 1
2, 2, 1
1, 1, 1
1, 2, 1
-1, -1, -1
-1, -2, -1
0, -1, -1
0, -2, -1
1, -1, -1
1, -2, -1
1.5, -1.1, 1
"""
df = pd.read_csv(StringIO(data_string), sep=r'\s*,\s+', engine='python')  # regex sep absorbs the spaces around each comma
X = df[['x1', 'x2']]
y = df.y
print(f'X=\n{X},\ny=\n{y}')
X=
      x1   x2
0    0.0  1.0
1    0.0  2.0
2    2.0  1.0
3    2.0  2.0
4    1.0  1.0
5    1.0  2.0
6   -1.0 -1.0
7   -1.0 -2.0
8    0.0 -1.0
9    0.0 -2.0
10   1.0 -1.0
11   1.0 -2.0
12   1.5 -1.1,
y=
0     1
1     1
2     1
3     1
4     1
5     1
6    -1
7    -1
8    -1
9    -1
10   -1
11   -1
12    1
Name: y, dtype: int64
C_values = [1000, 1]
for C in C_values:
    plt.figure()  # start a new figure for each C so the two plots do not overlay
    clf = svm.SVC(kernel="linear", C=C)
    clf.fit(X, y)
    w = clf.coef_[0]
    b = clf.intercept_[0]
    print(f'The decision boundary is {w[0]:.3} * x1 + {w[1]:.3} * x2 + {b:.3} = 0.')
    print(f'The training accuracy is {clf.score(X, y):.3}.')
    print(f'clf.score={clf.score(X, y)}')
    plt.plot(X.x1[y == -1], X.x2[y == -1], '.r', label='-1')
    plt.plot(X.x1[y == 1], X.x2[y == 1], '.b', label='+1')
    plt.title(f'C={C}')
    low = -3
    high = 3
    plt.xlim(low, high)
    plt.ylim(low, high)
    xplot = np.linspace(start=low, stop=high)
    # solve w[0]*x + w[1]*y + b = 0 for y to plot the decision boundary
    yplot = -(w[0] * xplot + b) / w[1]
    plt.plot(xplot, yplot, label=r'decision boundary $\mathbf{wx} + b = 0$')
    plt.plot(xplot, yplot + 1 / w[1], ':', label=r'+1 support $\mathbf{wx} + b = 1$')
    plt.plot(xplot, yplot - 1 / w[1], ':', label=r'-1 support $\mathbf{wx} + b = -1$')
    # plt.legend()
    plt.show(block=False)
The decision boundary is 4.66 * x1 + 3.33 * x2 + -2.33 = 0.
The training accuracy is 1.0.
clf.score=1.0
The decision boundary is 0.5 * x1 + 1.0 * x2 + -0.0 = 0.
The training accuracy is 0.923.
clf.score=0.9230769230769231
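To see how C changed the fit, it helps to inspect the support vectors and the margin directly. A minimal sketch using the clf and w left over from the last iteration (C=1); the attribute names are standard scikit-learn:
# margin width is 2 / ||w||; a smaller C gives a wider, softer margin
print(f'margin width = {2 / np.linalg.norm(w):.3}')
print(f'support vectors per class: {clf.n_support_}')
print(f'support vectors:\n{clf.support_vectors_}')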
Next, guess transmission type (am: 0 = automatic, 1 = manual) from car weight and gas mileage, using the mtcars data.
df = pd.read_csv('http://www.stat.wisc.edu/~jgillett/451/data/mtcars.csv', index_col=0)
X = df[['wt', 'mpg']]
y = df.am
for C in C_values:
    plt.figure()  # start a new figure for each C so the two plots do not overlay
    clf = svm.SVC(kernel="linear", C=C)
    clf.fit(X, y)
    w = clf.coef_[0]
    b = clf.intercept_[0]
    print(f'The decision boundary is {w[0]:.3} * weight + {w[1]:.3} * mileage + {b:.3} = 0.')
    print(f'The training accuracy is {clf.score(X, y):.3}.')
    plt.plot(X.wt[y == 0], X.mpg[y == 0], '.r', label='automatic')
    plt.plot(X.wt[y == 1], X.mpg[y == 1], '+b', label='manual')  # '+b' = blue plus markers
    plt.xlim(0, 6)
    plt.ylim(0, 35)
    plt.xlabel('weight (1000s of pounds)')
    plt.ylabel('gas mileage (miles per gallon)')
    plt.title(f'SVM to guess transmission from car weight and mileage, C={C}')
    xplot = np.linspace(start=0, stop=6)
    yplot = -(w[0] * xplot + b) / w[1]
    plt.plot(xplot, yplot, label=r'decision boundary $\mathbf{wx} + b = 0$')
    plt.plot(xplot, yplot + 1 / w[1], ':', label=r'+1 support $\mathbf{wx} + b = 1$')
    plt.plot(xplot, yplot - 1 / w[1], ':', label=r'-1 support $\mathbf{wx} + b = -1$')
    plt.legend()
    plt.show(block=False)
The decision boundary is -4.77 * weight + -0.24 * mileage + 19.5 = 0.
The training accuracy is 0.938.
The decision boundary is -1.94 * weight + -0.0296 * mileage + 6.23 = 0.
The training accuracy is 0.906.
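As a quick sanity check, the fitted clf (from the last iteration, C=1) can classify a hypothetical car; the weight and mileage below are made-up values for illustration:
# predict transmission for a hypothetical 3000-lb car getting 22 mpg (made-up values)
new_car = pd.DataFrame({'wt': [3.0], 'mpg': [22.0]})  # a DataFrame keeps the feature names clf was fit with
print(f'predicted am = {clf.predict(new_car)[0]}')  # 0 = automatic, 1 = manual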
The next data set consists of two noisy concentric circles; these are not linearly separable in 2D.
n = 10 # we will plot 4*n points, 2*n red and 2*n blue
radius = (2, 5)
X = np.zeros(shape=(4 * n, 2))
sigma = 0.5
rng = np.random.default_rng(seed=0)  # seed once, outside the loop, so the two rings get different noise draws
for i in (0, 1):
    x = np.linspace(start=-radius[i], stop=radius[i], num=n)
    x_low = x + rng.normal(loc=0, scale=sigma, size=n)
    x_high = x + rng.normal(loc=0, scale=sigma, size=n)
    y_low = -np.sqrt(radius[i]**2 - x**2) + rng.normal(loc=0, scale=sigma, size=n)
    y_high = np.sqrt(radius[i]**2 - x**2) + rng.normal(loc=0, scale=sigma, size=n)
    X[(i * 2*n):((i + 1) * 2*n), 0] = np.concatenate((x_low, x_high))
    X[(i * 2*n):((i + 1) * 2*n), 1] = np.concatenate((y_low, y_high))
y = np.concatenate((np.full(shape=2*n, fill_value=0), np.full(shape=2*n, fill_value=1)))
# save data to file for future use
df = pd.DataFrame({'x0': X[:, 0], 'x1': X[:, 1], 'y': y})
df.to_csv(path_or_buf='circles.csv', index=False, float_format='%.3f')
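Since the data are saved for future use, here is a minimal round-trip sketch that reloads circles.csv into the same shapes:
# reload the saved data and recover X and y as arrays
df2 = pd.read_csv('circles.csv')
X2 = df2[['x0', 'x1']].to_numpy()
y2 = df2.y.to_numpy()
print(f'reloaded X2.shape={X2.shape}, y2.shape={y2.shape}')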
plt.plot(X[y == 0, 0], X[y == 0, 1], '.r', label='0')
plt.plot(X[y == 1, 0], X[y == 1, 1], '+b', label='1')
r = 6
plt.xlim(-r, r)
plt.ylim(-r, r)
plt.title('SVM data that are not linearly separable call for the kernel trick.')
_ = plt.legend()
The transformed data are easy to separate linearly with a plane. The kernel trick avoids this explicit transformation but has the same effect: for the map $\phi(p, q) = (p^2, \sqrt{2}pq, q^2)$ used below, $\phi(\mathbf{a}) \cdot \phi(\mathbf{b}) = (\mathbf{a} \cdot \mathbf{b})^2$, the degree-2 polynomial kernel, so the SVM only ever needs dot products of the original points.
fig = plt.figure(figsize=(8, 8)) # (width, height) in inches
ax = fig.add_subplot(111, projection='3d') # 111 => nrows=1, ncols=1, index=1
# plot 2D data in z=0 plane
ax.plot3D(X[y==0,0], X[y==0,1], 0, 'or', markersize=3, label='original 2D 0') # 'or' = circle, red
ax.plot3D(X[y==1,0], X[y==1,1], 0, '+b', markersize=3, label='original 2D 1') # '+b' = plus, blue
def phi(x, y): # map the 2D point (x, y) to the 3D point in its return line
    return (x**2, np.sqrt(2)*x*y, y**2)
# plot 3D transformed data:
# transform vectors of x- and y-plotting coordinates into 3D, for the (classification) y==0 case:
xplot, yplot, zplot = phi(X[y==0,0], X[y==0,1])
ax.plot3D(xplot, yplot, zplot, 'or', label='transformed 3D 0')
# transform for the y==1 case:
xplot, yplot, zplot = phi(X[y==1,0], X[y==1,1])
ax.plot3D(xplot, yplot, zplot, '+b', label='transformed 3D 1')
ax.view_init(elev=10, azim=-70)
plt.legend(loc='center left')
#plt.title(f'Transform 2D (p, q) to 3D $(p^2, \\sqrt{{2}}pq, q^2)$') # default title is too high
ax.set_title(f'Transform 2D (p, q) to 3D $(p^2, \\sqrt{{2}}pq, q^2)$', y=0.87) # y=1.0 is top of plot
plt.xlabel('p')
plt.ylabel('q')
plt.savefig(fname='circlesSVM_3D.png')
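A quick numeric check that this map implements the degree-2 polynomial kernel, i.e. that dot products of transformed points equal squared dot products of the original points. This is a minimal sketch using the phi() defined above and two arbitrary points:
# verify phi(a) . phi(b) == (a . b)**2 for arbitrary 2D points a and b
a = np.array([1.0, 2.0])
b = np.array([3.0, -0.5])
lhs = np.dot(phi(a[0], a[1]), phi(b[0], b[1]))
rhs = np.dot(a, b)**2
print(f'phi(a).phi(b) = {lhs}, (a.b)^2 = {rhs}')  # both print 4.0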
clf_linear = svm.SVC(kernel='linear', C=1)
clf_linear.fit(X, y)
print(f'clf_linear.score(X, y)={clf_linear.score(X, y)}')
clf_linear.score(X, y)=0.5
clf_RBF = svm.SVC(kernel='rbf', C=1, gamma='scale')
clf_RBF.fit(X, y)
print(f'clf_RBF.score(X, y)={clf_RBF.score(X, y)}')
clf_RBF.score(X, y)=1.0
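To visualize what the RBF kernel learned, evaluate the classifier on a grid and shade the two predicted regions. A minimal sketch (the 200-point grid resolution and colormap are arbitrary choices):
# shade clf_RBF's predicted regions over the same plot window used above
grid = np.linspace(-r, r, 200)
gx, gy = np.meshgrid(grid, grid)
zz = clf_RBF.predict(np.column_stack((gx.ravel(), gy.ravel()))).reshape(gx.shape)
plt.contourf(gx, gy, zz, levels=[-0.5, 0.5, 1.5], cmap='coolwarm', alpha=0.2)
plt.plot(X[y == 0, 0], X[y == 0, 1], '.r', label='0')
plt.plot(X[y == 1, 0], X[y == 1, 1], '+b', label='1')
plt.title('RBF-kernel SVM decision regions')
plt.legend()
plt.show(block=False)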