import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
# Toy example: fit an ordinary least-squares line through three (x, y) points.
X = [[1], [2], [3]]  # 2D: one row per observation, one column per feature
y = [1, 3, 2]
model = linear_model.LinearRegression().fit(X, y)  # .fit() returns the fitted estimator
b, w = model.intercept_, model.coef_[0]  # intercept and the single slope
print(f'intercept={b}, slope={w}')
intercept=1.0000000000000004, slope=0.4999999999999998
# Simple linear regression on the mtcars data: model mpg as a linear function of wt.
df = pd.read_csv(
    'http://www.stat.wisc.edu/~jgillett/451/data/mtcars.csv',
    index_col=0,
)
# df
y = df['mpg']
X = df[['wt']]  # double brackets keep X 2D, which is what .fit() is given here
model = linear_model.LinearRegression().fit(X, y)  # .fit() returns the fitted estimator
b, w = model.intercept_, model.coef_[0]
print(f'intercept={b}, slope={w}')
y_hat = model.predict(X)  # numerically the same as w * X['wt'] + b

# Scatter plot of the raw observations; plotting uses the 1D weight column.
x = df['wt']
plt.plot(x, y, '.', color='black', label='data')
plt.ylabel('mpg')
plt.xlabel('weight')
plt.title('mtcars')
plt.ylim(0, 40)
plt.xlim(0, 6)

# Fitted line. In the f-string, \ and { and } are doubled ('escaped') so that
# the Latex engine receives \hat{y}.
line_label = f'$\\hat{{y}}=${round(b, 2)} + ({round(w, 2)})$x$'
plt.plot(x, y_hat, color='black', label=line_label)

# Errors: one thin vertical segment per car from (x, y) to (x, y_hat).
# label=None prevents duplicate legend entries for the segments; the empty
# plot that follows adds the single 'errors' legend entry instead.
residual_style = {'color': 'black', 'linewidth': .5}
plt.plot([x, x], [y, y_hat], label=None, **residual_style)  # [x1, x2], [y1, y2]
plt.plot([], [], label='errors', **residual_style)

plt.plot(x, y_hat, '.', color='red', label='fitted values')
plt.legend()
plt.show(block=False)

print(f'R^2 is {model.score(X, y):.3}')  # coefficient of determination

# Predict mpg for a car with wt=3; as the last expression of a notebook cell,
# the result is displayed rather than printed.
X_new = pd.DataFrame({'wt': [3]})
model.predict(X_new)
intercept=37.28512616734204, slope=-5.344471572722678
R^2 is 0.753
array([21.25171145])
Next, fit a multiple regression model of the form mileage = intercept + (slope_wt)(weight) + (slope_hp)(horsepower).
# Multiple linear regression: model mpg from two features, wt and hp.
# Reuses the existing `model` object; calling .fit() again refits it on the new X.
X, y = df[['wt', 'hp']], df['mpg']
model.fit(X, y)
print(f'mpg = {model.intercept_:.3} + ({model.coef_[0]:.3})wt + ({model.coef_[1]:.3})hp')

# Fitted values; numerically the same as
# model.intercept_ + model.coef_[0] * X['wt'] + model.coef_[1] * X['hp']
y_hat = model.predict(X)
print(f'R^2 is {model.score(X, y):.3}')  # coefficient of determination

# Predict mpg for a car with wt=3 and hp=150 (displayed as the cell's last expression).
model.predict(
    pd.DataFrame({'wt': [3], 'hp': [150]})
)
mpg = 37.2 + (-3.88)wt + (-0.0318)hp
R^2 is 0.827
array([20.82783584])