import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def normalEqn(X, y):
    """Fit a linear model by solving the normal equation.

    A column of ones is prepended to ``X`` (the intercept term) and the
    linear system ``(Xb.T Xb) theta = Xb.T y`` is solved for ``theta``.

    Args:
        X: Feature matrix with one row per sample and one column per
            feature. A 1-D sequence is treated as a single feature column.
        y: Target values, one per sample.

    Returns:
        The solution ``theta``: intercept first, then one coefficient per
        column of ``X``.

    Raises:
        numpy.linalg.LinAlgError: If ``Xb.T Xb`` is singular.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        # A flat sequence of samples is a single feature: make it a column.
        X = X.reshape(-1, 1)

    # Add intercept
    ones = np.ones((X.shape[0], 1))
    Xb = np.concatenate([ones, X], axis=1)

    # Normal equation
    lhs = Xb.T.dot(Xb)
    rhs = Xb.T.dot(y)

    # Matrix inverse is slow and introduces unnecessary error.
    # Anytime the math is written as x = A^-1 * b,
    # use x = np.linalg.solve(A, b) instead.
    return np.linalg.solve(lhs, rhs)
# Toy data set: five samples of a single feature, stored as one column.
X = np.array([[1], [2], [3], [4], [5]])
Y = np.array([7, 9, 12, 15, 16])

# normalEqn returns the intercept first, then the slope.
b, a = normalEqn(X, Y)
print(b,a)

# Raw observations.
plt.scatter(X,Y)

# Fitted line, evaluated at every integer from the smallest to the largest x.
_X = np.arange(X.min(), X.max() + 1)
_Y = b + a * _X
plt.plot(_X, _Y, '-r')
import statsmodels.api as sm
def ols(X, y):
    """Fit an ordinary least-squares model with statsmodels.

    Args:
        X: Feature matrix, one row per sample. An intercept column is added
            with ``sm.add_constant`` before fitting.
        y: Target values, one per sample.

    Returns:
        The fitted parameters (``est.params``) of ``sm.OLS(y, Xb)``.
    """
    Xb = sm.add_constant(X)
    # Fit against the `y` argument; the previous code read the module-level
    # `Y` instead, silently ignoring whatever the caller passed in.
    est = sm.OLS(y, Xb).fit()
    return est.params
# Cross-check: fit the toy data (X, Y defined above) with statsmodels.
ols(X, Y)
# Fetch the carsmall data set. This is an IPython shell escape, so it only
# works inside IPython/Jupyter and needs `wget` plus network access.
!wget http://cda.psych.uiuc.edu/coursefiles/st01/carsmall.mat
from scipy.io import loadmat
# Read the downloaded MATLAB file; the result is used as a dict below.
mat = loadmat('carsmall.mat')
# Show which entries the file provides.
mat.keys()
# Gather the loaded variables into one table, one column per variable.
df = pd.DataFrame()
for k in mat:
    # Entries whose key starts with a double underscore are left out
    # (presumably loadmat's metadata rather than data -- verify).
    if not k.startswith('__'):
        df[k] = mat[k]

# Quick look at the table: first rows, column types, size.
df.head()
df.dtypes
df.shape

# Discard rows missing any of the three columns used by the regression,
# then re-check the size.
df.dropna(subset=['Weight', 'Horsepower', 'MPG'], inplace=True)
df.shape
# Predictors: Weight and Horsepower (two columns); target: MPG.
X = df[['Weight', 'Horsepower']].values
Y = df['MPG'].values
print(X.shape, Y.shape)
# Fit with the hand-written normal-equation solver and show the result.
a = normalEqn(X, Y)
a
# Fit again with statsmodels for comparison. `a` is rebound here, so the
# code that follows uses the statsmodels parameters.
a = ols(X, Y)
a
# Rebuild the design matrix: a leading column of ones for the intercept.
m = X.shape[0]
b = np.ones((m, 1))
Xb = np.concatenate([b, X], axis=1)

# Model output for every sample.
predictedY = Xb.dot(a)

# R^2 = 1 - (sum of squared residuals) / (total sum of squares).
# SSres and SStot hold the per-sample deviations; the squaring and summing
# happens in the dot products below.
SSres = Y - predictedY
SStot = Y - Y.mean()
rSquared = 1 - np.dot(SSres, SSres) / np.dot(SStot, SStot)
print("r-squared: ", rSquared)
# Grid over the observed range of each predictor
# (steps of 100 along Weight, 10 along Horsepower).
x1fit = np.arange(X[:,0].min(), X[:,0].max()+1, 100)
x2fit = np.arange(X[:,1].min(), X[:,1].max()+1, 10)
X1FIT,X2FIT = np.meshgrid(x1fit, x2fit)

# Fitted plane: intercept a[0] plus one coefficient per predictor.
YFIT = a[0] + a[1]*X1FIT + a[2]*X2FIT

fig = plt.figure(figsize=(16,8))
ax = fig.add_subplot(111, projection='3d')
# Observed points. The label previously read 'Actual BP', a leftover from
# another example; the values plotted on this axis are MPG.
ax.scatter(X[:, 0], X[:, 1], Y, color='r', label='Actual MPG')
ax.plot_surface(X1FIT,X2FIT,YFIT)

# To read back the current camera angles while tuning the view:
#   print('ax.azim {}'.format(ax.azim))
#   print('ax.elev {}'.format(ax.elev))
ax.view_init(10,-60)

ax.set_xlabel('Weight')
ax.set_ylabel('Horsepower')
ax.set_zlabel('MPG')

# Let the axes fill the whole figure.
plt.subplots_adjust(left=0, right=1, top=1, bottom=0)