import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
We know a person's age and experience, and we want to be able to deduce whether that person is badass in their field or not.
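Logistic regression models this probability as p = 1 / (1 + exp(-(b0 + b1*Age + b2*Experience))). As a quick intuition builder (a minimal sketch, not part of the fit below), here is the sigmoid curve that squashes any linear score z into a probability:
z = np.linspace(-10, 10, 200)
plt.plot(z, 1 / (1 + np.exp(-z)))
plt.axhline(0.5, color='gray', linestyle='--')  # the 0.5 decision threshold
plt.xlabel('z')
plt.ylabel('sigmoid(z)')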
df = pd.DataFrame({
    'Age': [20, 16.2, 20.2, 18.8, 18.9, 16.7, 13.6, 20.0, 18.0, 21.2,
            25, 31.2, 25.2, 23.8, 23.9, 21.7, 18.6, 25.0, 23.0, 26.2],
    'Experience': [2.3, 2.2, 1.8, 1.4, 3.2, 3.9, 1.4, 1.4, 3.6, 4.3,
                   4.3, 4.2, 3.8, 3.4, 5.2, 5.9, 3.4, 3.4, 5.6, 6.3],
    'Badass': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
               1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
})
df
# Color the points by class: red = not badass (0), blue = badass (1)
colors = np.full_like(df['Badass'], 'red', dtype='object')
colors[df['Badass'] == 1] = 'blue'
plt.scatter(df['Age'], df['Experience'], color=colors)
X = df.drop('Badass', axis=1).values
Y = df['Badass'].values
# Case to predict
x = [21.2, 4.3]
from sklearn.linear_model import LogisticRegression
# Very large C effectively disables regularization
model = LogisticRegression(C=1e20, solver='liblinear', random_state=0)
%time model.fit(X, Y)
print(model.intercept_, model.coef_)
# Fitted parameters: intercept b0 and feature weights b1 (Age), b2 (Experience)
b0 = model.intercept_[0]
b1 = model.coef_[0][0]
b2 = model.coef_[0][1]
plt.scatter(df['Age'], df['Experience'], color=colors)
# Decision boundary (threshold 0.5): points where b0 + b1*x1 + b2*x2 = 0,
# which rearranges to x2 = -(b1/b2)*x1 - b0/b2
_X = np.linspace(df['Age'].min(), df['Age'].max(), 10)
_Y = (-b1/b2)*_X + (-b0/b2)
plt.plot(_X, _Y, '-k')
# Plot the same boundary using a contour of the predicted probability
_X1 = np.linspace(df['Age'].min(), df['Age'].max(),10)
_X2 = np.linspace(df['Experience'].min(), df['Experience'].max(),10)
xx1, xx2 = np.meshgrid(_X1, _X2)
grid = np.c_[xx1.ravel(), xx2.ravel()]
preds = model.predict_proba(grid)[:, 1].reshape(xx1.shape)
plt.scatter(df['Age'], df['Experience'], color=colors)
plt.contour(xx1, xx2, preds, levels=[.5], cmap="Greys", vmin=0, vmax=.6)  # the 0.5 level traces the decision boundary
print('Badass probability:', model.predict_proba([x])[0][1])
print('Prediction:', model.predict([x])[0])
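As a sanity check (a minimal sketch using only the coefficients extracted above), the same probability can be recomputed by hand: for a binary liblinear model, the positive-class column of predict_proba is just the sigmoid of the linear score.
z = b0 + b1 * x[0] + b2 * x[1]
print('Manual probability:', 1 / (1 + np.exp(-z)))  # should match predict_proba above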
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
def loss(h, y):
    # Binary cross-entropy, averaged over the samples
    return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
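This is the standard binary cross-entropy, so it can be cross-checked against sklearn.metrics.log_loss (a minimal sketch with made-up labels and probabilities; the two printed values should match):
from sklearn.metrics import log_loss
y_true = np.array([0, 1, 1, 0])
y_prob = np.array([0.1, 0.8, 0.7, 0.3])
print(loss(y_prob, y_true), log_loss(y_true, y_prob))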
def gradientDescent(X, y, theta, alpha, epochs, verbose=True):
    m = len(y)
    for i in range(epochs):
        h = sigmoid(X.dot(theta))
        gradient = X.T.dot(h - y) / m
        theta -= alpha * gradient
        if verbose and i % 1000 == 0:
            z = np.dot(X, theta)
            h = sigmoid(z)
            print('loss:', loss(h, y))
    return theta
# Add an intercept column of ones so theta[0] acts as the bias term
m = len(X)
b = np.ones((m, 1))
Xb = np.concatenate([b, X], axis=1)
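Before fitting, the analytical gradient X.T.dot(h - y) / m can be spot-checked against a finite-difference approximation of the loss (a minimal sketch; theta_test is an arbitrary hypothetical point, not the fitted parameters):
theta_test = np.array([0.1, -0.2, 0.3])
eps = 1e-6
analytic = Xb.T.dot(sigmoid(Xb.dot(theta_test)) - Y) / m
numeric = np.zeros_like(theta_test)
for i in range(len(theta_test)):
    tp, tm = theta_test.copy(), theta_test.copy()
    tp[i] += eps
    tm[i] -= eps
    numeric[i] = (loss(sigmoid(Xb.dot(tp)), Y) - loss(sigmoid(Xb.dot(tm)), Y)) / (2 * eps)
print('max gradient error:', np.max(np.abs(analytic - numeric)))  # should be tiny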
# Fit
theta = np.random.rand(3)  # random initialization of [b0, b1, b2]
theta = gradientDescent(Xb, Y, theta=theta, alpha=0.1, epochs=10000)
theta
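Since the sklearn model above was fit with essentially no regularization, the two approaches should draw nearly the same boundary even if the raw coefficients differ (gradient descent stops after a fixed number of epochs rather than at convergence). A minimal sketch comparing their training-set predictions:
manual_preds = (sigmoid(Xb.dot(theta)) > 0.5).astype(int)
print('agreement with sklearn:', (manual_preds == model.predict(X)).mean())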
b0 = theta[0]
b1 = theta[1]
b2 = theta[2]
plt.scatter(df['Age'], df['Experience'], color=colors)
# Decision boundary (threshold 0.5) from the gradient-descent parameters
_X = np.linspace(df['Age'].min(), df['Age'].max(), 10)
_Y = (-b1/b2)*_X + (-b0/b2)
plt.plot(_X, _Y, '-k')
# Probability for the query point, computed by hand from theta
z = b0 + b1 * x[0] + b2 * x[1]
p = 1 / (1 + np.exp(-z))
print('Badass probability:', p)
print('Prediction:', (1 if p > 0.5 else 0))
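Finally, this hand-computed probability can be put side by side with sklearn's for the same query point (a minimal sketch; the two numbers need not match exactly since the optimizers and stopping criteria differ):
print('sklearn:', model.predict_proba([x])[0][1])
print('gradient descent:', p)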