
(Not to be confused with the mean squared error of a parameter estimate)
$$ L = \frac{1}{n} \sum d_i^2 = \frac{1}{n} \sum (\hat{y_i} - y_i)^2 $$
$$ a^*, \; b^* = \underset{(a, b)}{\operatorname{arg\,min}} \; L $$
$$ \frac{\partial L}{\partial a} = \frac{2}{n} \sum \; (\hat{y_i} - y_i) $$
$$ \frac{\partial L}{\partial b} = \frac{2}{n} \sum \; (\hat{y_i} - y_i) \; x_i $$
$$ a_{n+1} = a_n - \eta \, \frac{\partial L}{\partial a} $$
$$ b_{n+1} = b_n - \eta \, \frac{\partial L}{\partial b} $$

(Slide from https://people.eecs.berkeley.edu/~jegonzal/assets/slides/linear_regression.pdf)
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
# Toy regression data: y = 5x exactly (no noise), so the optimum is a* = 0, b* = 5.
x = np.arange(-2, 2, 1/200)
y = 5*x
# Parameter grids used below to plot cross-sections of the loss surface.
b = np.arange(-10, 16, 1/2000)
a = np.arange(-10, 10, 1/2000)
def f(a, b):
    """Mean squared error of the line a + b*x against the global data x, y."""
    # The notebook export lost this line's indentation; restored here.
    return np.mean((a + b*x - y) ** 2)
# Cross-sections of the MSE surface: sweep one parameter while holding the other fixed.
h = [f(101, slope) for slope in b]       # vary b, intercept pinned at 101
plt.plot(b, h)
plt.show()
h = [f(intercept, 10) for intercept in a]  # vary a, slope pinned at 10
plt.plot(a, h)
plt.show()

Note: we deal mostly with binary classification.
Aim: find a linear boundary that separates the two classes.

Is the following data linearly separable?

# 1-D points colored by class: separable by any threshold between 3 and 5.
x = np.array([-1, 1, 2, 3, 5, 6, 7, 9])
# seaborn >= 0.12 removed positional data arguments; x/y must be keywords.
sns.scatterplot(x=x, y=[0]*len(x), marker="x", hue=np.array([0, 0, 0, 0, 1, 1, 1, 1]), s=200)
x = np.array([-1, 1, 2, 3, 5, 6, 7, 9])
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
# Same data, but plotting the 0/1 label on the y-axis.
sns.scatterplot(x=x, y=y, marker="x", hue=y, s=200)
def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)): maps any real input into (0, 1)."""
    # The notebook export lost this line's indentation; restored here.
    return 1 / (np.exp(-x) + 1)
# Visualize the sigmoid over [-10, 10): a smooth S-curve from 0 to 1, centered at 0.
d = np.arange(-10, 10, 1/2000)
plt.plot(d, sigmoid(d))
# Compare a shallow (b=2) and a steep (b=20) logit line on the same 1-D data.
x = np.array([-1, 1, 2, 3, 5, 6, 7, 9])
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
ax = plt.gca()
ax.set_ylim(-0.1, 1.1)  # keep the 0/1 labels visible with a small margin
# seaborn >= 0.12 removed positional data arguments; x/y must be keywords.
sns.scatterplot(x=x, y=y, marker="x", hue=y, s=200)
plt.plot(x, 2*x - 8, c="y")    # shallow logit line, crosses zero at x = 4
plt.plot(x, 20*x - 80, c="r")  # steep logit line, also crossing at x = 4
plt.show()
# Bare expressions: notebook cells that displayed the raw logit values.
2*x - 8
20*x - 80
sns.scatterplot(x=x, y=y, marker="x", hue=y, s=200)
plt.plot(x, sigmoid(2 * x - 8))   # logits squashed to probabilities: gradual
sns.scatterplot(x=x, y=y, marker="x", hue=y, s=200)
plt.plot(x, sigmoid(20* x - 80))  # steep logits give a near-step probability curve

$$ BCE(y_i, \hat{y_i}) = - \big( y_i \; \ln \hat{y_i} + (1 - y_i) \; \ln (1 - \hat{y_i}) \big) $$
$$ \text{With logits } z_i = a + b x_i \text{, the numerically stable form is } \max(z_i, 0) - z_i y_i + \ln\big(1 + e^{-|z_i|}\big) $$

def BCE(a, b):
    """Mean binary cross-entropy of the logistic model sigmoid(a + b*x).

    Uses the numerically stable logit form
        max(z, 0) - z*y + log(1 + exp(-|z|)),  z = a + b*x,
    which avoids overflow in exp() for large |z|.
    (Body indentation restored from the flattened notebook export.)
    """
    x = np.array([-1, 1, 2, 3, 5, 6, 7, 9])
    y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    logits = a + b*x
    # Bug fix: np.max(logits, 0) reduces over axis 0 to a single scalar;
    # the formula needs the ELEMENTWISE max against 0, i.e. np.maximum.
    # (The unused `probabilities = sigmoid(logits)` local was removed.)
    return np.mean(np.maximum(logits, 0) - logits * y + np.log(1 + np.exp(-np.abs(logits))))
# Cross-sections of the BCE loss: smooth and convex-looking in each direction.
b = np.arange(-10, 16, 1/2000)
k = [BCE(-8, r) for r in b]  # sweep the slope with intercept fixed at -8
plt.plot(b, k)
plt.show()
a = np.arange(-100, 25, 1/2000)
k = [BCE(r, 2) for r in a]  # sweep the intercept with slope fixed at 2
plt.plot(a, k)
plt.show()
def f(a, b):
    """Mean squared error of sigmoid(a + b*x) against the 0/1 labels.

    Redefines the earlier linear-regression f on purpose: the plots below
    show that MSE on sigmoid outputs yields a non-convex loss surface,
    motivating BCE for classification.
    (Body indentation restored from the flattened notebook export.)
    """
    x = np.array([-1, 1, 2, 3, 5, 6, 7, 9])
    y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    h = sigmoid(a + b*x)
    return np.mean((h - y) ** 2)
# Same cross-sections, but for MSE on sigmoid outputs: note the flat plateaus
# (vanishing gradients) away from the optimum, unlike the BCE curves above.
b = np.arange(-10, 16, 1/2000)
k = [f(4, r) for r in b]  # sweep the slope with intercept fixed at 4
plt.plot(b, k)
plt.show()
a = np.arange(-25, 15, 1/2000)
k = [f(r, 10) for r in a]  # sweep the intercept with slope fixed at 10
plt.plot(a, k)
plt.show()