diff --git a/linear_regression/polynomial.gif b/linear_regression/polynomial.gif
index 84bf44e..2c067fa 100644
Binary files a/linear_regression/polynomial.gif and b/linear_regression/polynomial.gif differ
diff --git a/linear_regression/polynomial.org b/linear_regression/polynomial.org
index 7201122..921d755 100644
--- a/linear_regression/polynomial.org
+++ b/linear_regression/polynomial.org
@@ -15,19 +15,21 @@ h_w(x) = w_1 + w_2x + w_3x^2
 Then, we should define a cost function. A common approach is to use the *Mean Square Error* cost
 function:
 \begin{equation}\label{eq:cost}
-  J(w) = \frac{1}{2n} \sum_{i=0}^n (h_w(x^{(i)}) - \hat{y}^{(i)})^2
+  J(w) = \frac{1}{2n} \sum_{i=0}^n (h_w(x^{(i)}) - y^{(i)})^2
 \end{equation}
 
-Note that in Equation \ref{eq:cost} we average by $2n$ and not $n$. This is because it get simplify
-while doing the partial derivatives as we will see below. This is a pure cosmetic approach which do
-not impact the gradient decent (see [[https://math.stackexchange.com/questions/884887/why-divide-by-2m][here]] for more informations). The next step is to $min_w J(w)$
-for each weight $w_i$ (performing the gradient decent). Thus we compute each partial derivatives:
+Here $n$ is the number of observations and $x^{(i)}$ is the value of the independent variable
+associated with the observation $y^{(i)}$. Note that in Equation \ref{eq:cost} we average by $2n$
+and not $n$: this simplifies the expression of the partial derivatives, as we will see below. It is
+a purely cosmetic choice which does not impact the gradient descent (see [[https://math.stackexchange.com/questions/884887/why-divide-by-2m][here]] for more details). The
+next step is to find $\min_w J(w)$ by adjusting each weight $w_i$ through gradient descent (see [[https://towardsdatascience.com/gradient-descent-demystified-bc30b26e432a][here]]).
+Thus we compute each partial derivative:
 
 \begin{align}
   \frac{\partial J(w)}{\partial w_1}&=\frac{\partial J(w)}{\partial h_w(x)}\frac{\partial h_w(x)}{\partial w_1}\nonumber\\
-  &= \frac{1}{n} \sum_{i=0}^n (h_w(x^{(i)}) - \hat{y}^{(i)})\\
+  &= \frac{1}{n} \sum_{i=0}^n (h_w(x^{(i)}) - y^{(i)})\\
   \text{similarly:}\nonumber\\
-  \frac{\partial J(w)}{\partial w_2}&= \frac{1}{n} \sum_{i=0}^n x(h_w(x^{(i)}) - \hat{y}^{(i)})\\
-  \frac{\partial J(w)}{\partial w_3}&= \frac{1}{n} \sum_{i=0}^n x^2(h_w(x^{(i)}) - \hat{y}^{(i)})
+  \frac{\partial J(w)}{\partial w_2}&= \frac{1}{n} \sum_{i=0}^n x^{(i)}(h_w(x^{(i)}) - y^{(i)})\\
+  \frac{\partial J(w)}{\partial w_3}&= \frac{1}{n} \sum_{i=0}^n (x^{(i)})^2(h_w(x^{(i)}) - y^{(i)})
 \end{align}
 
diff --git a/linear_regression/polynomial.pdf b/linear_regression/polynomial.pdf
index ecf2f28..5f72883 100644
Binary files a/linear_regression/polynomial.pdf and b/linear_regression/polynomial.pdf differ
diff --git a/linear_regression/polynomial.py b/linear_regression/polynomial.py
index a62474a..662014f 100755
--- a/linear_regression/polynomial.py
+++ b/linear_regression/polynomial.py
@@ -27,13 +27,17 @@ def dh3():
     return(1/len(x)*np.sum((h(x)-y)*(x**2)))
 
 # Perform the gradient decent
-fig, ax = plt.subplots()
-frame=0 # Current frame (plot animation)
+fig, ax = plt.subplots(dpi=300)
+ax.set_xlim([0, 7])
+ax.set_ylim([0, 5])
+ax.plot(x,y,"ro")
+h_data,=ax.plot(x,h(x))
 alpha=0.005 # Proportion of the gradient to take into account
 accuracy=0.000001 # Accuracy of the decent
 done=False
 def decent(i):
-    global w1,w2,w3,x,y,frame
+    global w1,w2,w3,x,y,done # 'done' is assigned below, so declare it global too
+    skip_frame=0 # Gradient steps done since the last plot update
     while True:
         w1_old=w1
         w1_new=w1-alpha*dh1()
@@ -47,14 +51,9 @@ def decent(i):
 
         if abs(w1_new-w1_old) <= accuracy and abs(w2_new-w2_old) <= accuracy and abs(w2_new-w2_old) <= accuracy:
             done=True
-        frame+=1
-        if frame >=1000:
-            frame=0
-            ax.clear()
-            ax.set_xlim([0, 7])
-            ax.set_ylim([0, 5])
-            ax.plot(x,y,"ro")
-            ax.plot(x,h(x))
+        skip_frame+=1
+        if skip_frame >=1000:
+            h_data.set_ydata(h(x))
             break
 
 def IsDone():
@@ -65,5 +64,5 @@ def IsDone():
         yield i
 
 anim=FuncAnimation(fig,decent,frames=IsDone,repeat=False)
-anim.save('polynomial.gif',dpi=80,writer="imagemagick")
+anim.save('polynomial.gif',writer="imagemagick",dpi=300)