commit 340299b7ace63f11136fe6315a23adce75cc2120
Author: manzerbredes
Date:   Sat Feb 13 14:39:28 2021 +0100

    Add linear regression

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..01564af
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.tex
diff --git a/data/polynomial.csv b/data/polynomial.csv
new file mode 100644
index 0000000..e339b1c
--- /dev/null
+++ b/data/polynomial.csv
@@ -0,0 +1,8 @@
+x,y
+1,1
+2,3
+3,4
+4,4.5
+5,4.6
+6,4.65
+
diff --git a/linear_regression/polynomial.gif b/linear_regression/polynomial.gif
new file mode 100644
index 0000000..84bf44e
Binary files /dev/null and b/linear_regression/polynomial.gif differ
diff --git a/linear_regression/polynomial.org b/linear_regression/polynomial.org
new file mode 100644
index 0000000..7201122
--- /dev/null
+++ b/linear_regression/polynomial.org
@@ -0,0 +1,33 @@
+#+TITLE: Gradient Descent Based Polynomial Regression
+#+AUTHOR: Loic Guegan
+
+#+OPTIONS: toc:nil
+
+#+LATEX_HEADER: \usepackage{fullpage}
+#+latex_header: \hypersetup{colorlinks=true,linkcolor=blue}
+
+First, choose a polynomial function $h_w(x)$ according to the data complexity.
+In our case, we have:
+\begin{equation}
+h_w(x) = w_1 + w_2x + w_3x^2
+\end{equation}
+
+Then, we define a cost function. A common choice is the *Mean Squared Error*
+cost function:
+\begin{equation}\label{eq:cost}
+J(w) = \frac{1}{2n} \sum_{i=1}^n (h_w(x^{(i)}) - \hat{y}^{(i)})^2
+\end{equation}
+
+Note that in Equation \ref{eq:cost} we average by $2n$ rather than $n$: the extra
+factor of 2 cancels when taking the partial derivatives, as shown below. This
+choice is purely cosmetic and does not affect the gradient descent (see [[https://math.stackexchange.com/questions/884887/why-divide-by-2m][here]]
+for more information). The next step is to solve $\min_w J(w)$ by adjusting each
+weight $w_i$, i.e., to perform the gradient descent.
+Thus, we compute each partial derivative:
+\begin{align}
+\frac{\partial J(w)}{\partial w_1}&=\frac{\partial J(w)}{\partial h_w(x)}\frac{\partial h_w(x)}{\partial w_1}\nonumber\\
+&= \frac{1}{n} \sum_{i=1}^n (h_w(x^{(i)}) - \hat{y}^{(i)})\\
+\text{and similarly:}\nonumber\\
+\frac{\partial J(w)}{\partial w_2}&= \frac{1}{n} \sum_{i=1}^n x^{(i)}(h_w(x^{(i)}) - \hat{y}^{(i)})\\
+\frac{\partial J(w)}{\partial w_3}&= \frac{1}{n} \sum_{i=1}^n (x^{(i)})^2(h_w(x^{(i)}) - \hat{y}^{(i)})
+\end{align}
+
diff --git a/linear_regression/polynomial.pdf b/linear_regression/polynomial.pdf
new file mode 100644
index 0000000..ecf2f28
Binary files /dev/null and b/linear_regression/polynomial.pdf differ
diff --git a/linear_regression/polynomial.py b/linear_regression/polynomial.py
new file mode 100755
index 0000000..a62474a
--- /dev/null
+++ b/linear_regression/polynomial.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.animation import FuncAnimation
+import numpy as np
+
+# Load the data
+csv="../data/polynomial.csv"
+data=pd.read_csv(csv)
+x=np.array(data["x"])
+y=np.array(data["y"])
+
+# Initialize the weights
+w1=w2=w3=10
+
+# Define our model
+def h(x):
+    return(w1+w2*x+w3*(x**2))
+
+# Define all partial derivatives of the cost function
+def dh1():
+    return(1/len(x)*np.sum(h(x)-y))
+def dh2():
+    return(1/len(x)*np.sum((h(x)-y)*x))
+def dh3():
+    return(1/len(x)*np.sum((h(x)-y)*(x**2)))
+
+# Perform the gradient descent
+fig, ax = plt.subplots()
+frame=0 # Current frame (plot animation)
+alpha=0.005 # Learning rate (proportion of the gradient to take into account)
+accuracy=0.000001 # Convergence threshold of the descent
+done=False
+def descent(i):
+    global w1,w2,w3,x,y,frame,done
+    while True:
+        # Compute all updates before applying them, so that every
+        # partial derivative uses the weights of the previous step
+        w1_old=w1
+        w1_new=w1-alpha*dh1()
+        w2_old=w2
+        w2_new=w2-alpha*dh2()
+        w3_old=w3
+        w3_new=w3-alpha*dh3()
+        w1=w1_new
+        w2=w2_new
+        w3=w3_new
+        # Stop once every weight update falls below the threshold
+        if abs(w1_new-w1_old) <= accuracy and abs(w2_new-w2_old) <= accuracy and abs(w3_new-w3_old) <= accuracy:
+            done=True
+        frame+=1
+        if frame >=1000: # Redraw the plot every 1000 iterations
+            frame=0
+            ax.clear()
+            ax.set_xlim([0, 7])
+            ax.set_ylim([0, 5])
+            ax.plot(x,y,"ro")
+            ax.plot(x,h(x))
+            break
+
+def IsDone():
+    global done
+    i = 0
+    while not done:
+        i += 1
+        yield i
+
+anim=FuncAnimation(fig,descent,frames=IsDone,repeat=False)
+anim.save('polynomial.gif',dpi=80,writer="imagemagick")
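
To check the derivation numerically without the animation machinery, here is a minimal sketch of the cost function from Equation \ref{eq:cost} and its gradient, run on the same data as data/polynomial.csv. The helper names (~cost~, ~gradient~) and the vectorized weight array are our own illustration, not part of the commit; it reuses the commit's starting point, learning rate, and stopping rule:

#+BEGIN_SRC python
import numpy as np

# Same data as data/polynomial.csv
x = np.array([1, 2, 3, 4, 5, 6], dtype=float)
y = np.array([1, 3, 4, 4.5, 4.6, 4.65])

def cost(w, x, y):
    """Mean squared error J(w), including the cosmetic 1/(2n) factor."""
    r = w[0] + w[1] * x + w[2] * x**2 - y
    return np.sum(r**2) / (2 * len(x))

def gradient(w, x, y):
    """Partial derivatives of J(w) w.r.t. w1, w2, w3."""
    r = w[0] + w[1] * x + w[2] * x**2 - y
    return np.array([np.sum(r), np.sum(r * x), np.sum(r * x**2)]) / len(x)

w = np.array([10.0, 10.0, 10.0])  # same initial weights as polynomial.py
alpha = 0.005                     # same learning rate
for step in range(200000):
    g = gradient(w, x, y)
    w -= alpha * g
    # Same stopping rule as the script: every weight update below the threshold
    if np.max(np.abs(alpha * g)) <= 1e-6:
        break

print(step, w, cost(w, x, y))
#+END_SRC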
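Since minimizing the MSE of a polynomial model is an ordinary least-squares problem, the weights the descent converges to can also be sanity-checked against NumPy's closed-form fit. This check is our addition, not part of the commit:

#+BEGIN_SRC python
import numpy as np

x = np.array([1, 2, 3, 4, 5, 6], dtype=float)
y = np.array([1, 3, 4, 4.5, 4.6, 4.65])

# np.polyfit returns coefficients from the highest degree down,
# i.e. [w3, w2, w1] in the notation of polynomial.org
w3, w2, w1 = np.polyfit(x, y, 2)
print(w1, w2, w3)
#+END_SRC

If the descent ends far from these values, the learning rate ~alpha~ or the ~accuracy~ threshold likely needs tuning.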