author	manzerbredes <manzerbredes@mailbox.org>	2021-02-13 14:39:28 +0100
committer	manzerbredes <manzerbredes@mailbox.org>	2021-02-13 14:39:28 +0100
commit	340299b7ace63f11136fe6315a23adce75cc2120 (patch)
tree	b5b1923f2634c498ee7b9a6be8928784499801fb
Add linear regression
-rw-r--r--	.gitignore	1
-rw-r--r--	data/polynomial.csv	8
-rw-r--r--	linear_regression/polynomial.gif	bin 0 -> 421935 bytes
-rw-r--r--	linear_regression/polynomial.org	43
-rw-r--r--	linear_regression/polynomial.pdf	bin 0 -> 112220 bytes
-rwxr-xr-x	linear_regression/polynomial.py	78
6 files changed, 130 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..01564af
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.tex
diff --git a/data/polynomial.csv b/data/polynomial.csv
new file mode 100644
index 0000000..e339b1c
--- /dev/null
+++ b/data/polynomial.csv
@@ -0,0 +1,8 @@
+x,y
+1,1
+2,3
+3,4
+4,4.5
+5,4.6
+6,4.65
+
diff --git a/linear_regression/polynomial.gif b/linear_regression/polynomial.gif
new file mode 100644
index 0000000..84bf44e
--- /dev/null
+++ b/linear_regression/polynomial.gif
Binary files differ
diff --git a/linear_regression/polynomial.org b/linear_regression/polynomial.org
new file mode 100644
index 0000000..7201122
--- /dev/null
+++ b/linear_regression/polynomial.org
@@ -0,0 +1,43 @@
+#+TITLE: Gradient Descent Based Polynomial Regression
+#+AUTHOR: Loic Guegan
+
+#+OPTIONS: toc:nil
+
+#+LATEX_HEADER: \usepackage{fullpage}
+#+latex_header: \hypersetup{colorlinks=true,linkcolor=blue}
+
+First, choose a polynomial function $h_w(x)$ according to the complexity of the
+data. In our case, we have:
+\begin{equation}
+h_w(x) = w_1 + w_2x + w_3x^2
+\end{equation}
+
+Then, we should define a cost function. A common choice is the *Mean Squared
+Error* cost function:
+\begin{equation}\label{eq:cost}
+  J(w) = \frac{1}{2n} \sum_{i=1}^{n} (h_w(x^{(i)}) - \hat{y}^{(i)})^2
+\end{equation}
+
+Note that in Equation \ref{eq:cost} we average over $2n$ rather than $n$. The
+factor 2 cancels out when computing the partial derivatives, as we will see
+below. This choice is purely cosmetic and does not affect the gradient descent
+(see [[https://math.stackexchange.com/questions/884887/why-divide-by-2m][here]] for more information). The next step is to minimise $J(w)$ with
+respect to each weight $w_i$, i.e. to perform the gradient descent. Thus, we
+compute the partial derivatives:
+\begin{align}
+  \frac{\partial J(w)}{\partial w_1}&=\frac{1}{2n} \sum_{i=1}^{n} 2(h_w(x^{(i)}) - \hat{y}^{(i)})\frac{\partial h_w(x^{(i)})}{\partial w_1}\nonumber\\
+  &= \frac{1}{n} \sum_{i=1}^{n} (h_w(x^{(i)}) - \hat{y}^{(i)})\\
+  \text{similarly:}\nonumber\\
+  \frac{\partial J(w)}{\partial w_2}&= \frac{1}{n} \sum_{i=1}^{n} x^{(i)}(h_w(x^{(i)}) - \hat{y}^{(i)})\\
+  \frac{\partial J(w)}{\partial w_3}&= \frac{1}{n} \sum_{i=1}^{n} (x^{(i)})^2(h_w(x^{(i)}) - \hat{y}^{(i)})
+\end{align}
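+
+Finally, gradient descent updates each weight in the opposite direction of its
+partial derivative, scaled by a learning rate $\alpha$:
+\begin{equation}
+w_j \leftarrow w_j - \alpha \frac{\partial J(w)}{\partial w_j}
+\end{equation}
+The accompanying script uses $\alpha=0.005$ and stops once every weight moves
+by less than $10^{-6}$ between two consecutive iterations.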
+
+
diff --git a/linear_regression/polynomial.pdf b/linear_regression/polynomial.pdf
new file mode 100644
index 0000000..ecf2f28
--- /dev/null
+++ b/linear_regression/polynomial.pdf
Binary files differ
diff --git a/linear_regression/polynomial.py b/linear_regression/polynomial.py
new file mode 100755
index 0000000..a62474a
--- /dev/null
+++ b/linear_regression/polynomial.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.animation import FuncAnimation
+import numpy as np
+
+# Load the data
+csv="../data/polynomial.csv"
+data=pd.read_csv(csv)
+x=np.array(data["x"])
+y=np.array(data["y"])
+
+# Define the initial weights
+w1=w2=w3=10
+
+# Define our model
+def h(x):
+ return(w1+w2*x+w3*(x**2))
+
+# Partial derivatives of the cost function with respect to w1, w2 and w3
+def dh1():
+ return(1/len(x)*np.sum(h(x)-y))
+def dh2():
+ return(1/len(x)*np.sum((h(x)-y)*x))
+def dh3():
+ return(1/len(x)*np.sum((h(x)-y)*(x**2)))
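+
+# Cost function J(w) from the notes (a convenience sketch, not used by the
+# descent itself): handy if one wants to monitor convergence.
+def J():
+    return(1/(2*len(x))*np.sum((h(x)-y)**2))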
+
+# Perform the gradient descent
+fig, ax = plt.subplots()
+frame=0 # Iteration counter, reset at each plot refresh
+alpha=0.005 # Learning rate: proportion of the gradient to take into account
+accuracy=0.000001 # Convergence threshold of the descent
+done=False
+def descent(i):
+    global w1,w2,w3,frame,done
+ while True:
+ w1_old=w1
+ w1_new=w1-alpha*dh1()
+ w2_old=w2
+ w2_new=w2-alpha*dh2()
+ w3_old=w3
+ w3_new=w3-alpha*dh3()
+ w1=w1_new
+ w2=w2_new
+ w3=w3_new
+
+        if abs(w1_new-w1_old) <= accuracy and abs(w2_new-w2_old) <= accuracy and abs(w3_new-w3_old) <= accuracy:
+ done=True
+ frame+=1
+ if frame >=1000:
+ frame=0
+ ax.clear()
+ ax.set_xlim([0, 7])
+ ax.set_ylim([0, 5])
+ ax.plot(x,y,"ro")
+ ax.plot(x,h(x))
+ break
+
+# Frame generator: keep yielding frame indices until the descent has converged
+def IsDone():
+    global done
+    i = 0
+    while not done:
+        i += 1
+        yield i
+
+anim=FuncAnimation(fig,descent,frames=IsDone,repeat=False)
+anim.save('polynomial.gif',dpi=80,writer="imagemagick")
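+
+# Note: the "imagemagick" writer requires ImageMagick to be installed;
+# matplotlib's built-in "pillow" writer is an alternative for GIF output.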
+