summaryrefslogtreecommitdiff
path: root/qlearning.py
diff options
context:
space:
mode:
Diffstat (limited to 'qlearning.py')
-rwxr-xr-x  qlearning.py  49
1 files changed, 9 insertions, 40 deletions
diff --git a/qlearning.py b/qlearning.py
index c731253..a33734c 100755
--- a/qlearning.py
+++ b/qlearning.py
@@ -20,10 +20,9 @@ class QTable:
# Obstacle at right?
# Obstacle at down?
# Obstacle at left?
- # Tail in front?
- ##### Totally 13 boolean features so 2^13=8192 states
+ ##### Totally 12 boolean features so 2^12=4096 states
##### Totally 4 actions for the AI (up, right,down,left)
- ##### Totally 4*2^13 thus 32768 table entries
+ ##### Totally 4*2^12 thus 16384 table entries
##### Reward +1 when eat an apple
##### Reward -10 when hit obstacle
"""
@@ -34,7 +33,7 @@ class QTable:
if os.path.exists(file):
self.qtable=np.loadtxt(file)
else:
- self.qtable=np.zeros((2**13, 4))
+ self.qtable=np.zeros((2**12, 4))
with open(file+"_generation","w") as f:
f.write("0")
@@ -67,30 +66,8 @@ class QTable:
obstacle_down=(down in game.snake or self.isWall(down, game))
obstacle_left=(left in game.snake or self.isWall(left, game))
- tail_in_front=0
- if snake_go_right:
- for x in range(h[0],game.grid_width):
- if (x,h[1]) in game.snake[1:]:
- tail_in_front=1
- break
- elif snake_go_left:
- for x in range(0,h[0]):
- if (x,h[1]) in game.snake[1:]:
- tail_in_front=1
- break
- elif snake_go_up:
- for y in range(0,h[1]):
- if (h[0],y) in game.snake[1:]:
- tail_in_front=1
- break
- elif snake_go_down:
- for y in range(h[1],game.grid_height):
- if (h[0],y) in game.snake[1:]:
- tail_in_front=1
- break
# This comes from me; I do not know if it is the best way to identify a state
state=\
- 2**12*tail_in_front+\
2**11*snake_go_up+\
2**10*snake_go_right+\
2**9*snake_go_down+\
@@ -106,8 +83,8 @@ class QTable:
return(state)
def apply_bellman(self,state,action,new_state,reward):
- alpha=0.5
- gamma=0.9
+ alpha=0.1
+ gamma=0.95
self.qtable[state,action]=self.qtable[state,action]+alpha*(reward+gamma*np.max(self.qtable[new_state])-self.qtable[state,action])
self.save_counter+=1
if self.save_counter>=self.save_every:
@@ -140,18 +117,17 @@ class QTable:
# Perform learning
-width,height=40,30 # My advice is start with a small grid 5x5 to have many interaction and avoid early toy effect
+width,height=80,50 # My advice is start with a small grid 5x5 to have many interaction and avoid early toy effect
perf=0
perf_list=list()
last_state=None
last_action=None
-game=Snake(length=1,fps=500,startat=(random.randint(0,width-1),random.randint(0,height-1)),grid_width=width,grid_height=height)
+game=Snake(length=1,fps=500,grid_pts=20,startat=(random.randint(0,width-1),random.randint(0,height-1)),grid_width=width,grid_height=height)
qtable=QTable("qtable.txt")
while True:
result=0
stuck=0
stuck_tolerance=1
- stuck_count=0
state=qtable.get_state(game)
while result >= 0:
action=qtable.get_action(state)
@@ -163,21 +139,14 @@ while True:
if result==-1:
reward=-10
stuck=0
- stuck_count=0
elif result==1:
reward=1
stuck=0
- stuck_count=0
# Agent is stuck
if stuck>=(game.grid_width*game.grid_height)/stuck_tolerance:
- stuck=0
- stuck_count+=1
- game.new_apple()
- print("Stuck! Try with a new apple...")
- if stuck_count>2:
- print("Can't get out of stuck. Abort!")
- stuck_count=0
+ print("Stuck! Apply penality and abort!")
+ qtable.apply_bellman(state,action,new_state,-1)
game.new_game()
break