diff options
| author | Loic Guegan <manzerbredes@mailbox.org> | 2022-11-02 09:12:06 +0100 |
|---|---|---|
| committer | Loic Guegan <manzerbredes@mailbox.org> | 2022-11-02 09:12:06 +0100 |
| commit | 5b3efc4a2170855b21fe141aa06b8fdaa81776b9 (patch) | |
| tree | 6cdcc11e8d57ffdd878826284713b7a718949b2a /qlearning.py | |
| parent | 85a180809dd9046feb0b64ae38b8a436379f98eb (diff) | |
Minor changes
Diffstat (limited to 'qlearning.py')
| -rwxr-xr-x | qlearning.py | 56 |
1 file changed, 41 insertions, 15 deletions
diff --git a/qlearning.py b/qlearning.py index 5a247aa..46d19d9 100755 --- a/qlearning.py +++ b/qlearning.py @@ -67,28 +67,41 @@ class QTable: obstacle_left=(left in game.snake or self.isWall(left, game)) tail_in_front=0 - if game.direction == 3: + if snake_go_right: for x in range(h[0],game.grid_width): if (x,h[1]) in game.snake[1:]: tail_in_front=1 break - elif game.direction == 9: + elif snake_go_left: for x in range(0,h[0]): if (x,h[1]) in game.snake[1:]: tail_in_front=1 break - elif game.direction == 12: + elif snake_go_up: for y in range(0,h[1]): if (h[0],y) in game.snake[1:]: tail_in_front=1 break - elif game.direction == 6: + elif snake_go_down: for y in range(h[1],game.grid_height): if (h[0],y) in game.snake[1:]: tail_in_front=1 break # This comes from me; I do not know if it is the best way to identify a state - state=2**12*tail_in_front+2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left + state=\ + 2**12*tail_in_front+\ + 2**11*snake_go_up+\ + 2**10*snake_go_right+\ + 2**9*snake_go_down+\ + 2**8*snake_go_left+\ + 2**7*apple_up+\ + 2**6*apple_right+\ + 2**5*apple_down+\ + 2**4*apple_left+\ + 2**3*obstacle_up+\ + 2**2*obstacle_right+\ + 2**1*obstacle_down+\ + obstacle_left return(state) def apply_bellman(self,state,action,new_state,reward): @@ -123,19 +136,32 @@ qtable=QTable("qtable.txt") for i in range(0,10000): result=0 + stuck=0 + state=qtable.get_state(game) while result >= 0: - state=qtable.get_state(game) action=qtable.get_action(state) result=game.play3(action) - if last_state!=None: - reward=0 - if result==-1: - reward=-10 - elif result==1: - reward=1 - qtable.apply_bellman(last_state,last_action,state,reward) - last_state=state - last_action=action + new_state=qtable.get_state(game) + + # Agent is stuck + if stuck>=game.grid_width*game.grid_height: + game.new_game() + break + + # Compute reward and update stuck + reward=0 + if result==-1: + reward=-10 + stuck=0 + elif result==1: + reward=1 + stuck=0 + + # Apply learning + qtable.apply_bellman(state,action,new_state,reward) + state=new_state + stuck+=1 + # Measurements score=game.last_score perf=max(perf,score) |
