diff options
| author | Loic Guegan <manzerbredes@mailbox.org> | 2022-11-02 09:12:06 +0100 |
|---|---|---|
| committer | Loic Guegan <manzerbredes@mailbox.org> | 2022-11-02 09:12:06 +0100 |
| commit | 5b3efc4a2170855b21fe141aa06b8fdaa81776b9 (patch) | |
| tree | 6cdcc11e8d57ffdd878826284713b7a718949b2a /qlearning.py | |
| parent | 85a180809dd9046feb0b64ae38b8a436379f98eb (diff) | |
Minor changes
Diffstat (limited to 'qlearning.py')
| -rwxr-xr-x | qlearning.py | 56 |
1 file changed, 41 insertions, 15 deletions
diff --git a/qlearning.py b/qlearning.py index 5a247aa..46d19d9 100755 --- a/qlearning.py +++ b/qlearning.py @@ -67,28 +67,41 @@ class QTable: obstacle_left=(left in game.snake or self.isWall(left, game)) tail_in_front=0 - if game.direction == 3: + if snake_go_right: for x in range(h[0],game.grid_width): if (x,h[1]) in game.snake[1:]: tail_in_front=1 break - elif game.direction == 9: + elif snake_go_left: for x in range(0,h[0]): if (x,h[1]) in game.snake[1:]: tail_in_front=1 break - elif game.direction == 12: + elif snake_go_up: for y in range(0,h[1]): if (h[0],y) in game.snake[1:]: tail_in_front=1 break - elif game.direction == 6: + elif snake_go_down: for y in range(h[1],game.grid_height): if (h[0],y) in game.snake[1:]: tail_in_front=1 break # This comes from me; I do not know if it is the best way to identify a state - state=2**12*tail_in_front+2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left + state=\ + 2**12*tail_in_front+\ + 2**11*snake_go_up+\ + 2**10*snake_go_right+\ + 2**9*snake_go_down+\ + 2**8*snake_go_left+\ + 2**7*apple_up+\ + 2**6*apple_right+\ + 2**5*apple_down+\ + 2**4*apple_left+\ + 2**3*obstacle_up+\ + 2**2*obstacle_right+\ + 2**1*obstacle_down+\ + obstacle_left return(state) def apply_bellman(self,state,action,new_state,reward): @@ -123,19 +136,32 @@ qtable=QTable("qtable.txt") for i in range(0,10000): result=0 + stuck=0 + state=qtable.get_state(game) while result >= 0: - state=qtable.get_state(game) action=qtable.get_action(state) result=game.play3(action) - if last_state!=None: - reward=0 - if result==-1: - reward=-10 - elif result==1: - reward=1 - qtable.apply_bellman(last_state,last_action,state,reward) - last_state=state - last_action=action + new_state=qtable.get_state(game) + + # Agent is stuck + if stuck>=game.grid_width*game.grid_height: + game.new_game() + break + + # Compute reward and update stuck + reward=0 + if result==-1: + reward=-10 + stuck=0 + elif result==1: + reward=1 + stuck=0 + + # Apply learning + qtable.apply_bellman(state,action,new_state,reward) + state=new_state + stuck+=1 + # Measurements score=game.last_score perf=max(perf,score) |
