summaryrefslogtreecommitdiff
path: root/qlearning.py
diff options
context:
space:
mode:
authorLoic Guegan <manzerbredes@mailbox.org>2022-11-02 09:12:06 +0100
committerLoic Guegan <manzerbredes@mailbox.org>2022-11-02 09:12:06 +0100
commit5b3efc4a2170855b21fe141aa06b8fdaa81776b9 (patch)
tree6cdcc11e8d57ffdd878826284713b7a718949b2a /qlearning.py
parent85a180809dd9046feb0b64ae38b8a436379f98eb (diff)
Minor changes
Diffstat (limited to 'qlearning.py')
-rwxr-xr-xqlearning.py56
1 file changed, 41 insertions, 15 deletions
diff --git a/qlearning.py b/qlearning.py
index 5a247aa..46d19d9 100755
--- a/qlearning.py
+++ b/qlearning.py
@@ -67,28 +67,41 @@ class QTable:
obstacle_left=(left in game.snake or self.isWall(left, game))
tail_in_front=0
- if game.direction == 3:
+ if snake_go_right:
for x in range(h[0],game.grid_width):
if (x,h[1]) in game.snake[1:]:
tail_in_front=1
break
- elif game.direction == 9:
+ elif snake_go_left:
for x in range(0,h[0]):
if (x,h[1]) in game.snake[1:]:
tail_in_front=1
break
- elif game.direction == 12:
+ elif snake_go_up:
for y in range(0,h[1]):
if (h[0],y) in game.snake[1:]:
tail_in_front=1
break
- elif game.direction == 6:
+ elif snake_go_down:
for y in range(h[1],game.grid_height):
if (h[0],y) in game.snake[1:]:
tail_in_front=1
break
 # This comes from me; I do not know if it is the best way to identify a state
- state=2**12*tail_in_front+2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left
+ state=\
+ 2**12*tail_in_front+\
+ 2**11*snake_go_up+\
+ 2**10*snake_go_right+\
+ 2**9*snake_go_down+\
+ 2**8*snake_go_left+\
+ 2**7*apple_up+\
+ 2**6*apple_right+\
+ 2**5*apple_down+\
+ 2**4*apple_left+\
+ 2**3*obstacle_up+\
+ 2**2*obstacle_right+\
+ 2**1*obstacle_down+\
+ obstacle_left
return(state)
def apply_bellman(self,state,action,new_state,reward):
@@ -123,19 +136,32 @@ qtable=QTable("qtable.txt")
for i in range(0,10000):
result=0
+ stuck=0
+ state=qtable.get_state(game)
while result >= 0:
- state=qtable.get_state(game)
action=qtable.get_action(state)
result=game.play3(action)
- if last_state!=None:
- reward=0
- if result==-1:
- reward=-10
- elif result==1:
- reward=1
- qtable.apply_bellman(last_state,last_action,state,reward)
- last_state=state
- last_action=action
+ new_state=qtable.get_state(game)
+
+ # Agent is stuck
+ if stuck>=game.grid_width*game.grid_height:
+ game.new_game()
+ break
+
+ # Compute reward and update stuck
+ reward=0
+ if result==-1:
+ reward=-10
+ stuck=0
+ elif result==1:
+ reward=1
+ stuck=0
+
+ # Apply learning
+ qtable.apply_bellman(state,action,new_state,reward)
+ state=new_state
+ stuck+=1
+
# Measurements
score=game.last_score
perf=max(perf,score)