summaryrefslogtreecommitdiff
path: root/qlearning.py
diff options
context:
space:
mode:
Diffstat (limited to 'qlearning.py')
-rwxr-xr-x  qlearning.py  49
1 files changed, 9 insertions, 40 deletions
diff --git a/qlearning.py b/qlearning.py
index c731253..a33734c 100755
--- a/qlearning.py
+++ b/qlearning.py
@@ -20,10 +20,9 @@ class QTable:
# Obstacle at right?
# Obstacle at down?
# Obstacle at left?
- # Tail in front?
- ##### Totally 13 boolean features so 2^13=8192 states
+ ##### Totally 12 boolean features so 2^12=4096 states
##### Totally 4 actions for the AI (up, right,down,left)
- ##### Totally 4*2^13 thus 32768 table entries
+ ##### Totally 4*2^12 thus 16384 table entries
##### Reward +1 when eat an apple
##### Reward -10 when hit obstacle
"""
@@ -34,7 +33,7 @@ class QTable:
if os.path.exists(file):
self.qtable=np.loadtxt(file)
else:
- self.qtable=np.zeros((2**13, 4))
+ self.qtable=np.zeros((2**12, 4))
with open(file+"_generation","w") as f:
f.write("0")
@@ -67,30 +66,8 @@ class QTable:
obstacle_down=(down in game.snake or self.isWall(down, game))
obstacle_left=(left in game.snake or self.isWall(left, game))
- tail_in_front=0
- if snake_go_right:
- for x in range(h[0],game.grid_width):
- if (x,h[1]) in game.snake[1:]:
- tail_in_front=1
- break
- elif snake_go_left:
- for x in range(0,h[0]):
- if (x,h[1]) in game.snake[1:]:
- tail_in_front=1
- break
- elif snake_go_up:
- for y in range(0,h[1]):
- if (h[0],y) in game.snake[1:]:
- tail_in_front=1
- break
- elif snake_go_down:
- for y in range(h[1],game.grid_height):
- if (h[0],y) in game.snake[1:]:
- tail_in_front=1
- break
# This comes from me; I do not know if it is the best way to identify a state
state=\
- 2**12*tail_in_front+\
2**11*snake_go_up+\
2**10*snake_go_right+\
2**9*snake_go_down+\
@@ -106,8 +83,8 @@ class QTable:
return(state)
def apply_bellman(self,state,action,new_state,reward):
- alpha=0.5
- gamma=0.9
+ alpha=0.1
+ gamma=0.95
self.qtable[state,action]=self.qtable[state,action]+alpha*(reward+gamma*np.max(self.qtable[new_state])-self.qtable[state,action])
self.save_counter+=1
if self.save_counter>=self.save_every:
@@ -140,18 +117,17 @@ class QTable:
# Perform learning
-width,height=40,30 # My advice is start with a small grid 5x5 to have many interaction and avoid early toy effect
+width,height=80,50 # My advice is start with a small grid 5x5 to have many interaction and avoid early toy effect
perf=0
perf_list=list()
last_state=None
last_action=None
-game=Snake(length=1,fps=500,startat=(random.randint(0,width-1),random.randint(0,height-1)),grid_width=width,grid_height=height)
+game=Snake(length=1,fps=500,grid_pts=20,startat=(random.randint(0,width-1),random.randint(0,height-1)),grid_width=width,grid_height=height)
qtable=QTable("qtable.txt")
while True:
result=0
stuck=0
stuck_tolerance=1
- stuck_count=0
state=qtable.get_state(game)
while result >= 0:
action=qtable.get_action(state)
@@ -163,21 +139,14 @@ while True:
if result==-1:
reward=-10
stuck=0
- stuck_count=0
elif result==1:
reward=1
stuck=0
- stuck_count=0
# Agent is stuck
if stuck>=(game.grid_width*game.grid_height)/stuck_tolerance:
- stuck=0
- stuck_count+=1
- game.new_apple()
- print("Stuck! Try with a new apple...")
- if stuck_count>2:
- print("Can't get out of stuck. Abort!")
- stuck_count=0
+ print("Stuck! Apply penality and abort!")
+ qtable.apply_bellman(state,action,new_state,-1)
game.new_game()
break