Diffstat (limited to 'qlearning.py')
| Mode       | File         | Lines changed |
|------------|--------------|---------------|
| -rwxr-xr-x | qlearning.py | 49            |

1 file changed, 9 insertions, 40 deletions
```diff
diff --git a/qlearning.py b/qlearning.py
index c731253..a33734c 100755
--- a/qlearning.py
+++ b/qlearning.py
@@ -20,10 +20,9 @@ class QTable:
     # Obstacle at right?
     # Obstacle at down?
     # Obstacle at left?
-    # Tail in front?
-    ##### Totally 13 boolean features so 2^13=8192 states
+    ##### Totally 12 boolean features so 2^12=4096 states
     ##### Totally 4 actions for the AI (up, right,down,left)
-    ##### Totally 4*2^13 thus 32768 table entries
+    ##### Totally 4*2^12 thus 16384 table entries
     ##### Reward +1 when eat an apple
     ##### Reward -10 when hit obstacle
     """
@@ -34,7 +33,7 @@ class QTable:
         if os.path.exists(file):
             self.qtable=np.loadtxt(file)
         else:
-            self.qtable=np.zeros((2**13, 4))
+            self.qtable=np.zeros((2**12, 4))
         with open(file+"_generation","w") as f:
             f.write("0")
@@ -67,30 +66,8 @@ class QTable:
         obstacle_down=(down in game.snake or self.isWall(down, game))
         obstacle_left=(left in game.snake or self.isWall(left, game))
-        tail_in_front=0
-        if snake_go_right:
-            for x in range(h[0],game.grid_width):
-                if (x,h[1]) in game.snake[1:]:
-                    tail_in_front=1
-                    break
-        elif snake_go_left:
-            for x in range(0,h[0]):
-                if (x,h[1]) in game.snake[1:]:
-                    tail_in_front=1
-                    break
-        elif snake_go_up:
-            for y in range(0,h[1]):
-                if (h[0],y) in game.snake[1:]:
-                    tail_in_front=1
-                    break
-        elif snake_go_down:
-            for y in range(h[1],game.grid_height):
-                if (h[0],y) in game.snake[1:]:
-                    tail_in_front=1
-                    break
         # This come from me I do not now if it is the best way to identify a state
         state=\
-            2**12*tail_in_front+\
             2**11*snake_go_up+\
             2**10*snake_go_right+\
             2**9*snake_go_down+\
@@ -106,8 +83,8 @@ class QTable:
         return(state)
 
     def apply_bellman(self,state,action,new_state,reward):
-        alpha=0.5
-        gamma=0.9
+        alpha=0.1
+        gamma=0.95
         self.qtable[state,action]=self.qtable[state,action]+alpha*(reward+gamma*np.max(self.qtable[new_state])-self.qtable[state,action])
         self.save_counter+=1
         if self.save_counter>=self.save_every:
@@ -140,18 +117,17 @@
 
 # Perform learning
 
-width,height=40,30 # My advice is start with a small grid 5x5 to have many interaction and avoid early toy effect
+width,height=80,50 # My advice is start with a small grid 5x5 to have many interaction and avoid early toy effect
 perf=0
 perf_list=list()
 last_state=None
 last_action=None
-game=Snake(length=1,fps=500,startat=(random.randint(0,width-1),random.randint(0,height-1)),grid_width=width,grid_height=height)
+game=Snake(length=1,fps=500,grid_pts=20,startat=(random.randint(0,width-1),random.randint(0,height-1)),grid_width=width,grid_height=height)
 qtable=QTable("qtable.txt")
 while True:
     result=0
     stuck=0
     stuck_tolerance=1
-    stuck_count=0
     state=qtable.get_state(game)
     while result >= 0:
         action=qtable.get_action(state)
@@ -163,21 +139,14 @@
         if result==-1:
             reward=-10
             stuck=0
-            stuck_count=0
         elif result==1:
             reward=1
             stuck=0
-            stuck_count=0
         # Agent is stuck
         if stuck>=(game.grid_width*game.grid_height)/stuck_tolerance:
-            stuck=0
-            stuck_count+=1
-            game.new_apple()
-            print("Stuck! Try with a new apple...")
-            if stuck_count>2:
-                print("Can't get out of stuck. Abort!")
-                stuck_count=0
+            print("Stuck! Apply penality and abort!")
+            qtable.apply_bellman(state,action,new_state,-1)
             game.new_game()
             break
```
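The dropped `tail_in_front` flag is why every "13" in the comments becomes "12": the state index is just the remaining boolean features read as a binary number, so the table shrinks from 8192 to 4096 rows. Below is a minimal sketch of that packing; the `encode_state` helper and the ordering of the bits below the visible `snake_go_*` weights are assumptions for illustration, not code from the repository.

```python
# Hedged sketch of the state packing behind "2^12 = 4096 states".
# get_state() in the patch sums weighted booleans directly; this helper
# and the feature ordering beyond the visible snake_go_* bits are assumed.
def encode_state(features):
    """Pack booleans into one table index; first feature = highest bit."""
    state = 0
    for bit in features:
        state = (state << 1) | int(bit)
    return state

# 12 boolean features -> indices 0..4095, hence np.zeros((2**12, 4)):
assert encode_state([True] * 12) == 2**12 - 1   # all features set
assert encode_state([False] * 12) == 0          # no features set

# Example: snake heading up (bit 11) with one low-order feature set (bit 0):
print(encode_state([True] + [False] * 10 + [True]))  # 2**11 + 1 = 2049
```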
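The `apply_bellman` hunk only retunes two constants, but it helps to see the update they parameterize. Here is a hedged sketch of the same one-step tabular Q-learning update; the standalone `bellman_update` function name is mine, not the repository's. A smaller `alpha` means each transition moves the estimate by a tenth of the TD error instead of half, smoothing out noisy experience, while a larger `gamma` makes reward several moves away count for more, which suits the bigger grid.

```python
import numpy as np

ALPHA = 0.1   # learning rate after this commit (was 0.5)
GAMMA = 0.95  # discount factor after this commit (was 0.9)

def bellman_update(qtable, state, action, new_state, reward):
    """Move Q(s,a) toward the bootstrapped target r + gamma * max_a' Q(s',a')."""
    target = reward + GAMMA * np.max(qtable[new_state])
    qtable[state, action] += ALPHA * (target - qtable[state, action])

qtable = np.zeros((2**12, 4))      # 4096 states x 4 actions, as in the patch
bellman_update(qtable, state=2049, action=0, new_state=17, reward=1)
print(qtable[2049, 0])             # 0.1: one-tenth of the TD error
```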
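Finally, the stuck logic is simplified: instead of respawning the apple and aborting after three retries, a stuck agent now takes a mild -1 penalty (versus -10 for a crash) and the episode restarts. A condensed sketch of the new branch, assuming the surrounding game loop supplies these variables (the function wrapper is mine):

```python
# Condensed sketch of the simplified stuck handling; names follow the diff.
def handle_stuck(qtable, game, state, action, new_state, stuck, stuck_tolerance=1):
    # "Stuck" = roughly one step per grid cell without eating or dying.
    if stuck >= (game.grid_width * game.grid_height) / stuck_tolerance:
        qtable.apply_bellman(state, action, new_state, -1)  # gentle penalty
        game.new_game()                                     # restart episode
        return True   # caller breaks out of the inner loop
    return False
```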
