diff --git a/qlearning.py b/qlearning.py
index e35d778..60775e3 100755
--- a/qlearning.py
+++ b/qlearning.py
@@ -38,7 +38,12 @@ def isWall(h,game):
         return(True)
     return(False)
 
+
+last_state=None
+last_action=None
 def event_handler(game,event):
+    global last_state,last_action
+
     h=game.snake[0]
     left=(h[0]-1,h[1])
     right=(h[0]+1,h[1])
@@ -70,14 +75,30 @@ def event_handler(game,event):
     # This come from me I do not now if it is the best way to identify a state
     state=2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left
-
+
     # Choose an action
     if np.max(qtable[state]) > 0:
         action = np.argmax(qtable[state])
     else:
-        action=random.choice((12,3,6,9))
+        action=random.choice((0,1,2,3))
 
-    game.direction=action
+    # Q-learning update: Q(s,a) += alpha*(reward + gamma*max_a' Q(s',a') - Q(s,a))
+    if last_state is not None:
+        qtable[last_state,last_action]=qtable[last_state,last_action]+0.5*(reward+0.5*np.max(qtable[state])-qtable[last_state,last_action])
+    last_state=state
+    last_action=action
+
+    # Apply the action: map the action index 0..3 onto the game's 12/3/6/9 directions
+    snake_action=12
+    if action==1:
+        snake_action=3
+    elif action==2:
+        snake_action=6
+    elif action==3:
+        snake_action=9
+    game.direction=snake_action
 
 for i in range(0,10):
+    last_state=None
+    last_action=None
     score=game.run(event_handler=event_handler)
     print("Game ended with "+str(score))
\ No newline at end of file