#!/usr/bin/env python
"""Tabular Q-learning agent that plays the snake game.

The agent observes 12 boolean features on every game step:

* the snake's current heading (up, right, down, left),
* whether the apple is on the cell directly up/right/down/left of the head,
* whether an obstacle (wall or snake body) is on each of those four cells.

12 boolean features give 2**12 = 4096 states.  With 4 actions
(up, right, down, left) the Q-table holds 4 * 4096 = 16384 entries.

Rewards: +1 when the snake eats an apple, -10 when it hits an obstacle.
"""
import random
import sys

import numpy as np

# Import snake game
from snake import Snake

# Step size of the Q update and discount applied to the next state's value.
LEARNING_RATE = 0.5
DISCOUNT = 0.5

# Direction codes understood by the game, indexed by action number
# (0 = up, 1 = right, 2 = down, 3 = left).  The codes are clock positions.
ACTION_TO_DIRECTION = (12, 3, 6, 9)

# One row per encoded state, one column per action.
qtable = np.zeros((4096, 4))
game = Snake()


def isWall(h, game):
    """Return True when cell ``h`` lies outside the game grid.

    ``h`` is an ``(x, y)`` pair; the grid spans ``0 <= x < game.grid_width``
    and ``0 <= y < game.grid_height``.
    """
    x, y = h[0], h[1]
    return x < 0 or y < 0 or x >= game.grid_width or y >= game.grid_height


# State and action chosen on the previous step of the current game; None
# until the first step, so that no update is made before a transition exists.
last_state = None
last_action = None


def event_handler(game, event):
    """Learn from the previous step's outcome and pick the next direction.

    ``event`` is 1 when an apple was eaten and -1 when an obstacle was hit;
    any other value yields a reward of 0.  The handler encodes the current
    observation as a state index, updates the Q-value of the previous
    (state, action) pair, and writes the chosen direction to
    ``game.direction``.
    """
    global last_state, last_action

    head = game.snake[0]
    left = (head[0] - 1, head[1])
    right = (head[0] + 1, head[1])
    up = (head[0], head[1] - 1)
    down = (head[0], head[1] + 1)
    apple = game.apple

    def blocked(cell):
        # A cell is an obstacle if the snake occupies it or it is off-grid.
        return cell in game.snake or isWall(cell, game)

    # Feature order fixes the bit layout of the state index: the first
    # feature is the most significant bit (2**11), the last is bit 0.
    features = (
        game.direction == 12,  # snake going up
        game.direction == 3,   # snake going right
        game.direction == 6,   # snake going down
        game.direction == 9,   # snake going left
        up == apple,
        right == apple,
        down == apple,
        left == apple,
        blocked(up),
        blocked(right),
        blocked(down),
        blocked(left),
    )
    state = 0
    for flag in features:
        state = (state << 1) | int(bool(flag))

    if event == 1:
        reward = 1
    elif event == -1:
        reward = -10
    else:
        reward = 0

    # Choose an action: exploit only when some action has a positive value,
    # otherwise explore uniformly at random.
    best_value = np.max(qtable[state])
    if best_value > 0:
        action = int(np.argmax(qtable[state]))
    else:
        action = random.choice((0, 1, 2, 3))

    # Update the Q-value of the previous (state, action) pair.  The bootstrap
    # term must be the scalar max over the new state's actions: assigning the
    # whole row qtable[state] to a single cell raises ValueError.  The
    # "- previous" term keeps the values bounded (temporal-difference form).
    # NOTE(review): if event == -1 ends the game, the bootstrap term should
    # arguably be dropped for that transition -- confirm against Snake.run.
    if last_state is not None:
        previous = qtable[last_state, last_action]
        target = reward + DISCOUNT * best_value
        qtable[last_state, last_action] = previous + LEARNING_RATE * (target - previous)

    last_state = state
    last_action = action

    # Apply the action
    game.direction = ACTION_TO_DIRECTION[action]


# Play ten games in a row; the Q-table persists across games, but the
# previous-step memory is cleared so no update spans two games.
for _ in range(10):
    last_state = None
    last_action = None
    score = game.run(event_handler=event_handler)
    print("Game ended with " + str(score))