#!/usr/bin/env python
"""Train a tabular Q-learning agent to play the Snake game.

The agent observes 13 boolean features, packs them into a state index and
learns one Q-value per (state, action) pair.  The table is loaded from and
periodically saved to ``qtable.txt`` in the current directory.

NOTE(review): this file had lost its line breaks, and part of
``event_handler`` had been stripped out (the text between some ``<`` and
``>`` characters).  Everything marked "reconstructed" below was rebuilt from
the feature list in the comment block and must be checked against the
original file / ``snake.py`` before the results are trusted.
"""
import os
import random
import sys

import numpy as np

# Import snake game
from snake import Snake

# Setup QTable
# Boolean features (in this order):
#   Snake going up?      Snake going right?
#   Snake going down?    Snake going left?
#   Apple above?         Apple to the right?
#   Apple below?         Apple to the left?
#   Obstacle above?      Obstacle to the right?
#   Obstacle below?      Obstacle to the left?
#   Queue (tail) in front?
##### 13 boolean features in total, so 2^13 = 8192 states
##### 4 actions for the AI (up, right, down, left)
##### 4 * 2^13 = 32768 table entries in total
##### Reward +1 when eating an apple
##### Reward -10 when hitting an obstacle
N_FEATURES = 13
N_ACTIONS = 4

LEARNING_RATE = 0.7  # weight of the new estimate in the Q update
DISCOUNT = 0.9       # weight of the best future Q-value in the Q update

REWARD_APPLE = 1
REWARD_OBSTACLE = -10

# Directions are encoded as clock positions: 12 = up, 3 = right,
# 6 = down, 9 = left.  The tuple index is the action number.
ACTION_TO_DIRECTION = (12, 3, 6, 9)

# Cell offset of one step in each direction (y grows downwards, as shown by
# the "up" neighbour being (x, y - 1)).
_DIRECTION_STEP = {12: (0, -1), 3: (1, 0), 6: (0, 1), 9: (-1, 0)}

# NOTE(review): the event codes that Snake.run() passes to the handler are not
# visible in this file.  The two values below are assumptions -- confirm them
# against snake.py.
EVENT_APPLE_EATEN = 1
EVENT_GAME_OVER = -1

QTABLE_PATH = "qtable.txt"
N_GAMES = 10000
SAVE_EVERY = 10

qtable = np.zeros((2**N_FEATURES, N_ACTIONS))

game = Snake(length=4, fps=200, startat=(10, 10))


def isWall(h, game):
    """Return True when cell ``h`` (x, y) lies outside the game grid."""
    return (
        h[0] < 0
        or h[1] < 0
        or h[0] >= game.grid_width
        or h[1] >= game.grid_height
    )


def _tail_in_front(game, head):
    """Return True when a body segment lies straight ahead of ``head``.

    NOTE(review): reconstructed.  The original definition of the "queue in
    front" feature was lost; this version scans from the head along the
    current direction until the wall.  Confirm against the original.
    """
    step = _DIRECTION_STEP.get(game.direction)
    if step is None:
        return False
    body = {tuple(segment) for segment in list(game.snake)[1:]}
    cell = (head[0] + step[0], head[1] + step[1])
    while not isWall(cell, game):
        if cell in body:
            return True
        cell = (cell[0] + step[0], cell[1] + step[1])
    return False


last_state = None
last_action = None
attempt = 0


def event_handler(game, event):
    """Observe the game, update the Q-table and steer the snake.

    Passed to ``game.run()`` as its ``event_handler``.  Builds the state from
    the 13 boolean features, applies the Q-learning update for the previous
    (state, action) pair, then sets ``game.direction`` to the chosen action.

    Returns:
        -1 when ``attempt`` exceeds the number of grid cells (used to avoid an
        endless game), 0 otherwise.
    """
    global last_state, last_action, attempt

    h = game.snake[0]
    left = (h[0] - 1, h[1])
    right = (h[0] + 1, h[1])
    up = (h[0], h[1] - 1)
    down = (h[0], h[1] + 1)
    a = game.apple

    snake_go_up = (game.direction == 12)
    snake_go_right = (game.direction == 3)
    snake_go_down = (game.direction == 6)
    snake_go_left = (game.direction == 9)

    # The comparison operators of apple_up / apple_right / apple_left were
    # stripped from the file; restored by symmetry with apple_down.
    apple_up = (a[1] < h[1])
    apple_right = (a[0] > h[0])
    apple_down = (a[1] > h[1])
    apple_left = (a[0] < h[0])

    # ---- reconstructed section: start ---------------------------------
    # NOTE(review): everything up to the "end" marker was lost in the file
    # and rebuilt from the feature list in the header comment.

    # An obstacle is assumed to be a wall or any snake segment.
    occupied = {tuple(segment) for segment in game.snake}
    obstacle_up = isWall(up, game) or up in occupied
    obstacle_right = isWall(right, game) or right in occupied
    obstacle_down = isWall(down, game) or down in occupied
    obstacle_left = isWall(left, game) or left in occupied

    queue_in_front = _tail_in_front(game, h)

    # Pack the features into an integer, first feature = most significant
    # bit.  NOTE(review): the original bit order is unknown; a qtable.txt
    # trained with the original script may not match this encoding.
    features = (
        snake_go_up, snake_go_right, snake_go_down, snake_go_left,
        apple_up, apple_right, apple_down, apple_left,
        obstacle_up, obstacle_right, obstacle_down, obstacle_left,
        queue_in_front,
    )
    state = 0
    for flag in features:
        state = (state << 1) | int(flag)

    # Rewards follow the header comment: +1 for an apple, -10 for a crash.
    reward = 0
    attempt += 1
    if event == EVENT_APPLE_EATEN:
        reward = REWARD_APPLE
        attempt = 0  # progress was made, restart the stall counter
    elif event == EVENT_GAME_OVER:
        reward = REWARD_OBSTACLE

    # Explore at random until the state has a positive Q-value.
    # NOTE(review): only the trailing "> 0:" of this condition survived;
    # the left-hand side is a guess.
    action = random.randrange(N_ACTIONS)
    if np.max(qtable[state]) > 0:
        # ---- reconstructed section: end -------------------------------
        # np.argmax returns the first index when several actions share the
        # maximum, so ties are always resolved towards the lowest action.
        action = np.argmax(qtable[state])

    # Avoid infinite loop
    if attempt > game.grid_height * game.grid_width:
        return -1

    # Update the Q-value of the previous (state, action) pair
    if last_state is not None:
        previous = qtable[last_state, last_action]
        target = reward + DISCOUNT * np.max(qtable[state])
        qtable[last_state, last_action] = (
            previous + LEARNING_RATE * (target - previous)
        )
    last_state = state
    last_action = action

    # Apply the action
    game.direction = ACTION_TO_DIRECTION[action]
    return 0


if os.path.exists(QTABLE_PATH):
    qtable = np.loadtxt(QTABLE_PATH)

perf = 0
for i in range(0, N_GAMES):
    # Each game starts without a previous (state, action) pair to update.
    last_state = None
    last_action = None
    score = game.run(event_handler=event_handler)
    attempt = 0
    if i % SAVE_EVERY == 0:
        np.savetxt(QTABLE_PATH, qtable)
    perf = max(perf, score)
    print("Game ended with "+str(score)+" best so far is "+str(perf))

# The periodic save skips the games after the last multiple of SAVE_EVERY;
# persist the final table so that learning is not lost.
np.savetxt(QTABLE_PATH, qtable)