#!/usr/bin/env python
"""Q-learning agent for the Snake game.

Trains a tabular Q-learning policy over a 13-bit boolean state
(2**13 = 8192 states x 4 actions = 32768 table entries) and
persists the table to disk periodically.

NOTE(review): this file was recovered from a copy whose ``<...>``
spans had been stripped (HTML-tag mangling). The comparison
operators in ``get_state`` and the body of ``apply_bellman`` were
reconstructed from the surrounding remnants and the class
docstring -- confirm against the project's history if available.
"""
import os
import random
import sys

import numpy as np

# Import snake game (project-local module)
from snake import Snake


class QTable:
    """Tabular Q-function for the snake agent.

    Boolean features (13 bits -> 2**13 = 8192 states):
      - Snake go up? / right? / down? / left?
      - Apple at up? / right? / down? / left?
      - Obstacle at up? / right? / down? / left?
      - Tail in front?

    Actions: 4 (up, right, down, left) -> 4 * 2**13 = 32768 entries.
    Reward: +1 when an apple is eaten, -10 when an obstacle is hit.
    """

    def __init__(self, file, save_every=10):
        """Load the Q-table from *file* if it exists, else start at zeros.

        save_every -- number of Bellman updates between disk saves.
        """
        self.file = file
        self.save_every = save_every
        self.update_counter = 0
        if os.path.exists(file):
            self.qtable = np.loadtxt(file)
        else:
            self.qtable = np.zeros((2 ** 13, 4))

    def isWall(self, h, game):
        """Return True when cell *h* (x, y) lies outside the game grid."""
        return (h[0] < 0 or h[1] < 0
                or h[0] >= game.grid_width or h[1] >= game.grid_height)

    def get_state(self, game):
        """Encode the current game situation as an int in [0, 2**13).

        The 13 boolean features listed in the class docstring are packed
        into a single integer used as a row index into the Q-table.
        """
        # Head position and the four neighbouring cells.
        h = game.snake[0]
        left = (h[0] - 1, h[1])
        right = (h[0] + 1, h[1])
        up = (h[0], h[1] - 1)
        down = (h[0], h[1] + 1)
        a = game.apple

        # Current heading (clock-face encoding: 12=up, 3=right, 6=down, 9=left).
        snake_go_up = (game.direction == 12)
        snake_go_right = (game.direction == 3)
        snake_go_down = (game.direction == 6)
        snake_go_left = (game.direction == 9)

        # Apple direction relative to the head (y grows downward).
        # NOTE(review): operators reconstructed after tag-stripping;
        # the remnants "(a[1]...h[0])" / "(a[0]..." fix which operand
        # pairs were compared.
        apple_up = (a[1] < h[1])
        apple_right = (a[0] > h[0])
        apple_down = (a[1] > h[1])
        apple_left = (a[0] < h[0])

        # Obstacle = wall or own body in the adjacent cell.
        # NOTE(review): reconstructed -- the original span was lost.
        obstacle_up = self.isWall(up, game) or up in game.snake
        obstacle_right = self.isWall(right, game) or right in game.snake
        obstacle_down = self.isWall(down, game) or down in game.snake
        obstacle_left = self.isWall(left, game) or left in game.snake

        # Tail in front: is the cell straight ahead part of the body?
        if snake_go_up:
            front = up
        elif snake_go_right:
            front = right
        elif snake_go_down:
            front = down
        else:
            front = left
        tail_in_front = front in game.snake

        # Pack the 13 booleans into one integer state index.
        features = (snake_go_up, snake_go_right, snake_go_down, snake_go_left,
                    apple_up, apple_right, apple_down, apple_left,
                    obstacle_up, obstacle_right, obstacle_down, obstacle_left,
                    tail_in_front)
        state = 0
        for f in features:
            state = (state << 1) | int(f)
        return state

    def apply_bellman(self, state, action, new_state, reward,
                      alpha=0.1, gamma=0.9):
        """Apply one Q-learning (Bellman) update and periodically save.

        Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))

        NOTE(review): body reconstructed -- only the save-throttling
        remnant (`...=self.save_every: np.savetxt(...)`) survived the
        corruption; alpha/gamma defaults are conventional, not recovered.
        """
        old = self.qtable[state][action]
        self.qtable[state][action] = old + alpha * (
            reward + gamma * np.max(self.qtable[new_state]) - old)

        # Throttle disk writes: save only every `save_every` updates.
        self.update_counter += 1
        if self.update_counter >= self.save_every:
            np.savetxt(self.file, self.qtable)
            self.update_counter = 0

    def get_action(self, state):
        """Pick an action for *state*.

        Greedy w.r.t. the Q-table when any learned value is positive;
        otherwise a uniformly random action (crude exploration).
        """
        action = random.choice((0, 1, 2, 3))
        if np.max(self.qtable[state]) > 0:
            # Ties are resolved by np.argmax (first max wins); a prior
            # version sampled uniformly among tied maxima (commented out
            # in the original).
            action = np.argmax(self.qtable[state])
        return action


if __name__ == "__main__":
    # Perform learning: play episodes forever, updating the Q-table
    # after every step and tracking the best score seen so far.
    perf = 0
    game = Snake(length=1, fps=300,
                 startat=(random.randint(0, 29), random.randint(0, 29)))
    qtable = QTable("qtable.txt")
    while True:
        result = 0
        stuck = 0
        state = qtable.get_state(game)
        while result >= 0:
            action = qtable.get_action(state)
            result = game.play3(action)
            new_state = qtable.get_state(game)

            # Agent is stuck: wandered half the grid without eating.
            if stuck >= (game.grid_width * game.grid_height) / 2:
                game.new_game()
                break

            # Compute reward (+1 apple, -10 death) and reset the
            # stuck counter on any terminal/eating event.
            reward = 0
            if result == -1:
                reward = -10
                stuck = 0
            elif result == 1:
                reward = 1
                stuck = 0

            # Apply learning.
            qtable.apply_bellman(state, action, new_state, reward)
            state = new_state
            stuck += 1

        # Measurements.
        score = game.last_score
        perf = max(perf, score)
        print("Game ended with " + str(score) + " best so far is " + str(perf))