#!/usr/bin/env python
"""Q-learning agent for the Snake game.

Trains a tabular Q-learning policy over a 13-bit boolean state encoding and
persists the table (plus a generation counter) to disk periodically.
"""
import sys, random, os
import numpy as np

# Import snake game
from snake import Snake


class QTable:
    """
    Boolean features:
      Snake go up? / right? / down? / left?
      Apple at up? / right? / down? / left?
      Obstacle at up? / right? / down? / left?
      Tail in front?

    Totally 13 boolean features so 2^13 = 8192 states.
    Totally 4 actions for the AI (up, right, down, left).
    Totally 4 * 2^13 thus 32768 table entries.
    Reward +1 when eating an apple, -10 when hitting an obstacle.
    """

    def __init__(self, file, save_every=5000):
        """Load an existing Q-table from `file`, or start a fresh zeroed one.

        save_every: number of Bellman updates between checkpoints to disk.
        """
        self.file = file
        self.save_every = save_every
        self.save_counter = 0
        if os.path.exists(file):
            self.qtable = np.loadtxt(file)
        else:
            self.qtable = np.zeros((2 ** 13, 4))
            # A sibling "<file>_generation" file tracks how many updates the
            # saved table has accumulated across runs.
            with open(file + "_generation", "w") as f:
                f.write("0")

    def isWall(self, h, game):
        """Return True when cell h = (x, y) lies outside the playing grid."""
        return (h[0] < 0 or h[1] < 0
                or h[0] >= game.grid_width or h[1] >= game.grid_height)

    def get_state(self, game):
        """Encode the current game situation as an integer in [0, 2**13)."""
        # First compute useful values: the head and its four neighbours.
        h = game.snake[0]
        left = (h[0] - 1, h[1])
        right = (h[0] + 1, h[1])
        up = (h[0], h[1] - 1)
        down = (h[0], h[1] + 1)
        a = game.apple
        # Direction encoding follows clock positions: 12=up, 3=right,
        # 6=down, 9=left (as used by game.direction).
        snake_go_up = (game.direction == 12)
        snake_go_right = (game.direction == 3)
        snake_go_down = (game.direction == 6)
        snake_go_left = (game.direction == 9)
        apple_up = (a[1] < h[1])
        apple_right = (a[0] > h[0])
        apple_down = (a[1] > h[1])
        apple_left = (a[0] < h[0])
        # NOTE(review): the original text of this region was corrupted (a
        # stripped "<...>" span deleted the obstacle/tail features and the
        # state bit-packing). The code below is reconstructed from the class
        # docstring's feature list — verify against the upstream project.
        obstacle_up = self.isWall(up, game) or up in game.snake
        obstacle_right = self.isWall(right, game) or right in game.snake
        obstacle_down = self.isWall(down, game) or down in game.snake
        obstacle_left = self.isWall(left, game) or left in game.snake
        # "Tail in front?": the cell straight ahead in the current direction
        # is occupied by the snake's own body.
        if snake_go_up:
            front = up
        elif snake_go_right:
            front = right
        elif snake_go_down:
            front = down
        else:
            front = left
        tail_in_front = front in game.snake
        # Pack the 13 booleans into a single table index.
        features = (snake_go_up, snake_go_right, snake_go_down, snake_go_left,
                    apple_up, apple_right, apple_down, apple_left,
                    obstacle_up, obstacle_right, obstacle_down, obstacle_left,
                    tail_in_front)
        state = 0
        for bit in features:
            state = (state << 1) | int(bit)
        return state

    def apply_bellman(self, state, action, new_state, reward,
                      learning_rate=0.1, discount=0.9):
        """Apply one temporal-difference update, then checkpoint periodically.

        Q(s,a) <- (1-lr)*Q(s,a) + lr*(reward + discount*max_a' Q(s',a'))
        """
        # NOTE(review): the head of this method (including the exact learning
        # rate / discount constants) was lost in the corrupted span; 0.1/0.9
        # are conventional defaults — confirm with the upstream project.
        self.qtable[state][action] = (
            (1 - learning_rate) * self.qtable[state][action]
            + learning_rate * (reward + discount * np.max(self.qtable[new_state]))
        )
        self.save_counter += 1
        if self.save_counter >= self.save_every:
            np.savetxt(self.file, self.qtable)
            if os.path.exists(self.file + "_generation"):
                generation = 0
                with open(self.file + "_generation", "r") as f:
                    generation = int(f.readline().rstrip())
                generation += self.save_every
                with open(self.file + "_generation", "w") as f:
                    f.write(str(generation))
                print("Checkpointing generation " + str(generation))
            self.save_counter = 0

    def get_action(self, state):
        """Return the greedy action for `state`, or a random one when the
        row is still untrained (all Q-values <= 0)."""
        action = random.choice((0, 1, 2, 3))
        if np.max(self.qtable[state]) > 0:
            # Greedy choice; np.argmax breaks ties by taking the first max.
            action = np.argmax(self.qtable[state])
        return action

    def get_random_action(self):
        """Return a uniformly random action (used to escape stuck loops)."""
        return random.choice((0, 1, 2, 3))


# Perform learning
width, height = 50, 30
perf = 0
last_state = None
last_action = None
game = Snake(length=1, fps=500,
             startat=(random.randint(0, width - 1), random.randint(0, height - 1)),
             grid_width=width, grid_height=height)
qtable = QTable("qtable.txt")
while True:
    result = 0
    stuck = 0
    stuck_tolerance = 1
    stuck_count = 0
    state = qtable.get_state(game)
    while result >= 0:
        action = qtable.get_action(state)
        result = game.play3(action)
        new_state = qtable.get_state(game)
        # Compute reward and update stuck counters:
        # result == -1 -> hit obstacle (game over), result == 1 -> ate apple.
        reward = 0
        if result == -1:
            reward = -10
            stuck = 0
            stuck_count = 0
        elif result == 1:
            reward = 1
            stuck = 0
            stuck_count = 0
        # Agent is stuck: it has wandered for a whole grid's worth of steps
        # without eating or dying, so inject a random action to break loops.
        if stuck >= (game.grid_width * game.grid_height) / stuck_tolerance:
            stuck = 0
            stuck_count += 1
            action = qtable.get_random_action()
            print("Stuck!")
            if stuck_count > 2:
                stuck_count = 0
                game.new_game()
                break
        # Apply learning
        qtable.apply_bellman(state, action, new_state, reward)
        state = new_state
        stuck += 1
    # Measurements
    score = game.last_score
    perf = max(perf, score)
    print("Game ended with " + str(score) + " best so far is " + str(perf))