snakeq/qlearning.py
2022-11-01 23:40:05 +01:00

147 lines
No EOL
3.9 KiB
Python
Executable file

#!/usr/bin/env python
import sys,random,os
import numpy as np
# Import snake game
from snake import Snake
# Setup QTable
# Boolean features:
# Snake go up?
# Snake go right?
# Snake go down?
# Snake go left?
# Apple at up?
# Apple at right?
# Apple at down?
# Apple at left?
# Obstacle at up?
# Obstacle at right?
# Obstacle at down?
# Obstacle at left?
# Queue in front?
##### Totally 13 boolean features so 2^13=8192 states
##### Totally 4 actions for the AI (up, right,down,left)
##### Totally 4*2^13 thus 32768 table entries
##### Reward +1 when eat an apple
##### Reward -10 when hit obstacle
qtable=np.zeros((2**13, 4))
game=Snake(length=1,fps=200,startat=(10,10))
def isWall(h,game):
if h[0]<0 or h[1]<0 or h[0] >= game.grid_width or h[1] >= game.grid_height:
return(True)
return(False)
last_state=None
last_action=None
attempt=0
def event_handler(game,event):
global last_state,last_action,attempt
h=game.snake[0]
left=(h[0]-1,h[1])
right=(h[0]+1,h[1])
up=(h[0],h[1]-1)
down=(h[0],h[1]+1)
a=game.apple
snake_go_up=(game.direction==12)
snake_go_right=(game.direction==3)
snake_go_down=(game.direction==6)
snake_go_left=(game.direction==9)
apple_up=(a[1]<h[1])
apple_right=(a[0]>h[0])
apple_down=(a[1]>h[1])
apple_left=(a[0]<h[0])
obstacle_up=(up in game.snake or isWall(up, game))
obstacle_right=(right in game.snake or isWall(right, game))
obstacle_down=(down in game.snake or isWall(down, game))
obstacle_left=(left in game.snake or isWall(left, game))
queue_in_front=0
if game.direction == 3:
for x in range(h[0],game.grid_width):
if (x,h[1]) in game.snake[1:]:
queue_in_front=1
break
elif game.direction == 9:
for x in range(0,h[0]):
if (x,h[1]) in game.snake[1:]:
queue_in_front=1
break
elif game.direction == 12:
for y in range(0,h[1]):
if (h[0],y) in game.snake[1:]:
queue_in_front=1
break
elif game.direction == 6:
for y in range(h[1],game.grid_height):
if (h[0],y) in game.snake[1:]:
queue_in_front=1
break
reward=0
if event==0:
attempt+=1
if event==-1:
reward=-10
attempt=0
elif event==1:
reward=5
attempt=0
# This come from me I do not now if it is the best way to identify a state
state=2**12*queue_in_front+2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left
# Choose an action
action=random.choice((0,1,2,3))
if np.max(qtable[state]) > 0:
#qactions=qtable[state]
#options=np.flatnonzero(qactions == np.max(qactions)) # Since Q value might be equals for several actions
#action = random.choice(options)
action=np.argmax(qtable[state])
# Avoid infinite loop
if attempt>game.grid_height*game.grid_width:
return(-1)
# Update current state Q
if last_state != None:
qtable[last_state,last_action]=qtable[last_state,last_action]+0.7*(reward+0.9*np.max(qtable[state])-qtable[last_state,last_action])
last_state=state
last_action=action
# Apply the action
snake_action=12
if action==1:
snake_action=3
elif action==2:
snake_action=6
elif action==3:
snake_action=9
game.direction=snake_action
return(0)
if os.path.exists("qtable.txt"):
qtable=np.loadtxt("qtable.txt")
perf=0
for i in range(0,10000):
last_state=None
last_action=None
score=game.run(event_handler=event_handler)
attempt=0
if i%10 == 0:
np.savetxt('qtable.txt',qtable)
perf=max(perf,score)
print("Game ended with "+str(score)+" best so far is "+str(perf))