Minor changes
This commit is contained in:
parent
85a180809d
commit
5b3efc4a21
3 changed files with 47 additions and 8211 deletions
56
qlearning.py
56
qlearning.py
|
@ -67,28 +67,41 @@ class QTable:
|
||||||
obstacle_left=(left in game.snake or self.isWall(left, game))
|
obstacle_left=(left in game.snake or self.isWall(left, game))
|
||||||
|
|
||||||
tail_in_front=0
|
tail_in_front=0
|
||||||
if game.direction == 3:
|
if snake_go_right:
|
||||||
for x in range(h[0],game.grid_width):
|
for x in range(h[0],game.grid_width):
|
||||||
if (x,h[1]) in game.snake[1:]:
|
if (x,h[1]) in game.snake[1:]:
|
||||||
tail_in_front=1
|
tail_in_front=1
|
||||||
break
|
break
|
||||||
elif game.direction == 9:
|
elif snake_go_left:
|
||||||
for x in range(0,h[0]):
|
for x in range(0,h[0]):
|
||||||
if (x,h[1]) in game.snake[1:]:
|
if (x,h[1]) in game.snake[1:]:
|
||||||
tail_in_front=1
|
tail_in_front=1
|
||||||
break
|
break
|
||||||
elif game.direction == 12:
|
elif snake_go_up:
|
||||||
for y in range(0,h[1]):
|
for y in range(0,h[1]):
|
||||||
if (h[0],y) in game.snake[1:]:
|
if (h[0],y) in game.snake[1:]:
|
||||||
tail_in_front=1
|
tail_in_front=1
|
||||||
break
|
break
|
||||||
elif game.direction == 6:
|
elif snake_go_down:
|
||||||
for y in range(h[1],game.grid_height):
|
for y in range(h[1],game.grid_height):
|
||||||
if (h[0],y) in game.snake[1:]:
|
if (h[0],y) in game.snake[1:]:
|
||||||
tail_in_front=1
|
tail_in_front=1
|
||||||
break
|
break
|
||||||
# This encoding is my own idea; I do not know if it is the best way to identify a state
|
# This encoding is my own idea; I do not know if it is the best way to identify a state
|
||||||
state=2**12*tail_in_front+2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left
|
state=\
|
||||||
|
2**12*tail_in_front+\
|
||||||
|
2**11*snake_go_up+\
|
||||||
|
2**10*snake_go_right+\
|
||||||
|
2**9*snake_go_down+\
|
||||||
|
2**8*snake_go_left+\
|
||||||
|
2**7*apple_up+\
|
||||||
|
2**6*apple_right+\
|
||||||
|
2**5*apple_down+\
|
||||||
|
2**4*apple_left+\
|
||||||
|
2**3*obstacle_up+\
|
||||||
|
2**2*obstacle_right+\
|
||||||
|
2**1*obstacle_down+\
|
||||||
|
obstacle_left
|
||||||
return(state)
|
return(state)
|
||||||
|
|
||||||
def apply_bellman(self,state,action,new_state,reward):
|
def apply_bellman(self,state,action,new_state,reward):
|
||||||
|
@ -123,19 +136,32 @@ qtable=QTable("qtable.txt")
|
||||||
|
|
||||||
for i in range(0,10000):
|
for i in range(0,10000):
|
||||||
result=0
|
result=0
|
||||||
|
stuck=0
|
||||||
|
state=qtable.get_state(game)
|
||||||
while result >= 0:
|
while result >= 0:
|
||||||
state=qtable.get_state(game)
|
|
||||||
action=qtable.get_action(state)
|
action=qtable.get_action(state)
|
||||||
result=game.play3(action)
|
result=game.play3(action)
|
||||||
if last_state!=None:
|
new_state=qtable.get_state(game)
|
||||||
reward=0
|
|
||||||
if result==-1:
|
# Agent is stuck
|
||||||
reward=-10
|
if stuck>=game.grid_width*game.grid_height:
|
||||||
elif result==1:
|
game.new_game()
|
||||||
reward=1
|
break
|
||||||
qtable.apply_bellman(last_state,last_action,state,reward)
|
|
||||||
last_state=state
|
# Compute reward and update stuck
|
||||||
last_action=action
|
reward=0
|
||||||
|
if result==-1:
|
||||||
|
reward=-10
|
||||||
|
stuck=0
|
||||||
|
elif result==1:
|
||||||
|
reward=1
|
||||||
|
stuck=0
|
||||||
|
|
||||||
|
# Apply learning
|
||||||
|
qtable.apply_bellman(state,action,new_state,reward)
|
||||||
|
state=new_state
|
||||||
|
stuck+=1
|
||||||
|
|
||||||
# Measurements
|
# Measurements
|
||||||
score=game.last_score
|
score=game.last_score
|
||||||
perf=max(perf,score)
|
perf=max(perf,score)
|
||||||
|
|
8192
qtable.txt
8192
qtable.txt
File diff suppressed because it is too large
Load diff
10
snake.py
10
snake.py
|
@ -150,6 +150,11 @@ class Snake:
|
||||||
# Refresh screen
|
# Refresh screen
|
||||||
self.draw()
|
self.draw()
|
||||||
self.clock.tick(self.fps)
|
self.clock.tick(self.fps)
|
||||||
|
# Handle pending QUIT events so the game can still be closed
|
||||||
|
for event in pygame.event.get():
|
||||||
|
if event.type == pygame.QUIT:
|
||||||
|
pygame.quit()
|
||||||
|
sys.exit()
|
||||||
return(code)
|
return(code)
|
||||||
|
|
||||||
def play2(self,direction):
|
def play2(self,direction):
|
||||||
|
@ -185,10 +190,7 @@ class Snake:
|
||||||
while True:
|
while True:
|
||||||
# Check inputs
|
# Check inputs
|
||||||
for event in pygame.event.get():
|
for event in pygame.event.get():
|
||||||
if event.type == pygame.QUIT:
|
if event.type == pygame.KEYDOWN:
|
||||||
pygame.quit()
|
|
||||||
sys.exit()
|
|
||||||
elif event.type == pygame.KEYDOWN:
|
|
||||||
if event.key == pygame.K_LEFT and self.direction != 3:
|
if event.key == pygame.K_LEFT and self.direction != 3:
|
||||||
self.direction=9
|
self.direction=9
|
||||||
break
|
break
|
||||||
|
|
Loading…
Add table
Reference in a new issue