-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmaze_env.py
142 lines (122 loc) · 4.56 KB
/
maze_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import numpy as np
import time
import sys
import tkinter as tk
import random
# Side length of one grid cell, in pixels.
UNIT = 40
# Number of grid rows (maze height in cells).
MAZE_H = 7
# Number of grid columns (maze width in cells).
MAZE_W = 7
class Maze(tk.Tk, object):
    """Tkinter grid-world maze exposing ``reset`` / ``step`` / ``render``.

    The window shows a ``MAZE_W`` x ``MAZE_H`` grid of ``UNIT``-pixel cells
    containing three black obstacle squares, one yellow goal oval and one red
    agent square.  Observations are the agent's top-left corner relative to
    the goal's top-left corner, divided by ``MAZE_H * UNIT``.
    """

    # Pixel offset from a cell's top-left corner to its centre.
    _CENTER_OFFSET = 20
    # Half the side length of every drawn square / oval, in pixels.
    _HALF_SIDE = 15

    def __init__(self):
        super().__init__()
        # NOTE: these labels do not match step(): there, index 2 moves the
        # agent in +x (right) and index 3 in -x (left).  Only the indices are
        # used by step(); the labels are kept unchanged for callers.
        self.action_space = ['u', 'd', 'l', 'r']
        self.n_actions = len(self.action_space)
        self.n_features = 2
        self.title('maze')
        # Tk geometry strings are "<width>x<height>".
        self.geometry('{0}x{1}'.format(MAZE_W * UNIT, MAZE_H * UNIT))
        self._build_maze()

    def _cell_center(self, col, row):
        """Return the ``(x, y)`` pixel centre of the cell at ``col``, ``row``."""
        return (self._CENTER_OFFSET + UNIT * col,
                self._CENTER_OFFSET + UNIT * row)

    def _draw_square(self, center, fill):
        """Draw a filled square around ``center`` and return its canvas id."""
        cx, cy = center
        half = self._HALF_SIDE
        return self.canvas.create_rectangle(
            cx - half, cy - half,
            cx + half, cy + half,
            fill=fill)

    def _observation(self, agent_coords):
        """Return the agent's offset from the goal, scaled by ``MAZE_H * UNIT``.

        ``agent_coords`` is the coordinate list returned by
        ``canvas.coords`` for the agent; only its first two entries are used.
        """
        goal_corner = np.array(self.canvas.coords(self.oval)[:2])
        return (np.array(agent_coords[:2]) - goal_corner) / (MAZE_H * UNIT)

    def _build_maze(self):
        """Create the canvas and draw the grid, obstacles, goal and agent."""
        width = MAZE_W * UNIT
        height = MAZE_H * UNIT
        self.canvas = tk.Canvas(self, bg='white',
                                height=height,
                                width=width)

        # Draw the grid: vertical lines span the full height, horizontal
        # lines span the full width.
        for x in range(0, width, UNIT):
            self.canvas.create_line(x, 0, x, height)
        for y in range(0, height, UNIT):
            self.canvas.create_line(0, y, width, y)

        # Obstacles 1-3 (entering any of them ends the episode in step()).
        self.hell1 = self._draw_square(self._cell_center(4, 1), 'black')
        self.hell2 = self._draw_square(self._cell_center(2, 4), 'black')
        self.hell3 = self._draw_square(self._cell_center(3, 4), 'black')

        # Goal oval.
        goal_x, goal_y = self._cell_center(3, 0)
        half = self._HALF_SIDE
        self.oval = self.canvas.create_oval(
            goal_x - half, goal_y - half,
            goal_x + half, goal_y + half,
            fill='yellow')

        # Agent (red square) at its start cell.
        self.rect = self._draw_square(self._cell_center(3, 6), 'red')

        # pack all
        self.canvas.pack()

    def reset(self):
        """Put the agent back on its start cell and return the observation.

        Returns:
            A numpy array of two values, as produced by ``_observation``.
        """
        self.update()
        time.sleep(0.1)
        # Replace the agent item rather than moving it back.
        self.canvas.delete(self.rect)
        self.rect = self._draw_square(self._cell_center(3, 6), 'red')
        return self._observation(self.canvas.coords(self.rect))

    def step(self, action):
        """Apply ``action`` to the agent and report the outcome.

        Args:
            action: 0 moves up, 1 down, 2 right (+x), 3 left (-x).  A move
                that would leave the grid, or any other value, leaves the
                agent where it is.

        Returns:
            A tuple ``(s_, reward, done)``: the new observation, a reward of
            10 on the goal, -10 on an obstacle and 0 otherwise, and whether
            the episode has ended.
        """
        s = self.canvas.coords(self.rect)
        dx, dy = 0, 0
        if action == 0:  # up
            if s[1] > UNIT:
                dy = -UNIT
        elif action == 1:  # down
            if s[1] < (MAZE_H - 1) * UNIT:
                dy = UNIT
        elif action == 2:  # right
            if s[0] < (MAZE_W - 1) * UNIT:
                dx = UNIT
        elif action == 3:  # left
            if s[0] > UNIT:
                dx = -UNIT

        self.canvas.move(self.rect, dx, dy)  # move agent
        next_coords = self.canvas.coords(self.rect)  # next state

        # Reward function: the agent is "on" an item when their bounding
        # boxes are identical.
        obstacles = [
            self.canvas.coords(self.hell1),
            self.canvas.coords(self.hell2),
            self.canvas.coords(self.hell3),
        ]
        if next_coords == self.canvas.coords(self.oval):
            reward = 10
            done = True
            print("成功到达终点")
        elif next_coords in obstacles:
            reward = -10
            done = True
            print("撞到障碍物游戏结束!!!")
        else:
            reward = 0
            done = False

        s_ = self._observation(next_coords)
        return s_, reward, done

    def render(self):
        """Pause for 0.1 s and then process pending Tk events."""
        time.sleep(0.1)
        self.update()
# Example usage (random-action demo), kept commented out:
#
# def update():
#     for t in range(10):
#         s = env.reset()
#         while True:
#             env.render()
#             a = random.randint(0, 3)
#             s, r, done = env.step(a)
#             if done:
#                 break
#
#
# if __name__ == '__main__':
#     env = Maze()
#     env.after(100, update)
#     env.mainloop()