# main.py
# Forked from MerlinRoudier/Projet_tech_week01.
from environnement import Env
import agent
import torch
def set_obstacles(s, size):
    """Return the (row, col) coordinates of every obstacle in a grid layout.

    ``s`` is read as a whitespace-separated sequence of cell tokens laid out
    row by row. A token equal to ``'O'`` marks an obstacle; every other token
    is ignored.

    Args:
        s: Layout string holding at least ``size * size`` tokens.
        size: Side length of the square grid.

    Returns:
        A list of ``(row, col)`` tuples, in row-major order.

    Raises:
        IndexError: If ``s`` holds fewer than ``size * size`` tokens.
    """
    # split() without an argument collapses runs of spaces/newlines/tabs and
    # drops leading/trailing whitespace. split(' ') would emit empty tokens
    # there and shift every following cell index.
    tokens = s.split()
    return [
        (row, col)
        for row in range(size)
        for col in range(size)
        if tokens[size * row + col] == 'O'
    ]
def decisionMapping(agent: "agent.LRLAgent", size: int, goalpos : tuple, startpos: tuple) -> None:
    """Print the action the agent's weights favor for every cell of the grid.

    For each cell ``(i, j)`` of a ``size`` x ``size`` grid, a 3-element
    feature tensor is built from the cell and goal coordinates, multiplied by
    ``agent.weights``, and the index of the largest score is translated into
    an action name. The names are printed one grid row per line, each name
    followed by a single space.

    Args:
        agent: Object exposing a ``weights`` tensor. Presumably one row per
            action and one column per feature, i.e. shape (4, 3) -- confirm
            against the agent implementation.
        size: Side length of the square grid.
        goalpos: ``(row, col)`` of the goal. Both components, and their sum,
            are used as divisors, so a zero raises ``ZeroDivisionError``.
        startpos: Accepted for interface compatibility; not used.

    Returns:
        None. The mapping is written to standard output.
    """
    action_names = {
        0: "up",
        1: "right",
        2: "down",
        3: "left",
    }
    # Use the goal passed by the caller; the module-level ``goal_pos`` is
    # deliberately not consulted so the function works on its own.
    goal_sum = sum(goalpos)
    total = []
    for i in range(size):
        row = []
        for j in range(size):
            # NOTE(review): operator precedence makes the third feature
            # i + (j / goal_sum), not (i + j) / goal_sum. Kept unchanged;
            # confirm it matches the features the agent computes itself.
            features = torch.tensor([i / goalpos[0], j / goalpos[1], i + j / goal_sum])
            scores = torch.matmul(agent.weights, features)
            row.append(action_names[int(torch.argmax(scores))])
        total.append(row)
    for row in total:
        for action in row:
            print(action, end=" ")
        print("")
# Script section: build an environment, train one agent, then run it.
#
# `o` is a hand-written layout of 10 rows x 10 space-separated tokens, where
# 'O' is the token set_obstacles() treats as an obstacle. The backslashes are
# line continuations inside a single string literal. In the code visible here
# the layout is not used, because the set_obstacles() call below is commented
# out.
o='\
* * * * * O * * * * \
O O O O * * * O * * \
* O * O * O O O O * \
* * * * * * * O * * \
O O O O * O O O * * \
* * * * * O * * * * \
* O O * O O * O * * \
* O * * * * * O O * \
* O O O O O * O * * \
* * * * * * * O O *'
# Disabled: would replace the layout string with its obstacle coordinates.
#o=set_obstacles(o,10)
# Goal cell handed to Env. NOTE(review): decisionMapping() also reads this
# module-level name directly, so keep the name `goal_pos` unchanged.
goal_pos=(4,4)
# NOTE(review): the environment is created with size=5 while the layout above
# is 10x10 -- confirm which size is intended before re-enabling obstacles.
env=Env(size=5, rendering='visual', goal_pos=goal_pos)
# Register a single agent of type 'lrl'; alpha/gamma/epsilon are presumably
# the learning rate, discount factor and exploration rate -- verify in Env.
env.add_agent(typeAgent='lrl', alpha=.30, gamma=.1, epsilon=.3)
#env.gen_maze()
# Disabled: print the decision map before training.
#decisionMapping(env.agents[0], 5, (4,4), (0,0))
# nb_i is presumably the number of training iterations -- confirm in Env.train.
env.train(nb_i=700)
#env.agents[0].save_q_table()
env.start()
# Disabled: print the decision map after training.
#decisionMapping(env.agents[0], 5, (4,4), (0,0))