-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtrain.py
81 lines (62 loc) · 2.39 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""Main safe-grid-agents script with CLI."""
import random
import gym
import numpy as np
import torch
from tensorboardX import SummaryWriter
import safe_grid_gym
from safe_grid_agents.common import utils as ut
from safe_grid_agents.common.eval import EVAL_MAP
from safe_grid_agents.common.learn import LEARN_MAP
from safe_grid_agents.common.warmup import WARMUP_MAP
from safe_grid_agents.parsing import AGENT_MAP, ENV_MAP
def noop(*args, **kwargs):
    """Accept any arguments, do nothing, and return ``None``.

    Used as the default ``reporter`` callback of ``train`` so that reporting
    is optional.
    """
    return None
def train(args, config=None, reporter=noop):
    """Run warmup, then train an agent episode by episode with evaluations.

    Args:
        args: Namespace-like object holding the run settings. This function
            reads ``seed``, ``env_alias``, ``agent_alias``, ``log_dir`` and
            ``episodes`` from it, and passes the whole object on to the agent
            constructor and to the warmup/learn/eval functions. It is mutated
            in place when ``config`` is given.
        config: Optional mapping of overrides (e.g. Ray Tune's ``config``)
            merged into ``args`` before anything else happens.
        reporter: Callable invoked once per episode with the keyword arguments
            ``hidden_reward`` and ``obs_reward`` taken from the last ``info``
            dict returned by the learn function. Defaults to ``noop``.

    Returns:
        None. Every attribute of ``args`` is logged as text to a
        ``SummaryWriter`` opened on ``args.log_dir``; the same writer is
        exposed to the learn/eval functions via ``history["writer"]`` and
        ``eval_history["writer"]``.
    """
    # TODO(alok) This is here because there were issues with registering custom
    # environments in each run. This should be looked at and removed.
    import safe_grid_gym  # noqa: F401

    # Use Ray Tune's `config` arguments where appropriate by merging.
    if config is not None:
        vars(args).update(config)

    # Fix seeds (Python, NumPy, PyTorch) for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Get relevant env, agent, warmup/learn/eval functions.
    env_name = ENV_MAP[args.env_alias]
    agent_class = AGENT_MAP[args.agent_alias]
    warmup_fn = WARMUP_MAP[args.agent_alias]
    learn_fn = LEARN_MAP[args.agent_alias]
    eval_fn = EVAL_MAP[args.agent_alias]

    history, eval_history = ut.make_meters({}), ut.make_meters({})

    writer = SummaryWriter(args.log_dir)
    try:
        # Record the full run configuration alongside the metrics.
        for k, v in vars(args).items():
            writer.add_text("data/{}".format(k), str(v))
        history["writer"] = writer
        eval_history["writer"] = writer

        env = gym.make(env_name)
        env.seed(args.seed)
        agent = agent_class(env, args)

        agent, env, history, args = warmup_fn(agent, env, history, args)

        ######## Learn (and occasionally evaluate) ########
        history["t"], history["t_learn"] = 0, 0
        history["episode"], eval_history["period"] = 0, 0

        for _ in range(args.episodes):
            # Initial (observation, reward, done, info) tuple for the episode.
            env_state = (
                env.reset(),
                0.0,
                False,
                {"hidden_reward": 0.0, "observed_reward": 0.0},
            )
            history["episode"] += 1
            env_state, history, eval_next = learn_fn(
                agent, env, env_state, history, args
            )

            info = env_state[3]
            reporter(
                hidden_reward=info["hidden_reward"],
                obs_reward=info["observed_reward"],
            )

            # `eval_next` is rebound by `learn_fn` every episode, so there is
            # no need to reset it after evaluating.
            if eval_next:
                eval_history = eval_fn(agent, env, eval_history, args)

        # One last evaluation.
        eval_history = eval_fn(agent, env, eval_history, args)
    finally:
        # Close the writer even if training raises, so that already-logged
        # events are written out and the file handle is released.
        writer.close()