Add Anymal-D torchrl cfg #1180

Open · wants to merge 5 commits into main
Changes from 4 commits
9 changes: 9 additions & 0 deletions source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
@@ -1,6 +1,15 @@
Changelog
---------

0.24.20 (2024-10-07)
~~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added torchrl PPO training configuration for the Anymal-D velocity environments


0.24.19 (2024-10-05)
~~~~~~~~~~~~~~~~~~~~

@@ -19,6 +19,7 @@
"env_cfg_entry_point": flat_env_cfg.AnymalDFlatEnvCfg,
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerCfg",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml",
"torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDFlatPPORunnerCfg",
},
)

@@ -30,6 +31,7 @@
"env_cfg_entry_point": flat_env_cfg.AnymalDFlatEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerCfg",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml",
"torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDFlatPPORunnerCfg",
},
)

@@ -41,6 +43,7 @@
"env_cfg_entry_point": rough_env_cfg.AnymalDRoughEnvCfg,
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml",
"torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDRoughPPORunnerCfg",
},
)

@@ -51,6 +54,6 @@
kwargs={
"env_cfg_entry_point": rough_env_cfg.AnymalDRoughEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml",
"torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDRoughPPORunnerCfg",
},
)
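
For reference, the new entry point can be consumed the same way as the existing rsl_rl/skrl ones. A minimal sketch using Isaac Lab's registry helper, assuming the flat-terrain task id follows the usual naming scheme (the gym.register ids themselves are not visible in this diff):

    from omni.isaac.lab_tasks.utils import load_cfg_from_registry

    # "Isaac-Velocity-Flat-Anymal-D-v0" is an assumed task id; substitute the
    # id used in the gym.register calls above.
    agent_cfg = load_cfg_from_registry("Isaac-Velocity-Flat-Anymal-D-v0", "torchrl_cfg_entry_point")
    print(agent_cfg.experiment_name)  # -> "anymal_d_flat"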
agents/torchrl_ppo_cfg.py (new file)
@@ -0,0 +1,188 @@
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

import torch.nn as nn
from dataclasses import MISSING

from omni.isaac.lab.utils import configclass

from omni.isaac.lab_tasks.utils.wrappers.torchrl.torchrl_ppo_runner_cfg import (
ClipPPOLossCfg,
CollectorCfg,
OnPolicyPPORunnerCfg,
ProbabilisticActorCfg,
ValueOperatorCfg,
)


class AnymalDActorNN(nn.Module):
Contributor
Question: if the model is defined manually in the agent config like this, how will changing it from the CLI (e.g. via hydra) be supported?

Contributor Author
I suppose we could add a torchrl CLI argument that lets the user specify which model-definition method to use, if we want to support both?

def __init__(self):
super().__init__()
self.model = nn.Sequential(
nn.Linear(in_features=48, out_features=512, bias=True),
nn.ELU(alpha=1.0),
nn.Linear(in_features=512, out_features=256, bias=True),
nn.ELU(alpha=1.0),
nn.Linear(in_features=256, out_features=128, bias=True),
nn.ELU(alpha=1.0),
nn.Linear(in_features=128, out_features=12 * 2, bias=True),
)

def forward(self, x):
return self.model(x)


class AnymalDCriticNN(nn.Module):
def __init__(self):
super().__init__()
self.model = nn.Sequential(
nn.Linear(in_features=48, out_features=512, bias=True),
nn.ELU(alpha=1.0),
nn.Linear(in_features=512, out_features=256, bias=True),
nn.ELU(alpha=1.0),
nn.Linear(in_features=256, out_features=128, bias=True),
nn.ELU(alpha=1.0),
nn.Linear(in_features=128, out_features=1, bias=True),
)

def forward(self, x):
return self.model(x)
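
Note: a minimal sketch of how these two networks map onto plain torchrl modules, assuming the wrapper wires them up in the standard way (the wrapper cfg classes imported above are not part of this diff). The actor head emits 12 * 2 = 24 values, which NormalParamExtractor splits into the "loc"/"scale" halves declared in the module configs below:

    import torch
    import torch.nn as nn
    from tensordict import TensorDict
    from tensordict.nn import NormalParamExtractor, TensorDictModule
    from torchrl.modules import ProbabilisticActor, ValueOperator

    policy_net = nn.Sequential(AnymalDActorNN(), NormalParamExtractor())
    policy_module = TensorDictModule(policy_net, in_keys=["policy"], out_keys=["loc", "scale"])
    actor = ProbabilisticActor(policy_module, in_keys=["loc", "scale"], distribution_class=torch.distributions.Normal)
    critic = ValueOperator(AnymalDCriticNN(), in_keys=["policy"], out_keys=["state_value"])

    td = TensorDict({"policy": torch.zeros(4, 48)}, batch_size=[4])
    td = actor(td)   # adds "loc", "scale", "action"
    td = critic(td)  # adds "state_value"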


@configclass
class AnymalDActorModule(ProbabilisticActorCfg):

actor_network = AnymalDActorNN

init_noise_std = 1.0

in_keys = ["policy"]

out_keys: list[str] = ["loc", "scale"]


@configclass
class AnymalDCriticModule(ValueOperatorCfg):

critic_network = AnymalDCriticNN

in_keys = ["policy"]

out_keys = ["state_value"]


"""
Collector Module Definition
"""


@configclass
class AnymalDCollectorModule(CollectorCfg):

actor_network = AnymalDActorModule()

split_trajs = False
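
Note: split_trajs mirrors the flag of torchrl's data collectors. A minimal sketch of the collector this cfg presumably parameterizes (the env handle and batch sizes are placeholders, not values from this PR):

    from torchrl.collectors import SyncDataCollector

    collector = SyncDataCollector(
        env,                          # a torchrl-wrapped Isaac Lab env (placeholder)
        actor,                        # the ProbabilisticActor built from AnymalDActorModule
        frames_per_batch=24 * 4096,   # placeholder: num_steps_per_env * num_envs
        total_frames=-1,              # collect until the trainer stops requesting batches
        split_trajs=False,            # keep batches flat instead of padding per-trajectory
    )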


"""
Loss Module Definition
"""


@configclass
class AnymalDPPOLossModule(ClipPPOLossCfg):

actor_network = AnymalDActorModule()

value_network = AnymalDCriticModule()

value_key = "state_value"

desired_kl = 0.0012

beta = 1.0

decrement = 0.5

increment = 2.0

value_loss_coef = 0.5

clip_param = 0.2

entropy_coef = 0.02

entropy_bonus = True

loss_critic_type = "l2"

normalize_advantage = True

learning_rate = 1e-3

gamma = 0.99

lam = 0.95

max_grad_norm = 1.0
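
Note: desired_kl together with beta/increment/decrement suggests an adaptive KL scheme on top of the clipped loss. A sketch of the update rule these fields usually drive, patterned after torchrl's KLPENPPOLoss (whether the wrapper applies it to beta, to the learning rate via lr_schedule="adaptive", or to both is an assumption):

    def update_beta(beta: float, kl: float, desired_kl: float = 0.0012,
                    increment: float = 2.0, decrement: float = 0.5) -> float:
        """Adaptively rescale the KL penalty coefficient."""
        if kl > desired_kl * 1.5:
            return beta * increment  # policy moving too fast: penalize harder
        if kl < desired_kl / 1.5:
            return beta * decrement  # policy barely moving: relax the penalty
        return beta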


"""
Trainer Module Definition
"""


@configclass
class AnymalDPPORunnerCfg(OnPolicyPPORunnerCfg):

loss_module = AnymalDPPOLossModule()

collector_module = AnymalDCollectorModule()

seed = 42

num_steps_per_env = 24

num_epochs = 5

num_mini_batches = 4

lr_schedule = "adaptive"

max_iterations = 25000

save_interval = 50

save_trainer_interval = 100

experiment_name = MISSING

wandb_project = MISSING

logger = "wandb"


@configclass
class AnymalDFlatPPORunnerCfg(AnymalDPPORunnerCfg):
def __post_init__(self):
"""Post initialization."""

# change experiment name
self.experiment_name = "anymal_d_flat"

# change wandb project
self.wandb_project = "anymal_d_flat"


@configclass
class AnymalDRoughPPORunnerCfg(AnymalDPPORunnerCfg):
def __post_init__(self):
"""Post initialization."""

# change experiment name
self.experiment_name = "anymal_d_rough"

# change wandb project
self.wandb_project = "anymal_d_rough"
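
A minimal usage sketch: the terrain-specific subclasses fill the MISSING fields via __post_init__, and individual fields can still be overridden after construction:

    cfg = AnymalDFlatPPORunnerCfg()
    assert cfg.experiment_name == "anymal_d_flat"
    assert cfg.wandb_project == "anymal_d_flat"

    # quick smoke-test run: shorten training, keep everything else as configured
    cfg.max_iterations = 100
    cfg.save_interval = 10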