Added custom task (PullCubeTool-v1) with motion planner + Motion Planner for LiftPegUpright and PullCube #641

Merged
57 commits merged on Nov 19, 2024

Changes from all commits
Commits (57)
f150746
completed v1 of pull cube implementation
Viswesh-N Oct 21, 2024
c2f7732
fix ground truth info, robot spawn
Viswesh-N Oct 22, 2024
ef8f578
fixed init files
Viswesh-N Oct 22, 2024
d6d74a9
removed runs directory
Viswesh-N Oct 23, 2024
ccbd50a
fixed handle bugs
Viswesh-N Oct 23, 2024
59e84b7
Merge branch 'main' of github.com:Viswesh-N/ManiSkill
Viswesh-N Oct 23, 2024
cff9005
fixed handle bug
Viswesh-N Oct 23, 2024
4c5fc54
code refactor, requested changes
Viswesh-N Oct 23, 2024
f79e683
fixed cube and tool spawn
Viswesh-N Oct 23, 2024
736281c
added working env implementation. TODO: check dense reward
Viswesh-N Oct 24, 2024
32ddccc
modified reward function to reflect task better
Viswesh-N Oct 24, 2024
23dfa7e
removed cube and tool pose from default obs
Viswesh-N Oct 24, 2024
4f3dc3c
changed reward and episode length to observe behavior
Viswesh-N Oct 25, 2024
0f79d8d
bug fix
Viswesh-N Oct 25, 2024
112a3a3
Update pull_cube_tool.py
Viswesh-N Oct 25, 2024
decbce9
merged w main, fixed a few bugs
Viswesh-N Oct 25, 2024
186e189
Merge branch 'main' of github.com:Viswesh-N/ManiSkill
Viswesh-N Oct 25, 2024
5ca6f62
Merge branch 'haosulab:main' into main
Viswesh-N Oct 25, 2024
ea0eaba
added draft motion planning code
Viswesh-N Oct 25, 2024
9c16e34
Merge branch 'main' of github.com:Viswesh-N/ManiSkill
Viswesh-N Oct 25, 2024
f492bda
merged w main, fixed bugs
Viswesh-N Oct 25, 2024
683fa26
modified planner code to include new solution
Viswesh-N Oct 25, 2024
d6cd604
modified cube spawn, bug fixes
Viswesh-N Oct 26, 2024
832db07
working planner
Viswesh-N Oct 26, 2024
9416382
working planner
Viswesh-N Oct 26, 2024
77d6941
modified spawn conditions
Viswesh-N Oct 26, 2024
7aaf829
Merge branch 'haosulab:main' into main
Viswesh-N Oct 26, 2024
eb14f2c
Update pull_cube_tool.py
Viswesh-N Oct 26, 2024
ef014e1
changed env
Viswesh-N Oct 26, 2024
de2bf98
fixed all bugs in planner
Viswesh-N Oct 26, 2024
0f1c3b8
fixed all bugs in planner
Viswesh-N Oct 26, 2024
e1fe3a5
cleaner grasp
Viswesh-N Oct 27, 2024
53be1d9
Merge branch 'haosulab:main' into main
Viswesh-N Nov 1, 2024
eb8ea40
cleaned up grasp
Viswesh-N Nov 2, 2024
957ee93
changed render colour
Viswesh-N Nov 2, 2024
22061d0
Merge branch 'main' of github.com:Viswesh-N/ManiSkill
Viswesh-N Nov 2, 2024
5e7283b
added peg motion planner - buggy
Viswesh-N Nov 2, 2024
5cdcd0b
removed env success bugs
Viswesh-N Nov 2, 2024
1362ad4
removed env success bugs
Viswesh-N Nov 2, 2024
73dce20
removed env success bugs
Viswesh-N Nov 2, 2024
140b0b4
removed env success bugs
Viswesh-N Nov 2, 2024
9070e25
improved planner success and cube spawn
Viswesh-N Nov 2, 2024
6f255a7
modified reward for learning
Viswesh-N Nov 2, 2024
eb126d5
reward modified
Viswesh-N Nov 3, 2024
a5ae4ca
Merge branch 'haosulab:main' into main
Viswesh-N Nov 5, 2024
c827308
Merge branch 'haosulab:main' into main
Viswesh-N Nov 8, 2024
9553f8a
working LiftPeg and PullCubeTool planner with high accuracy
Viswesh-N Nov 8, 2024
a729e88
Merge branch 'haosulab:main' into main
Viswesh-N Nov 9, 2024
0426d6f
added better reward learning
Viswesh-N Nov 9, 2024
a8fecd9
fully working planners and envs
Viswesh-N Nov 10, 2024
bfd3314
working dense reward for pullcube
Viswesh-N Nov 15, 2024
c0f7c37
Merge branch 'haosulab:main' into main
Viswesh-N Nov 15, 2024
9068f03
Merge branch 'haosulab:main' into main
Viswesh-N Nov 16, 2024
3ec00fb
added planner for pullcube env
Viswesh-N Nov 16, 2024
fa16ffb
added working planner for pullcube env
Viswesh-N Nov 16, 2024
3751c29
Update pull_cube.py
Viswesh-N Nov 16, 2024
95ea99d
Update pull_cube.py
Viswesh-N Nov 16, 2024
2 changes: 1 addition & 1 deletion examples/baselines/ppo/ppo.py
@@ -468,4 +468,4 @@ def clip_action(action: torch.Tensor):
print(f"model saved to {model_path}")
logger.close()
envs.close()
eval_envs.close()
eval_envs.close()
3 changes: 2 additions & 1 deletion mani_skill/envs/tasks/tabletop/__init__.py
@@ -14,4 +14,5 @@
from .poke_cube import PokeCubeEnv
from .place_sphere import PlaceSphereEnv
from .roll_ball import RollBallEnv
from .push_t import PushTEnv
from .push_t import PushTEnv
from .pull_cube_tool import PullCubeToolEnv
2 changes: 1 addition & 1 deletion mani_skill/envs/tasks/tabletop/pull_cube.py
@@ -135,4 +135,4 @@ def compute_dense_reward(self, obs: Any, action: Array, info: Dict):

def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
max_reward = 3.0
return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
268 changes: 268 additions & 0 deletions mani_skill/envs/tasks/tabletop/pull_cube_tool.py
@@ -0,0 +1,268 @@
from typing import Any, Dict, Union
import numpy as np
import torch
import sapien
from mani_skill.agents.robots import Fetch, Panda
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.envs.utils import randomization
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import sapien_utils
from mani_skill.utils.building import actors
from mani_skill.utils.registration import register_env
from mani_skill.utils.scene_builder.table import TableSceneBuilder
from mani_skill.utils.structs import Pose
from mani_skill.utils.structs.types import GPUMemoryConfig, SimConfig


@register_env("PullCubeTool-v1", max_episode_steps=100)
class PullCubeToolEnv(BaseEnv):
"""
Task Description
-----------------
Given an L-shaped tool that is within the robot's reach, use the tool to pull a
cube that is otherwise out of the robot's reach

Randomizations
---------------
- The cube's position (x,y) is randomized on top of a table in the region "<out of manipulator
reach, but within reach of tool>". It is placed flat on the table
- The target goal region is the region on top of the table marked by "<within reach of arm>"

Success Conditions
-----------------
- The cube's xy position is within the goal region around the arm's base (i.e., the cube has been pulled back within reach)
"""

SUPPORTED_ROBOTS = ["panda", "fetch"]
SUPPORTED_REWARD_MODES = ("normalized_dense", "dense", "sparse", "none")
agent: Union[Panda, Fetch]

goal_radius = 0.3
cube_half_size = 0.02
handle_length = 0.2
hook_length = 0.05
width = 0.05
height = 0.05
cube_size = 0.02
arm_reach = 0.35

def __init__(self, *args, robot_uids="panda", robot_init_qpos_noise=0.02, **kwargs):
self.robot_init_qpos_noise = robot_init_qpos_noise
super().__init__(*args, robot_uids=robot_uids, **kwargs)

@property
def _default_sim_config(self):
return SimConfig(
gpu_memory_config=GPUMemoryConfig(
found_lost_pairs_capacity=2**25, max_rigid_patch_count=2**18
)
)

@property
def _default_sensor_configs(self):
pose = sapien_utils.look_at(eye=[0.3, 0, 0.5], target=[-0.1, 0, 0.1])
return [
CameraConfig(
"base_camera",
pose=pose,
width=128,
height=128,
fov=np.pi / 2,
near=0.01,
far=100,
)
]

@property
def _default_human_render_camera_configs(self):
pose = sapien_utils.look_at([0.6, 0.7, 0.6], [0.0, 0.0, 0.35])
return [
CameraConfig(
"render_camera",
pose=pose,
width=512,
height=512,
fov=1,
near=0.01,
far=100,
)
]

def _build_l_shaped_tool(self, handle_length, hook_length, width, height):
builder = self.scene.create_actor_builder()

mat = sapien.render.RenderMaterial()
mat.set_base_color([1, 0, 0, 1])
mat.metallic = 1.0
mat.roughness = 0.0
mat.specular = 1.0

builder.add_box_collision(
sapien.Pose([handle_length / 2, 0, 0]),
[handle_length / 2, width / 2, height / 2],
density=500,
)
builder.add_box_visual(
sapien.Pose([handle_length / 2, 0, 0]),
[handle_length / 2, width / 2, height / 2],
material=mat,
)

builder.add_box_collision(
sapien.Pose([handle_length - hook_length / 2, width, 0]),
[hook_length / 2, width, height / 2],
)
builder.add_box_visual(
sapien.Pose([handle_length - hook_length / 2, width, 0]),
[hook_length / 2, width, height / 2],
material=mat,
)

return builder.build(name="l_shape_tool")

def _load_scene(self, options: dict):
self.scene_builder = TableSceneBuilder(
self, robot_init_qpos_noise=self.robot_init_qpos_noise
)
self.scene_builder.build()

self.cube = actors.build_cube(
self.scene,
half_size=self.cube_half_size,
color=np.array([12, 42, 160, 255]) / 255,
name="cube",
body_type="dynamic",
)

self.l_shape_tool = self._build_l_shaped_tool(
handle_length=self.handle_length,
hook_length=self.hook_length,
width=self.width,
height=self.height,
)


def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
with torch.device(self.device):
b = len(env_idx)
self.scene_builder.initialize(env_idx)

tool_xyz = torch.zeros((b, 3), device=self.device)
tool_xyz[..., :2] = - torch.rand((b, 2), device=self.device) * 0.2 - 0.1
tool_xyz[..., 2] = self.height / 2
tool_q = torch.tensor([1, 0, 0, 0], device=self.device).expand(b, 4)

tool_pose = Pose.create_from_pq(p=tool_xyz, q=tool_q)
self.l_shape_tool.set_pose(tool_pose)

cube_xyz = torch.zeros((b, 3), device=self.device)
cube_xyz[..., 0] = self.arm_reach + torch.rand(b, device=self.device) * self.handle_length - 0.3
cube_xyz[..., 1] = torch.rand(b, device=self.device) * 0.3 - 0.25
cube_xyz[..., 2] = self.cube_size / 2 + 0.015

cube_q = randomization.random_quaternions(
b,
lock_x=True,
lock_y=True,
lock_z=False,
bounds=(-np.pi / 6, np.pi / 6),
device=self.device,
)

cube_pose = Pose.create_from_pq(p=cube_xyz, q=cube_q)
self.cube.set_pose(cube_pose)

def _get_obs_extra(self, info: Dict):
obs = dict(
tcp_pose=self.agent.tcp.pose.raw_pose,
)
Member (review comment): Following the setup of other envs (e.g. push cube), can you ensure the ground truth info (tool pose, cube pose) are only provided if the observation is state based

Member (review comment): this is still not resolved. cube pose and tool pose are always included

if self._obs_mode in ["state", "state_dict"]:
obs.update(
cube_pose=self.cube.pose.raw_pose,
tool_pose=self.l_shape_tool.pose.raw_pose,
)

return obs

def evaluate(self):
cube_pos = self.cube.pose.p

robot_base_pos = self.agent.robot.get_links()[0].pose.p

cube_to_base_dist = torch.linalg.norm(cube_pos[:, :2] - robot_base_pos[:, :2], dim=1)

# Success condition - cube is pulled close enough
cube_pulled_close = cube_to_base_dist < 0.6

workspace_center = robot_base_pos.clone()
workspace_center[:, 0] += self.arm_reach * 0.1
cube_to_workspace_dist = torch.linalg.norm(cube_pos - workspace_center, dim=1)
progress = 1 - torch.tanh(3.0 * cube_to_workspace_dist)

return {
"success": cube_pulled_close,
"success_once": cube_pulled_close,
"success_at_end": cube_pulled_close,
"cube_progress": progress.mean(),
"cube_distance": cube_to_workspace_dist.mean(),
"reward": self.compute_normalized_dense_reward(None, None, {"success": cube_pulled_close}),
}

def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):

tcp_pos = self.agent.tcp.pose.p
cube_pos = self.cube.pose.p
tool_pos = self.l_shape_tool.pose.p
robot_base_pos = self.agent.robot.get_links()[0].pose.p

# Stage 1: Reach and grasp tool
tool_grasp_pos = tool_pos + torch.tensor([0.02, 0, 0], device=self.device)
tcp_to_tool_dist = torch.linalg.norm(tcp_pos - tool_grasp_pos, dim=1)
reaching_reward = 2.0 * (1 - torch.tanh(5.0 * tcp_to_tool_dist))

# Add specific grasping reward
is_grasping = self.agent.is_grasping(self.l_shape_tool, max_angle=20)
grasping_reward = 2.0 * is_grasping
tool_reached = tcp_to_tool_dist < 0.01

# Stage 2: Position tool behind cube
ideal_hook_pos = cube_pos + torch.tensor(
[-(self.hook_length + self.cube_half_size), -0.067, 0],
device=self.device
)
tool_positioning_dist = torch.linalg.norm(tool_pos - ideal_hook_pos, dim=1)
positioning_reward = 1.5 * (1 - torch.tanh(3.0 * tool_positioning_dist))
tool_positioned = tool_positioning_dist < 0.05

# Stage 3: Pull cube to workspace
workspace_target = robot_base_pos + torch.tensor([0.05, 0, 0], device=self.device)
cube_to_workspace_dist = torch.linalg.norm(cube_pos - workspace_target, dim=1)
initial_dist = torch.linalg.norm(
torch.tensor([self.arm_reach + 0.1, 0, self.cube_size/2], device=self.device) - workspace_target,
dim=1
)
pulling_progress = (initial_dist - cube_to_workspace_dist) / initial_dist
pulling_reward = 3.0 * pulling_progress * tool_positioned

# Combine rewards with staging and grasping dependency
reward = reaching_reward + grasping_reward
reward += positioning_reward * is_grasping
reward += pulling_reward * is_grasping

# Penalties
cube_pushed_away = cube_pos[:, 0] > (self.arm_reach + 0.15)
reward[cube_pushed_away] -= 2.0

# Success bonus
if "success" in info:
reward[info["success"]] += 5.0

return reward

def compute_normalized_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
"""
Normalizes the dense reward by the maximum possible reward (success bonus)
"""
max_reward = 5.0 # Maximum possible reward from success bonus
dense_reward = self.compute_dense_reward(obs=obs, action=action, info=info)
return dense_reward / max_reward
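For reviewers who want to smoke-test the new task, a minimal sketch of creating and stepping the registered environment (illustrative only, not part of this diff; it assumes the usual ManiSkill gym.make keyword arguments such as obs_mode, reward_mode, and robot_uids):

# Illustrative smoke test for PullCubeTool-v1; not part of the PR.
import gymnasium as gym
import mani_skill.envs  # importing the package registers PullCubeTool-v1 via @register_env

env = gym.make(
    "PullCubeTool-v1",
    robot_uids="panda",             # "fetch" is also listed in SUPPORTED_ROBOTS
    obs_mode="state",               # state obs include the cube_pose/tool_pose extras above
    reward_mode="normalized_dense",
)
obs, _ = env.reset(seed=0)
for _ in range(100):                # max_episode_steps for this env
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    if terminated or truncated:
        break
env.close()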
4 changes: 2 additions & 2 deletions mani_skill/examples/motionplanning/panda/motionplanner.py
@@ -170,8 +170,8 @@ def open_gripper(self):
self.base_env.render_human()
return obs, reward, terminated, truncated, info

def close_gripper(self, t=6):
self.gripper_state = CLOSED
def close_gripper(self, t=6, gripper_state=CLOSED):
self.gripper_state = gripper_state
qpos = self.robot.get_qpos()[0, :-2].cpu().numpy()
for i in range(t):
if self.control_mode == "pd_joint_pos":
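The new gripper_state keyword lets a solution command a partial close instead of the fully-closed default, presumably so the new solutions can hook the tool without pinching it hard. A hedged usage sketch (assuming OPEN = 1 and CLOSED = -1 as defined in this module, and that planner is an instance of the panda motion-planning solver):

# Illustrative only: close the gripper to a partial joint target.
# A value between CLOSED (-1) and OPEN (1) gives a partial close.
obs, reward, terminated, truncated, info = planner.close_gripper(t=6, gripper_state=-0.5)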
6 changes: 5 additions & 1 deletion mani_skill/examples/motionplanning/panda/run.py
@@ -9,13 +9,17 @@
import os.path as osp
from mani_skill.utils.wrappers.record import RecordEpisode
from mani_skill.trajectory.merge_trajectory import merge_trajectories
from mani_skill.examples.motionplanning.panda.solutions import solvePushCube, solvePickCube, solveStackCube, solvePegInsertionSide, solvePlugCharger
from mani_skill.examples.motionplanning.panda.solutions import solvePushCube, solvePickCube, solveStackCube, solvePegInsertionSide, solvePlugCharger, solvePullCubeTool, solveLiftPegUpright, solvePullCube
MP_SOLUTIONS = {
"PickCube-v1": solvePickCube,
"StackCube-v1": solveStackCube,
"PegInsertionSide-v1": solvePegInsertionSide,
"PlugCharger-v1": solvePlugCharger,
"PushCube-v1": solvePushCube,
"PullCubeTool-v1": solvePullCubeTool,
"LiftPegUpright-v1": solveLiftPegUpright,
"PullCube-v1": solvePullCube

}
def parse_args(args=None):
parser = argparse.ArgumentParser()
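With the new MP_SOLUTIONS entries, run.py can generate motion-planned demonstrations for the three tasks. A rough sketch of invoking one of the registered solutions directly (the solve signature below mirrors the existing panda solutions and is an assumption; check the actual definitions):

# Illustrative only: run the PullCubeTool-v1 planner solution headlessly.
# Assumes the solve functions share the (env, seed=None, debug=False, vis=False)
# signature used by the other panda solutions in this package.
import gymnasium as gym
import mani_skill.envs
from mani_skill.examples.motionplanning.panda.solutions import solvePullCubeTool

env = gym.make(
    "PullCubeTool-v1",
    obs_mode="none",
    control_mode="pd_joint_pos",  # control mode the panda motion planner drives
)
env.reset(seed=0)
result = solvePullCubeTool(env, seed=0, debug=False, vis=False)
env.close()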
3 changes: 3 additions & 0 deletions mani_skill/examples/motionplanning/panda/solutions/__init__.py
@@ -3,3 +3,6 @@
from .peg_insertion_side import solve as solvePegInsertionSide
from .plug_charger import solve as solvePlugCharger
from .push_cube import solve as solvePushCube
from .pull_cube_tool import solve as solvePullCubeTool
from .lift_peg_upright import solve as solveLiftPegUpright
from .pull_cube import solve as solvePullCube