diff --git a/configs/agents/rl/push_t/gym_config.json b/configs/agents/rl/push_t/gym_config.json
new file mode 100644
index 00000000..7bb1216d
--- /dev/null
+++ b/configs/agents/rl/push_t/gym_config.json
@@ -0,0 +1,193 @@
+{
+    "id": "PushTRL",
+    "max_episodes": 5,
+    "env": {
+        "num_envs": 128,
+        "sim_steps_per_control": 4,
+        "events": {
+            "randomize_t": {
+                "func": "randomize_rigid_object_pose",
+                "mode": "reset",
+                "params": {
+                    "entity_cfg": {
+                        "uid": "t"
+                    },
+                    "position_range": [
+                        [-0.2, -0.2, 0.0],
+                        [0.2, 0.2, 0.0]
+                    ],
+                    "relative_position": true
+                }
+            },
+            "randomize_goal": {
+                "func": "randomize_target_pose",
+                "mode": "reset",
+                "params": {
+                    "position_range": [
+                        [-0.3, -0.3, 0.05],
+                        [0.3, 0.3, 0.05]
+                    ],
+                    "relative_position": false,
+                    "store_key": "goal_pose"
+                }
+            }
+        },
+        "observations": {
+            "robot_qpos": {
+                "func": "normalize_robot_joint_data",
+                "mode": "modify",
+                "name": "robot/qpos",
+                "params": {
+                    "joint_ids": [0, 1, 2, 3, 4, 5]
+                }
+            },
+            "robot_ee_pos": {
+                "func": "get_robot_eef_pose",
+                "mode": "add",
+                "name": "robot/ee_pos",
+                "params": {
+                    "part_name": "arm"
+                }
+            },
+            "t_pos": {
+                "func": "get_rigid_object_pose",
+                "mode": "add",
+                "name": "object/t_pos",
+                "params": {
+                    "entity_cfg": {
+                        "uid": "t"
+                    }
+                }
+            },
+            "goal_pos": {
+                "func": "target_position",
+                "mode": "add",
+                "name": "object/goal_pos",
+                "params": {
+                    "target_pose_key": "goal_pose"
+                }
+            }
+        },
+        "rewards": {
+            "reaching_reward": {
+                "func": "reaching_behind_object",
+                "mode": "add",
+                "weight": 0.1,
+                "params": {
+                    "object_cfg": {
+                        "uid": "t"
+                    },
+                    "target_pose_key": "goal_pose",
+                    "behind_offset": 0.015,
+                    "height_offset": 0.015,
+                    "distance_scale": 5.0,
+                    "part_name": "arm"
+                }
+            },
+            "place_reward": {
+                "func": "incremental_distance_to_target",
+                "mode": "add",
+                "weight": 1.0,
+                "params": {
+                    "source_entity_cfg": {
+                        "uid": "t"
+                    },
+                    "target_pose_key": "goal_pose",
+                    "tanh_scale": 10.0,
+                    "positive_weight": 2.0,
+                    "negative_weight": 0.5,
+                    "use_xy_only": true
+                }
+            },
+            "action_penalty": {
+                "func": "action_smoothness_penalty",
+                "mode": "add",
+                "weight": 0.01,
+                "params": {}
+            },
+            "success_bonus": {
+                "func": "success_reward",
+                "mode": "add",
+                "weight": 10.0,
+                "params": {}
+            }
+        },
+        "extensions": {
+            "obs_mode": "state",
+            "episode_length": 100,
+            "joint_limits": 0.5,
+            "action_scale": 0.1,
+            "success_threshold": 0.1
+        }
+    },
+    "robot": {
+        "uid": "Manipulator",
+        "urdf_cfg": {
+            "components": [
+                {
+                    "component_type": "arm",
+                    "urdf_path": "UniversalRobots/UR10/UR10.urdf"
+                },
+                {
+                    "component_type": "hand",
+                    "urdf_path": "DH_PGI_140_80/DH_PGI_140_80.urdf"
+                }
+            ]
+        },
+        "init_pos": [0.0, 0.0, 0.0],
+        "init_rot": [0.0, 0.0, 0.0],
+        "init_qpos": [0.0, -1.57, 1.57, -1.57, -1.57, 0.0, 0.04, 0.04],
+        "drive_pros": {
+            "drive_type": "force",
+            "stiffness": 100000.0,
+            "damping": 1000.0,
+            "max_velocity": 2.0,
+            "max_effort": 500.0
+        },
+        "solver_cfg": {
+            "arm": {
+                "class_type": "PytorchSolver",
+                "end_link_name": "ee_link",
+                "root_link_name": "base_link",
+                "tcp": [
+                    [1.0, 0.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0, 0.0],
+                    [0.0, 0.0, 1.0, 0.16],
+                    [0.0, 0.0, 0.0, 1.0]
+                ]
+            }
+        },
+        "control_parts": {
+            "arm": ["JOINT[1-6]"]
+        }
+    },
+    "sensor": [],
+    "light": {},
+    "background": [],
+    "rigid_object": [
+        {
+            "uid": "t",
+            "shape": {
+                "shape_type": "Cube",
+                "size": [0.1, 0.1, 0.1]
+            },
+            "body_type": "dynamic",
+            "init_pos": [-0.6, -0.4, 0.05],
+            "attrs": {
+                "mass": 10.0,
+                "static_friction": 3.0,
+                "dynamic_friction": 2.0,
+                "linear_damping": 2.0,
+                "angular_damping": 2.0,
"contact_offset": 0.003, + "rest_offset": 0.001, + "restitution": 0.1, + "max_depenetration_velocity": 10.0, + "max_linear_velocity": 1.0, + "max_angular_velocity": 1.0 + } + } + ], + "rigid_object_group": [], + "articulation": [] +} diff --git a/configs/agents/rl/push_t/train_config.json b/configs/agents/rl/push_t/train_config.json new file mode 100644 index 00000000..b7a101b1 --- /dev/null +++ b/configs/agents/rl/push_t/train_config.json @@ -0,0 +1,67 @@ +{ + "trainer": { + "exp_name": "push_t_ppo", + "gym_config": "configs/agents/rl/push_t/gym_config.json", + "seed": 42, + "device": "cuda:0", + "headless": false, + "enable_rt": false, + "gpu_id": 0, + "num_envs": 8, + "iterations": 1000, + "rollout_steps": 1024, + "eval_freq": 200, + "save_freq": 200, + "use_wandb": true, + "wandb_project_name": "embodychain-push_t", + "events": { + "eval": { + "record_camera": { + "func": "record_camera_data_async", + "mode": "interval", + "interval_step": 1, + "params": { + "name": "main_cam", + "resolution": [640, 480], + "eye": [-1.4, 1.4, 2.0], + "target": [0, 0, 0], + "up": [0, 0, 1], + "intrinsics": [600, 600, 320, 240], + "save_path": "./outputs/videos/eval" + } + } + } + } + }, + "policy": { + "name": "actor_critic", + "actor": { + "type": "mlp", + "network_cfg": { + "hidden_sizes": [256, 256], + "activation": "relu" + } + }, + "critic": { + "type": "mlp", + "network_cfg": { + "hidden_sizes": [256, 256], + "activation": "relu" + } + } + }, + "algorithm": { + "name": "ppo", + "cfg": { + "learning_rate": 0.0001, + "n_epochs": 10, + "batch_size": 8192, + "gamma": 0.99, + "gae_lambda": 0.95, + "clip_coef": 0.2, + "ent_coef": 0.01, + "vf_coef": 0.5, + "max_grad_norm": 0.5 + } + } +} diff --git a/embodichain/lab/gym/envs/tasks/rl/push_t.py b/embodichain/lab/gym/envs/tasks/rl/push_t.py new file mode 100644 index 00000000..8b2ba311 --- /dev/null +++ b/embodichain/lab/gym/envs/tasks/rl/push_t.py @@ -0,0 +1,149 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from typing import Dict, Any, Sequence + +import torch + +from embodichain.lab.gym.utils.registration import register_env +from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg +from embodichain.lab.sim.cfg import MarkerCfg +from embodichain.lab.sim.types import EnvObs, EnvAction + + +@register_env("PushTRL", max_episode_steps=50, override=True) +class PushTEnv(EmbodiedEnv): + """Push-T task. + + The task requires pushing a T-shaped block to a goal position on the tabletop. + Success is defined by the object being within a planar distance threshold. 
+ """ + + def __init__(self, cfg=None, **kwargs): + if cfg is None: + cfg = EmbodiedEnvCfg() + + super().__init__(cfg, **kwargs) + + self.control_part_name = getattr(self, "control_part_name", "arm") + self.require_on_table = getattr(self, "require_on_table", True) + self.table_height = getattr(self, "table_height", 0.0) + + @property + def goal_pose(self) -> torch.Tensor | None: + """Get current goal poses (4x4 matrices) for all environments.""" + return getattr(self, "_goal_pose", None) + + def _draw_goal_marker(self) -> None: + """Draw axis marker at goal position for visualization.""" + if self.goal_pose is None: + return + + goal_poses = self.goal_pose.detach().cpu().numpy() + for arena_idx in range(self.cfg.num_envs): + marker_name = f"goal_marker_{arena_idx}" + self.sim.remove_marker(marker_name) + + marker_cfg = MarkerCfg( + name=marker_name, + marker_type="axis", + axis_xpos=[goal_poses[arena_idx]], + axis_size=0.003, + axis_len=0.02, + arena_index=arena_idx, + ) + self.sim.draw_marker(cfg=marker_cfg) + + def _initialize_episode( + self, env_ids: Sequence[int] | None = None, **kwargs + ) -> None: + super()._initialize_episode(env_ids=env_ids, **kwargs) + # self._draw_goal_marker() + + def _step_action(self, action: EnvAction) -> EnvAction: + scaled_action = action * self.action_scale + scaled_action = torch.clamp( + scaled_action, -self.joint_limits, self.joint_limits + ) + current_qpos = self.robot.body_data.qpos + target_qpos = current_qpos.clone() + target_qpos[:, :6] += scaled_action[:, :6] + self.robot.set_qpos(qpos=target_qpos) + return scaled_action + + def _get_eef_pos(self) -> torch.Tensor: + """Get end-effector position using FK.""" + if self.control_part_name: + try: + joint_ids = self.robot.get_joint_ids(self.control_part_name) + qpos = self.robot.get_qpos()[:, joint_ids] + ee_pose = self.robot.compute_fk( + name=self.control_part_name, qpos=qpos, to_matrix=True + ) + except (ValueError, KeyError, AttributeError): + qpos = self.robot.get_qpos() + ee_pose = self.robot.compute_fk(qpos=qpos, to_matrix=True) + else: + qpos = self.robot.get_qpos() + ee_pose = self.robot.compute_fk(qpos=qpos, to_matrix=True) + return ee_pose[:, :3, 3] + + def get_info(self, **kwargs) -> Dict[str, Any]: + cube = self.sim.get_rigid_object("cube") + cube_pos = cube.body_data.pose[:, :3] + ee_pos = self._get_eef_pos() + + if self.goal_pose is not None: + goal_pos = self.goal_pose[:, :3, 3] + xy_distance = torch.norm(cube_pos[:, :2] - goal_pos[:, :2], dim=1) + is_success = xy_distance < self.success_threshold + if self.require_on_table: + is_success = is_success & (cube_pos[:, 2] >= self.table_height - 1e-3) + else: + xy_distance = torch.zeros(self.cfg.num_envs, device=self.device) + is_success = torch.zeros( + self.cfg.num_envs, device=self.device, dtype=torch.bool + ) + + ee_to_cube = torch.norm(ee_pos - cube_pos, dim=1) + info = { + "success": is_success, + "fail": torch.zeros( + self.cfg.num_envs, device=self.device, dtype=torch.bool + ), + "elapsed_steps": self._elapsed_steps, + "goal_pose": self.goal_pose, + } + info["metrics"] = { + "distance_to_goal": xy_distance, + "eef_to_cube": ee_to_cube, + "cube_height": cube_pos[:, 2], + } + return info + + def check_truncated(self, obs: EnvObs, info: Dict[str, Any]) -> torch.Tensor: + is_timeout = self._elapsed_steps >= self.episode_length + cube = self.sim.get_rigid_object("cube") + cube_pos = cube.body_data.pose[:, :3] + is_fallen = cube_pos[:, 2] < -0.1 + return is_timeout | is_fallen + + def evaluate(self, **kwargs) -> Dict[str, Any]: + info = 
+        return {
+            "success": info["success"][0].item(),
+            "distance_to_goal": info["metrics"]["distance_to_goal"],
+        }
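
Reviewer note: for quick sanity-checking of the action pipeline in `_step_action` above, here is a self-contained sketch in plain torch (no simulator). The constants mirror `"action_scale": 0.1` and `"joint_limits": 0.5` from the `extensions` block of gym_config.json, and the 8-DoF layout (6 arm + 2 gripper joints) follows the robot config; the helper name `step_action` is illustrative only, not part of the codebase.

import torch

action_scale, joint_limits = 0.1, 0.5  # from "extensions" in gym_config.json

def step_action(action: torch.Tensor, qpos: torch.Tensor) -> torch.Tensor:
    # Scale the raw policy output, then clamp the per-step joint delta.
    delta = torch.clamp(action * action_scale, -joint_limits, joint_limits)
    target = qpos.clone()
    target[:, :6] += delta[:, :6]  # only the 6 arm joints move; the gripper is untouched
    return target

qpos = torch.zeros(128, 8)    # 128 envs, 6 arm + 2 gripper joints
action = torch.randn(128, 6)  # per-env arm deltas from the policy
assert step_action(action, qpos).shape == (128, 8)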