From 1a1ec76897889cdb7a436229386a3ff50dab5e69 Mon Sep 17 00:00:00 2001 From: ustcscgyer Date: Tue, 10 Feb 2026 23:09:20 +0000 Subject: [PATCH 01/10] Add video recording to the job --- mimickit/engines/engine_builder.py | 4 +- mimickit/engines/isaac_lab_engine.py | 23 ++-- mimickit/envs/add_env.py | 4 +- mimickit/envs/amp_env.py | 4 +- mimickit/envs/ase_env.py | 4 +- mimickit/envs/char_dof_test_env.py | 4 +- mimickit/envs/char_env.py | 4 +- mimickit/envs/deepmimic_env.py | 4 +- mimickit/envs/env_builder.py | 24 +++-- mimickit/envs/sim_env.py | 15 ++- mimickit/envs/static_objects_env.py | 4 +- mimickit/envs/task_location_env.py | 4 +- mimickit/envs/task_steering_env.py | 4 +- mimickit/envs/view_motion_env.py | 4 +- mimickit/learning/base_agent.py | 11 ++ mimickit/run.py | 27 ++++- mimickit/util/display.py | 24 +++++ mimickit/util/video_recorder.py | 153 +++++++++++++++++++++++++++ requirements.txt | 1 + 19 files changed, 277 insertions(+), 45 deletions(-) create mode 100644 mimickit/util/display.py create mode 100644 mimickit/util/video_recorder.py diff --git a/mimickit/engines/engine_builder.py b/mimickit/engines/engine_builder.py index 20314b8..e19bb47 100644 --- a/mimickit/engines/engine_builder.py +++ b/mimickit/engines/engine_builder.py @@ -3,7 +3,7 @@ except: pass -def build_engine(config, num_envs, device, visualize): +def build_engine(config, num_envs, device, visualize, enable_cameras=False): eng_name = config["engine_name"] if (eng_name == "isaac_gym"): @@ -11,7 +11,7 @@ def build_engine(config, num_envs, device, visualize): engine = isaac_gym_engine.IsaacGymEngine(config, num_envs, device, visualize) elif (eng_name == "isaac_lab"): import engines.isaac_lab_engine as isaac_lab_engine - engine = isaac_lab_engine.IsaacLabEngine(config, num_envs, device, visualize) + engine = isaac_lab_engine.IsaacLabEngine(config, num_envs, device, visualize, enable_cameras=enable_cameras) elif (eng_name == "newton"): import engines.newton_engine as newton_engine engine = 
newton_engine.NewtonEngine(config, num_envs, device, visualize) diff --git a/mimickit/engines/isaac_lab_engine.py b/mimickit/engines/isaac_lab_engine.py index 612aab6..5fbdcf5 100644 --- a/mimickit/engines/isaac_lab_engine.py +++ b/mimickit/engines/isaac_lab_engine.py @@ -59,7 +59,7 @@ def is_valid_clone(self, other): class IsaacLabEngine(engine.Engine): - def __init__(self, config, num_envs, device, visualize): + def __init__(self, config, num_envs, device, visualize, enable_cameras=False): super().__init__() self._device = device @@ -72,7 +72,7 @@ def __init__(self, config, num_envs, device, visualize): self._sim_steps = int(sim_freq / control_freq) sim_timestep = 1.0 / sim_freq - self._create_simulator(sim_timestep, visualize) + self._create_simulator(sim_timestep, visualize, enable_cameras) self._env_spacing = config["env_spacing"] self._obj_cfgs = [] @@ -86,10 +86,12 @@ def __init__(self, config, num_envs, device, visualize): self._build_ground() self._env_offsets = self._compute_env_offsets(num_envs) + if (visualize or enable_cameras): + self._build_camera() + if (visualize): self._prev_frame_time = 0.0 self._build_lights() - self._build_camera() self._build_draw_interface() self._setup_keyboard() @@ -652,8 +654,17 @@ def _setup_keyboard(self): self._keyboard_callbacks = dict() return - def _create_simulator(self, sim_timestep, visualize): - self._app_launcher = AppLauncher({"headless": not visualize, "device": self._device}) + def _create_simulator(self, sim_timestep, visualize, enable_cameras=False): + # Headless rendering (enable_cameras without a display) requires a virtual display + if enable_cameras and not visualize: + from util.display import ensure_virtual_display + ensure_virtual_display() + + self._app_launcher = AppLauncher({ + "headless": not visualize, + "device": self._device, + "enable_cameras": enable_cameras or visualize, + }) import isaaclab.sim as sim_utils from isaacsim.core.utils.stage import get_current_stage @@ -727,7 +738,7 @@ def 
_post_sim_step(self): def _clear_forces(self): for obj in self._objs: - if (obj.has_external_wrench): + if (getattr(obj, 'has_external_wrench', False)): forces = torch.zeros([1, 3], dtype=torch.float, device=self._device) torques = torch.zeros([1, 3], dtype=torch.float, device=self._device) obj.set_external_force_and_torque(forces=forces, torques=torques, diff --git a/mimickit/envs/add_env.py b/mimickit/envs/add_env.py index f2dbac4..3739378 100644 --- a/mimickit/envs/add_env.py +++ b/mimickit/envs/add_env.py @@ -5,9 +5,9 @@ import util.torch_util as torch_util class ADDEnv(amp_env.AMPEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) return def _build_disc_obs_buffers(self): diff --git a/mimickit/envs/amp_env.py b/mimickit/envs/amp_env.py index f7aff7f..35dcf1f 100644 --- a/mimickit/envs/amp_env.py +++ b/mimickit/envs/amp_env.py @@ -7,11 +7,11 @@ import util.torch_util as torch_util class AMPEnv(deepmimic_env.DeepMimicEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): self._num_disc_obs_steps = env_config["num_disc_obs_steps"] super().__init__(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, - visualize=visualize) + visualize=visualize, enable_cameras=enable_cameras) return def get_disc_obs_space(self): diff --git a/mimickit/envs/ase_env.py b/mimickit/envs/ase_env.py index 53df737..df61aad 100644 --- a/mimickit/envs/ase_env.py +++ b/mimickit/envs/ase_env.py @@ -4,11 +4,11 @@ import envs.char_env as char_env class ASEEnv(amp_env.AMPEnv): - def 
__init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): self._default_reset_prob = env_config["default_reset_prob"] super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) return def _reset_char(self, env_ids): diff --git a/mimickit/envs/char_dof_test_env.py b/mimickit/envs/char_dof_test_env.py index bd57a91..bf30c46 100644 --- a/mimickit/envs/char_dof_test_env.py +++ b/mimickit/envs/char_dof_test_env.py @@ -6,11 +6,11 @@ import engines.engine as engine class CharDofTestEnv(char_env.CharEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): self._time_per_dof = 4.0 super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) self._episode_length = self._time_per_dof * self._pd_low.shape[0] return diff --git a/mimickit/envs/char_env.py b/mimickit/envs/char_env.py index d19f9de..ece39d1 100644 --- a/mimickit/envs/char_env.py +++ b/mimickit/envs/char_env.py @@ -15,13 +15,13 @@ import engines.engine as engine class CharEnv(sim_env.SimEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): self._global_obs = env_config["global_obs"] self._root_height_obs = env_config.get("root_height_obs", True) self._zero_center_action = env_config.get("zero_center_action", False) super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + 
num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) char_id = self._get_char_id() self._print_char_prop(0, char_id) diff --git a/mimickit/envs/deepmimic_env.py b/mimickit/envs/deepmimic_env.py index 255fca1..cdd8204 100644 --- a/mimickit/envs/deepmimic_env.py +++ b/mimickit/envs/deepmimic_env.py @@ -10,7 +10,7 @@ import util.torch_util as torch_util class DeepMimicEnv(char_env.CharEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): self._enable_early_termination = env_config["enable_early_termination"] self._num_phase_encoding = env_config.get("num_phase_encoding", 0) @@ -40,7 +40,7 @@ def __init__(self, env_config, engine_config, num_envs, device, visualize): self._visualize_ref_char = env_config.get("visualize_ref_char", True) super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) return def get_reward_succ(self): diff --git a/mimickit/envs/env_builder.py b/mimickit/envs/env_builder.py index c949504..60ce354 100644 --- a/mimickit/envs/env_builder.py +++ b/mimickit/envs/env_builder.py @@ -5,7 +5,7 @@ from util.logger import Logger -def build_env(env_file, engine_file, num_envs, device, visualize): +def build_env(env_file, engine_file, num_envs, device, visualize, enable_cameras=False): env_config, engine_config = load_configs(env_file, engine_file) env_name = env_config["env_name"] @@ -13,37 +13,39 @@ def build_env(env_file, engine_file, num_envs, device, visualize): if (env_name == "char"): import envs.char_env as char_env - env = char_env.CharEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + EnvClass = char_env.CharEnv elif (env_name == "deepmimic"): import envs.deepmimic_env as 
deepmimic_env - env = deepmimic_env.DeepMimicEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + EnvClass = deepmimic_env.DeepMimicEnv elif (env_name == "amp"): import envs.amp_env as amp_env - env = amp_env.AMPEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + EnvClass = amp_env.AMPEnv elif (env_name == "ase"): import envs.ase_env as ase_env - env = ase_env.ASEEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + EnvClass = ase_env.ASEEnv elif (env_name == "add"): import envs.add_env as add_env - env = add_env.ADDEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + EnvClass = add_env.ADDEnv elif (env_name == "char_dof_test"): import envs.char_dof_test_env as char_dof_test_env - env = char_dof_test_env.CharDofTestEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + EnvClass = char_dof_test_env.CharDofTestEnv elif (env_name == "view_motion"): import envs.view_motion_env as view_motion_env - env = view_motion_env.ViewMotionEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + EnvClass = view_motion_env.ViewMotionEnv elif (env_name == "task_location"): import envs.task_location_env as task_location_env - env = task_location_env.TaskLocationEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + EnvClass = task_location_env.TaskLocationEnv elif (env_name == "task_steering"): import envs.task_steering_env as task_steering_env - env = task_steering_env.TaskSteeringEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + EnvClass = task_steering_env.TaskSteeringEnv elif (env_name == "static_objects"): 
import envs.static_objects_env as static_objects_env - env = static_objects_env.StaticObjectsEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + EnvClass = static_objects_env.StaticObjectsEnv else: assert(False), "Unsupported env: {}".format(env_name) + env = EnvClass(env_config, engine_config, num_envs, device, visualize, enable_cameras) + return env def load_config(file): diff --git a/mimickit/envs/sim_env.py b/mimickit/envs/sim_env.py index 9262a9f..2e138fc 100644 --- a/mimickit/envs/sim_env.py +++ b/mimickit/envs/sim_env.py @@ -18,13 +18,13 @@ class PlayMode(enum.Enum): class SimEnv(base_env.BaseEnv): NAME = "sim_env" - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): super().__init__(visualize=visualize) self._device = device self._episode_length = env_config["episode_length"] # episode length in seconds - self._engine = self._build_engine(engine_config, num_envs, device, visualize) + self._engine = self._build_engine(engine_config, num_envs, device, visualize, enable_cameras) self._build_envs(env_config, num_envs) self._engine.initialize_sim() @@ -32,9 +32,12 @@ def __init__(self, env_config, engine_config, num_envs, device, visualize): self._build_sim_tensors(env_config) self._build_data_buffers() + self._enable_cameras = enable_cameras + if self._visualize or self._enable_cameras: + self._build_camera(env_config) + if self._visualize: self._play_mode = PlayMode.PLAY - self._build_camera(env_config) self._setup_gui() return @@ -75,6 +78,8 @@ def step(self, action): if (self._visualize): self._render() + elif (self._enable_cameras): + self._update_camera() return self._obs_buf, self._reward_buf, self._done_buf, self._info @@ -160,8 +165,8 @@ def _post_physics_step(self): self._update_done() return - def _build_engine(self, engine_config, num_envs, device, visualize): - 
engine = engine_builder.build_engine(engine_config, num_envs, device, visualize) + def _build_engine(self, engine_config, num_envs, device, visualize, enable_cameras=False): + engine = engine_builder.build_engine(engine_config, num_envs, device, visualize, enable_cameras=enable_cameras) return engine @abc.abstractmethod diff --git a/mimickit/envs/static_objects_env.py b/mimickit/envs/static_objects_env.py index 7f3a473..bb61e21 100644 --- a/mimickit/envs/static_objects_env.py +++ b/mimickit/envs/static_objects_env.py @@ -3,9 +3,9 @@ import numpy as np class StaticObjectsEnv(deepmimic_env.DeepMimicEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) return def _build_env(self, env_id, config): diff --git a/mimickit/envs/task_location_env.py b/mimickit/envs/task_location_env.py index f170f52..b53f87b 100644 --- a/mimickit/envs/task_location_env.py +++ b/mimickit/envs/task_location_env.py @@ -6,14 +6,14 @@ import util.torch_util as torch_util class TaskLocationEnv(amp_env.AMPEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): self._tar_speed = env_config["tar_speed"] self._tar_change_time_min = env_config["tar_change_time_min"] self._tar_change_time_max = env_config["tar_change_time_max"] self._tar_dist_max = env_config["tar_dist_max"] super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) return def _build_envs(self, config, num_envs): diff 
--git a/mimickit/envs/task_steering_env.py b/mimickit/envs/task_steering_env.py index 7e017a6..991ea26 100644 --- a/mimickit/envs/task_steering_env.py +++ b/mimickit/envs/task_steering_env.py @@ -7,7 +7,7 @@ class TaskSteeringEnv(amp_env.AMPEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): self._rand_tar_dir = env_config.get("rand_tar_dir", True) self._rand_face_dir = env_config.get("rand_face_dir", True) self._tar_speed_min = env_config["tar_speed_min"] @@ -21,7 +21,7 @@ def __init__(self, env_config, engine_config, num_envs, device, visualize): self._reward_steering_vel_scale = float(env_config["reward_steering_vel_scale"]) super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) return def _build_envs(self, config, num_envs): diff --git a/mimickit/envs/view_motion_env.py b/mimickit/envs/view_motion_env.py index 66a107e..0642aa9 100644 --- a/mimickit/envs/view_motion_env.py +++ b/mimickit/envs/view_motion_env.py @@ -8,12 +8,12 @@ import torch class ViewMotionEnv(char_env.CharEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): self._time_scale = 1.0 engine_config["sim_freq"] = engine_config["control_freq"] super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) return def _build_envs(self, env_config, num_envs): diff --git a/mimickit/learning/base_agent.py b/mimickit/learning/base_agent.py index 8efd5fb..a258922 100644 --- a/mimickit/learning/base_agent.py +++ b/mimickit/learning/base_agent.py 
@@ -46,6 +46,7 @@ def __init__(self, config, env, device): self._mode = AgentMode.TRAIN self._curr_obs = None self._curr_info = None + self._video_recorder = None return def train_model(self, max_samples, out_dir, save_int_models, logger_type): @@ -87,6 +88,10 @@ def train_model(self, max_samples, out_dir, save_int_models, logger_type): self._iter += 1 + # flush any in-progress video recording at end of training + if (self._video_recorder is not None): + self._video_recorder.flush() + return def test_model(self, num_episodes): @@ -269,6 +274,9 @@ def _init_iter(self): def _rollout_train(self, num_steps): for i in range(num_steps): + if (self._video_recorder is not None): + self._video_recorder.pre_step() + action, action_info = self._decide_action(self._curr_obs, self._curr_info) self._record_data_pre_step(self._curr_obs, self._curr_info, action, action_info) @@ -278,6 +286,9 @@ def _rollout_train(self, num_steps): self._curr_obs, self._curr_info = self._reset_done_envs(done) self._exp_buffer.inc() + + if (self._video_recorder is not None): + self._video_recorder.post_step() return def _rollout_test(self, num_episodes): diff --git a/mimickit/run.py b/mimickit/run.py index fe333f0..f074894 100644 --- a/mimickit/run.py +++ b/mimickit/run.py @@ -33,12 +33,14 @@ def load_args(argv): def build_env(args, num_envs, device, visualize): env_file = args.parse_string("env_config") engine_file = args.parse_string("engine_config") - env = env_builder.build_env(env_file, engine_file, num_envs, device, visualize) + enable_cameras = args.parse_bool("video", False) + env = env_builder.build_env(env_file, engine_file, num_envs, device, visualize, enable_cameras=enable_cameras) return env def build_agent(args, env, device): agent_file = args.parse_string("agent_config") agent = agent_builder.build_agent(agent_file, env, device) + agent._video_recorder = build_video_recorder(args, env) return agent def train(agent, max_samples, out_dir, save_int_models, logger_type): @@ -92,6 +94,29 
@@ def set_rand_seed(args): util.set_rand_seed(rand_seed) return +def build_video_recorder(args, env): + video = args.parse_bool("video", False) + if not video: + return None + + import engines.isaac_lab_engine as isaac_lab_engine + if not isinstance(env._engine, isaac_lab_engine.IsaacLabEngine): + Logger.print("Video recording is only supported with Isaac Lab engine, skipping") + return None + + from util.video_recorder import VideoRecorder + + video_length = args.parse_int("video_length", 200) + video_interval = args.parse_int("video_interval", 2000) + + engine = env._engine + recorder = VideoRecorder(engine=engine, + video_length=video_length, + video_interval=video_interval) + + Logger.print("Video recording enabled: length={}, interval={}".format(video_length, video_interval)) + return recorder + def run(rank, num_procs, device, master_port, args): mode = args.parse_string("mode", "train") num_envs = args.parse_int("num_envs", 1) diff --git a/mimickit/util/display.py b/mimickit/util/display.py new file mode 100644 index 0000000..6d7335b --- /dev/null +++ b/mimickit/util/display.py @@ -0,0 +1,24 @@ +import os +import subprocess + +from util.logger import Logger + +def ensure_virtual_display(display=":99"): + """Start Xvfb virtual display if no DISPLAY is set. Needed for headless Vulkan rendering. + + If DISPLAY is already set, uses it (assumes it's valid). Otherwise starts Xvfb on the + specified display number. + """ + if "DISPLAY" in os.environ: + return + + try: + subprocess.Popen(["Xvfb", display, "-screen", "0", "1024x768x24"], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + import time + time.sleep(1) + os.environ["DISPLAY"] = display + Logger.print("Started virtual display on {}".format(display)) + except FileNotFoundError: + Logger.print("WARNING: Xvfb not found. 
Install with: apt-get install xvfb") + Logger.print("Headless camera rendering may not work without a virtual display.") diff --git a/mimickit/util/video_recorder.py b/mimickit/util/video_recorder.py new file mode 100644 index 0000000..4d26c3e --- /dev/null +++ b/mimickit/util/video_recorder.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +import numpy as np +import os +import tempfile +from typing import TYPE_CHECKING + +from util.logger import Logger + +if TYPE_CHECKING: + import engines.engine as engine + + +class VideoRecorder: + """Records video frames from the simulation and uploads to WandB. + + Works with Isaac Lab engine in headless mode using the omni.replicator + annotator API to capture viewport images. + + TODO: Add support for other engines beyond Isaac Lab. + + Args: + engine: The simulation engine (e.g. IsaacLabEngine). + video_length: Number of steps per video recording. + video_interval: Interval (in training steps) between video recordings. + resolution: Tuple (width, height) for the captured frames. + fps: Frames per second for the output video. + cam_prim_path: USD prim path for the camera to capture from. 
+ """ + + def __init__(self, engine: engine.Engine, video_length: int = 200, + video_interval: int = 2000, resolution: tuple[int, int] = (640, 480), + fps: int = 30, cam_prim_path: str = "/OmniverseKit_Persp") -> None: + self._engine: engine.Engine = engine + self._video_length: int = video_length + self._video_interval: int = video_interval + self._resolution: tuple[int, int] = resolution + self._fps: int = fps + self._cam_prim_path: str = cam_prim_path + + self._recorded_frames: list[np.ndarray] = [] + self._recording: bool = False + self._global_step: int = 0 + self._video_count: int = 0 + + self._annotator: object | None = None + self._render_product: object | None = None + + return + + def _ensure_annotator(self) -> None: + """Lazily create the render product and RGB annotator.""" + if self._annotator is not None: + return + + import omni.replicator.core as rep + + self._render_product = rep.create.render_product( + self._cam_prim_path, self._resolution + ) + self._annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") + self._annotator.attach([self._render_product]) + Logger.print("[VideoRecorder] Created RGB annotator for {}".format(self._cam_prim_path)) + return + + def _capture_frame(self) -> None: + """Capture a single RGB frame from the viewport.""" + self._ensure_annotator() + + # Render the scene to update the viewport + self._engine._sim.render() + + rgb_data = self._annotator.get_data() + if rgb_data is None or rgb_data.size == 0: + # Renderer still warming up + frame = np.zeros((self._resolution[1], self._resolution[0], 3), dtype=np.uint8) + else: + frame = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) + frame = frame[:, :, :3] # drop alpha channel + + self._recorded_frames.append(frame) + return + + def pre_step(self) -> None: + """Call before each training step to check if recording should start.""" + if not self._recording and (self._global_step % self._video_interval == 0): + self._start_recording() + return + + 
def post_step(self) -> None: + """Call after each training step to capture frames and stop if done.""" + if self._recording: + self._capture_frame() + + if len(self._recorded_frames) >= self._video_length: + self._stop_recording() + + self._global_step += 1 + return + + def _start_recording(self) -> None: + """Begin a new video recording.""" + self._recorded_frames = [] + self._recording = True + Logger.print("[VideoRecorder] Started recording (step {})".format(self._global_step)) + return + + def _stop_recording(self) -> None: + """Stop recording, create video, upload to WandB, and clean up.""" + if not self._recording or len(self._recorded_frames) == 0: + self._recording = False + return + + self._recording = False + + try: + import wandb + from moviepy.video.io.ImageSequenceClip import ImageSequenceClip + + clip = ImageSequenceClip(self._recorded_frames, fps=self._fps) + + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp: + temp_path: str = tmp.name + + clip.write_videofile(temp_path, logger=None) + + if wandb.run is not None: + wandb.log({ + "video": wandb.Video(temp_path, format="mp4"), + "video_step": self._global_step, + }) + Logger.print("[VideoRecorder] Uploaded video to WandB ({} frames, step {})".format( + len(self._recorded_frames), self._global_step)) + else: + Logger.print("[VideoRecorder] WandB not initialized, skipping upload") + + # Clean up temp file + os.remove(temp_path) + + except ImportError as e: + Logger.print("[VideoRecorder] Missing dependency: {}. 
Video not saved.".format(e)) + except Exception as e: + Logger.print("[VideoRecorder] Error creating video: {}".format(e)) + + self._recorded_frames = [] + self._video_count += 1 + return + + def flush(self) -> None: + """Force stop and upload any in-progress recording.""" + if self._recording: + self._stop_recording() + return diff --git a/requirements.txt b/requirements.txt index c66c425..9758d29 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ pyyaml tensorboardX torch>=1.9.1 wandb>=0.17.4 +setuptools From 88f803663558e8cf31fe466011ee253a2f9cda98 Mon Sep 17 00:00:00 2001 From: ustcscgyer Date: Wed, 11 Feb 2026 01:22:29 +0000 Subject: [PATCH 02/10] Turn on light for video rendering --- mimickit/engines/isaac_lab_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mimickit/engines/isaac_lab_engine.py b/mimickit/engines/isaac_lab_engine.py index 5fbdcf5..ed5eae4 100644 --- a/mimickit/engines/isaac_lab_engine.py +++ b/mimickit/engines/isaac_lab_engine.py @@ -88,10 +88,10 @@ def __init__(self, config, num_envs, device, visualize, enable_cameras=False): if (visualize or enable_cameras): self._build_camera() + self._build_lights() if (visualize): self._prev_frame_time = 0.0 - self._build_lights() self._build_draw_interface() self._setup_keyboard() From 730526afc0f3ac7f79b5bc8a58022c929900f74a Mon Sep 17 00:00:00 2001 From: ustcscgyer Date: Wed, 11 Feb 2026 01:54:07 +0000 Subject: [PATCH 03/10] Add annotation --- mimickit/util/display.py | 11 +++++++---- mimickit/util/video_recorder.py | 13 +++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/mimickit/util/display.py b/mimickit/util/display.py index 6d7335b..eb3a2da 100644 --- a/mimickit/util/display.py +++ b/mimickit/util/display.py @@ -1,9 +1,11 @@ import os import subprocess +import time +from typing import Any from util.logger import Logger -def ensure_virtual_display(display=":99"): +def ensure_virtual_display(display: str = ":99") -> None: 
"""Start Xvfb virtual display if no DISPLAY is set. Needed for headless Vulkan rendering. If DISPLAY is already set, uses it (assumes it's valid). Otherwise starts Xvfb on the @@ -13,9 +15,10 @@ def ensure_virtual_display(display=":99"): return try: - subprocess.Popen(["Xvfb", display, "-screen", "0", "1024x768x24"], - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - import time + process: subprocess.Popen[bytes] = subprocess.Popen( + ["Xvfb", display, "-screen", "0", "1024x768x24"], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) time.sleep(1) os.environ["DISPLAY"] = display Logger.print("Started virtual display on {}".format(display)) diff --git a/mimickit/util/video_recorder.py b/mimickit/util/video_recorder.py index 4d26c3e..f9f2b80 100644 --- a/mimickit/util/video_recorder.py +++ b/mimickit/util/video_recorder.py @@ -3,12 +3,13 @@ import numpy as np import os import tempfile -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from util.logger import Logger if TYPE_CHECKING: import engines.engine as engine + from omni.replicator.core import Annotator, RenderProduct class VideoRecorder: @@ -43,8 +44,8 @@ def __init__(self, engine: engine.Engine, video_length: int = 200, self._global_step: int = 0 self._video_count: int = 0 - self._annotator: object | None = None - self._render_product: object | None = None + self._annotator: Any | None = None + self._render_product: Any | None = None return @@ -70,10 +71,10 @@ def _capture_frame(self) -> None: # Render the scene to update the viewport self._engine._sim.render() - rgb_data = self._annotator.get_data() + rgb_data: Any = self._annotator.get_data() if rgb_data is None or rgb_data.size == 0: # Renderer still warming up - frame = np.zeros((self._resolution[1], self._resolution[0], 3), dtype=np.uint8) + frame: np.ndarray = np.zeros((self._resolution[1], self._resolution[0], 3), dtype=np.uint8) else: frame = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) frame = 
frame[:, :, :3] # drop alpha channel @@ -117,7 +118,7 @@ def _stop_recording(self) -> None: import wandb from moviepy.video.io.ImageSequenceClip import ImageSequenceClip - clip = ImageSequenceClip(self._recorded_frames, fps=self._fps) + clip: ImageSequenceClip = ImageSequenceClip(self._recorded_frames, fps=self._fps) with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp: temp_path: str = tmp.name From 9a0ab46f5248672c38810798dd1bf6b0f282872d Mon Sep 17 00:00:00 2001 From: ustcscgyer Date: Wed, 11 Feb 2026 05:23:42 +0000 Subject: [PATCH 04/10] Correct the logging step count --- mimickit/learning/base_agent.py | 3 +++ mimickit/util/video_recorder.py | 12 ++++++++++-- mimickit/util/wandb_logger.py | 15 ++++++++++++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/mimickit/learning/base_agent.py b/mimickit/learning/base_agent.py index a258922..2999aaf 100644 --- a/mimickit/learning/base_agent.py +++ b/mimickit/learning/base_agent.py @@ -56,6 +56,9 @@ def train_model(self, max_samples, out_dir, save_int_models, logger_type): log_file = os.path.join(out_dir, "log.txt") self._logger = self._build_logger(logger_type, log_file, self._config) + if (self._video_recorder is not None): + self._video_recorder.set_logger_step_tracker(self._logger) + if (save_int_models): int_out_dir = os.path.join(out_dir, "int_models") if (mp_util.is_root_proc() and not os.path.exists(int_out_dir)): diff --git a/mimickit/util/video_recorder.py b/mimickit/util/video_recorder.py index f9f2b80..0119713 100644 --- a/mimickit/util/video_recorder.py +++ b/mimickit/util/video_recorder.py @@ -47,6 +47,15 @@ def __init__(self, engine: engine.Engine, video_length: int = 200, self._annotator: Any | None = None self._render_product: Any | None = None + self._logger_step_tracker: Any | None = None + + return + + def set_logger_step_tracker(self, logger: Any) -> None: + """ + A temporary hack to get the step value from the logger. 
+ """ + self._logger_step_tracker = logger return def _ensure_annotator(self) -> None: @@ -128,8 +137,7 @@ def _stop_recording(self) -> None: if wandb.run is not None: wandb.log({ "video": wandb.Video(temp_path, format="mp4"), - "video_step": self._global_step, - }) + }, step=self._logger_step_tracker.get_current_step()) Logger.print("[VideoRecorder] Uploaded video to WandB ({} frames, step {})".format( len(self._recorded_frames), self._global_step)) else: diff --git a/mimickit/util/wandb_logger.py b/mimickit/util/wandb_logger.py index c6697ab..5be147d 100644 --- a/mimickit/util/wandb_logger.py +++ b/mimickit/util/wandb_logger.py @@ -82,4 +82,17 @@ def _build_key_tags(self): curr_tags = "{:s}/{:s}".format(curr_tag, key) tags.append(curr_tags) - return tags \ No newline at end of file + return tags + + def get_current_step(self) -> int: + """Get the current step value that would be used for wandb.log(). + + Returns the step value based on the step_key if set, otherwise returns row_count. + This matches the logic used in write_log(). 
+ """ + step_val = self._row_count + if (self._step_key is not None): + entry = self.log_current_row.get(self._step_key, "") + if entry != "": + step_val = entry.val + return int(step_val) \ No newline at end of file From 659436112fbee3d6369315e5d4e354a17bb7849d Mon Sep 17 00:00:00 2001 From: ustcscgyer Date: Wed, 11 Feb 2026 06:38:02 +0000 Subject: [PATCH 05/10] Address Jason's feedback --- data/agents/deepmimic_g1_ppo_agent.yaml | 2 +- data/agents/deepmimic_humanoid_ppo_agent.yaml | 2 +- mimickit/engines/engine.py | 5 +- mimickit/engines/engine_builder.py | 4 +- mimickit/engines/isaac_lab_engine.py | 27 ++++-- mimickit/envs/add_env.py | 4 +- mimickit/envs/amp_env.py | 4 +- mimickit/envs/ase_env.py | 4 +- mimickit/envs/char_dof_test_env.py | 4 +- mimickit/envs/char_env.py | 4 +- mimickit/envs/deepmimic_env.py | 4 +- mimickit/envs/env_builder.py | 25 +++--- mimickit/envs/sim_env.py | 14 ++-- mimickit/envs/static_objects_env.py | 4 +- mimickit/envs/task_location_env.py | 4 +- mimickit/envs/task_steering_env.py | 4 +- mimickit/envs/view_motion_env.py | 4 +- mimickit/learning/base_agent.py | 33 ++++---- mimickit/run.py | 28 +------ mimickit/util/video_recorder.py | 84 ++++++++----------- requirements.txt | 1 - 21 files changed, 123 insertions(+), 142 deletions(-) diff --git a/data/agents/deepmimic_g1_ppo_agent.yaml b/data/agents/deepmimic_g1_ppo_agent.yaml index eda570e..cd0720d 100644 --- a/data/agents/deepmimic_g1_ppo_agent.yaml +++ b/data/agents/deepmimic_g1_ppo_agent.yaml @@ -14,7 +14,7 @@ optimizer: discount: 0.99 steps_per_iter: 32 -iters_per_output: 100 +iters_per_output: 5 test_episodes: 32 normalizer_samples: 100000000 diff --git a/data/agents/deepmimic_humanoid_ppo_agent.yaml b/data/agents/deepmimic_humanoid_ppo_agent.yaml index eda570e..cd0720d 100644 --- a/data/agents/deepmimic_humanoid_ppo_agent.yaml +++ b/data/agents/deepmimic_humanoid_ppo_agent.yaml @@ -14,7 +14,7 @@ optimizer: discount: 0.99 steps_per_iter: 32 -iters_per_output: 100 +iters_per_output: 
5 test_episodes: 32 normalizer_samples: 100000000 diff --git a/mimickit/engines/engine.py b/mimickit/engines/engine.py index fd6bd85..1048376 100644 --- a/mimickit/engines/engine.py +++ b/mimickit/engines/engine.py @@ -193,4 +193,7 @@ def draw_lines(self, env_id, start_verts, end_verts, cols, line_width): return def register_keyboard_callback(self, key_str, callback_func): - return \ No newline at end of file + return + + def get_video_recorder(self): + return None \ No newline at end of file diff --git a/mimickit/engines/engine_builder.py b/mimickit/engines/engine_builder.py index e19bb47..5ee3632 100644 --- a/mimickit/engines/engine_builder.py +++ b/mimickit/engines/engine_builder.py @@ -3,7 +3,7 @@ except: pass -def build_engine(config, num_envs, device, visualize, enable_cameras=False): +def build_engine(config, num_envs, device, visualize, record_video=False): eng_name = config["engine_name"] if (eng_name == "isaac_gym"): @@ -11,7 +11,7 @@ def build_engine(config, num_envs, device, visualize, enable_cameras=False): engine = isaac_gym_engine.IsaacGymEngine(config, num_envs, device, visualize) elif (eng_name == "isaac_lab"): import engines.isaac_lab_engine as isaac_lab_engine - engine = isaac_lab_engine.IsaacLabEngine(config, num_envs, device, visualize, enable_cameras=enable_cameras) + engine = isaac_lab_engine.IsaacLabEngine(config, num_envs, device, visualize, record_video=record_video) elif (eng_name == "newton"): import engines.newton_engine as newton_engine engine = newton_engine.NewtonEngine(config, num_envs, device, visualize) diff --git a/mimickit/engines/isaac_lab_engine.py b/mimickit/engines/isaac_lab_engine.py index ed5eae4..20f92c5 100644 --- a/mimickit/engines/isaac_lab_engine.py +++ b/mimickit/engines/isaac_lab_engine.py @@ -1,4 +1,5 @@ from isaaclab.app import AppLauncher +from util.video_recorder import VideoRecorder import carb @@ -59,9 +60,10 @@ def is_valid_clone(self, other): class IsaacLabEngine(engine.Engine): - def __init__(self, config, 
num_envs, device, visualize, enable_cameras=False): + def __init__(self, config, num_envs, device, visualize, record_video=False): super().__init__() + self.video_recorder = None self._device = device sim_freq = config.get("sim_freq", 60) control_freq = config.get("control_freq", 10) @@ -72,7 +74,7 @@ def __init__(self, config, num_envs, device, visualize, enable_cameras=False): self._sim_steps = int(sim_freq / control_freq) sim_timestep = 1.0 / sim_freq - self._create_simulator(sim_timestep, visualize, enable_cameras) + self._create_simulator(sim_timestep, visualize, record_video) self._env_spacing = config["env_spacing"] self._obj_cfgs = [] @@ -86,17 +88,28 @@ def __init__(self, config, num_envs, device, visualize, enable_cameras=False): self._build_ground() self._env_offsets = self._compute_env_offsets(num_envs) - if (visualize or enable_cameras): + if (visualize or record_video): self._build_camera() self._build_lights() + if (record_video): + self.create_video_recorder() + if (visualize): self._prev_frame_time = 0.0 self._build_draw_interface() self._setup_keyboard() return + + def create_video_recorder(self): + self.video_recorder = VideoRecorder(self) + Logger.print("Video recording enabled") + return + def get_video_recorder(self): + return self.video_recorder + def get_name(self): return "isaac_lab" @@ -654,16 +667,16 @@ def _setup_keyboard(self): self._keyboard_callbacks = dict() return - def _create_simulator(self, sim_timestep, visualize, enable_cameras=False): - # Headless rendering (enable_cameras without a display) requires a virtual display - if enable_cameras and not visualize: + def _create_simulator(self, sim_timestep, visualize, record_video=False): + # Headless rendering (record_video without a display) requires a virtual display + if record_video and not visualize: from util.display import ensure_virtual_display ensure_virtual_display() self._app_launcher = AppLauncher({ "headless": not visualize, "device": self._device, - "enable_cameras": 
enable_cameras or visualize, + "enable_cameras": record_video or visualize, }) import isaaclab.sim as sim_utils diff --git a/mimickit/envs/add_env.py b/mimickit/envs/add_env.py index 3739378..8d9a327 100644 --- a/mimickit/envs/add_env.py +++ b/mimickit/envs/add_env.py @@ -5,9 +5,9 @@ import util.torch_util as torch_util class ADDEnv(amp_env.AMPEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _build_disc_obs_buffers(self): diff --git a/mimickit/envs/amp_env.py b/mimickit/envs/amp_env.py index 35dcf1f..93e85df 100644 --- a/mimickit/envs/amp_env.py +++ b/mimickit/envs/amp_env.py @@ -7,11 +7,11 @@ import util.torch_util as torch_util class AMPEnv(deepmimic_env.DeepMimicEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._num_disc_obs_steps = env_config["num_disc_obs_steps"] super().__init__(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, - visualize=visualize, enable_cameras=enable_cameras) + visualize=visualize, record_video=record_video) return def get_disc_obs_space(self): diff --git a/mimickit/envs/ase_env.py b/mimickit/envs/ase_env.py index df61aad..9621eae 100644 --- a/mimickit/envs/ase_env.py +++ b/mimickit/envs/ase_env.py @@ -4,11 +4,11 @@ import envs.char_env as char_env class ASEEnv(amp_env.AMPEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): + def __init__(self, env_config, engine_config, num_envs, device, 
visualize, record_video=False): self._default_reset_prob = env_config["default_reset_prob"] super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _reset_char(self, env_ids): diff --git a/mimickit/envs/char_dof_test_env.py b/mimickit/envs/char_dof_test_env.py index bf30c46..d53f625 100644 --- a/mimickit/envs/char_dof_test_env.py +++ b/mimickit/envs/char_dof_test_env.py @@ -6,11 +6,11 @@ import engines.engine as engine class CharDofTestEnv(char_env.CharEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._time_per_dof = 4.0 super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) self._episode_length = self._time_per_dof * self._pd_low.shape[0] return diff --git a/mimickit/envs/char_env.py b/mimickit/envs/char_env.py index ece39d1..8ceaad0 100644 --- a/mimickit/envs/char_env.py +++ b/mimickit/envs/char_env.py @@ -15,13 +15,13 @@ import engines.engine as engine class CharEnv(sim_env.SimEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._global_obs = env_config["global_obs"] self._root_height_obs = env_config.get("root_height_obs", True) self._zero_center_action = env_config.get("zero_center_action", False) super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) + num_envs=num_envs, 
device=device, visualize=visualize, record_video=record_video) char_id = self._get_char_id() self._print_char_prop(0, char_id) diff --git a/mimickit/envs/deepmimic_env.py b/mimickit/envs/deepmimic_env.py index cdd8204..315217d 100644 --- a/mimickit/envs/deepmimic_env.py +++ b/mimickit/envs/deepmimic_env.py @@ -10,7 +10,7 @@ import util.torch_util as torch_util class DeepMimicEnv(char_env.CharEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._enable_early_termination = env_config["enable_early_termination"] self._num_phase_encoding = env_config.get("num_phase_encoding", 0) @@ -40,7 +40,7 @@ def __init__(self, env_config, engine_config, num_envs, device, visualize, enabl self._visualize_ref_char = env_config.get("visualize_ref_char", True) super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def get_reward_succ(self): diff --git a/mimickit/envs/env_builder.py b/mimickit/envs/env_builder.py index 60ce354..75e16c6 100644 --- a/mimickit/envs/env_builder.py +++ b/mimickit/envs/env_builder.py @@ -5,47 +5,46 @@ from util.logger import Logger -def build_env(env_file, engine_file, num_envs, device, visualize, enable_cameras=False): +def build_env(env_file, engine_file, num_envs, device, visualize, record_video=False): env_config, engine_config = load_configs(env_file, engine_file) env_name = env_config["env_name"] Logger.print("Building {} env".format(env_name)) + # Build environment based on env_name if (env_name == "char"): import envs.char_env as char_env - EnvClass = char_env.CharEnv + env = char_env.CharEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, 
record_video=record_video) elif (env_name == "deepmimic"): import envs.deepmimic_env as deepmimic_env - EnvClass = deepmimic_env.DeepMimicEnv + env = deepmimic_env.DeepMimicEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "amp"): import envs.amp_env as amp_env - EnvClass = amp_env.AMPEnv + env = amp_env.AMPEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "ase"): import envs.ase_env as ase_env - EnvClass = ase_env.ASEEnv + env = ase_env.ASEEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "add"): import envs.add_env as add_env - EnvClass = add_env.ADDEnv + env = add_env.ADDEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "char_dof_test"): import envs.char_dof_test_env as char_dof_test_env - EnvClass = char_dof_test_env.CharDofTestEnv + env = char_dof_test_env.CharDofTestEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "view_motion"): import envs.view_motion_env as view_motion_env - EnvClass = view_motion_env.ViewMotionEnv + env = view_motion_env.ViewMotionEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "task_location"): import envs.task_location_env as task_location_env - EnvClass = task_location_env.TaskLocationEnv + env = task_location_env.TaskLocationEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "task_steering"): import 
envs.task_steering_env as task_steering_env - EnvClass = task_steering_env.TaskSteeringEnv + env = task_steering_env.TaskSteeringEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "static_objects"): import envs.static_objects_env as static_objects_env - EnvClass = static_objects_env.StaticObjectsEnv + env = static_objects_env.StaticObjectsEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) else: assert(False), "Unsupported env: {}".format(env_name) - env = EnvClass(env_config, engine_config, num_envs, device, visualize, enable_cameras) - return env def load_config(file): diff --git a/mimickit/envs/sim_env.py b/mimickit/envs/sim_env.py index 2e138fc..c4fa257 100644 --- a/mimickit/envs/sim_env.py +++ b/mimickit/envs/sim_env.py @@ -18,13 +18,13 @@ class PlayMode(enum.Enum): class SimEnv(base_env.BaseEnv): NAME = "sim_env" - def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): super().__init__(visualize=visualize) self._device = device self._episode_length = env_config["episode_length"] # episode length in seconds - self._engine = self._build_engine(engine_config, num_envs, device, visualize, enable_cameras) + self._engine = self._build_engine(engine_config, num_envs, device, visualize, record_video) self._build_envs(env_config, num_envs) self._engine.initialize_sim() @@ -32,8 +32,8 @@ def __init__(self, env_config, engine_config, num_envs, device, visualize, enabl self._build_sim_tensors(env_config) self._build_data_buffers() - self._enable_cameras = enable_cameras - if self._visualize or self._enable_cameras: + self._record_video = record_video + if self._visualize or self._record_video: self._build_camera(env_config) if self._visualize: 
@@ -78,7 +78,7 @@ def step(self, action): if (self._visualize): self._render() - elif (self._enable_cameras): + elif (self._record_video): self._update_camera() return self._obs_buf, self._reward_buf, self._done_buf, self._info @@ -165,8 +165,8 @@ def _post_physics_step(self): self._update_done() return - def _build_engine(self, engine_config, num_envs, device, visualize, enable_cameras=False): - engine = engine_builder.build_engine(engine_config, num_envs, device, visualize, enable_cameras=enable_cameras) + def _build_engine(self, engine_config, num_envs, device, visualize, record_video=False): + engine = engine_builder.build_engine(engine_config, num_envs, device, visualize, record_video=record_video) return engine @abc.abstractmethod diff --git a/mimickit/envs/static_objects_env.py b/mimickit/envs/static_objects_env.py index bb61e21..b1975fc 100644 --- a/mimickit/envs/static_objects_env.py +++ b/mimickit/envs/static_objects_env.py @@ -3,9 +3,9 @@ import numpy as np class StaticObjectsEnv(deepmimic_env.DeepMimicEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _build_env(self, env_id, config): diff --git a/mimickit/envs/task_location_env.py b/mimickit/envs/task_location_env.py index b53f87b..b2be179 100644 --- a/mimickit/envs/task_location_env.py +++ b/mimickit/envs/task_location_env.py @@ -6,14 +6,14 @@ import util.torch_util as torch_util class TaskLocationEnv(amp_env.AMPEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): + def __init__(self, env_config, engine_config, num_envs, device, visualize, 
record_video=False): self._tar_speed = env_config["tar_speed"] self._tar_change_time_min = env_config["tar_change_time_min"] self._tar_change_time_max = env_config["tar_change_time_max"] self._tar_dist_max = env_config["tar_dist_max"] super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _build_envs(self, config, num_envs): diff --git a/mimickit/envs/task_steering_env.py b/mimickit/envs/task_steering_env.py index 991ea26..a5db497 100644 --- a/mimickit/envs/task_steering_env.py +++ b/mimickit/envs/task_steering_env.py @@ -7,7 +7,7 @@ class TaskSteeringEnv(amp_env.AMPEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._rand_tar_dir = env_config.get("rand_tar_dir", True) self._rand_face_dir = env_config.get("rand_face_dir", True) self._tar_speed_min = env_config["tar_speed_min"] @@ -21,7 +21,7 @@ def __init__(self, env_config, engine_config, num_envs, device, visualize, enabl self._reward_steering_vel_scale = float(env_config["reward_steering_vel_scale"]) super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _build_envs(self, config, num_envs): diff --git a/mimickit/envs/view_motion_env.py b/mimickit/envs/view_motion_env.py index 0642aa9..5c21572 100644 --- a/mimickit/envs/view_motion_env.py +++ b/mimickit/envs/view_motion_env.py @@ -8,12 +8,12 @@ import torch class ViewMotionEnv(char_env.CharEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize, enable_cameras=False): + def __init__(self, 
env_config, engine_config, num_envs, device, visualize, record_video=False): self._time_scale = 1.0 engine_config["sim_freq"] = engine_config["control_freq"] super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize, enable_cameras=enable_cameras) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _build_envs(self, env_config, num_envs): diff --git a/mimickit/learning/base_agent.py b/mimickit/learning/base_agent.py index 2999aaf..3bf8da7 100644 --- a/mimickit/learning/base_agent.py +++ b/mimickit/learning/base_agent.py @@ -46,7 +46,6 @@ def __init__(self, config, env, device): self._mode = AgentMode.TRAIN self._curr_obs = None self._curr_info = None - self._video_recorder = None return def train_model(self, max_samples, out_dir, save_int_models, logger_type): @@ -55,9 +54,10 @@ def train_model(self, max_samples, out_dir, save_int_models, logger_type): out_model_file = os.path.join(out_dir, "model.pt") log_file = os.path.join(out_dir, "log.txt") self._logger = self._build_logger(logger_type, log_file, self._config) - - if (self._video_recorder is not None): - self._video_recorder.set_logger_step_tracker(self._logger) + + # Set logger on video recorder if it exists in the engine + if self._env._engine.get_video_recorder(): + self._env._engine.video_recorder.set_logger_step_tracker(self._logger) if (save_int_models): int_out_dir = os.path.join(out_dir, "int_models") @@ -91,13 +91,11 @@ def train_model(self, max_samples, out_dir, save_int_models, logger_type): self._iter += 1 - # flush any in-progress video recording at end of training - if (self._video_recorder is not None): - self._video_recorder.flush() - return def test_model(self, num_episodes): + Logger.print("[BaseAgent] Testing model for {} episodes".format(num_episodes)) + self.eval() self.set_mode(AgentMode.TEST) @@ -277,9 +275,6 @@ def _init_iter(self): def _rollout_train(self, num_steps): for i in 
range(num_steps): - if (self._video_recorder is not None): - self._video_recorder.pre_step() - action, action_info = self._decide_action(self._curr_obs, self._curr_info) self._record_data_pre_step(self._curr_obs, self._curr_info, action, action_info) @@ -289,14 +284,16 @@ def _rollout_train(self, num_steps): self._curr_obs, self._curr_info = self._reset_done_envs(done) self._exp_buffer.inc() - - if (self._video_recorder is not None): - self._video_recorder.post_step() return def _rollout_test(self, num_episodes): self._test_return_tracker.reset() + # Start video recording if available + video_recorder = self._env._engine.get_video_recorder() + if video_recorder: + video_recorder.start_recording() + if (num_episodes == 0): test_info = { "mean_return": 0.0, @@ -315,6 +312,10 @@ def _rollout_test(self, num_episodes): next_obs, r, done, next_info = self._step_env(action) self._test_return_tracker.update(r, done) + # Capture frame for video recording + if video_recorder: + video_recorder.capture_frame() + self._curr_obs, self._curr_info = self._reset_done_envs(done) eps_per_env = self._test_return_tracker.get_eps_per_env() @@ -328,6 +329,10 @@ def _rollout_test(self, num_episodes): "mean_ep_len": test_ep_len.item(), "num_eps": self._test_return_tracker.get_episodes() } + + # Stop video recording and upload + if video_recorder: + video_recorder.stop_recording() return test_info @abc.abstractmethod diff --git a/mimickit/run.py b/mimickit/run.py index f074894..3e926b1 100644 --- a/mimickit/run.py +++ b/mimickit/run.py @@ -33,14 +33,13 @@ def load_args(argv): def build_env(args, num_envs, device, visualize): env_file = args.parse_string("env_config") engine_file = args.parse_string("engine_config") - enable_cameras = args.parse_bool("video", False) - env = env_builder.build_env(env_file, engine_file, num_envs, device, visualize, enable_cameras=enable_cameras) + record_video = args.parse_bool("video", False) + env = env_builder.build_env(env_file, engine_file, num_envs, 
device, visualize, record_video=record_video) return env def build_agent(args, env, device): agent_file = args.parse_string("agent_config") agent = agent_builder.build_agent(agent_file, env, device) - agent._video_recorder = build_video_recorder(args, env) return agent def train(agent, max_samples, out_dir, save_int_models, logger_type): @@ -94,29 +93,6 @@ def set_rand_seed(args): util.set_rand_seed(rand_seed) return -def build_video_recorder(args, env): - video = args.parse_bool("video", False) - if not video: - return None - - import engines.isaac_lab_engine as isaac_lab_engine - if not isinstance(env._engine, isaac_lab_engine.IsaacLabEngine): - Logger.print("Video recording is only supported with Isaac Lab engine, skipping") - return None - - from util.video_recorder import VideoRecorder - - video_length = args.parse_int("video_length", 200) - video_interval = args.parse_int("video_interval", 2000) - - engine = env._engine - recorder = VideoRecorder(engine=engine, - video_length=video_length, - video_interval=video_interval) - - Logger.print("Video recording enabled: length={}, interval={}".format(video_length, video_interval)) - return recorder - def run(rank, num_procs, device, master_port, args): mode = args.parse_string("mode", "train") num_envs = args.parse_int("num_envs", 1) diff --git a/mimickit/util/video_recorder.py b/mimickit/util/video_recorder.py index 0119713..6b8991b 100644 --- a/mimickit/util/video_recorder.py +++ b/mimickit/util/video_recorder.py @@ -4,6 +4,8 @@ import os import tempfile from typing import TYPE_CHECKING, Any +import wandb +from moviepy.video.io.ImageSequenceClip import ImageSequenceClip from util.logger import Logger @@ -22,27 +24,20 @@ class VideoRecorder: Args: engine: The simulation engine (e.g. IsaacLabEngine). - video_length: Number of steps per video recording. - video_interval: Interval (in training steps) between video recordings. resolution: Tuple (width, height) for the captured frames. 
fps: Frames per second for the output video. cam_prim_path: USD prim path for the camera to capture from. """ - def __init__(self, engine: engine.Engine, video_length: int = 200, - video_interval: int = 2000, resolution: tuple[int, int] = (640, 480), + def __init__(self, engine: engine.Engine, resolution: tuple[int, int] = (640, 480), fps: int = 30, cam_prim_path: str = "/OmniverseKit_Persp") -> None: self._engine: engine.Engine = engine - self._video_length: int = video_length - self._video_interval: int = video_interval self._resolution: tuple[int, int] = resolution self._fps: int = fps self._cam_prim_path: str = cam_prim_path self._recorded_frames: list[np.ndarray] = [] self._recording: bool = False - self._global_step: int = 0 - self._video_count: int = 0 self._annotator: Any | None = None self._render_product: Any | None = None @@ -91,28 +86,29 @@ def _capture_frame(self) -> None: self._recorded_frames.append(frame) return - def pre_step(self) -> None: - """Call before each training step to check if recording should start.""" - if not self._recording and (self._global_step % self._video_interval == 0): - self._start_recording() + def start_recording(self) -> None: + """Begin a new video recording.""" + if self._recording: + Logger.print("[VideoRecorder] Already recording, stopping previous recording first") + self.stop_recording() + + self._recorded_frames = [] + self._recording = True + Logger.print("[VideoRecorder] Started recording") return - def post_step(self) -> None: - """Call after each training step to capture frames and stop if done.""" + def capture_frame(self) -> None: + """Capture a frame during recording. 
Call this each step while recording.""" if self._recording: self._capture_frame() - - if len(self._recorded_frames) >= self._video_length: - self._stop_recording() - - self._global_step += 1 return - def _start_recording(self) -> None: - """Begin a new video recording.""" - self._recorded_frames = [] - self._recording = True - Logger.print("[VideoRecorder] Started recording (step {})".format(self._global_step)) + def stop_recording(self) -> None: + """Stop recording, create video, upload to WandB, and clean up.""" + if not self._recording: + return + + self._stop_recording() return def _stop_recording(self) -> None: @@ -124,39 +120,29 @@ def _stop_recording(self) -> None: self._recording = False try: - import wandb - from moviepy.video.io.ImageSequenceClip import ImageSequenceClip + if len(self._recorded_frames) == 0: + Logger.print("[VideoRecorder] No frames recorded, skipping video creation") + return clip: ImageSequenceClip = ImageSequenceClip(self._recorded_frames, fps=self._fps) - with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp: + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=True) as tmp: temp_path: str = tmp.name - - clip.write_videofile(temp_path, logger=None) - - if wandb.run is not None: - wandb.log({ - "video": wandb.Video(temp_path, format="mp4"), - }, step=self._logger_step_tracker.get_current_step()) - Logger.print("[VideoRecorder] Uploaded video to WandB ({} frames, step {})".format( - len(self._recorded_frames), self._global_step)) - else: - Logger.print("[VideoRecorder] WandB not initialized, skipping upload") - - # Clean up temp file - os.remove(temp_path) - + + clip.write_videofile(temp_path, logger=None) + if wandb.run is not None: + step_val = self._logger_step_tracker.get_current_step() + wandb.log({ + "video": wandb.Video(temp_path, format="mp4"), + }, step=step_val) + Logger.print("[VideoRecorder] Uploaded video to WandB ({} frames, step {})".format( + len(self._recorded_frames), step_val)) + else: + 
Logger.print("[VideoRecorder] WandB not initialized, skipping upload") except ImportError as e: Logger.print("[VideoRecorder] Missing dependency: {}. Video not saved.".format(e)) except Exception as e: Logger.print("[VideoRecorder] Error creating video: {}".format(e)) self._recorded_frames = [] - self._video_count += 1 - return - - def flush(self) -> None: - """Force stop and upload any in-progress recording.""" - if self._recording: - self._stop_recording() return diff --git a/requirements.txt b/requirements.txt index 9758d29..c66c425 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,3 @@ pyyaml tensorboardX torch>=1.9.1 wandb>=0.17.4 -setuptools From 96a8345a778fda143d80825458b9caa7e71402e9 Mon Sep 17 00:00:00 2001 From: ustcscgyer Date: Wed, 11 Feb 2026 07:11:10 +0000 Subject: [PATCH 06/10] Fix a typo --- data/agents/deepmimic_g1_ppo_agent.yaml | 2 +- data/agents/deepmimic_humanoid_ppo_agent.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/data/agents/deepmimic_g1_ppo_agent.yaml b/data/agents/deepmimic_g1_ppo_agent.yaml index cd0720d..eda570e 100644 --- a/data/agents/deepmimic_g1_ppo_agent.yaml +++ b/data/agents/deepmimic_g1_ppo_agent.yaml @@ -14,7 +14,7 @@ optimizer: discount: 0.99 steps_per_iter: 32 -iters_per_output: 5 +iters_per_output: 100 test_episodes: 32 normalizer_samples: 100000000 diff --git a/data/agents/deepmimic_humanoid_ppo_agent.yaml b/data/agents/deepmimic_humanoid_ppo_agent.yaml index cd0720d..eda570e 100644 --- a/data/agents/deepmimic_humanoid_ppo_agent.yaml +++ b/data/agents/deepmimic_humanoid_ppo_agent.yaml @@ -14,7 +14,7 @@ optimizer: discount: 0.99 steps_per_iter: 32 -iters_per_output: 5 +iters_per_output: 100 test_episodes: 32 normalizer_samples: 100000000 From f0d1c9f058e0eb63e37a413ac26e1c4b5964cf03 Mon Sep 17 00:00:00 2001 From: ustcscgyer Date: Wed, 11 Feb 2026 16:54:38 +0000 Subject: [PATCH 07/10] Set indipendent camera for rendering --- mimickit/engines/engine.py | 4 +++ 
mimickit/engines/isaac_gym_engine.py | 5 ++- mimickit/engines/isaac_lab_engine.py | 15 +++++--- mimickit/engines/newton_engine.py | 6 +++- mimickit/envs/base_env.py | 12 ++++++- mimickit/envs/char_env.py | 15 ++++++++ mimickit/envs/env_builder.py | 6 ++++ mimickit/envs/sim_env.py | 7 ++-- mimickit/run.py | 6 ++++ mimickit/util/video_recorder.py | 54 +++++++++++++++++++++++++--- 10 files changed, 114 insertions(+), 16 deletions(-) diff --git a/mimickit/engines/engine.py b/mimickit/engines/engine.py index 1048376..c6fb192 100644 --- a/mimickit/engines/engine.py +++ b/mimickit/engines/engine.py @@ -188,6 +188,10 @@ def calc_obj_mass(self, env_id, obj_id): @abc.abstractmethod def get_control_mode(self): return + + #abc.abstractmethod + def create_video_recorder(self, camera_config=None): + return def draw_lines(self, env_id, start_verts, end_verts, cols, line_width): return diff --git a/mimickit/engines/isaac_gym_engine.py b/mimickit/engines/isaac_gym_engine.py index adc4ff8..2314484 100644 --- a/mimickit/engines/isaac_gym_engine.py +++ b/mimickit/engines/isaac_gym_engine.py @@ -919,4 +919,7 @@ def _process_gui_events(self): elif (evt.action in self._keyboard_callbacks): callback = self._keyboard_callbacks[evt.action] callback() - return \ No newline at end of file + return + + def create_video_recorder(self, camera_config=None): + raise NotImplementedError("Video recording not supported for Isaac Gym engine") \ No newline at end of file diff --git a/mimickit/engines/isaac_lab_engine.py b/mimickit/engines/isaac_lab_engine.py index 20f92c5..e927fab 100644 --- a/mimickit/engines/isaac_lab_engine.py +++ b/mimickit/engines/isaac_lab_engine.py @@ -92,8 +92,9 @@ def __init__(self, config, num_envs, device, visualize, record_video=False): self._build_camera() self._build_lights() - if (record_video): - self.create_video_recorder() + # Video recorder will be created after environment is initialized + # so it can query environment for camera config if needed + 
self._record_video = record_video if (visualize): self._prev_frame_time = 0.0 @@ -102,8 +103,14 @@ def __init__(self, config, num_envs, device, visualize, record_video=False): return - def create_video_recorder(self): - self.video_recorder = VideoRecorder(self) + def create_video_recorder(self, camera_config={}): + """Create the video recorder with optional camera configuration. + + Args: + camera_config: Optional camera config dict. If None, uses defaults. + """ + self.video_recorder = VideoRecorder(self, camera_config) + Logger.print("Video recording enabled") return diff --git a/mimickit/engines/newton_engine.py b/mimickit/engines/newton_engine.py index 64692a2..7700aaf 100644 --- a/mimickit/engines/newton_engine.py +++ b/mimickit/engines/newton_engine.py @@ -1075,6 +1075,9 @@ def _on_keyboard_event(self, symbol, modifiers): callback() return + def create_video_recorder(self, camera_config=None): + raise NotImplementedError("Video recording not supported for Newton engine") + @wp.kernel def clamp_arrays(x: wp.array(dtype=float), @@ -1141,4 +1144,5 @@ def exp_map_to_quat_indexed(in_dof: wp.array(dtype=float), out_q[q_idx + 1] = q[1] out_q[q_idx + 2] = q[2] out_q[q_idx + 3] = q[3] - return \ No newline at end of file + return + diff --git a/mimickit/envs/base_env.py b/mimickit/envs/base_env.py index 683edd2..62e194a 100644 --- a/mimickit/envs/base_env.py +++ b/mimickit/envs/base_env.py @@ -66,4 +66,14 @@ def get_env_time(self, env_ids=None): return 0.0 def record_diagnostics(self): - return self._diagnostics \ No newline at end of file + return self._diagnostics + + def get_video_camera_config(self) -> dict: + """Optional method for environments to provide camera config for video recording. 
+ + Returns: + dict with optional keys: + - cam_pos: np.ndarray, camera position [x, y, z] + - cam_target: np.ndarray, camera target [x, y, z] + """ + return {} \ No newline at end of file diff --git a/mimickit/envs/char_env.py b/mimickit/envs/char_env.py index 8ceaad0..feb0663 100644 --- a/mimickit/envs/char_env.py +++ b/mimickit/envs/char_env.py @@ -344,6 +344,21 @@ def _build_body_ids_tensor(self, body_names): def _has_key_bodies(self): return len(self._key_body_ids) > 0 + def get_video_camera_config(self) -> dict: + """Provide camera config for video recording.""" + env_id = 0 + char_id = self._get_char_id() + char_root_pos = self._engine.get_root_pos(char_id) + char_pos = char_root_pos[env_id].cpu().numpy() + + cam_pos = np.array([char_pos[0], char_pos[1] - 5.0, 3.0]) + cam_target = np.array([char_pos[0], char_pos[1], 1.0]) + + return { + "cam_pos": cam_pos, + "cam_target": cam_target, + } + def _build_camera(self, env_config): env_id = 0 char_id = self._get_char_id() diff --git a/mimickit/envs/env_builder.py b/mimickit/envs/env_builder.py index 75e16c6..efe3559 100644 --- a/mimickit/envs/env_builder.py +++ b/mimickit/envs/env_builder.py @@ -47,6 +47,12 @@ def build_env(env_file, engine_file, num_envs, device, visualize, record_video=F return env +def get_engine_name(engine_file): + engine_config = load_config(engine_file) + if engine_config is not None: + return engine_config.get("engine_name", "") + return "" + def load_config(file): if (file is not None and file != ""): with open(file, "r") as stream: diff --git a/mimickit/envs/sim_env.py b/mimickit/envs/sim_env.py index c4fa257..add5b5c 100644 --- a/mimickit/envs/sim_env.py +++ b/mimickit/envs/sim_env.py @@ -33,10 +33,9 @@ def __init__(self, env_config, engine_config, num_envs, device, visualize, recor self._build_data_buffers() self._record_video = record_video - if self._visualize or self._record_video: - self._build_camera(env_config) if self._visualize: + self._build_camera(env_config) self._play_mode 
= PlayMode.PLAY self._setup_gui() @@ -78,8 +77,6 @@ def step(self, action): if (self._visualize): self._render() - elif (self._record_video): - self._update_camera() return self._obs_buf, self._reward_buf, self._done_buf, self._info @@ -167,6 +164,8 @@ def _post_physics_step(self): def _build_engine(self, engine_config, num_envs, device, visualize, record_video=False): engine = engine_builder.build_engine(engine_config, num_envs, device, visualize, record_video=record_video) + if record_video: + engine.create_video_recorder(camera_config=self.get_video_camera_config()) return engine @abc.abstractmethod diff --git a/mimickit/run.py b/mimickit/run.py index 3e926b1..a5341dc 100644 --- a/mimickit/run.py +++ b/mimickit/run.py @@ -34,6 +34,12 @@ def build_env(args, num_envs, device, visualize): env_file = args.parse_string("env_config") engine_file = args.parse_string("engine_config") record_video = args.parse_bool("video", False) + + # Video recording is only supported on Isaac Lab engine + if record_video and env_builder.get_engine_name(engine_file) != "isaac_lab": + Logger.print("Warning: video recording only supported for isaac_lab engine, disabling") + record_video = False + env = env_builder.build_env(env_file, engine_file, num_envs, device, visualize, record_video=record_video) return env diff --git a/mimickit/util/video_recorder.py b/mimickit/util/video_recorder.py index 6b8991b..2c51128 100644 --- a/mimickit/util/video_recorder.py +++ b/mimickit/util/video_recorder.py @@ -20,21 +20,36 @@ class VideoRecorder: Works with Isaac Lab engine in headless mode using the omni.replicator annotator API to capture viewport images. - TODO: Add support for other engines beyond Isaac Lab. + The recorder manages its own camera controls, independent of the environment's + visualization camera. This allows recording without interfering with visualization. Args: engine: The simulation engine (e.g. IsaacLabEngine). 
+ camera_config: Dict with optional keys: + - cam_pos: np.ndarray, camera position [x, y, z]. Defaults to [0, -5, 3]. + - cam_target: np.ndarray, camera target [x, y, z]. Defaults to [0, 0, 0]. + - track_obj_id: int, object ID to track (None for still camera). + - track_env_id: int, environment ID for tracking (default 0). resolution: Tuple (width, height) for the captured frames. fps: Frames per second for the output video. - cam_prim_path: USD prim path for the camera to capture from. """ - def __init__(self, engine: engine.Engine, resolution: tuple[int, int] = (640, 480), - fps: int = 30, cam_prim_path: str = "/OmniverseKit_Persp") -> None: + VIDEO_CAM_PATH = "/World/VideoRecorderCamera" + + def __init__(self, engine: engine.Engine, camera_config: dict = {}, + resolution: tuple[int, int] = (640, 480), fps: int = 30) -> None: self._engine: engine.Engine = engine self._resolution: tuple[int, int] = resolution self._fps: int = fps - self._cam_prim_path: str = cam_prim_path + + # Create dedicated camera prim for video recording + self._cam_prim_path: str = self._build_camera() + + # Camera control (from camera_config dict) + cam_pos = camera_config.get("cam_pos", np.array([0.0, -5.0, 3.0])) + cam_target = camera_config.get("cam_target", np.array([0.0, 0.0, 0.0])) + self._cam_pos: np.ndarray = np.array(cam_pos, dtype=np.float32) + self._cam_target: np.ndarray = np.array(cam_target, dtype=np.float32) self._recorded_frames: list[np.ndarray] = [] self._recording: bool = False @@ -44,8 +59,25 @@ def __init__(self, engine: engine.Engine, resolution: tuple[int, int] = (640, 48 self._logger_step_tracker: Any | None = None + # Initialize camera pose + self._set_camera_pose(self._cam_pos, self._cam_target) + return + def _build_camera(self) -> str: + """Create a dedicated camera prim for video recording, independent from visualization.""" + import isaacsim.core.utils.prims as prim_utils + from omni.kit.viewport.utility.camera_state import ViewportCameraState + + cam_path = 
self.VIDEO_CAM_PATH + stage = self._engine._stage + if not stage.GetPrimAtPath(cam_path).IsValid(): + prim_utils.create_prim(cam_path, "Camera") + Logger.print("[VideoRecorder] Created dedicated video camera at {}".format(cam_path)) + + self._camera_state = ViewportCameraState(cam_path) + return cam_path + def set_logger_step_tracker(self, logger: Any) -> None: """ A temporary hack to get the step value from the logger. @@ -53,6 +85,18 @@ def set_logger_step_tracker(self, logger: Any) -> None: self._logger_step_tracker = logger return + def _set_camera_pose(self, pos: np.ndarray, target: np.ndarray) -> None: + """Set the video camera pose, same interface as Camera.lookat() -> engine.set_camera_pose().""" + env_offset = self._engine._env_offsets[0].cpu().numpy() + cam_pos = pos.copy() + cam_target = target.copy() + + cam_pos[:2] += env_offset + cam_target[:2] += env_offset + self._camera_state.set_position_world(cam_pos, True) + self._camera_state.set_target_world(cam_target, True) + return + def _ensure_annotator(self) -> None: """Lazily create the render product and RGB annotator.""" if self._annotator is not None: From 0cb703908e825928bbe043751b949eb06657f40f Mon Sep 17 00:00:00 2001 From: ustcscgyer Date: Wed, 11 Feb 2026 16:58:33 +0000 Subject: [PATCH 08/10] Fix a bug --- mimickit/envs/sim_env.py | 8 +++-- mimickit/util/video_recorder.py | 52 ++++++++++++++++++--------------- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/mimickit/envs/sim_env.py b/mimickit/envs/sim_env.py index add5b5c..81b5ca5 100644 --- a/mimickit/envs/sim_env.py +++ b/mimickit/envs/sim_env.py @@ -34,6 +34,12 @@ def __init__(self, env_config, engine_config, num_envs, device, visualize, recor self._record_video = record_video + # Create video recorder after environment is fully initialized + # so it can query environment for camera config if needed + if self._record_video: + camera_config = self.get_video_camera_config() + 
self._engine.create_video_recorder(camera_config=camera_config) + if self._visualize: self._build_camera(env_config) self._play_mode = PlayMode.PLAY @@ -164,8 +170,6 @@ def _post_physics_step(self): def _build_engine(self, engine_config, num_envs, device, visualize, record_video=False): engine = engine_builder.build_engine(engine_config, num_envs, device, visualize, record_video=record_video) - if record_video: - engine.create_video_recorder(camera_config=self.get_video_camera_config()) return engine @abc.abstractmethod diff --git a/mimickit/util/video_recorder.py b/mimickit/util/video_recorder.py index 2c51128..fd131ba 100644 --- a/mimickit/util/video_recorder.py +++ b/mimickit/util/video_recorder.py @@ -34,7 +34,7 @@ class VideoRecorder: fps: Frames per second for the output video. """ - VIDEO_CAM_PATH = "/World/VideoRecorderCamera" + VIDEO_CAM_PATH = "/OmniverseKit_Persp" # Use viewport camera (works in headless mode) def __init__(self, engine: engine.Engine, camera_config: dict = {}, resolution: tuple[int, int] = (640, 480), fps: int = 30) -> None: @@ -46,10 +46,8 @@ def __init__(self, engine: engine.Engine, camera_config: dict = {}, self._cam_prim_path: str = self._build_camera() # Camera control (from camera_config dict) - cam_pos = camera_config.get("cam_pos", np.array([0.0, -5.0, 3.0])) - cam_target = camera_config.get("cam_target", np.array([0.0, 0.0, 0.0])) - self._cam_pos: np.ndarray = np.array(cam_pos, dtype=np.float32) - self._cam_target: np.ndarray = np.array(cam_target, dtype=np.float32) + self._cam_pos = camera_config.get("cam_pos", np.array([0.0, -5.0, 3.0])) + self._cam_target = camera_config.get("cam_target", np.array([0.0, 0.0, 0.0])) self._recorded_frames: list[np.ndarray] = [] self._recording: bool = False @@ -59,24 +57,14 @@ def __init__(self, engine: engine.Engine, camera_config: dict = {}, self._logger_step_tracker: Any | None = None - # Initialize camera pose - self._set_camera_pose(self._cam_pos, self._cam_target) - return def 
_build_camera(self) -> str: - """Create a dedicated camera prim for video recording, independent from visualization.""" - import isaacsim.core.utils.prims as prim_utils - from omni.kit.viewport.utility.camera_state import ViewportCameraState - - cam_path = self.VIDEO_CAM_PATH - stage = self._engine._stage - if not stage.GetPrimAtPath(cam_path).IsValid(): - prim_utils.create_prim(cam_path, "Camera") - Logger.print("[VideoRecorder] Created dedicated video camera at {}".format(cam_path)) - - self._camera_state = ViewportCameraState(cam_path) - return cam_path + """Get ViewportCameraState for the viewport camera (shared with visualization).""" + # Use engine's camera state (viewport camera works in headless mode) + self._camera_state = self._engine._camera_state + Logger.print("[VideoRecorder] Using viewport camera at {}".format(self.VIDEO_CAM_PATH)) + return self.VIDEO_CAM_PATH def set_logger_step_tracker(self, logger: Any) -> None: """ @@ -86,15 +74,16 @@ def set_logger_step_tracker(self, logger: Any) -> None: return def _set_camera_pose(self, pos: np.ndarray, target: np.ndarray) -> None: - """Set the video camera pose, same interface as Camera.lookat() -> engine.set_camera_pose().""" + """Set the video camera pose using ViewportCameraState (same pattern as engine.set_camera_pose).""" env_offset = self._engine._env_offsets[0].cpu().numpy() cam_pos = pos.copy() cam_target = target.copy() cam_pos[:2] += env_offset cam_target[:2] += env_offset - self._camera_state.set_position_world(cam_pos, True) - self._camera_state.set_target_world(cam_target, True) + + self._camera_state.set_position_world(cam_pos.tolist(), True) + self._camera_state.set_target_world(cam_target.tolist(), True) return def _ensure_annotator(self) -> None: @@ -113,9 +102,19 @@ def _ensure_annotator(self) -> None: return def _capture_frame(self) -> None: - """Capture a single RGB frame from the viewport.""" + """Capture a single RGB frame from the viewport. 
+ + Saves and restores camera state to avoid conflicts with visualization. + """ self._ensure_annotator() + # Save current camera state (in case visualization is active) + saved_pos = self._camera_state.position_world + saved_target = self._camera_state.target_world + + # Set video camera position + self._set_camera_pose(self._cam_pos, self._cam_target) + # Render the scene to update the viewport self._engine._sim.render() @@ -128,6 +127,11 @@ def _capture_frame(self) -> None: frame = frame[:, :, :3] # drop alpha channel self._recorded_frames.append(frame) + + # Restore visualization camera state + self._camera_state.set_position_world(saved_pos, True) + self._camera_state.set_target_world(saved_target, True) + return def start_recording(self) -> None: From 03a52e3751a17c1cca74f1f65d34d9ea19f0ab84 Mon Sep 17 00:00:00 2001 From: ustcscgyer Date: Wed, 11 Feb 2026 18:00:15 +0000 Subject: [PATCH 09/10] Small cleanup --- mimickit/util/video_recorder.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mimickit/util/video_recorder.py b/mimickit/util/video_recorder.py index fd131ba..133fca3 100644 --- a/mimickit/util/video_recorder.py +++ b/mimickit/util/video_recorder.py @@ -28,8 +28,6 @@ class VideoRecorder: camera_config: Dict with optional keys: - cam_pos: np.ndarray, camera position [x, y, z]. Defaults to [0, -5, 3]. - cam_target: np.ndarray, camera target [x, y, z]. Defaults to [0, 0, 0]. - - track_obj_id: int, object ID to track (None for still camera). - - track_env_id: int, environment ID for tracking (default 0). resolution: Tuple (width, height) for the captured frames. fps: Frames per second for the output video. 
""" From 0cd84a72c28d3b19344d77e458917475649d9c9a Mon Sep 17 00:00:00 2001 From: ustcscgyer Date: Wed, 11 Feb 2026 18:20:31 +0000 Subject: [PATCH 10/10] Refactor video capture behavior into isaaclab engine --- mimickit/engines/engine.py | 10 ++++++++-- mimickit/engines/isaac_lab_engine.py | 11 +++++++++++ mimickit/engines/newton_engine.py | 3 +-- mimickit/learning/base_agent.py | 14 +++----------- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/mimickit/engines/engine.py b/mimickit/engines/engine.py index c6fb192..afc5161 100644 --- a/mimickit/engines/engine.py +++ b/mimickit/engines/engine.py @@ -189,7 +189,7 @@ def calc_obj_mass(self, env_id, obj_id): def get_control_mode(self): return - #abc.abstractmethod + @abc.abstractmethod def create_video_recorder(self, camera_config=None): return @@ -200,4 +200,10 @@ def register_keyboard_callback(self, key_str, callback_func): return def get_video_recorder(self): - return None \ No newline at end of file + return None + + def pre_rollout_test(self): + return + + def post_rollout_test(self): + return \ No newline at end of file diff --git a/mimickit/engines/isaac_lab_engine.py b/mimickit/engines/isaac_lab_engine.py index e927fab..91e9c4b 100644 --- a/mimickit/engines/isaac_lab_engine.py +++ b/mimickit/engines/isaac_lab_engine.py @@ -147,6 +147,7 @@ def initialize_sim(self): def step(self): self._update_reset_objs() + self.get_video_recorder().capture_frame() for i in range(self._sim_steps): self._pre_sim_step() @@ -1189,3 +1190,13 @@ def _on_keyboard_event(self, event): callback = self._keyboard_callbacks[event.input] callback() return + + def pre_rollout_test(self): + if self._record_video: + self.video_recorder.start_recording() + return + + def post_rollout_test(self): + if self._record_video: + self.video_recorder.stop_recording() + return \ No newline at end of file diff --git a/mimickit/engines/newton_engine.py b/mimickit/engines/newton_engine.py index 7700aaf..67eaf0f 100644 --- 
a/mimickit/engines/newton_engine.py +++ b/mimickit/engines/newton_engine.py @@ -1144,5 +1144,4 @@ def exp_map_to_quat_indexed(in_dof: wp.array(dtype=float), out_q[q_idx + 1] = q[1] out_q[q_idx + 2] = q[2] out_q[q_idx + 3] = q[3] - return - + return \ No newline at end of file diff --git a/mimickit/learning/base_agent.py b/mimickit/learning/base_agent.py index 3bf8da7..e961249 100644 --- a/mimickit/learning/base_agent.py +++ b/mimickit/learning/base_agent.py @@ -289,10 +289,7 @@ def _rollout_train(self, num_steps): def _rollout_test(self, num_episodes): self._test_return_tracker.reset() - # Start video recording if available - video_recorder = self._env._engine.get_video_recorder() - if video_recorder: - video_recorder.start_recording() + self._env._engine.pre_rollout_test() if (num_episodes == 0): test_info = { @@ -312,10 +309,6 @@ def _rollout_test(self, num_episodes): next_obs, r, done, next_info = self._step_env(action) self._test_return_tracker.update(r, done) - # Capture frame for video recording - if video_recorder: - video_recorder.capture_frame() - self._curr_obs, self._curr_info = self._reset_done_envs(done) eps_per_env = self._test_return_tracker.get_eps_per_env() @@ -330,9 +323,8 @@ def _rollout_test(self, num_episodes): "num_eps": self._test_return_tracker.get_episodes() } - # Stop video recording and upload - if video_recorder: - video_recorder.stop_recording() + self._env._engine.post_rollout_test() + return test_info @abc.abstractmethod