diff --git a/mimickit/engines/engine.py b/mimickit/engines/engine.py index fd6bd85..afc5161 100644 --- a/mimickit/engines/engine.py +++ b/mimickit/engines/engine.py @@ -188,9 +188,22 @@ def calc_obj_mass(self, env_id, obj_id): @abc.abstractmethod def get_control_mode(self): return + + @abc.abstractmethod + def create_video_recorder(self, camera_config=None): + return def draw_lines(self, env_id, start_verts, end_verts, cols, line_width): return def register_keyboard_callback(self, key_str, callback_func): + return + + def get_video_recorder(self): + return None + + def pre_rollout_test(self): + return + + def post_rollout_test(self): return \ No newline at end of file diff --git a/mimickit/engines/engine_builder.py b/mimickit/engines/engine_builder.py index 20314b8..5ee3632 100644 --- a/mimickit/engines/engine_builder.py +++ b/mimickit/engines/engine_builder.py @@ -3,7 +3,7 @@ except: pass -def build_engine(config, num_envs, device, visualize): +def build_engine(config, num_envs, device, visualize, record_video=False): eng_name = config["engine_name"] if (eng_name == "isaac_gym"): @@ -11,7 +11,7 @@ def build_engine(config, num_envs, device, visualize): engine = isaac_gym_engine.IsaacGymEngine(config, num_envs, device, visualize) elif (eng_name == "isaac_lab"): import engines.isaac_lab_engine as isaac_lab_engine - engine = isaac_lab_engine.IsaacLabEngine(config, num_envs, device, visualize) + engine = isaac_lab_engine.IsaacLabEngine(config, num_envs, device, visualize, record_video=record_video) elif (eng_name == "newton"): import engines.newton_engine as newton_engine engine = newton_engine.NewtonEngine(config, num_envs, device, visualize) diff --git a/mimickit/engines/isaac_gym_engine.py b/mimickit/engines/isaac_gym_engine.py index adc4ff8..2314484 100644 --- a/mimickit/engines/isaac_gym_engine.py +++ b/mimickit/engines/isaac_gym_engine.py @@ -919,4 +919,7 @@ def _process_gui_events(self): elif (evt.action in self._keyboard_callbacks): callback = 
self._keyboard_callbacks[evt.action] callback() - return \ No newline at end of file + return + + def create_video_recorder(self, camera_config=None): + raise NotImplementedError("Video recording not supported for Isaac Gym engine") \ No newline at end of file diff --git a/mimickit/engines/isaac_lab_engine.py b/mimickit/engines/isaac_lab_engine.py index 612aab6..91e9c4b 100644 --- a/mimickit/engines/isaac_lab_engine.py +++ b/mimickit/engines/isaac_lab_engine.py @@ -1,4 +1,5 @@ from isaaclab.app import AppLauncher +from util.video_recorder import VideoRecorder import carb @@ -59,9 +60,10 @@ def is_valid_clone(self, other): class IsaacLabEngine(engine.Engine): - def __init__(self, config, num_envs, device, visualize): + def __init__(self, config, num_envs, device, visualize, record_video=False): super().__init__() + self.video_recorder = None self._device = device sim_freq = config.get("sim_freq", 60) control_freq = config.get("control_freq", 10) @@ -72,7 +74,7 @@ def __init__(self, config, num_envs, device, visualize): self._sim_steps = int(sim_freq / control_freq) sim_timestep = 1.0 / sim_freq - self._create_simulator(sim_timestep, visualize) + self._create_simulator(sim_timestep, visualize, record_video) self._env_spacing = config["env_spacing"] self._obj_cfgs = [] @@ -86,15 +88,35 @@ def __init__(self, config, num_envs, device, visualize): self._build_ground() self._env_offsets = self._compute_env_offsets(num_envs) + if (visualize or record_video): + self._build_camera() + self._build_lights() + + # Video recorder will be created after environment is initialized + # so it can query environment for camera config if needed + self._record_video = record_video + if (visualize): self._prev_frame_time = 0.0 - self._build_lights() - self._build_camera() self._build_draw_interface() self._setup_keyboard() return + + def create_video_recorder(self, camera_config={}): + """Create the video recorder with optional camera configuration. 
+
+        Args:
+            camera_config: Optional camera config dict. If None, uses defaults.
+        """
+        self.video_recorder = VideoRecorder(self, camera_config or {})
+
+        Logger.print("Video recording enabled")
+        return
 
+    def get_video_recorder(self):
+        return self.video_recorder
+
     def get_name(self):
         return "isaac_lab"
 
@@ -125,6 +147,9 @@ def initialize_sim(self):
 
     def step(self):
         self._update_reset_objs()
+        # The recorder only exists after create_video_recorder() has been called
+        if (self.video_recorder is not None):
+            self.video_recorder.capture_frame()
 
         for i in range(self._sim_steps):
             self._pre_sim_step()
@@ -652,8 +677,17 @@ def _setup_keyboard(self):
         self._keyboard_callbacks = dict()
         return
 
-    def _create_simulator(self, sim_timestep, visualize):
-        self._app_launcher = AppLauncher({"headless": not visualize, "device": self._device})
+    def _create_simulator(self, sim_timestep, visualize, record_video=False):
+        # Headless rendering (record_video without a display) requires a virtual display
+        if record_video and not visualize:
+            from util.display import ensure_virtual_display
+            ensure_virtual_display()
+
+        self._app_launcher = AppLauncher({
+            "headless": not visualize,
+            "device": self._device,
+            "enable_cameras": record_video or visualize,
+        })
 
         import isaaclab.sim as sim_utils
         from isaacsim.core.utils.stage import get_current_stage
@@ -727,7 +761,7 @@ def _post_sim_step(self):
 
     def _clear_forces(self):
         for obj in self._objs:
-            if (obj.has_external_wrench):
+            if (getattr(obj, 'has_external_wrench', False)):
                 forces = torch.zeros([1, 3], dtype=torch.float, device=self._device)
                 torques = torch.zeros([1, 3], dtype=torch.float, device=self._device)
                 obj.set_external_force_and_torque(forces=forces, torques=torques,
@@ -1158,3 +1192,15 @@ def _on_keyboard_event(self, event):
                 callback = self._keyboard_callbacks[event.input]
                 callback()
         return
+
+    def pre_rollout_test(self):
+        """Start recording the upcoming test rollout if video recording is enabled."""
+        if (self._record_video and self.video_recorder is not None):
+            self.video_recorder.start_recording()
+        return
+
+    def post_rollout_test(self):
+        """Stop recording the test rollout if video recording is enabled."""
+        if (self._record_video and self.video_recorder is not None):
+            self.video_recorder.stop_recording()
+        return
\ No newline at end of file
diff --git
a/mimickit/engines/newton_engine.py b/mimickit/engines/newton_engine.py index 64692a2..67eaf0f 100644 --- a/mimickit/engines/newton_engine.py +++ b/mimickit/engines/newton_engine.py @@ -1075,6 +1075,9 @@ def _on_keyboard_event(self, symbol, modifiers): callback() return + def create_video_recorder(self, camera_config=None): + raise NotImplementedError("Video recording not supported for Newton engine") + @wp.kernel def clamp_arrays(x: wp.array(dtype=float), diff --git a/mimickit/envs/add_env.py b/mimickit/envs/add_env.py index f2dbac4..8d9a327 100644 --- a/mimickit/envs/add_env.py +++ b/mimickit/envs/add_env.py @@ -5,9 +5,9 @@ import util.torch_util as torch_util class ADDEnv(amp_env.AMPEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _build_disc_obs_buffers(self): diff --git a/mimickit/envs/amp_env.py b/mimickit/envs/amp_env.py index f7aff7f..93e85df 100644 --- a/mimickit/envs/amp_env.py +++ b/mimickit/envs/amp_env.py @@ -7,11 +7,11 @@ import util.torch_util as torch_util class AMPEnv(deepmimic_env.DeepMimicEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._num_disc_obs_steps = env_config["num_disc_obs_steps"] super().__init__(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, - visualize=visualize) + visualize=visualize, record_video=record_video) return def get_disc_obs_space(self): diff --git a/mimickit/envs/ase_env.py b/mimickit/envs/ase_env.py index 53df737..9621eae 100644 --- a/mimickit/envs/ase_env.py +++ b/mimickit/envs/ase_env.py @@ -4,11 
+4,11 @@ import envs.char_env as char_env class ASEEnv(amp_env.AMPEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._default_reset_prob = env_config["default_reset_prob"] super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _reset_char(self, env_ids): diff --git a/mimickit/envs/base_env.py b/mimickit/envs/base_env.py index 683edd2..62e194a 100644 --- a/mimickit/envs/base_env.py +++ b/mimickit/envs/base_env.py @@ -66,4 +66,14 @@ def get_env_time(self, env_ids=None): return 0.0 def record_diagnostics(self): - return self._diagnostics \ No newline at end of file + return self._diagnostics + + def get_video_camera_config(self) -> dict: + """Optional method for environments to provide camera config for video recording. 
+ + Returns: + dict with optional keys: + - cam_pos: np.ndarray, camera position [x, y, z] + - cam_target: np.ndarray, camera target [x, y, z] + """ + return {} \ No newline at end of file diff --git a/mimickit/envs/char_dof_test_env.py b/mimickit/envs/char_dof_test_env.py index bd57a91..d53f625 100644 --- a/mimickit/envs/char_dof_test_env.py +++ b/mimickit/envs/char_dof_test_env.py @@ -6,11 +6,11 @@ import engines.engine as engine class CharDofTestEnv(char_env.CharEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._time_per_dof = 4.0 super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) self._episode_length = self._time_per_dof * self._pd_low.shape[0] return diff --git a/mimickit/envs/char_env.py b/mimickit/envs/char_env.py index d19f9de..feb0663 100644 --- a/mimickit/envs/char_env.py +++ b/mimickit/envs/char_env.py @@ -15,13 +15,13 @@ import engines.engine as engine class CharEnv(sim_env.SimEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._global_obs = env_config["global_obs"] self._root_height_obs = env_config.get("root_height_obs", True) self._zero_center_action = env_config.get("zero_center_action", False) super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) char_id = self._get_char_id() self._print_char_prop(0, char_id) @@ -344,6 +344,21 @@ def _build_body_ids_tensor(self, body_names): def _has_key_bodies(self): return len(self._key_body_ids) > 0 + def get_video_camera_config(self) 
-> dict: + """Provide camera config for video recording.""" + env_id = 0 + char_id = self._get_char_id() + char_root_pos = self._engine.get_root_pos(char_id) + char_pos = char_root_pos[env_id].cpu().numpy() + + cam_pos = np.array([char_pos[0], char_pos[1] - 5.0, 3.0]) + cam_target = np.array([char_pos[0], char_pos[1], 1.0]) + + return { + "cam_pos": cam_pos, + "cam_target": cam_target, + } + def _build_camera(self, env_config): env_id = 0 char_id = self._get_char_id() diff --git a/mimickit/envs/deepmimic_env.py b/mimickit/envs/deepmimic_env.py index 255fca1..315217d 100644 --- a/mimickit/envs/deepmimic_env.py +++ b/mimickit/envs/deepmimic_env.py @@ -10,7 +10,7 @@ import util.torch_util as torch_util class DeepMimicEnv(char_env.CharEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._enable_early_termination = env_config["enable_early_termination"] self._num_phase_encoding = env_config.get("num_phase_encoding", 0) @@ -40,7 +40,7 @@ def __init__(self, env_config, engine_config, num_envs, device, visualize): self._visualize_ref_char = env_config.get("visualize_ref_char", True) super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def get_reward_succ(self): diff --git a/mimickit/envs/env_builder.py b/mimickit/envs/env_builder.py index c949504..efe3559 100644 --- a/mimickit/envs/env_builder.py +++ b/mimickit/envs/env_builder.py @@ -5,47 +5,54 @@ from util.logger import Logger -def build_env(env_file, engine_file, num_envs, device, visualize): +def build_env(env_file, engine_file, num_envs, device, visualize, record_video=False): env_config, engine_config = load_configs(env_file, engine_file) env_name = env_config["env_name"] Logger.print("Building {} env".format(env_name)) + # 
Build environment based on env_name if (env_name == "char"): import envs.char_env as char_env - env = char_env.CharEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + env = char_env.CharEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "deepmimic"): import envs.deepmimic_env as deepmimic_env - env = deepmimic_env.DeepMimicEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + env = deepmimic_env.DeepMimicEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "amp"): import envs.amp_env as amp_env - env = amp_env.AMPEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + env = amp_env.AMPEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "ase"): import envs.ase_env as ase_env - env = ase_env.ASEEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + env = ase_env.ASEEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "add"): import envs.add_env as add_env - env = add_env.ADDEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + env = add_env.ADDEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "char_dof_test"): import envs.char_dof_test_env as char_dof_test_env - env = char_dof_test_env.CharDofTestEnv(env_config=env_config, engine_config=engine_config, 
num_envs=num_envs, device=device, visualize=visualize) + env = char_dof_test_env.CharDofTestEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "view_motion"): import envs.view_motion_env as view_motion_env - env = view_motion_env.ViewMotionEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + env = view_motion_env.ViewMotionEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "task_location"): import envs.task_location_env as task_location_env - env = task_location_env.TaskLocationEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + env = task_location_env.TaskLocationEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "task_steering"): import envs.task_steering_env as task_steering_env - env = task_steering_env.TaskSteeringEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + env = task_steering_env.TaskSteeringEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) elif (env_name == "static_objects"): import envs.static_objects_env as static_objects_env - env = static_objects_env.StaticObjectsEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize) + env = static_objects_env.StaticObjectsEnv(env_config=env_config, engine_config=engine_config, num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) else: assert(False), "Unsupported env: {}".format(env_name) return env +def get_engine_name(engine_file): + engine_config = 
load_config(engine_file) + if engine_config is not None: + return engine_config.get("engine_name", "") + return "" + def load_config(file): if (file is not None and file != ""): with open(file, "r") as stream: diff --git a/mimickit/envs/sim_env.py b/mimickit/envs/sim_env.py index 9262a9f..81b5ca5 100644 --- a/mimickit/envs/sim_env.py +++ b/mimickit/envs/sim_env.py @@ -18,13 +18,13 @@ class PlayMode(enum.Enum): class SimEnv(base_env.BaseEnv): NAME = "sim_env" - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): super().__init__(visualize=visualize) self._device = device self._episode_length = env_config["episode_length"] # episode length in seconds - self._engine = self._build_engine(engine_config, num_envs, device, visualize) + self._engine = self._build_engine(engine_config, num_envs, device, visualize, record_video) self._build_envs(env_config, num_envs) self._engine.initialize_sim() @@ -32,9 +32,17 @@ def __init__(self, env_config, engine_config, num_envs, device, visualize): self._build_sim_tensors(env_config) self._build_data_buffers() + self._record_video = record_video + + # Create video recorder after environment is fully initialized + # so it can query environment for camera config if needed + if self._record_video: + camera_config = self.get_video_camera_config() + self._engine.create_video_recorder(camera_config=camera_config) + if self._visualize: - self._play_mode = PlayMode.PLAY self._build_camera(env_config) + self._play_mode = PlayMode.PLAY self._setup_gui() return @@ -160,8 +168,8 @@ def _post_physics_step(self): self._update_done() return - def _build_engine(self, engine_config, num_envs, device, visualize): - engine = engine_builder.build_engine(engine_config, num_envs, device, visualize) + def _build_engine(self, engine_config, num_envs, device, visualize, record_video=False): + engine = 
engine_builder.build_engine(engine_config, num_envs, device, visualize, record_video=record_video) return engine @abc.abstractmethod diff --git a/mimickit/envs/static_objects_env.py b/mimickit/envs/static_objects_env.py index 7f3a473..b1975fc 100644 --- a/mimickit/envs/static_objects_env.py +++ b/mimickit/envs/static_objects_env.py @@ -3,9 +3,9 @@ import numpy as np class StaticObjectsEnv(deepmimic_env.DeepMimicEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _build_env(self, env_id, config): diff --git a/mimickit/envs/task_location_env.py b/mimickit/envs/task_location_env.py index f170f52..b2be179 100644 --- a/mimickit/envs/task_location_env.py +++ b/mimickit/envs/task_location_env.py @@ -6,14 +6,14 @@ import util.torch_util as torch_util class TaskLocationEnv(amp_env.AMPEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._tar_speed = env_config["tar_speed"] self._tar_change_time_min = env_config["tar_change_time_min"] self._tar_change_time_max = env_config["tar_change_time_max"] self._tar_dist_max = env_config["tar_dist_max"] super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _build_envs(self, config, num_envs): diff --git a/mimickit/envs/task_steering_env.py b/mimickit/envs/task_steering_env.py index 7e017a6..a5db497 100644 --- a/mimickit/envs/task_steering_env.py +++ b/mimickit/envs/task_steering_env.py @@ -7,7 
+7,7 @@ class TaskSteeringEnv(amp_env.AMPEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._rand_tar_dir = env_config.get("rand_tar_dir", True) self._rand_face_dir = env_config.get("rand_face_dir", True) self._tar_speed_min = env_config["tar_speed_min"] @@ -21,7 +21,7 @@ def __init__(self, env_config, engine_config, num_envs, device, visualize): self._reward_steering_vel_scale = float(env_config["reward_steering_vel_scale"]) super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _build_envs(self, config, num_envs): diff --git a/mimickit/envs/view_motion_env.py b/mimickit/envs/view_motion_env.py index 66a107e..5c21572 100644 --- a/mimickit/envs/view_motion_env.py +++ b/mimickit/envs/view_motion_env.py @@ -8,12 +8,12 @@ import torch class ViewMotionEnv(char_env.CharEnv): - def __init__(self, env_config, engine_config, num_envs, device, visualize): + def __init__(self, env_config, engine_config, num_envs, device, visualize, record_video=False): self._time_scale = 1.0 engine_config["sim_freq"] = engine_config["control_freq"] super().__init__(env_config=env_config, engine_config=engine_config, - num_envs=num_envs, device=device, visualize=visualize) + num_envs=num_envs, device=device, visualize=visualize, record_video=record_video) return def _build_envs(self, env_config, num_envs): diff --git a/mimickit/learning/base_agent.py b/mimickit/learning/base_agent.py index 8efd5fb..e961249 100644 --- a/mimickit/learning/base_agent.py +++ b/mimickit/learning/base_agent.py @@ -54,6 +54,10 @@ def train_model(self, max_samples, out_dir, save_int_models, logger_type): out_model_file = os.path.join(out_dir, "model.pt") log_file = os.path.join(out_dir, "log.txt") self._logger = 
self._build_logger(logger_type, log_file, self._config) + + # Set logger on video recorder if it exists in the engine + if self._env._engine.get_video_recorder(): + self._env._engine.video_recorder.set_logger_step_tracker(self._logger) if (save_int_models): int_out_dir = os.path.join(out_dir, "int_models") @@ -90,6 +94,8 @@ def train_model(self, max_samples, out_dir, save_int_models, logger_type): return def test_model(self, num_episodes): + Logger.print("[BaseAgent] Testing model for {} episodes".format(num_episodes)) + self.eval() self.set_mode(AgentMode.TEST) @@ -283,6 +289,8 @@ def _rollout_train(self, num_steps): def _rollout_test(self, num_episodes): self._test_return_tracker.reset() + self._env._engine.pre_rollout_test() + if (num_episodes == 0): test_info = { "mean_return": 0.0, @@ -314,6 +322,9 @@ def _rollout_test(self, num_episodes): "mean_ep_len": test_ep_len.item(), "num_eps": self._test_return_tracker.get_episodes() } + + self._env._engine.post_rollout_test() + return test_info @abc.abstractmethod diff --git a/mimickit/run.py b/mimickit/run.py index fe333f0..a5341dc 100644 --- a/mimickit/run.py +++ b/mimickit/run.py @@ -33,7 +33,14 @@ def load_args(argv): def build_env(args, num_envs, device, visualize): env_file = args.parse_string("env_config") engine_file = args.parse_string("engine_config") - env = env_builder.build_env(env_file, engine_file, num_envs, device, visualize) + record_video = args.parse_bool("video", False) + + # Video recording is only supported on Isaac Lab engine + if record_video and env_builder.get_engine_name(engine_file) != "isaac_lab": + Logger.print("Warning: video recording only supported for isaac_lab engine, disabling") + record_video = False + + env = env_builder.build_env(env_file, engine_file, num_envs, device, visualize, record_video=record_video) return env def build_agent(args, env, device): diff --git a/mimickit/util/display.py b/mimickit/util/display.py new file mode 100644 index 0000000..eb3a2da --- /dev/null +++ 
b/mimickit/util/display.py
@@ -0,0 +1,43 @@
+import atexit
+import os
+import subprocess
+import time
+from typing import Any
+
+from util.logger import Logger
+
+def ensure_virtual_display(display: str = ":99") -> None:
+    """Start an Xvfb virtual display if no DISPLAY is set. Needed for headless Vulkan rendering.
+
+    If DISPLAY is already set (non-empty), uses it (assumes it's valid). Otherwise starts
+    Xvfb on the specified display number and terminates it again at interpreter exit.
+
+    Args:
+        display: X display name to start Xvfb on.
+    """
+    if os.environ.get("DISPLAY"):
+        return
+
+    try:
+        process: subprocess.Popen[bytes] = subprocess.Popen(
+            ["Xvfb", display, "-screen", "0", "1024x768x24"],
+            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+        )
+    except FileNotFoundError:
+        Logger.print("WARNING: Xvfb not found. Install with: apt-get install xvfb")
+        Logger.print("Headless camera rendering may not work without a virtual display.")
+        return
+
+    # Give the server a moment to start before anything connects to it
+    time.sleep(1)
+    os.environ["DISPLAY"] = display
+
+    if process.poll() is None:
+        # Do not leave an orphaned Xvfb behind when the program ends
+        atexit.register(process.terminate)
+        Logger.print("Started virtual display on {}".format(display))
+    else:
+        # Xvfb already exited, e.g. because the display number is taken.
+        # DISPLAY stays set so an existing server on that display can be used.
+        Logger.print("WARNING: Xvfb exited with code {} for display {}".format(process.returncode, display))
+    return
diff --git a/mimickit/util/video_recorder.py b/mimickit/util/video_recorder.py
new file mode 100644
index 0000000..133fca3
--- /dev/null
+++ b/mimickit/util/video_recorder.py
@@ -0,0 +1,206 @@
+from __future__ import annotations
+
+import numpy as np
+import os
+import tempfile
+from typing import TYPE_CHECKING, Any
+import wandb
+from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
+
+from util.logger import Logger
+
+if TYPE_CHECKING:
+    import engines.engine as engine
+    from omni.replicator.core import Annotator, RenderProduct
+
+
+class VideoRecorder:
+    """Records video frames from the simulation and uploads them to WandB.
+
+    Works with the Isaac Lab engine in headless mode using the omni.replicator
+    annotator API to capture viewport images.
+
+    The recorder renders through the engine's viewport camera, which is shared
+    with the interactive visualization. For every captured frame the camera is
+    moved to the recording pose and then restored to the pose it had before,
+    so recording does not interfere with visualization.
+
+    Args:
+        engine: The simulation engine (e.g. IsaacLabEngine).
+        camera_config: Optional dict with optional keys:
+            - cam_pos: np.ndarray, camera position [x, y, z]. Defaults to [0, -5, 3].
+            - cam_target: np.ndarray, camera target [x, y, z]. Defaults to [0, 0, 0].
+            None is treated like an empty dict.
+        resolution: Tuple (width, height) for the captured frames.
+        fps: Frames per second for the output video.
+    """
+
+    VIDEO_CAM_PATH = "/OmniverseKit_Persp"  # Use viewport camera (works in headless mode)
+
+    def __init__(self, engine: engine.Engine, camera_config: dict | None = None,
+                 resolution: tuple[int, int] = (640, 480), fps: int = 30) -> None:
+        # None instead of a shared mutable {} default
+        if camera_config is None:
+            camera_config = {}
+
+        self._engine: engine.Engine = engine
+        self._resolution: tuple[int, int] = resolution
+        self._fps: int = fps
+
+        # Prim path of the shared viewport camera used for recording
+        self._cam_prim_path: str = self._build_camera()
+
+        # Camera control (from camera_config dict)
+        self._cam_pos = camera_config.get("cam_pos", np.array([0.0, -5.0, 3.0]))
+        self._cam_target = camera_config.get("cam_target", np.array([0.0, 0.0, 0.0]))
+
+        self._recorded_frames: list[np.ndarray] = []
+        self._recording: bool = False
+
+        self._annotator: Any | None = None
+        self._render_product: Any | None = None
+
+        self._logger_step_tracker: Any | None = None
+
+        return
+
+    def _build_camera(self) -> str:
+        """Bind to the engine's viewport camera state and return the camera prim path."""
+        # Use engine's camera state (viewport camera works in headless mode)
+        self._camera_state = self._engine._camera_state
+        Logger.print("[VideoRecorder] Using viewport camera at {}".format(self.VIDEO_CAM_PATH))
+        return self.VIDEO_CAM_PATH
+
+    def set_logger_step_tracker(self, logger: Any) -> None:
+        """Set the logger that is asked for the WandB step when uploading.
+
+        A temporary hack to get the step value from the logger.
+        """
+        self._logger_step_tracker = logger
+        return
+
+    def _set_camera_pose(self, pos: np.ndarray, target: np.ndarray) -> None:
+        """Set the video camera pose using ViewportCameraState (same pattern as engine.set_camera_pose)."""
+        env_offset = self._engine._env_offsets[0].cpu().numpy()
+        # Float copies: never modify the caller's arrays, and accept int/list input
+        cam_pos = np.array(pos, dtype=float)
+        cam_target = np.array(target, dtype=float)
+
+        cam_pos[:2] += env_offset
+        cam_target[:2] += env_offset
+
+        self._camera_state.set_position_world(cam_pos.tolist(), True)
+        self._camera_state.set_target_world(cam_target.tolist(), True)
+        return
+
+    def _ensure_annotator(self) -> None:
+        """Lazily create the render product and RGB annotator."""
+        if self._annotator is not None:
+            return
+
+        import omni.replicator.core as rep
+
+        self._render_product = rep.create.render_product(
+            self._cam_prim_path, self._resolution
+        )
+        self._annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu")
+        self._annotator.attach([self._render_product])
+        Logger.print("[VideoRecorder] Created RGB annotator for {}".format(self._cam_prim_path))
+        return
+
+    def _capture_frame(self) -> None:
+        """Capture a single RGB frame from the viewport.
+
+        Saves and restores camera state to avoid conflicts with visualization.
+        """
+        self._ensure_annotator()
+
+        # Save current camera state (in case visualization is active)
+        saved_pos = self._camera_state.position_world
+        saved_target = self._camera_state.target_world
+
+        try:
+            # Set video camera position
+            self._set_camera_pose(self._cam_pos, self._cam_target)
+
+            # Render the scene to update the viewport
+            self._engine._sim.render()
+
+            rgb_data: Any = self._annotator.get_data()
+        finally:
+            # Restore visualization camera state, even if rendering failed
+            self._camera_state.set_position_world(saved_pos, True)
+            self._camera_state.set_target_world(saved_target, True)
+
+        if rgb_data is None or rgb_data.size == 0:
+            # Renderer still warming up
+            frame: np.ndarray = np.zeros((self._resolution[1], self._resolution[0], 3), dtype=np.uint8)
+        else:
+            # Drop the alpha channel; copy so the stored frame owns its pixels
+            frame = np.asarray(rgb_data, dtype=np.uint8)[:, :, :3].copy()
+
+        self._recorded_frames.append(frame)
+        return
+
+    def start_recording(self) -> None:
+        """Begin a new video recording."""
+        if self._recording:
+            Logger.print("[VideoRecorder] Already recording, stopping previous recording first")
+            self.stop_recording()
+
+        self._recorded_frames = []
+        self._recording = True
+        Logger.print("[VideoRecorder] Started recording")
+        return
+
+    def capture_frame(self) -> None:
+        """Capture a frame during recording. Call this each step while recording."""
+        if self._recording:
+            self._capture_frame()
+        return
+
+    def stop_recording(self) -> None:
+        """Stop recording, create video, upload to WandB, and clean up."""
+        if not self._recording:
+            return
+
+        self._stop_recording()
+        return
+
+    def _stop_recording(self) -> None:
+        """Encode the buffered frames to an mp4 and upload it to WandB.
+
+        Always leaves the recorder idle with an empty frame buffer, even when
+        encoding or uploading fails.
+        """
+        self._recording = False
+        frames = self._recorded_frames
+        self._recorded_frames = []
+
+        if len(frames) == 0:
+            Logger.print("[VideoRecorder] No frames recorded, skipping video creation")
+            return
+
+        if wandb.run is None:
+            Logger.print("[VideoRecorder] WandB not initialized, skipping upload")
+            return
+
+        try:
+            # The step tracker is optional; without one WandB chooses the step itself
+            get_step = getattr(self._logger_step_tracker, "get_current_step", None)
+            step_val = get_step() if callable(get_step) else None
+
+            # Encode into a temporary directory so the mp4 is deleted again
+            # after wandb.log() has picked it up
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                temp_path: str = os.path.join(tmp_dir, "video.mp4")
+                clip: ImageSequenceClip = ImageSequenceClip(frames, fps=self._fps)
+                clip.write_videofile(temp_path, logger=None)
+                wandb.log({
+                    "video": wandb.Video(temp_path, format="mp4"),
+                }, step=step_val)
+            Logger.print("[VideoRecorder] Uploaded video to WandB ({} frames, step {})".format(
+                len(frames), step_val))
+        except Exception as e:
+            Logger.print("[VideoRecorder] Error creating video: {}".format(e))
+        return
diff --git a/mimickit/util/wandb_logger.py b/mimickit/util/wandb_logger.py
index c6697ab..5be147d 100644
--- a/mimickit/util/wandb_logger.py
+++ b/mimickit/util/wandb_logger.py
@@ -82,4 +82,17 @@ def _build_key_tags(self):
                 curr_tags = "{:s}/{:s}".format(curr_tag, key)
                 tags.append(curr_tags)
 
-        return tags
\ No newline at end of file
+        return tags
+
+    def get_current_step(self) -> int:
+        """Get the current step value that would be used for wandb.log().
+ + Returns the step value based on the step_key if set, otherwise returns row_count. + This matches the logic used in write_log(). + """ + step_val = self._row_count + if (self._step_key is not None): + entry = self.log_current_row.get(self._step_key, "") + if entry != "": + step_val = entry.val + return int(step_val) \ No newline at end of file