ubi-coro · lbergmann1 · Nov 19, 2025 · Nov 19, 2025 · Nov 19, 2025 · Nov 19, 2025
diff --git a/docs/_static/envs/img_push_box_env_1_mover.png b/docs/_static/envs/img_push_box_env_1_mover.png
diff --git a/docs/_static/envs/img_push_l_env_1_mover.png b/docs/_static/envs/img_push_l_env_1_mover.png
diff --git a/docs/_static/envs/img_push_t_env_1_mover.png b/docs/_static/envs/img_push_t_env_1_mover.png
diff --git a/docs/_static/envs/img_push_x_env_1_mover.png b/docs/_static/envs/img_push_x_env_1_mover.png
diff --git a/docs/_static/envs/img_state_based_pusing_env_with_static_obstacles.png b/docs/_static/envs/img_state_based_pusing_env_with_static_obstacles.png
diff --git a/docs/environments.rst b/docs/environments.rst
@@ -17,6 +17,8 @@ A detailed documentation of all environments can be found in the following subse
 
    environments/state_based_global_pushing_env
    environments/state_based_push_t_env
+   environments/state_based_push_x_env
+   environments/state_based_push_l_env
    environments/state_based_push_box_env
    environments/state_based_static_obstacle_pushing_env
 

diff --git a/docs/environments/long_horizon_global_trajectory_planning_env.rst b/docs/environments/long_horizon_global_trajectory_planning_env.rst
@@ -136,6 +136,8 @@ To use the example, please install Stable-Baselines3 as described in the
     from stable_baselines3 import SAC, HerReplayBuffer
     import magbotsim
 
+    gym.register_envs(magbotsim)
+
     render_mode = None
     mover_params = {'size': np.array([0.113 / 2, 0.113 / 2, 0.012 / 2]), 'mass': 0.628}
     collision_params = {'shape': 'box', 'size': np.array([0.113 / 2 + 1e-6, 0.113 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0}

diff --git a/docs/environments/state_based_global_pushing_env.rst b/docs/environments/state_based_global_pushing_env.rst
@@ -177,6 +177,8 @@ described in the `documentation <https://stable-baselines3.readthedocs.io/en/mas
     from stable_baselines3 import SAC, HerReplayBuffer
     import magbotsim
 
+    gym.register_envs(magbotsim)
+
     render_mode = None
     mover_params = {'size': np.array([0.113 / 2, 0.113 / 2, 0.012 / 2]), 'mass': 0.628}
     collision_params = {'shape': 'box', 'size': np.array([0.113 / 2 + 1e-6, 0.113 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0}

diff --git a/docs/environments/state_based_push_box_env.rst b/docs/environments/state_based_push_box_env.rst
@@ -34,6 +34,8 @@ described in the `documentation <https://stable-baselines3.readthedocs.io/en/mas
     from stable_baselines3 import SAC, HerReplayBuffer
     import magbotsim
 
+    gym.register_envs(magbotsim)
+
     render_mode = None
     mover_params = {'size': np.array([0.113 / 2, 0.113 / 2, 0.012 / 2]), 'mass': 0.628}
     collision_params = {'shape': 'box', 'size': np.array([0.113 / 2 + 1e-6, 0.113 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0}

diff --git a/docs/environments/state_based_push_l_env.rst b/docs/environments/state_based_push_l_env.rst
@@ -0,0 +1,67 @@
+.. _state_based_push_l_env:
+
+State-Based Push-L Environment
+==============================
+
+The ``StateBasedPushLEnv`` is an object pushing environment with an L-shaped object:
+
+.. image:: ../_static/envs/img_push_l_env_1_mover.png
+    :width: 40%
+    :align: center
+
+The Push-L task is a manipulation benchmark where the goal is to push an L-shaped object to a target
+pose (position and orientation). Similar to the Push-T task, this challenge requires precise control to achieve
+both translational and rotational alignment of the asymmetric object. The L-shape adds unique dynamics compared
+to the T-shape due to its different mass distribution and geometric properties.
+
+This environment is a preconfigured version of :ref:`state_based_global_pushing_env` specifically designed for the
+Push-L manipulation task. Please refer to the :ref:`state_based_global_pushing_env` for additional information about
+the observation space, action space, immediate rewards, episode termination/truncation, and environment reset.
+
+Basic Usage
+-----------
+The following example shows how to train an agent using `Stable-Baselines3 <https://stable-baselines3.readthedocs.io/en/master/>`_. To use the example, please install Stable-Baselines3 as
+described in the `documentation <https://stable-baselines3.readthedocs.io/en/master/guide/install.html>`_.
+
+.. note::
+    This is a simplified example that is not guaranteed to converge, as the default parameters are used. However, it is important to note that
+    the parameter ``copy_info_dict`` is set to ``True``. This way, it is not necessary to check for collision again to compute the reward when a
+    transition is relabeled by HER, since the information is already available in the ``info``-dict.
+
+
+.. code-block:: python
+
+    import numpy as np
+    import gymnasium as gym
+    from stable_baselines3 import SAC, HerReplayBuffer
+    import magbotsim
+
+    gym.register_envs(magbotsim)
+
+    render_mode = None
+    mover_params = {'size': np.array([0.155 / 2, 0.155 / 2, 0.012 / 2]), 'mass': 1.24}
+    collision_params = {'shape': 'box', 'size': np.array([0.155 / 2 + 1e-6, 0.155 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0}
+    env_params = {'mover_params': mover_params, 'collision_params': collision_params, 'render_mode': render_mode}
+
+    env = gym.make('StateBasedPushLEnv-v0', **env_params)
+    # copy_info_dict=True, as information about collisions is stored in the info dictionary to avoid
+    # computationally expensive collision checking calculations when the data is relabeled (HER)
+    model = SAC(
+        policy='MultiInputPolicy',
+        env=env,
+        replay_buffer_class=HerReplayBuffer,
+        replay_buffer_kwargs={'copy_info_dict': True},
+        verbose=1
+    )
+    model.learn(total_timesteps=int(1e6))
+
+Version History
+---------------
+- v0: initial version of the environment
+
+Parameters
+----------
+.. automodule:: magbotsim.rl_envs.object_manipulation.pushing.state_based_push_l_env
+  :members:
+  :no-index:
+  :show-inheritance:
diff --git a/docs/environments/state_based_push_t_env.rst b/docs/environments/state_based_push_t_env.rst
@@ -37,6 +37,8 @@ described in the `documentation <https://stable-baselines3.readthedocs.io/en/mas
     from stable_baselines3 import SAC, HerReplayBuffer
     import magbotsim
 
+    gym.register_envs(magbotsim)
+
     render_mode = None
     mover_params = {'size': np.array([0.113 / 2, 0.113 / 2, 0.012 / 2]), 'mass': 0.628}
     collision_params = {'shape': 'box', 'size': np.array([0.113 / 2 + 1e-6, 0.113 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0}

diff --git a/docs/environments/state_based_push_x_env.rst b/docs/environments/state_based_push_x_env.rst
@@ -0,0 +1,68 @@
+.. _state_based_push_x_env:
+
+State-Based Push-X Environment
+==============================
+
+The ``StateBasedPushXEnv`` is an object pushing environment with an X-shaped object:
+
+.. image:: ../_static/envs/img_push_x_env_1_mover.png
+    :width: 40%
+    :align: center
+
+The Push-X task is a manipulation benchmark where the goal is to push an X-shaped object to a target
+pose (position and orientation). Similar to the Push-T task, this challenge requires precise control to achieve
+both translational and rotational alignment of the object. The X-shape presents unique symmetry properties
+compared to the T-shape and L-shape, with its four-fold rotational symmetry making orientation alignment particularly
+interesting.
+
+This environment is a preconfigured version of :ref:`state_based_global_pushing_env` specifically designed for the
+Push-X manipulation task. Please refer to the :ref:`state_based_global_pushing_env` for additional information about
+the observation space, action space, immediate rewards, episode termination/truncation, and environment reset.
+
+Basic Usage
+-----------
+The following example shows how to train an agent using `Stable-Baselines3 <https://stable-baselines3.readthedocs.io/en/master/>`_. To use the example, please install Stable-Baselines3 as
+described in the `documentation <https://stable-baselines3.readthedocs.io/en/master/guide/install.html>`_.
+
+.. note::
+    This is a simplified example that is not guaranteed to converge, as the default parameters are used. However, it is important to note that
+    the parameter ``copy_info_dict`` is set to ``True``. This way, it is not necessary to check for collision again to compute the reward when a
+    transition is relabeled by HER, since the information is already available in the ``info``-dict.
+
+
+.. code-block:: python
+
+    import numpy as np
+    import gymnasium as gym
+    from stable_baselines3 import SAC, HerReplayBuffer
+    import magbotsim
+
+    gym.register_envs(magbotsim)
+
+    render_mode = None
+    mover_params = {'size': np.array([0.155 / 2, 0.155 / 2, 0.012 / 2]), 'mass': 1.24}
+    collision_params = {'shape': 'box', 'size': np.array([0.155 / 2 + 1e-6, 0.155 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0}
+    env_params = {'mover_params': mover_params, 'collision_params': collision_params, 'render_mode': render_mode}
+
+    env = gym.make('StateBasedPushXEnv-v0', **env_params)
+    # copy_info_dict=True, as information about collisions is stored in the info dictionary to avoid
+    # computationally expensive collision checking calculations when the data is relabeled (HER)
+    model = SAC(
+        policy='MultiInputPolicy',
+        env=env,
+        replay_buffer_class=HerReplayBuffer,
+        replay_buffer_kwargs={'copy_info_dict': True},
+        verbose=1
+    )
+    model.learn(total_timesteps=int(1e6))
+
+Version History
+---------------
+- v0: initial version of the environment
+
+Parameters
+----------
+.. automodule:: magbotsim.rl_envs.object_manipulation.pushing.state_based_push_x_env
+  :members:
+  :no-index:
+  :show-inheritance:
diff --git a/docs/environments/state_based_static_obstacle_pushing_env.rst b/docs/environments/state_based_static_obstacle_pushing_env.rst
@@ -129,6 +129,8 @@ described in the `documentation <https://stable-baselines3.readthedocs.io/en/mas
     from stable_baselines3 import SAC, HerReplayBuffer
     import magbotsim
 
+    gym.register_envs(magbotsim)
+
     render_mode = None
     mover_params = {'size': np.array([0.113 / 2, 0.113 / 2, 0.012 / 2]), 'mass': 0.628}
     collision_params = {'shape': 'box', 'size': np.array([0.113 / 2 + 1e-6, 0.113 / 2 + 1e-6]), 'offset': 0.0, 'offset_wall': 0.0}

diff --git a/magbotsim/__init__.py b/magbotsim/__init__.py
@@ -63,31 +63,24 @@ def register_gymnasium_envs():
             entry_point=f'magbotsim.rl_envs.object_manipulation.pushing.state_based_push_box_env:StateBasedPushBoxEnvB{i}',
         )
 
-    register(
-        id='StateBasedPushTEnv-v0',
-        entry_point='magbotsim.rl_envs.object_manipulation.pushing.state_based_push_t_env:StateBasedPushTEnv',
-        max_episode_steps=50,
-    )
+    # Letter-Pushing Envs
+    for letter in ['t', 'x', 'l']:
+        env_name = f'StateBasedPush{letter.upper()}Env'
+        path = f'magbotsim.rl_envs.object_manipulation.pushing.state_based_push_{letter}_env'
 
-    # Push T Benchmarks
-    for i in range(len(BENCHMARK_PLANNING_LAYOUTS)):
+        # Base Environment
         register(
-            id=f'StateBasedPushTEnvB{i}-v0',
-            entry_point=f'magbotsim.rl_envs.object_manipulation.pushing.state_based_push_t_env:StateBasedPushTEnvB{i}',
+            id=f'{env_name}-v0',
+            entry_point=f'{path}:{env_name}',
+            max_episode_steps=50,
         )
 
-    register(
-        id='StateBasedPushXEnv-v0',
-        entry_point='magbotsim.rl_envs.object_manipulation.pushing.state_based_push_x_env:StateBasedPushXEnv',
-        max_episode_steps=50,
-    )
-
-    # Push X Benchmarks
-    for i in range(len(BENCHMARK_PLANNING_LAYOUTS)):
-        register(
-            id=f'StateBasedPushXEnvB{i}-v0',
-            entry_point=f'magbotsim.rl_envs.object_manipulation.pushing.state_based_push_x_env:StateBasedPushXEnvB{i}',
-        )
+        # Benchmark Environments
+        for i in range(len(BENCHMARK_PLANNING_LAYOUTS)):
+            register(
+                id=f'{env_name}B{i}-v0',
+                entry_point=f'{path}:{env_name}B{i}',
+            )
 
 
 register_gymnasium_envs()