Emerge-Lab · vcharraut · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
diff --git a/pufferlib/config/default.ini b/pufferlib/config/default.ini
@@ -27,6 +27,7 @@ cpu_offload = False
 device = cuda
 optimizer = adamw
 anneal_lr = True
+amp = True
 precision = float32
 total_timesteps = 10_000_000
 learning_rate = 0.015
@@ -44,6 +45,7 @@ adam_beta2 = 0.999
 adam_eps = 1e-12
 
 data_dir = experiments
+resume_state_path = None
 checkpoint_interval = 200
 batch_size = auto
 minibatch_size = 8192
@@ -52,8 +54,8 @@ minibatch_size = 8192
 max_minibatch_size = 32768
 bptt_horizon = 64
 compile = False
-compile_mode = max-autotune-no-cudagraphs
-compile_fullgraph = True
+compile_mode = default
+compile_fullgraph = False
 
 vtrace_rho_clip = 1.0
 vtrace_c_clip = 1.0

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
@@ -104,7 +104,7 @@ reward_ade = 0.0
 
 ; --- Map ---
 ; Path to map used for training
-map_dir = "pufferlib/resources/drive/binaries/carla_py123d"
+map_dir = "pufferlib/resources/drive/binaries/carla"
 ; Number of maps to load from map_dir
 num_maps = 8
 
@@ -224,6 +224,22 @@ num_episodes = 100
 score_threshold = -inf
 render = true
 
+[eval]
+; Set to True to enable periodic multi-scenario evaluation during training
+multi_scenario_eval = False
+; Frequency of evaluation during training (in epochs)
+eval_interval = 25
+num_agents = 512
+; Batch size for eval_multi_scenarios (number of scenarios per batch)
+; Path to dataset used for evaluation
+map_dir = "pufferlib/resources/drive/binaries/eval"
+; Simulation mode for evaluation: "gigaflow" or "replay"
+multi_scenario_simulation_mode = "replay"
+; Total number of scenarios to evaluate
+multi_scenario_num_scenarios = 250
+backend = PufferEnv
+
+
 [eval.validation_replay]
 type = "multi_scenario"
 enabled = true

diff --git a/pufferlib/ocean/drive/datatypes.h b/pufferlib/ocean/drive/datatypes.h
@@ -101,7 +101,7 @@ struct Waypoint {
     float cos_heading; // Cached cosf(heading) - set in build_path
     float sin_heading; // Cached sinf(heading) - set in build_path
     float kappa;       // Curvature at this point
-    int lane_idx;      // Index of the lane this waypoint belongs to (for GT path) or closest to (for expert path)
+    int lane_idx;      // Index of the lane this waypoint belongs to
 };
 struct Path {
     struct Waypoint waypoints[MAX_NUM_WP_PATH];

diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
@@ -77,8 +77,7 @@
 
 // Collision and distance thresholds
 #define MAX_CHECKED_LANES 32
-#define COLLISION_QUICK_CHECK_DIST 15.0f  // Quick distance check before OBB SAT
-#define INIT_COLLISION_SHRINK_FACTOR 0.7f // Shrink agent dims at init to prevent collisions
+#define COLLISION_QUICK_CHECK_DIST 15.0f // Quick distance check before OBB SAT
 #define AGENT_STOPPED_SPEED_THRESHOLD 0.2f
 #define MAX_STOPPED_SECONDS 60.0f
 #define TRAFFIC_LIGHT_DISTANCE_THRESHOLD 10.0f
@@ -128,6 +127,7 @@
 #define ROAD_FEATURES 7
 #define PARTNER_FEATURES 8
 #define TRAFFIC_CONTROL_FEATURES 7
+#define PADDED_OBSERVATION_VALUE -0.001f
 #define STATIC_TARGET_FEATURES 3
 #define DYNAMIC_TARGET_FEATURES 5
 
@@ -3010,8 +3010,6 @@ static int spawn_agent(Drive *env, int agent_idx, int num_agents) {
 }
 
 static void set_start_position(Drive *env) {
-    bool is_log_replay = (env->control_mode == CONTROL_SDC_ONLY);
-
     for (int i = 0; i < env->num_total_agents; i++) {
         int is_active = 0;
         for (int j = 0; j < env->active_agent_count; j++) {
@@ -3072,12 +3070,6 @@ static void set_start_position(Drive *env) {
                 agent->sim_vy = agent->log_velocity_y[step];
                 update_agent_speed(agent);
             }
-
-            // Shrink width and length slightly to avoid initial collisions (not in log-replay)
-            if (!is_log_replay) {
-                agent->sim_length *= INIT_COLLISION_SHRINK_FACTOR;
-                agent->sim_width *= INIT_COLLISION_SHRINK_FACTOR;
-            }
         }
 
         // Reset agent metrics and state
@@ -3096,7 +3088,7 @@ static bool should_control_agent(Drive *env, int agent_idx) {
     Agent *agent = &env->agents[agent_idx];
 
     if (env->control_mode == CONTROL_SDC_ONLY) {
-        return agent_idx == 0 && agent->route_length != 0;
+        return agent_idx == EGO_IDX && agent->route_length != 0;
     }
 
     if (env->control_mode == CONTROL_WOSAC) {
@@ -3141,13 +3133,6 @@ void set_active_agents(Drive *env) {
 
     // In GIGAFLOW mode, spawn agents dynamically on the map
     if (env->simulation_mode == SIMULATION_GIGAFLOW) {
-        if (env->grid_map->num_drivable_grid_cell == 0) {
-            env->agents = (Agent *) calloc(1, sizeof(Agent));
-            env->active_agent_indices = (int *) malloc(sizeof(int));
-            env->active_agent_count = 0;
-            env->num_agents = 0;
-            return;
-        }
         int num_agents_to_create = env->num_controllable_agents;
 
         // Initialize agents for GIGAFLOW mode
@@ -3503,6 +3488,37 @@ static int compute_observation_size(Drive *env) {
     return max_obs;
 }
 
+// Fold a heading from [-3pi/2, 3pi/2] (e.g. atan2f output) into [-pi/2, pi/2]; opposite directions coincide.
+static inline float wrap_heading(float angle) {
+    if (angle > (float) M_PI / 2.0f) {
+        angle -= (float) M_PI; // single shift by pi: inputs beyond 3pi/2 are not fully folded
+    } else if (angle < -(float) M_PI / 2.0f) {
+        angle += (float) M_PI; // mirror case for inputs below -pi/2
+    }
+    return angle;
+}
+
+// Write PADDED_OBSERVATION_VALUE into rows * features consecutive floats at `obs`; no-op if either is <= 0.
+static inline void fill_padded_observation_rows(float *obs, int rows, int features) {
+    for (int r = 0; r < rows; r++) {
+        for (int c = 0; c < features; c++) {
+            obs[r * features + c] = PADDED_OBSERVATION_VALUE; // row-major: row r starts at r * features
+        }
+    }
+}
+
+// Pad `rows` traffic-control slots: sentinel in every column except the last two (type = NONE, state = UNKNOWN).
+static inline void fill_padded_traffic_control_rows(float *obs, int rows) {
+    for (int r = 0; r < rows; r++) {
+        int base = r * TRAFFIC_CONTROL_FEATURES; // offset of slot r within obs
+        for (int c = 0; c < TRAFFIC_CONTROL_FEATURES - 2; c++) {
+            obs[base + c] = PADDED_OBSERVATION_VALUE;
+        }
+        obs[base + TRAFFIC_CONTROL_FEATURES - 2] = TRAFFIC_CONTROL_TYPE_NONE; // second-to-last column: type
+        obs[base + TRAFFIC_CONTROL_FEATURES - 1] = TRAFFIC_CONTROL_STATE_UNKNOWN; // last column: state
+    }
+}
+
 void allocate(Drive *env) {
     init(env);
     int max_obs = compute_observation_size(env);
@@ -4260,7 +4276,7 @@ static void compute_observations(Drive *env) {
         // ===== Partner observations =====
         if (ego_entity->is_blind_partner && random_uniform(0.0f, 1.0f) < env->partner_blindness_trigger_prob) {
             int total_partner_floats = env->max_partner_observations * PARTNER_FEATURES;
-            memset(&obs[obs_idx], 0, total_partner_floats * sizeof(float));
+            fill_padded_observation_rows(&obs[obs_idx], env->max_partner_observations, PARTNER_FEATURES);
             obs_idx += total_partner_floats;
         } else {
             // Collect candidate agents within max observation distance, then sort and select closest ones.
@@ -4359,7 +4375,7 @@ static void compute_observations(Drive *env) {
                 }
             }
             int remaining_partner_obs = (env->max_partner_observations - cars_seen) * PARTNER_FEATURES;
-            memset(&obs[obs_idx], 0, remaining_partner_obs * sizeof(float));
+            fill_padded_observation_rows(&obs[obs_idx], env->max_partner_observations - cars_seen, PARTNER_FEATURES);
             obs_idx += remaining_partner_obs;
         }
 
@@ -4440,6 +4456,12 @@ static void compute_observations(Drive *env) {
             float dy_norm = (length > 0) ? dy / length : dy;
             float cos_angle = dx_norm * ego_entity->cos_heading + dy_norm * ego_entity->sin_heading;
             float sin_angle = -dx_norm * ego_entity->sin_heading + dy_norm * ego_entity->cos_heading;
+            // Road edges are undirected, so fold the heading to a single orientation.
+            if (is_edge && length > 0) {
+                float angle = wrap_heading(atan2f(sin_angle, cos_angle));
+                cos_angle = cosf(angle);
+                sin_angle = sinf(angle);
+            }
 
             float *target;
             int *counter;
@@ -4483,24 +4505,24 @@ static void compute_observations(Drive *env) {
             subsample_road_observation_rows(lanes_buffer, lanes_collected, lane_to_write);
             subsample_road_observation_rows(boundaries_buffer, boundaries_collected, boundary_to_write);
             memcpy(&obs[lane_obs_idx], lanes_buffer, lane_to_write * ROAD_FEATURES * sizeof(float));
-            memset(
+            fill_padded_observation_rows(
                 &obs[lane_obs_idx + lane_to_write * ROAD_FEATURES],
-                0,
-                (env->obs_lane_segment_count - lane_to_write) * ROAD_FEATURES * sizeof(float));
+                env->obs_lane_segment_count - lane_to_write,
+                ROAD_FEATURES);
             memcpy(&obs[boundary_obs_idx], boundaries_buffer, boundary_to_write * ROAD_FEATURES * sizeof(float));
-            memset(
+            fill_padded_observation_rows(
                 &obs[boundary_obs_idx + boundary_to_write * ROAD_FEATURES],
-                0,
-                (env->obs_boundary_segment_count - boundary_to_write) * ROAD_FEATURES * sizeof(float));
+                env->obs_boundary_segment_count - boundary_to_write,
+                ROAD_FEATURES);
         } else {
-            memset(
+            fill_padded_observation_rows(
                 &obs[lane_obs_idx + lanes_collected * ROAD_FEATURES],
-                0,
-                (env->obs_lane_segment_count - lanes_collected) * ROAD_FEATURES * sizeof(float));
-            memset(
+                env->obs_lane_segment_count - lanes_collected,
+                ROAD_FEATURES);
+            fill_padded_observation_rows(
                 &obs[boundary_obs_idx + boundaries_collected * ROAD_FEATURES],
-                0,
-                (env->obs_boundary_segment_count - boundaries_collected) * ROAD_FEATURES * sizeof(float));
+                env->obs_boundary_segment_count - boundaries_collected,
+                ROAD_FEATURES);
         }
 
         // ===== Traffic control observations =====
@@ -4598,9 +4620,9 @@ static void compute_observations(Drive *env) {
             controls_added++;
         }
 
-        // Zero out remaining traffic control slots
-        int remaining_traffic_obs = (env->max_traffic_control_observations - controls_added) * TRAFFIC_CONTROL_FEATURES;
-        memset(&obs[obs_idx], 0, remaining_traffic_obs * sizeof(float));
+        int remaining_traffic_controls = env->max_traffic_control_observations - controls_added;
+        fill_padded_traffic_control_rows(&obs[obs_idx], remaining_traffic_controls);
+        obs_idx += remaining_traffic_controls * TRAFFIC_CONTROL_FEATURES;
     }
 }
 

diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py
@@ -255,7 +255,8 @@ def __init__(
             self.control_mode = 3
         else:
             raise ValueError(
-                f"control_mode must be one of 'control_vehicles', 'control_agents', 'control_wosac', or 'control_sdc_only'. Got: {self.control_mode_str}"
+                "control_mode must be one of 'control_vehicles', 'control_agents', 'control_wosac', or "
+                f"'control_sdc_only'. Got: {self.control_mode_str}"
             )
         if self.init_mode_str == "create_all_valid":
             self.init_mode = 0

diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py
@@ -35,6 +35,9 @@ def _create_encoder(self, in_features, input_size, encoder_gigaflow, dropout=0.0
                 pufferlib.pytorch.layer_init(nn.Linear(input_size, input_size)),
             )
 
+    def _encode_and_pool(self, objects, encoder):  # objects: (batch, n_objects, features) as built in forward
+        return encoder(objects).max(dim=1).values  # max over dim 1 (the object axis), which is reduced away
+
     def __init__(
         self,
         env,
@@ -46,6 +49,7 @@ def __init__(
         dropout,
     ):
         super().__init__()
+        self.input_size = input_size
 
         # Observation dimensions from environment config
         self.max_partner_observations = env.max_partner_observations
@@ -145,19 +149,17 @@ def forward(self, observations, ego_dim):
         # Encode Lanes and Boundaries separately
         if self.obs_lane_segment_count > 0:
             lane_objects = lane_observations.view(-1, self.obs_lane_segment_count, self.road_features_count)
-            lane_features, _ = self.lane_encoder(lane_objects).max(dim=1)
+            lane_features = self._encode_and_pool(lane_objects, self.lane_encoder)
             feature_list.append(lane_features)
         if self.obs_boundary_segment_count > 0:
             boundary_objects = boundary_observations.view(-1, self.obs_boundary_segment_count, self.road_features_count)
-
-            boundary_features, _ = self.boundary_encoder(boundary_objects).max(dim=1)
+            boundary_features = self._encode_and_pool(boundary_objects, self.boundary_encoder)
             feature_list.append(boundary_features)
 
         # Encode Partners
         if self.max_partner_observations > 0:
             partner_objects = partner_observations.view(-1, self.max_partner_observations, self.partner_features_count)
-            partner_encoded = self.partner_encoder(partner_objects)
-            partner_features, _ = partner_encoded.max(dim=1)
+            partner_features = self._encode_and_pool(partner_objects, self.partner_encoder)
             feature_list.append(partner_features)
 
         # Encode Traffic Controls
@@ -171,16 +173,16 @@ def forward(self, observations, ego_dim):
             traffic_control_type_onehot = F.one_hot(
                 traffic_control_type.long(),
                 num_classes=binding.NUM_TRAFFIC_CONTROL_TYPES,
-            ).float()
+            ).to(traffic_control_continuous.dtype)
             traffic_control_state_onehot = F.one_hot(
                 traffic_control_state.long(),
                 num_classes=binding.NUM_TRAFFIC_CONTROL_STATES,
-            ).float()
+            ).to(traffic_control_continuous.dtype)
             traffic_control_objects = torch.cat(
                 [traffic_control_continuous, traffic_control_type_onehot, traffic_control_state_onehot],
                 dim=2,
             )
-            traffic_control_features, _ = self.traffic_control_encoder(traffic_control_objects).max(dim=1)
+            traffic_control_features = self._encode_and_pool(traffic_control_objects, self.traffic_control_encoder)
             feature_list.append(traffic_control_features)
 
         # Add optional features if enabled