Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pufferlib/config/default.ini
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ cpu_offload = False
device = cuda
optimizer = adamw
anneal_lr = True
amp = True
precision = float32
total_timesteps = 10_000_000
learning_rate = 0.015
Expand All @@ -44,6 +45,7 @@ adam_beta2 = 0.999
adam_eps = 1e-12

data_dir = experiments
resume_state_path = None
checkpoint_interval = 200
batch_size = auto
minibatch_size = 8192
Expand All @@ -52,8 +54,8 @@ minibatch_size = 8192
max_minibatch_size = 32768
bptt_horizon = 64
compile = False
compile_mode = max-autotune-no-cudagraphs
compile_fullgraph = True
compile_mode = default
compile_fullgraph = False

vtrace_rho_clip = 1.0
vtrace_c_clip = 1.0
Expand Down
18 changes: 17 additions & 1 deletion pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ reward_ade = 0.0

; --- Map ---
; Path to map used for training
map_dir = "pufferlib/resources/drive/binaries/carla_py123d"
map_dir = "pufferlib/resources/drive/binaries/carla"
; Number of maps to load from map_dir
num_maps = 8

Expand Down Expand Up @@ -224,6 +224,22 @@ num_episodes = 100
score_threshold = -inf
render = true

[eval]
; Set to True to enable periodic multi-scenario evaluation during training
multi_scenario_eval = False
; Frequency of evaluation during training (in epochs)
eval_interval = 25
num_agents = 512
; Batch size for eval_multi_scenarios (number of scenarios per batch)
; Path to dataset used for evaluation
map_dir = "pufferlib/resources/drive/binaries/eval"
; Simulation mode for evaluation: "gigaflow" or "replay"
multi_scenario_simulation_mode = "replay"
; Total number of scenarios to evaluate
multi_scenario_num_scenarios = 250
backend = PufferEnv
Comment on lines +227 to +240


[eval.validation_replay]
type = "multi_scenario"
enabled = true
Expand Down
2 changes: 1 addition & 1 deletion pufferlib/ocean/drive/datatypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ struct Waypoint {
float cos_heading; // Cached cosf(heading) - set in build_path
float sin_heading; // Cached sinf(heading) - set in build_path
float kappa; // Curvature at this point
int lane_idx; // Index of the lane this waypoint belongs to (for GT path) or closest to (for expert path)
int lane_idx; // Index of the lane this waypoint belongs to
};
struct Path {
struct Waypoint waypoints[MAX_NUM_WP_PATH];
Expand Down
92 changes: 57 additions & 35 deletions pufferlib/ocean/drive/drive.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,7 @@

// Collision and distance thresholds
#define MAX_CHECKED_LANES 32
#define COLLISION_QUICK_CHECK_DIST 15.0f // Quick distance check before OBB SAT
#define INIT_COLLISION_SHRINK_FACTOR 0.7f // Shrink agent dims at init to prevent collisions
#define COLLISION_QUICK_CHECK_DIST 15.0f // Quick distance check before OBB SAT
#define AGENT_STOPPED_SPEED_THRESHOLD 0.2f
#define MAX_STOPPED_SECONDS 60.0f
#define TRAFFIC_LIGHT_DISTANCE_THRESHOLD 10.0f
Expand Down Expand Up @@ -128,6 +127,7 @@
#define ROAD_FEATURES 7
#define PARTNER_FEATURES 8
#define TRAFFIC_CONTROL_FEATURES 7
#define PADDED_OBSERVATION_VALUE -0.001f
#define STATIC_TARGET_FEATURES 3
#define DYNAMIC_TARGET_FEATURES 5

Expand Down Expand Up @@ -3010,8 +3010,6 @@ static int spawn_agent(Drive *env, int agent_idx, int num_agents) {
}

static void set_start_position(Drive *env) {
bool is_log_replay = (env->control_mode == CONTROL_SDC_ONLY);

for (int i = 0; i < env->num_total_agents; i++) {
int is_active = 0;
for (int j = 0; j < env->active_agent_count; j++) {
Expand Down Expand Up @@ -3072,12 +3070,6 @@ static void set_start_position(Drive *env) {
agent->sim_vy = agent->log_velocity_y[step];
update_agent_speed(agent);
}

// Shrink width and length slightly to avoid initial collisions (not in log-replay)
if (!is_log_replay) {
agent->sim_length *= INIT_COLLISION_SHRINK_FACTOR;
agent->sim_width *= INIT_COLLISION_SHRINK_FACTOR;
}
}

// Reset agent metrics and state
Expand All @@ -3096,7 +3088,7 @@ static bool should_control_agent(Drive *env, int agent_idx) {
Agent *agent = &env->agents[agent_idx];

if (env->control_mode == CONTROL_SDC_ONLY) {
return agent_idx == 0 && agent->route_length != 0;
return agent_idx == EGO_IDX && agent->route_length != 0;
}

if (env->control_mode == CONTROL_WOSAC) {
Expand Down Expand Up @@ -3141,13 +3133,6 @@ void set_active_agents(Drive *env) {

// In GIGAFLOW mode, spawn agents dynamically on the map
if (env->simulation_mode == SIMULATION_GIGAFLOW) {
if (env->grid_map->num_drivable_grid_cell == 0) {
env->agents = (Agent *) calloc(1, sizeof(Agent));
env->active_agent_indices = (int *) malloc(sizeof(int));
env->active_agent_count = 0;
env->num_agents = 0;
return;
}
int num_agents_to_create = env->num_controllable_agents;

// Initialize agents for GIGAFLOW mode
Expand Down Expand Up @@ -3503,6 +3488,37 @@ static int compute_observation_size(Drive *env) {
return max_obs;
}

// Collapse a heading so that opposite directions share one orientation.
// At most one +/-pi shift is applied, so the result lies in [-pi/2, pi/2] only
// when the input is within [-3pi/2, 3pi/2] (e.g. any atan2f result).
static inline float wrap_heading(float angle) {
    const float half_pi = (float) M_PI / 2.0f;

    // Pointing "backwards" on the positive side: rotate by -pi.
    if (angle > half_pi) {
        return angle - (float) M_PI;
    }
    // Pointing "backwards" on the negative side: rotate by +pi.
    if (angle < -half_pi) {
        return angle + (float) M_PI;
    }
    // Already inside the folded range.
    return angle;
}

// Write PADDED_OBSERVATION_VALUE into every slot of a contiguous
// `rows` x `features` block starting at `obs`.
// Nothing is written when `rows` or `features` is zero or negative.
static inline void fill_padded_observation_rows(float *obs, int rows, int features) {
    // `slot` is the flat offset of the current (row, col) cell in row-major order.
    int slot = 0;
    for (int row = 0; row < rows; ++row) {
        for (int col = 0; col < features; ++col, ++slot) {
            obs[slot] = PADDED_OBSERVATION_VALUE;
        }
    }
}

// Pad `rows` traffic-control slots starting at `obs`.
// Each slot is TRAFFIC_CONTROL_FEATURES floats wide: every column except the
// last two receives PADDED_OBSERVATION_VALUE, while the second-to-last (type)
// and last (state) columns are set to TRAFFIC_CONTROL_TYPE_NONE and
// TRAFFIC_CONTROL_STATE_UNKNOWN instead of the sentinel.
static inline void fill_padded_traffic_control_rows(float *obs, int rows) {
    const int type_col = TRAFFIC_CONTROL_FEATURES - 2;
    const int state_col = TRAFFIC_CONTROL_FEATURES - 1;

    for (int row = 0; row < rows; ++row) {
        float *slot = obs + row * TRAFFIC_CONTROL_FEATURES;

        // Leading columns: padding sentinel.
        for (int col = 0; col < type_col; ++col) {
            slot[col] = PADDED_OBSERVATION_VALUE;
        }

        // Trailing type/state columns: explicit "no control" markers.
        slot[type_col] = TRAFFIC_CONTROL_TYPE_NONE;
        slot[state_col] = TRAFFIC_CONTROL_STATE_UNKNOWN;
    }
}

void allocate(Drive *env) {
init(env);
int max_obs = compute_observation_size(env);
Expand Down Expand Up @@ -4260,7 +4276,7 @@ static void compute_observations(Drive *env) {
// ===== Partner observations =====
if (ego_entity->is_blind_partner && random_uniform(0.0f, 1.0f) < env->partner_blindness_trigger_prob) {
int total_partner_floats = env->max_partner_observations * PARTNER_FEATURES;
memset(&obs[obs_idx], 0, total_partner_floats * sizeof(float));
fill_padded_observation_rows(&obs[obs_idx], env->max_partner_observations, PARTNER_FEATURES);
obs_idx += total_partner_floats;
} else {
// Collect candidate agents within max observation distance, then sort and select closest ones.
Expand Down Expand Up @@ -4359,7 +4375,7 @@ static void compute_observations(Drive *env) {
}
}
int remaining_partner_obs = (env->max_partner_observations - cars_seen) * PARTNER_FEATURES;
memset(&obs[obs_idx], 0, remaining_partner_obs * sizeof(float));
fill_padded_observation_rows(&obs[obs_idx], env->max_partner_observations - cars_seen, PARTNER_FEATURES);
obs_idx += remaining_partner_obs;
}

Expand Down Expand Up @@ -4440,6 +4456,12 @@ static void compute_observations(Drive *env) {
float dy_norm = (length > 0) ? dy / length : dy;
float cos_angle = dx_norm * ego_entity->cos_heading + dy_norm * ego_entity->sin_heading;
float sin_angle = -dx_norm * ego_entity->sin_heading + dy_norm * ego_entity->cos_heading;
// Road edges are undirected, so fold the heading to a single orientation.
if (is_edge && length > 0) {
float angle = wrap_heading(atan2f(sin_angle, cos_angle));
cos_angle = cosf(angle);
sin_angle = sinf(angle);
}

float *target;
int *counter;
Expand Down Expand Up @@ -4483,24 +4505,24 @@ static void compute_observations(Drive *env) {
subsample_road_observation_rows(lanes_buffer, lanes_collected, lane_to_write);
subsample_road_observation_rows(boundaries_buffer, boundaries_collected, boundary_to_write);
memcpy(&obs[lane_obs_idx], lanes_buffer, lane_to_write * ROAD_FEATURES * sizeof(float));
memset(
fill_padded_observation_rows(
&obs[lane_obs_idx + lane_to_write * ROAD_FEATURES],
0,
(env->obs_lane_segment_count - lane_to_write) * ROAD_FEATURES * sizeof(float));
env->obs_lane_segment_count - lane_to_write,
ROAD_FEATURES);
memcpy(&obs[boundary_obs_idx], boundaries_buffer, boundary_to_write * ROAD_FEATURES * sizeof(float));
memset(
fill_padded_observation_rows(
&obs[boundary_obs_idx + boundary_to_write * ROAD_FEATURES],
0,
(env->obs_boundary_segment_count - boundary_to_write) * ROAD_FEATURES * sizeof(float));
env->obs_boundary_segment_count - boundary_to_write,
ROAD_FEATURES);
} else {
memset(
fill_padded_observation_rows(
&obs[lane_obs_idx + lanes_collected * ROAD_FEATURES],
0,
(env->obs_lane_segment_count - lanes_collected) * ROAD_FEATURES * sizeof(float));
memset(
env->obs_lane_segment_count - lanes_collected,
ROAD_FEATURES);
fill_padded_observation_rows(
&obs[boundary_obs_idx + boundaries_collected * ROAD_FEATURES],
0,
(env->obs_boundary_segment_count - boundaries_collected) * ROAD_FEATURES * sizeof(float));
env->obs_boundary_segment_count - boundaries_collected,
ROAD_FEATURES);
}

// ===== Traffic control observations =====
Expand Down Expand Up @@ -4598,9 +4620,9 @@ static void compute_observations(Drive *env) {
controls_added++;
}

// Zero out remaining traffic control slots
int remaining_traffic_obs = (env->max_traffic_control_observations - controls_added) * TRAFFIC_CONTROL_FEATURES;
memset(&obs[obs_idx], 0, remaining_traffic_obs * sizeof(float));
int remaining_traffic_controls = env->max_traffic_control_observations - controls_added;
fill_padded_traffic_control_rows(&obs[obs_idx], remaining_traffic_controls);
obs_idx += remaining_traffic_controls * TRAFFIC_CONTROL_FEATURES;
}
}

Expand Down
3 changes: 2 additions & 1 deletion pufferlib/ocean/drive/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ def __init__(
self.control_mode = 3
else:
raise ValueError(
f"control_mode must be one of 'control_vehicles', 'control_agents', 'control_wosac', or 'control_sdc_only'. Got: {self.control_mode_str}"
"control_mode must be one of 'control_vehicles', 'control_agents', 'control_wosac', or "
f"'control_sdc_only'. Got: {self.control_mode_str}"
)
if self.init_mode_str == "create_all_valid":
self.init_mode = 0
Expand Down
18 changes: 10 additions & 8 deletions pufferlib/ocean/torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ def _create_encoder(self, in_features, input_size, encoder_gigaflow, dropout=0.0
pufferlib.pytorch.layer_init(nn.Linear(input_size, input_size)),
)

def _encode_and_pool(self, objects, encoder):
return encoder(objects).max(dim=1).values

def __init__(
self,
env,
Expand All @@ -46,6 +49,7 @@ def __init__(
dropout,
):
super().__init__()
self.input_size = input_size

# Observation dimensions from environment config
self.max_partner_observations = env.max_partner_observations
Expand Down Expand Up @@ -145,19 +149,17 @@ def forward(self, observations, ego_dim):
# Encode Lanes and Boundaries separately
if self.obs_lane_segment_count > 0:
lane_objects = lane_observations.view(-1, self.obs_lane_segment_count, self.road_features_count)
lane_features, _ = self.lane_encoder(lane_objects).max(dim=1)
lane_features = self._encode_and_pool(lane_objects, self.lane_encoder)
feature_list.append(lane_features)
if self.obs_boundary_segment_count > 0:
boundary_objects = boundary_observations.view(-1, self.obs_boundary_segment_count, self.road_features_count)

boundary_features, _ = self.boundary_encoder(boundary_objects).max(dim=1)
boundary_features = self._encode_and_pool(boundary_objects, self.boundary_encoder)
feature_list.append(boundary_features)

# Encode Partners
if self.max_partner_observations > 0:
partner_objects = partner_observations.view(-1, self.max_partner_observations, self.partner_features_count)
partner_encoded = self.partner_encoder(partner_objects)
partner_features, _ = partner_encoded.max(dim=1)
partner_features = self._encode_and_pool(partner_objects, self.partner_encoder)
feature_list.append(partner_features)

# Encode Traffic Controls
Expand All @@ -171,16 +173,16 @@ def forward(self, observations, ego_dim):
traffic_control_type_onehot = F.one_hot(
traffic_control_type.long(),
num_classes=binding.NUM_TRAFFIC_CONTROL_TYPES,
).float()
).to(traffic_control_continuous.dtype)
traffic_control_state_onehot = F.one_hot(
traffic_control_state.long(),
num_classes=binding.NUM_TRAFFIC_CONTROL_STATES,
).float()
).to(traffic_control_continuous.dtype)
traffic_control_objects = torch.cat(
[traffic_control_continuous, traffic_control_type_onehot, traffic_control_state_onehot],
dim=2,
)
traffic_control_features, _ = self.traffic_control_encoder(traffic_control_objects).max(dim=1)
traffic_control_features = self._encode_and_pool(traffic_control_objects, self.traffic_control_encoder)
feature_list.append(traffic_control_features)

# Add optional features if enabled
Expand Down
Loading
Loading