57 commits
a417266  Trains and Evals (Kinvert, Jan 13, 2026)
49af2d4  Reward Changes (Kinvert, Jan 13, 2026)
daaf902  Rendered with spheres or something (Kinvert, Jan 13, 2026)
332a9ae  Good Claude - Wireframe Planes (Kinvert, Jan 13, 2026)
0116b97  Physics model: incidence, comments, test suite (Kinvert, Jan 13, 2026)
b29bf5a  Renamed md Files (Kinvert, Jan 13, 2026)
95eb2ef  Moved Physics to File (Kinvert, Jan 13, 2026)
3582d2d  Physics in Own File - Test Flights (Kinvert, Jan 14, 2026)
1c30c54  Coordinated Turn Tests (Kinvert, Jan 14, 2026)
1131e83  Simple Optimizations (Kinvert, Jan 14, 2026)
374871d  Small Perf - Move cosf Out of Loop (Kinvert, Jan 14, 2026)
8598067  Autopilot Seperate File (Kinvert, Jan 14, 2026)
80bcf31  Vectorized Autopilot (Kinvert, Jan 14, 2026)
0a1c2e6  Weighted Random Actions (Kinvert, Jan 15, 2026)
63a7aae  Observation Schemas Swept (Kinvert, Jan 15, 2026)
04dd016  Rewards Fixed - Sweepable (Kinvert, Jan 15, 2026)
26709b9  Preparing for Sweeps (Kinvert, Jan 15, 2026)
a31d1dc  Fix Terminals and Loggin (Kinvert, Jan 15, 2026)
3cc5b58  More Sweep Prep (Kinvert, Jan 15, 2026)
17f18c1  Fix Reward and Score (Kinvert, Jan 15, 2026)
d639ee3  Temp Undo Later - Clamp logstd (Kinvert, Jan 15, 2026)
2606e20  Apply Sweep df1 84 u5i33hej (Kinvert, Jan 16, 2026)
bc72836  New Obs Schemas - New Sweep Prep (Kinvert, Jan 16, 2026)
fe7e26a  Roll Penalty - Elevator Might Be Inversed (Kinvert, Jan 16, 2026)
652ab7a  Fix Elevator Problems (Kinvert, Jan 17, 2026)
30fa9fe  Fix Obs 5 Schema and Adjust Penalties (Kinvert, Jan 17, 2026)
ab222bf  Increase Batch Size for Speed (Kinvert, Jan 17, 2026)
7fd88f1  Next Sweep Improvements - Likes to Aileron Roll too Much (Kinvert, Jan 17, 2026)
9dca5c6  Reduce Prints (Kinvert, Jan 17, 2026)
b68d1b2  Simplify Penalties and Rewards (Kinvert, Jan 18, 2026)
03d1ebc  Try to Avoid NAN (Kinvert, Jan 18, 2026)
7a15539  Trying to Stop NANs (Kinvert, Jan 18, 2026)
2c3073f  Debug Prints (Kinvert, Jan 18, 2026)
be1e31c  Fix Mean Outside Bounds (Kinvert, Jan 18, 2026)
f6c821d  Still Trying to Fix Blowups (Kinvert, Jan 18, 2026)
3f0f8b4  Revert Some Ini Values (Kinvert, Jan 18, 2026)
6c61df6  Restore Much of Ini to 9dca5c6 (Kinvert, Jan 18, 2026)
faf6eb6  Reduce Learning Rate Again (Kinvert, Jan 18, 2026)
4e640ee  Trying to Fix Curriculum - Agent Trains Poorly (Kinvert, Jan 18, 2026)
f302224  Aim Annealing - Removed Some Penalties (Kinvert, Jan 19, 2026)
f000fb8  Added More Debugging (Kinvert, Jan 19, 2026)
7a75d2b  Some Fixes - SPS Gains - New Sweep Soon (Kinvert, Jan 19, 2026)
92aa6c5  Fixed Rewards That Turn Negative (Kinvert, Jan 19, 2026)
fd1941f  Reduce Negative G Penalties (Kinvert, Jan 19, 2026)
d8a8475  Revert to df5 (f3022) + SPS gains, Ready for df7 (Kinvert, Jan 19, 2026)
4c3ebd3  Clamp for nans - df7 2.0 (Kinvert, Jan 19, 2026)
bfa061f  This Potentially Helps with Curriculum (Kinvert, Jan 20, 2026)
214338e  3M SPS Prep for df8 Sweep (Kinvert, Jan 20, 2026)
f2af35e  df9 Sweep Prep - Sweeping Stages (Kinvert, Jan 20, 2026)
060bbfb  Safer Sweeps - Obs Clamps - Coeff Ranges (Kinvert, Jan 20, 2026)
153bd08  Add sweep persistence and override injection for Protein (Kinvert, Jan 21, 2026)
8c7260b  df10 Sweep Prep - Simplified Rewards, New Obs Scheme (Kinvert, Jan 21, 2026)
4b72007  Observation Scheme Tests (Kinvert, Jan 22, 2026)
784856b  Rudder Damping - Obs HUD - Test Updates (Kinvert, Jan 22, 2026)
b0f22a3  Code Cleanup (Kinvert, Jan 22, 2026)
6859683  Reduce Sweep Params - Rudder Drag - Restructure and Add Tests (Kinvert, Jan 22, 2026)
84d8241  Logs Update (Kinvert, Jan 22, 2026)
1 change: 1 addition & 0 deletions .gitignore
@@ -162,3 +162,4 @@ pufferlib/ocean/impulse_wars/*-release/
 pufferlib/ocean/impulse_wars/debug-*/
 pufferlib/ocean/impulse_wars/release-*/
 pufferlib/ocean/impulse_wars/benchmark/
+pufferlib/ocean/dogfight/dogfight_test
407 changes: 407 additions & 0 deletions pufferlib/SWEEP_PERSISTENCE.md

Large diffs are not rendered by default.

27 changes: 6 additions & 21 deletions pufferlib/config/default.ini
@@ -28,24 +28,24 @@ device = cuda
 optimizer = muon
 anneal_lr = True
 precision = float32
-total_timesteps = 10_000_000
+total_timesteps = 100_000_000
 learning_rate = 0.015
 gamma = 0.995
-gae_lambda = 0.90
+gae_lambda = 0.95
 update_epochs = 1
 clip_coef = 0.2
 vf_coef = 2.0
 vf_clip_coef = 0.2
 max_grad_norm = 1.5
-ent_coef = 0.001
+ent_coef = 0.01
 adam_beta1 = 0.95
 adam_beta2 = 0.999
 adam_eps = 1e-12

 data_dir = experiments
 checkpoint_interval = 200
 batch_size = auto
-minibatch_size = 8192
+minibatch_size = 16384

 # Accumulate gradients above this size
 max_minibatch_size = 32768
@@ -58,7 +58,7 @@ vtrace_rho_clip = 1.0
 vtrace_c_clip = 1.0

 prio_alpha = 0.8
-prio_beta0 = 0.2
+prio_beta0 = 0.5

 [sweep]
 method = Protein
@@ -75,24 +75,9 @@ prune_pareto = True
 #mean = 8
 #scale = auto

-# TODO: Elim from base
-[sweep.train.total_timesteps]
-distribution = log_normal
-min = 3e7
-max = 1e10
-mean = 2e8
-scale = time
-
-[sweep.train.bptt_horizon]
-distribution = uniform_pow2
-min = 16
-max = 64
-mean = 64
-scale = auto
-
 [sweep.train.minibatch_size]
 distribution = uniform_pow2
-min = 8192
+min = 16384
 max = 65536
 mean = 32768
 scale = auto
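For context on the "# Accumulate gradients above this size" comment in the default.ini hunk above: with minibatch_size = 16384 and max_minibatch_size = 32768, a trainer handed a minibatch larger than the cap would typically split it into chunks and accumulate gradients before taking a single optimizer step. A generic sketch of that pattern in plain PyTorch (illustration only, not pufferlib's actual training loop; model, optimizer, loss_fn, and batch are placeholders):

import torch

def accumulated_step(model, optimizer, loss_fn, batch, max_minibatch_size=32768):
    # Split an oversized minibatch into chunks no larger than max_minibatch_size
    # and accumulate gradients across them before one optimizer step.
    # Generic illustration of the default.ini comment above, not pufferlib code.
    optimizer.zero_grad()
    chunks = batch.split(max_minibatch_size)
    for chunk in chunks:
        loss = loss_fn(model(chunk)) / len(chunks)  # average so the step matches one big batch
        loss.backward()                             # gradients accumulate in .grad
    optimizer.step()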
146 changes: 146 additions & 0 deletions pufferlib/config/ocean/dogfight.ini
@@ -0,0 +1,146 @@
[base]
env_name = puffer_dogfight
package = ocean
policy_name = Policy
rnn_name = Recurrent

[vec]
num_envs = 8

[env]
reward_aim_scale = 0.05
reward_closing_scale = 0.003
penalty_neg_g = 0.02
speed_min = 50.0

max_steps = 3000
num_envs = 1024
obs_scheme = 1

curriculum_enabled = 1
curriculum_randomize = 0
advance_threshold = 0.7

[train]
adam_beta1 = 0.9768629406862324
adam_beta2 = 0.999302214750495
adam_eps = 6.906760212075045e-12
batch_size = auto
bptt_horizon = 64
checkpoint_interval = 200
clip_coef = 0.4928184678032994
ent_coef = 0.008
gae_lambda = 0.8325103714810463
gamma = 0.8767105842751813
learning_rate = 0.00024
max_grad_norm = 0.831714766100049
max_minibatch_size = 65536
minibatch_size = 65536
prio_alpha = 0.8195880336315146
prio_beta0 = 0.9429570720846501
seed = 42
total_timesteps = 400_000_000
update_epochs = 4
vf_clip_coef = 3.2638480501249436
vf_coef = 4.293249868787825
vtrace_c_clip = 1.911078435368836
vtrace_rho_clip = 3.797866655513644

[sweep]
downsample = 1
goal = maximize
method = Protein
metric = ultimate
prune_pareto = True
use_gpu = True

[sweep.env.reward_aim_scale]
distribution = uniform
min = 0.02
max = 0.1
mean = 0.05
scale = auto

[sweep.env.reward_closing_scale]
distribution = uniform
min = 0.001
max = 0.01
mean = 0.003
scale = auto

[sweep.env.penalty_neg_g]
distribution = uniform
min = 0.01
max = 0.05
mean = 0.02
scale = auto

[sweep.env.obs_scheme]
distribution = int_uniform
max = 5
mean = 0
min = 0
scale = 1.0

[sweep.env.advance_threshold]
distribution = uniform
min = 0.5
max = 0.85
mean = 0.7
scale = auto

[sweep.env.max_steps]
distribution = int_uniform
min = 300
max = 1500
mean = 900
scale = 1.0

[sweep.train.learning_rate]
distribution = log_normal
max = 0.0005
mean = 0.00025
min = 0.0001
scale = 0.5

[sweep.train.vf_coef]
distribution = uniform
min = 1.0
max = 5.0
mean = 3.0
scale = auto

[sweep.train.clip_coef]
distribution = uniform
min = 0.3
max = 1.0
mean = 0.5
scale = auto

[sweep.train.ent_coef]
distribution = log_normal
min = 0.002
max = 0.02
mean = 0.008
scale = 0.5

[sweep.train.max_grad_norm]
distribution = uniform
min = 0.5
max = 2.0
mean = 1.0
scale = auto

[sweep.train.gae_lambda]
distribution = logit_normal
min = 0.9
max = 0.999
mean = 0.95
scale = auto

[sweep.train.gamma]
distribution = logit_normal
min = 0.95
max = 0.9999
mean = 0.99
scale = auto
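For reference on the sweep entries above: a log_normal parameter such as [sweep.train.learning_rate] is sampled so that its logarithm is roughly normal around log(mean) with spread scale, clipped to [min, max]. A minimal sketch of that sampling rule (an illustration of the distribution only, not Protein's actual sampler; the function name is hypothetical):

import numpy as np

def sample_log_normal(mean, scale, low, high, rng=None):
    # Draw a value whose log is normal around log(mean), then clip to [low, high].
    # Mirrors the [sweep.train.learning_rate] entry above; not the Protein implementation.
    rng = rng or np.random.default_rng()
    value = float(np.exp(rng.normal(np.log(mean), scale)))
    return min(max(value, low), high)

# One candidate learning rate for a sweep trial:
lr = sample_log_normal(mean=0.00025, scale=0.5, low=0.0001, high=0.0005)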
2 changes: 1 addition & 1 deletion pufferlib/environments/mani_skill/torch.py
@@ -64,7 +64,7 @@ def decode_actions(self, hidden):
         '''Decodes a batch of hidden states into (multi)discrete actions.
         Assumes no time dimension (handled by LSTM wrappers).'''
         mean = self.decoder_mean(hidden)
-        logstd = self.decoder_logstd.expand_as(mean)
+        logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2)
         std = torch.exp(logstd)
         logits = torch.distributions.Normal(mean, std)
         values = self.value(hidden)
2 changes: 1 addition & 1 deletion pufferlib/models.py
@@ -88,7 +88,7 @@ def decode_actions(self, hidden):
             logits = self.decoder(hidden).split(self.action_nvec, dim=1)
         elif self.is_continuous:
             mean = self.decoder_mean(hidden)
-            logstd = self.decoder_logstd.expand_as(mean)
+            logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2)
             std = torch.exp(logstd)
             logits = torch.distributions.Normal(mean, std)
         else:
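The .clamp(min=-20, max=2) added in the two diffs above bounds std = exp(logstd) to roughly [2e-9, 7.4], so the Normal distribution's sample and log_prob stay finite even if the learned logstd parameter drifts, which is one of the NaN sources the commit history above ("Try to Avoid NAN", "Clamp for nans") is fighting. A standalone sketch of the effect with a toy continuous action head (illustrative module, not the pufferlib Policy class):

import torch

hidden_dim, action_dim = 64, 4
decoder_mean = torch.nn.Linear(hidden_dim, action_dim)
decoder_logstd = torch.nn.Parameter(torch.zeros(1, action_dim))  # learned per-action log std

hidden = torch.randn(8, hidden_dim)
mean = decoder_mean(hidden)

# Without the clamp, a drifting logstd (say +50) overflows exp() and log_prob
# returns inf/nan; clamping keeps std within about [2e-9, 7.4].
logstd = decoder_logstd.expand_as(mean).clamp(min=-20, max=2)
std = torch.exp(logstd)
dist = torch.distributions.Normal(mean, std)

action = dist.sample()
logprob = dist.log_prob(action).sum(dim=-1)  # finite by construction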