Skip to content

Commit 545f1bc

Browse files
added sapo tau params
1 parent 2ee1e1c commit 545f1bc

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

verl/workers/config/actor.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ class ActorConfig(BaseConfig):
7272
clip_ratio_c (float): Clipping ratio for critic loss.
7373
loss_agg_mode (str): Loss aggregation mode. Options: 'token-mean', 'sample-mean'.
7474
entropy_coeff (float): Entropy coefficient for regularization.
75+
tau_pos (float): SAPO temperature (tau) applied to tokens with positive advantage (>= 1.0 keeps rewards stable).
76+
tau_neg (float): SAPO temperature (tau) applied to tokens with negative advantage (set > tau_pos for asymmetry).
7577
use_kl_loss (bool): Whether to use KL divergence loss.
7678
use_torch_compile (bool): Whether to use torch.compile for optimization.
7779
kl_loss_coef (float): KL divergence loss coefficient.
@@ -109,6 +111,8 @@ class ActorConfig(BaseConfig):
109111
clip_ratio_c: float = 3.0
110112
loss_agg_mode: str = "token-mean"
111113
entropy_coeff: float = 0
114+
tau_pos: float = 1.0
115+
tau_neg: float = 1.05
112116
calculate_entropy: bool = False
113117
use_kl_loss: bool = False
114118
use_torch_compile: bool = True

0 commit comments

Comments
 (0)