File tree Expand file tree Collapse file tree 1 file changed +4
-0
lines changed
Expand file tree Collapse file tree 1 file changed +4
-0
lines changed Original file line number Diff line number Diff line change @@ -72,6 +72,8 @@ class ActorConfig(BaseConfig):
7272 clip_ratio_c (float): Clipping ratio for critic loss.
7373 loss_agg_mode (str): Loss aggregation mode. Options: 'token-mean', 'sample-mean'.
7474 entropy_coeff (float): Entropy coefficient for regularization.
75+ tau_pos (float): Positive tau for SAPO smoothing. Defaults to 1.0 (values >= 1.0 keep rewards stable).
76+ tau_neg (float): Negative tau for SAPO smoothing. Defaults to 1.05 (set it greater than tau_pos for asymmetry).
7577 use_kl_loss (bool): Whether to use KL divergence loss.
7678 use_torch_compile (bool): Whether to use torch.compile for optimization.
7779 kl_loss_coef (float): KL divergence loss coefficient.
@@ -109,6 +111,8 @@ class ActorConfig(BaseConfig):
109111 clip_ratio_c : float = 3.0
110112 loss_agg_mode : str = "token-mean"
111113 entropy_coeff : float = 0
114+ tau_pos : float = 1.0
115+ tau_neg : float = 1.05
112116 calculate_entropy : bool = False
113117 use_kl_loss : bool = False
114118 use_torch_compile : bool = True
You can’t perform that action at this time.
0 commit comments