diff --git a/docs/en/get_started/customization.md b/docs/en/get_started/customization.md index 77f5cd5e34..3f31ee493d 100644 --- a/docs/en/get_started/customization.md +++ b/docs/en/get_started/customization.md @@ -298,8 +298,6 @@ def get_pg_loss_reducer( - Dr.GRPO: Divide by a constant instead of effective token count - Custom loss normalization strategies -**Example**: `examples/DrGRPO/custom_reducer.py:get_pg_loss_reducer` - --- ### 12. Reward Post-Processing (`--custom-reward-post-process-path`) diff --git a/docs/zh/get_started/customization.md b/docs/zh/get_started/customization.md index 5b95f05463..fd067c04c9 100644 --- a/docs/zh/get_started/customization.md +++ b/docs/zh/get_started/customization.md @@ -298,8 +298,6 @@ def get_pg_loss_reducer( - Dr.GRPO:除以常数而非有效 token 数 - 自定义损失归一化策略 -**示例**: `examples/DrGRPO/custom_reducer.py:get_pg_loss_reducer` - --- ### 12. 奖励后处理 (`--custom-reward-post-process-path`) diff --git a/slime/utils/arguments.py b/slime/utils/arguments.py index 6efe85eae7..a7059bba87 100644 --- a/slime/utils/arguments.py +++ b/slime/utils/arguments.py @@ -1050,7 +1050,7 @@ def add_algo_arguments(parser): "--custom-pg-loss-reducer-function-path", type=str, default=None, - help="Path to a custom reducer function for pg_loss only. When set, pg_loss will use this custom reducer while other metrics (pg_clipfrac, ppo_kl, entropy_loss, etc.) still use the default sum_of_sample_mean. (e.g., examples/Dr.GRPO/custom_reducer.py:get_pg_loss_reducer).", + help="Path to a custom reducer function for pg_loss only. When set, pg_loss will use this custom reducer while other metrics (pg_clipfrac, ppo_kl, entropy_loss, etc.) still use the default sum_of_sample_mean.", ) parser.add_argument(