-
Notifications
You must be signed in to change notification settings - Fork 3
Deep Finance Update with New Judge #7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bac05b5
ba41164
c7ca8c7
7f2b017
9dd3c42
757f8a1
079e4bd
bcce8f0
4662d63
de81c1d
248acc4
9d651fd
7475ecc
b95d491
f20ab91
ea87d4b
3082bca
ef44b63
0889483
db7114c
5a25550
623b7d9
0aaab86
04f4959
d0ff68b
1c356d7
37dcbcc
529ae7e
f4eb231
1e07515
08ba184
3d55692
a478827
88be3e4
fb41962
a1f909b
8d2e5d7
3c85960
9b541c5
06fda5f
63cc682
c9b87ac
3bd4c7d
8a18d40
835bdd8
11ed325
a500e90
d9cbdc0
4538f5a
6f0c420
818a4f7
1bb7f60
460318f
57a3a54
beaa540
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,9 +11,12 @@ | |
| - judge_time/ Judge time consumption statistics | ||
| """ | ||
|
|
||
| from typing import List, Dict, Any | ||
| from typing import List, Dict, Any, TYPE_CHECKING | ||
| import numpy as np | ||
|
|
||
| if TYPE_CHECKING: | ||
| from ajet.schema.trajectory import Reward | ||
|
|
||
|
|
||
| def extract_reward_stats_from_trajectories(trajectories: List[Any]) -> List[Dict[str, Any]]: | ||
| """ | ||
|
|
@@ -72,22 +75,15 @@ def compute_reward_metrics(reward_stats_list: List[Dict[str, Any]], prefix: str | |
| metrics[f"{prefix}rewards/penalty_count"] = len(non_zero_penalties) | ||
| metrics[f"{prefix}rewards/penalty_rate"] = len(non_zero_penalties) / n * 100 if n > 0 else 0.0 | ||
|
|
||
| # ========== Detect OpenJudge Usage ========== | ||
| # ========== OpenJudge Metrics (PresentationQualityGrader, GroundingGrader) ========== | ||
| openjudge_enabled_count = sum(1 for rs in reward_stats_list if rs.get('openjudge_enabled', False)) | ||
|
|
||
| if openjudge_enabled_count > 0: | ||
| # ========== OpenJudge Metrics ========== | ||
|
|
||
| # Dynamically extract OpenJudge grader fields | ||
| # Currently supported graders: report_resolution, trajectory_faithfulness, | ||
| # rubrics_performance, trajectory_comprehensive, information_gain, action_loop | ||
| # OpenJudge graders: presentation_quality, grounding | ||
| openjudge_graders = [ | ||
| "report_resolution", | ||
| "trajectory_faithfulness", | ||
| "rubrics_performance", | ||
| "trajectory_comprehensive", | ||
| "information_gain", | ||
| "action_loop", | ||
| "presentation_quality", | ||
| "grounding", | ||
| "planning" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
| ] | ||
|
|
||
| for grader_name in openjudge_graders: | ||
|
|
@@ -151,3 +147,18 @@ def compute_reward_metrics_from_trajectories(trajectories: List[Any], prefix: st | |
| reward_stats_list = extract_reward_stats_from_trajectories(trajectories) | ||
| return compute_reward_metrics(reward_stats_list, prefix=prefix) | ||
|
|
||
|
|
||
| def populate_reward_metadata_from_stats(reward: "Reward", reward_stats: Dict[str, Any]) -> None: | ||
| """ | ||
| Populate Reward.metadata with all reward statistics. | ||
|
|
||
| Args: | ||
| reward: The Reward object to populate | ||
| reward_stats: The reward_stats dictionary from judge | ||
| """ | ||
| if not reward_stats: | ||
| return | ||
|
|
||
| # Directly copy all reward_stats into metadata | ||
| reward.metadata.update(reward_stats) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| # tutorial/example_deep_finance package |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,29 +1,29 @@ | ||
| #!/bin/bash | ||
| set -e | ||
| set -e | ||
| #=============================================================================== | ||
| # 1. 配置区域 - 用户只需修改这里 | ||
| #=============================================================================== | ||
| SUFFIX="deep_finance" # 实验后缀,影响所有日志和实验名称 | ||
| PREFIX="open" # 实验前缀,影响日志和实验所在文件夹 | ||
| SUFFIX="newjudge" # 实验后缀,影响所有日志和实验名称 | ||
| PREFIX="ajet_newjudge" # 实验前缀,影响日志和实验所在文件夹 | ||
|
|
||
| # OpenJudge 模型配置 | ||
| OPENJUDGE_LLM='qwen-flash' # OpenJudge 评分模型 | ||
| RM_LLM='qwen-max' # RM Gallery 评分模型 | ||
| JUDGE_CONCURRENCY=10 | ||
|
|
||
| # 奖励权重配置 | ||
| RM_WEIGHT=0.4 | ||
| CITATION_AUDIT_WEIGHT=0.2 | ||
| REPORT_RESOLUTION_WEIGHT=0.2 | ||
| TRAJECTORY_FAITHFULNESS_WEIGHT=0.2 | ||
| RM_WEIGHT=0.5 | ||
| PRESENTATION_QUALITY_WEIGHT=0.25 | ||
| GROUNDING_WEIGHT=0.25 | ||
|
|
||
| # 训练参数配置 | ||
| NUM_REPEAT=4 # group size,每个query rollout NUM_REPEAT次 | ||
| TRAIN_BATCH_SIZE=32 # 训练batchsize | ||
| NUM_STEPS=6 # 每个样本step轮数 | ||
| DEEPFINANCE_TOOL_RESULT_MAX_CHARS=10000 | ||
|
|
||
| # 主目录 | ||
| # 主目录(需要更改) | ||
| export AJET_ROOT="/mnt/data_cpfs/taoshuchang.tsc/deepresearch/AgentJet_new" | ||
|
|
||
| NNODES=${WORLD_SIZE} | ||
|
|
||
|
|
@@ -46,7 +46,7 @@ fi | |
| # 2. 动态生成配置文件 (从yaml template生成yaml) | ||
| #=============================================================================== | ||
| # 修改:配置文件生成路径,现在动态生成到 yaml 目录下 | ||
| CONFIG_TEMPLATE="tutorial/example_deep_finance/yaml_template/deep_finance_template.yaml" | ||
| CONFIG_TEMPLATE="tutorial/example_deep_finance/deep_finance.yaml" | ||
| CONFIG_FILE="${AJET_ROOT}/tutorial/example_deep_finance/yaml/${SUFFIX}.yaml" | ||
| mkdir -p $(dirname ${CONFIG_FILE}) | ||
|
|
||
|
|
@@ -55,12 +55,11 @@ sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \ | |
| -e "s|{{MODEL_PATH}}|${MODEL_PATH}|g" \ | ||
| -e "s|{{NNODES}}|${NNODES}|g" \ | ||
| -e "s|{{RM_WEIGHT}}|${RM_WEIGHT}|g" \ | ||
| -e "s|{{CITATION_AUDIT_WEIGHT}}|${CITATION_AUDIT_WEIGHT}|g" \ | ||
| -e "s|{{PRESENTATION_QUALITY_WEIGHT}}|${PRESENTATION_QUALITY_WEIGHT}|g" \ | ||
| -e "s|{{GROUNDING_WEIGHT}}|${GROUNDING_WEIGHT}|g" \ | ||
| -e "s|{{OPENJUDGE_LLM}}|${OPENJUDGE_LLM}|g" \ | ||
| -e "s|{{RM_LLM}}|${RM_LLM}|g" \ | ||
| -e "s|{{JUDGE_CONCURRENCY}}|${JUDGE_CONCURRENCY}|g" \ | ||
| -e "s|{{REPORT_RESOLUTION_WEIGHT}}|${REPORT_RESOLUTION_WEIGHT}|g" \ | ||
| -e "s|{{TRAJECTORY_FAITHFULNESS_WEIGHT}}|${TRAJECTORY_FAITHFULNESS_WEIGHT}|g" \ | ||
| -e "s|{{NUM_REPEAT}}|${NUM_REPEAT}|g" \ | ||
| -e "s|{{NUM_STEPS}}|${NUM_STEPS}|g" \ | ||
| -e "s|{{TRAIN_BATCH_SIZE}}|${TRAIN_BATCH_SIZE}|g" \ | ||
|
|
@@ -72,7 +71,7 @@ sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \ | |
| ${AJET_ROOT}/${CONFIG_TEMPLATE} > ${CONFIG_FILE} | ||
|
|
||
| echo "配置文件已生成: ${CONFIG_FILE}" | ||
| echo "参数确认: RM=${RM_WEIGHT}, Citation=${CITATION_AUDIT_WEIGHT}, OpenJudge=${OPENJUDGE_LLM}, RM_LLM=${RM_LLM}" | ||
| echo "参数确认: RM=${RM_WEIGHT}, PresentationQuality=${PRESENTATION_QUALITY_WEIGHT}, Grounding=${GROUNDING_WEIGHT}, OpenJudge=${OPENJUDGE_LLM}, RM_LLM=${RM_LLM}" | ||
|
|
||
| #=============================================================================== | ||
| # 3. 环境配置 | ||
|
|
@@ -106,15 +105,15 @@ export DEEPFINANCE_MCP_CONFIG DEEPFINANCE_TOOL_RESULT_MAX_CHARS | |
| # 其他服务配置 | ||
| HF_ENDPOINT="https://hf-mirror.com" | ||
| ES_HOSTS="http://11.160.132.46:8200" | ||
| export HF_ENDPOINT ES_HOSTS | ||
| export HF_ENDPOINT ES_HOSTS | ||
|
|
||
| # log 文件位置 | ||
| CURRENT_TIME=$(date "+%Y%m%d_%H%M%S") | ||
| LOG_DIR="${AJET_ROOT}/logs/${PREFIX}" | ||
| MASTER_IP_FILE="${LOG_DIR}/master-ip_${SUFFIX}.log" | ||
| ENV_SERVICE_LOG="${LOG_DIR}/env_service_${SUFFIX}_${CURRENT_TIME}.log" | ||
| TRAIN_LOG="${LOG_DIR}/train_${SUFFIX}_${CURRENT_TIME}.log" | ||
|
|
||
| env_log_prefix="${SUFFIX}__${CURRENT_TIME}" | ||
| # 多机训练参数配置 | ||
| GPUS_PER_NODE=8 | ||
| EXPECTED_WORKERS=$WORLD_SIZE | ||
|
|
@@ -156,6 +155,8 @@ export NCCL_ASYNC_ERROR_HANDLING=1 | |
|
|
||
| export PYTHONPATH="${AJET_ROOT}:${PYTHONPATH}" | ||
| export RAY_CLUSTER_MODE="multi_node" | ||
| export DEEPFINANCE_PATH="${ENV_SERVICE_ROOT}" # AgentJet 内部可能使用此路径 | ||
| export DEEPFINANCE_SCRIPT="source /mnt/data/taoshuchang.tsc/anaconda3/etc/profile.d/conda.sh && conda activate finworld_1209 && cd ${ENV_SERVICE_ROOT} && DEEPFINANCE_TOOL_RESULT_MAX_CHARS=${DEEPFINANCE_TOOL_RESULT_MAX_CHARS} DEEPFINANCE_MCP_CONFIG=${DEEPFINANCE_MCP_CONFIG} CACHE_TYPE=${CACHE_TYPE} MONGO_URI=${MONGO_URI} MONGO_DB_NAME=${MONGO_DB_NAME} MONGO_COLLECTION_NAME=${MONGO_COLLECTION_NAME} python -m env_service.env_service --env finworld --portal 0.0.0.0 --port 8080" | ||
|
Comment on lines
+158
to
+159
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
|
|
||
|
|
||
| #=============================================================================== | ||
|
|
@@ -202,11 +203,12 @@ if [[ $HOSTNAME == *"-master-"* ]]; then | |
|
|
||
| # 启动训练任务(最核心) | ||
| python ajet/launcher.py \ | ||
| --with-deepfinance \ | ||
| --conf ${CONFIG_FILE} \ | ||
| --backbone="verl" \ | ||
| --prefix=${SUFFIX} \ | ||
| --prefix=${env_log_prefix} \ | ||
| 2>&1 | tee ${TRAIN_LOG} | ||
|
|
||
|
|
||
| #=============================================================================== | ||
| # 6.2 Worker 节点启动流程 | ||
|
|
@@ -218,4 +220,4 @@ else | |
| ray stop || true | ||
| ray start --address $MASTER_ADDR:6379 --num-gpus 8 | ||
| while true; do sleep 60; done | ||
| fi | ||
| fi | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This block of commented-out code should be removed. Keeping commented-out code, especially large blocks, clutters the codebase and can lead to confusion. If this is an example, it should be moved to documentation or a separate example file.