open-tinker · lwaekfjlk · Dec 23, 2025 · Dec 23, 2025
diff --git a/README.md b/README.md
@@ -150,10 +150,9 @@ python opentinker/client/math_rl.py \
     data_path=data/math_agentloop/train.parquet \
     val_data_path=data/math_agentloop/test.parquet \
     scheduler_url=http://<server_endpoint>:<scheduler_port> \
-    interaction.config.env_port=<env_port> \
-    interaction.config.env_host=<client_endpoint>
+    env_url=http://<client_endpoint>:<env_port>
 
-# multi turn tool ca
+# multi-turn tool call
 python opentinker/client/math_tool_rl.py \
     tokenizer_path=Qwen/Qwen2.5-1.5B \
     batch_size=16 \
@@ -162,8 +161,7 @@ python opentinker/client/math_tool_rl.py \
     save_freq=1000 \
     test_freq=5 \
     scheduler_url=http://<server_endpoint>:<scheduler_port> \
-    interaction.config.env_port=<env_port> \
-    interaction.config.env_host=<client_endpoint>
+    env_url=http://<client_endpoint>:<env_port>
 ```
 
 **Gomoku RL (Multi-turn):**
@@ -176,8 +174,7 @@ python opentinker/client/gomoku_rl.py \
     save_freq=1000 \
     test_freq=5 \
     scheduler_url=http://<server_endpoint>:<scheduler_port> \
-    interaction.config.env_port=<env_port> \
-    interaction.config.env_host=<client_endpoint>
+    env_url=http://<client_endpoint>:<env_port>
 ```
 
 **Math Inference:**
@@ -188,17 +185,17 @@ python opentinker/client/math_inference.py \
     data_path=data/math/test.parquet \
     output_path=./tmp/results.jsonl \
     max_samples=5 \
-    env_endpoint=http://<client_endpoint>:<env_port> \
-    scheduler_url=http://<server_endpoint>:<scheduler_port>
+    scheduler_url=http://<server_endpoint>:<scheduler_port> \
+    env_url=http://<client_endpoint>:<env_port>
 
 # multi turn tool call
 python opentinker/client/math_tool_inference.py \
     model_path=<model_name> \
     data_path=data/math/test.parquet \
     output_path=./tmp/results.jsonl \
     max_samples=5 \
-    env_endpoint=http://<client_endpoint>:<env_port> \
-    scheduler_url=http://<server_endpoint>:<scheduler_port>
+    scheduler_url=http://<server_endpoint>:<scheduler_port> \
+    env_url=http://<client_endpoint>:<env_port>
 ```
 
 **Gomoku Inference:**
@@ -207,8 +204,8 @@ python opentinker/client/gomoku_inference.py \
     model_path=<model_name> \
     output_path=./tmp/results.jsonl \
     max_samples=5 \
-    env_endpoint=http://<client_endpoint>:<env_port> \
-    scheduler_url=http://<server_endpoint>:<scheduler_port>
+    scheduler_url=http://<server_endpoint>:<scheduler_port> \
+    env_url=http://<client_endpoint>:<env_port>
 ```
 
 

diff --git a/opentinker/client/client_config/generic_env_param.yaml b/opentinker/client/client_config/generic_env_param.yaml
@@ -5,9 +5,10 @@
 # - No reward function (environment provides rewards)
 # - Interaction configuration for Gym environment
 
-server_url: "http://localhost:8000" # 如果是scheduler版本，不需要这个参数
+# Server settings
 scheduler_url: "http://localhost:8766"
 scheduler_api_key: "otk_98b8db24ccd64c92e1fdd9a232e209fa"
+env_url: "http://localhost:8084"  # Environment server URL
 
 # GPU allocation
 num_gpus: 4
@@ -29,7 +30,7 @@ interaction:
     name: "gym_env"  # Name referenced in dataset's interaction_kwargs
     class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction
     config:
-        env_endpoint: "http://localhost:8084"  # Mock environment server
+        env_endpoint: ${env_url}  # References top-level env_url
         max_steps: 50
         observation_template: "Environment: {observation}"
 

diff --git a/opentinker/client/client_config/gomoku_inference_config.yaml b/opentinker/client/client_config/gomoku_inference_config.yaml
@@ -23,7 +23,7 @@ output_path: null          # Output results file (jsonl)
 max_samples: 10            # Number of games to play
 
 # Environment settings  
-env_endpoint: http://localhost:8091
+env_url: http://localhost:8091
 
 # Multi-turn settings (Gomoku is multi-turn game)
 multi_turn:

diff --git a/opentinker/client/client_config/gomoku_inference_scheduler_config.yaml b/opentinker/client/client_config/gomoku_inference_scheduler_config.yaml
@@ -29,7 +29,7 @@ output_path: null          # Output results file (jsonl)
 max_samples: 10            # Number of games to play
 
 # Environment settings  
-env_endpoint: http://0.0.0.0:8091
+env_url: http://0.0.0.0:8091
 
 # Multi-turn settings (Gomoku is multi-turn game)
 multi_turn:

diff --git a/opentinker/client/client_config/gomoku_param.yaml b/opentinker/client/client_config/gomoku_param.yaml
@@ -53,9 +53,7 @@ interaction:
   name: gomoku
   class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction
   config:
-    env_host: 0.0.0.0
-    env_port: 8088
-    env_endpoint: http://${interaction.config.env_host}:${interaction.config.env_port}
+    env_endpoint: ${env_url}  # References top-level env_url
     max_steps: 81 # 这是后端GymEnvironmentInteraction的参数
     max_total_steps: 39  # 调用环境的step方法的最大次数限制（防止invalid move hacking）
     max_initial_moves: 0
@@ -73,10 +71,11 @@ multi_turn:
     experiment_name: "gomoku_interaction"  # Experiment name in Weave
 
 
-# Scheduler settings
+# Server settings
 scheduler_url: "http://0.0.0.0:8780"
 scheduler_api_key: otk_98b8db24ccd64c92e1fdd9a232e209fa # this is user id
+env_url: "http://0.0.0.0:8088"  # Game environment server URL
 
 # GPU settings
-num_gpus: 4
+num_gpus: 2
 
diff --git a/opentinker/client/client_config/math_code_interpreter_inference_config.yaml b/opentinker/client/client_config/math_code_interpreter_inference_config.yaml
@@ -29,7 +29,7 @@ output_path: null          # Output results file (jsonl)
 max_samples: null          # Limit samples (null = all)
 
 # Environment settings (code interpreter math server)
-env_endpoint: http://0.0.0.0:8088
+env_url: http://0.0.0.0:8088
 
 # Multi-turn settings (allow code execution iterations)
 multi_turn:

diff --git a/opentinker/client/client_config/math_code_interpreter_param.yaml b/opentinker/client/client_config/math_code_interpreter_param.yaml
@@ -62,14 +62,13 @@ interaction:
   name: math_code_interpreter
   class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction
   config:
-    env_host: 0.0.0.0
-    env_port: 8088
-    env_endpoint: http://${interaction.config.env_host}:${interaction.config.env_port}
+    env_endpoint: ${env_url}  # References top-level env_url
     max_steps: 5    # Max interaction steps (code executions)
 
-# Scheduler settings
+# Server settings
 scheduler_url: "http://0.0.0.0:8780"
 scheduler_api_key: null
+env_url: "http://0.0.0.0:8088"  # Code interpreter server URL
 
 # GPU settings
 num_gpus: 4

diff --git a/opentinker/client/client_config/math_inference_config.yaml b/opentinker/client/client_config/math_inference_config.yaml
@@ -20,7 +20,7 @@ output_path: null          # Output results file (jsonl)
 max_samples: null          # Limit samples (null = all)
 
 # Environment settings  
-env_endpoint: http://localhost:8088
+env_url: http://localhost:8088
 
 # Multi-turn settings (same as training config)
 multi_turn:

diff --git a/opentinker/client/client_config/math_inference_scheduler_config.yaml b/opentinker/client/client_config/math_inference_scheduler_config.yaml
@@ -27,7 +27,7 @@ output_path: null          # Output results file (jsonl)
 max_samples: null          # Limit samples (null = all)
 
 # Environment settings  
-env_endpoint: http://0.0.0.0:8088
+env_url: http://0.0.0.0:8088
 
 # Multi-turn settings (same as training config)
 multi_turn:

diff --git a/opentinker/client/client_config/math_param.yaml b/opentinker/client/client_config/math_param.yaml
@@ -57,9 +57,7 @@ interaction:
   name: math
   class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction
   config:
-    env_host: 0.0.0.0
-    env_port: 8088
-    env_endpoint: http://${interaction.config.env_host}:${interaction.config.env_port}
+    env_endpoint: ${env_url}  # References top-level env_url
     max_steps: 1     # Max interaction steps
 
 multi_turn:
@@ -69,9 +67,10 @@ multi_turn:
     weave_project: null
     experiment_name: "math_interaction"
 
-# Scheduler settings
+# Server settings
 scheduler_url: "http://0.0.0.0:8780"
 scheduler_api_key: otk_98b8db24ccd64c92e1fdd9a232e209fa
+env_url: "http://0.0.0.0:8088"  # Math environment server URL
 
 # GPU settings
 num_gpus: 4
diff --git a/opentinker/client/gomoku_inference.py b/opentinker/client/gomoku_inference.py
@@ -47,12 +47,12 @@ def main(args):
     print(f"✓ Inference job {job_id} started at {vllm_server_url}")
 
     # 2. Setup GameStatsClient for per-step metrics (with job_id isolation)
-    game_stats = GameStatsClient(args.env_endpoint, job_id=job_id)
+    game_stats = GameStatsClient(args.env_url, job_id=job_id)
     if game_stats.health_check():
-        print(f"✓ Connected to game server at {args.env_endpoint}")
+        print(f"✓ Connected to game server at {args.env_url}")
         game_stats.reset_all()  # Reset stats for this job before inference
     else:
-        print(f"⚠ Game server not available at {args.env_endpoint}, continuing without stats")
+        print(f"⚠ Game server not available at {args.env_url}, continuing without stats")
         game_stats = None
 
     # 3. Run inference using the remote vLLM server
@@ -67,7 +67,7 @@ def main(args):
         tokenizer_path=args.get("tokenizer_path") or args.model_path,
         data_path=args.get("data_path"),  # None for dynamic generation
         game_class=GomokuGame,
-        env_endpoint=args.env_endpoint,
+        env_endpoint=args.env_url,
         job_id=job_id,  # Pass job_id for stats isolation
         output_path=args.get("output_path"),
         temperature=args.temperature,

diff --git a/opentinker/client/gomoku_rl.py b/opentinker/client/gomoku_rl.py
@@ -81,10 +81,11 @@ def main(args):
         "max_total_steps": interaction_config.get("max_total_steps", 40),
     }
 
-    env_endpoint = interaction_config.env_endpoint
+    # Use top-level env_url (with fallback to interaction.config.env_endpoint)
+    env_url = args.get("env_url") or interaction_config.get("env_endpoint")
 
     print("\nSetting up GameEnvironment with GomokuGame...")
-    print(f"  Environment endpoint: {env_endpoint}")
+    print(f"  Environment URL: {env_url}")
     print(f"  Board size: {game_kwargs['board_size']}")
     print(f"  Job ID for stats: {job_id}")
 
@@ -99,12 +100,12 @@ def main(args):
     print(f"  Interaction config path: {env.get_interaction_config_path()}")
 
     # 3. Setup GameStatsClient for per-step metrics (use env.job_id for consistency)
-    game_stats = GameStatsClient(env_endpoint, job_id=env.job_id)
+    game_stats = GameStatsClient(env_url, job_id=env.job_id)
     if game_stats.health_check():
-        print(f"✓ Connected to game server for metrics at {env_endpoint}")
+        print(f"✓ Connected to game server for metrics at {env_url}")
         game_stats.reset_all()  # Reset all stats before training
     else:
-        print(f"⚠ Game server at {env_endpoint} not responding - metrics disabled")
+        print(f"⚠ Game server at {env_url} not responding - metrics disabled")
         game_stats = None
 
     # 4. Connect to allocated server

diff --git a/opentinker/client/math_inference.py b/opentinker/client/math_inference.py
@@ -49,12 +49,12 @@ def main(args):
     print(f"✓ Inference job {job_id} started at {vllm_server_url}")
 
     # 2. Setup GameStatsClient for per-step metrics (with job_id isolation)
-    game_stats = GameStatsClient(args.env_endpoint, job_id=job_id)
+    game_stats = GameStatsClient(args.env_url, job_id=job_id)
     if game_stats.health_check():
-        print(f"✓ Connected to game server at {args.env_endpoint}")
+        print(f"✓ Connected to game server at {args.env_url}")
         game_stats.reset_all()  # Reset stats for this job before inference
     else:
-        print(f"⚠ Game server not available at {args.env_endpoint}, continuing without stats")
+        print(f"⚠ Game server not available at {args.env_url}, continuing without stats")
         game_stats = None
 
     # 3. Run inference using the remote vLLM server
@@ -66,7 +66,7 @@ def main(args):
         tokenizer_path=args.get("tokenizer_path") or args.model_path,
         data_path=args.data_path,
         game_class=MathGame,
-        env_endpoint=args.env_endpoint,
+        env_endpoint=args.env_url,
         job_id=job_id,  # Pass job_id for stats isolation
         output_path=args.get("output_path"),
         temperature=args.temperature,

diff --git a/opentinker/client/math_rl.py b/opentinker/client/math_rl.py
@@ -46,7 +46,8 @@ def main(args):
     print(f"✓ Job {job_id} allocated at {server_url}")
 
     # 2. Setup environment (job_id is automatically handled)
-    env_endpoint = args.interaction.config.env_endpoint
+    # Use top-level env_url (with fallback to interaction.config.env_endpoint)
+    env_url = args.get("env_url") or args.interaction.config.get("env_endpoint")
     env = MathGameEnvironment(
         game_class=MathGame,
         config=args,
@@ -57,13 +58,13 @@ def main(args):
     print(f"✓ Environment created, interaction config: {env.get_interaction_config_path()}")
 
     # 3. Setup game stats client (use env.job_id for consistency)
-    game_stats = GameStatsClient(env_endpoint, job_id=env.job_id)
+    game_stats = GameStatsClient(env_url, job_id=env.job_id)
     if game_stats.health_check():
         game_stats.reset_all()
-        print(f"✓ Connected to math server at {env_endpoint}")
+        print(f"✓ Connected to math server at {env_url}")
     else:
         game_stats = None
-        print(f"⚠ Math server not responding at {env_endpoint}")
+        print(f"⚠ Math server not responding at {env_url}")
 
     # 4. Connect to training server
     client = ServiceClient(

diff --git a/opentinker/client/math_tool_inference.py b/opentinker/client/math_tool_inference.py
@@ -49,12 +49,12 @@ def main(args):
     print(f"✓ Inference job {job_id} started at {vllm_server_url}")
 
     # 2. Setup GameStatsClient for per-step metrics (with job_id isolation)
-    game_stats = GameStatsClient(args.env_endpoint, job_id=job_id)
+    game_stats = GameStatsClient(args.env_url, job_id=job_id)
     if game_stats.health_check():
-        print(f"✓ Connected to code interpreter game server at {args.env_endpoint}")
+        print(f"✓ Connected to code interpreter game server at {args.env_url}")
         game_stats.reset_all()  # Reset stats for this job before inference
     else:
-        print(f"⚠ Game server not available at {args.env_endpoint}, continuing without stats")
+        print(f"⚠ Game server not available at {args.env_url}, continuing without stats")
         print(f"  Make sure to start: python opentinker/environment/math/code_interpreter_math_server.py --port 8088")
         game_stats = None
 
@@ -69,7 +69,7 @@ def main(args):
         tokenizer_path=args.get("tokenizer_path") or args.model_path,
         data_path=args.data_path,
         game_class=CodeInterpreterMathGame,
-        env_endpoint=args.env_endpoint,
+        env_endpoint=args.env_url,
         job_id=job_id,  # Pass job_id for stats isolation
         output_path=args.get("output_path"),
         temperature=args.temperature,

diff --git a/opentinker/client/math_tool_rl.py b/opentinker/client/math_tool_rl.py
@@ -46,7 +46,8 @@ def main(args):
 
     # 2. Setup environment
     print("\n[2/4] Setting up environment...")
-    env_endpoint = args.interaction.config.env_endpoint
+    # Use top-level env_url (with fallback to interaction.config.env_endpoint)
+    env_url = args.get("env_url") or args.interaction.config.get("env_endpoint")
     env = MathCodeInterpreterEnvironment(
         game_class=CodeInterpreterMathGame,
         config=args,
@@ -56,18 +57,18 @@ def main(args):
     )
     print(f"✓ Environment created")
     print(f"  - Interaction config: {env.get_interaction_config_path()}")
-    print(f"  - Game server endpoint: {env_endpoint}")
+    print(f"  - Game server endpoint: {env_url}")
 
     # 3. Setup game stats client
     print("\n[3/4] Connecting to game server...")
-    game_stats = GameStatsClient(env_endpoint, job_id=env.job_id)
+    game_stats = GameStatsClient(env_url, job_id=env.job_id)
     if game_stats.health_check():
         game_stats.reset_all()
-        print(f"✓ Connected to game server at {env_endpoint}")
+        print(f"✓ Connected to game server at {env_url}")
     else:
         game_stats = None
-        print(f"⚠ Game server not responding at {env_endpoint}")
-        print(f"  Make sure to start: python opentinker/environment/math/code_interpreter_math_server.py --port {args.interaction.config.env_port}")
+        print(f"⚠ Game server not responding at {env_url}")
+        print(f"  Make sure to start: python opentinker/environment/math/code_interpreter_math_server.py")
 
     # 4. Connect to training server and train
     print("\n[4/4] Starting training...")