Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 10 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,9 @@ python opentinker/client/math_rl.py \
data_path=data/math_agentloop/train.parquet \
val_data_path=data/math_agentloop/test.parquet \
scheduler_url=http://<server_endpoint>:<scheduler_port> \
interaction.config.env_port=<env_port> \
interaction.config.env_host=<client_endpoint>
env_url=http://<client_endpoint>:<env_port>

# multi turn tool ca
# multi turn tool call
python opentinker/client/math_tool_rl.py \
tokenizer_path=Qwen/Qwen2.5-1.5B \
batch_size=16 \
Expand All @@ -162,8 +161,7 @@ python opentinker/client/math_tool_rl.py \
save_freq=1000 \
test_freq=5 \
scheduler_url=http://<server_endpoint>:<scheduler_port> \
interaction.config.env_port=<env_port> \
interaction.config.env_host=<client_endpoint>
env_url=http://<client_endpoint>:<env_port>
```

**Gomoku RL (Multi-turn):**
Expand All @@ -176,8 +174,7 @@ python opentinker/client/gomoku_rl.py \
save_freq=1000 \
test_freq=5 \
scheduler_url=http://<server_endpoint>:<scheduler_port> \
interaction.config.env_port=<env_port> \
interaction.config.env_host=<client_endpoint>
env_url=http://<client_endpoint>:<env_port>
```

**Math Inference:**
Expand All @@ -188,17 +185,17 @@ python opentinker/client/math_inference.py \
data_path=data/math/test.parquet \
output_path=./tmp/results.jsonl \
max_samples=5 \
env_endpoint=http://<client_endpoint>:<env_port> \
scheduler_url=http://<server_endpoint>:<scheduler_port>
scheduler_url=http://<server_endpoint>:<scheduler_port> \
env_url=http://<client_endpoint>:<env_port>

# multi turn tool call
python opentinker/client/math_tool_inference.py \
model_path=<model_name> \
data_path=data/math/test.parquet \
output_path=./tmp/results.jsonl \
max_samples=5 \
env_endpoint=http://<client_endpoint>:<env_port> \
scheduler_url=http://<server_endpoint>:<scheduler_port>
scheduler_url=http://<server_endpoint>:<scheduler_port> \
env_url=http://<client_endpoint>:<env_port>
```

**Gomoku Inference:**
Expand All @@ -207,8 +204,8 @@ python opentinker/client/gomoku_inference.py \
model_path=<model_name> \
output_path=./tmp/results.jsonl \
max_samples=5 \
env_endpoint=http://<client_endpoint>:<env_port> \
scheduler_url=http://<server_endpoint>:<scheduler_port>
scheduler_url=http://<server_endpoint>:<scheduler_port> \
env_url=http://<client_endpoint>:<env_port>
```


Expand Down
5 changes: 3 additions & 2 deletions opentinker/client/client_config/generic_env_param.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
# - No reward function (environment provides rewards)
# - Interaction configuration for Gym environment

server_url: "http://localhost:8000" # Not needed when using the scheduler version
# Server settings
scheduler_url: "http://localhost:8766"
scheduler_api_key: "otk_98b8db24ccd64c92e1fdd9a232e209fa"
env_url: "http://localhost:8084" # Environment server URL

# GPU allocation
num_gpus: 4
Expand All @@ -29,7 +30,7 @@ interaction:
name: "gym_env" # Name referenced in dataset's interaction_kwargs
class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction
config:
env_endpoint: "http://localhost:8084" # Mock environment server
env_endpoint: ${env_url} # References top-level env_url
max_steps: 50
observation_template: "Environment: {observation}"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ output_path: null # Output results file (jsonl)
max_samples: 10 # Number of games to play

# Environment settings
env_endpoint: http://localhost:8091
env_url: http://localhost:8091

# Multi-turn settings (Gomoku is multi-turn game)
multi_turn:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ output_path: null # Output results file (jsonl)
max_samples: 10 # Number of games to play

# Environment settings
env_endpoint: http://0.0.0.0:8091
env_url: http://0.0.0.0:8091

# Multi-turn settings (Gomoku is multi-turn game)
multi_turn:
Expand Down
9 changes: 4 additions & 5 deletions opentinker/client/client_config/gomoku_param.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ interaction:
name: gomoku
class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction
config:
env_host: 0.0.0.0
env_port: 8088
env_endpoint: http://${interaction.config.env_host}:${interaction.config.env_port}
env_endpoint: ${env_url} # References top-level env_url
max_steps: 81 # Parameter for the backend GymEnvironmentInteraction
max_total_steps: 39 # Upper limit on calls to the environment's step method (guards against invalid-move hacking)
max_initial_moves: 0
Expand All @@ -73,10 +71,11 @@ multi_turn:
experiment_name: "gomoku_interaction" # Experiment name in Weave


# Scheduler settings
# Server settings
scheduler_url: "http://0.0.0.0:8780"
scheduler_api_key: otk_98b8db24ccd64c92e1fdd9a232e209fa # this is user id
env_url: "http://0.0.0.0:8088" # Game environment server URL

# GPU settings
num_gpus: 4
num_gpus: 2

Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ output_path: null # Output results file (jsonl)
max_samples: null # Limit samples (null = all)

# Environment settings (code interpreter math server)
env_endpoint: http://0.0.0.0:8088
env_url: http://0.0.0.0:8088

# Multi-turn settings (allow code execution iterations)
multi_turn:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,13 @@ interaction:
name: math_code_interpreter
class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction
config:
env_host: 0.0.0.0
env_port: 8088
env_endpoint: http://${interaction.config.env_host}:${interaction.config.env_port}
env_endpoint: ${env_url} # References top-level env_url
max_steps: 5 # Max interaction steps (code executions)

# Scheduler settings
# Server settings
scheduler_url: "http://0.0.0.0:8780"
scheduler_api_key: null
env_url: "http://0.0.0.0:8088" # Code interpreter server URL

# GPU settings
num_gpus: 4
Expand Down
2 changes: 1 addition & 1 deletion opentinker/client/client_config/math_inference_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ output_path: null # Output results file (jsonl)
max_samples: null # Limit samples (null = all)

# Environment settings
env_endpoint: http://localhost:8088
env_url: http://localhost:8088

# Multi-turn settings (same as training config)
multi_turn:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ output_path: null # Output results file (jsonl)
max_samples: null # Limit samples (null = all)

# Environment settings
env_endpoint: http://0.0.0.0:8088
env_url: http://0.0.0.0:8088

# Multi-turn settings (same as training config)
multi_turn:
Expand Down
7 changes: 3 additions & 4 deletions opentinker/client/client_config/math_param.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@ interaction:
name: math
class_path: opentinker.environment.gym_environment_interaction.GymEnvironmentInteraction
config:
env_host: 0.0.0.0
env_port: 8088
env_endpoint: http://${interaction.config.env_host}:${interaction.config.env_port}
env_endpoint: ${env_url} # References top-level env_url
max_steps: 1 # Max interaction steps

multi_turn:
Expand All @@ -69,9 +67,10 @@ multi_turn:
weave_project: null
experiment_name: "math_interaction"

# Scheduler settings
# Server settings
scheduler_url: "http://0.0.0.0:8780"
scheduler_api_key: otk_98b8db24ccd64c92e1fdd9a232e209fa
env_url: "http://0.0.0.0:8088" # Math environment server URL

# GPU settings
num_gpus: 4
8 changes: 4 additions & 4 deletions opentinker/client/gomoku_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ def main(args):
print(f"✓ Inference job {job_id} started at {vllm_server_url}")

# 2. Setup GameStatsClient for per-step metrics (with job_id isolation)
game_stats = GameStatsClient(args.env_endpoint, job_id=job_id)
game_stats = GameStatsClient(args.env_url, job_id=job_id)
if game_stats.health_check():
print(f"✓ Connected to game server at {args.env_endpoint}")
print(f"✓ Connected to game server at {args.env_url}")
game_stats.reset_all() # Reset stats for this job before inference
else:
print(f"⚠ Game server not available at {args.env_endpoint}, continuing without stats")
print(f"⚠ Game server not available at {args.env_url}, continuing without stats")
game_stats = None

# 3. Run inference using the remote vLLM server
Expand All @@ -67,7 +67,7 @@ def main(args):
tokenizer_path=args.get("tokenizer_path") or args.model_path,
data_path=args.get("data_path"), # None for dynamic generation
game_class=GomokuGame,
env_endpoint=args.env_endpoint,
env_endpoint=args.env_url,
job_id=job_id, # Pass job_id for stats isolation
output_path=args.get("output_path"),
temperature=args.temperature,
Expand Down
11 changes: 6 additions & 5 deletions opentinker/client/gomoku_rl.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,11 @@ def main(args):
"max_total_steps": interaction_config.get("max_total_steps", 40),
}

env_endpoint = interaction_config.env_endpoint
# Use top-level env_url (with fallback to interaction.config.env_endpoint)
env_url = args.get("env_url") or interaction_config.get("env_endpoint")

print("\nSetting up GameEnvironment with GomokuGame...")
print(f" Environment endpoint: {env_endpoint}")
print(f" Environment URL: {env_url}")
print(f" Board size: {game_kwargs['board_size']}")
print(f" Job ID for stats: {job_id}")

Expand All @@ -99,12 +100,12 @@ def main(args):
print(f" Interaction config path: {env.get_interaction_config_path()}")

# 3. Setup GameStatsClient for per-step metrics (use env.job_id for consistency)
game_stats = GameStatsClient(env_endpoint, job_id=env.job_id)
game_stats = GameStatsClient(env_url, job_id=env.job_id)
if game_stats.health_check():
print(f"✓ Connected to game server for metrics at {env_endpoint}")
print(f"✓ Connected to game server for metrics at {env_url}")
game_stats.reset_all() # Reset all stats before training
else:
print(f"⚠ Game server at {env_endpoint} not responding - metrics disabled")
print(f"⚠ Game server at {env_url} not responding - metrics disabled")
game_stats = None

# 4. Connect to allocated server
Expand Down
8 changes: 4 additions & 4 deletions opentinker/client/math_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ def main(args):
print(f"✓ Inference job {job_id} started at {vllm_server_url}")

# 2. Setup GameStatsClient for per-step metrics (with job_id isolation)
game_stats = GameStatsClient(args.env_endpoint, job_id=job_id)
game_stats = GameStatsClient(args.env_url, job_id=job_id)
if game_stats.health_check():
print(f"✓ Connected to game server at {args.env_endpoint}")
print(f"✓ Connected to game server at {args.env_url}")
game_stats.reset_all() # Reset stats for this job before inference
else:
print(f"⚠ Game server not available at {args.env_endpoint}, continuing without stats")
print(f"⚠ Game server not available at {args.env_url}, continuing without stats")
game_stats = None

# 3. Run inference using the remote vLLM server
Expand All @@ -66,7 +66,7 @@ def main(args):
tokenizer_path=args.get("tokenizer_path") or args.model_path,
data_path=args.data_path,
game_class=MathGame,
env_endpoint=args.env_endpoint,
env_endpoint=args.env_url,
job_id=job_id, # Pass job_id for stats isolation
output_path=args.get("output_path"),
temperature=args.temperature,
Expand Down
9 changes: 5 additions & 4 deletions opentinker/client/math_rl.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def main(args):
print(f"✓ Job {job_id} allocated at {server_url}")

# 2. Setup environment (job_id is automatically handled)
env_endpoint = args.interaction.config.env_endpoint
# Use top-level env_url (with fallback to interaction.config.env_endpoint)
env_url = args.get("env_url") or args.interaction.config.get("env_endpoint")
env = MathGameEnvironment(
game_class=MathGame,
config=args,
Expand All @@ -57,13 +58,13 @@ def main(args):
print(f"✓ Environment created, interaction config: {env.get_interaction_config_path()}")

# 3. Setup game stats client (use env.job_id for consistency)
game_stats = GameStatsClient(env_endpoint, job_id=env.job_id)
game_stats = GameStatsClient(env_url, job_id=env.job_id)
if game_stats.health_check():
game_stats.reset_all()
print(f"✓ Connected to math server at {env_endpoint}")
print(f"✓ Connected to math server at {env_url}")
else:
game_stats = None
print(f"⚠ Math server not responding at {env_endpoint}")
print(f"⚠ Math server not responding at {env_url}")

# 4. Connect to training server
client = ServiceClient(
Expand Down
8 changes: 4 additions & 4 deletions opentinker/client/math_tool_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ def main(args):
print(f"✓ Inference job {job_id} started at {vllm_server_url}")

# 2. Setup GameStatsClient for per-step metrics (with job_id isolation)
game_stats = GameStatsClient(args.env_endpoint, job_id=job_id)
game_stats = GameStatsClient(args.env_url, job_id=job_id)
if game_stats.health_check():
print(f"✓ Connected to code interpreter game server at {args.env_endpoint}")
print(f"✓ Connected to code interpreter game server at {args.env_url}")
game_stats.reset_all() # Reset stats for this job before inference
else:
print(f"⚠ Game server not available at {args.env_endpoint}, continuing without stats")
print(f"⚠ Game server not available at {args.env_url}, continuing without stats")
print(f" Make sure to start: python opentinker/environment/math/code_interpreter_math_server.py --port 8088")
game_stats = None

Expand All @@ -69,7 +69,7 @@ def main(args):
tokenizer_path=args.get("tokenizer_path") or args.model_path,
data_path=args.data_path,
game_class=CodeInterpreterMathGame,
env_endpoint=args.env_endpoint,
env_endpoint=args.env_url,
job_id=job_id, # Pass job_id for stats isolation
output_path=args.get("output_path"),
temperature=args.temperature,
Expand Down
13 changes: 7 additions & 6 deletions opentinker/client/math_tool_rl.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def main(args):

# 2. Setup environment
print("\n[2/4] Setting up environment...")
env_endpoint = args.interaction.config.env_endpoint
# Use top-level env_url (with fallback to interaction.config.env_endpoint)
env_url = args.get("env_url") or args.interaction.config.get("env_endpoint")
env = MathCodeInterpreterEnvironment(
game_class=CodeInterpreterMathGame,
config=args,
Expand All @@ -56,18 +57,18 @@ def main(args):
)
print(f"✓ Environment created")
print(f" - Interaction config: {env.get_interaction_config_path()}")
print(f" - Game server endpoint: {env_endpoint}")
print(f" - Game server endpoint: {env_url}")

# 3. Setup game stats client
print("\n[3/4] Connecting to game server...")
game_stats = GameStatsClient(env_endpoint, job_id=env.job_id)
game_stats = GameStatsClient(env_url, job_id=env.job_id)
if game_stats.health_check():
game_stats.reset_all()
print(f"✓ Connected to game server at {env_endpoint}")
print(f"✓ Connected to game server at {env_url}")
else:
game_stats = None
print(f"⚠ Game server not responding at {env_endpoint}")
print(f" Make sure to start: python opentinker/environment/math/code_interpreter_math_server.py --port {args.interaction.config.env_port}")
print(f"⚠ Game server not responding at {env_url}")
print(f" Make sure to start: python opentinker/environment/math/code_interpreter_math_server.py")

# 4. Connect to training server and train
print("\n[4/4] Starting training...")
Expand Down