NVIDIA · podkidyshev · May 15, 2026 · May 12, 2026 · May 12, 2026 · May 14, 2026
@@ -123,8 +123,17 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
         cached_result = self.get_cached_trajectory_result(action)
         if cached_result is not None:
             logging.info(
-                "Retrieved cached result from trajectory with reward %s. Skipping step.",
+                "Retrieved cached result from trajectory with reward %s (from step %s). Skipping execution.",
                 cached_result.reward,
+                cached_result.step,
+            )
+            self.write_trajectory(
+                TrajectoryEntry(
+                    step=self.test_run.step,
+                    action=action,
+                    reward=cached_result.reward,
+                    observation=cached_result.observation,
+                )
             )
             return cached_result.observation, cached_result.reward, False, {}
 

@@ -377,3 +377,45 @@ def test_get_cached_trajectory_result(
         assert expected_step is None
     else:
         assert actual.step == expected_step
+
+
+def test_cached_step_appends_trajectory_row(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None:
+    """Cache hits must still append a row to trajectory.csv so the visible step list matches agent_steps."""
+    tdef = nemorun.model_copy(deep=True)  # deep copy so the edits below do not touch the `nemorun` argument
+    tdef.cmd_args.data.global_batch_size = 8
+    tdef.agent_metrics = ["default"]
+    test_run = TestRun(
+        name="cache_tr",
+        test=tdef,
+        num_nodes=1,
+        nodes=[],
+        reports={NeMoRunReportGenerationStrategy},
+    )
+
+    runner = MagicMock(spec=BaseRunner)  # mocked so the test can assert that nothing was run
+    runner.scenario_root = tmp_path / "scenario"
+    runner.system = MagicMock()
+
+    env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
+    cached_action = {"trainer.max_steps": 1000}
+    env.test_run.current_iteration = 0  # matches the key (0) used to seed env.trajectory below
+    env.trajectory = {0: [TrajectoryEntry(step=1, action=cached_action, reward=0.42, observation=[0.84])]}
+
+    env.test_run.step = 5  # differs from the seeded entry's step=1 so the appended row is distinguishable
+    obs, reward, done, _info = env.step(cached_action)
+
+    runner.run.assert_not_called()  # the seeded entry should be reused instead of running again
+    assert reward == 0.42
+    assert obs == [0.84]
+    assert done is False
+    rows = env.trajectory[0]
+    assert len(rows) == 2  # the seeded entry plus the row appended by this step() call
+    assert rows[-1].step == 5  # recorded under the current step, not the seeded step=1
+    assert rows[-1].reward == 0.42
+    assert rows[-1].action == cached_action
+
+    csv_path = env.trajectory_file_path
+    assert csv_path.exists()
+    contents = csv_path.read_text().strip().splitlines()
+    assert contents[0] == "step,action,reward,observation"  # header row
+    assert contents[-1].startswith("5,")  # last CSV row begins with the current step
@@ -202,6 +202,7 @@ def _job_output_path(tr: TestRun, create: bool = True):
     expected_trajectory = pd.DataFrame(
         data=[
             [1, "{'candidate': 1}", -1.0, "[-1.0]"],
+            [2, "{'candidate': 1}", -1.0, "[-1.0]"],
             [3, "{'candidate': 2}", -1.0, "[-1.0]"],
         ],
         columns=["step", "action", "reward", "observation"],