update voxtral-realtime build flag and readme for cuda-windows support (pytorch#18417)

Gasoonjia · web-flow · commit f1a61fcc3a4d · 2026-03-23T13:10:20.000-07:00
Differential Revision: D97788666
diff --git a/CMakePresets.json b/CMakePresets.json
@@ -152,7 +152,8 @@
         "llm-release"
       ],
       "cacheVariables": {
-        "EXECUTORCH_BUILD_CUDA": "ON"
+        "EXECUTORCH_BUILD_CUDA": "ON",
+        "CMAKE_CUDA_ARCHITECTURES": "native"
       },
       "condition": {
         "type": "inList",
diff --git a/examples/models/voxtral_realtime/CMakePresets.json b/examples/models/voxtral_realtime/CMakePresets.json
@@ -57,6 +57,7 @@
             "name": "voxtral-realtime-cpu",
             "displayName": "Build Voxtral Realtime runner (CPU)",
             "configurePreset": "voxtral-realtime-cpu",
+            "configuration": "Release",
             "targets": [
                 "voxtral_realtime_runner"
             ]
@@ -73,6 +74,7 @@
         {
             "name": "voxtral-realtime-cuda",
             "displayName": "Build Voxtral Realtime runner (CUDA)",
+            "configuration": "Release",
             "configurePreset": "voxtral-realtime-cuda",
             "targets": [
                 "voxtral_realtime_runner"
diff --git a/examples/models/voxtral_realtime/README.md b/examples/models/voxtral_realtime/README.md
@@ -198,7 +198,6 @@ capability to avoid "invalid device function" errors (the `int4mm` kernels
 require SM 80+).
 
 ```powershell
-$env:CMAKE_CUDA_ARCHITECTURES="80;86;89;90;120"
 cmake --workflow --preset llm-release-cuda
 Push-Location examples/models/voxtral_realtime
 cmake --workflow --preset voxtral-realtime-cuda
diff --git a/tools/cmake/preset/README.md b/tools/cmake/preset/README.md
@@ -65,6 +65,20 @@ $ cmake --workflow --preset llm-debug-cuda
 $ cmake --workflow --preset llm-debug-metal
 ```
 
+> [!NOTE]
+> **CUDA architecture selection:** The `llm-release-cuda` and `llm-debug-cuda`
+> presets set `CMAKE_CUDA_ARCHITECTURES=native` (requires CMake 3.24+), which
+> compiles for the GPU(s) detected on the build machine at configure time.
+> To target different architectures, override it with `-D` on the configure step:
+> ```bash
+> cmake --preset llm-release-cuda -DCMAKE_CUDA_ARCHITECTURES="80;86;89;90;120"
+> cmake --build --preset llm-release-cuda --config Release
+> ```
+> `cmake --workflow` does not accept `-D` flags, so you must run configure and
+> build as separate steps when overriding. On Windows, setting
+> `CMAKE_CUDA_ARCHITECTURES` as an environment variable does **not** work
+> with CMake presets — you must use the `-D` flag.
+
 #### Understanding workflow components
 
 A workflow preset typically consists of: