@@ -9,6 +9,7 @@ import * as Inspectable from "effect/Inspectable"
99import * as Option from "effect/Option"
1010import * as Sink from "effect/Sink"
1111import * as Stream from "effect/Stream"
12+ import fc from "fast-check"
1213
1314import type { TemplateConfig } from "../../src/core/domain.js"
1415import { gpuModeAfterDockerFailure } from "../../src/core/gpu.js"
@@ -91,15 +92,29 @@ const decideStdout = (cmd: RecordedCommand): string => {
9192 return ""
9293}
9394
// Lower-case substrings used below both to build the sample NVIDIA failure
// messages and to populate `nvidiaFailureMarkers`.
const nvidiaContainerCliMarker = "nvidia-container-cli"
const libNvidiaMlMarker = "libnvidia-ml.so.1"
const missingDeviceDriverMarker = "could not select device driver"

/** Sample daemon error text that embeds the container-CLI and libnvidia-ml markers. */
const nvidiaRuntimeFailure =
  `Error response from daemon: failed to create task for container: ${nvidiaContainerCliMarker}: initialization error: load library failed: ${libNvidiaMlMarker}`

/** Sample daemon error text that embeds the missing-device-driver marker. */
const nvidiaMissingDeviceDriverFailure =
  `Error response from daemon: ${missingDeviceDriverMarker} "" with capabilities: [[gpu]]`

/** Sample daemon error text that contains none of the NVIDIA markers. */
const arbitraryComposeFailure =
  "Error response from daemon: network sandbox setup failed"

/** Every marker substring searched for by `containsNvidiaFailureMarker`. */
const nvidiaFailureMarkers: readonly string[] = [
  nvidiaContainerCliMarker,
  libNvidiaMlMarker,
  missingDeviceDriverMarker
]
112+
/**
 * Reports whether `details` mentions any entry of `nvidiaFailureMarkers`.
 *
 * `details` is lower-cased before searching; the markers themselves are
 * compared exactly as listed.
 *
 * @param details - Failure text to inspect.
 * @returns `true` if at least one marker occurs as a substring, otherwise `false`.
 */
const containsNvidiaFailureMarker = (details: string): boolean => {
  const haystack = details.toLowerCase()
  for (const marker of nvidiaFailureMarkers) {
    if (haystack.includes(marker)) {
      return true
    }
  }
  return false
}
117+
103118const hasNvidiaFallbackWarning = ( logs : ReadonlyArray < string > , expectedDetail : string ) : boolean =>
104119 logs . some ( ( entry ) =>
105120 entry . includes ( "NVIDIA runtime failed" ) &&
@@ -365,6 +380,29 @@ describe("runDockerComposeUpWithPortCheck", () => {
365380 expect ( gpuModeAfterDockerFailure ( "none" , arbitraryComposeFailure ) ) . toBe ( "none" )
366381 } )
367382
it("satisfies the GPU fallback classifier invariant", () => {
  // Failure text with one known NVIDIA marker placed between two arbitrary strings.
  const markedDetails = fc
    .tuple(
      fc.string(),
      fc.constantFrom(nvidiaContainerCliMarker, libNvidiaMlMarker, missingDeviceDriverMarker),
      fc.string()
    )
    .map(([prefix, marker, suffix]) => `${prefix}${marker}${suffix}`)

  // Mix fully arbitrary strings with the marker-bearing ones.
  const dockerFailureDetails = fc.oneof(fc.string(), markedDetails)

  // Starting from "all", the mode is expected to become "none" exactly when the
  // local oracle finds a marker; starting from "none", it is expected to stay "none".
  const classifierInvariant = fc.property(dockerFailureDetails, (details) => {
    const expectedFromAll = containsNvidiaFailureMarker(details) ? "none" : "all"

    expect(gpuModeAfterDockerFailure("all", details)).toBe(expectedFromAll)
    expect(gpuModeAfterDockerFailure("none", details)).toBe("none")
  })

  fc.assert(classifierInvariant, { numRuns: 50 })
})
405+
368406 it . effect ( "falls back to GPU none before retrying reuse mode when the host NVIDIA runtime is unavailable" , ( ) =>
369407 withTempDir ( ( root ) =>
370408 Effect . gen ( function * ( _ ) {
0 commit comments