Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions Sources/Containerization/VZVirtualMachine+Helpers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,15 @@ extension VZVirtualMachine {
}

extension VZVirtualMachine {
func waitForAgent(queue: DispatchQueue) async throws -> FileHandle {
func waitForAgent(queue: DispatchQueue) async throws -> (FileHandle, VsockTransport) {
let agentConnectionRetryCount: Int = 150
let agentConnectionSleepDuration: Duration = .milliseconds(20)

for _ in 0...agentConnectionRetryCount {
do {
return try await self.connect(queue: queue, port: Vminitd.port).dupHandle()
let conn = try await self.connect(queue: queue, port: Vminitd.port)
let handle = try conn.dupFileDescriptor()
return (handle, VsockTransport(conn))
} catch {
try await Task.sleep(for: agentConnectionSleepDuration)
continue
Expand All @@ -139,6 +141,12 @@ extension VZVirtualMachine {
}

extension VZVirtioSocketConnection {
/// Duplicates the file descriptor and immediately closes the connection.
///
/// Only safe when the returned fd is used synchronously before any
/// suspension point. For deferred use (e.g., gRPC/NIO), use
/// ``dupFileDescriptor()`` and keep the connection alive via
/// ``VsockTransport``.
func dupHandle() throws -> FileHandle {
let fd = dup(self.fileDescriptor)
if fd == -1 {
Expand All @@ -147,6 +155,20 @@ extension VZVirtioSocketConnection {
self.close()
return FileHandle(fileDescriptor: fd, closeOnDealloc: false)
}

/// Duplicates the connection's file descriptor without closing the connection.
///
/// The caller must keep the `VZVirtioSocketConnection` alive until the dup'd
/// descriptor is no longer needed. The Virtualization framework tears down the
/// vsock endpoint when the connection is closed, which invalidates dup'd
/// descriptors.
func dupFileDescriptor() throws -> FileHandle {
let fd = dup(self.fileDescriptor)
if fd == -1 {
throw POSIXError.fromErrno()
}
return FileHandle(fileDescriptor: fd, closeOnDealloc: false)
}
}

#endif
11 changes: 4 additions & 7 deletions Sources/Containerization/VZVirtualMachineInstance.swift
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,8 @@ extension VZVirtualMachineInstance: VirtualMachineInstance {

try await self.vm.start(queue: self.queue)

let agent = Vminitd(
connection: try await self.vm.waitForAgent(queue: self.queue),
group: self.group
)
let (handle, transport) = try await self.vm.waitForAgent(queue: self.queue)
let agent = Vminitd(connection: handle, transport: transport, group: self.group)

do {
if self.config.rosetta {
Expand Down Expand Up @@ -189,9 +187,8 @@ extension VZVirtualMachineInstance: VirtualMachineInstance {
queue: queue,
port: Vminitd.port
)
let handle = try conn.dupHandle()
let agent = Vminitd(connection: handle, group: self.group)
return agent
let handle = try conn.dupFileDescriptor()
return Vminitd(connection: handle, transport: VsockTransport(conn), group: self.group)
} catch {
if let err = error as? ContainerizationError {
throw err
Expand Down
17 changes: 17 additions & 0 deletions Sources/Containerization/Vminitd.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,33 @@ public struct Vminitd: Sendable {

let client: Client

/// Retains the underlying vsock connection to keep the file descriptor
/// valid for the gRPC client's lifetime. The Virtualization framework
/// tears down the vsock endpoint when the connection is closed, which
/// invalidates dup'd descriptors. Must remain open until the gRPC
/// channel is shut down.
private let transport: VsockTransport?

public init(client: Client) {
self.client = client
self.transport = nil
}

public init(connection: FileHandle, group: EventLoopGroup) {
self.client = .init(connection: connection, group: group)
self.transport = nil
}

init(connection: FileHandle, transport: VsockTransport, group: EventLoopGroup) {
self.client = .init(connection: connection, group: group)
self.transport = transport
}

/// Close the connection to the guest agent.
public func close() async throws {
// Shut down the gRPC channel first (NIO closes the dup'd fd),
// then close the vsock endpoint so the guest sees EOF immediately.
defer { transport?.close() }
try await client.close()
}
}
Expand Down
7 changes: 7 additions & 0 deletions Sources/Containerization/VsockListener.swift
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ public final class VsockListener: NSObject, Sendable, AsyncSequence {
#if os(macOS)

extension VsockListener: VZVirtioSocketListenerDelegate {
/// Accepts a new vsock connection by dup'ing its fd and closing the original.
///
/// The dup'd fd is yielded into the `AsyncStream` for immediate consumption.
/// Callers must use the `FileHandle` before any suspension point — the
/// Virtualization framework tears down the vsock endpoint when the connection
/// is closed, which can invalidate dup'd descriptors if the underlying kernel
/// object is reclaimed. For deferred use (e.g., gRPC/NIO), see `VsockTransport`.
public func listener(
_: VZVirtioSocketListener, shouldAcceptNewConnection conn: VZVirtioSocketConnection,
from _: VZVirtioSocketDevice
Expand Down
55 changes: 55 additions & 0 deletions Sources/Containerization/VsockTransport.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
//===----------------------------------------------------------------------===//
// Copyright © 2025-2026 Apple Inc. and the Containerization project authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//===----------------------------------------------------------------------===//

#if os(macOS)
import Foundation
import Virtualization

/// Manages the lifecycle of a VZVirtioSocketConnection for use as a gRPC transport.
///
/// When a vsock connection's file descriptor is dup'd and handed to gRPC/NIO,
/// the original VZVirtioSocketConnection must remain open. The Virtualization
/// framework tears down the host-to-guest vsock mapping when the connection is
/// closed, which invalidates dup'd descriptors. This wrapper keeps the
/// connection alive and provides explicit close semantics.
///
/// Uses `@unchecked Sendable` because VZVirtioSocketConnection is not Sendable,
/// which also prevents using Mutex (its init requires a `sending` parameter that
/// conflicts with the non-Sendable connection at async call sites).
final class VsockTransport: @unchecked Sendable {
private var connection: VZVirtioSocketConnection?
private let lock = NSLock()

init(_ connection: VZVirtioSocketConnection) {
self.connection = connection
}

/// Closes the underlying vsock connection, tearing down the host-side endpoint.
func close() {
lock.lock()
defer { lock.unlock() }
connection?.close()
connection = nil
}

deinit {
// No lock needed: deinit runs only after all strong references are
// released, so no concurrent close() call is possible.
connection?.close()
}
}

#endif
70 changes: 70 additions & 0 deletions Sources/Integration/ContainerTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3198,6 +3198,76 @@ extension IntegrationSuite {
}
}

/// Exercises the dialAgent() → gRPC RPC path that previously crashed with
/// EBADF when the VZVirtioSocketConnection was closed before the gRPC
/// client made its first call.
///
/// Each exec() call creates a new vsock connection via dialAgent(). The
/// gRPC ClientConnection defers NIO channel creation until the first RPC
/// (createProcess). A delay between exec() and start() widens the window
/// where the fd must remain valid — if the VZVirtioSocketConnection is
/// closed prematurely, the fd may be invalidated by the time NIO tries
/// fcntl(F_SETNOSIGPIPE), causing a precondition failure.
///
/// The same VsockTransport fix also applies to the waitForAgent() startup
/// path (where the first RPC is setTime via TimeSyncer). That path is
/// implicitly exercised by every integration test that boots a container,
/// but isn't stress-tested with an artificial delay here because the timing
/// depends on VM boot and Rosetta setup, which aren't controllable.
func testExecDeferredConnectionStability() async throws {
let id = "test-exec-deferred-connection-stability"

let bs = try await bootstrap(id)
let container = try LinuxContainer(id, rootfs: bs.rootfs, vmm: bs.vmm) { config in
config.process.arguments = ["/bin/sleep", "1000"]
config.bootLog = bs.bootLog
}

do {
try await container.create()
try await container.start()

// Run multiple sequential exec calls with delays between creating the
// gRPC connection (exec) and making the first RPC (start). This is the
// pattern that triggered the EBADF crash: the fd was dup'd, the
// VZVirtioSocketConnection was closed, and by the time NIO tried to
// create the channel the fd was invalid.
for i in 0..<10 {
let buffer = BufferWriter()
let exec = try await container.exec("deferred-\(i)") { config in
config.arguments = ["/bin/echo", "exec-\(i)"]
config.stdout = buffer
}

// Delay between exec() (which calls dialAgent/creates gRPC connection)
// and start() (which triggers the first RPC/NIO channel creation).
try await Task.sleep(for: .milliseconds(100))

try await exec.start()
let status = try await exec.wait()
try await exec.delete()

guard status.exitCode == 0 else {
throw IntegrationError.assert(msg: "exec deferred-\(i) status \(status) != 0")
}

guard let output = String(data: buffer.data, encoding: .utf8) else {
throw IntegrationError.assert(msg: "failed to read output from deferred-\(i)")
}
guard output.trimmingCharacters(in: .whitespacesAndNewlines) == "exec-\(i)" else {
throw IntegrationError.assert(msg: "deferred-\(i) output mismatch: \(output)")
}
}

try await container.kill(SIGKILL)
try await container.wait()
try await container.stop()
} catch {
try? await container.stop()
throw error
}
}

@available(macOS 26.0, *)
func testNetworkingDisabled() async throws {
let id = "test-networking-disabled"
Expand Down
1 change: 1 addition & 0 deletions Sources/Integration/Suite.swift
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ struct IntegrationSuite: AsyncParsableCommand {
Test("container useInit zombie reaping", testUseInitZombieReaping),
Test("container useInit with terminal", testUseInitWithTerminal),
Test("container useInit with stdin", testUseInitWithStdin),
Test("exec deferred connection stability", testExecDeferredConnectionStability),

// Pods
Test("pod single container", testPodSingleContainer),
Expand Down
Loading