Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 27 additions & 7 deletions internal/controller/vm/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,30 @@ func (c *Controller) StartVM(ctx context.Context, opts *StartOptions) (err error
}()
defer cancel()

// we should set up the necessary listeners for guest-host communication.
// The guest needs to connect to predefined vsock ports.
// The host must already be listening on these ports before the guest attempts to connect,
// otherwise the connection would fail.
c.setupEntropyListener(gctx, g)
c.setupLoggingListener(gctx, g)
// Set up the host-side hvsock listeners for entropy and logs before
// starting the VM. The guest dials predefined vsock ports early in boot,
// so the listeners must be bound up front to avoid a race.
// Each setup call creates the listener synchronously and dispatches an
// accept goroutine onto the errgroup:
// - entropy: writes seed bytes to the guest, then returns.
// - logging: accepts the connection and spawns a long-running relay
// for guest logs; the accept goroutine itself returns immediately.
//
// We intentionally wait on the error group after VM start but before
// establishing the GCS connection, to ensure entropy is seeded and the
// log channel is wired up first.
if err = c.setupEntropyListener(gctx, g); err != nil {
return fmt.Errorf("failed to set up entropy listener: %w", err)
}
if err = c.setupLoggingListener(gctx, g); err != nil {
return fmt.Errorf("failed to set up logging listener: %w", err)
}
// Open the host-side GCS hvsock listener before VM start so the host
// is listening when the in-VM GCS dials. Otherwise, GCS falls back to
// the internal HCS bridge and our accept hangs until timeout.
if err = c.guest.PrepareConnection(opts.GCSServiceID); err != nil {
return fmt.Errorf("failed to prepare guest connection: %w", err)
}

err = c.uvm.Start(ctx)
if err != nil {
Expand All @@ -210,7 +228,9 @@ func (c *Controller) StartVM(ctx context.Context, opts *StartOptions) (err error
return err
}

err = c.guest.CreateConnection(ctx, opts.GCSServiceID, opts.ConfigOptions...)
// VM is started, entropy is seeded and log channel is up. Accept the
// GCS dial on the prepared listener and run the GCS protocol handshake.
err = c.guest.CreateConnection(ctx, opts.ConfigOptions...)
if err != nil {
return fmt.Errorf("failed to create guest connection: %w", err)
}
Expand Down
51 changes: 30 additions & 21 deletions internal/controller/vm/vm_lcow.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,16 +111,18 @@ func (c *Controller) Plan9Controller() *plan9.Controller {
// Linux VMs require entropy to initialize their random number generators during boot.
// This method listens on a predefined vsock port and provides cryptographically secure
// random data to the Linux init process when it connects.
func (c *Controller) setupEntropyListener(ctx context.Context, group *errgroup.Group) {
func (c *Controller) setupEntropyListener(ctx context.Context, group *errgroup.Group) error {
// The Linux guest will connect to this port during init to receive entropy.
entropyConn, err := winio.ListenHvsock(&winio.HvsockAddr{
VMID: c.uvm.RuntimeID(),
ServiceID: winio.VsockServiceID(vmutils.LinuxEntropyVsockPort),
})
if err != nil {
return fmt.Errorf("failed to listen on hvSocket for entropy: %w", err)
}

group.Go(func() error {
// The Linux guest will connect to this port during init to receive entropy.
entropyConn, err := winio.ListenHvsock(&winio.HvsockAddr{
VMID: c.uvm.RuntimeID(),
ServiceID: winio.VsockServiceID(vmutils.LinuxEntropyVsockPort),
})
if err != nil {
return fmt.Errorf("failed to listen on hvSocket for entropy: %w", err)
}
defer entropyConn.Close()

// Prepare to provide entropy to the init process in the background. This
// must be done in a goroutine since, when using the internal bridge, the
Expand All @@ -135,31 +137,34 @@ func (c *Controller) setupEntropyListener(ctx context.Context, group *errgroup.G
// Write the required amount of entropy to the connection.
// The init process will read this data and use it to seed the kernel's
// random number generator (CRNG).
_, err = io.CopyN(conn, rand.Reader, vmutils.LinuxEntropyBytes)
if err != nil {
if _, err = io.CopyN(conn, rand.Reader, vmutils.LinuxEntropyBytes); err != nil {
return fmt.Errorf("failed to write entropy to connection: %w", err)
}

return nil
})

return nil
}

// setupLoggingListener sets up logging for LCOW UVMs.
//
// This method establishes a vsock connection to receive log output from GCS
// running inside the Linux VM. The logs are parsed and
// forwarded to the host's logging system for monitoring and debugging.
func (c *Controller) setupLoggingListener(ctx context.Context, group *errgroup.Group) {
func (c *Controller) setupLoggingListener(ctx context.Context, group *errgroup.Group) error {
// The GCS will connect to this port to stream log output.
logConn, err := winio.ListenHvsock(&winio.HvsockAddr{
VMID: c.uvm.RuntimeID(),
ServiceID: winio.VsockServiceID(vmutils.LinuxLogVsockPort),
})
if err != nil {
close(c.logOutputDone)
return fmt.Errorf("failed to listen on hvSocket for logs: %w", err)
}

group.Go(func() error {
// The GCS will connect to this port to stream log output.
logConn, err := winio.ListenHvsock(&winio.HvsockAddr{
VMID: c.uvm.RuntimeID(),
ServiceID: winio.VsockServiceID(vmutils.LinuxLogVsockPort),
})
if err != nil {
close(c.logOutputDone)
return fmt.Errorf("failed to listen on hvSocket for logs: %w", err)
}
defer logConn.Close()

// Accept the connection from the GCS.
conn, err := vmmanager.AcceptConnection(ctx, c.uvm, logConn, true)
Expand All @@ -170,6 +175,8 @@ func (c *Controller) setupLoggingListener(ctx context.Context, group *errgroup.G

// Launch a separate goroutine to process logs for the lifetime of the VM.
go func() {
defer conn.Close()

// Parse GCS log output and forward it to the host logging system.
vmutils.ParseGCSLogrus(c.uvm.ID())(conn)

Expand All @@ -180,6 +187,8 @@ func (c *Controller) setupLoggingListener(ctx context.Context, group *errgroup.G

return nil
})

return nil
}

// finalizeGCSConnection finalizes the GCS connection for LCOW VMs.
Expand Down
31 changes: 16 additions & 15 deletions internal/controller/vm/vm_wcow.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (c *Controller) buildConfidentialOptions(_ context.Context) (*guestresource
// This is a no-op implementation to satisfy the platform-specific interface.
//
// For comparison, LCOW VMs require entropy to be provided during boot.
func (c *Controller) setupEntropyListener(_ context.Context, _ *errgroup.Group) {}
func (c *Controller) setupEntropyListener(_ context.Context, _ *errgroup.Group) error { return nil }

// setupLoggingListener sets up logging for WCOW UVMs.
//
Expand All @@ -52,25 +52,22 @@ func (c *Controller) setupEntropyListener(_ context.Context, _ *errgroup.Group)
// The listener is configured to accept only one concurrent connection at a time
// to prevent resource exhaustion, but will accept new connections if the current one is closed.
// This supports scenarios where the logging service inside the VM needs to restart.
func (c *Controller) setupLoggingListener(ctx context.Context, _ *errgroup.Group) {
func (c *Controller) setupLoggingListener(ctx context.Context, _ *errgroup.Group) error {
baseListener, err := winio.ListenHvsock(&winio.HvsockAddr{
VMID: c.uvm.RuntimeID(),
ServiceID: prot.WindowsLoggingHvsockServiceID,
})
if err != nil {
close(c.logOutputDone)
return fmt.Errorf("failed to listen for windows logging connections: %w", err)
}

// For Windows, the listener can receive a connection later (after VM starts),
// so we start the output handler in a goroutine with a non-timeout context.
// This allows the output handler to run independently of the VM creation lifecycle.
// This is useful for the case when the logging service is restarted.
go func() {
baseListener, err := winio.ListenHvsock(&winio.HvsockAddr{
VMID: c.uvm.RuntimeID(),
ServiceID: prot.WindowsLoggingHvsockServiceID,
})
if err != nil {
// Close the output done channel to signal that logging setup
// has failed and no logs will be processed.
close(c.logOutputDone)
logrus.WithError(err).Error("failed to listen for windows logging connections")

// Return early due to error.
return
}
defer baseListener.Close()

// Use a WaitGroup to track active log processing goroutines.
// This ensures we wait for all log processing to complete before closing logOutputDone.
Expand All @@ -91,6 +88,8 @@ func (c *Controller) setupLoggingListener(ctx context.Context, _ *errgroup.Group
wg.Add(1)
go func() {
defer wg.Done()
defer conn.Close()

logrus.Info("uvm output handler starting")

// Parse GCS log output and forward it to the host logging system.
Expand All @@ -107,6 +106,8 @@ func (c *Controller) setupLoggingListener(ctx context.Context, _ *errgroup.Group
// Signal that log output processing has completed.
close(c.logOutputDone)
}()

return nil
}

// finalizeGCSConnection finalizes the GCS connection for WCOW UVMs.
Expand Down
2 changes: 2 additions & 0 deletions internal/vm/guestmanager/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ GCS connection:

g, err := guestmanager.New(ctx, uvm)
if err != nil { // handle error }
if err := g.PrepareConnection(gcsServiceID); err != nil { // handle error }
// (start the UVM here)
if err := g.CreateConnection(ctx); err != nil { // handle error }

After the connection is established, use the manager interfaces for guest-side changes:
Expand Down
52 changes: 38 additions & 14 deletions internal/vm/guestmanager/guest.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package guestmanager
import (
"context"
"fmt"
"net"
"sync"

"github.com/Microsoft/hcsshim/internal/gcs"
Expand Down Expand Up @@ -46,6 +47,8 @@ type Guest struct {
// gc is the active GCS connection to the guest.
// It will be nil if no connection is active.
gc *gcs.GuestConnection
// gcListener is bound by PrepareConnection and consumed by CreateConnection.
gcListener net.Listener
}

// New creates a new Guest Manager.
Expand All @@ -67,35 +70,53 @@ func WithInitializationState(state *gcs.InitialGuestState) ConfigOption {
}
}

// CreateConnection accepts the GCS connection and performs initial setup.
func (gm *Guest) CreateConnection(ctx context.Context, GCSServiceID guid.GUID, opts ...ConfigOption) error {
// PrepareConnection opens the host-side hvsock listener for the given GCS
// service ID. Must be called before VM start so the host is listening when
// the in-VM GCS dials. Idempotent for the same service ID.
func (gm *Guest) PrepareConnection(GCSServiceID guid.GUID) error {
gm.mu.Lock()
defer gm.mu.Unlock()

// Return early if a connection is already active.
if gm.gc != nil {
// If the caller tried to connect to a different GCS service then error out.
// Idempotent if already prepared/connected with the same service ID.
if gm.gcListener != nil || gm.gc != nil {
if gm.gcsServiceID != GCSServiceID {
return fmt.Errorf("gcs service id mismatch: expected %s, got %s", gm.gcsServiceID, GCSServiceID)
}
return nil
}

gm.gcsServiceID = GCSServiceID

// The guest needs to connect to predefined GCS port.
// The host must already be listening on these port before the guest attempts to connect,
// otherwise the connection would fail.
vmConn, err := winio.ListenHvsock(&winio.HvsockAddr{
l, err := winio.ListenHvsock(&winio.HvsockAddr{
VMID: gm.uvm.RuntimeID(),
ServiceID: GCSServiceID,
})
if err != nil {
return fmt.Errorf("failed to listen for guest connection: %w", err)
}

// Accept the connection
conn, err := vmmanager.AcceptConnection(ctx, gm.uvm, vmConn, true)
gm.gcsServiceID = GCSServiceID
gm.gcListener = l
return nil
}

// CreateConnection accepts the GCS dial on the prepared listener and runs
// the GCS protocol handshake. Must be called after VM start. Idempotent if
// a connection already exists.
func (gm *Guest) CreateConnection(ctx context.Context, opts ...ConfigOption) error {
gm.mu.Lock()
defer gm.mu.Unlock()

if gm.gc != nil {
return nil
}
if gm.gcListener == nil {
return fmt.Errorf("CreateConnection called before PrepareConnection")
}

// AcceptConnection takes ownership of the listener and closes it.
l := gm.gcListener
gm.gcListener = nil

conn, err := vmmanager.AcceptConnection(ctx, gm.uvm, l, true)
if err != nil {
return fmt.Errorf("failed to connect to GCS: %w", err)
}
Expand Down Expand Up @@ -134,6 +155,9 @@ func (gm *Guest) CloseConnection() error {
err = gm.gc.Close()
gm.gc = nil
}

if gm.gcListener != nil {
_ = gm.gcListener.Close()
gm.gcListener = nil
}
return err
}
3 changes: 2 additions & 1 deletion internal/vm/vmmanager/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func AcceptConnection(ctx context.Context, uvm vmWaiter, l net.Listener, closeCo
conn net.Conn
err error
}
resultCh := make(chan acceptResult)
resultCh := make(chan acceptResult, 1)

go func() {
c, err := l.Accept()
Expand All @@ -45,6 +45,7 @@ func AcceptConnection(ctx context.Context, uvm vmWaiter, l net.Listener, closeCo
}
return res.conn, res.err
case <-ctx.Done():
_ = l.Close()
return nil, ctx.Err()
case <-vmExitCh:
}
Expand Down