Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions coreapi/lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"log/slog"
"sort"
"sync"
"time"
)

// Service is the lifecycle contract every L11 plugin implements.
Expand Down Expand Up @@ -145,9 +146,15 @@ func stopWithPanicRecovery(ctx context.Context, s Service) (err error) {
// shutdown cannot crash the daemon; the panic is converted to an error
// and all remaining services still get their Stop call.
//
// Errors from individual Stop calls (including recovered panics) are
// collected; the first one is returned but every service still gets
// its Stop call invoked.
// To prevent one hung plugin from blocking the entire shutdown sequence,
// each per-plugin Stop call is given its own 5-second timeout via a
// per-plugin context derived from the parent. If a plugin exceeds its
// deadline, context.DeadlineExceeded is surfaced as a Stop error and
// remaining plugins continue shutting down.
//
// Errors from individual Stop calls (including recovered panics and
// deadline expirations) are collected; the first one is returned but
// every service still gets its Stop call invoked.
func (sr *ServiceRegistry) StopAll(ctx context.Context) error {
sr.mu.Lock()
queue := append([]Service(nil), sr.started...)
Expand All @@ -156,7 +163,10 @@ func (sr *ServiceRegistry) StopAll(ctx context.Context) error {

var firstErr error
for i := len(queue) - 1; i >= 0; i-- {
if err := stopWithPanicRecovery(ctx, queue[i]); err != nil && firstErr == nil {
pluginCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
err := stopWithPanicRecovery(pluginCtx, queue[i])
cancel()
if err != nil && firstErr == nil {
firstErr = err
}
}
Expand Down
34 changes: 34 additions & 0 deletions coreapi/zz_lifecycle_edge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,40 @@ func TestServiceRegistry_StopAllStopsAllEvenAfterError(t *testing.T) {
}
}

func TestServiceRegistry_StopAllTimingOutHangingPlugin(t *testing.T) {
t.Parallel()
sr := &coreapi.ServiceRegistry{}
bStopped := false
a := &hangingService{name: "a", order: 1}
bb := &recordingStopWithErr{name: "b", order: 2, stopped: &bStopped}
_ = sr.Register(a)
_ = sr.Register(bb)
_ = sr.StartAll(context.Background(), coreapi.Deps{})
// StopAll should not block forever; the hanging plugin should time out
err := sr.StopAll(context.Background())
if !errors.Is(err, context.DeadlineExceeded) {
t.Errorf("StopAll = %v, want DeadlineExceeded from hung plugin a", err)
}
if !bStopped {
t.Error("service b was not stopped despite a hanging")
}
}

// hangingService never returns from Stop — simulates a plugin that
// blocks indefinitely, used to verify per-plugin timeout in StopAll.
type hangingService struct {
name string
order int
}

func (h *hangingService) Name() string { return h.name }
func (h *hangingService) Order() int { return h.order }
func (h *hangingService) Start(ctx context.Context, deps coreapi.Deps) error { return nil }
func (h *hangingService) Stop(ctx context.Context) error {
<-ctx.Done()
return ctx.Err()
}

type recordingStopWithErr struct {
name string
order int
Expand Down
Loading