@@ -16,6 +16,7 @@ import (
1616 "log/slog"
1717 "net"
1818 "net/http"
19+ "strconv"
1920 "strings"
2021 "sync"
2122 "time"
@@ -67,6 +68,13 @@ const (
6768 // defaultSessionTTL is the default session time-to-live duration.
6869 // Sessions that are inactive for this duration will be automatically cleaned up.
6970 defaultSessionTTL = 30 * time .Minute
71+
72+ // defaultIdleCheckInterval is how often the idle reaper scans for inactive sessions.
73+ defaultIdleCheckInterval = time .Minute
74+
75+ // defaultRetryAfterSeconds is the Retry-After value returned with HTTP 503
76+ // when the global session limit is reached.
77+ defaultRetryAfterSeconds = 30
7078)
7179
7280//go:generate mockgen -destination=mocks/mock_watcher.go -package=mocks -source=server.go Watcher
@@ -160,6 +168,21 @@ type Config struct {
160168 // SessionFactory creates MultiSessions for Phase 2 session management.
161169 // Required when SessionManagementV2 is true; ignored otherwise.
162170 SessionFactory vmcpsession.MultiSessionFactory
171+
172+ // MaxSessions is the global concurrent session limit when SessionManagementV2 is enabled.
173+ // Requests that would exceed this limit receive HTTP 503 with a Retry-After header.
174+ // 0 uses the default (100). Requires SessionManagementV2 = true.
175+ MaxSessions int
176+
177+ // MaxSessionsPerClient is the per-identity session limit when SessionManagementV2 is enabled.
178+ // Keyed by auth.Identity.Subject; anonymous clients are not limited.
179+ // 0 uses the default (10). Requires SessionManagementV2 = true.
180+ MaxSessionsPerClient int
181+
182+ // IdleSessionTimeout is the duration after which inactive sessions are proactively
183+ // expired when SessionManagementV2 is enabled. Values greater than SessionTTL are clamped to SessionTTL (with a warning).
184+ // 0 uses the default (5 minutes). Requires SessionManagementV2 = true.
185+ IdleSessionTimeout time.Duration
163186}
164187
165188// Server is the Virtual MCP Server that aggregates multiple backends.
@@ -277,6 +300,24 @@ func New(
277300 if cfg .SessionTTL == 0 {
278301 cfg .SessionTTL = defaultSessionTTL
279302 }
303+ if cfg .MaxSessions == 0 {
304+ cfg .MaxSessions = sessionmanager .DefaultMaxSessions
305+ }
306+ if cfg .MaxSessionsPerClient == 0 {
307+ cfg .MaxSessionsPerClient = sessionmanager .DefaultMaxSessionsPerClient
308+ }
309+ if cfg .IdleSessionTimeout == 0 {
310+ cfg .IdleSessionTimeout = sessionmanager .DefaultIdleSessionTimeout
311+ }
312+ // IdleSessionTimeout must not exceed SessionTTL: if it did, the transport
313+ // TTL reaper could evict sessions before the idle reaper fires, leaving
314+ // per-client counters and idle-tracking maps stale.
315+ if cfg .IdleSessionTimeout > cfg .SessionTTL {
316+ slog .Warn ("IdleSessionTimeout exceeds SessionTTL; clamping to SessionTTL" ,
317+ "idle_session_timeout" , cfg .IdleSessionTimeout ,
318+ "session_ttl" , cfg .SessionTTL )
319+ cfg .IdleSessionTimeout = cfg .SessionTTL
320+ }
280321
281322 // Create hooks for SDK integration
282323 hooks := & server.Hooks {}
@@ -400,7 +441,12 @@ func New(
400441 if cfg .SessionFactory == nil {
401442 return nil , fmt .Errorf ("SessionManagementV2 is enabled but no SessionFactory was provided" )
402443 }
403- vmcpSessMgr = sessionmanager .New (sessionManager , cfg .SessionFactory , backendRegistry )
444+ limits := sessionmanager.Limits {
445+ MaxSessions : cfg .MaxSessions ,
446+ MaxSessionsPerClient : cfg .MaxSessionsPerClient ,
447+ IdleSessionTimeout : cfg .IdleSessionTimeout ,
448+ }
449+ vmcpSessMgr = sessionmanager .New (sessionManager , cfg .SessionFactory , backendRegistry , limits )
404450 slog .Info ("session-scoped backend lifecycle enabled" )
405451
406452 // Warn about incompatible optimizer configuration and disable it
@@ -557,6 +603,13 @@ func (s *Server) Handler(_ context.Context) (http.Handler, error) {
557603 slog .Info ("audit middleware enabled for MCP endpoints" )
558604 }
559605
606+ // Apply session limit middleware when V2 session management is active.
607+ // Auth is wrapped around it below, so auth runs first and the limit check runs after authentication.
608+ if s .vmcpSessionMgr != nil && s .config .MaxSessions > 0 {
609+ mcpHandler = s .sessionLimitMiddleware (mcpHandler )
610+ slog .Info ("session limit middleware enabled" , "max_sessions" , s .config .MaxSessions )
611+ }
612+
560613 // Apply authentication middleware if configured (runs first in chain)
561614 if s .config .AuthMiddleware != nil {
562615 mcpHandler = s .config .AuthMiddleware (mcpHandler )
@@ -575,6 +628,37 @@ func (s *Server) Handler(_ context.Context) (http.Handler, error) {
575628 return mux , nil
576629}
577630
631+ // sessionLimitMiddleware is a best-effort fast-fail for new session requests
632+ // (no Mcp-Session-Id header): it returns HTTP 503 + Retry-After before the
633+ // request reaches the SDK when the global session cap appears to be reached.
634+ // Existing sessions (with a valid Mcp-Session-Id) are never affected.
635+ //
636+ // This check is intentionally optimistic (non-atomic): it avoids the overhead
637+ // of routing and SDK processing for clearly-over-limit requests, but it does
638+ // not guarantee strict enforcement under concurrent load. Strict enforcement
639+ // is provided atomically by sessionmanager.Manager.Generate(), which uses an
640+ // increment-first reservation to prevent races between concurrent initialize
641+ // requests.
642+ func (s * Server ) sessionLimitMiddleware (next http.Handler ) http.Handler {
643+ // Resolve the concrete manager once so we can call ActiveSessionCount().
644+ mgr , _ := s .vmcpSessionMgr .(* sessionmanager.Manager )
645+ return http .HandlerFunc (func (w http.ResponseWriter , r * http.Request ) {
646+ if r .Header .Get ("Mcp-Session-Id" ) == "" && mgr != nil {
647+ if mgr .ActiveSessionCount () >= s .config .MaxSessions {
648+ w .Header ().Set ("Retry-After" , strconv .Itoa (defaultRetryAfterSeconds ))
649+ w .Header ().Set ("Content-Type" , "application/json" )
650+ w .WriteHeader (http .StatusServiceUnavailable )
651+ _ , _ = w .Write ([]byte (
652+ `{"error":{"code":-32000,"message":"Maximum concurrent sessions exceeded. ` +
653+ `Please try again later or contact administrator."}}` ,
654+ ))
655+ return
656+ }
657+ }
658+ next .ServeHTTP (w , r )
659+ })
660+ }
661+
578662// Start starts the Virtual MCP Server and begins serving requests.
579663//
580664//nolint:gocyclo // Complexity from health monitoring and startup orchestration is acceptable
@@ -667,6 +751,19 @@ func (s *Server) Start(ctx context.Context) error {
667751 }
668752 }
669753
754+ // Start idle session reaper if V2 session management is active with an idle timeout.
755+ if mgr , ok := s .vmcpSessionMgr .(* sessionmanager.Manager ); ok && s .config .IdleSessionTimeout > 0 {
756+ idleCtx , idleCancel := context .WithCancel (ctx )
757+ mgr .StartIdleReaper (idleCtx , defaultIdleCheckInterval )
758+ slog .Info ("idle session reaper started" ,
759+ "idle_timeout" , s .config .IdleSessionTimeout ,
760+ "check_interval" , defaultIdleCheckInterval )
761+ s .shutdownFuncs = append (s .shutdownFuncs , func (context.Context ) error {
762+ idleCancel ()
763+ return nil
764+ })
765+ }
766+
670767 // Start status reporter if configured
671768 if s .statusReporter != nil {
672769 shutdown , err := s .statusReporter .Start (ctx )
0 commit comments