Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ Common settings:
| `metrics.resource_refresh_interval` | Refresh interval for cached resource capacity metrics | `120s` |
| `limits.max_concurrent_builds` | Max concurrent image builds | `1` |
| `limits.max_overlay_size` | Max overlay filesystem size | `100GB` |
| `limits.name_patterns` | Ordered regex overrides (first matching pattern wins) for per-instance vCPU/memory/overlay limits; omitted fields fall back to the global limits | _(empty)_ |
| `acme.email` | Email for ACME certificate registration | _(empty)_ |
| `acme.dns_provider` | DNS provider for ACME challenges | _(empty)_ |
| `acme.cloudflare_api_token` | Cloudflare API token | _(empty)_ |
Expand Down
40 changes: 34 additions & 6 deletions cmd/api/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,13 @@ type RegistryConfig struct {

// LimitsConfig holds per-instance and aggregate resource limits.
type LimitsConfig struct {
MaxVcpusPerInstance int `koanf:"max_vcpus_per_instance"`
MaxMemoryPerInstance string `koanf:"max_memory_per_instance"`
MaxTotalVolumeStorage string `koanf:"max_total_volume_storage"`
MaxConcurrentBuilds int `koanf:"max_concurrent_builds"`
MaxOverlaySize string `koanf:"max_overlay_size"`
MaxImageStorage float64 `koanf:"max_image_storage"`
MaxVcpusPerInstance int `koanf:"max_vcpus_per_instance"`
MaxMemoryPerInstance string `koanf:"max_memory_per_instance"`
MaxTotalVolumeStorage string `koanf:"max_total_volume_storage"`
MaxConcurrentBuilds int `koanf:"max_concurrent_builds"`
MaxOverlaySize string `koanf:"max_overlay_size"`
MaxImageStorage float64 `koanf:"max_image_storage"`
NamePatterns []NamePatternLimitsConfig `koanf:"name_patterns"`
}

// OversubscriptionConfig holds oversubscription ratios (1.0 = no oversubscription).
Expand Down Expand Up @@ -361,6 +362,7 @@ func defaultConfig() *Config {
MaxConcurrentBuilds: 1,
MaxOverlaySize: "100GB",
MaxImageStorage: 0.2,
NamePatterns: nil,
},

Oversubscription: OversubscriptionConfig{
Expand Down Expand Up @@ -532,6 +534,24 @@ func (c *Config) Validate() error {
if c.Build.Timeout <= 0 {
return fmt.Errorf("build.timeout must be positive, got %d", c.Build.Timeout)
}
if c.Limits.MaxVcpusPerInstance < 0 {
return fmt.Errorf("limits.max_vcpus_per_instance must be >= 0, got %d", c.Limits.MaxVcpusPerInstance)
}
if err := validateOptionalByteSize("limits.max_memory_per_instance", c.Limits.MaxMemoryPerInstance); err != nil {
return err
}
if err := validateByteSize("limits.max_overlay_size", c.Limits.MaxOverlaySize); err != nil {
return err
}
if c.Limits.MaxConcurrentBuilds <= 0 {
return fmt.Errorf("limits.max_concurrent_builds must be positive, got %d", c.Limits.MaxConcurrentBuilds)
}
if c.Limits.MaxImageStorage < 0 {
return fmt.Errorf("limits.max_image_storage must be >= 0, got %v", c.Limits.MaxImageStorage)
}
if err := validateNamePatternLimits(c.Limits.NamePatterns); err != nil {
return err
}
if err := validateDuration("images.auto_delete.unused_for", c.Images.AutoDelete.UnusedFor); err != nil {
return err
}
Expand Down Expand Up @@ -606,6 +626,14 @@ func validateByteSize(field string, value string) error {
return nil
}

// validateOptionalByteSize validates a byte-size setting that may be left
// unset. A value that is empty after trimming whitespace is accepted as-is;
// anything else is handed, trimmed, to validateByteSize under the same field
// name.
func validateOptionalByteSize(field string, value string) error {
	if trimmed := strings.TrimSpace(value); trimmed != "" {
		return validateByteSize(field, trimmed)
	}
	return nil
}

func validateDuration(field string, value string) error {
if strings.TrimSpace(value) == "" {
return fmt.Errorf("%s must not be empty", field)
Expand Down
84 changes: 84 additions & 0 deletions cmd/api/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,3 +288,87 @@ func TestValidateAllowsDisabledSnapshotCompressionDefaultWithoutValidAlgorithm(t
t.Fatalf("expected disabled snapshot compression default to ignore algorithm/level, got %v", err)
}
}

// TestLoadParsesNamePatternLimits writes a YAML config with two
// limits.name_patterns entries to a temporary file, loads it through Load, and
// checks that each entry's pattern and pointer-typed overrides are populated.
func TestLoadParsesNamePatternLimits(t *testing.T) {
tmp := t.TempDir()
cfgPath := filepath.Join(tmp, "config.yaml")
// Two entries: the first sets vCPU and memory overrides, the second only an
// overlay size override.
configYAML := `
limits:
name_patterns:
- pattern: '^prod-'
max_vcpus_per_instance: 8
max_memory_per_instance: 64GB
- pattern: '^tiny-'
max_overlay_size: 5GB
`
if err := os.WriteFile(cfgPath, []byte(configYAML), 0600); err != nil {
t.Fatalf("write temp config: %v", err)
}

cfg, err := Load(cfgPath)
if err != nil {
t.Fatalf("load config: %v", err)
}

// Check the entry count first so the index accesses below cannot panic.
if len(cfg.Limits.NamePatterns) != 2 {
t.Fatalf("expected 2 name pattern limit entries, got %d", len(cfg.Limits.NamePatterns))
}
if cfg.Limits.NamePatterns[0].Pattern != "^prod-" {
t.Fatalf("expected first pattern to load, got %q", cfg.Limits.NamePatterns[0].Pattern)
}
// Override fields are pointers, so each is checked for nil before it is
// dereferenced.
if cfg.Limits.NamePatterns[0].MaxVcpusPerInstance == nil || *cfg.Limits.NamePatterns[0].MaxVcpusPerInstance != 8 {
t.Fatalf("expected first max_vcpus_per_instance to load, got %#v", cfg.Limits.NamePatterns[0].MaxVcpusPerInstance)
}
if cfg.Limits.NamePatterns[0].MaxMemoryPerInstance == nil || *cfg.Limits.NamePatterns[0].MaxMemoryPerInstance != "64GB" {
t.Fatalf("expected first max_memory_per_instance to load, got %#v", cfg.Limits.NamePatterns[0].MaxMemoryPerInstance)
}
if cfg.Limits.NamePatterns[1].MaxOverlaySize == nil || *cfg.Limits.NamePatterns[1].MaxOverlaySize != "5GB" {
t.Fatalf("expected second max_overlay_size to load, got %#v", cfg.Limits.NamePatterns[1].MaxOverlaySize)
}
}

// TestValidateRejectsInvalidNamePatternLimitRegex verifies that Validate fails
// for a name-pattern entry whose regex does not compile, and that the error
// text names the offending field.
func TestValidateRejectsInvalidNamePatternLimitRegex(t *testing.T) {
	const wantField = "limits.name_patterns[0].pattern"

	cfg := defaultConfig()
	cfg.Limits.NamePatterns = []NamePatternLimitsConfig{{Pattern: "["}}

	if err := cfg.Validate(); err == nil || !strings.Contains(err.Error(), wantField) {
		t.Fatalf("expected invalid regex validation error, got %v", err)
	}
}

func TestValidateRejectsInvalidNamePatternLimitSize(t *testing.T) {
cfg := defaultConfig()
cfg.Limits.NamePatterns = []NamePatternLimitsConfig{
{
Pattern: "^prod-",
MaxMemoryPerInstance: strPtr("definitely-not-a-size"),
},
}

err := cfg.Validate()
if err == nil || !strings.Contains(err.Error(), "limits.name_patterns[0].max_memory_per_instance") {
t.Fatalf("expected invalid size validation error, got %v", err)
}
}

func TestValidateAllowsUnlimitedNamePatternLimitSize(t *testing.T) {
cfg := defaultConfig()
cfg.Limits.NamePatterns = []NamePatternLimitsConfig{
{
Pattern: "^prod-",
MaxMemoryPerInstance: strPtr("0"),
MaxOverlaySize: strPtr("0"),
},
}

if err := cfg.Validate(); err != nil {
t.Fatalf("expected zero-valued size overrides to validate, got %v", err)
}
}

// strPtr returns a pointer to a fresh copy of v, for populating optional
// *string fields in test fixtures.
func strPtr(v string) *string {
	p := new(string)
	*p = v
	return p
}
53 changes: 53 additions & 0 deletions cmd/api/config/limits_patterns.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package config

import (
"fmt"
"regexp"
"strings"
)

// NamePatternLimitsConfig holds per-name regex resource limit overrides.
// The first matching pattern wins. Omitted fields fall back to the global limits block.
//
// The override fields are pointers so that an omitted field (nil) can be told
// apart from one that was set explicitly.
type NamePatternLimitsConfig struct {
// Pattern is the regular expression for this entry (config key "pattern").
Pattern string `koanf:"pattern"`
// MaxVcpusPerInstance is the optional vCPU limit override; nil when omitted.
MaxVcpusPerInstance *int `koanf:"max_vcpus_per_instance"`
// MaxMemoryPerInstance is the optional memory limit override, written as a
// byte-size string such as "64GB"; nil when omitted.
MaxMemoryPerInstance *string `koanf:"max_memory_per_instance"`
// MaxOverlaySize is the optional overlay size limit override, written as a
// byte-size string such as "5GB"; nil when omitted.
MaxOverlaySize *string `koanf:"max_overlay_size"`
}

// validateNamePatternLimits checks every limits.name_patterns entry and
// normalizes it in place: the pattern and any size override are stored back
// with surrounding whitespace trimmed.
//
// For each entry, in order:
//   - pattern must be non-empty after trimming and must compile as a regex;
//   - max_vcpus_per_instance, when set, must be >= 0;
//   - max_memory_per_instance and max_overlay_size, when set, must be
//     non-empty and pass byte-size validation.
//
// The first violation found is returned; its message carries the entry index
// in the field path (for example "limits.name_patterns[0].pattern"). A nil or
// empty slice is valid.
func validateNamePatternLimits(patterns []NamePatternLimitsConfig) error {
	for i := range patterns {
		// Work through a pointer so the trimmed values are visible to the caller.
		entry := &patterns[i]

		entry.Pattern = strings.TrimSpace(entry.Pattern)
		if entry.Pattern == "" {
			return fmt.Errorf("limits.name_patterns[%d].pattern must not be empty", i)
		}
		// Only the compile error matters here; the compiled regex is discarded.
		if _, err := regexp.Compile(entry.Pattern); err != nil {
			return fmt.Errorf("limits.name_patterns[%d].pattern must be a valid regex, got %q: %w", i, entry.Pattern, err)
		}
		if entry.MaxVcpusPerInstance != nil && *entry.MaxVcpusPerInstance < 0 {
			return fmt.Errorf("limits.name_patterns[%d].max_vcpus_per_instance must be >= 0, got %d", i, *entry.MaxVcpusPerInstance)
		}
		if err := normalizeNamePatternByteSize(fmt.Sprintf("limits.name_patterns[%d].max_memory_per_instance", i), entry.MaxMemoryPerInstance); err != nil {
			return err
		}
		if err := normalizeNamePatternByteSize(fmt.Sprintf("limits.name_patterns[%d].max_overlay_size", i), entry.MaxOverlaySize); err != nil {
			return err
		}
	}
	return nil
}

// normalizeNamePatternByteSize validates one optional byte-size override of a
// name-pattern entry. A nil value means the override was omitted and is valid.
// Otherwise the value is trimmed, rejected if empty, written back through the
// pointer, and then checked with validateOptionalByteSize under the given
// field name.
func normalizeNamePatternByteSize(field string, value *string) error {
	if value == nil {
		return nil
	}
	trimmed := strings.TrimSpace(*value)
	if trimmed == "" {
		// Unlike the global limits, an override that is present must carry a value.
		return fmt.Errorf("%s must not be empty", field)
	}
	*value = trimmed
	return validateOptionalByteSize(field, trimmed)
}
6 changes: 6 additions & 0 deletions config.example.darwin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ limits:
max_vcpus_per_instance: 4
max_memory_per_instance: 8GB
# max_total_volume_storage: "" # 0 or empty = unlimited
# name_patterns:
# - pattern: '^build-'
# max_vcpus_per_instance: 8
# - pattern: '^tiny-'
# max_memory_per_instance: 2GB
# max_overlay_size: 5GB

# =============================================================================
# OpenTelemetry (optional, same as Linux)
Expand Down
6 changes: 6 additions & 0 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,9 @@ data_dir: /var/lib/hypeman
# max_total_volume_storage: "" # 0 or empty = unlimited
# max_concurrent_builds: 1
# max_overlay_size: 100GB
# name_patterns:
# - pattern: '^prod-'
# max_vcpus_per_instance: 32
# max_memory_per_instance: 64GB
# - pattern: '^sandbox-'
# max_overlay_size: 20GB
22 changes: 22 additions & 0 deletions lib/instances/admission.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package instances

// volumeOverlayReservationBytes sums OverlaySize over the attachments that
// have Overlay set. Attachments without an overlay contribute nothing, and a
// nil or empty slice yields 0.
func volumeOverlayReservationBytes(volumes []VolumeAttachment) int64 {
	var sum int64
	for i := range volumes {
		if !volumes[i].Overlay {
			continue
		}
		sum += volumes[i].OverlaySize
	}
	return sum
}

// requestedDiskReservationBytes returns overlaySize plus the overlay bytes of
// the given volume attachments, as computed by volumeOverlayReservationBytes.
func requestedDiskReservationBytes(overlaySize int64, volumes []VolumeAttachment) int64 {
	volumeBytes := volumeOverlayReservationBytes(volumes)
	return overlaySize + volumeBytes
}

// storedDiskReservationBytes computes the disk reservation for persisted
// instance metadata from its OverlaySize and Volumes. Nil metadata reserves
// nothing.
func storedDiskReservationBytes(stored *StoredMetadata) int64 {
	if stored != nil {
		return requestedDiskReservationBytes(stored.OverlaySize, stored.Volumes)
	}
	return 0
}
33 changes: 33 additions & 0 deletions lib/instances/admission_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package instances

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestRequestedDiskReservationBytes(t *testing.T) {
t.Parallel()

diskBytes := requestedDiskReservationBytes(10, []VolumeAttachment{
{VolumeID: "base-only", Overlay: false, OverlaySize: 100},
{VolumeID: "overlay-a", Overlay: true, OverlaySize: 20},
{VolumeID: "overlay-b", Overlay: true, OverlaySize: 30},
})

assert.Equal(t, int64(60), diskBytes)
}

func TestStoredDiskReservationBytes(t *testing.T) {
t.Parallel()

diskBytes := storedDiskReservationBytes(&StoredMetadata{
OverlaySize: 15,
Volumes: []VolumeAttachment{
{VolumeID: "base-only", Overlay: false, OverlaySize: 100},
{VolumeID: "overlay", Overlay: true, OverlaySize: 25},
},
})

assert.Equal(t, int64(40), diskBytes)
}
31 changes: 18 additions & 13 deletions lib/instances/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,31 +164,32 @@ func (m *manager) createInstance(
if overlaySize == 0 {
overlaySize = 10 * 1024 * 1024 * 1024 // 10GB default
}
// Validate overlay size against max
if overlaySize > m.limits.MaxOverlaySize {
return nil, fmt.Errorf("overlay size %d exceeds maximum allowed size %d", overlaySize, m.limits.MaxOverlaySize)
}
vcpus := req.Vcpus
if vcpus == 0 {
vcpus = 2
}

// Validate per-instance resource limits
if m.limits.MaxVcpusPerInstance > 0 && vcpus > m.limits.MaxVcpusPerInstance {
return nil, fmt.Errorf("vcpus %d exceeds maximum allowed %d per instance", vcpus, m.limits.MaxVcpusPerInstance)
}
totalMemory := size + hotplugSize
if m.limits.MaxMemoryPerInstance > 0 && totalMemory > m.limits.MaxMemoryPerInstance {
return nil, fmt.Errorf("total memory %d (size + hotplug_size) exceeds maximum allowed %d per instance", totalMemory, m.limits.MaxMemoryPerInstance)
if err := validateResourceLimitsForName(req.Name, m.limits, overlaySize, vcpus, totalMemory); err != nil {
return nil, err
}

// Validate aggregate resource limits via ResourceValidator (if configured)
diskBytes := requestedDiskReservationBytes(overlaySize, req.Volumes)
reservedResources := false

// Reserve aggregate resources for this create while it is in flight.
if m.resourceValidator != nil {
needsGPU := req.GPU != nil && req.GPU.Profile != ""
if err := m.resourceValidator.ValidateAllocation(ctx, vcpus, totalMemory, req.NetworkBandwidthDownload, req.NetworkBandwidthUpload, req.DiskIOBps, needsGPU); err != nil {
log.ErrorContext(ctx, "resource validation failed", "error", err)
if err := m.resourceValidator.ReserveAllocation(ctx, id, vcpus, totalMemory, req.NetworkBandwidthDownload, req.NetworkBandwidthUpload, req.DiskIOBps, diskBytes, needsGPU); err != nil {
log.ErrorContext(ctx, "resource reservation failed", "error", err)
return nil, fmt.Errorf("%w: %v", ErrInsufficientResources, err)
}
reservedResources = true
defer func() {
if reservedResources {
m.resourceValidator.FinishAllocation(id)
}
}()
}

if req.Env == nil {
Expand Down Expand Up @@ -492,6 +493,10 @@ func (m *manager) createInstance(
return nil, err
}
startVMSpanEnd(nil)
if reservedResources {
m.resourceValidator.FinishAllocation(id)
reservedResources = false
}

// 20. Persist runtime metadata updates after VM boot.
meta = &metadata{StoredMetadata: *stored}
Expand Down
3 changes: 3 additions & 0 deletions lib/instances/fork.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,9 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin
forkMeta.IP = ""
forkMeta.MAC = ""
}
if err := validateResourceLimitsForName(req.Name, m.limits, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize); err != nil {
return nil, err
}

if source.State == StateStandby {
snapshotConfigPath := m.paths.InstanceSnapshotConfig(forkID)
Expand Down
10 changes: 8 additions & 2 deletions lib/instances/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,23 @@ type ImageUsageRecorderSetter interface {

// ResourceLimits contains configurable resource limits for instances
type ResourceLimits struct {
MaxOverlaySize int64 // Maximum overlay disk size in bytes per instance
MaxOverlaySize int64 // Maximum overlay disk size in bytes per instance (0 = unlimited)
MaxVcpusPerInstance int // Maximum vCPUs per instance (0 = unlimited)
MaxMemoryPerInstance int64 // Maximum memory in bytes per instance (0 = unlimited)
NamePatterns []NamedResourceLimit
}

// ResourceValidator validates if resources can be allocated
type ResourceValidator interface {
// ValidateAllocation checks if the requested resources are available.
// Returns nil if allocation is allowed, or a detailed error describing
// which resource is insufficient and the current capacity/usage.
ValidateAllocation(ctx context.Context, vcpus int, memoryBytes int64, networkDownloadBps int64, networkUploadBps int64, diskIOBps int64, needsGPU bool) error
ValidateAllocation(ctx context.Context, vcpus int, memoryBytes int64, networkDownloadBps int64, networkUploadBps int64, diskIOBps int64, diskBytes int64, needsGPU bool) error
// ReserveAllocation tentatively reserves resources for an in-flight operation.
// Call FinishAllocation once the operation fails or becomes visible to resource accounting.
ReserveAllocation(ctx context.Context, instanceID string, vcpus int, memoryBytes int64, networkDownloadBps int64, networkUploadBps int64, diskIOBps int64, diskBytes int64, needsGPU bool) error
// FinishAllocation removes any pending reservation for the given instance ID.
FinishAllocation(instanceID string)
}

type manager struct {
Expand Down
Loading
Loading