Skip to content

Commit 5bc10ad

Browse files
committed
Harden burst-proof accounting paths
1 parent ee9468e commit 5bc10ad

File tree

7 files changed

+119
-9
lines changed

7 files changed

+119
-9
lines changed

lib/images/disk_usage.go

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,29 +9,52 @@ import (
99

1010
// totalReadyImageBytesFromMetadata sums ready image sizes directly from metadata.json files.
1111
// This is conservative for admission control and disk accounting: if metadata says an
12-
// image is ready, we count its recorded size without re-validating the rootfs path.
12+
// image is ready, we count its recorded size without re-validating the rootfs path. If
13+
// the metadata file is unreadable or malformed, we fall back to counting any rootfs disk
14+
// files found in the digest directory so we do not undercount host disk usage.
1315
func totalReadyImageBytesFromMetadata(imagesDir string) (int64, error) {
1416
var total int64
1517

1618
err := filepath.Walk(imagesDir, func(path string, info os.FileInfo, err error) error {
1719
if err != nil {
18-
return nil
20+
if os.IsNotExist(err) {
21+
return nil
22+
}
23+
return err
1924
}
2025
if info.IsDir() || info.Name() != "metadata.json" {
2126
return nil
2227
}
2328

2429
data, err := os.ReadFile(path)
2530
if err != nil {
26-
return nil
31+
rootfsBytes, fallbackErr := totalRootfsBytesInDigestDir(filepath.Dir(path))
32+
if fallbackErr == nil {
33+
total += rootfsBytes
34+
return nil
35+
}
36+
return fmt.Errorf("read image metadata %s: %w", path, err)
2737
}
2838

2939
var meta imageMetadata
3040
if err := json.Unmarshal(data, &meta); err != nil {
31-
return nil
41+
rootfsBytes, fallbackErr := totalRootfsBytesInDigestDir(filepath.Dir(path))
42+
if fallbackErr == nil {
43+
total += rootfsBytes
44+
return nil
45+
}
46+
return fmt.Errorf("unmarshal image metadata %s: %w", path, err)
3247
}
3348
if meta.Status == StatusReady && meta.SizeBytes > 0 {
3449
total += meta.SizeBytes
50+
return nil
51+
}
52+
if meta.Status == StatusReady {
53+
rootfsBytes, err := totalRootfsBytesInDigestDir(filepath.Dir(path))
54+
if err != nil {
55+
return fmt.Errorf("stat ready image rootfs for %s: %w", path, err)
56+
}
57+
total += rootfsBytes
3558
}
3659
return nil
3760
})
@@ -50,7 +73,10 @@ func totalOCICacheBlobBytesFromFilesystem(blobDir string) (int64, error) {
5073

5174
err := filepath.Walk(blobDir, func(path string, info os.FileInfo, err error) error {
5275
if err != nil {
53-
return nil
76+
if os.IsNotExist(err) {
77+
return nil
78+
}
79+
return err
5480
}
5581
if info.IsDir() {
5682
return nil
@@ -117,3 +143,32 @@ func (m *manager) computeDiskUsageTotals() (int64, int64, error) {
117143
}
118144
return readyImageBytes, ociCacheBytes, nil
119145
}
146+
147+
func totalRootfsBytesInDigestDir(digestDir string) (int64, error) {
148+
rootfsPaths, err := filepath.Glob(filepath.Join(digestDir, "rootfs.*"))
149+
if err != nil {
150+
return 0, err
151+
}
152+
if len(rootfsPaths) == 0 {
153+
return 0, os.ErrNotExist
154+
}
155+
156+
var total int64
157+
for _, rootfsPath := range rootfsPaths {
158+
info, err := os.Stat(rootfsPath)
159+
if err != nil {
160+
if os.IsNotExist(err) {
161+
continue
162+
}
163+
return 0, err
164+
}
165+
if info.IsDir() {
166+
continue
167+
}
168+
total += info.Size()
169+
}
170+
if total == 0 {
171+
return 0, os.ErrNotExist
172+
}
173+
return total, nil
174+
}

lib/images/disk_usage_test.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package images
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
func TestTotalReadyImageBytesFromMetadata_UsesRootfsFallbackForMalformedMetadata(t *testing.T) {
12+
t.Parallel()
13+
14+
imagesDir := t.TempDir()
15+
digestDir := filepath.Join(imagesDir, "docker.io", "library", "alpine", "sha256deadbeef")
16+
require.NoError(t, os.MkdirAll(digestDir, 0o755))
17+
require.NoError(t, os.WriteFile(filepath.Join(digestDir, "metadata.json"), []byte("{not-json"), 0o644))
18+
require.NoError(t, os.WriteFile(filepath.Join(digestDir, "rootfs.erofs"), []byte("rootfs-data"), 0o644))
19+
20+
total, err := totalReadyImageBytesFromMetadata(imagesDir)
21+
require.NoError(t, err)
22+
require.Equal(t, int64(len("rootfs-data")), total)
23+
}
24+
25+
func TestTotalReadyImageBytesFromMetadata_UsesRootfsFallbackForReadyImageWithoutSize(t *testing.T) {
26+
t.Parallel()
27+
28+
imagesDir := t.TempDir()
29+
digestDir := filepath.Join(imagesDir, "docker.io", "library", "alpine", "sha256deadbeef")
30+
require.NoError(t, os.MkdirAll(digestDir, 0o755))
31+
require.NoError(t, os.WriteFile(filepath.Join(digestDir, "metadata.json"), []byte(`{"status":"ready","size_bytes":0}`), 0o644))
32+
require.NoError(t, os.WriteFile(filepath.Join(digestDir, "rootfs.erofs"), []byte("another-rootfs"), 0o644))
33+
34+
total, err := totalReadyImageBytesFromMetadata(imagesDir)
35+
require.NoError(t, err)
36+
require.Equal(t, int64(len("another-rootfs")), total)
37+
}

lib/instances/admission_allocations.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ func (m *manager) ensureAdmissionAllocations() error {
4747
if err != nil {
4848
continue
4949
}
50+
// Cold-start cache hydration uses socket existence as the ground-truth
51+
// signal for "currently active" because persisted metadata can outlive a
52+
// crashed or already-stopped hypervisor process.
5053
allocations[id] = m.allocationFromStoredMetadata(&meta.StoredMetadata, admissionSocketActive(&meta.StoredMetadata))
5154
}
5255

@@ -71,6 +74,9 @@ func (m *manager) syncAdmissionAllocation(meta *metadata) {
7174
m.admissionAllocations = make(map[string]resources.InstanceAllocation)
7275
}
7376

77+
// Incremental cache updates trust metadata because lifecycle paths update
78+
// visibility explicitly once boot/restore has succeeded, avoiding repeated
79+
// filesystem/socket probes on the hot path.
7480
m.admissionAllocations[meta.Id] = m.allocationFromStoredMetadata(&meta.StoredMetadata, admissionMetadataActive(&meta.StoredMetadata))
7581
}
7682

lib/instances/create.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,10 @@ func (m *manager) createInstance(
511511
return nil, err
512512
}
513513
startVMSpanEnd(nil)
514+
// Mark the instance visible before releasing its pending reservation so we
515+
// never create an undercount window. The tiny overlap is intentionally
516+
// over-conservative: concurrent admissions may briefly see both visible and
517+
// pending usage for this instance, but they will not oversubscribe the host.
514518
m.setAdmissionAllocationActive(stored, true)
515519
if reservedResources {
516520
m.resourceValidator.FinishAllocation(id)

lib/instances/restore.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,10 @@ func (m *manager) restoreInstance(
261261
return nil, fmt.Errorf("resume vm failed: %w", err)
262262
}
263263
resumeSpanEnd(nil)
264+
// Mark the instance visible before releasing its pending reservation so we
265+
// never create an undercount window. The tiny overlap is intentionally
266+
// over-conservative: concurrent admissions may briefly see both visible and
267+
// pending usage for this instance, but they will not oversubscribe the host.
264268
m.setAdmissionAllocationActive(stored, true)
265269
if reservedResources {
266270
m.resourceValidator.FinishAllocation(id)

lib/instances/start.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,10 @@ func (m *manager) startInstance(
196196
return nil, err
197197
}
198198
startVMSpanEnd(nil)
199+
// Mark the instance visible before releasing its pending reservation so we
200+
// never create an undercount window. The tiny overlap is intentionally
201+
// over-conservative: concurrent admissions may briefly see both visible and
202+
// pending usage for this instance, but they will not oversubscribe the host.
199203
m.setAdmissionAllocationActive(stored, true)
200204
if reservedResources {
201205
m.resourceValidator.FinishAllocation(id)

lib/resources/resource.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -443,8 +443,8 @@ func (m *Manager) GetFullStatus(ctx context.Context) (*FullResourceStatus, error
443443

444444
// CanAllocate checks if the requested amount can be allocated for a resource type.
445445
func (m *Manager) CanAllocate(ctx context.Context, rt ResourceType, amount int64) (bool, error) {
446-
m.mu.Lock()
447-
defer m.mu.Unlock()
446+
m.mu.RLock()
447+
defer m.mu.RUnlock()
448448

449449
usage, err := m.collectAdmissionUsageLocked(ctx)
450450
if err != nil {
@@ -713,8 +713,8 @@ func (m *Manager) validateAllocationLocked(ctx context.Context, excludeID string
713713
// Returns nil if allocation is allowed, or a detailed error describing
714714
// which resource is insufficient and the current capacity/usage.
715715
func (m *Manager) ValidateAllocation(ctx context.Context, vcpus int, memoryBytes int64, networkDownloadBps int64, networkUploadBps int64, diskIOBps int64, diskBytes int64, needsGPU bool) error {
716-
m.mu.Lock()
717-
defer m.mu.Unlock()
716+
m.mu.RLock()
717+
defer m.mu.RUnlock()
718718

719719
req := newPendingAllocation(vcpus, memoryBytes, networkDownloadBps, networkUploadBps, diskIOBps, diskBytes, needsGPU)
720720
return m.validateAllocationLocked(ctx, "", req)

0 commit comments

Comments
 (0)