Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions lib/hypervisor/firecracker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,9 @@ type instanceInfo struct {
}

type restoreMetadata struct {
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
SnapshotSourceDataDir string `json:"snapshot_source_data_dir,omitempty"`
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
SnapshotSourceDataDir string `json:"snapshot_source_data_dir,omitempty"`
RetainSnapshotSourceDataDirAlias bool `json:"retain_snapshot_source_data_dir_alias,omitempty"`
}

func toBootSource(cfg hypervisor.VMConfig) bootSource {
Expand Down
25 changes: 22 additions & 3 deletions lib/hypervisor/firecracker/fork.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package firecracker

import (
"context"
"fmt"
"os"
"path/filepath"

"github.com/kernel/hypeman/lib/hypervisor"
Expand Down Expand Up @@ -48,9 +50,26 @@ func (s *Starter) PrepareFork(ctx context.Context, req hypervisor.ForkPrepareReq
}
}
if req.SourceDataDir != "" && req.TargetDataDir != "" && req.SourceDataDir != req.TargetDataDir {
if meta.SnapshotSourceDataDir != req.SourceDataDir {
meta.SnapshotSourceDataDir = req.SourceDataDir
changed = true
if meta.RetainSnapshotSourceDataDirAlias && meta.SnapshotSourceDataDir != "" {
// Keep the upstream source path for snapshot-derived forks. The retained
// Firecracker base can still reference that path after later diff snapshots.
} else {
retainAlias := false
if _, err := os.Stat(req.SourceDataDir); err != nil {
if os.IsNotExist(err) {
retainAlias = true
} else {
return hypervisor.ForkPrepareResult{}, fmt.Errorf("stat snapshot source data dir %q: %w", req.SourceDataDir, err)
}
}
if meta.SnapshotSourceDataDir != req.SourceDataDir {
meta.SnapshotSourceDataDir = req.SourceDataDir
changed = true
}
if meta.RetainSnapshotSourceDataDirAlias != retainAlias {
meta.RetainSnapshotSourceDataDirAlias = retainAlias
changed = true
}
}
}

Expand Down
96 changes: 96 additions & 0 deletions lib/hypervisor/firecracker/fork_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,100 @@ func TestPrepareFork_SnapshotRewritePersistsRestoreMetadata(t *testing.T) {
require.Len(t, meta.NetworkOverrides, 1)
assert.Equal(t, "tap-new", meta.NetworkOverrides[0].HostDevName)
assert.Equal(t, filepath.Join(tmp, "source"), meta.SnapshotSourceDataDir)
assert.True(t, meta.RetainSnapshotSourceDataDirAlias)
}

func TestPrepareFork_DoesNotRetainExistingSourceAlias(t *testing.T) {
starter := NewStarter()
tmp := t.TempDir()
sourceDir := filepath.Join(tmp, "source")
targetDir := filepath.Join(tmp, "target")
require.NoError(t, os.MkdirAll(sourceDir, 0755))
require.NoError(t, os.MkdirAll(targetDir, 0755))
require.NoError(t, saveRestoreMetadata(targetDir, nil))

_, err := starter.PrepareFork(context.Background(), hypervisor.ForkPrepareRequest{
SnapshotConfigPath: filepath.Join(targetDir, "snapshots", "snapshot-latest", "config.json"),
SourceDataDir: sourceDir,
TargetDataDir: targetDir,
})
require.NoError(t, err)

meta, err := loadRestoreMetadata(targetDir)
require.NoError(t, err)
assert.Equal(t, sourceDir, meta.SnapshotSourceDataDir)
assert.False(t, meta.RetainSnapshotSourceDataDirAlias)
}

func TestPrepareFork_ReturnsSourceStatErrors(t *testing.T) {
starter := NewStarter()
tmp := t.TempDir()
targetDir := filepath.Join(tmp, "target")
require.NoError(t, os.MkdirAll(targetDir, 0755))
require.NoError(t, saveRestoreMetadata(targetDir, nil))

_, err := starter.PrepareFork(context.Background(), hypervisor.ForkPrepareRequest{
SnapshotConfigPath: filepath.Join(targetDir, "snapshots", "snapshot-latest", "config.json"),
SourceDataDir: filepath.Join(tmp, "source") + "\x00",
TargetDataDir: targetDir,
})
require.Error(t, err)
assert.Contains(t, err.Error(), "stat snapshot source data dir")
}

func TestPrepareFork_PreservesRetainedUpstreamAlias(t *testing.T) {
starter := NewStarter()
tmp := t.TempDir()
upstreamDir := filepath.Join(tmp, "upstream")
sourceDir := filepath.Join(tmp, "source")
targetDir := filepath.Join(tmp, "target")
require.NoError(t, os.MkdirAll(sourceDir, 0755))
require.NoError(t, os.MkdirAll(targetDir, 0755))
require.NoError(t, saveRestoreMetadataState(targetDir, &restoreMetadata{
SnapshotSourceDataDir: upstreamDir,
RetainSnapshotSourceDataDirAlias: true,
}))

_, err := starter.PrepareFork(context.Background(), hypervisor.ForkPrepareRequest{
SnapshotConfigPath: filepath.Join(targetDir, "snapshots", "snapshot-latest", "config.json"),
SourceDataDir: sourceDir,
TargetDataDir: targetDir,
})
require.NoError(t, err)

meta, err := loadRestoreMetadata(targetDir)
require.NoError(t, err)
assert.Equal(t, upstreamDir, meta.SnapshotSourceDataDir)
assert.True(t, meta.RetainSnapshotSourceDataDirAlias)
}

func TestPrepareFork_NetworkRewritePreservesRetainedAlias(t *testing.T) {
starter := NewStarter()
tmp := t.TempDir()
upstreamDir := filepath.Join(tmp, "upstream")
targetDir := filepath.Join(tmp, "target")
require.NoError(t, os.MkdirAll(targetDir, 0755))
require.NoError(t, saveRestoreMetadataState(targetDir, &restoreMetadata{
SnapshotSourceDataDir: upstreamDir,
RetainSnapshotSourceDataDirAlias: true,
NetworkOverrides: []networkOverride{{
IfaceID: "eth0",
HostDevName: "tap-old",
}},
}))

_, err := starter.PrepareFork(context.Background(), hypervisor.ForkPrepareRequest{
SnapshotConfigPath: filepath.Join(targetDir, "snapshots", "snapshot-latest", "config.json"),
Network: &hypervisor.ForkNetworkConfig{
TAPDevice: "tap-new",
},
})
require.NoError(t, err)

meta, err := loadRestoreMetadata(targetDir)
require.NoError(t, err)
require.Len(t, meta.NetworkOverrides, 1)
assert.Equal(t, "tap-new", meta.NetworkOverrides[0].HostDevName)
assert.Equal(t, upstreamDir, meta.SnapshotSourceDataDir)
assert.True(t, meta.RetainSnapshotSourceDataDirAlias)
}
2 changes: 1 addition & 1 deletion lib/hypervisor/firecracker/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string,
if err != nil {
return 0, nil, fmt.Errorf("load firecracker snapshot: %w", err)
}
if meta.SnapshotSourceDataDir != "" {
if meta.SnapshotSourceDataDir != "" && !meta.RetainSnapshotSourceDataDirAlias {
meta.SnapshotSourceDataDir = ""
if err := saveRestoreMetadataState(filepath.Dir(socketPath), meta); err != nil {
return 0, nil, fmt.Errorf("clear firecracker snapshot source alias metadata: %w", err)
Expand Down
4 changes: 4 additions & 0 deletions lib/hypervisor/qemu/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,10 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string,
}
log.DebugContext(ctx, "VM ready", "duration_ms", time.Since(migrationWaitStart).Milliseconds())

if err := saveVMConfig(filepath.Dir(socketPath), config); err != nil {
return 0, nil, fmt.Errorf("save restored vm config: %w", err)
}

cu.Release()
log.DebugContext(ctx, "QEMU restore complete", "pid", pid, "total_duration_ms", time.Since(startTime).Milliseconds())
return pid, hv, nil
Expand Down
90 changes: 90 additions & 0 deletions lib/instances/firecracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/kernel/hypeman/lib/network"
"github.com/kernel/hypeman/lib/paths"
"github.com/kernel/hypeman/lib/resources"
snapshottest "github.com/kernel/hypeman/lib/snapshot/testsupport"
"github.com/kernel/hypeman/lib/system"
"github.com/kernel/hypeman/lib/volumes"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -557,6 +558,95 @@ func TestFirecrackerSnapshotFeature(t *testing.T) {
})
}

func TestFirecrackerWarmForkChain(t *testing.T) {
t.Parallel()
requireFirecrackerIntegrationPrereqs(t)

mgr, tmpDir := setupTestManagerForFirecrackerNoNetwork(t)
ctx := context.Background()
p := paths.New(tmpDir)

imageManager, err := images.NewManager(p, 1, nil)
require.NoError(t, err)
imageName := integrationTestImageRef(t, "docker.io/library/alpine:latest")
snapshottest.EnsureImageReady(t, ctx, p, imageManager, imageName)

systemManager := system.NewManager(p)
require.NoError(t, systemManager.EnsureSystemFiles(ctx))

source, err := mgr.CreateInstance(ctx, CreateInstanceRequest{
Name: "fc-warm-chain-src",
Image: imageName,
Size: 1024 * 1024 * 1024,
OverlaySize: 1024 * 1024 * 1024,
Vcpus: 1,
NetworkEnabled: false,
Hypervisor: hypervisor.TypeFirecracker,
Cmd: []string{"sleep", "infinity"},
})
require.NoError(t, err)
sourceID := source.Id
sourceDeleted := false
t.Cleanup(func() {
if !sourceDeleted {
_ = mgr.DeleteInstance(context.Background(), sourceID)
}
})

source, err = waitForInstanceState(ctx, mgr, sourceID, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
require.NoError(t, waitForExecAgent(ctx, mgr, sourceID, 30*time.Second))

snapshot, err := mgr.CreateSnapshot(ctx, sourceID, CreateSnapshotRequest{
Kind: SnapshotKindStandby,
Name: "fc-warm-chain-snap",
})
require.NoError(t, err)
require.Equal(t, SnapshotKindStandby, snapshot.Kind)

require.NoError(t, mgr.DeleteInstance(ctx, sourceID))
sourceDeleted = true

warm, err := mgr.ForkSnapshot(ctx, snapshot.Id, ForkSnapshotRequest{
Name: "fc-warm-chain-warm",
TargetState: StateRunning,
})
require.NoError(t, err)
warmID := warm.Id
warmDeleted := false
t.Cleanup(func() {
if !warmDeleted {
_ = mgr.DeleteInstance(context.Background(), warmID)
}
})
warm, err = waitForInstanceState(ctx, mgr, warmID, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
require.NoError(t, waitForExecAgent(ctx, mgr, warmID, 30*time.Second))

child, err := mgr.ForkInstance(ctx, warmID, ForkInstanceRequest{
Name: "fc-warm-chain-child",
FromRunning: true,
TargetState: StateStopped,
})
require.NoError(t, err)
require.Equal(t, StateStopped, child.State)
childID := child.Id
t.Cleanup(func() { _ = mgr.DeleteInstance(context.Background(), childID) })

warm, err = mgr.GetInstance(ctx, warmID)
require.NoError(t, err)
if warm.State != StateRunning {
warm, err = waitForInstanceState(ctx, mgr, warmID, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
}
require.Equal(t, StateRunning, warm.State)
require.NoError(t, waitForExecAgent(ctx, mgr, warmID, 30*time.Second))

require.NoError(t, mgr.DeleteInstance(ctx, warmID))
warmDeleted = true
require.NoError(t, mgr.DeleteSnapshot(ctx, snapshot.Id))
}

// TestFirecrackerForkIsolation verifies CoW isolation between a firecracker
// source's standby snapshot and a fork derived from it. A fork must end up
// with its own mem-file inode (reflink-cloned, not hardlinked) so that
Expand Down
10 changes: 9 additions & 1 deletion lib/instances/fork.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func (m *manager) forkInstance(ctx context.Context, id string, req ForkInstanceR
}

forked, forkErr := m.forkInstanceFromStoppedOrStandby(ctx, id, req, true)
if forkErr == nil {
if forkErr == nil && targetState != StateStopped {
if err := m.rotateSourceVsockForRestore(ctx, id, forked.Id); err != nil {
forkErr = fmt.Errorf("prepare source snapshot for restore: %w", err)
if cleanupErr := m.cleanupForkInstanceOnError(ctx, forked.Id); cleanupErr != nil {
Expand Down Expand Up @@ -437,6 +437,14 @@ func (m *manager) applyForkTargetState(ctx context.Context, forkID string, targe
if err := os.RemoveAll(m.paths.InstanceSnapshotLatest(forkID)); err != nil {
return nil, fmt.Errorf("remove fork snapshot: %w", err)
}
meta, err := m.loadMetadata(forkID)
if err != nil {
return nil, fmt.Errorf("load stopped fork metadata: %w", err)
}
meta.StoredMetadata.VsockCID = generateVsockCID(forkID)
if err := m.saveMetadata(meta); err != nil {
return nil, fmt.Errorf("save stopped fork metadata: %w", err)
}
return returnWithReadiness(m.getInstance(ctx, forkID))
}
case StateRunning:
Expand Down
Loading
Loading