Skip to content

Commit b3f652d

Browse files
committed
<fix>[kvm]: preserve skip list on nodeLeft with TTL orphan mechanism
- nodeLeft moves VMs to orphaned skip list with timestamp (not immediate removal) - Periodic cleanup task (60s) removes orphaned entries after timeout (default 10min) - VmTracer double-check: orphaned VMs in Starting/Migrating skip missing-VM handling - New GlobalConfig: kvm.orphanedVmSkipTimeout - Prevents split-brain when MN restarts while VMs starting on kvmagent Resolves: ZSTAC-80821 Change-Id: Ie56e937941034d29598627dcdf88d61260b06434
1 parent 1d41921 commit b3f652d

3 files changed

Lines changed: 100 additions & 3 deletions

File tree

compute/src/main/java/org/zstack/compute/vm/VmTracer.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,8 @@ private void checkFromManagementServerSide() {
116116
String vmUuid = e.getKey();
117117
VmInstanceState expectedState = e.getValue();
118118
if (expectedState != VmInstanceState.Stopped && expectedState != VmInstanceState.Created && !hostSideStates.containsKey(vmUuid)
119-
&& (vmsToSkipHostSide == null || !vmsToSkipHostSide.contains(vmUuid))) {
119+
&& (vmsToSkipHostSide == null || !vmsToSkipHostSide.contains(vmUuid))
120+
&& !shouldSkipMissingVmHandling(vmUuid, expectedState)) {
120121
handleMissingVm(vmUuid, expectedState);
121122
}
122123
}
@@ -173,6 +174,17 @@ protected Map<String, VmInstanceState> buildManagementServerSideVmStates(String
173174
return mgmtSideStates;
174175
}
175176

177+
/**
178+
* Hook for subclasses to add additional skip logic for missing VM handling.
179+
* @param vmUuid the VM uuid
180+
* @param expectedState the expected state in DB
181+
* @return true if missing VM handling should be skipped
182+
*/
183+
protected boolean shouldSkipMissingVmHandling(String vmUuid, VmInstanceState expectedState) {
184+
// Default: no additional skip logic
185+
return false;
186+
}
187+
176188
protected void reportVmState(final String hostUuid, final Map<String, VmInstanceState> vmStates, final Set<String> vmsToSkipHostSide, final Map<String, VmInstanceState> mgmtSideStates) {
177189
if (logger.isTraceEnabled()) {
178190
for (Map.Entry<String, VmInstanceState> e : vmStates.entrySet()) {

plugin/kvm/src/main/java/org/zstack/kvm/KVMGlobalConfig.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,4 +145,8 @@ public class KVMGlobalConfig {
145145
@BindResourceConfig({HostVO.class, ClusterVO.class})
146146
public static GlobalConfig MINIMUM_MEMORY_SIZE_BEFORE_START_VM = new GlobalConfig(CATEGORY, "min.free.memory.size");
147147

148+
@GlobalConfigValidation(numberGreaterThan = 0)
149+
@GlobalConfigDef(defaultValue = "600", type = Long.class, description = "timeout in seconds for orphaned VM skip list entries (default 10 minutes)")
150+
public static GlobalConfig ORPHANED_VM_SKIP_TIMEOUT = new GlobalConfig(CATEGORY, "orphanedVmSkipTimeout");
151+
148152
}

plugin/kvm/src/main/java/org/zstack/kvm/KvmVmSyncPingTask.java

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.zstack.core.thread.ChainTask;
1313
import org.zstack.core.thread.SyncTaskChain;
1414
import org.zstack.core.thread.ThreadFacade;
15+
import org.zstack.core.thread.PeriodicTask;
1516
import org.zstack.header.Component;
1617
import org.zstack.header.apimediator.ApiMediatorConstant;
1718
import org.zstack.header.core.Completion;
@@ -39,6 +40,7 @@
3940

4041
import java.util.*;
4142
import java.util.concurrent.ConcurrentHashMap;
43+
import java.util.concurrent.TimeUnit;
4244

4345
import static org.zstack.core.Platform.getReflections;
4446
import static org.zstack.core.Platform.operr;
@@ -65,6 +67,9 @@ public class KvmVmSyncPingTask extends VmTracer implements KVMPingAgentNoFailure
6567
// A map from apiId to VM instance uuid
6668
private ConcurrentHashMap<String, ConcurrentHashMap<String, String>> vmApis = new ConcurrentHashMap<>();
6769
private ConcurrentHashMap<String, Set<String>> vmsToSkip = new ConcurrentHashMap<>();
70+
// Orphaned skip list: VMs whose management node left but might still be starting
71+
// Map: vmUuid -> timestamp when orphaned
72+
private ConcurrentHashMap<String, Long> orphanedVmsToSkip = new ConcurrentHashMap<>();
6873
private List<Class<? extends Message>> skipVmTracerMessages = new ArrayList<>();
6974
private List<Class> skipVmTracerReplies = new ArrayList<>();
7075
private Map<String, Integer> vmInShutdownMap = new ConcurrentHashMap<>();
@@ -142,12 +147,16 @@ protected void run(Map tokens, Object data) {
142147
String vmUuid = vmApis.get(data1.getManagementNodeId()).remove(data1.getApiId());
143148
logger.info("Continuing tracing VM: " + vmUuid);
144149
vmsToSkip.get(data1.getManagementNodeId()).remove(vmUuid);
150+
// Also remove from orphaned list if present
151+
orphanedVmsToSkip.remove(vmUuid);
145152
return;
146153
}
147154

148155
if (data1.getVmUuid() != null) {
149156
logger.info("Continuing tracing VM: " + data1.getVmUuid());
150157
vmsToSkip.get(data1.getManagementNodeId()).remove(data1.getVmUuid());
158+
// Also remove from orphaned list if present
159+
orphanedVmsToSkip.remove(data1.getVmUuid());
151160
}
152161
}
153162
});
@@ -307,8 +316,45 @@ public HypervisorType getHypervisorTypeForReestablishExtensionPoint() {
307316
return HypervisorType.valueOf(KVMConstant.KVM_HYPERVISOR_TYPE);
308317
}
309318

319+
private void startOrphanedVmCleanupTask() {
320+
thdf.submitPeriodicTask(new PeriodicTask() {
321+
@Override
322+
public TimeUnit getTimeUnit() {
323+
return TimeUnit.SECONDS;
324+
}
325+
326+
@Override
327+
public long getInterval() {
328+
return 60; // Check every 60 seconds
329+
}
330+
331+
@Override
332+
public String getName() {
333+
return "orphaned-vm-skip-list-cleanup";
334+
}
335+
336+
@Override
337+
public void run() {
338+
long now = System.currentTimeMillis();
339+
long timeoutMs = KVMGlobalConfig.ORPHANED_VM_SKIP_TIMEOUT.value(Long.class) * 1000;
340+
341+
orphanedVmsToSkip.entrySet().removeIf(entry -> {
342+
long age = now - entry.getValue();
343+
if (age > timeoutMs) {
344+
logger.info(String.format("Removing VM[uuid:%s] from orphaned skip list after %d seconds",
345+
entry.getKey(), age / 1000));
346+
return true;
347+
}
348+
return false;
349+
});
350+
}
351+
});
352+
}
353+
310354
@Override
311355
public boolean start() {
356+
startOrphanedVmCleanupTask();
357+
312358
restf.registerSyncHttpCallHandler(KVMConstant.KVM_REPORT_VM_STATE, ReportVmStateCmd.class, new SyncHttpCallHandler<ReportVmStateCmd>() {
313359
private void reportState(final ReportVmStateCmd cmd) {
314360
thdf.chainSubmit(new ChainTask(null) {
@@ -446,7 +492,18 @@ public void nodeJoin(ManagementNodeInventory inv) {
446492
@Override
447493
public void nodeLeft(ManagementNodeInventory inv) {
448494
vmApis.remove(inv.getUuid());
449-
vmsToSkip.remove(inv.getUuid());
495+
496+
// Don't immediately remove skip list - move to orphaned set with timestamp
497+
// This prevents split-brain when MN restarts and VMs are still starting on kvmagent
498+
Set<String> vmsToOrphan = vmsToSkip.remove(inv.getUuid());
499+
if (vmsToOrphan != null && !vmsToOrphan.isEmpty()) {
500+
long now = System.currentTimeMillis();
501+
for (String vmUuid : vmsToOrphan) {
502+
orphanedVmsToSkip.put(vmUuid, now);
503+
logger.info(String.format("VM[uuid:%s] moved to orphaned skip list due to management node[uuid:%s] left",
504+
vmUuid, inv.getUuid()));
505+
}
506+
}
450507
}
451508

452509
@Override
@@ -460,6 +517,30 @@ public void iJoin(ManagementNodeInventory inv) {
460517
}
461518

462519
public boolean isVmDoNotNeedToTrace(String vmUuid) {
463-
return vmsToSkip.values().stream().anyMatch(vmsToSkipSet -> vmsToSkipSet.contains(vmUuid));
520+
return vmsToSkip.values().stream().anyMatch(vmsToSkipSet -> vmsToSkipSet.contains(vmUuid))
521+
|| orphanedVmsToSkip.containsKey(vmUuid);
522+
}
523+
524+
protected boolean isVmOrphaned(String vmUuid) {
525+
return orphanedVmsToSkip.containsKey(vmUuid);
526+
}
527+
528+
@Override
529+
protected boolean shouldSkipMissingVmHandling(String vmUuid, VmInstanceState expectedState) {
530+
// Double-check: if VM is orphaned and DB state indicates it's in a transient state,
531+
// skip this trace cycle to avoid split-brain
532+
if (!orphanedVmsToSkip.containsKey(vmUuid)) {
533+
return false;
534+
}
535+
536+
// If the VM is orphaned and in Starting/Migrating state, it might still be
537+
// starting on kvmagent - don't judge it as Stopped yet
538+
if (expectedState == VmInstanceState.Starting || expectedState == VmInstanceState.Migrating) {
539+
logger.info(String.format("VM[uuid:%s] is orphaned and in state[%s], skipping missing VM check to avoid split-brain",
540+
vmUuid, expectedState));
541+
return true;
542+
}
543+
544+
return false;
464545
}
465546
}

0 commit comments

Comments
 (0)