|
74 | 74 | import java.sql.SQLException; |
75 | 75 | import java.sql.Timestamp; |
76 | 76 | import java.util.ArrayList; |
| 77 | +import java.util.HashSet; |
77 | 78 | import java.util.List; |
78 | 79 | import java.util.Map; |
79 | 80 | import java.util.Set; |
@@ -107,6 +108,15 @@ public class ManagementNodeManagerImpl extends AbstractService implements Manage |
107 | 108 | // A dictionary (nodeId -> ManagementNodeInventory) of joined management Node |
108 | 109 | final private Map<String, ManagementNodeInventory> joinedManagementNodes = new ConcurrentHashMap<>(); |
109 | 110 |
|
| 111 | + // Lock that serializes lifecycle events coming from heartbeat reconciliation and from canonical event callbacks,
| 112 | + // preventing races where a nodeJoin event is immediately followed by a stale nodeLeft
| 113 | + // from the heartbeat thread, or vice versa. See ZSTAC-77711. NOTE(review): the heartbeat path calls nodeJoin from an @AsyncThread Runnable - confirm that call is still covered by this lock.
| 114 | + private final Object lifecycleLock = new Object(); |
| 115 | + |
| 116 | + // Tracks nodes found in the hash ring but missing from the DB. nodeLeft is only called after a node
| 117 | + // has been missing for two consecutive heartbeat cycles, to avoid removing nodes that just joined. Plain HashSet (not thread-safe): access it only while holding lifecycleLock.
| 118 | + private final Set<String> suspectedMissingFromDb = new HashSet<>(); |
| 119 | + |
110 | 120 | private static int NODE_STARTING = 0; |
111 | 121 | private static int NODE_RUNNING = 1; |
112 | 122 | private static int NODE_FAILED = -1; |
@@ -368,12 +378,16 @@ protected void run(Map tokens, Object data) { |
368 | 378 |
|
369 | 379 | ManagementNodeLifeCycleData d = (ManagementNodeLifeCycleData) data; |
370 | 380 |
|
371 | | - if (LifeCycle.NodeJoin.toString().equals(d.getLifeCycle())) { |
372 | | - nodeLifeCycle.nodeJoin(d.getInventory()); |
373 | | - } else if (LifeCycle.NodeLeft.toString().equals(d.getLifeCycle())) { |
374 | | - nodeLifeCycle.nodeLeft(d.getInventory()); |
375 | | - } else { |
376 | | - throw new CloudRuntimeException(String.format("unknown lifecycle[%s]", d.getLifeCycle())); |
| 381 | + synchronized (lifecycleLock) { |
| 382 | + if (LifeCycle.NodeJoin.toString().equals(d.getLifeCycle())) { |
| 383 | + // Clear from suspected set since the node is confirmed alive |
| 384 | + suspectedMissingFromDb.remove(d.getInventory().getUuid()); |
| 385 | + nodeLifeCycle.nodeJoin(d.getInventory()); |
| 386 | + } else if (LifeCycle.NodeLeft.toString().equals(d.getLifeCycle())) { |
| 387 | + nodeLifeCycle.nodeLeft(d.getInventory()); |
| 388 | + } else { |
| 389 | + throw new CloudRuntimeException(String.format("unknown lifecycle[%s]", d.getLifeCycle())); |
| 390 | + } |
377 | 391 | } |
378 | 392 | } |
379 | 393 | }; |
@@ -860,34 +874,55 @@ private void checkAllNodesHealth() { |
860 | 874 |
|
861 | 875 | Set<String> nodeUuidsInDb = nodesInDb.stream().map(ManagementNodeVO::getUuid).collect(Collectors.toSet()); |
862 | 876 |
|
863 | | - // When a node is dying, we may not receive the the dead notification because the message bus may be also dead |
864 | | - // at that moment. By checking if the node UUID is still in our hash ring, we know what nodes should be kicked out |
865 | | - destinationMaker.getManagementNodesInHashRing().forEach(nodeUuid -> { |
866 | | - if (!nodeUuidsInDb.contains(nodeUuid)) { |
867 | | - logger.warn(String.format("found that a management node[uuid:%s] had no heartbeat in database but still in our hash ring," + |
868 | | - "notify that it's dead", nodeUuid)); |
869 | | - ManagementNodeInventory inv = new ManagementNodeInventory(); |
870 | | - inv.setUuid(nodeUuid); |
871 | | - inv.setHostName(destinationMaker.getNodeInfo(nodeUuid).getNodeIP()); |
872 | | - |
873 | | - nodeLifeCycle.nodeLeft(inv); |
874 | | - } |
875 | | - }); |
876 | | - |
877 | | - // check if any node missing in our hash ring |
878 | | - nodesInDb.forEach(n -> { |
879 | | - if (n.getUuid().equals(node().getUuid()) || suspects.contains(n)) { |
880 | | - return; |
881 | | - } |
882 | | - |
883 | | - new Runnable() { |
884 | | - @Override |
885 | | - @AsyncThread |
886 | | - public void run() { |
887 | | - nodeLifeCycle.nodeJoin(ManagementNodeInventory.valueOf(n)); |
| 877 | + // Reconcile hash ring with DB under lifecycleLock to prevent race with |
| 878 | + // canonical event callbacks (nodeJoin/nodeLeft). See ZSTAC-77711. |
| 879 | + synchronized (lifecycleLock) { |
| 880 | + // When a node is dying, we may not receive the dead notification because the message bus may be also dead |
| 881 | + // at that moment. By checking if the node UUID is still in our hash ring, we know what nodes should be kicked out. |
| 882 | + // Use two-round confirmation: first round marks as suspected, second round actually removes. |
| 883 | + Set<String> currentSuspected = new HashSet<>(); |
| 884 | + destinationMaker.getManagementNodesInHashRing().forEach(nodeUuid -> { |
| 885 | + if (!nodeUuidsInDb.contains(nodeUuid)) { |
| 886 | + if (suspectedMissingFromDb.contains(nodeUuid)) { |
| 887 | + // Second consecutive detection — confirmed missing, remove from hash ring |
| 888 | + logger.warn(String.format("management node[uuid:%s] confirmed missing from database for two consecutive" + |
| 889 | + " heartbeat cycles, removing from hash ring", nodeUuid)); |
| 890 | + ManagementNodeInventory inv = new ManagementNodeInventory(); |
| 891 | + inv.setUuid(nodeUuid); |
| 892 | + try { |
| 893 | + inv.setHostName(destinationMaker.getNodeInfo(nodeUuid).getNodeIP()); |
| 894 | + } catch (Exception e) { |
| 895 | + logger.warn(String.format("cannot get node info for node[uuid:%s], use empty hostname", nodeUuid)); |
| 896 | + } |
| 897 | + |
| 898 | + nodeLifeCycle.nodeLeft(inv); |
| 899 | + } else { |
| 900 | + // First detection — mark as suspected, defer removal to next cycle |
| 901 | + logger.warn(String.format("management node[uuid:%s] not found in database but still in hash ring," + |
| 902 | + " marking as suspected (will remove on next heartbeat if still missing)", nodeUuid)); |
| 903 | + currentSuspected.add(nodeUuid); |
| 904 | + } |
888 | 905 | } |
889 | | - }.run(); |
890 | | - }); |
| 906 | + }); |
| 907 | + // Update suspected set: only keep nodes that are newly suspected this round |
| 908 | + suspectedMissingFromDb.clear(); |
| 909 | + suspectedMissingFromDb.addAll(currentSuspected); |
| 910 | + |
| 911 | + // check if any node missing in our hash ring |
| 912 | + nodesInDb.forEach(n -> { |
| 913 | + if (n.getUuid().equals(node().getUuid()) || suspects.contains(n)) { |
| 914 | + return; |
| 915 | + } |
| 916 | + |
| 917 | + new Runnable() { |
| 918 | + @Override |
| 919 | + @AsyncThread |
| 920 | + public void run() { |
| 921 | + nodeLifeCycle.nodeJoin(ManagementNodeInventory.valueOf(n)); |
| 922 | + } |
| 923 | + }.run(); |
| 924 | + }); |
| 925 | + } |
891 | 926 | } |
892 | 927 |
|
893 | 928 | @Override |
|
0 commit comments