Skip to content

Commit f59ff96

Browse files
committed
Allow cleaning up of networks stuck in Implementing state
1 parent 4348386 commit f59ff96

File tree

2 files changed

+77
-1
lines changed

2 files changed

+77
-1
lines changed

framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -969,6 +969,13 @@ public void reallyRun() {
969969
}
970970

971971
logger.trace("End cleanup expired async-jobs");
972+
973+
// 3) Cleanup orphaned networks stuck in Implementing state without async jobs
974+
try {
975+
cleanupOrphanedNetworks();
976+
} catch (Throwable e) {
977+
logger.error("Unexpected exception when trying to cleanup orphaned networks", e);
978+
}
972979
} catch (Throwable e) {
973980
logger.error("Unexpected exception when trying to execute queue item, ", e);
974981
}
@@ -1284,6 +1291,75 @@ private void cleanupFailedSnapshotsCreatedWithDefaultStrategy(final long msid) {
12841291
}
12851292
}
12861293

1294+
/**
1295+
* Cleanup networks that are stuck in Implementing state without associated async jobs.
1296+
* Similar to how expired unfinished jobs are cleaned up, this only processes networks
1297+
* that have been stuck for longer than the job expiration threshold.
1298+
*/
1299+
private void cleanupOrphanedNetworks() {
1300+
try {
1301+
SearchCriteria<NetworkVO> sc = networkDao.createSearchCriteria();
1302+
sc.addAnd("state", SearchCriteria.Op.EQ, Network.State.Implementing);
1303+
sc.addAnd("removed", SearchCriteria.Op.NULL);
1304+
List<NetworkVO> implementingNetworks = networkDao.search(sc, null);
1305+
1306+
if (implementingNetworks == null || implementingNetworks.isEmpty()) {
1307+
return;
1308+
}
1309+
1310+
logger.debug("Found {} networks in Implementing state, checking for orphaned networks", implementingNetworks.size());
1311+
1312+
final long expireMinutes = JobExpireMinutes.value();
1313+
final Date cutoffTime = new Date(System.currentTimeMillis() - (expireMinutes * 60 * 1000));
1314+
1315+
for (NetworkVO network : implementingNetworks) {
1316+
if (network.getCreated().after(cutoffTime)) {
1317+
logger.trace("Network {} in Implementing state is only {} minutes old (threshold: {} minutes), skipping cleanup",
1318+
network.getId(),
1319+
(System.currentTimeMillis() - network.getCreated().getTime()) / 60000,
1320+
expireMinutes);
1321+
continue;
1322+
}
1323+
1324+
List<AsyncJobVO> jobs = _jobDao.findInstancePendingAsyncJobs("Network", network.getAccountId());
1325+
boolean hasActiveJob = false;
1326+
for (AsyncJobVO job : jobs) {
1327+
if (job.getInstanceId() != null && job.getInstanceId().equals(network.getId())) {
1328+
hasActiveJob = true;
1329+
break;
1330+
}
1331+
}
1332+
1333+
if (hasActiveJob) {
1334+
logger.debug("Network {} in Implementing state has active async job, skipping cleanup", network.getId());
1335+
continue;
1336+
}
1337+
1338+
logger.warn("Found orphaned network {} in Implementing state without async job. " +
1339+
"Network created: {}, age: {} minutes, expiration threshold: {} minutes. Transitioning to Shutdown state.",
1340+
network.getId(), network.getCreated(),
1341+
(System.currentTimeMillis() - network.getCreated().getTime()) / 60000,
1342+
expireMinutes);
1343+
updateNetworkState(network);
1344+
1345+
}
1346+
} catch (Exception e) {
1347+
logger.error("Error while cleaning up orphaned networks", e);
1348+
}
1349+
}
1350+
1351+
private void updateNetworkState(NetworkVO network) {
1352+
try {
1353+
networkOrchestrationService.stateTransitTo(network, Network.Event.OperationFailed);
1354+
logger.info("Successfully transitioned orphaned network {} to Shutdown state using state machine", network.getId());
1355+
} catch (final NoTransitionException e) {
1356+
logger.debug("State transition failed for orphaned network {}, forcing state update", network.getId());
1357+
network.setState(Network.State.Shutdown);
1358+
networkDao.update(network.getId(), network);
1359+
logger.info("Successfully forced orphaned network {} to Shutdown state", network.getId());
1360+
}
1361+
}
1362+
12871363
// Callback for management server nodes joining; this implementation has an empty body and takes no action.
@Override
12881364
public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
12891365
}

tools/marvin/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
raise RuntimeError("python setuptools is required to build Marvin")
2828

2929

30-
VERSION = "4.20.3.0-SNAPSHOT"
30+
VERSION = "4.20.3.0"
3131

3232
setup(name="Marvin",
3333
version=VERSION,

0 commit comments

Comments
 (0)