@@ -969,6 +969,13 @@ public void reallyRun() {
969969 }
970970
971971 logger .trace ("End cleanup expired async-jobs" );
972+
973+ // 3) Cleanup orphaned networks stuck in Implementing state without async jobs
974+ try {
975+ cleanupOrphanedNetworks ();
976+ } catch (Throwable e ) {
977+ logger .error ("Unexpected exception when trying to cleanup orphaned networks" , e );
978+ }
972979 } catch (Throwable e ) {
973980 logger .error ("Unexpected exception when trying to execute queue item, " , e );
974981 }
@@ -1284,6 +1291,75 @@ private void cleanupFailedSnapshotsCreatedWithDefaultStrategy(final long msid) {
12841291 }
12851292 }
12861293
1294+ /**
1295+ * Cleanup networks that are stuck in Implementing state without associated async jobs.
1296+ * Similar to how expired unfinished jobs are cleaned up, this only processes networks
1297+ * that have been stuck for longer than the job expiration threshold.
1298+ */
1299+ private void cleanupOrphanedNetworks () {
1300+ try {
1301+ SearchCriteria <NetworkVO > sc = networkDao .createSearchCriteria ();
1302+ sc .addAnd ("state" , SearchCriteria .Op .EQ , Network .State .Implementing );
1303+ sc .addAnd ("removed" , SearchCriteria .Op .NULL );
1304+ List <NetworkVO > implementingNetworks = networkDao .search (sc , null );
1305+
1306+ if (implementingNetworks == null || implementingNetworks .isEmpty ()) {
1307+ return ;
1308+ }
1309+
1310+ logger .debug ("Found {} networks in Implementing state, checking for orphaned networks" , implementingNetworks .size ());
1311+
1312+ final long expireMinutes = JobExpireMinutes .value ();
1313+ final Date cutoffTime = new Date (System .currentTimeMillis () - (expireMinutes * 60 * 1000 ));
1314+
1315+ for (NetworkVO network : implementingNetworks ) {
1316+ if (network .getCreated ().after (cutoffTime )) {
1317+ logger .trace ("Network {} in Implementing state is only {} minutes old (threshold: {} minutes), skipping cleanup" ,
1318+ network .getId (),
1319+ (System .currentTimeMillis () - network .getCreated ().getTime ()) / 60000 ,
1320+ expireMinutes );
1321+ continue ;
1322+ }
1323+
1324+ List <AsyncJobVO > jobs = _jobDao .findInstancePendingAsyncJobs ("Network" , network .getAccountId ());
1325+ boolean hasActiveJob = false ;
1326+ for (AsyncJobVO job : jobs ) {
1327+ if (job .getInstanceId () != null && job .getInstanceId ().equals (network .getId ())) {
1328+ hasActiveJob = true ;
1329+ break ;
1330+ }
1331+ }
1332+
1333+ if (hasActiveJob ) {
1334+ logger .debug ("Network {} in Implementing state has active async job, skipping cleanup" , network .getId ());
1335+ continue ;
1336+ }
1337+
1338+ logger .warn ("Found orphaned network {} in Implementing state without async job. " +
1339+ "Network created: {}, age: {} minutes, expiration threshold: {} minutes. Transitioning to Shutdown state." ,
1340+ network .getId (), network .getCreated (),
1341+ (System .currentTimeMillis () - network .getCreated ().getTime ()) / 60000 ,
1342+ expireMinutes );
1343+ updateNetworkState (network );
1344+
1345+ }
1346+ } catch (Exception e ) {
1347+ logger .error ("Error while cleaning up orphaned networks" , e );
1348+ }
1349+ }
1350+
1351+ private void updateNetworkState (NetworkVO network ) {
1352+ try {
1353+ networkOrchestrationService .stateTransitTo (network , Network .Event .OperationFailed );
1354+ logger .info ("Successfully transitioned orphaned network {} to Shutdown state using state machine" , network .getId ());
1355+ } catch (final NoTransitionException e ) {
1356+ logger .debug ("State transition failed for orphaned network {}, forcing state update" , network .getId ());
1357+ network .setState (Network .State .Shutdown );
1358+ networkDao .update (network .getId (), network );
1359+ logger .info ("Successfully forced orphaned network {} to Shutdown state" , network .getId ());
1360+ }
1361+ }
1362+
12871363 @ Override
12881364 public void onManagementNodeJoined (List <? extends ManagementServerHost > nodeList , long selfNodeId ) {
12891365 }
0 commit comments