Skip to content

Commit e2a7bd2

Browse files
committed
fix for failed backup jobs, handling unfit vms
Signed-off-by: Abhishek Kumar <abhishek.mrt22@gmail.com>
1 parent ebfe83d commit e2a7bd2

5 files changed

Lines changed: 399 additions & 9 deletions

File tree

engine/schema/src/main/java/com/cloud/storage/dao/VolumeDao.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ public interface VolumeDao extends GenericDao<VolumeVO, Long>, StateDao<Volume.S
4646

4747
List<VolumeVO> findByInstanceAndType(long id, Volume.Type vType);
4848

49+
List<VolumeVO> findByInstanceAndNotStates(long id, Volume.State...states);
50+
51+
4952
List<VolumeVO> findIncludingRemovedByInstanceAndType(long id, Volume.Type vType);
5053

5154
List<VolumeVO> findNonDestroyedVolumesByInstanceIdAndPoolId(long instanceId, long poolId);

engine/schema/src/main/java/com/cloud/storage/dao/VolumeDaoImpl.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,17 @@ public List<VolumeVO> findByInstanceAndType(long id, Type vType) {
208208
return listBy(sc);
209209
}
210210

211+
@Override
212+
public List<VolumeVO> findByInstanceAndNotStates(long id, Volume.State...states) {
213+
SearchBuilder<VolumeVO> sb = createSearchBuilder();
214+
sb.and("instanceId", sb.entity().getInstanceId(), Op.EQ);
215+
sb.and("state", sb.entity().getState(), Op.NIN);
216+
SearchCriteria<VolumeVO> sc = sb.create();
217+
sc.setParameters("instanceId", id);
218+
sc.setParameters("state", (Object[]) states);
219+
return listBy(sc);
220+
}
221+
211222
@Override
212223
public List<VolumeVO> findIncludingRemovedByInstanceAndType(long id, Type vType) {
213224
SearchCriteria<VolumeVO> sc = AllFieldsSearch.create();

engine/schema/src/test/java/com/cloud/storage/dao/VolumeDaoImplTest.java

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import org.mockito.Spy;
4242
import org.mockito.junit.MockitoJUnitRunner;
4343

44+
import com.cloud.storage.Volume;
4445
import com.cloud.storage.VolumeVO;
4546
import com.cloud.utils.db.Filter;
4647
import com.cloud.utils.db.SearchBuilder;
@@ -113,6 +114,69 @@ public void testListPoolIdsByVolumeCount_without_cluster_details() throws SQLExc
113114
verify(preparedStatementMock, times(1)).executeQuery();
114115
}
115116

117+
@Test
118+
public void findByInstanceAndNotState_queriesWithInstanceIdAndExcludedStates() {
119+
SearchBuilder<VolumeVO> sb = Mockito.mock(SearchBuilder.class);
120+
SearchCriteria<VolumeVO> sc = Mockito.mock(SearchCriteria.class);
121+
Mockito.when(sb.create()).thenReturn(sc);
122+
Mockito.doReturn(new ArrayList<>()).when(volumeDao).listBy(sc);
123+
Mockito.when(volumeDao.createSearchBuilder()).thenReturn(sb);
124+
VolumeVO mockedVO = Mockito.mock(VolumeVO.class);
125+
Mockito.when(sb.entity()).thenReturn(mockedVO);
126+
127+
volumeDao.findByInstanceAndNotStates(42L, Volume.State.Ready);
128+
129+
Mockito.verify(sc).setParameters("instanceId", 42L);
130+
Mockito.verify(sc).setParameters("state", (Object[]) new Volume.State[]{Volume.State.Ready});
131+
}
132+
133+
@Test
134+
public void findByInstanceAndNotStates_withMultipleExcludedStates_passesAllStatesToCriteria() {
135+
SearchBuilder<VolumeVO> sb = Mockito.mock(SearchBuilder.class);
136+
SearchCriteria<VolumeVO> sc = Mockito.mock(SearchCriteria.class);
137+
Mockito.when(sb.create()).thenReturn(sc);
138+
Mockito.doReturn(new ArrayList<>()).when(volumeDao).listBy(sc);
139+
Mockito.when(volumeDao.createSearchBuilder()).thenReturn(sb);
140+
VolumeVO mockedVO = Mockito.mock(VolumeVO.class);
141+
Mockito.when(sb.entity()).thenReturn(mockedVO);
142+
143+
volumeDao.findByInstanceAndNotStates(7L, Volume.State.Destroy, Volume.State.Expunged);
144+
145+
Mockito.verify(sc).setParameters("instanceId", 7L);
146+
Mockito.verify(sc).setParameters("state",
147+
(Object[]) new Volume.State[]{Volume.State.Destroy, Volume.State.Expunged});
148+
}
149+
150+
@Test
151+
public void findByInstanceAndNotStates_returnsResultFromDao() {
152+
SearchBuilder<VolumeVO> sb = Mockito.mock(SearchBuilder.class);
153+
SearchCriteria<VolumeVO> sc = Mockito.mock(SearchCriteria.class);
154+
Mockito.when(sb.create()).thenReturn(sc);
155+
VolumeVO vol = Mockito.mock(VolumeVO.class);
156+
Mockito.doReturn(List.of(vol)).when(volumeDao).listBy(sc);
157+
Mockito.when(volumeDao.createSearchBuilder()).thenReturn(sb);
158+
Mockito.when(sb.entity()).thenReturn(Mockito.mock(VolumeVO.class));
159+
160+
List<VolumeVO> result = volumeDao.findByInstanceAndNotStates(1L, Volume.State.Ready);
161+
162+
Assert.assertEquals(1, result.size());
163+
Assert.assertSame(vol, result.get(0));
164+
}
165+
166+
@Test
167+
public void findByInstanceAndNotStates_noMatchingVolumes_returnsEmptyList() {
168+
SearchBuilder<VolumeVO> sb = Mockito.mock(SearchBuilder.class);
169+
SearchCriteria<VolumeVO> sc = Mockito.mock(SearchCriteria.class);
170+
Mockito.when(sb.create()).thenReturn(sc);
171+
Mockito.doReturn(new ArrayList<>()).when(volumeDao).listBy(sc);
172+
Mockito.when(volumeDao.createSearchBuilder()).thenReturn(sb);
173+
Mockito.when(sb.entity()).thenReturn(Mockito.mock(VolumeVO.class));
174+
175+
List<VolumeVO> result = volumeDao.findByInstanceAndNotStates(99L, Volume.State.Ready);
176+
177+
Assert.assertTrue(result.isEmpty());
178+
}
179+
116180
@Test
117181
public void testSearchRemovedByVmsNoVms() {
118182
Assert.assertTrue(CollectionUtils.isEmpty(volumeDao.searchRemovedByVms(
@@ -141,5 +205,4 @@ public void testSearchRemovedByVms() {
141205
Mockito.any(SearchCriteria.class), Mockito.any(Filter.class), Mockito.eq(null),
142206
Mockito.eq(false));
143207
}
144-
145208
}

server/src/main/java/org/apache/cloudstack/backup/KVMBackupExportServiceImpl.java

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import org.apache.cloudstack.jobs.JobInfo;
5353
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
5454
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
55+
import org.apache.commons.collections.CollectionUtils;
5556
import org.apache.commons.lang3.StringUtils;
5657
import org.joda.time.DateTime;
5758
import org.springframework.stereotype.Component;
@@ -86,6 +87,7 @@
8687
import com.cloud.vm.VMInstanceVO;
8788
import com.cloud.vm.VirtualMachine;
8889
import com.cloud.vm.VirtualMachine.State;
90+
import com.cloud.vm.VirtualMachineManager;
8991
import com.cloud.vm.VmDetailConstants;
9092
import com.cloud.vm.VmWork;
9193
import com.cloud.vm.VmWorkConstants;
@@ -136,6 +138,9 @@ public class KVMBackupExportServiceImpl extends ManagerBase implements KVMBackup
136138
@Inject
137139
AsyncJobManager asyncJobManager;
138140

141+
@Inject
142+
VirtualMachineManager virtualMachineManager;
143+
139144
VmWorkJobHandlerProxy jobHandlerProxy = new VmWorkJobHandlerProxy(this);
140145

141146
private void verifyKVMBackupExportServiceSupported(Long zoneId) {
@@ -145,24 +150,44 @@ private void verifyKVMBackupExportServiceSupported(Long zoneId) {
145150
}
146151
}
147152

153+
protected void validateVmVolumesForBackup(VMInstanceVO vm) {
154+
List<VolumeVO> volumes = volumeDao.findByInstanceAndNotStates(vm.getId(), Volume.State.Ready);
155+
List<String> nonReadyVolumeIds = volumes
156+
.stream()
157+
.map(VolumeVO::getUuid)
158+
.collect(Collectors.toList());
159+
if (CollectionUtils.isNotEmpty(nonReadyVolumeIds)) {
160+
throw new CloudRuntimeException(String.format("Volumes [%s] of Instance: %s are not in Ready state",
161+
StringUtils.join(nonReadyVolumeIds, ","), vm.getUuid()));
162+
}
163+
}
164+
148165
@Override
149166
public Backup createBackup(StartBackupCmd cmd) {
150167
Long vmId = cmd.getVmId();
151168

152169
VMInstanceVO vm = vmInstanceDao.findById(vmId);
153170
if (vm == null) {
154-
throw new CloudRuntimeException("VM not found: " + vmId);
171+
throw new CloudRuntimeException("Instance not found: " + vmId);
155172
}
156173

157174
verifyKVMBackupExportServiceSupported(vm.getDataCenterId());
158175

159176
if (vm.getState() != State.Running && vm.getState() != State.Stopped) {
160-
throw new CloudRuntimeException("VM must be running or stopped to start backup");
177+
throw new CloudRuntimeException("Instance must be running or stopped to start Backup");
161178
}
162179

163180
Backup existingBackup = backupDao.findByVmId(vmId);
164181
if (existingBackup != null && existingBackup.getStatus() == Backup.Status.BackingUp) {
165-
throw new CloudRuntimeException("Backup already in progress for VM: " + vmId);
182+
throw new CloudRuntimeException("Backup already in progress for Instance: " + vm.getUuid());
183+
}
184+
185+
validateVmVolumesForBackup(vm);
186+
187+
Pair<Long, Long> clusterAndHostId = virtualMachineManager.findClusterAndHostIdForVm(vm, false);
188+
Long hostId = clusterAndHostId.second();
189+
if (hostId == null) {
190+
throw new CloudRuntimeException("Host cannot be determined for Instance: " + vm.getUuid());
166191
}
167192

168193
BackupVO backup = new BackupVO();
@@ -190,8 +215,6 @@ public Backup createBackup(StartBackupCmd cmd) {
190215
backup.setToCheckpointId(toCheckpointId);
191216
backup.setFromCheckpointId(fromCheckpointId);
192217
backup.setType("FULL");
193-
194-
Long hostId = vm.getHostId() != null ? vm.getHostId() : vm.getLastHostId();
195218
backup.setHostId(hostId);
196219

197220
return backupDao.persist(backup);
@@ -231,15 +254,20 @@ public Backup startBackup(StartBackupCmd cmd) {
231254
Long vmId = cmd.getVmId();
232255
VMInstanceVO vm = vmInstanceDao.findById(vmId);
233256
if (vm == null) {
234-
throw new CloudRuntimeException("VM not found: " + vmId);
257+
removeFailedBackup(backup);
258+
throw new CloudRuntimeException("Instance not found for Backup: " + backup.getUuid());
235259
}
236260
List<VolumeVO> volumes = volumeDao.findByInstance(vmId);
237261
Map<String, String> diskPathUuidMap = new HashMap<>();
238262
for (Volume vol : volumes) {
263+
if (vol.getPoolId() == null) {
264+
removeFailedBackup(backup);
265+
throw new CloudRuntimeException("Storage Pool cannot be determined for Volume: " + vol.getUuid());
266+
}
239267
String volumePath = getVolumePathForFileBasedBackend(vol);
240268
diskPathUuidMap.put(volumePath, vol.getUuid());
241269
}
242-
long hostId = backup.getHostId();
270+
Long hostId = backup.getHostId();
243271

244272
VMInstanceDetailVO lastCheckpointId = vmInstanceDetailsDao.findDetail(vmId, VmDetailConstants.LAST_CHECKPOINT_ID);
245273
if (lastCheckpointId != null) {
@@ -249,6 +277,10 @@ public Backup startBackup(StartBackupCmd cmd) {
249277
logger.warn("Failed to delete last checkpoint {} for VM {}, proceeding with backup start", lastCheckpointId.getValue(), vmId, e);
250278
}
251279
}
280+
if (hostId == null) {
281+
removeFailedBackup(backup);
282+
throw new CloudRuntimeException("Host cannot be found for Backup: " + backup.getUuid());
283+
}
252284

253285
Host host = hostDao.findById(hostId);
254286
Map<String, String> vmDetails = vmInstanceDetailsDao.listDetailsKeyPairs(vmId);
@@ -276,7 +308,7 @@ public Backup startBackup(StartBackupCmd cmd) {
276308
if (!answer.getResult()) {
277309
removeFailedBackup(backup);
278310
logger.error("Failed to start {} due to: {}", backup, answer.getDetails());
279-
throw new CloudRuntimeException("Failed to start backup: " + answer.getDetails());
311+
throw new CloudRuntimeException("Failed to start Backup: " + answer.getDetails());
280312
}
281313

282314
// Update backup with checkpoint creation time

0 commit comments

Comments
 (0)