3838import org .apache .cloudstack .api .ApiCommandResourceType ;
3939import org .apache .cloudstack .api .ApiErrorCode ;
4040import org .apache .cloudstack .context .CallContext ;
41+ import org .apache .cloudstack .engine .orchestration .service .NetworkOrchestrationService ;
4142import org .apache .cloudstack .engine .subsystem .api .storage .SnapshotDataFactory ;
4243import org .apache .cloudstack .engine .subsystem .api .storage .SnapshotInfo ;
4344import org .apache .cloudstack .engine .subsystem .api .storage .SnapshotService ;
45+ import org .apache .cloudstack .engine .subsystem .api .storage .VolumeDataFactory ;
46+ import org .apache .cloudstack .engine .subsystem .api .storage .VolumeInfo ;
47+ import org .apache .cloudstack .engine .subsystem .api .storage .VolumeService ;
4448import org .apache .cloudstack .framework .config .ConfigKey ;
4549import org .apache .cloudstack .framework .config .Configurable ;
4650import org .apache .cloudstack .framework .jobs .AsyncJob ;
6569import org .apache .log4j .NDC ;
6670
6771import com .cloud .cluster .ClusterManagerListener ;
72+ import com .cloud .network .Network ;
73+ import com .cloud .network .dao .NetworkDao ;
74+ import com .cloud .network .dao .NetworkVO ;
6875import com .cloud .storage .Snapshot ;
76+ import com .cloud .storage .Volume ;
77+ import com .cloud .storage .VolumeDetailVO ;
6978import com .cloud .storage .dao .SnapshotDao ;
7079import com .cloud .storage .dao .SnapshotDetailsDao ;
7180import com .cloud .storage .dao .SnapshotDetailsVO ;
93102import com .cloud .utils .db .TransactionStatus ;
94103import com .cloud .utils .exception .CloudRuntimeException ;
95104import com .cloud .utils .exception .ExceptionUtil ;
105+ import com .cloud .utils .fsm .NoTransitionException ;
96106import com .cloud .utils .mgmt .JmxUtil ;
107+ import com .cloud .vm .VMInstanceVO ;
108+ import com .cloud .vm .VirtualMachine ;
109+ import com .cloud .vm .VirtualMachineManager ;
97110import com .cloud .vm .dao .VMInstanceDao ;
98111
99112public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager , ClusterManagerListener , Configurable {
@@ -148,6 +161,15 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
148161 @ Inject
149162 private SnapshotDetailsDao _snapshotDetailsDao ;
150163
// Collaborators used by the left-over job resource cleanup helpers in this class:
// volFactory -> cleanupVolume, virtualMachineManager -> cleanupVirtualMachine,
// networkDao / networkOrchestrationService -> cleanupNetwork.
164+ @ Inject
165+ private VolumeDataFactory volFactory ;
166+ @ Inject
167+ private VirtualMachineManager virtualMachineManager ;
168+ @ Inject
169+ private NetworkDao networkDao ;
170+ @ Inject
171+ private NetworkOrchestrationService networkOrchestrationService ;
172+
151173 private volatile long _executionRunNumber = 1 ;
152174
153175 private final ScheduledExecutorService _heartbeatScheduler = Executors .newScheduledThreadPool (1 , new NamedThreadFactory ("AsyncJobMgr-Heartbeat" ));
@@ -1089,6 +1111,7 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
10891111 if (s_logger .isDebugEnabled ()) {
10901112 s_logger .debug ("Cancel left-over job-" + job .getId ());
10911113 }
1114+ cleanupResources (job );
10921115 job .setStatus (JobInfo .Status .FAILED );
10931116 job .setResultCode (ApiErrorCode .INTERNAL_ERROR .getHttpCode ());
10941117 job .setResult ("job cancelled because of management server restart or shutdown" );
@@ -1101,33 +1124,115 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
11011124 s_logger .debug ("Purge queue item for cancelled job-" + job .getId ());
11021125 }
11031126 _queueMgr .purgeAsyncJobQueueItemId (job .getId ());
1104- if (ApiCommandResourceType .Volume .toString ().equals (job .getInstanceType ())) {
1105-
1106- try {
1107- _volumeDetailsDao .removeDetail (job .getInstanceId (), "SNAPSHOT_ID" );
1108- _volsDao .remove (job .getInstanceId ());
1109- } catch (Exception e ) {
1110- s_logger .error ("Unexpected exception while removing concurrent request meta data :" + e .getLocalizedMessage ());
1111- }
1112- }
1113- }
1114- final List <SnapshotDetailsVO > snapshotList = _snapshotDetailsDao .findDetails (AsyncJob .Constants .MS_ID , Long .toString (msid ), false );
1115- for (final SnapshotDetailsVO snapshotDetailsVO : snapshotList ) {
1116- SnapshotInfo snapshot = snapshotFactory .getSnapshotOnPrimaryStore (snapshotDetailsVO .getResourceId ());
1117- if (snapshot == null ) {
1118- _snapshotDetailsDao .remove (snapshotDetailsVO .getId ());
1119- continue ;
1120- }
1121- snapshotSrv .processEventOnSnapshotObject (snapshot , Snapshot .Event .OperationFailed );
1122- _snapshotDetailsDao .removeDetail (snapshotDetailsVO .getResourceId (), AsyncJob .Constants .MS_ID );
11231127 }
1128+ cleanupFailedSnapshotsCreatedWithDefaultStrategy (msid );
11241129 }
11251130 });
11261131 } catch (Throwable e ) {
11271132 s_logger .warn ("Unexpected exception in cleaning up left over jobs for mamagement server node " + msid , e );
11281133 }
11291134 }
11301135
1136+ /*
1137+ Cleanup Resources in transition state and move them to appropriate state
1138+ This will allow other operation on the resource, instead of being stuck in transition state
1139+ */
1140+ protected boolean cleanupResources (AsyncJobVO job ) {
1141+ try {
1142+ ApiCommandResourceType resourceType = ApiCommandResourceType .fromString (job .getInstanceType ());
1143+ if (resourceType == null ) {
1144+ s_logger .warn ("Unknown ResourceType. Skip Cleanup: " + job .getInstanceType ());
1145+ return true ;
1146+ }
1147+ switch (resourceType ) {
1148+ case Volume :
1149+ return cleanupVolume (job .getInstanceId ());
1150+ case VirtualMachine :
1151+ return cleanupVirtualMachine (job .getInstanceId ());
1152+ case Network :
1153+ return cleanupNetwork (job .getInstanceId ());
1154+ }
1155+ } catch (Exception e ) {
1156+ s_logger .warn ("Error while cleaning up resource: [" + job .getInstanceType ().toString () + "] with Id: " + job .getInstanceId (), e );
1157+ return false ;
1158+ }
1159+ return true ;
1160+ }
1161+
1162+ private boolean cleanupVolume (final long volumeId ) {
1163+ VolumeInfo vol = volFactory .getVolume (volumeId );
1164+ if (vol == null ) {
1165+ s_logger .warn ("Volume not found. Skip Cleanup. VolumeId: " + volumeId );
1166+ return true ;
1167+ }
1168+ if (vol .getState ().isTransitional ()) {
1169+ s_logger .debug ("Cleaning up volume with Id: " + volumeId );
1170+ boolean status = vol .stateTransit (Volume .Event .OperationFailed );
1171+ cleanupFailedVolumesCreatedFromSnapshots (volumeId );
1172+ return status ;
1173+ }
1174+ s_logger .debug ("Volume not in transition state. Skip cleanup. VolumeId: " + volumeId );
1175+ return true ;
1176+ }
1177+
1178+ private boolean cleanupVirtualMachine (final long vmId ) throws Exception {
1179+ VMInstanceVO vmInstanceVO = _vmInstanceDao .findById (vmId );
1180+ if (vmInstanceVO == null ) {
1181+ s_logger .warn ("Instance not found. Skip Cleanup. InstanceId: " + vmId );
1182+ return true ;
1183+ }
1184+ if (vmInstanceVO .getState ().isTransitional ()) {
1185+ s_logger .debug ("Cleaning up Instance with Id: " + vmId );
1186+ return virtualMachineManager .stateTransitTo (vmInstanceVO , VirtualMachine .Event .OperationFailed , vmInstanceVO .getHostId ());
1187+ }
1188+ s_logger .debug ("Instance not in transition state. Skip cleanup. InstanceId: " + vmId );
1189+ return true ;
1190+ }
1191+
1192+ private boolean cleanupNetwork (final long networkId ) throws Exception {
1193+ NetworkVO networkVO = networkDao .findById (networkId );
1194+ if (networkVO == null ) {
1195+ s_logger .warn ("Network not found. Skip Cleanup. NetworkId: " + networkId );
1196+ return true ;
1197+ }
1198+ if (Network .State .Implementing .equals (networkVO .getState ())) {
1199+ try {
1200+ s_logger .debug ("Cleaning up Network with Id: " + networkId );
1201+ return networkOrchestrationService .stateTransitTo (networkVO , Network .Event .OperationFailed );
1202+ } catch (final NoTransitionException e ) {
1203+ networkVO .setState (Network .State .Shutdown );
1204+ networkDao .update (networkVO .getId (), networkVO );
1205+ }
1206+ }
1207+ s_logger .debug ("Network not in transition state. Skip cleanup. NetworkId: " + networkId );
1208+ return true ;
1209+ }
1210+
1211+ private void cleanupFailedVolumesCreatedFromSnapshots (final long volumeId ) {
1212+ try {
1213+ VolumeDetailVO volumeDetail = _volumeDetailsDao .findDetail (volumeId , VolumeService .SNAPSHOT_ID );
1214+ if (volumeDetail != null ) {
1215+ _volumeDetailsDao .removeDetail (volumeId , VolumeService .SNAPSHOT_ID );
1216+ _volsDao .remove (volumeId );
1217+ }
1218+ } catch (Exception e ) {
1219+ s_logger .error ("Unexpected exception while removing concurrent request meta data :" + e .getLocalizedMessage ());
1220+ }
1221+ }
1222+
1223+ private void cleanupFailedSnapshotsCreatedWithDefaultStrategy (final long msid ) {
1224+ final List <SnapshotDetailsVO > snapshotList = _snapshotDetailsDao .findDetails (AsyncJob .Constants .MS_ID , Long .toString (msid ), false );
1225+ for (final SnapshotDetailsVO snapshotDetailsVO : snapshotList ) {
1226+ SnapshotInfo snapshot = snapshotFactory .getSnapshotOnPrimaryStore (snapshotDetailsVO .getResourceId ());
1227+ if (snapshot == null ) {
1228+ _snapshotDetailsDao .remove (snapshotDetailsVO .getId ());
1229+ continue ;
1230+ }
1231+ snapshotSrv .processEventOnSnapshotObject (snapshot , Snapshot .Event .OperationFailed );
1232+ _snapshotDetailsDao .removeDetail (snapshotDetailsVO .getResourceId (), AsyncJob .Constants .MS_ID );
1233+ }
1234+ }
1235+
11311236 @ Override
11321237 public void onManagementNodeJoined (List <? extends ManagementServerHost > nodeList , long selfNodeId ) {
11331238 }
0 commit comments