Skip to content

Commit 7300f5b

Browse files
YashasG98l-technicore
authored andcommitted
Updated UHP post detach logout wait logic
1 parent 8b3bb2a commit 7300f5b

File tree

6 files changed

+85
-7
lines changed

6 files changed

+85
-7
lines changed

pkg/cloudprovider/providers/oci/instances_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,10 @@ func (c *MockComputeClient) FindActiveVolumeAttachment(ctx context.Context, comp
900900
return nil, nil
901901
}
902902

903+
func (c *MockComputeClient) WaitForUHPVolumeLoggedOut(ctx context.Context, attachmentID string) error {
904+
return nil
905+
}
906+
903907
func (c *MockBlockStorageClient) AwaitVolumeBackupAvailableOrTimeout(ctx context.Context, id string) (*core.VolumeBackup, error) {
904908
return &core.VolumeBackup{}, nil
905909
}

pkg/csi/driver/bv_controller.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -855,13 +855,19 @@ func (d *BlockVolumeControllerDriver) ControllerUnpublishVolume(ctx context.Cont
855855
multipath := false
856856

857857
if attachedVolume.GetIsMultipath() != nil {
858+
var cancel context.CancelFunc
859+
// this will not override parent context, it will evaluate to min(parent context timeout, current time + 90 seconds)
860+
ctx, cancel = context.WithTimeout(ctx, 90*time.Second)
861+
defer cancel()
858862
multipath = *attachedVolume.GetIsMultipath()
859-
}
860-
861-
// sleeping to ensure block volume plugin logs out of iscsi connections on nodes before delete
862-
if multipath {
863-
log.Info("Waiting for 90 seconds to ensure block volume plugin logs out of iscsi connections on nodes")
864-
time.Sleep(90 * time.Second)
863+
if multipath {
864+
log.With("instanceID", *attachedVolume.GetInstanceId()).Info("Waiting for UHP Volume to logout")
865+
err = d.client.Compute().WaitForUHPVolumeLoggedOut(ctx, *attachedVolume.GetId())
866+
if err != nil {
867+
log.With("service", "compute", "verb", "get", "resource", "volumeAttachment", "statusCode", util.GetHttpStatusCode(err)).
868+
With("instanceID", *attachedVolume.GetInstanceId()).Warnf("timed out waiting for UHP volume logout, will not wait further or retry, will mark detach success: %s", err.Error())
869+
}
870+
}
865871
}
866872

867873
log.Info("Un-publishing Volume Completed")

pkg/csi/driver/bv_controller_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,15 @@ var (
155155
Id: common.String("volume-attachment-stuck-in-attaching-state"),
156156
InstanceId: common.String("sample-provider-id"),
157157
},
158+
"uhp-volume-attachment-stuck-in-logged-in-state": {
159+
DisplayName: common.String("uhp-volume-attachment-stuck-in-logged-in-state"),
160+
LifecycleState: core.VolumeAttachmentLifecycleStateDetached,
161+
AvailabilityDomain: common.String("NWuj:PHX-AD-2"),
162+
Id: common.String("uhp-volume-attachment-stuck-in-logged-in-state"),
163+
InstanceId: common.String("sample-provider-id"),
164+
IscsiLoginState: core.VolumeAttachmentIscsiLoginStateLoginSucceeded,
165+
IsMultipath: common.Bool(true),
166+
},
158167
}
159168

160169
subnets = map[string]*core.Subnet{
@@ -747,6 +756,21 @@ func (c *MockComputeClient) WaitForVolumeDetached(ctx context.Context, attachmen
747756
return nil
748757
}
749758

759+
func (c *MockComputeClient) WaitForUHPVolumeLoggedOut(ctx context.Context, attachmentID string) error {
760+
if err := wait.PollImmediateUntil(testPollInterval, func() (done bool, err error) {
761+
va := volume_attachments[attachmentID]
762+
763+
if va.GetIscsiLoginState() == core.VolumeAttachmentIscsiLoginStateLogoutSucceeded {
764+
return true, nil
765+
}
766+
return false, nil
767+
}, ctx.Done()); err != nil {
768+
return errors.WithStack(err)
769+
}
770+
771+
return nil
772+
}
773+
750774
func (p *MockProvisionerClient) Compute() client.ComputeInterface {
751775
return &MockComputeClient{}
752776
}
@@ -1182,6 +1206,17 @@ func TestControllerDriver_ControllerUnpublishVolume(t *testing.T) {
11821206
want: nil,
11831207
wantErr: errors.New("context deadline exceeded"),
11841208
},
1209+
{
1210+
name: "Unpublish should not fail on UHP Volume logout timeout",
1211+
args: args{
1212+
req: &csi.ControllerUnpublishVolumeRequest{
1213+
VolumeId: "uhp-volume-attachment-stuck-in-logged-in-state",
1214+
NodeId: "sample-node-id",
1215+
},
1216+
},
1217+
want: &csi.ControllerUnpublishVolumeResponse{},
1218+
wantErr: nil,
1219+
},
11851220
}
11861221
for _, tt := range tests {
11871222
t.Run(tt.name, func(t *testing.T) {

pkg/oci/client/volume_attachment.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ type VolumeAttachmentInterface interface {
5454
WaitForVolumeDetached(ctx context.Context, attachmentID string) error
5555

5656
FindActiveVolumeAttachment(ctx context.Context, compartmentID, volumeID string) (core.VolumeAttachment, error)
57+
58+
// WaitForUHPVolumeLoggedOut WaitForUHPVolumeLogout polls waiting for a OCI UHP block volume attachment to be in the
59+
// LOGGED_OUT state.
60+
WaitForUHPVolumeLoggedOut(ctx context.Context, attachmentID string) error
5761
}
5862

5963
var _ VolumeAttachmentInterface = &client{}
@@ -187,7 +191,7 @@ func (c *client) getDevicePath(ctx context.Context, instanceID string) (*string,
187191
if len(listInstanceDevicesResp.Items) == 0 {
188192
c.logger.With("service", "compute", "verb", listVerb, "resource", instanceResource).
189193
With("instanceID", instanceID).Warn("No consistent device paths available for worker node.")
190-
return nil, fmt.Errorf("Max number of volumes are already attached to instance %s. Please schedule workload on different node.", instanceID)
194+
return nil, fmt.Errorf("Max number of volumes are already attached to instance %s. Please schedule workload on different node.", instanceID)
191195
}
192196
//Picks device path from available path randomly so that 2 volume attachments don't get same path when operations happen concurrently resulting in failure of one of them.
193197
device := listInstanceDevicesResp.Items[rand.Intn(len(listInstanceDevicesResp.Items))].Name
@@ -339,3 +343,24 @@ func (c *client) FindActiveVolumeAttachment(ctx context.Context, compartmentID,
339343

340344
return nil, errors.WithStack(errNotFound)
341345
}
346+
347+
func (c *client) WaitForUHPVolumeLoggedOut(ctx context.Context, attachmentID string) error {
348+
if err := wait.PollImmediateUntil(attachmentPollInterval, func() (done bool, err error) {
349+
va, err := c.GetVolumeAttachment(ctx, attachmentID)
350+
351+
if err != nil {
352+
if IsRetryable(err) {
353+
return false, nil
354+
}
355+
return true, errors.WithStack(err)
356+
}
357+
if va.GetIscsiLoginState() == core.VolumeAttachmentIscsiLoginStateLogoutSucceeded {
358+
return true, nil
359+
}
360+
return false, nil
361+
}, ctx.Done()); err != nil {
362+
return errors.WithStack(err)
363+
}
364+
365+
return nil
366+
}

pkg/volume/provisioner/block/block_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,10 @@ func (c *MockComputeClient) FindActiveVolumeAttachment(ctx context.Context, comp
285285
return nil, nil
286286
}
287287

288+
func (c *MockComputeClient) WaitForUHPVolumeLoggedOut(ctx context.Context, attachmentID string) error {
289+
return nil
290+
}
291+
288292
// MockVirtualNetworkClient mocks VirtualNetwork client implementation
289293
type MockVirtualNetworkClient struct {
290294
}

pkg/volume/provisioner/fss/fss_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,10 @@ func (c *MockComputeClient) FindActiveVolumeAttachment(ctx context.Context, comp
270270
return nil, nil
271271
}
272272

273+
func (c *MockComputeClient) WaitForUHPVolumeLoggedOut(ctx context.Context, attachmentID string) error {
274+
return nil
275+
}
276+
273277
// MockVirtualNetworkClient mocks VirtualNetwork client implementation
274278
type MockVirtualNetworkClient struct {
275279
}

0 commit comments

Comments
 (0)