Skip to content

Commit 3c94e56

Browse files
committed
Workaround for 2D FFT failures on NVIDIA GPUs
- Issue: #25 - In-place transpose was being used for power-of-2 dimensions - If the device is from NVIDIA, an alternative path is taken
1 parent 3ed0dc3 commit 3c94e56

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

src/library/plan.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1512,6 +1512,20 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
15121512
return CLFFT_SUCCESS;
15131513
}
15141514

1515+
// TODO : Check for a better way to do this.
1516+
bool isnvidia = false;
1517+
for (size_t Idx = 0; !isnvidia && Idx < numQueues; Idx++)
1518+
{
1519+
cl_command_queue QIdx = commQueueFFT[Idx];
1520+
cl_device_id Device;
1521+
clGetCommandQueueInfo(QIdx, CL_QUEUE_DEVICE, sizeof(Device), &Device, NULL);
1522+
char Vendor[256];
1523+
clGetDeviceInfo(Device, CL_DEVICE_VENDOR, sizeof(Vendor), &Vendor, NULL);
1524+
isnvidia |= (strncmp(Vendor, "NVIDIA", 6) == 0);
1525+
}
1526+
// NVIDIA GPUs fail when doing the in-place transpose for 2D FFTs, so skip this path
1527+
if (isnvidia) break;
1528+
15151529
if (fftPlan->length.size() != 2) break;
15161530
if (!(IsPo2(fftPlan->length[0])) || !(IsPo2(fftPlan->length[1])))
15171531
break;

0 commit comments

Comments
 (0)