Skip to content

Commit cdb2919

Browse files
committed
NV workaround: use the command queue to get the device ID (instead of devices[0])
1 parent fd063c2 commit cdb2919

File tree

5 files changed

+20
-14
lines changed

5 files changed

+20
-14
lines changed

src/library/generator.copy.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ clfftStatus FFTPlan::GetMax1DLengthPvt<Copy> (size_t * longest) const
445445
using namespace CopyGenerator;
446446

447447
template<>
448-
clfftStatus FFTPlan::GenerateKernelPvt<Copy>(FFTRepo& fftRepo ) const
448+
clfftStatus FFTPlan::GenerateKernelPvt<Copy>(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
449449
{
450450
FFTKernelGenKeyParams params;
451451
OPENCL_V( this->GetKernelGenKeyPvt<Copy> (params), _T("GetKernelGenKey() failed!") );

src/library/generator.stockham.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3229,24 +3229,30 @@ clfftStatus FFTPlan::GetMax1DLengthPvt<Stockham> (size_t * longest) const
32293229
}
32303230

32313231
template<>
3232-
clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo ) const
3232+
clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
32333233
{
32343234
FFTKernelGenKeyParams params;
32353235
OPENCL_V( this->GetKernelGenKeyPvt<Stockham> (params), _T("GetKernelGenKey() failed!") );
32363236

3237+
cl_int status = CL_SUCCESS;
3238+
cl_device_id Device = NULL;
3239+
status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &Device, NULL);
3240+
3241+
OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
3242+
32373243
std::string programCode;
32383244
Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE;
32393245
switch(pr)
32403246
{
32413247
case P_SINGLE:
32423248
{
32433249
Kernel<P_SINGLE> kernel(params);
3244-
kernel.GenerateKernel(programCode, devices[0]);
3250+
kernel.GenerateKernel(programCode, Device);
32453251
} break;
32463252
case P_DOUBLE:
32473253
{
32483254
Kernel<P_DOUBLE> kernel(params);
3249-
kernel.GenerateKernel(programCode, devices[0]);
3255+
kernel.GenerateKernel(programCode, Device);
32503256
} break;
32513257
}
32523258

src/library/generator.transpose.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -822,7 +822,7 @@ clfftStatus FFTPlan::GetWorkSizesPvt<Transpose> (std::vector<size_t> & globalWS,
822822
// OpenCL does not take unicode strings as input, so this routine returns only ASCII strings
823823
// Feed this generator the FFTPlan, and it returns the generated program as a string
824824
template<>
825-
clfftStatus FFTPlan::GenerateKernelPvt<Transpose> ( FFTRepo& fftRepo ) const
825+
clfftStatus FFTPlan::GenerateKernelPvt<Transpose> ( FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
826826
{
827827
FFTKernelGenKeyParams params;
828828
OPENCL_V( this->GetKernelGenKeyPvt<Transpose> (params), _T("GetKernelGenKey() failed!") );

src/library/plan.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
543543

544544
if(fftPlan->gen == Copy)
545545
{
546-
OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) );
546+
OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateKernel() failed" ) );
547547
OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
548548
fftPlan->baked = true;
549549
return CLFFT_SUCCESS;
@@ -1505,7 +1505,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
15051505
//break;
15061506
if (fftPlan->transflag) //Transpose for 2D
15071507
{
1508-
OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateTransposeProgram() failed" ) );
1508+
OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateTransposeProgram() failed" ) );
15091509
OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
15101510

15111511
fftPlan->baked = true;
@@ -2445,7 +2445,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
24452445
}
24462446

24472447
// For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels
2448-
OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) );
2448+
OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateKernel() failed" ) );
24492449

24502450
// For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels
24512451
OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
@@ -3265,13 +3265,13 @@ clfftStatus FFTPlan::GetKernelGenKey (FFTKernelGenKeyParams & params) const
32653265
}
32663266
}
32673267

3268-
clfftStatus FFTPlan::GenerateKernel (FFTRepo & fftRepo) const
3268+
clfftStatus FFTPlan::GenerateKernel (FFTRepo & fftRepo, const cl_command_queue commQueueFFT) const
32693269
{
32703270
switch(gen)
32713271
{
3272-
case Stockham: return GenerateKernelPvt<Stockham>(fftRepo);
3273-
case Transpose: return GenerateKernelPvt<Transpose>(fftRepo);
3274-
case Copy: return GenerateKernelPvt<Copy>(fftRepo);
3272+
case Stockham: return GenerateKernelPvt<Stockham>(fftRepo, commQueueFFT);
3273+
case Transpose: return GenerateKernelPvt<Transpose>(fftRepo, commQueueFFT);
3274+
case Copy: return GenerateKernelPvt<Copy>(fftRepo, commQueueFFT);
32753275
default: assert(false); return CLFFT_NOTIMPLEMENTED;
32763276
}
32773277
}

src/library/plan.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ class FFTPlan
202202
clfftStatus GetKernelGenKeyPvt (FFTKernelGenKeyParams & params) const;
203203

204204
template <clfftGenerators G>
205-
clfftStatus GenerateKernelPvt (FFTRepo& fftRepo) const;
205+
clfftStatus GenerateKernelPvt (FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const;
206206

207207
template <clfftGenerators G>
208208
clfftStatus GetMax1DLengthPvt (size_t *longest ) const;
@@ -338,7 +338,7 @@ class FFTPlan
338338

339339
clfftStatus GetWorkSizes (std::vector<size_t> & globalws, std::vector<size_t> & localws) const;
340340
clfftStatus GetKernelGenKey (FFTKernelGenKeyParams & params) const;
341-
clfftStatus GenerateKernel (FFTRepo & fftRepo) const;
341+
clfftStatus GenerateKernel (FFTRepo & fftRepo, const cl_command_queue commQueueFFT) const;
342342
clfftStatus GetMax1DLength (size_t *longest ) const;
343343

344344
void ResetBinarySizes();

0 commit comments

Comments
 (0)