@@ -2348,7 +2348,7 @@ namespace StockhamGenerator
23482348
23492349 }
23502350
2351- void GenerateKernel (std::string &str)
2351+ void GenerateKernel (std::string &str, cl_device_id Dev_ID )
23522352 {
23532353 std::string twType = RegBaseType<PR>(2 );
23542354 std::string rType = RegBaseType<PR>(1 );
@@ -2501,8 +2501,19 @@ namespace StockhamGenerator
25012501 else str += " fft_back" ;
25022502 str += " (" ;
25032503
2504- // TODO : address this kludge
2505- str += " __constant cb_t *cb __attribute__((max_constant_size(32))), " ;
2504+ // TODO : address this kludge
2505+ size_t SizeParam_ret = 0 ;
2506+ clGetDeviceInfo (Dev_ID, CL_DEVICE_VENDOR, 0 , NULL , &SizeParam_ret);
2507+ char * nameVendor = new char [SizeParam_ret];
2508+ clGetDeviceInfo (Dev_ID, CL_DEVICE_VENDOR, SizeParam_ret, nameVendor, NULL );
2509+
2510+ // nv compiler doesn't support __constant kernel argument
2511+ if (strncmp (nameVendor, " NVIDIA" ,6 )!=0 )
2512+ str += " __constant cb_t *cb __attribute__((max_constant_size(32))), " ;
2513+ else
2514+ str += " __global cb_t *cb, " ;
2515+
2516+ delete [] nameVendor;
25062517
25072518 // Function attributes
25082519 if (params.fft_placeness == CLFFT_INPLACE)
@@ -3218,24 +3229,30 @@ clfftStatus FFTPlan::GetMax1DLengthPvt<Stockham> (size_t * longest) const
32183229}
32193230
32203231template <>
3221- clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo ) const
3232+ clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
32223233{
32233234 FFTKernelGenKeyParams params;
32243235 OPENCL_V ( this ->GetKernelGenKeyPvt <Stockham> (params), _T (" GetKernelGenKey() failed!" ) );
32253236
3237+ cl_int status = CL_SUCCESS;
3238+ cl_device_id Device = NULL ;
3239+ status = clGetCommandQueueInfo (commQueueFFT, CL_QUEUE_DEVICE, sizeof (cl_device_id), &Device, NULL );
3240+
3241+ OPENCL_V ( status, _T ( " clGetCommandQueueInfo failed" ) );
3242+
32263243 std::string programCode;
32273244 Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE;
32283245 switch (pr)
32293246 {
32303247 case P_SINGLE:
32313248 {
32323249 Kernel<P_SINGLE> kernel (params);
3233- kernel.GenerateKernel (programCode);
3250+ kernel.GenerateKernel (programCode, Device );
32343251 } break ;
32353252 case P_DOUBLE:
32363253 {
32373254 Kernel<P_DOUBLE> kernel (params);
3238- kernel.GenerateKernel (programCode);
3255+ kernel.GenerateKernel (programCode, Device );
32393256 } break ;
32403257 }
32413258
0 commit comments