@@ -2348,7 +2348,7 @@ namespace StockhamGenerator
23482348
23492349 }
23502350
2351- void GenerateKernel (std::string &str)
2351+ void GenerateKernel (std::string &str, cl_device_id Dev_ID )
23522352 {
23532353 std::string twType = RegBaseType<PR>(2 );
23542354 std::string rType = RegBaseType<PR>(1 );
@@ -2501,8 +2501,19 @@ namespace StockhamGenerator
25012501 else str += " fft_back" ;
25022502 str += " (" ;
25032503
2504- // TODO : address this kludge
2505- str += " __constant cb_t *cb __attribute__((max_constant_size(32))), " ;
2504+ // TODO : address this kludge
2505+ size_t SizeParam_ret = 0 ;
2506+ clGetDeviceInfo (Dev_ID, CL_DEVICE_VENDOR, 0 , NULL , &SizeParam_ret);
2507+ char * nameVendor = new char [SizeParam_ret];
2508+ clGetDeviceInfo (Dev_ID, CL_DEVICE_VENDOR, SizeParam_ret, nameVendor, NULL );
2509+
2510+ // nv compiler doesn't support __constant kernel argument
2511+ if (strncmp (nameVendor, " NVIDIA" ,6 )!=0 )
2512+ str += " __constant cb_t *cb __attribute__((max_constant_size(32))), " ;
2513+ else
2514+ str += " __global cb_t *cb, " ;
2515+
2516+ delete [] nameVendor;
25062517
25072518 // Function attributes
25082519 if (params.fft_placeness == CLFFT_INPLACE)
@@ -3230,12 +3241,12 @@ clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo ) const
32303241 case P_SINGLE:
32313242 {
32323243 Kernel<P_SINGLE> kernel (params);
3233- kernel.GenerateKernel (programCode);
3244+ kernel.GenerateKernel (programCode, devices[ 0 ] );
32343245 } break ;
32353246 case P_DOUBLE:
32363247 {
32373248 Kernel<P_DOUBLE> kernel (params);
3238- kernel.GenerateKernel (programCode);
3249+ kernel.GenerateKernel (programCode, devices[ 0 ] );
32393250 } break ;
32403251 }
32413252
0 commit comments