Skip to content

Commit fd063c2

Browse files
committed
Workaround to make clFFT run on NVIDIA devices
1 parent 0def5e1 commit fd063c2

File tree

1 file changed

+16
-5
lines changed

1 file changed

+16
-5
lines changed

src/library/generator.stockham.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2348,7 +2348,7 @@ namespace StockhamGenerator
23482348

23492349
}
23502350

2351-
void GenerateKernel(std::string &str)
2351+
void GenerateKernel(std::string &str, cl_device_id Dev_ID)
23522352
{
23532353
std::string twType = RegBaseType<PR>(2);
23542354
std::string rType = RegBaseType<PR>(1);
@@ -2501,8 +2501,19 @@ namespace StockhamGenerator
25012501
else str += "fft_back";
25022502
str += "(";
25032503

2504-
// TODO : address this kludge
2505-
str += "__constant cb_t *cb __attribute__((max_constant_size(32))), ";
2504+
// TODO : address this kludge
2505+
size_t SizeParam_ret = 0;
2506+
clGetDeviceInfo(Dev_ID, CL_DEVICE_VENDOR, 0, NULL, &SizeParam_ret);
2507+
char* nameVendor = new char[SizeParam_ret];
2508+
clGetDeviceInfo(Dev_ID, CL_DEVICE_VENDOR, SizeParam_ret, nameVendor, NULL);
2509+
2510+
// The NVIDIA compiler doesn't support a __constant kernel argument
2511+
if (strncmp(nameVendor, "NVIDIA",6)!=0)
2512+
str += "__constant cb_t *cb __attribute__((max_constant_size(32))), ";
2513+
else
2514+
str += "__global cb_t *cb, ";
2515+
2516+
delete [] nameVendor;
25062517

25072518
// Function attributes
25082519
if(params.fft_placeness == CLFFT_INPLACE)
@@ -3230,12 +3241,12 @@ clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo ) const
32303241
case P_SINGLE:
32313242
{
32323243
Kernel<P_SINGLE> kernel(params);
3233-
kernel.GenerateKernel(programCode);
3244+
kernel.GenerateKernel(programCode, devices[0]);
32343245
} break;
32353246
case P_DOUBLE:
32363247
{
32373248
Kernel<P_DOUBLE> kernel(params);
3238-
kernel.GenerateKernel(programCode);
3249+
kernel.GenerateKernel(programCode, devices[0]);
32393250
} break;
32403251
}
32413252

0 commit comments

Comments
 (0)