Skip to content

Commit ed38aa4

Browse files
Fix bug when working with multiple devices.
In that case the library currently has to work with multiple contexts, so we need one kernel object per context.
1 parent 3ed0dc3 commit ed38aa4

File tree

7 files changed

+93
-51
lines changed

7 files changed

+93
-51
lines changed

src/library/generator.copy.cpp

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -447,28 +447,33 @@ using namespace CopyGenerator;
447447
template<>
448448
clfftStatus FFTPlan::GenerateKernelPvt<Copy>(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
449449
{
450-
FFTKernelGenKeyParams params;
451-
OPENCL_V( this->GetKernelGenKeyPvt<Copy> (params), _T("GetKernelGenKey() failed!") );
450+
FFTKernelGenKeyParams params;
451+
OPENCL_V( this->GetKernelGenKeyPvt<Copy> (params), _T("GetKernelGenKey() failed!") );
452+
453+
std::string programCode;
454+
Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE;
455+
switch(pr)
456+
{
457+
case P_SINGLE:
458+
{
459+
CopyKernel<P_SINGLE> kernel(params);
460+
kernel.GenerateKernel(programCode);
461+
} break;
462+
case P_DOUBLE:
463+
{
464+
CopyKernel<P_DOUBLE> kernel(params);
465+
kernel.GenerateKernel(programCode);
466+
} break;
467+
}
452468

453-
std::string programCode;
454-
Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE;
455-
switch(pr)
456-
{
457-
case P_SINGLE:
458-
{
459-
CopyKernel<P_SINGLE> kernel(params);
460-
kernel.GenerateKernel(programCode);
461-
} break;
462-
case P_DOUBLE:
463-
{
464-
CopyKernel<P_DOUBLE> kernel(params);
465-
kernel.GenerateKernel(programCode);
466-
} break;
467-
}
469+
cl_int status = CL_SUCCESS;
470+
cl_context QueueContext = NULL;
471+
status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL);
468472

473+
OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
469474

470-
OPENCL_V( fftRepo.setProgramCode( Copy, params, programCode ), _T( "fftRepo.setclString() failed!" ) );
471-
OPENCL_V( fftRepo.setProgramEntryPoints( Copy, params, "copy_c2h", "copy_h2c" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
475+
OPENCL_V( fftRepo.setProgramCode( Copy, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" ) );
476+
OPENCL_V( fftRepo.setProgramEntryPoints( Copy, params, "copy_c2h", "copy_h2c", QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
472477

473-
return CLFFT_SUCCESS;
478+
return CLFFT_SUCCESS;
474479
}

src/library/generator.stockham.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3238,6 +3238,11 @@ clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo, const cl_comm
32383238
cl_device_id Device = NULL;
32393239
status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &Device, NULL);
32403240

3241+
OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
3242+
3243+
cl_context QueueContext = NULL;
3244+
status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL);
3245+
32413246
OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
32423247

32433248
std::string programCode;
@@ -3260,8 +3265,8 @@ clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo, const cl_comm
32603265
ReadKernelFromFile(programCode);
32613266
#endif
32623267

3263-
OPENCL_V( fftRepo.setProgramCode( Stockham, params, programCode ), _T( "fftRepo.setclString() failed!" ) );
3264-
OPENCL_V( fftRepo.setProgramEntryPoints( Stockham, params, "fft_fwd", "fft_back" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
3268+
OPENCL_V( fftRepo.setProgramCode( Stockham, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" ) );
3269+
OPENCL_V( fftRepo.setProgramEntryPoints( Stockham, params, "fft_fwd", "fft_back", QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
32653270

32663271
return CLFFT_SUCCESS;
32673272
}

src/library/generator.transpose.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -830,8 +830,14 @@ clfftStatus FFTPlan::GenerateKernelPvt<Transpose> ( FFTRepo& fftRepo, const cl_c
830830
std::string programCode;
831831
OPENCL_V( GenerateTransposeKernel( params, programCode ), _T( "GenerateTransposeKernel() failed!" ) );
832832

833-
OPENCL_V( fftRepo.setProgramCode( Transpose, params, programCode ), _T( "fftRepo.setclString() failed!" ) );
834-
OPENCL_V( fftRepo.setProgramEntryPoints( Transpose, params, "fft_trans", "fft_trans" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
833+
cl_int status = CL_SUCCESS;
834+
cl_context QueueContext = NULL;
835+
status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL);
836+
837+
OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
838+
839+
OPENCL_V( fftRepo.setProgramCode( Transpose, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" ) );
840+
OPENCL_V( fftRepo.setProgramEntryPoints( Transpose, params, "fft_trans", "fft_trans",QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
835841

836842
return CLFFT_SUCCESS;
837843
}

src/library/plan.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ clfftStatus clfftCreateDefaultPlan( clfftPlanHandle* plHandle, cl_context contex
194194
}
195195

196196
// Fetch the kernel source that this plan uses from the repo and write it out to a file
197-
clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators gen, const FFTKernelGenKeyParams& fftParams )
197+
clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators gen, const FFTKernelGenKeyParams& fftParams, const cl_context& context )
198198
{
199199
FFTRepo& fftRepo = FFTRepo::getInstance( );
200200

@@ -220,7 +220,7 @@ clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators g
220220
}
221221

222222
std::string kernel;
223-
OPENCL_V( fftRepo.getProgramCode( gen, fftParams, kernel ), _T( "fftRepo.getProgramCode failed." ) );
223+
OPENCL_V( fftRepo.getProgramCode( gen, fftParams, kernel, context ), _T( "fftRepo.getProgramCode failed." ) );
224224

225225
kernelFile.get( ) << kernel << std::endl;
226226

@@ -250,16 +250,16 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan
250250
OPENCL_V( fftPlan->GetKernelGenKey( fftParams ), _T("GetKernelGenKey() failed!") );
251251

252252
cl_program program;
253-
if( fftRepo.getclProgram( gen, fftParams, program ) == CLFFT_INVALID_PROGRAM )
253+
if( fftRepo.getclProgram( gen, fftParams, program, fftPlan->context ) == CLFFT_INVALID_PROGRAM )
254254
{
255255
// If the user wishes us to write the kernels out to disk, we do so
256256
if( fftRepo.setupData.debugFlags & CLFFT_DUMP_PROGRAMS )
257257
{
258-
OPENCL_V( WriteKernel( plHandle, gen, fftParams ), _T( "WriteKernel failed." ) );
258+
OPENCL_V( WriteKernel( plHandle, gen, fftParams, fftPlan->context ), _T( "WriteKernel failed." ) );
259259
}
260260

261261
std::string programCode;
262-
OPENCL_V( fftRepo.getProgramCode( gen, fftParams, programCode ), _T( "fftRepo.getProgramCode failed." ) );
262+
OPENCL_V( fftRepo.getProgramCode( gen, fftParams, programCode, fftPlan->context ), _T( "fftRepo.getProgramCode failed." ) );
263263

264264
const char* source = programCode.c_str();
265265
program = clCreateProgramWithSource( fftPlan->context, 1, &source, NULL, &status );
@@ -317,7 +317,7 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan
317317
if( fftRepo.getclKernel( program, CLFFT_FORWARD, kernel ) == CLFFT_INVALID_KERNEL )
318318
{
319319
std::string entryPoint;
320-
OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_FORWARD, entryPoint ), _T( "fftRepo.getProgramEntryPoint failed." ) );
320+
OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_FORWARD, entryPoint, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) );
321321

322322
kernel = clCreateKernel( program, entryPoint.c_str( ), &status );
323323
OPENCL_V( status, _T( "clCreateKernel failed" ) );
@@ -331,7 +331,7 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan
331331
if( fftRepo.getclKernel( program, CLFFT_BACKWARD, kernel ) == CLFFT_INVALID_KERNEL )
332332
{
333333
std::string entryPoint;
334-
OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_BACKWARD, entryPoint ), _T( "fftRepo.getProgramEntryPoint failed." ) );
334+
OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_BACKWARD, entryPoint, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) );
335335

336336
kernel = clCreateKernel( program, entryPoint.c_str( ), &status );
337337
OPENCL_V( status, _T( "clCreateKernel failed" ) );

src/library/repo.cpp

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ size_t FFTRepo::planCount = 1;
3434
void* FFTRepo::timerHandle = NULL;
3535
GpuStatTimer* FFTRepo::pStatTimer = NULL;
3636

37+
38+
39+
3740
clfftStatus FFTRepo::releaseResources( )
3841
{
3942
scopedLock sLock( lockRepo, _T( "releaseResources" ) );
@@ -88,11 +91,13 @@ clfftStatus FFTRepo::releaseResources( )
8891
return CLFFT_SUCCESS;
8992
}
9093

91-
clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const std::string& kernel )
94+
clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const std::string& kernel, const cl_context& context )
9295
{
9396
scopedLock sLock( lockRepo, _T( "setProgramCode" ) );
9497

95-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
98+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, context);
99+
fftRepoKey key = std::make_pair( gen, Params );
100+
96101

97102
// Prefix copyright statement at the top of generated kernels
98103
std::stringstream ss;
@@ -121,26 +126,28 @@ clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelG
121126
return CLFFT_SUCCESS;
122127
}
123128

124-
clfftStatus FFTRepo::getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, std::string& kernel )
129+
clfftStatus FFTRepo::getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, std::string& kernel, const cl_context& context )
125130
{
126131
scopedLock sLock( lockRepo, _T( "getProgramCode" ) );
127132

128-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
133+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, context);
134+
fftRepoKey key = std::make_pair( gen, Params );
129135

130136
fftRepo_iterator pos = mapFFTs.find( key);
131137
if( pos == mapFFTs.end( ) )
132138
return CLFFT_FILE_NOT_FOUND;
133139

134-
kernel = pos->second.ProgramString;
140+
kernel = pos->second.ProgramString;
135141
return CLFFT_SUCCESS;
136142
}
137143

138144
clfftStatus FFTRepo::setProgramEntryPoints( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam,
139-
const char * kernel_fwd, const char * kernel_back )
145+
const char * kernel_fwd, const char * kernel_back, const cl_context& context )
140146
{
141147
scopedLock sLock( lockRepo, _T( "setProgramEntryPoints" ) );
142148

143-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
149+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, context);
150+
fftRepoKey key = std::make_pair( gen, Params );
144151

145152
fftRepoValue& fft = mapFFTs[ key ];
146153
fft.EntryPoint_fwd = kernel_fwd;
@@ -150,11 +157,12 @@ clfftStatus FFTRepo::setProgramEntryPoints( const clfftGenerators gen, const FFT
150157
}
151158

152159
clfftStatus FFTRepo::getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam,
153-
clfftDirection dir, std::string& kernel )
160+
clfftDirection dir, std::string& kernel, const cl_context& context )
154161
{
155162
scopedLock sLock( lockRepo, _T( "getProgramEntryPoint" ) );
156163

157-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
164+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, context);
165+
fftRepoKey key = std::make_pair( gen, Params );
158166

159167
fftRepo_iterator pos = mapFFTs.find( key );
160168
if( pos == mapFFTs.end( ) )
@@ -182,7 +190,14 @@ clfftStatus FFTRepo::setclProgram( const clfftGenerators gen, const FFTKernelGen
182190
{
183191
scopedLock sLock( lockRepo, _T( "setclProgram" ) );
184192

185-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
193+
cl_int status = CL_SUCCESS;
194+
cl_context ProgramContext = NULL;
195+
status = clGetProgramInfo(prog, CL_PROGRAM_CONTEXT, sizeof(cl_context), &ProgramContext, NULL);
196+
197+
OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
198+
199+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, ProgramContext);
200+
fftRepoKey key = std::make_pair( gen, Params );
186201

187202
fftRepo_iterator pos = mapFFTs.find( key );
188203
if( pos == mapFFTs.end( ) )
@@ -198,18 +213,24 @@ clfftStatus FFTRepo::setclProgram( const clfftGenerators gen, const FFTKernelGen
198213
return CLFFT_SUCCESS;
199214
}
200215

201-
clfftStatus FFTRepo::getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& prog )
216+
clfftStatus FFTRepo::getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& prog, const cl_context& PlanContext )
202217
{
203218
scopedLock sLock( lockRepo, _T( "getclProgram" ) );
204219

205-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
220+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, PlanContext);
221+
fftRepoKey key = std::make_pair( gen, Params );
206222

207223
fftRepo_iterator pos = mapFFTs.find( key );
208224
if( pos == mapFFTs.end( ) )
209225
return CLFFT_INVALID_PROGRAM;
210226
prog = pos->second.clProgram;
211227
if (NULL == prog)
212228
return CLFFT_INVALID_PROGRAM;
229+
230+
cl_context ProgContext;
231+
clGetProgramInfo(prog, CL_PROGRAM_CONTEXT, sizeof(cl_context), &ProgContext, NULL);
232+
if (PlanContext!=ProgContext)
233+
return CLFFT_INVALID_PROGRAM;
213234

214235
return CLFFT_SUCCESS;
215236
}

src/library/repo.h

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "private.h"
2323
#include "plan.h"
2424
#include "lock.h"
25+
2526
#include "../statTimer/statisticalTimer.GPU.h"
2627

2728

@@ -48,10 +49,14 @@ class FFTRepo
4849

4950
// Map structure to map parameters that a generator uses to a specific set of kernels that the generator
5051
// has created
51-
typedef std::pair< clfftGenerators, FFTKernelGenKeyParams > fftRepoKey;
52+
//typedef std::pair< clfftGenerators, FFTKernelGenKeyParams > fftRepoKey;
53+
54+
typedef std::pair< clfftGenerators, std::pair<FFTKernelGenKeyParams, cl_context> > fftRepoKey;
5255
typedef std::map< fftRepoKey, fftRepoValue > fftRepoType;
5356
typedef fftRepoType::iterator fftRepo_iterator;
5457

58+
59+
5560
fftRepoType mapFFTs;
5661

5762
struct fftKernels {
@@ -134,25 +139,25 @@ class FFTRepo
134139

135140
clfftStatus releaseResources( );
136141

137-
clfftStatus setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, const std::string& kernel );
138-
clfftStatus getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, std::string& kernel );
142+
clfftStatus setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, const std::string& kernel, const cl_context& context);
143+
clfftStatus getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, std::string& kernel, const cl_context& context );
139144

140145
clfftStatus setProgramEntryPoints( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam,
141-
const char * kernel_fwd, const char * kernel_back );
142-
clfftStatus getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, clfftDirection dir, std::string& kernel );
146+
const char * kernel_fwd, const char * kernel_back, const cl_context& context );
147+
clfftStatus getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, clfftDirection dir, std::string& kernel , const cl_context& context);
143148

144149
clfftStatus setclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const cl_program& kernel );
145-
clfftStatus getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& kernel );
150+
clfftStatus getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& kernel, const cl_context& PlanContext );
146151

147152
clfftStatus setclKernel ( cl_program prog, clfftDirection dir, const cl_kernel& kernel );
148153
clfftStatus getclKernel ( cl_program prog, clfftDirection dir, cl_kernel& kernel );
149154

150155
clfftStatus createPlan( clfftPlanHandle* plHandle, FFTPlan*& fftPlan );
151156
clfftStatus getPlan( clfftPlanHandle plHandle, FFTPlan*& fftPlan, lockRAII*& planLock );
152157
clfftStatus deletePlan( clfftPlanHandle* plHandle );
158+
153159

154160
};
155161

156-
157162
#endif
158163

src/library/transform.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1152,7 +1152,7 @@ clfftStatus clfftEnqueueTransform(
11521152

11531153
cl_program prog;
11541154
cl_kernel kern;
1155-
OPENCL_V( fftRepo.getclProgram( fftPlan->gen, fftParams, prog ), _T( "fftRepo.getclProgram failed" ) );
1155+
OPENCL_V( fftRepo.getclProgram( fftPlan->gen, fftParams, prog, fftPlan->context ), _T( "fftRepo.getclProgram failed" ) );
11561156
OPENCL_V( fftRepo.getclKernel( prog, dir, kern ), _T( "fftRepo.getclKernels failed" ) );
11571157

11581158

0 commit comments

Comments
 (0)