Skip to content

Commit c12b25c

Browse files
author
Kent Knox
committed
Merge pull request #29 from BenjaminCoquelle/develop
Fix bug when working with multiple devices.
2 parents 0b41606 + ed38aa4 commit c12b25c

File tree

7 files changed

+93
-51
lines changed

7 files changed

+93
-51
lines changed

src/library/generator.copy.cpp

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -447,28 +447,33 @@ using namespace CopyGenerator;
447447
template<>
448448
clfftStatus FFTPlan::GenerateKernelPvt<Copy>(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
449449
{
450-
FFTKernelGenKeyParams params;
451-
OPENCL_V( this->GetKernelGenKeyPvt<Copy> (params), _T("GetKernelGenKey() failed!") );
450+
FFTKernelGenKeyParams params;
451+
OPENCL_V( this->GetKernelGenKeyPvt<Copy> (params), _T("GetKernelGenKey() failed!") );
452+
453+
std::string programCode;
454+
Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE;
455+
switch(pr)
456+
{
457+
case P_SINGLE:
458+
{
459+
CopyKernel<P_SINGLE> kernel(params);
460+
kernel.GenerateKernel(programCode);
461+
} break;
462+
case P_DOUBLE:
463+
{
464+
CopyKernel<P_DOUBLE> kernel(params);
465+
kernel.GenerateKernel(programCode);
466+
} break;
467+
}
452468

453-
std::string programCode;
454-
Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE;
455-
switch(pr)
456-
{
457-
case P_SINGLE:
458-
{
459-
CopyKernel<P_SINGLE> kernel(params);
460-
kernel.GenerateKernel(programCode);
461-
} break;
462-
case P_DOUBLE:
463-
{
464-
CopyKernel<P_DOUBLE> kernel(params);
465-
kernel.GenerateKernel(programCode);
466-
} break;
467-
}
469+
cl_int status = CL_SUCCESS;
470+
cl_context QueueContext = NULL;
471+
status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL);
468472

473+
OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
469474

470-
OPENCL_V( fftRepo.setProgramCode( Copy, params, programCode ), _T( "fftRepo.setclString() failed!" ) );
471-
OPENCL_V( fftRepo.setProgramEntryPoints( Copy, params, "copy_c2h", "copy_h2c" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
475+
OPENCL_V( fftRepo.setProgramCode( Copy, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" ) );
476+
OPENCL_V( fftRepo.setProgramEntryPoints( Copy, params, "copy_c2h", "copy_h2c", QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
472477

473-
return CLFFT_SUCCESS;
478+
return CLFFT_SUCCESS;
474479
}

src/library/generator.stockham.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3238,6 +3238,11 @@ clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo, const cl_comm
32383238
cl_device_id Device = NULL;
32393239
status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &Device, NULL);
32403240

3241+
OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
3242+
3243+
cl_context QueueContext = NULL;
3244+
status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL);
3245+
32413246
OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
32423247

32433248
std::string programCode;
@@ -3260,8 +3265,8 @@ clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo, const cl_comm
32603265
ReadKernelFromFile(programCode);
32613266
#endif
32623267

3263-
OPENCL_V( fftRepo.setProgramCode( Stockham, params, programCode ), _T( "fftRepo.setclString() failed!" ) );
3264-
OPENCL_V( fftRepo.setProgramEntryPoints( Stockham, params, "fft_fwd", "fft_back" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
3268+
OPENCL_V( fftRepo.setProgramCode( Stockham, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" ) );
3269+
OPENCL_V( fftRepo.setProgramEntryPoints( Stockham, params, "fft_fwd", "fft_back", QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
32653270

32663271
return CLFFT_SUCCESS;
32673272
}

src/library/generator.transpose.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -830,8 +830,14 @@ clfftStatus FFTPlan::GenerateKernelPvt<Transpose> ( FFTRepo& fftRepo, const cl_c
830830
std::string programCode;
831831
OPENCL_V( GenerateTransposeKernel( params, programCode ), _T( "GenerateTransposeKernel() failed!" ) );
832832

833-
OPENCL_V( fftRepo.setProgramCode( Transpose, params, programCode ), _T( "fftRepo.setclString() failed!" ) );
834-
OPENCL_V( fftRepo.setProgramEntryPoints( Transpose, params, "fft_trans", "fft_trans" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
833+
cl_int status = CL_SUCCESS;
834+
cl_context QueueContext = NULL;
835+
status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL);
836+
837+
OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
838+
839+
OPENCL_V( fftRepo.setProgramCode( Transpose, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" ) );
840+
OPENCL_V( fftRepo.setProgramEntryPoints( Transpose, params, "fft_trans", "fft_trans",QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" ) );
835841

836842
return CLFFT_SUCCESS;
837843
}

src/library/plan.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ clfftStatus clfftCreateDefaultPlan( clfftPlanHandle* plHandle, cl_context contex
194194
}
195195

196196
// Read the kernels that this plan uses from file, and store into the plan
197-
clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators gen, const FFTKernelGenKeyParams& fftParams )
197+
clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators gen, const FFTKernelGenKeyParams& fftParams, const cl_context& context )
198198
{
199199
FFTRepo& fftRepo = FFTRepo::getInstance( );
200200

@@ -220,7 +220,7 @@ clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators g
220220
}
221221

222222
std::string kernel;
223-
OPENCL_V( fftRepo.getProgramCode( gen, fftParams, kernel ), _T( "fftRepo.getProgramCode failed." ) );
223+
OPENCL_V( fftRepo.getProgramCode( gen, fftParams, kernel, context ), _T( "fftRepo.getProgramCode failed." ) );
224224

225225
kernelFile.get( ) << kernel << std::endl;
226226

@@ -250,16 +250,16 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan
250250
OPENCL_V( fftPlan->GetKernelGenKey( fftParams ), _T("GetKernelGenKey() failed!") );
251251

252252
cl_program program;
253-
if( fftRepo.getclProgram( gen, fftParams, program ) == CLFFT_INVALID_PROGRAM )
253+
if( fftRepo.getclProgram( gen, fftParams, program, fftPlan->context ) == CLFFT_INVALID_PROGRAM )
254254
{
255255
// If the user wishes us to write the kernels out to disk, we do so
256256
if( fftRepo.setupData.debugFlags & CLFFT_DUMP_PROGRAMS )
257257
{
258-
OPENCL_V( WriteKernel( plHandle, gen, fftParams ), _T( "WriteKernel failed." ) );
258+
OPENCL_V( WriteKernel( plHandle, gen, fftParams, fftPlan->context ), _T( "WriteKernel failed." ) );
259259
}
260260

261261
std::string programCode;
262-
OPENCL_V( fftRepo.getProgramCode( gen, fftParams, programCode ), _T( "fftRepo.getProgramCode failed." ) );
262+
OPENCL_V( fftRepo.getProgramCode( gen, fftParams, programCode, fftPlan->context ), _T( "fftRepo.getProgramCode failed." ) );
263263

264264
const char* source = programCode.c_str();
265265
program = clCreateProgramWithSource( fftPlan->context, 1, &source, NULL, &status );
@@ -317,7 +317,7 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan
317317
if( fftRepo.getclKernel( program, CLFFT_FORWARD, kernel ) == CLFFT_INVALID_KERNEL )
318318
{
319319
std::string entryPoint;
320-
OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_FORWARD, entryPoint ), _T( "fftRepo.getProgramEntryPoint failed." ) );
320+
OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_FORWARD, entryPoint, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) );
321321

322322
kernel = clCreateKernel( program, entryPoint.c_str( ), &status );
323323
OPENCL_V( status, _T( "clCreateKernel failed" ) );
@@ -331,7 +331,7 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan
331331
if( fftRepo.getclKernel( program, CLFFT_BACKWARD, kernel ) == CLFFT_INVALID_KERNEL )
332332
{
333333
std::string entryPoint;
334-
OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_BACKWARD, entryPoint ), _T( "fftRepo.getProgramEntryPoint failed." ) );
334+
OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_BACKWARD, entryPoint, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) );
335335

336336
kernel = clCreateKernel( program, entryPoint.c_str( ), &status );
337337
OPENCL_V( status, _T( "clCreateKernel failed" ) );

src/library/repo.cpp

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ size_t FFTRepo::planCount = 1;
3434
void* FFTRepo::timerHandle = NULL;
3535
GpuStatTimer* FFTRepo::pStatTimer = NULL;
3636

37+
38+
39+
3740
clfftStatus FFTRepo::releaseResources( )
3841
{
3942
scopedLock sLock( lockRepo, _T( "releaseResources" ) );
@@ -88,11 +91,13 @@ clfftStatus FFTRepo::releaseResources( )
8891
return CLFFT_SUCCESS;
8992
}
9093

91-
clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const std::string& kernel )
94+
clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const std::string& kernel, const cl_context& context )
9295
{
9396
scopedLock sLock( lockRepo, _T( "setProgramCode" ) );
9497

95-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
98+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, context);
99+
fftRepoKey key = std::make_pair( gen, Params );
100+
96101

97102
// Prefix copyright statement at the top of generated kernels
98103
std::stringstream ss;
@@ -121,26 +126,28 @@ clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelG
121126
return CLFFT_SUCCESS;
122127
}
123128

124-
clfftStatus FFTRepo::getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, std::string& kernel )
129+
clfftStatus FFTRepo::getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, std::string& kernel, const cl_context& context )
125130
{
126131
scopedLock sLock( lockRepo, _T( "getProgramCode" ) );
127132

128-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
133+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, context);
134+
fftRepoKey key = std::make_pair( gen, Params );
129135

130136
fftRepo_iterator pos = mapFFTs.find( key);
131137
if( pos == mapFFTs.end( ) )
132138
return CLFFT_FILE_NOT_FOUND;
133139

134-
kernel = pos->second.ProgramString;
140+
kernel = pos->second.ProgramString;
135141
return CLFFT_SUCCESS;
136142
}
137143

138144
clfftStatus FFTRepo::setProgramEntryPoints( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam,
139-
const char * kernel_fwd, const char * kernel_back )
145+
const char * kernel_fwd, const char * kernel_back, const cl_context& context )
140146
{
141147
scopedLock sLock( lockRepo, _T( "setProgramEntryPoints" ) );
142148

143-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
149+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, context);
150+
fftRepoKey key = std::make_pair( gen, Params );
144151

145152
fftRepoValue& fft = mapFFTs[ key ];
146153
fft.EntryPoint_fwd = kernel_fwd;
@@ -150,11 +157,12 @@ clfftStatus FFTRepo::setProgramEntryPoints( const clfftGenerators gen, const FFT
150157
}
151158

152159
clfftStatus FFTRepo::getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam,
153-
clfftDirection dir, std::string& kernel )
160+
clfftDirection dir, std::string& kernel, const cl_context& context )
154161
{
155162
scopedLock sLock( lockRepo, _T( "getProgramEntryPoint" ) );
156163

157-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
164+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, context);
165+
fftRepoKey key = std::make_pair( gen, Params );
158166

159167
fftRepo_iterator pos = mapFFTs.find( key );
160168
if( pos == mapFFTs.end( ) )
@@ -182,7 +190,14 @@ clfftStatus FFTRepo::setclProgram( const clfftGenerators gen, const FFTKernelGen
182190
{
183191
scopedLock sLock( lockRepo, _T( "setclProgram" ) );
184192

185-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
193+
cl_int status = CL_SUCCESS;
194+
cl_context ProgramContext = NULL;
195+
status = clGetProgramInfo(prog, CL_PROGRAM_CONTEXT, sizeof(cl_context), &ProgramContext, NULL);
196+
197+
OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
198+
199+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, ProgramContext);
200+
fftRepoKey key = std::make_pair( gen, Params );
186201

187202
fftRepo_iterator pos = mapFFTs.find( key );
188203
if( pos == mapFFTs.end( ) )
@@ -198,18 +213,24 @@ clfftStatus FFTRepo::setclProgram( const clfftGenerators gen, const FFTKernelGen
198213
return CLFFT_SUCCESS;
199214
}
200215

201-
clfftStatus FFTRepo::getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& prog )
216+
clfftStatus FFTRepo::getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& prog, const cl_context& PlanContext )
202217
{
203218
scopedLock sLock( lockRepo, _T( "getclProgram" ) );
204219

205-
std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam );
220+
std::pair<FFTKernelGenKeyParams, cl_context> Params = std::make_pair(fftParam, PlanContext);
221+
fftRepoKey key = std::make_pair( gen, Params );
206222

207223
fftRepo_iterator pos = mapFFTs.find( key );
208224
if( pos == mapFFTs.end( ) )
209225
return CLFFT_INVALID_PROGRAM;
210226
prog = pos->second.clProgram;
211227
if (NULL == prog)
212228
return CLFFT_INVALID_PROGRAM;
229+
230+
cl_context ProgContext;
231+
clGetProgramInfo(prog, CL_PROGRAM_CONTEXT, sizeof(cl_context), &ProgContext, NULL);
232+
if (PlanContext!=ProgContext)
233+
return CLFFT_INVALID_PROGRAM;
213234

214235
return CLFFT_SUCCESS;
215236
}

src/library/repo.h

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "private.h"
2323
#include "plan.h"
2424
#include "lock.h"
25+
2526
#include "../statTimer/statisticalTimer.GPU.h"
2627

2728

@@ -48,10 +49,14 @@ class FFTRepo
4849

4950
// Map structure to map parameters that a generator uses to a specific set of kernels that the generator
5051
// has created
51-
typedef std::pair< clfftGenerators, FFTKernelGenKeyParams > fftRepoKey;
52+
//typedef std::pair< clfftGenerators, FFTKernelGenKeyParams > fftRepoKey;
53+
54+
typedef std::pair< clfftGenerators, std::pair<FFTKernelGenKeyParams, cl_context> > fftRepoKey;
5255
typedef std::map< fftRepoKey, fftRepoValue > fftRepoType;
5356
typedef fftRepoType::iterator fftRepo_iterator;
5457

58+
59+
5560
fftRepoType mapFFTs;
5661

5762
struct fftKernels {
@@ -134,25 +139,25 @@ class FFTRepo
134139

135140
clfftStatus releaseResources( );
136141

137-
clfftStatus setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, const std::string& kernel );
138-
clfftStatus getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, std::string& kernel );
142+
clfftStatus setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, const std::string& kernel, const cl_context& context);
143+
clfftStatus getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, std::string& kernel, const cl_context& context );
139144

140145
clfftStatus setProgramEntryPoints( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam,
141-
const char * kernel_fwd, const char * kernel_back );
142-
clfftStatus getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, clfftDirection dir, std::string& kernel );
146+
const char * kernel_fwd, const char * kernel_back, const cl_context& context );
147+
clfftStatus getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, clfftDirection dir, std::string& kernel , const cl_context& context);
143148

144149
clfftStatus setclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const cl_program& kernel );
145-
clfftStatus getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& kernel );
150+
clfftStatus getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& kernel, const cl_context& PlanContext );
146151

147152
clfftStatus setclKernel ( cl_program prog, clfftDirection dir, const cl_kernel& kernel );
148153
clfftStatus getclKernel ( cl_program prog, clfftDirection dir, cl_kernel& kernel );
149154

150155
clfftStatus createPlan( clfftPlanHandle* plHandle, FFTPlan*& fftPlan );
151156
clfftStatus getPlan( clfftPlanHandle plHandle, FFTPlan*& fftPlan, lockRAII*& planLock );
152157
clfftStatus deletePlan( clfftPlanHandle* plHandle );
158+
153159

154160
};
155161

156-
157162
#endif
158163

src/library/transform.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1162,7 +1162,7 @@ clfftStatus clfftEnqueueTransform(
11621162

11631163
cl_program prog;
11641164
cl_kernel kern;
1165-
OPENCL_V( fftRepo.getclProgram( fftPlan->gen, fftParams, prog ), _T( "fftRepo.getclProgram failed" ) );
1165+
OPENCL_V( fftRepo.getclProgram( fftPlan->gen, fftParams, prog, fftPlan->context ), _T( "fftRepo.getclProgram failed" ) );
11661166
OPENCL_V( fftRepo.getclKernel( prog, dir, kern ), _T( "fftRepo.getclKernels failed" ) );
11671167

11681168

0 commit comments

Comments
 (0)