Skip to content

Commit dd116f5

Browse files
author
Timmy
committed
do not split 1d if size<threshold
1 parent edf0746 commit dd116f5

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

src/library/plan.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,17 +66,14 @@ static bool pow235(size_t num, size_t &pow2, size_t &pow3, size_t &pow5)
6666
return true;
6767
}
6868

69-
static bool split1D_for_inplace(size_t num, vector<vector<size_t> > &splitNums, clfftPrecision precision)
69+
static bool split1D_for_inplace(size_t num, vector<vector<size_t> > &splitNums, clfftPrecision precision, size_t threshold)
7070
{
7171
/* a helper function to split big 1D to friendly 2D sizes for inplace transpose kernels
7272
currently only radix 2, 3 and 5 are supported
7373
the algorithm looks for ways to split up the 1D into 2D such that one of the dimensions is a multiple of the other dimension,
7474
and this multiple is a power of radix 2, 3 or 5.
7575
each split dimension should be split further until it is no larger than the threshold
7676
*/
77-
size_t threshold = 4096;
78-
if (precision == CLFFT_DOUBLE)
79-
threshold = 2048;
8077
if (num <= threshold)
8178
return true;
8279
if (num % 2 != 0 && num % 3 != 0 && num % 5 != 0)
@@ -174,8 +171,8 @@ static bool split1D_for_inplace(size_t num, vector<vector<size_t> > &splitNums,
174171
splitVec.push_back(temp);
175172
splitNums.push_back(splitVec);
176173

177-
status = status && split1D_for_inplace(temp*divide_factor, splitNums, precision);
178-
status = status && split1D_for_inplace(temp, splitNums, precision);
174+
status = status && split1D_for_inplace(temp*divide_factor, splitNums, precision, threshold);
175+
status = status && split1D_for_inplace(temp, splitNums, precision, threshold);
179176
return status;
180177

181178
}
@@ -794,13 +791,17 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
794791
if (fftPlan->length[0] == 354294)
795792
clLengths[1] = 243;
796793
*/
794+
size_t threshold = 4096;
795+
if (fftPlan->precision == CLFFT_DOUBLE)
796+
threshold = 2048;
797797
if (clfftGetRequestLibNoMemAlloc() &&
798798
fftPlan->placeness == CLFFT_INPLACE &&
799-
(fftPlan->inputLayout == fftPlan->outputLayout) )
799+
(fftPlan->inputLayout == fftPlan->outputLayout)
800+
&& fftPlan->length[0] > threshold)
800801
{
801802
//for inplace fft with inplace transpose, the split logic is different
802803
vector<vector<size_t> > splitNums;
803-
bool implemented = split1D_for_inplace(fftPlan->length[0], splitNums, fftPlan->precision);
804+
bool implemented = split1D_for_inplace(fftPlan->length[0], splitNums, fftPlan->precision, threshold);
804805
if (implemented)
805806
clLengths[1] = splitNums[0][0];
806807
}

0 commit comments

Comments
 (0)