Skip to content

Commit 4afd37a

Browse files
fwyzard, smuzaffar
authored and committed
Use hard-coded cache-size parameters on CUDA devices
1 parent 498a024 commit 4afd37a

File tree

1 file changed

+13
-11
lines changed

1 file changed

+13
-11
lines changed

Eigen/src/Core/products/GeneralBlockPanelKernel.h

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -97,19 +97,21 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
97 97
#ifdef EIGEN_CUDA_ARCH
98 98
if (action==GetAction)
99 99
{
100-
// assume some sensible numbers
100+
#if EIGEN_CUDA_ARCH >= 700
101+
// Volta, Turing, or newer
102+
// - the L1 cache is configurable at runtime, with a minimum of 32 KB/SM
103+
// - the L2 cache depends on the actual card, with a minimum of 64 KB/SM
101 104
*l1 = 32 * 1024;
102-
*l2 = 1024 * 1024;
105+
*l2 = 64 * 1024;
103 106
*l3 = 0;
104-
// query the L2 cache size
105-
int currentDevice;
106-
int l2CacheSize;
107-
cudaGetDevice(&currentDevice);
108-
cudaDeviceGetAttribute(&l2CacheSize, cudaDevAttrL2CacheSize, currentDevice);
109-
if (l2CacheSize)
110-
{
111-
*l2 = l2CacheSize;
112-
}
107+
#else
108+
// Kepler, Maxwell, Pascal
109+
// - the L1 cache is configurable at runtime, with a minimum of 16 KB/SM
110+
// - the L2 cache depends on the actual card, with a minimum of 64 KB/SM
111+
*l1 = 16 * 1024;
112+
*l2 = 64 * 1024;
113+
*l3 = 0;
114+
#endif
113 115
}
114 116
else
115 117
{

0 commit comments

Comments
 (0)