We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d4107d9, commit 9e3dfd3. Copy full SHA for 9e3dfd3
includes/Tensorium/Core/MatrixKernels/GemmKernel_bigger.hpp
@@ -20,7 +20,7 @@ template <typename T> class GemmKernelBigger {
20
static constexpr int SimdWidth = Simd::width;
21
static constexpr int TileRows = SimdWidth * 4;
22
static constexpr int TileCols = 6;
23
- static constexpr int NThreads = 16;
+ static constexpr int NThreads = 72;
24
25
static constexpr int BlockDepth = 256;
26
static constexpr int BlockRows = 384;
@@ -770,7 +770,7 @@ template <typename T> class GemmKernelBigger {
770
}
771
772
#ifndef NTHREADS
773
-# define NTHREADS 8
+# define NTHREADS 36
774
#endif
775
776
#define MC (16 * (40 / NTHREADS) * NTHREADS)
0 commit comments