Skip to content

Commit 9e3dfd3

Browse files
committed
Adjust GEMM kernel parallelism to match the thread count
1 parent d4107d9 commit 9e3dfd3

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

includes/Tensorium/Core/MatrixKernels/GemmKernel_bigger.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ template <typename T> class GemmKernelBigger {
 static constexpr int SimdWidth = Simd::width;
 static constexpr int TileRows = SimdWidth * 4;
 static constexpr int TileCols = 6;
-static constexpr int NThreads = 16;
+static constexpr int NThreads = 72;
 
 static constexpr int BlockDepth = 256;
 static constexpr int BlockRows = 384;
@@ -770,7 +770,7 @@ template <typename T> class GemmKernelBigger {
 }
 
 #ifndef NTHREADS
-# define NTHREADS 8
+# define NTHREADS 36
 #endif
 
 #define MC (16 * (40 / NTHREADS) * NTHREADS)

0 commit comments

Comments
 (0)