Skip to content

Commit 3bafdab

Browse files
committed
modify helper kernel type instantiations
1 parent c869596 commit 3bafdab

File tree

2 files changed

+7
-6
lines changed

2 files changed

+7
-6
lines changed

src/numerics/miscCuda.cu

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ __global__ void elementWiseSqrt(float* __restrict__ input,
6969
}
7070

7171

72-
template<typename T>
73-
__global__ void copyKernel(const T* in, T* out, const size_t size){
72+
template<typename T, typename S>
73+
__global__ void copyKernel(const T* __restrict__ in, S* __restrict__ out, const size_t size){
7474

7575
for(size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < size; idx += blockDim.x * gridDim.x) {
7676
out[idx] = in[idx];
@@ -440,10 +440,11 @@ template __global__ void elementWiseMult(uint16_t*, const uint16_t*, const size_
440440

441441
template __global__ void elementWiseDiv(const float*, const float*, float*, const size_t);
442442
template __global__ void elementWiseDiv(const uint16_t*, const float*, float*, const size_t);
443-
template __global__ void elementWiseDiv(const uint16_t*, const uint16_t*, uint16_t*, const size_t);
443+
template __global__ void elementWiseDiv(const uint8_t*, const float*, float*, const size_t);
444444

445445
template __global__ void copyKernel(const float*, float*, const size_t);
446-
template __global__ void copyKernel(const uint16_t*, uint16_t*, const size_t);
446+
template __global__ void copyKernel(const uint16_t*, float*, const size_t);
447+
template __global__ void copyKernel(const uint8_t*, float*, const size_t);
447448

448449
template __global__ void fillWithValue(float*, float, const size_t);
449450
template __global__ void fillWithValue(uint16_t*, uint16_t, const size_t);

src/numerics/miscCuda.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ __global__ void addSquare(float* in1, const float* in2, size_t size);
9191
__global__ void elementWiseSqrt(float* input, size_t size);
9292

9393

94-
template<typename T>
95-
__global__ void copyKernel(const T* in, T* out, const size_t size);
94+
template<typename T, typename S>
95+
__global__ void copyKernel(const T* in, S* out, const size_t size);
9696

9797

9898
template<typename T>

0 commit comments

Comments
 (0)