@@ -69,8 +69,8 @@ __global__ void elementWiseSqrt(float* __restrict__ input,
6969}
7070
7171
72- template <typename T>
73- __global__ void copyKernel (const T* in, T* out, const size_t size){
72+ template <typename T, typename S >
73+ __global__ void copyKernel (const T* __restrict__ in, S* __restrict__ out, const size_t size){
7474
7575 for (size_t idx = blockIdx .x * blockDim .x + threadIdx .x ; idx < size; idx += blockDim .x * gridDim .x ) {
7676 out[idx] = in[idx];
@@ -440,10 +440,11 @@ template __global__ void elementWiseMult(uint16_t*, const uint16_t*, const size_
440440
441441template __global__ void elementWiseDiv (const float *, const float *, float *, const size_t );
442442template __global__ void elementWiseDiv (const uint16_t *, const float *, float *, const size_t );
443- template __global__ void elementWiseDiv (const uint16_t *, const uint16_t *, uint16_t *, const size_t );
443+ template __global__ void elementWiseDiv (const uint8_t *, const float *, float *, const size_t );
444444
445445template __global__ void copyKernel (const float *, float *, const size_t );
446- template __global__ void copyKernel (const uint16_t *, uint16_t *, const size_t );
446+ template __global__ void copyKernel (const uint16_t *, float *, const size_t );
447+ template __global__ void copyKernel (const uint8_t *, float *, const size_t );
447448
448449template __global__ void fillWithValue (float *, float , const size_t );
449450template __global__ void fillWithValue (uint16_t *, uint16_t , const size_t );
0 commit comments