Skip to content

Commit ba2207e

Browse files
committed
add functions addSquare and elementWiseSqrt
1 parent c1a51d0 commit ba2207e

File tree

2 files changed

+32
-0
lines changed

2 files changed

+32
-0
lines changed

src/numerics/miscCuda.cu

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,32 @@ __global__ void elementWiseDiv(const T* numerator,
4343
}
4444

4545

46+
/**
 * Add the square of one vector to another, element by element.
 * For 0 <= idx < size:
 *     in1[idx] += in2[idx] * in2[idx];
 *
 * Each thread starts at its global x index and advances by the total number
 * of threads launched along x, so any 1D launch configuration (including one
 * with fewer threads than elements) covers all `size` elements.
 *
 * @param in1  accumulator, updated in place; must hold at least `size` floats
 * @param in2  values to be squared and added; must hold at least `size` floats
 * @param size number of elements to process
 */
__global__ void addSquare(float* in1,
                          const float* in2,
                          const size_t size) {

    // blockIdx / blockDim / gridDim components are 32-bit unsigned. Widen to
    // size_t BEFORE multiplying so neither the start index nor the stride
    // wraps around when gridDim.x * blockDim.x reaches 2^32.
    const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;
    for(size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x; idx < size; idx += stride) {
        const float value = in2[idx];
        in1[idx] += value * value;
    }
}
58+
59+
60+
/**
 * Take the square root of each element in the input vector, in place.
 * For 0 <= idx < size:
 *     input[idx] = sqrtf(input[idx]);
 *
 * Each thread starts at its global x index and advances by the total number
 * of threads launched along x, so any 1D launch configuration covers all
 * `size` elements. Negative inputs produce NaN, as sqrtf does.
 *
 * @param input vector to transform in place; must hold at least `size` floats
 * @param size  number of elements to process
 */
__global__ void elementWiseSqrt(float* __restrict__ input,
                                const size_t size) {

    // blockIdx / blockDim / gridDim components are 32-bit unsigned. Widen to
    // size_t BEFORE multiplying so neither the start index nor the stride
    // wraps around when gridDim.x * blockDim.x reaches 2^32.
    const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;
    for(size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x; idx < size; idx += stride) {
        input[idx] = sqrtf(input[idx]);
    }
}
70+
71+
4672
template<typename T>
4773
__global__ void copyKernel(const T* in, T* out, const size_t size){
4874

src/numerics/miscCuda.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ __global__ void elementWiseMult(T* in1, const T* in2, const size_t size);
8585
template<typename T, typename S>
8686
__global__ void elementWiseDiv(const T* numerator, const S* denominator, S* out, const size_t size);
8787

88+
89+
// Adds the square of in2 to in1 element-wise: in1[idx] += in2[idx] * in2[idx] for 0 <= idx < size.
__global__ void addSquare(float* in1, const float* in2, size_t size);
90+
91+
// Replaces each of the first `size` elements of input with its square root (sqrtf), in place.
__global__ void elementWiseSqrt(float* input, size_t size);
92+
93+
8894
template<typename T>
8995
__global__ void copyKernel(const T* in, T* out, const size_t size);
9096

0 commit comments

Comments
 (0)