Skip to content

Commit 7fe03f7

Browse files
committed
Bug fix for add<float32x4_t>.
1 parent 9e7da67 commit 7fe03f7

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

paddle/cuda/include/hl_tensor_ops.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -461,7 +461,7 @@ class add<float32x4_t> {
461 461     public:
462 462       INLINE float32x4_t operator()(const float32x4_t a,
463 463                                     const float32x4_t b) const {
464     -       return vmulq_f32(a, b);
    464 +       return vaddq_f32(a, b);
465 465       }
466 466     };
467 467

0 commit comments

Comments
 (0)