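Bug fix for add<float32x4_t>: the NEON specialization called vmulq_f32 (element-wise multiply) instead of vaddq_f32 (element-wise add).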

hedaoyuan · hedaoyuan · commit 7fe03f7b3a72 · 2017-09-15T23:29:33.000+08:00
diff --git a/paddle/cuda/include/hl_tensor_ops.h b/paddle/cuda/include/hl_tensor_ops.h
@@ -461,7 +461,7 @@ class add<float32x4_t> {
 public:
   INLINE float32x4_t operator()(const float32x4_t a,
                                 const float32x4_t b) const {
-    return vmulq_f32(a, b);
+    return vaddq_f32(a, b);
   }
 };
 

Original file line number	Diff line number	Diff line change
`@@ -461,7 +461,7 @@ class add<float32x4_t> {`
`461`	`461`	`public:`
`462`	`462`	`INLINE float32x4_t operator()(const float32x4_t a,`
`463`	`463`	`const float32x4_t b) const {`
`464`		`- return vmulq_f32(a, b);`
	`464`	`+ return vaddq_f32(a, b);`
`465`	`465`	`}`
`466`	`466`	`};`
`467`	`467`