Commit ad99d54

[GCU] Add gcu kernels (#845)
1 parent c060e99 commit ad99d54

214 files changed, +36852 -149 lines


backends/gcu/CMakeLists.txt

Lines changed: 8 additions & 11 deletions
@@ -20,7 +20,6 @@ set(CUSTOM_GCU_NAME "paddle-custom-gcu")
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
 message(STATUS "CMAKE_DEBUG CMAKE_MODULE_PATH is: ${CMAKE_MODULE_PATH}")
 
-set(TOPS_RELEASE_VERSION 2.4.1)
 if((NOT DEFINED PACKAGE_VERSION)
    OR ("${PACKAGE_VERSION}" STREQUAL "")
    OR ("${PACKAGE_VERSION}" STREQUAL "123.456"))
@@ -31,14 +30,8 @@ if((NOT DEFINED PACKAGE_VERSION)
     OUTPUT_VARIABLE GIT_HASH
     OUTPUT_STRIP_TRAILING_WHITESPACE)
   message(STATUS "git hash: ${GIT_HASH}")
-  set(GIT_HASH ".${GIT_HASH}")
-else()
-  set(GIT_HASH "")
-  set(TOPS_RELEASE_VERSION ${PACKAGE_VERSION})
+  set(PACKAGE_VERSION ${GIT_HASH})
 endif()
-set(PACKAGE_VERSION ${GIT_HASH}-${TOPS_RELEASE_VERSION})
-
-message(STATUS "tops release version: ${TOPS_RELEASE_VERSION}")
 message(STATUS "package version: ${PACKAGE_VERSION}")
 
 include(paddle)
@@ -49,7 +42,7 @@ include(external/gcu)
 include_directories(${CMAKE_SOURCE_DIR})
 include_directories(/opt/tops/include)
 
-set(OUTPUT_PADDLE_PACKAGE_VERSION ${PADDLE_VERSION}${PACKAGE_VERSION})
+set(OUTPUT_PADDLE_PACKAGE_VERSION "${PADDLE_VERSION}.${PACKAGE_VERSION}")
 option(WITH_MKLDNN "compile with MKLDNN support" ON)
 option(WITH_KERNELS "compile with custom kernels" ON)
 option(WITH_TESTING "compile with unit testing" OFF)
@@ -61,8 +54,12 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
 message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
                "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
 message(STATUS "AR tools: ${CMAKE_AR}")
-# Note: _GLIBCXX_USE_CXX11_ABI should be same with libpaddle.so, default is 1
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1")
+
+if(DEFINED PY_VERSION)
+  message(STATUS "User define PY_VERSION: ${PY_VERSION}")
+  set(Python_EXECUTABLE "python${PY_VERSION}")
+endif()
+message(STATUS "Python_EXECUTABLE: ${Python_EXECUTABLE}")
 
 # custom runtime
 set(CUSTOM_GCU_SRCS runtime/runtime.cc)

backends/gcu/README.md

Lines changed: 3 additions & 3 deletions
@@ -46,9 +46,9 @@ python -c "import paddle; print(paddle.device.get_all_custom_device_type())"
 # 2) Check currently installed version.
 python -c "import paddle_custom_device; paddle_custom_device.gcu.version()"
 # Expect to get output like this.
-version: 0.0.0.ffc0377-2.4.1
-commit: ffc037739c55508532ee67b565517be2b4ae584d
-plugin version: 0.0.1
+version: 0.0.0.9e03b0a
+commit: 9e03b0a42a530d07fb60e141ee618fc02595bd96
+tops-sdk: 2.5.20231128
 
 # 3) Unit test, compiled with -DWITH_TESTING=ON and executed in the build directory.
 ctest

backends/gcu/README_cn.md

Lines changed: 3 additions & 3 deletions
@@ -45,9 +45,9 @@ python -c "import paddle; print(paddle.device.get_all_custom_device_type())"
 # 2) Check the currently installed version.
 python -c "import paddle_custom_device; paddle_custom_device.gcu.version()"
 # Expect output like the following.
-version: 0.0.0.ffc0377-2.4.1
-commit: ffc037739c55508532ee67b565517be2b4ae584d
-plugin version: 0.0.1
+version: 0.0.0.9e03b0a
+commit: 9e03b0a42a530d07fb60e141ee618fc02595bd96
+tops-sdk: 2.5.20231128
 
 # 3) Unit tests: compile with -DWITH_TESTING=ON, then run in the build directory.
 ctest

backends/gcu/backend/equivalence_trans/all_ops.h

Lines changed: 64 additions & 0 deletions
@@ -20,19 +20,83 @@ limitations under the License. */
 #include "backend/equivalence_trans/insensitive_ops/activation.h"
 #include "backend/equivalence_trans/insensitive_ops/adam.h"
 #include "backend/equivalence_trans/insensitive_ops/adamw.h"
+#include "backend/equivalence_trans/insensitive_ops/add_n.h"
+#include "backend/equivalence_trans/insensitive_ops/argmax.h"
+#include "backend/equivalence_trans/insensitive_ops/argmin.h"
+#include "backend/equivalence_trans/insensitive_ops/argsort.h"
+#include "backend/equivalence_trans/insensitive_ops/assign.h"
+#include "backend/equivalence_trans/insensitive_ops/assign_value.h"
 #include "backend/equivalence_trans/insensitive_ops/atan.h"
+#include "backend/equivalence_trans/insensitive_ops/batch_norm.h"
+#include "backend/equivalence_trans/insensitive_ops/bilinear_interp_v2.h"
+#include "backend/equivalence_trans/insensitive_ops/bitwise_not.h"
+#include "backend/equivalence_trans/insensitive_ops/cast.h"
+#include "backend/equivalence_trans/insensitive_ops/clip.h"
+#include "backend/equivalence_trans/insensitive_ops/concat.h"
+#include "backend/equivalence_trans/insensitive_ops/conv2d.h"
+#include "backend/equivalence_trans/insensitive_ops/conv3d.h"
 #include "backend/equivalence_trans/insensitive_ops/cos.h"
+#include "backend/equivalence_trans/insensitive_ops/cross_entropy.h"
+#include "backend/equivalence_trans/insensitive_ops/dropout.h"
 #include "backend/equivalence_trans/insensitive_ops/elementwise_binary.h"
 #include "backend/equivalence_trans/insensitive_ops/elementwise_unary.h"
+#include "backend/equivalence_trans/insensitive_ops/embedding.h"
+#include "backend/equivalence_trans/insensitive_ops/equal.h"
+#include "backend/equivalence_trans/insensitive_ops/expand.h"
+#include "backend/equivalence_trans/insensitive_ops/expand_as.h"
+#include "backend/equivalence_trans/insensitive_ops/fill_constant.h"
+#include "backend/equivalence_trans/insensitive_ops/flatten.h"
 #include "backend/equivalence_trans/insensitive_ops/floor.h"
+#include "backend/equivalence_trans/insensitive_ops/full_like.h"
+#include "backend/equivalence_trans/insensitive_ops/gather.h"
+#include "backend/equivalence_trans/insensitive_ops/gather_nd.h"
 #include "backend/equivalence_trans/insensitive_ops/gelu.h"
+#include "backend/equivalence_trans/insensitive_ops/grid_sampler.h"
+#include "backend/equivalence_trans/insensitive_ops/huber_loss.h"
+#include "backend/equivalence_trans/insensitive_ops/instance_norm.h"
+#include "backend/equivalence_trans/insensitive_ops/isinf_v2.h"
+#include "backend/equivalence_trans/insensitive_ops/label_smooth.h"
+#include "backend/equivalence_trans/insensitive_ops/layer_norm.h"
 #include "backend/equivalence_trans/insensitive_ops/log.h"
+#include "backend/equivalence_trans/insensitive_ops/log_loss.h"
+#include "backend/equivalence_trans/insensitive_ops/log_softmax.h"
+#include "backend/equivalence_trans/insensitive_ops/logical_and.h"
+#include "backend/equivalence_trans/insensitive_ops/logical_not.h"
 #include "backend/equivalence_trans/insensitive_ops/matmul_v2.h"
 #include "backend/equivalence_trans/insensitive_ops/maximum.h"
 #include "backend/equivalence_trans/insensitive_ops/mean.h"
+#include "backend/equivalence_trans/insensitive_ops/meshgrid.h"
 #include "backend/equivalence_trans/insensitive_ops/minimum.h"
 #include "backend/equivalence_trans/insensitive_ops/momentum.h"
+#include "backend/equivalence_trans/insensitive_ops/nearest_interp_v2.h"
+#include "backend/equivalence_trans/insensitive_ops/not_equal.h"
+#include "backend/equivalence_trans/insensitive_ops/one_hot.h"
+#include "backend/equivalence_trans/insensitive_ops/pool2d.h"
+#include "backend/equivalence_trans/insensitive_ops/prior_box.h"
+#include "backend/equivalence_trans/insensitive_ops/range.h"
 #include "backend/equivalence_trans/insensitive_ops/reduce_x.h"
+#include "backend/equivalence_trans/insensitive_ops/reshape.h"
 #include "backend/equivalence_trans/insensitive_ops/rmsprop.h"
+#include "backend/equivalence_trans/insensitive_ops/roi_align.h"
+#include "backend/equivalence_trans/insensitive_ops/scale.h"
+#include "backend/equivalence_trans/insensitive_ops/scatter.h"
+#include "backend/equivalence_trans/insensitive_ops/set_value.h"
+#include "backend/equivalence_trans/insensitive_ops/sigmoid_cross_entropy_with_logits.h"
+#include "backend/equivalence_trans/insensitive_ops/size.h"
+#include "backend/equivalence_trans/insensitive_ops/slice.h"
+#include "backend/equivalence_trans/insensitive_ops/softmax.h"
+#include "backend/equivalence_trans/insensitive_ops/softmax_with_cross_entropy.h"
+#include "backend/equivalence_trans/insensitive_ops/split.h"
 #include "backend/equivalence_trans/insensitive_ops/sqrt.h"
+#include "backend/equivalence_trans/insensitive_ops/squared_l2_norm.h"
+#include "backend/equivalence_trans/insensitive_ops/squeeze.h"
+#include "backend/equivalence_trans/insensitive_ops/stack.h"
+#include "backend/equivalence_trans/insensitive_ops/strided_slice.h"
 #include "backend/equivalence_trans/insensitive_ops/tanh.h"
+#include "backend/equivalence_trans/insensitive_ops/tile.h"
+#include "backend/equivalence_trans/insensitive_ops/topk.h"
+#include "backend/equivalence_trans/insensitive_ops/transpose.h"
+#include "backend/equivalence_trans/insensitive_ops/tril_triu.h"
+#include "backend/equivalence_trans/insensitive_ops/unsqueeze.h"
+#include "backend/equivalence_trans/insensitive_ops/where.h"
+#include "backend/equivalence_trans/insensitive_ops/yolo_box.h"
backends/gcu/backend/equivalence_trans/insensitive_ops/add_n.h (file name inferred from the include list above)

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "backend/register/register.h"
+
+namespace backend {
+const char *const kSum = "sum";
+
+IMPLEMT_EQUIVALENCE_TRANS_FUNC(
+    gcu_builder, op, map_inputs, running_mode, SumEquivalenceTrans) {
+  std::vector<builder::Op> inputs;
+  auto input_num = map_inputs["X"].size();
+  for (size_t i = 0; i < input_num; ++i) {
+    inputs.emplace_back(*(map_inputs["X"].at(i)));
+  }
+  size_t rank_0 = inputs[0].GetType().GetShape().size();
+  bool all_inputs_same_rank =
+      std::all_of(inputs.begin(), inputs.end(), [rank_0](builder::Op op) {
+        return op.GetType().GetShape().size() == rank_0 && rank_0 == 4;
+      });
+  all_inputs_same_rank &= (running_mode == RunningMode::ADAPTIVE);
+  if (all_inputs_same_rank) {
+    for (size_t i = 0; i < input_num; i++) {
+      inputs[i] = builder::Transpose(inputs[i], {0, 2, 3, 1});
+    }
+  }
+  if (input_num == 1) {
+    return all_inputs_same_rank
+               ? std::make_shared<GcuOp>(builder::Transpose(
+                     builder::Reshape(inputs[0], inputs[0].GetType()),
+                     {0, 3, 1, 2}))
+               : std::make_shared<GcuOp>(
+                     builder::Reshape(inputs[0], inputs[0].GetType()));
+  } else {
+    builder::Op res;
+    for (size_t i = 0; i < input_num; ++i) {
+      if (inputs[i].GetType().GetSize() != 0) {
+        if (!res.IsValid())
+          res = inputs[i];
+        else
+          res = res + inputs[i];
+      }
+    }
+    return all_inputs_same_rank
+               ? std::make_shared<GcuOp>(builder::Transpose(res, {0, 3, 1, 2}))
+               : std::make_shared<GcuOp>(res);
+  }
+}
+
+EQUIVALENCE_TRANS_FUNC_REG(kSum, INSENSITIVE, SumEquivalenceTrans);
+
+}  // namespace backend
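
When running_mode is ADAPTIVE and every input is rank-4, the translator above moves the inputs to NHWC with permutation {0, 2, 3, 1}, accumulates, and moves the result back with {0, 3, 1, 2}. Those two permutations are inverses, so the op's output keeps the caller's NCHW layout. A standalone sketch (not part of the commit) that checks this:

// transpose_roundtrip.cc: verifies the transpose pair composes to identity.
#include <array>
#include <cassert>
#include <cstdio>

// Apply a permutation to a rank-4 shape, as builder::Transpose does.
std::array<int, 4> Permute(const std::array<int, 4>& shape,
                           const std::array<int, 4>& perm) {
  std::array<int, 4> out{};
  for (int i = 0; i < 4; ++i) out[i] = shape[perm[i]];
  return out;
}

int main() {
  const std::array<int, 4> nchw = {8, 3, 224, 224};   // N, C, H, W
  auto nhwc = Permute(nchw, {0, 2, 3, 1});            // forward: NCHW -> NHWC
  auto back = Permute(nhwc, {0, 3, 1, 2});            // inverse: NHWC -> NCHW
  assert(back == nchw);                               // round trip is identity
  std::printf("NHWC shape: %d %d %d %d\n", nhwc[0], nhwc[1], nhwc[2], nhwc[3]);
  return 0;
}
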
backends/gcu/backend/equivalence_trans/insensitive_ops/argmax.h (file name inferred from the include list above)

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "backend/register/register.h"
+
+namespace backend {
+const char *const kArgMax = "arg_max";
+
+IMPLEMT_EQUIVALENCE_TRANS_FUNC(
+    gcu_builder, op, map_inputs, running_mode, ArgMaxEquivalenceTrans) {
+  int64_t axis = PADDLE_GET_CONST(int64_t, op->GetAttr("axis"));
+  auto keepdims = PADDLE_GET_CONST(bool, op->GetAttr("keepdims"));
+  auto flatten = PADDLE_GET_CONST(bool, op->GetAttr("flatten"));
+  GcuOp data = *(map_inputs["X"].at(0));
+  int64_t rank = data.GetType().GetRank();
+  GcuOp result;
+  if (flatten) {
+    auto data_shape = data.GetType().GetShape();
+    int64_t new_shape = 1;
+    for (auto dim : data_shape) {
+      new_shape *= dim;
+    }
+    builder::Type output_type(
+        {
+            new_shape,
+        },
+        data.GetType().GetPrimitiveType());
+    auto out = builder::Reshape(data, output_type);
+    result = builder::ArgMax(out, /*axis*/ 0, keepdims);
+  } else {
+    if (axis < 0) {
+      axis = axis + rank;
+    }
+    result = builder::ArgMax(data, axis, keepdims);
+  }
+  auto dtype = PADDLE_GET_CONST(int, op->GetAttr("dtype"));
+  auto ptype = builder::PrimitiveType::NONE();
+  if (dtype == static_cast<int>(phi::DataType::INT32)) {
+    ptype = builder::PrimitiveType::S32();
+  } else if (dtype == static_cast<int>(phi::DataType::INT64)) {
+    ptype = builder::PrimitiveType::S64();
+  } else {
+    PADDLE_THROW(phi::errors::Unimplemented(
+        "Unsupported data type(code %d) for ArgMax, only supports int32 and "
+        "int64.",
+        dtype));
+  }
+  if (ptype != data.GetType().GetPrimitiveType()) {
+    auto shape = result.GetType().GetShape();
+    result = builder::Convert(result, builder::Type(shape, ptype));
+  }
+  return std::make_shared<GcuOp>(result);
+}
+
+EQUIVALENCE_TRANS_FUNC_REG(kArgMax, INSENSITIVE, ArgMaxEquivalenceTrans);
+
+}  // namespace backend
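
Two details of the translation above are easy to miss: with flatten=true the input is first reshaped to a single dimension and reduced over axis 0, and a negative axis is normalized by adding the rank. A standalone host-side sketch (not part of the commit) of those semantics:

// argmax_semantics.cc: hypothetical standalone sketch, for illustration only.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Paddle-style axis normalization: -1 means the last dimension, and so on.
int64_t NormalizeAxis(int64_t axis, int64_t rank) {
  return axis < 0 ? axis + rank : axis;
}

// Equivalent of reshaping to {numel} and calling ArgMax with axis=0.
int64_t FlattenedArgMax(const std::vector<float>& data) {
  auto it = std::max_element(data.begin(), data.end());
  return static_cast<int64_t>(std::distance(data.begin(), it));
}

int main() {
  std::vector<float> x = {0.5f, 2.0f, -1.0f, 1.5f};  // any shape, flattened
  std::printf("flattened argmax = %lld\n",
              static_cast<long long>(FlattenedArgMax(x)));   // prints 1
  std::printf("axis -1 at rank 4 -> %lld\n",
              static_cast<long long>(NormalizeAxis(-1, 4))); // prints 3
  return 0;
}
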
backends/gcu/backend/equivalence_trans/insensitive_ops/argmin.h (file name inferred from the include list above)

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "backend/register/register.h"
+
+namespace backend {
+const char *const kArgMin = "arg_min";
+
+IMPLEMT_EQUIVALENCE_TRANS_FUNC(
+    gcu_builder, op, map_inputs, running_mode, ArgMinEquivalenceTrans) {
+  int64_t axis = PADDLE_GET_CONST(int64_t, op->GetAttr("axis"));
+  auto keepdims = PADDLE_GET_CONST(bool, op->GetAttr("keepdims"));
+  auto flatten = PADDLE_GET_CONST(bool, op->GetAttr("flatten"));
+  GcuOp data = *(map_inputs["X"].at(0));
+  int64_t rank = data.GetType().GetRank();
+  GcuOp result;
+  if (flatten) {
+    auto data_shape = data.GetType().GetShape();
+    int64_t new_shape = 1;
+    for (auto dim : data_shape) {
+      new_shape *= dim;
+    }
+    builder::Type output_type(
+        {
+            new_shape,
+        },
+        data.GetType().GetPrimitiveType());
+    auto out = builder::Reshape(data, output_type);
+    result = builder::ArgMin(out, /*axis*/ 0, keepdims);
+  } else {
+    if (axis < 0) {
+      axis = axis + rank;
+    }
+    result = builder::ArgMin(data, axis, keepdims);
+  }
+  auto dtype = PADDLE_GET_CONST(int, op->GetAttr("dtype"));
+  auto ptype = builder::PrimitiveType::NONE();
+  if (dtype == static_cast<int>(phi::DataType::INT32)) {
+    ptype = builder::PrimitiveType::S32();
+  } else if (dtype == static_cast<int>(phi::DataType::INT64)) {
+    ptype = builder::PrimitiveType::S64();
+  } else {
+    PADDLE_THROW(phi::errors::Unimplemented(
+        "Unsupported data type(code %d) for ArgMin, only supports int32 and "
+        "int64.",
+        dtype));
+  }
+  if (ptype != data.GetType().GetPrimitiveType()) {
+    auto shape = result.GetType().GetShape();
+    result = builder::Convert(result, builder::Type(shape, ptype));
+  }
+  return std::make_shared<GcuOp>(result);
+}
+
+EQUIVALENCE_TRANS_FUNC_REG(kArgMin, INSENSITIVE, ArgMinEquivalenceTrans);
+
+}  // namespace backend
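
ArgMinEquivalenceTrans is identical to ArgMaxEquivalenceTrans except that builder::ArgMin replaces builder::ArgMax; the flatten, axis, and dtype handling is duplicated verbatim. A standalone sketch (not part of the commit) of how the shared logic could be factored over a comparator, shown on plain host data:

// arg_extreme.cc: hypothetical refactoring sketch, for illustration only.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <vector>

// One implementation serves both reductions: the comparator decides whether
// the "extreme" element is the minimum or the maximum.
template <typename Cmp>
int64_t ArgExtreme(const std::vector<float>& data, Cmp cmp) {
  auto it = std::min_element(data.begin(), data.end(), cmp);
  return static_cast<int64_t>(std::distance(data.begin(), it));
}

int main() {
  std::vector<float> x = {0.5f, 2.0f, -1.0f, 1.5f};
  // std::less finds the minimum; std::greater inverts the order, so
  // min_element under it finds the maximum.
  std::printf("argmin = %lld\n",
              static_cast<long long>(ArgExtreme(x, std::less<float>())));    // 2
  std::printf("argmax = %lld\n",
              static_cast<long long>(ArgExtreme(x, std::greater<float>()))); // 1
  return 0;
}
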
