Skip to content

Commit d4d4580

Browse files
authored
Merge pull request PaddlePaddle#4140 from tensor-tang/mkldnn_pool
Add MKLDNN pool
2 parents ee8efb5 + 37268db commit d4d4580

File tree

4 files changed

+490
-1
lines changed

4 files changed

+490
-1
lines changed
Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "MKLDNNPoolLayer.h"
16+
#include "paddle/math/MathUtils.h"
17+
#include "paddle/utils/Logging.h"
18+
19+
using namespace mkldnn; // NOLINT
20+
typedef memory::format format;
21+
22+
namespace paddle {
23+
24+
REGISTER_LAYER(mkldnn_pool, MKLDNNPoolLayer);
25+
26+
bool MKLDNNPoolLayer::init(const LayerMap& layerMap,
27+
const ParameterMap& parameterMap) {
28+
if (!MKLDNNLayer::init(layerMap, parameterMap)) {
29+
return false;
30+
}
31+
32+
/* the size of inputs for pool-layer is 1 */
33+
CHECK_EQ(config_.inputs_size(), 1);
34+
const PoolConfig& conf = config_.inputs(0).pool_conf();
35+
ic_ = conf.channels();
36+
ih_ = conf.img_size_y();
37+
iw_ = conf.img_size();
38+
oc_ = ic_;
39+
oh_ = conf.output_y();
40+
ow_ = conf.output_x();
41+
fh_ = conf.size_y();
42+
fw_ = conf.size_x();
43+
ph_ = conf.padding_y();
44+
pw_ = conf.padding();
45+
sh_ = conf.stride_y();
46+
sw_ = conf.stride();
47+
48+
const std::string& type = conf.pool_type();
49+
if (type == "max-projection") {
50+
poolAlgo_ = algorithm::pooling_max;
51+
} else if (type == "avg-projection") {
52+
// paddle only use exclude_padding
53+
poolAlgo_ = algorithm::pooling_avg_exclude_padding;
54+
} else {
55+
LOG(FATAL) << "unknow pooling type!";
56+
}
57+
return true;
58+
}
59+
60+
// Refresh the layer's shape from the current input: bs/ih/iw are re-read
// from the input layer, oh/ow are recomputed from window/padding/stride,
// and the output matrix is resized accordingly. ic and oc must not change.
void MKLDNNPoolLayer::reshape(
    int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
  reshapeInput(bs, ih, iw);
  // ic_ and oc can not be changed
  CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic)
      << "Input channel can not be changed";

  // cal output sizes
  // paddle used false caffeMode for pooling
  oh = outputSize(ih, fh_, ph_, sh_, false);
  ow = outputSize(iw, fw_, pw_, sw_, false);
  reshapeOutput(oh, ow);

  resizeOutput(bs, oc * oh * ow);

  printSizeInfo();
}
77+
78+
// Rebuild the forward pass: value buffers, then the forward primitive
// descriptor (which reads the buffers' memory descs), then the pipeline.
// Pooling has no parameters, so wgt and bias are unused here.
void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
                               MKLDNNMatrixPtr& in,
                               MKLDNNMatrixPtr& wgt,
                               MKLDNNMatrixPtr& bias,
                               MKLDNNMatrixPtr& out) {
  resetFwdBuffers(in, out);

  resetFwdPD(fwdPD_, in, out);

  resetFwdPipeline(pipeline, fwdPD_, in, out);

  printValueFormatFlow();
}
91+
92+
// Rebuild the backward pass: grad buffers, then the backward primitive
// descriptor, then the pipeline. The descriptor is local — only the
// constructed primitives (kept in the pipeline) outlive this call.
// Pooling has no parameters, so wgt and bias are unused here.
void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
                               MKLDNNMatrixPtr& in,
                               MKLDNNMatrixPtr& wgt,
                               MKLDNNMatrixPtr& bias,
                               MKLDNNMatrixPtr& out) {
  std::shared_ptr<pool_bwd::primitive_desc> pd;

  resetBwdBuffers(in, out);

  resetBwdPD(pd, in, out);

  resetBwdPipeline(pipeline, pd, in, out);

  printGradFormatFlow();
}
107+
108+
// Re-point the MKLDNN input value at the CPU-device input's data buffer
// (the underlying CPU matrix data pointer may have changed).
void MKLDNNPoolLayer::updateInputData() {
  inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
}
111+
112+
// Reset input then output value buffers. Order matters: resetOutValue
// checks inVal_ and reuses its memory format.
void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
                                      MKLDNNMatrixPtr& out) {
  resetInValue(in);

  resetOutValue(out);
}
118+
119+
// Bind `in` to the previous layer's output value. If that layer is not
// MKLDNN, wrap its CPU matrix in an MKLDNNMatrix with plain nchw layout;
// otherwise reuse the MKLDNN value directly.
void MKLDNNPoolLayer::resetInValue(MKLDNNMatrixPtr& in) {
  if (!inputIsOnlyMKLDNN()) {
    // Mixed-device input: only a CPU previous layer is supported.
    CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet";
    const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE);
    in = MKLDNNMatrix::create(
        cpuIn, {bs_, ic_, ih_, iw_}, format::nchw, engine_);
    return;
  }
  // Pure MKLDNN path: the input value must already be an MKLDNNMatrix.
  const MatrixPtr& dnnIn = getInputValue(0);
  in = std::dynamic_pointer_cast<MKLDNNMatrix>(dnnIn);
  CHECK(in) << "Input should be MKLDNNMatrix";
}
131+
132+
// Create the MKLDNN output value in the same format as the input value,
// and, when a CPU-device consumer exists, either set up a reorder into an
// nchw CPU copy or share the MKLDNN buffer directly when formats match.
void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
  CHECK(inVal_) << "Should reset input value first";
  memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
  out = MKLDNNMatrix::create(
      output_.value, outDims, inVal_->getFormat(), engine_);
  output_.value = std::dynamic_pointer_cast<Matrix>(out);

  // create reorder if output value has cpu device and pd do not match
  cpuOutVal_ = nullptr;
  cvtOutVal_ = nullptr;
  if (!outputIsOnlyMKLDNN()) {
    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
    cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
    if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
      cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
      // Fixed typo in the check message ("emptry" -> "empty").
      CHECK(cvtOutVal_) << "should not be empty";
    } else {
      // CPU output share the same data of MKLDNN output
      cpuOut->setData(out->getData());
      cpuOutVal_ = out;
    }
  }
}
155+
156+
// Build the forward pooling primitive descriptor from the in/out memory
// descriptors and the window geometry, and allocate the workspace that
// max-pooling needs during training.
// NOTE: the unused `inDims`/`outDims` locals were removed — the descriptor
// takes its shapes from in->getMemoryDesc() / out->getMemoryDesc().
void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
                                 MKLDNNMatrixPtr in,
                                 MKLDNNMatrixPtr out) {
  memory::dims kernels = memory::dims{fh_, fw_};
  memory::dims strides = memory::dims{sh_, sw_};
  memory::dims padL = memory::dims{ph_, pw_};
  // Right/bottom padding is computed separately by getPaddingR().
  memory::dims padR = getPaddingR();
  padding_kind padKind = padding_kind::zero;
  // Inference uses the lighter forward_scoring kind; training needs
  // forward_training (required for the max-pooling workspace below).
  prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring
                                        : prop_kind::forward_training;
  auto fwdDesc = pool_fwd::desc(pk,
                                poolAlgo_,
                                in->getMemoryDesc(),
                                out->getMemoryDesc(),
                                strides,
                                kernels,
                                padL,
                                padR,
                                padKind);
  pd.reset(new pool_fwd::primitive_desc(fwdDesc, engine_));

  // prepare workspace if necessary
  workspace_ =
      (passType_ != PASS_TEST && poolAlgo_ == algorithm::pooling_max)
          ? std::make_shared<memory>(memory(pd->workspace_primitive_desc()))
          : nullptr;
}
185+
186+
void MKLDNNPoolLayer::resetFwdPipeline(
187+
std::vector<primitive>& pipeline,
188+
std::shared_ptr<pool_fwd::primitive_desc>& pd,
189+
MKLDNNMatrixPtr& in,
190+
MKLDNNMatrixPtr& out) {
191+
pipeline.clear();
192+
fwd_ = workspace_
193+
? std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out, *workspace_))
194+
: std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out));
195+
pipeline.push_back(*fwd_);
196+
197+
if (cvtOutVal_) {
198+
pipeline.push_back(*cvtOutVal_);
199+
}
200+
}
201+
202+
// Reset output grad then input grad. Order matters: resetOutGrad requires
// outVal_ and resetInGrad requires inVal_ (both checked in the callees).
void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                      MKLDNNMatrixPtr& out) {
  resetOutGrad(out);

  resetInGrad(in);
}
208+
// Create the MKLDNN output grad with the same primitive desc (layout) as
// the forward output value. When a CPU-device consumer exists, either set
// up a CPU->MKLDNN reorder for the grad or share the CPU buffer directly
// when the formats already match.
void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
  CHECK(outVal_) << "Should have output value";
  out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());

  // create reorder if output value has cpu device and pd do not match
  cpuOutGrad_ = nullptr;
  cvtOutGrad_ = nullptr;
  if (!outputIsOnlyMKLDNN()) {
    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
    cpuOutGrad_ = MKLDNNMatrix::create(
        cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
    if (cpuOutGrad_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
      cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
      // Fixed typo in the check message ("emptry" -> "empty").
      CHECK(cvtOutGrad_) << "should not be empty";
    } else {
      // share the same data of CPU output
      output_.grad->setData(cpuOut->getData());
      out = cpuOutGrad_;
    }
  }
}
229+
230+
// Bind `in` to the previous layer's grad, reusing the input value's
// primitive desc. Leaves `in` null when the previous layer keeps no grad.
void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) {
  const MatrixPtr& prevGrad = inputLayers_[0]->getOutput().grad;
  if (nullptr == prevGrad) {
    // Nothing to propagate into (e.g. the previous layer needs no grad).
    in = nullptr;
    return;
  }
  CHECK(inVal_);
  in = MKLDNNMatrix::create(prevGrad, inVal_->getPrimitiveDesc());
}
239+
240+
// Build the backward pooling primitive descriptor using the same window
// geometry as the forward pass; it is validated against fwdPD_.
void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
                                 MKLDNNMatrixPtr& in,
                                 MKLDNNMatrixPtr& out) {
  CHECK(in);
  CHECK(out);
  memory::dims kernelDims = memory::dims{fh_, fw_};
  memory::dims strideDims = memory::dims{sh_, sw_};
  memory::dims leftPad = memory::dims{ph_, pw_};
  // Right/bottom padding is computed separately by getPaddingR().
  memory::dims rightPad = getPaddingR();
  auto bwdDesc = pool_bwd::desc(poolAlgo_,
                                in->getMemoryDesc(),
                                out->getMemoryDesc(),
                                strideDims,
                                kernelDims,
                                leftPad,
                                rightPad,
                                padding_kind::zero);
  pd.reset(new pool_bwd::primitive_desc(bwdDesc, engine_, *fwdPD_));
}
259+
260+
void MKLDNNPoolLayer::resetBwdPipeline(
261+
std::vector<primitive>& pipeline,
262+
std::shared_ptr<pool_bwd::primitive_desc>& pd,
263+
MKLDNNMatrixPtr& in,
264+
MKLDNNMatrixPtr& out) {
265+
pipeline.clear();
266+
if (cvtOutGrad_) {
267+
pipeline.push_back(*cvtOutGrad_);
268+
}
269+
270+
bwdData_ =
271+
workspace_
272+
? std::make_shared<pool_bwd>(pool_bwd(*pd, *out, *workspace_, *in))
273+
: std::make_shared<pool_bwd>(pool_bwd(*pd, *out, *in));
274+
pipeline.push_back(*bwdData_);
275+
}
276+
277+
} // namespace paddle

0 commit comments

Comments
 (0)