Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-497] fix bugs in MKLDNN operators to handle the kAddTo request #11129

Merged
merged 69 commits into from
Jul 8, 2018
Merged
Show file tree
Hide file tree
Changes from 54 commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
31fdc8b
fix lint
azai91 Jun 2, 2018
b644e02
requests added to opattr
azai91 Jun 4, 2018
612f64f
comment out addto
azai91 Jun 5, 2018
2c4b41e
can invalidate kAddTo request mkldarrays
azai91 Jun 6, 2018
2dc646a
revert adding kAddTo to invalidate
azai91 Jun 6, 2018
5278ef9
use copy of output instead of creating new array
azai91 Jun 6, 2018
3adcd8d
convert output to default if fallback
azai91 Jun 6, 2018
2489b86
do not make copy when init
azai91 Jun 6, 2018
c7e64f3
copyex fallback copies to old array with kAddTo
azai91 Jun 6, 2018
67001ce
change input mem desc to output mem desc if not equal
azai91 Jun 6, 2018
5a75f53
reorder memory in commitoutput
azai91 Jun 6, 2018
f5b63fc
allocate temp memory
azai91 Jun 7, 2018
4d52987
fix var names
azai91 Jun 7, 2018
6b62e97
create helper reorder function to handle diff format/shapes
azai91 Jun 7, 2018
9da3655
fix typos
azai91 Jun 7, 2018
c0c38ca
fix typos
azai91 Jun 7, 2018
2338046
remove unused code
azai91 Jun 7, 2018
f974c3c
fix param
azai91 Jun 7, 2018
918a864
fix header files
azai91 Jun 7, 2018
50fc6ca
force input memory to output
azai91 Jun 7, 2018
a9915be
reorder2default keeps pointer to mkldnn memory
azai91 Jun 7, 2018
630c091
pass reference
azai91 Jun 7, 2018
aa6c406
remove extra lines
azai91 Jun 7, 2018
75c5160
do not get raw mem from ptr
azai91 Jun 8, 2018
f65ea9c
remove isView check
azai91 Jun 8, 2018
3483f28
fallback writes back to output
azai91 Jun 8, 2018
0428e0f
remove redundant line
azai91 Jun 8, 2018
1cdd60c
remove commented out code
azai91 Jun 8, 2018
c9e8f85
use fallback in copy (refactor)
azai91 Jun 8, 2018
996d0ef
remove unused header
azai91 Jun 8, 2018
4532209
fix lint
azai91 Jun 8, 2018
410c491
reorder2default only if mkldnn flag
azai91 Jun 11, 2018
2efdc3b
only reorder if mkldnn
azai91 Jun 11, 2018
dc3cd8d
does not assume 1 output
azai91 Jun 12, 2018
ad66611
sum compares input and output shape
azai91 Jun 13, 2018
860fa21
compare address and pd in sum
azai91 Jun 13, 2018
a727eea
refactor mkldnnsum
azai91 Jun 13, 2018
c76aee3
fix const param
azai91 Jun 13, 2018
64422aa
fix header
azai91 Jun 13, 2018
ac2b3a1
Merge branch 'master' into test-kAddTo
azai91 Jun 25, 2018
bb10946
improve control flow when setting output blob
azai91 Jun 25, 2018
ad31578
fix merge
azai91 Jun 25, 2018
0e03c96
remove kaddto comment
azai91 Jun 25, 2018
6ef7b87
add reqests to operators
azai91 Jun 25, 2018
90c9acb
fix spacing
azai91 Jun 25, 2018
7d0f275
do sum in place
azai91 Jun 25, 2018
3edf492
fix conditionals
azai91 Jun 25, 2018
5c20e46
remove redundant reqs
azai91 Jun 25, 2018
cd70dac
use wait to read all
azai91 Jun 25, 2018
0972ffa
fix lint
azai91 Jun 25, 2018
637c76a
create multiple outputs
azai91 Jun 26, 2018
5718651
create multiple copies for kaddto
azai91 Jun 26, 2018
d91df93
retrigger
azai91 Jun 26, 2018
993c7aa
retriggrer
azai91 Jun 26, 2018
e7d18be
merge
azai91 Jun 26, 2018
e2a464d
retrigger
azai91 Jun 26, 2018
dc742c8
retrigger
azai91 Jun 26, 2018
92c50f0
another retrigger
azai91 Jun 27, 2018
eb97f3d
Merge branch 'master' into test-kAddTo
azai91 Jun 27, 2018
113903a
retrigger
azai91 Jun 27, 2018
ecbde64
retrigger
azai91 Jun 27, 2018
be84769
another another retrigger
azai91 Jun 27, 2018
5181420
Merge branch 'master' into test-kAddTo
azai91 Jun 27, 2018
0731a58
merge
azai91 Jun 29, 2018
ad3c70e
fix merge
azai91 Jun 29, 2018
2874d0a
retrigger
azai91 Jun 29, 2018
0e249f7
merge
azai91 Jul 2, 2018
581495f
add kAddto to relu op
azai91 Jul 3, 2018
9e7c22e
retrigger
azai91 Jul 5, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions src/common/exec_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,21 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
is_default = nd.IsDefaultData();
#endif
if (!is_default) {
NDArray temp = bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(),
true, nd.dtype());
#if MXNET_USE_MKLDNN == 1
NDArray temp;
if (bufs != nullptr) {
temp = bufs->at(i);
} else if (kAddTo == req->at(i) && nd.IsMKLDNNData()) {
temp = nd.Reorder2Default();
} else if (kAddTo == req->at(i)) {
temp = nd;
} else {
temp = NDArray(nd.shape(), nd.ctx(), true, nd.dtype());
}
CHECK(temp.IsDefaultData());
#else
NDArray temp = bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(),
true, nd.dtype());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sparse arrays doesn't have kAddTo? @eric-haibin-lin

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. The executor won't generate kAddTo for sparse outputs. Sparse operators don't support that

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indent.

#endif
temp_src->emplace_back(nd);
temp_dst->emplace_back(temp);
Expand Down
74 changes: 3 additions & 71 deletions src/ndarray/ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ const mkldnn::memory *NDArray::GetMKLDNNData(
if (mem->get_primitive_desc() == desc
|| (desc1.data.format == GetDefaultFormat(desc1)
&& desc2.data.format == GetDefaultFormat(desc2))) {
return GetMKLDNNExact(ptr_->mkl_mem_->GetRaw(), desc);
return GetMKLDNNExact(mem, desc);
} else {
return nullptr;
}
Expand Down Expand Up @@ -638,82 +638,14 @@ void NDArray::CopyFrom(const mkldnn::memory &mem) {

CHECK(mem.get_primitive_desc().get_size() == shape().Size() * GetTypeSize(dtype_))
<< "The size of NDArray doesn't match the requested MKLDNN memory desc";
MKLDNNStream *stream = MKLDNNStream::Get();
// If this array uses MKLDNN layout, we have to make sure it's not a view.
// Otherwise, we'll have to change the layout inside the array.

if (IsMKLDNNData() && IsView())
ptr_->Reorder2Default();

const mkldnn::memory *this_mem = GetMKLDNNData();
mkldnn::memory::primitive_desc from_pd = mem.get_primitive_desc();
mkldnn::memory::desc from_desc = from_pd.desc();
mkldnn::memory::primitive_desc this_pd = this_mem->get_primitive_desc();
mkldnn::memory::desc this_desc = this_pd.desc();
mkldnn_memory_format_t from_def_format = GetDefaultFormat(from_desc);
mkldnn_memory_format_t this_def_format = GetDefaultFormat(this_desc);
if (IsView()) {
// Sliced array must use the default layout.
CHECK_EQ(GetDefaultFormat(this_desc), this_desc.data.format);
}
// It's possible that the memory and the NDArray don't have the same shape.
if (!same_shape(this_desc, from_desc)
// If the source memory uses the default layout, we can reshape directly.
&& from_def_format == from_desc.data.format) {
// In this case, we can simply create a new MKLDNN memory for the required
// shape.
mkldnn::memory::dims dims(this_desc.data.dims,
this_desc.data.dims + this_desc.data.ndims);
auto this_dtype = static_cast<mkldnn::memory::data_type>(this_desc.data.data_type);
auto this_format = static_cast<mkldnn::memory::format>(GetDefaultFormat(this_desc));
mkldnn::memory::desc data_md(dims, this_dtype, this_format);
mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine());
mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle()));
stream->RegisterMem(tmp_mem);
stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
} else if (!same_shape(this_desc, from_desc)) {
// In this case, the source memory stores data in a customized layout. We
// need to reorganize the data in memory before we can reshape.
mkldnn::memory::primitive_desc def_pd = GetPrimitiveDesc(from_pd, from_def_format);
mkldnn::memory *def_mem = TmpMemMgr::Get()->Alloc(def_pd);
stream->RegisterPrim(mkldnn::reorder(mem, *def_mem));
// Now we can reshape it
mkldnn::memory::dims dims(this_desc.data.dims,
this_desc.data.dims + this_desc.data.ndims);
auto this_dtype = static_cast<mkldnn::memory::data_type>(this_desc.data.data_type);
auto this_format = static_cast<mkldnn::memory::format>(GetDefaultFormat(this_desc));
mkldnn::memory::desc data_md(dims, this_dtype, this_format);
mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine());
mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, def_mem->get_data_handle()));
stream->RegisterMem(tmp_mem);
stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
} else if (from_pd == this_pd) {
// If the layout is the same, we can just copy data.
stream->RegisterPrim(mkldnn::reorder(mem, *this_mem));
} else {
// If both are not using the default layouts. There isn't much we can do,
// other than reorder data layout directly.
if (this_def_format != this_desc.data.format
&& from_def_format != from_desc.data.format) {
stream->RegisterPrim(mkldnn::reorder(mem, *this_mem));
} else if (this_def_format == this_desc.data.format) {
// If the dest mem uses the default memory layout, we can simply use
// the default format of the source memory to improve perf of reorder.
mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(from_pd,
from_def_format);
mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, this_mem->get_data_handle()));
stream->RegisterMem(tmp_mem);
stream->RegisterPrim(mkldnn::reorder(mem, *tmp_mem));
} else {
// If the src mem uses the default memory layout, we can use
// the default format of the source memory to improve perf.
mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(this_pd,
this_def_format);
mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle()));
stream->RegisterMem(tmp_mem);
stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
}
}
const mkldnn::memory* this_mem = GetMKLDNNData();
MKLDNNCopy(mem, this_mem);
}

mkldnn::memory *NDArray::CreateMKLDNNData(const mkldnn::memory::primitive_desc &desc) {
Expand Down
6 changes: 3 additions & 3 deletions src/operator/nn/mkldnn/mkldnn_act.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,10 @@ void MKLDNNActivationForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,

auto input_mem = in_buffer.GetMKLDNNData();
MKLDNNActForward &fwd = GetActForward(param, ctx, in_buffer, *input_mem);
auto out_mem = CreateMKLDNNMem(out_data, fwd.fwd_pd.dst_primitive_desc(), req, &in_buffer);
fwd.SetNewMem(*input_mem, *out_mem.second);
auto out_mem_t = CreateMKLDNNMem(out_data, fwd.fwd_pd.dst_primitive_desc(), req, &in_buffer);
fwd.SetNewMem(*input_mem, *out_mem_t.second);
stream->RegisterPrim(fwd.GetFwd());
CommitOutput(out_data, out_mem);
CommitOutput(out_data, out_mem_t);
stream->Submit();
}

Expand Down
4 changes: 4 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_base-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,10 @@ enum OutDataOp {

typedef std::pair<OutDataOp, mkldnn::memory *> mkldnn_output_t;


void MKLDNNCopy(const mkldnn::memory &mem,
const mkldnn::memory* this_mem);

/*
* These two functions try to create MKLDNN memory in an NDArray based on `req'.
* The difference is that the first function can create MKLDNN memory with
Expand Down
102 changes: 93 additions & 9 deletions src/operator/nn/mkldnn/mkldnn_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include <atomic>
#include "./mkldnn_base-inl.h"
#include "./mkldnn_ops-inl.h"
#include "../../../common/exec_utils.h"


namespace mxnet {

Expand Down Expand Up @@ -77,6 +79,75 @@ mkldnn::memory *TmpMemMgr::Alloc(const mkldnn::memory::primitive_desc &pd) {
}
}

/*
 * Copies the data in `mem` (source) into the memory referenced by `this_mem`
 * (destination), reordering the layout — and logically reshaping, when the two
 * descriptors disagree on shape — as needed. All work is queued as
 * mkldnn::reorder primitives on the thread's MKLDNNStream; nothing executes
 * until the stream is submitted by the caller.
 *
 * NOTE(review): `this_mem` is written through despite being `const
 * mkldnn::memory*` — MKLDNN primitives take the memory object, not the
 * pointee's constness, so the destination buffer is mutated.
 */
void MKLDNNCopy(const mkldnn::memory &mem, const mkldnn::memory* this_mem) {
  MKLDNNStream *stream = MKLDNNStream::Get();

  // Descriptors of both sides; the "default format" of a desc is the plain
  // (non-blocked) layout for its number of dimensions.
  mkldnn::memory::primitive_desc from_pd = mem.get_primitive_desc();
  mkldnn::memory::desc from_desc = from_pd.desc();
  mkldnn::memory::primitive_desc this_pd = this_mem->get_primitive_desc();
  mkldnn::memory::desc this_desc = this_pd.desc();
  mkldnn_memory_format_t from_def_format = GetDefaultFormat(from_desc);
  mkldnn_memory_format_t this_def_format = GetDefaultFormat(this_desc);
  // It's possible that the memory and the NDArray don't have the same shape.
  if (!same_shape(this_desc, from_desc)
      // If the source memory uses the default layout, we can reshape directly.
      && from_def_format == from_desc.data.format) {
    // In this case, we can simply create a new MKLDNN memory for the required
    // shape, aliasing the source's data handle (no data is copied here; the
    // reorder below does the real work).
    mkldnn::memory::dims dims(this_desc.data.dims,
                              this_desc.data.dims + this_desc.data.ndims);
    auto this_dtype = static_cast<mkldnn::memory::data_type>(this_desc.data.data_type);
    auto this_format = static_cast<mkldnn::memory::format>(GetDefaultFormat(this_desc));
    mkldnn::memory::desc data_md(dims, this_dtype, this_format);
    mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine());
    mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle()));
    // Keep tmp_mem alive until the stream runs the queued reorder.
    stream->RegisterMem(tmp_mem);
    stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
  } else if (!same_shape(this_desc, from_desc)) {
    // In this case, the source memory stores data in a customized layout. We
    // need to reorganize the data in memory before we can reshape.
    mkldnn::memory::primitive_desc def_pd = GetPrimitiveDesc(from_pd, from_def_format);
    mkldnn::memory *def_mem = TmpMemMgr::Get()->Alloc(def_pd);
    stream->RegisterPrim(mkldnn::reorder(mem, *def_mem));
    // Now we can reshape it: wrap the default-layout staging buffer in a
    // memory object with the destination's shape, then reorder into place.
    mkldnn::memory::dims dims(this_desc.data.dims,
                              this_desc.data.dims + this_desc.data.ndims);
    auto this_dtype = static_cast<mkldnn::memory::data_type>(this_desc.data.data_type);
    auto this_format = static_cast<mkldnn::memory::format>(GetDefaultFormat(this_desc));
    mkldnn::memory::desc data_md(dims, this_dtype, this_format);
    mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine());
    mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, def_mem->get_data_handle()));
    stream->RegisterMem(tmp_mem);
    stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
  } else if (from_pd == this_pd) {
    // If the layout is the same, we can just copy data.
    stream->RegisterPrim(mkldnn::reorder(mem, *this_mem));
  } else {
    // Same shape but different layouts.
    // If both are not using the default layouts. There isn't much we can do,
    // other than reorder data layout directly.
    if (this_def_format != this_desc.data.format
        && from_def_format != from_desc.data.format) {
      stream->RegisterPrim(mkldnn::reorder(mem, *this_mem));
    } else if (this_def_format == this_desc.data.format) {
      // If the dest mem uses the default memory layout, we can simply use
      // the default format of the source memory to improve perf of reorder.
      mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(from_pd,
                                                           from_def_format);
      mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, this_mem->get_data_handle()));
      stream->RegisterMem(tmp_mem);
      stream->RegisterPrim(mkldnn::reorder(mem, *tmp_mem));
    } else {
      // If the src mem uses the default memory layout, we can use
      // the default format of the source memory to improve perf.
      mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(this_pd,
                                                           this_def_format);
      mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle()));
      stream->RegisterMem(tmp_mem);
      stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
    }
  }
}

bool CanWriteTo(const NDArray &out_arr,
const NDArray &in_arr,
const mkldnn::memory::primitive_desc &desc) {
Expand Down Expand Up @@ -141,13 +212,16 @@ void CommitOutput(const NDArray &arr, const mkldnn_output_t &res) {
if (res.first == CopyBack) {
const_cast<NDArray &>(arr).CopyFrom(*res.second);
} else if (res.first == AddBack) {
auto res_memory = res.second;
auto target_pd = arr.GetMKLDNNData()->get_primitive_desc();
auto mem = arr.GetMKLDNNData(res.second->get_primitive_desc());
CHECK(mem != nullptr);
// We have to allocate new memory for the sum result.
auto sum_res = TmpMemMgr::Get()->Alloc(
res.second->get_primitive_desc());
op::MKLDNNSum(*res.second, *mem, *sum_res);
const_cast<NDArray &>(arr).CopyFrom(*sum_res);
if (mem == nullptr) {
auto tmp_memory = TmpMemMgr::Get()->Alloc(target_pd);
MKLDNNCopy(*res_memory, tmp_memory);
res_memory = tmp_memory;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As my understanding, MKLDNNCopy already reordered the res_memory with tmp_memory, so why we need to assign tmp_memory to res_memory again?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in line 224 we use res_memory and add it with mem.

mem = arr.GetMKLDNNData();
}
op::MKLDNNSum(*mem, *res_memory, *mem);
}
}

Expand Down Expand Up @@ -317,18 +391,28 @@ void FallBackCompute(FCompute fn, const nnvm::NodeAttrs &attrs,
MKLDNNStream::Get()->Submit();

std::vector<TBlob> out_blobs(outputs.size());
std::vector<NDArray> temp_src, temp_dst;
for (size_t i = 0; i < out_blobs.size(); i++) {
NDArray output = outputs[i];
// ensure output does not use mkldnn mem.
// for inplace, we already converted & copied input above.
if ((req[i] == kWriteTo) || (req[i] == kWriteInplace))
if ((req[i] == kWriteTo) || (req[i] == kWriteInplace)) {
const_cast<NDArray &>(output).InvalidateMKLDNNData();
else if (req[i] == kAddTo)
output = outputs[i].Reorder2Default();
} else if (req[i] == kAddTo && output.IsMKLDNNData()) {
NDArray temp = outputs[i].Reorder2Default();
temp_src.emplace_back(temp);
temp_dst.emplace_back(outputs[i]);
output = temp;
}
CHECK(output.IsDefaultData());
out_blobs[i] = output.data();
}

fn(attrs, ctx, in_blobs, req, out_blobs);
for (size_t i = 0; i < out_blobs.size(); i++) {
if (req[i] == kAddTo && outputs[i].IsMKLDNNData())
mxnet::common::CastNonDefaultStorage(temp_src, temp_dst, ctx, false);
}
}

template<typename DType>
Expand Down
15 changes: 7 additions & 8 deletions src/operator/nn/mkldnn/mkldnn_copy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,13 @@ void MKLDNNCopy(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
auto in_mem = data.GetMKLDNNData();
if (req == kAddTo) {
TmpMemMgr::Get()->Init(ctx.requested[0]);
// We should try and force the output memory has the same format
// as the input memory. If not, we'll have to reorder memory.
auto out_mem = out_data.GetMKLDNNData(in_mem->get_primitive_desc());
if (out_mem == nullptr)
out_mem = out_data.GetMKLDNNData();
auto sum_res = TmpMemMgr::Get()->Alloc(out_mem->get_primitive_desc());
MKLDNNSum(*in_mem, *out_mem, *sum_res);
const_cast<NDArray &>(out_data).CopyFrom(*sum_res);
// We should try and force the input memory to have the same format
// as the output memory. If not, we'll have to reorder the memory.
auto out_mem = out_data.GetMKLDNNData();
in_mem = data.GetMKLDNNData(out_mem ->get_primitive_desc());
if (in_mem == nullptr)
in_mem = data.GetMKLDNNDataReorder(out_mem->get_primitive_desc());
MKLDNNSum(*out_mem, *in_mem, *out_mem);
} else {
const_cast<NDArray &>(out_data).CopyFrom(*in_mem);
}
Expand Down
4 changes: 1 addition & 3 deletions src/operator/tensor/elemwise_unary_op_basic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,7 @@ static void CopyEx(const nnvm::NodeAttrs& attrs,
// This happens if inputs are supposed to be in MKLDNN format
// but MKLDNN doesn't support the data type or the shape. We're
// forced to convert it to the default format.
std::vector<TBlob> in_blobs {inputs[0].data()};
std::vector<TBlob> out_blobs {outputs[0].data()};
UnaryOp::IdentityCompute<cpu>(attrs, ctx, in_blobs, req, out_blobs);
FallBackCompute(UnaryOp::IdentityCompute<cpu>, attrs, ctx, inputs, req, outputs);
return;
}
#endif
Expand Down
Loading