Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Phi average accumulates migration #44554

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ def SkipAPIGeneration(forward_api_name):
'adam',
'adamw_',
'adamw',
'average_accumulates',
'average_accumulates_',
'decayed_adagrad_',
'decayed_adagrad',
'dgc_momentum_',
Expand Down
7 changes: 7 additions & 0 deletions paddle/fluid/operators/average_accumulates_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/average_accumulates_op.h"
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/phi/infermeta/multiary.h"

namespace paddle {
namespace operators {
Expand Down Expand Up @@ -208,7 +210,12 @@ And for a mini-batch in training, accumulators were computed as below steps:
} // namespace operators
} // namespace paddle

DECLARE_INFER_SHAPE_FUNCTOR(average_accumulates,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fluid下的kernel和infershape为啥没有删掉

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

删除fluid下kernel和infershape后不知道为何会报编译错误,我可以再试一下

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

已删除fluid下的kernel和infershape

AverageAccumulatesInferShapeFunctor,
PD_INFER_META(phi::AverageAccumulatesInferMeta));

namespace ops = paddle::operators;

REGISTER_OPERATOR(
average_accumulates,
ops::AverageAccumulatesOp,
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/operators/average_accumulates_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ void GetAccumulators<paddle::platform::CUDADeviceContext>(
in_old_num_accumulates->data<int64_t>(),
sizeof(int64_t),
stream);

memory::Copy(platform::CPUPlace(),
num_accumulates_,
cuda_place,
Expand Down
11 changes: 11 additions & 0 deletions paddle/phi/api/yaml/legacy_api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,17 @@
kernel :
func : auc

# average_accumulates
- api : average_accumulates_
  args : (Tensor param, Tensor in_sum_1, Tensor in_sum_2, Tensor in_sum_3, Tensor in_num_accumulates, Tensor in_old_num_accumulates, Tensor in_num_updates, float average_window, int64_t max_average_window, int64_t min_average_window)
  output : Tensor(out_sum_1), Tensor(out_sum_2), Tensor(out_sum_3), Tensor(out_num_accumulates), Tensor(out_old_num_accumulates), Tensor(out_num_updates)
  infer_meta :
    func : AverageAccumulatesInferMeta
  kernel :
    func : average_accumulates {dense, dense, dense, dense, dense, dense, dense -> dense, dense, dense, dense, dense, dense}
    data_type : param
  inplace : (in_sum_1 -> out_sum_1), (in_sum_2 -> out_sum_2), (in_sum_3 -> out_sum_3), (in_num_accumulates -> out_num_accumulates), (in_old_num_accumulates -> out_old_num_accumulates), (in_num_updates -> out_num_updates)

# batch_norm
- api : batch_norm
args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
Expand Down
62 changes: 62 additions & 0 deletions paddle/phi/infermeta/multiary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,68 @@ void AucInferMeta(const MetaTensor& input,
}
}

void AverageAccumulatesInferMeta(const MetaTensor& param,
                                 const MetaTensor& in_sum_1,
                                 const MetaTensor& in_sum_2,
                                 const MetaTensor& in_sum_3,
                                 const MetaTensor& in_num_accumulates,
                                 const MetaTensor& in_old_num_accumulates,
                                 const MetaTensor& in_num_updates,
                                 float average_window,
                                 int64_t max_average_window,
                                 int64_t min_average_window,
                                 MetaTensor* out_sum_1,
                                 MetaTensor* out_sum_2,
                                 MetaTensor* out_sum_3,
                                 MetaTensor* out_num_accumulates,
                                 MetaTensor* out_old_num_accumulates,
                                 MetaTensor* out_num_updates) {
  // All six outputs must be bound before any metadata can be propagated.
  PADDLE_ENFORCE_NE(
      out_sum_1,
      nullptr,
      errors::NotFound(
          "Output(out_sum_1) of AverageAccumulates should not be null."));
  PADDLE_ENFORCE_NE(
      out_sum_2,
      nullptr,
      errors::NotFound(
          "Output(out_sum_2) of AverageAccumulates should not be null."));
  PADDLE_ENFORCE_NE(
      out_sum_3,
      nullptr,
      errors::NotFound(
          "Output(out_sum_3) of AverageAccumulates should not be null."));
  PADDLE_ENFORCE_NE(out_num_accumulates,
                    nullptr,
                    errors::NotFound("Output(out_num_accumulates) of "
                                     "AverageAccumulates should not be null."));

  PADDLE_ENFORCE_NE(out_old_num_accumulates,
                    nullptr,
                    errors::NotFound("Output(out_old_num_accumulates) of "
                                     "AverageAccumulates should not be null."));

  PADDLE_ENFORCE_NE(
      out_num_updates,
      nullptr,
      errors::NotFound(
          "Output(out_num_updates) of AverageAccumulates should not be null."));

  // The three moving-sum outputs mirror the dims/dtypes of their inputs
  // (the op updates them in place at the same shape).
  out_sum_1->set_dims(in_sum_1.dims());
  out_sum_1->set_dtype(in_sum_1.dtype());
  out_sum_2->set_dims(in_sum_2.dims());
  out_sum_2->set_dtype(in_sum_2.dtype());
  out_sum_3->set_dims(in_sum_3.dims());
  out_sum_3->set_dtype(in_sum_3.dtype());
  // The three counters are scalars stored as shape-{1} tensors.
  out_num_accumulates->set_dims({1});
  out_num_accumulates->set_dtype(in_num_accumulates.dtype());
  out_old_num_accumulates->set_dims({1});
  out_old_num_accumulates->set_dtype(in_old_num_accumulates.dtype());
  out_num_updates->set_dims({1});
  out_num_updates->set_dtype(in_num_updates.dtype());
}

void BatchNormInferMeta(const MetaTensor& x,
const MetaTensor& scale,
const MetaTensor& bias,
Expand Down
17 changes: 17 additions & 0 deletions paddle/phi/infermeta/multiary.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,23 @@ void AucInferMeta(const MetaTensor& input,
MetaTensor* stat_neg_out,
MetaConfig config = MetaConfig());

// InferMeta for the average_accumulates op: copies dims/dtypes of the three
// moving-sum inputs to the matching outputs and fixes the three counter
// outputs (num_accumulates / old_num_accumulates / num_updates) to shape {1}.
// The window attributes do not influence output metadata.
void AverageAccumulatesInferMeta(const MetaTensor& param,
                                 const MetaTensor& in_sum_1,
                                 const MetaTensor& in_sum_2,
                                 const MetaTensor& in_sum_3,
                                 const MetaTensor& in_num_accumulates,
                                 const MetaTensor& in_old_num_accumulates,
                                 const MetaTensor& in_num_updates,
                                 float average_window,
                                 int64_t max_average_window,
                                 int64_t min_average_window,
                                 MetaTensor* out_sum_1,
                                 MetaTensor* out_sum_2,
                                 MetaTensor* out_sum_3,
                                 MetaTensor* out_num_accumulates,
                                 MetaTensor* out_old_num_accumulates,
                                 MetaTensor* out_num_updates);

void BatchNormInferMeta(const MetaTensor& x,
const MetaTensor& scale,
const MetaTensor& bias,
Expand Down
57 changes: 57 additions & 0 deletions paddle/phi/kernels/average_accumulates_kernel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/phi/core/dense_tensor.h"

namespace phi {

// Reads the three int64 scalar counter tensors into host integers.
// Specialized per device context in cpu/ and gpu/ kernel files.
template <typename Context>
void GetAccumulators(const Context& dev_ctx,
                     const DenseTensor& in_num_accumulates,
                     const DenseTensor& in_old_num_accumulates,
                     const DenseTensor& in_num_updates,
                     int64_t* num_updates,
                     int64_t* num_accumulates,
                     int64_t* old_num_accumulates);

// Writes the three host-side int64 counters back into the output tensors.
// Specialized per device context in cpu/ and gpu/ kernel files.
template <typename Context>
void SetAccumulators(const Context& dev_ctx,
                     int64_t num_updates,
                     int64_t num_accumulates,
                     int64_t old_num_accumulates,
                     DenseTensor* out_num_accumulates,
                     DenseTensor* out_old_num_accumulates,
                     DenseTensor* out_num_updates);

// average_accumulates kernel entry point. The shared implementation lives in
// impl/average_accumulates_kernel_impl.h (not shown here); presumably it
// updates the running sums over a sliding window controlled by the
// *_average_window attributes — see the fluid op definition for semantics.
template <typename T, typename Context>
void AverageAccumulatesKernel(const Context& dev_ctx,
                              const DenseTensor& param,
                              const DenseTensor& in_sum_1,
                              const DenseTensor& in_sum_2,
                              const DenseTensor& in_sum_3,
                              const DenseTensor& in_num_accumulates,
                              const DenseTensor& in_old_num_accumulates,
                              const DenseTensor& in_num_updates,
                              float average_window,
                              int64_t max_average_window,
                              int64_t min_average_window,
                              DenseTensor* out_sum_1,
                              DenseTensor* out_sum_2,
                              DenseTensor* out_sum_3,
                              DenseTensor* out_num_accumulates,
                              DenseTensor* out_old_num_accumulates,
                              DenseTensor* out_num_updates);
} // namespace phi
56 changes: 56 additions & 0 deletions paddle/phi/kernels/cpu/average_accumulates_kernel.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/average_accumulates_kernel.h"
#include "paddle/phi/kernels/impl/average_accumulates_kernel_impl.h"

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

// CPU specialization: counter tensors are host-addressable, so each scalar
// is read directly as the first (and only) element.
template <>
void GetAccumulators<phi::CPUContext>(const phi::CPUContext& dev_ctx,
                                      const DenseTensor& in_num_accumulates,
                                      const DenseTensor& in_old_num_accumulates,
                                      const DenseTensor& in_num_updates,
                                      int64_t* num_updates,
                                      int64_t* num_accumulates,
                                      int64_t* old_num_accumulates) {
  const int64_t* updates_data = in_num_updates.data<int64_t>();
  const int64_t* accumulates_data = in_num_accumulates.data<int64_t>();
  const int64_t* old_accumulates_data = in_old_num_accumulates.data<int64_t>();
  *num_updates = updates_data[0];
  *num_accumulates = accumulates_data[0];
  *old_num_accumulates = old_accumulates_data[0];
}

// CPU specialization: output tensors are host-addressable, so each scalar
// is stored directly as the first (and only) element.
template <>
void SetAccumulators<phi::CPUContext>(const phi::CPUContext& dev_ctx,
                                      int64_t num_updates,
                                      int64_t num_accumulates,
                                      int64_t old_num_accumulates,
                                      DenseTensor* out_num_accumulates,
                                      DenseTensor* out_old_num_accumulates,
                                      DenseTensor* out_num_updates) {
  int64_t* updates_data = out_num_updates->data<int64_t>();
  int64_t* accumulates_data = out_num_accumulates->data<int64_t>();
  int64_t* old_accumulates_data = out_old_num_accumulates->data<int64_t>();
  updates_data[0] = num_updates;
  accumulates_data[0] = num_accumulates;
  old_accumulates_data[0] = old_num_accumulates;
}

} // namespace phi

// Registers the CPU average_accumulates kernel for float and double.
PD_REGISTER_KERNEL(average_accumulates,
                   CPU,
                   ALL_LAYOUT,
                   phi::AverageAccumulatesKernel,
                   float,
                   double) {}
100 changes: 100 additions & 0 deletions paddle/phi/kernels/gpu/average_accumulates_kernel.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/average_accumulates_kernel.h"
#include "paddle/phi/kernels/impl/average_accumulates_kernel_impl.h"

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

// GPU specialization: the counter tensors live in device memory, so each
// int64 scalar is copied back to the host via the context stream.
template <>
void GetAccumulators<phi::GPUContext>(const phi::GPUContext& dev_ctx,
                                      const DenseTensor& in_num_accumulates,
                                      const DenseTensor& in_old_num_accumulates,
                                      const DenseTensor& in_num_updates,
                                      int64_t* num_updates,
                                      int64_t* num_accumulates,
                                      int64_t* old_num_accumulates) {
  auto stream = dev_ctx.stream();
  // All three inputs are assumed to reside on the same device; the place is
  // taken from one representative tensor.
  auto cuda_place = in_old_num_accumulates.place();
  // NOTE(review): these D2H copies are issued on `stream` with no explicit
  // synchronization before the caller reads the host values. This is safe
  // only if memory::Copy (or cudaMemcpyAsync to pageable host memory) is
  // synchronous with respect to the host — confirm that contract.
  paddle::memory::Copy(phi::CPUPlace(),
                       old_num_accumulates,
                       cuda_place,
                       in_old_num_accumulates.data<int64_t>(),
                       sizeof(int64_t),
                       stream);
  paddle::memory::Copy(phi::CPUPlace(),
                       num_accumulates,
                       cuda_place,
                       in_num_accumulates.data<int64_t>(),
                       sizeof(int64_t),
                       stream);
  paddle::memory::Copy(phi::CPUPlace(),
                       num_updates,
                       cuda_place,
                       in_num_updates.data<int64_t>(),
                       sizeof(int64_t),
                       stream);
}

// GPU specialization: allocates the three int64 output tensors on the
// context's device and copies the host-side counters to them (H2D) on the
// context stream.
template <>
void SetAccumulators<phi::GPUContext>(const phi::GPUContext& dev_ctx,
                                      int64_t num_updates,
                                      int64_t num_accumulates,
                                      int64_t old_num_accumulates,
                                      DenseTensor* out_num_accumulates,
                                      DenseTensor* out_old_num_accumulates,
                                      DenseTensor* out_num_updates) {
  int64_t* out_num_accumulates_ptr =
      dev_ctx.template Alloc<int64_t>(out_num_accumulates);
  int64_t* out_old_num_accumulates_ptr =
      dev_ctx.template Alloc<int64_t>(out_old_num_accumulates);
  int64_t* out_num_updates_ptr =
      dev_ctx.template Alloc<int64_t>(out_num_updates);

  auto stream = dev_ctx.stream();

  // Consistency fix: all three outputs were just allocated by `dev_ctx`, so
  // use dev_ctx.GetPlace() as the destination for every copy. The original
  // mixed dev_ctx.GetPlace() with out_old_num_accumulates->place(), which
  // name the same device but obscure that fact.
  const auto gpu_place = dev_ctx.GetPlace();

  // NOTE(review): the copy sources are stack variables and the copies are
  // issued on `stream`; this is safe only if memory::Copy stages/completes
  // the host-side read before returning (cudaMemcpyAsync from pageable host
  // memory does) — confirm that contract.
  paddle::memory::Copy(gpu_place,
                       out_num_accumulates_ptr,
                       phi::CPUPlace(),
                       &num_accumulates,
                       sizeof(int64_t),
                       stream);

  paddle::memory::Copy(gpu_place,
                       out_old_num_accumulates_ptr,
                       phi::CPUPlace(),
                       &old_num_accumulates,
                       sizeof(int64_t),
                       stream);

  paddle::memory::Copy(gpu_place,
                       out_num_updates_ptr,
                       phi::CPUPlace(),
                       &num_updates,
                       sizeof(int64_t),
                       stream);
}

} // namespace phi

// Registers the GPU average_accumulates kernel for float and double.
PD_REGISTER_KERNEL(average_accumulates,
                   GPU,
                   ALL_LAYOUT,
                   phi::AverageAccumulatesKernel,
                   float,
                   double) {}
Loading