support ernie-int8 test and prune op attribute test #35890

Merged: 6 commits, Sep 22, 2021. Showing changes from all commits.
24 changes: 23 additions & 1 deletion paddle/fluid/inference/tests/infer_ut/CMakeLists.txt
@@ -93,20 +93,42 @@ if (USE_TENSORRT AND WITH_GPU)
file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
endif()
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
message(SEND_ERROR "Failed to detect TensorRT version.")
endif()
string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1"
TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1"
TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}")
  message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
-         "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
+         "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ")
include_directories("${TENSORRT_INCLUDE_DIR}")
link_directories("${TENSORRT_LIB_DIR}")
add_compile_definitions(NV_TENSORRT_MAJOR=${TENSORRT_MAJOR_VERSION})
add_compile_definitions(NV_TENSORRT_MINOR=${TENSORRT_MINOR_VERSION})
add_compile_definitions(NV_TENSORRT_PATCH=${TENSORRT_PATCH_VERSION})
add_compile_definitions(NV_TENSORRT_BUILD=${TENSORRT_BUILD_VERSION})
endif()

if(WITH_MKL)
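For reference, the string(REGEX MATCH ...) calls above scrape the version macros out of the TensorRT headers; the second file(READ ...) is a fallback for installs where the macros live in NvInferVersion.h rather than NvInfer.h. They match definitions of roughly the following shape (an illustrative excerpt; the values are examples, not pinned):

// NvInferVersion.h (illustrative excerpt; example values only)
#define NV_TENSORRT_MAJOR 7  // major version
#define NV_TENSORRT_MINOR 2  // minor version
#define NV_TENSORRT_PATCH 3  // patch version
#define NV_TENSORRT_BUILD 4  // build number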
39 changes: 39 additions & 0 deletions paddle/fluid/inference/tests/infer_ut/run.sh
@@ -115,6 +115,20 @@ for model_name in $unknown_download_list; do
download $url_prefix $model_name
done

# ERNIE int8 quant model with matmul
unknown_nlp_download_list='quant_post_model_xnli_predict_matmul'
for model_name in $unknown_nlp_download_list; do
url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/unknown/nlp"
download $url_prefix $model_name
done

# MobileNetV1 with prune op attribute
dev_class_download_list='MobileNetV1'
for model_name in $dev_class_download_list; do
url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2021-09-16/class"
download $url_prefix $model_name
done

function compile_test() {
mkdir -p ${build_dir}
cd ${build_dir}
@@ -255,6 +269,31 @@ if [ $? -ne 0 ]; then
EXIT_CODE=8
fi

printf "${YELLOW} start test_ernie_xnli_int8 ${NC} \n";
compile_test "test_ernie_xnli_int8"
ernie_qat_model="quant_post_model_xnli_predict_matmul"
${exe_dir}/test_ernie_xnli_int8 \
--modeldir=$DATA_DIR/$ernie_qat_model/$ernie_qat_model \
--datadir=$DATA_DIR/$ernie_qat_model/$ernie_qat_model/xnli_var_len \
--truth_data=$DATA_DIR/$ernie_qat_model/$ernie_qat_model/truth_data \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_ernie_xnli_int8.xml
if [ $? -ne 0 ]; then
echo "${RED} test_ernie_xnli_int8 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi

printf "${YELLOW} start test_mobilnetv1 ${NC} \n";
compile_test "test_mobilnetv1"
${exe_dir}/test_mobilnetv1 \
--modeldir=$DATA_DIR/MobileNetV1/MobileNetV1 \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_mobilnetv1.xml
if [ $? -ne 0 ]; then
echo "${RED} test_mobilnetv1 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi

set +x

test_suites=$(echo ${test_suite_list} | sed 's/:/ /g')
194 changes: 194 additions & 0 deletions paddle/fluid/inference/tests/infer_ut/test_ernie_xnli_int8.cc
@@ -0,0 +1,194 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "test_helper.h" // NOLINT
#include "test_suite.h" // NOLINT

DEFINE_string(modeldir, "", "Directory of the inference model.");
DEFINE_string(datadir, "", "Path of the input dataset file.");
DEFINE_string(truth_data, "", "Path of the ground-truth result file.");

namespace paddle_infer {

std::shared_ptr<Predictor> InitPredictor() {
Config config;
config.SetModel(FLAGS_modeldir + "/__model__",
FLAGS_modeldir + "/__params__");
config.EnableUseGpu(1000, 0);
// Open the memory optim.
config.EnableMemoryOptim();

int max_batch = 32;
int max_single_seq_len = 128;
int opt_single_seq_len = 64;
int min_batch_seq_len = 1;
int max_batch_seq_len = 512;
int opt_batch_seq_len = 256;

std::string input_name0 = "eval_placeholder_0";
std::string input_name1 = "eval_placeholder_1";
std::string input_name2 = "eval_placeholder_2";
std::string input_name3 = "eval_placeholder_3";

std::vector<int> min_shape = {min_batch_seq_len};
std::vector<int> max_shape = {max_batch_seq_len};
std::vector<int> opt_shape = {opt_batch_seq_len};
// Set the input's min, max, opt shape
std::map<std::string, std::vector<int>> min_input_shape = {
{input_name0, min_shape},
{input_name1, min_shape},
{input_name2, {1}},
{input_name3, {1, min_batch_seq_len, 1}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{input_name0, max_shape},
{input_name1, max_shape},
{input_name2, {max_batch + 1}},
{input_name3, {1, max_single_seq_len, 1}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{input_name0, opt_shape},
{input_name1, opt_shape},
{input_name2, {max_batch + 1}},
{input_name3, {1, opt_single_seq_len, 1}}};

  // run with int8 precision; the model is post-training quantized, so no
  // TensorRT calibration pass is needed (use_calib_mode = false)
  config.EnableTensorRtEngine(1 << 30, 1, 5, Config::Precision::kInt8, false,
                              false);
  // ernie varlen must be used with dynamic shape
  config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
                                opt_input_shape);
  // ernie varlen must be used with OSS
  config.EnableTensorRtOSS();

return CreatePredictor(config);
}

// Parse a space-separated tensor from a string field
template <typename T>
std::vector<T> ParseTensor(const std::string &field) {
  std::vector<T> mat;
  paddle::test::Split(field, ' ', &mat);

  return mat;
}

void run(Predictor *predictor, std::vector<float> *out_data) {
  CHECK(predictor->Run());

  auto output_names = predictor->GetOutputNames();
  auto output_t = predictor->GetOutputHandle(output_names[0]);
  std::vector<int> output_shape = output_t->shape();
  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                std::multiplies<int>());
  out_data->resize(out_num);
  output_t->CopyToCpu(out_data->data());
}

// Read the ground-truth labels from a file, one float per line
std::deque<float> PrepareOutput(const std::string &input_file) {
std::ifstream fin(input_file);
std::string line;
std::vector<std::string> buffer;
while (std::getline(fin, line)) {
buffer.emplace_back(line);
}
std::deque<float> resDeque(buffer.size());
std::transform(buffer.begin(), buffer.end(), resDeque.begin(),
[](const std::string &val) { return std::stof(val); });

return resDeque;
} // PrepareOutput

TEST(tensorrt_tester_ernie_xnli, oss_varlen_truth_data_int8) {
auto resDeque = PrepareOutput(FLAGS_truth_data);
auto predictor = InitPredictor();

ASSERT_FALSE(FLAGS_datadir.empty());
std::ifstream fin(FLAGS_datadir);
std::string line;

int lineno = 0;
  const int max_seq_len = 128;  // const so i4 below is a fixed-size array
const int run_batch = 1;
int correct_num = 0;
while (std::getline(fin, line)) {
std::vector<std::string> fields;
paddle::test::Split(line, ';', &fields);

auto src_ids = ParseTensor<int32_t>(fields[0]);
auto sent_ids = ParseTensor<int32_t>(fields[1]);
auto pos_ids = ParseTensor<int64_t>(fields[2]);

    int run_seq_len = src_ids.size();
    // i3 carries the cumulative sequence offsets expected by the varlen
    // plugin ({0, run_seq_len} for a single sequence); i4 is a zero-filled
    // placeholder sized to the maximum sequence length
    int32_t i3[2] = {0, run_seq_len};
    int32_t i4[max_seq_len] = {0};

auto input_names = predictor->GetInputNames();

// first input
auto input_t1 = predictor->GetInputHandle(input_names[0]);
input_t1->Reshape({run_seq_len});
input_t1->CopyFromCpu(src_ids.data());

// second input
auto input_t2 = predictor->GetInputHandle(input_names[1]);
input_t2->Reshape({run_seq_len});
input_t2->CopyFromCpu(sent_ids.data());

// third input
auto input_t3 = predictor->GetInputHandle(input_names[2]);
input_t3->Reshape({run_batch + 1});
input_t3->CopyFromCpu(i3);

// fourth input
auto input_t4 = predictor->GetInputHandle(input_names[3]);
input_t4->Reshape({1, max_seq_len, 1});
input_t4->CopyFromCpu(i4);

std::vector<float> out_data;
run(predictor.get(), &out_data);

lineno++;
    int maxPosition =
        std::max_element(out_data.begin(), out_data.end()) - out_data.begin();

if (maxPosition == resDeque[0]) {
correct_num += 1;
}
resDeque.pop_front();

VLOG(2) << "predict result: " << maxPosition;
for (auto r : out_data) {
VLOG(2) << r;
}
}
  ASSERT_GT(correct_num,
            4741);  // 5010 inputs in total; int8 accuracy should exceed
                    // 4741 correct predictions (about 94.6%)
LOG(INFO) << "=== finish oss test ===";
}

} // namespace paddle_infer

int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
::google::ParseCommandLineFlags(&argc, &argv, true);
#if IS_TRT_VERSION_GE(7200)  // varlen OSS path needs TensorRT >= 7.2
return RUN_ALL_TESTS();
#endif
return 0;
}
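The IS_TRT_VERSION_GE(7200) guard above skips the suite entirely on TensorRT builds older than 7.2. A minimal sketch of how such a macro can be assembled from the NV_TENSORRT_* compile definitions exported by the CMakeLists.txt change (the macro actually provided via test_suite.h may differ):

// Sketch only, assuming the NV_TENSORRT_* definitions from CMakeLists.txt:
// fold the components into one comparable number, e.g. 7.2.3.4 -> 7234,
// so IS_TRT_VERSION_GE(7200) reads as "TensorRT >= 7.2.0.0".
#define TRT_VERSION                                     \
  (NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
   NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD)
#define IS_TRT_VERSION_GE(version) ((TRT_VERSION) >= (version))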
79 changes: 79 additions & 0 deletions paddle/fluid/inference/tests/infer_ut/test_helper.h
@@ -0,0 +1,79 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <sstream>
#include <string>
#include <vector>

namespace paddle {
namespace test {

// Split a string on sep; an empty input yields no pieces unless
// ignore_null is false, and a trailing empty piece is always dropped
static void split(const std::string &str, char sep,
std::vector<std::string> *pieces, bool ignore_null = true) {
pieces->clear();
if (str.empty()) {
if (!ignore_null) {
pieces->push_back(str);
}
return;
}
size_t pos = 0;
size_t next = str.find(sep, pos);
while (next != std::string::npos) {
pieces->push_back(str.substr(pos, next - pos));
pos = next + 1;
next = str.find(sep, pos);
}
if (!str.substr(pos).empty()) {
pieces->push_back(str.substr(pos));
}
}

template <typename T>
void GetValueFromStream(std::stringstream *ss, T *t) {
(*ss) >> (*t);
}

// For strings, take the whole accumulated buffer (operator>> would stop
// at the first whitespace)
template <>
void GetValueFromStream<std::string>(std::stringstream *ss, std::string *t) {
*t = ss->str();
}

// Split a string into a vector<T>, parsing each sep-delimited field as T
template <typename T>
void Split(const std::string &line, char sep, std::vector<T> *v) {
std::stringstream ss;
T t;
for (auto c : line) {
if (c != sep) {
ss << c;
} else {
GetValueFromStream<T>(&ss, &t);
v->push_back(std::move(t));
ss.str({});
ss.clear();
}
}

if (!ss.str().empty()) {
GetValueFromStream<T>(&ss, &t);
v->push_back(std::move(t));
ss.str({});
ss.clear();
}
}

} // namespace test
} // namespace paddle
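A minimal usage sketch of the helpers above (a hypothetical standalone program; main() and the literals are for illustration only), mirroring how test_ernie_xnli_int8.cc first splits each dataset line on ';' and then parses each field with ParseTensor:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include "test_helper.h"  // paddle::test::Split

int main() {
  // split one record into its semicolon-separated fields
  std::vector<std::string> fields;
  paddle::test::Split("1 2 3;4 5 6", ';', &fields);  // {"1 2 3", "4 5 6"}

  // parse the first field into integer token ids
  std::vector<int32_t> src_ids;
  paddle::test::Split(fields[0], ' ', &src_ids);  // {1, 2, 3}

  std::cout << fields.size() << " fields, first has " << src_ids.size()
            << " ids" << std::endl;  // prints: 2 fields, first has 3 ids
  return 0;
}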