Skip to content

Commit

Permalink
support ernie-int8 test and prune op attribute test (#35890)
Browse files Browse the repository at this point in the history
* support ernie-int8 test and prune op attribute test

* remove using and use namespace

* remove macro and use shell instead

* Revert "remove macro and use shell instead"

This reverts commit 615964b.

* fix grammar error

* fix shell error
  • Loading branch information
OliverLPH authored Sep 22, 2021
1 parent 7ebbcbb commit e8789c1
Show file tree
Hide file tree
Showing 6 changed files with 440 additions and 5 deletions.
24 changes: 23 additions & 1 deletion paddle/fluid/inference/tests/infer_ut/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,20 +93,42 @@ if (USE_TENSORRT AND WITH_GPU)
# Detect the TensorRT version from the installed headers. The NV_TENSORRT_*
# macros are looked up in NvInfer.h first; when the major version is not found
# there, NvInferVersion.h is parsed instead. TENSORRT_VERSION_HEADER tracks
# which header was actually read so the status message reports the right file.
set(TENSORRT_VERSION_HEADER "${TENSORRT_INCLUDE_DIR}/NvInfer.h")
file(READ "${TENSORRT_VERSION_HEADER}" TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
  "${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
  "${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
  "${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
  "${TENSORRT_VERSION_FILE_CONTENTS}")
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
  set(TENSORRT_VERSION_HEADER "${TENSORRT_INCLUDE_DIR}/NvInferVersion.h")
  file(READ "${TENSORRT_VERSION_HEADER}" TENSORRT_VERSION_FILE_CONTENTS)
  string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
    "${TENSORRT_VERSION_FILE_CONTENTS}")
  string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
    "${TENSORRT_VERSION_FILE_CONTENTS}")
  string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
    "${TENSORRT_VERSION_FILE_CONTENTS}")
  string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
    "${TENSORRT_VERSION_FILE_CONTENTS}")
endif()
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
  message(SEND_ERROR "Failed to detect TensorRT version.")
endif()
# Each *_VERSION variable still holds the whole matched "define ... N" text;
# reduce it to the captured number.
string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
  TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
  TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1"
  TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1"
  TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}")
# Single, well-formed message() call: the superseded "v<major>." line that
# closed the call early has been dropped.
message(STATUS "Current TensorRT header is ${TENSORRT_VERSION_HEADER}. "
  "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ")
include_directories("${TENSORRT_INCLUDE_DIR}")
link_directories("${TENSORRT_LIB_DIR}")
# Expose the detected version components to the C++ sources as macros.
add_compile_definitions(NV_TENSORRT_MAJOR=${TENSORRT_MAJOR_VERSION})
add_compile_definitions(NV_TENSORRT_MINOR=${TENSORRT_MINOR_VERSION})
add_compile_definitions(NV_TENSORRT_PATCH=${TENSORRT_PATCH_VERSION})
add_compile_definitions(NV_TENSORRT_BUILD=${TENSORRT_BUILD_VERSION})
endif()

if(WITH_MKL)
Expand Down
39 changes: 39 additions & 0 deletions paddle/fluid/inference/tests/infer_ut/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,20 @@ for model_name in $unknown_download_list; do
download $url_prefix $model_name
done

# ERNIE int8 post-quantization model (matmul variant); the same model name is
# used by the test_ernie_xnli_int8 run further down.
unknown_nlp_download_list='quant_post_model_xnli_predict_matmul'
for model_name in ${unknown_nlp_download_list}; do
    url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/unknown/nlp"
    download "${url_prefix}" "${model_name}"
done

# MobileNetV1 model for the prune-op-attribute test (run as test_mobilnetv1).
dev_class_download_list='MobileNetV1'
for model_name in ${dev_class_download_list}; do
    url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2021-09-16/class"
    download "${url_prefix}" "${model_name}"
done

function compile_test() {
mkdir -p ${build_dir}
cd ${build_dir}
Expand Down Expand Up @@ -255,6 +269,31 @@ if [ $? -ne 0 ]; then
EXIT_CODE=8
fi

# Build and run test_ernie_xnli_int8. The model, the input data and the truth
# labels are all addressed relative to $DATA_DIR/<model>/<model>.
# Every expansion is quoted so that paths containing whitespace and a gtest
# filter containing glob characters reach the binary as single, literal words.
printf "${YELLOW} start test_ernie_xnli_int8 ${NC} \n";
compile_test "test_ernie_xnli_int8"
ernie_qat_model="quant_post_model_xnli_predict_matmul"
"${exe_dir}/test_ernie_xnli_int8" \
    --modeldir="${DATA_DIR}/${ernie_qat_model}/${ernie_qat_model}" \
    --datadir="${DATA_DIR}/${ernie_qat_model}/${ernie_qat_model}/xnli_var_len" \
    --truth_data="${DATA_DIR}/${ernie_qat_model}/${ernie_qat_model}/truth_data" \
    --gtest_filter="${test_suite_list}" \
    --gtest_output="xml:${log_dir}/test_ernie_xnli_int8.xml"
# $? is still the exit status of the test binary here; record the failure and
# keep going so the remaining tests run.
if [ $? -ne 0 ]; then
    echo "${RED} test_ernie_xnli_int8 runs failed ${NC}" >> "${exe_dir}/test_summary.txt"
    EXIT_CODE=8
fi

# Build and run test_mobilnetv1 against the MobileNetV1 model under $DATA_DIR.
# Every expansion is quoted so that paths containing whitespace and a gtest
# filter containing glob characters reach the binary as single, literal words.
printf "${YELLOW} start test_mobilnetv1 ${NC} \n";
compile_test "test_mobilnetv1"
"${exe_dir}/test_mobilnetv1" \
    --modeldir="${DATA_DIR}/MobileNetV1/MobileNetV1" \
    --gtest_filter="${test_suite_list}" \
    --gtest_output="xml:${log_dir}/test_mobilnetv1.xml"
# $? is still the exit status of the test binary here; record the failure and
# keep going so the remaining tests run.
if [ $? -ne 0 ]; then
    echo "${RED} test_mobilnetv1 runs failed ${NC}" >> "${exe_dir}/test_summary.txt"
    EXIT_CODE=8
fi

# Turn off command tracing for the rest of the script.
set +x

# test_suite_list is the colon-separated value passed to --gtest_filter above;
# replace each ':' with a space to get a whitespace-separated list.
test_suites=$(echo ${test_suite_list} | sed 's/:/ /g')
Expand Down
194 changes: 194 additions & 0 deletions paddle/fluid/inference/tests/infer_ut/test_ernie_xnli_int8.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "test_helper.h" // NOLINT
#include "test_suite.h" // NOLINT

// Command-line flags (gflags).
// InitPredictor() loads FLAGS_modeldir + "/__model__" and
// FLAGS_modeldir + "/__params__".
DEFINE_string(modeldir, "", "Directory of the inference model.");
// Opened with std::ifstream by the test body and read line by line, so the
// value must be the path of the input data file.
DEFINE_string(datadir, "", "dataset.");
// Passed to PrepareOutput(), which opens it as a file and parses one float per
// line; despite the help text the value is a file path, not a directory.
DEFINE_string(truth_data, "", "Directory of the inference data truth result");

namespace paddle_infer {

// Creates the predictor used by the test: the model is loaded from
// FLAGS_modeldir, GPU execution and memory optimization are switched on, and
// the TensorRT engine is enabled with Config::Precision::kInt8, dynamic shape
// ranges for the four "eval_placeholder_*" inputs, and the OSS path.
std::shared_ptr<Predictor> InitPredictor() {
  // Bounds from which the dynamic-shape ranges are assembled.
  const int max_batch = 32;
  const int min_batch_seq_len = 1;
  const int opt_batch_seq_len = 256;
  const int max_batch_seq_len = 512;
  const int opt_single_seq_len = 64;
  const int max_single_seq_len = 128;

  const std::string input_name0 = "eval_placeholder_0";
  const std::string input_name1 = "eval_placeholder_1";
  const std::string input_name2 = "eval_placeholder_2";
  const std::string input_name3 = "eval_placeholder_3";

  // Per-input minimum / maximum / optimal shapes.
  const std::map<std::string, std::vector<int>> min_input_shape = {
      {input_name0, {min_batch_seq_len}},
      {input_name1, {min_batch_seq_len}},
      {input_name2, {1}},
      {input_name3, {1, min_batch_seq_len, 1}}};
  const std::map<std::string, std::vector<int>> max_input_shape = {
      {input_name0, {max_batch_seq_len}},
      {input_name1, {max_batch_seq_len}},
      {input_name2, {max_batch + 1}},
      {input_name3, {1, max_single_seq_len, 1}}};
  const std::map<std::string, std::vector<int>> opt_input_shape = {
      {input_name0, {opt_batch_seq_len}},
      {input_name1, {opt_batch_seq_len}},
      {input_name2, {max_batch + 1}},
      {input_name3, {1, opt_single_seq_len, 1}}};

  Config config;
  config.SetModel(FLAGS_modeldir + "/__model__",
                  FLAGS_modeldir + "/__params__");
  // NOTE(review): arguments are presumably the initial GPU memory pool size
  // in MB and the device id - confirm against Config::EnableUseGpu.
  config.EnableUseGpu(1000, 0);
  config.EnableMemoryOptim();

  // This test runs the engine in int8 precision.
  // NOTE(review): the remaining arguments are presumably workspace size, max
  // batch size, min subgraph size, use_static and use_calib_mode - confirm
  // against Config::EnableTensorRtEngine.
  config.EnableTensorRtEngine(1 << 30, 1, 5, Config::Precision::kInt8, false,
                              false);
  // ERNIE varlen must be used with dynamic shape ...
  config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
                                opt_input_shape);
  // ... and with OSS.
  config.EnableTensorRtOSS();

  return CreatePredictor(config);
}

// Splits `field` on single spaces via paddle::test::Split and returns the
// resulting values converted to T.
template <typename T>
std::vector<T> ParseTensor(const std::string &field) {
  std::vector<T> values;
  paddle::test::Split(field, ' ', &values);
  return values;
}

// Runs one inference pass on `predictor` and copies its first output tensor
// into `*out_data`, which is resized to the tensor's element count.
// Aborts through CHECK when the run fails or the predictor reports no outputs.
void run(Predictor *predictor, std::vector<float> *out_data) {
  CHECK(predictor->Run());

  auto output_names = predictor->GetOutputNames();
  // Indexing output_names[0] on an empty list would be undefined behavior.
  CHECK(!output_names.empty());
  auto output_t = predictor->GetOutputHandle(output_names[0]);
  std::vector<int> output_shape = output_t->shape();
  // Element count is the product of all dimensions.
  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                std::multiplies<int>());
  out_data->resize(out_num);
  output_t->CopyToCpu(out_data->data());
}

// Reads `input_file` line by line and returns every line converted with
// std::stof, in file order. A file that cannot be opened yields an empty
// deque; a line std::stof cannot parse makes std::stof throw.
auto PrepareOutput(std::string input_file) -> std::deque<float> {
  std::deque<float> parsed;
  std::ifstream fin(input_file);
  for (std::string line; std::getline(fin, line);) {
    parsed.push_back(std::stof(line));
  }
  return parsed;
}  // PrepareOutput

// Feeds every sample line of FLAGS_datadir through the int8 ERNIE predictor
// and counts how often the index of the largest output equals the label read
// from the matching line of FLAGS_truth_data. The test passes when more than
// 4741 samples are classified correctly.
TEST(tensorrt_tester_ernie_xnli, oss_varlen_truth_data_int8) {
  // Validate the inputs before paying for predictor construction.
  ASSERT_FALSE(FLAGS_datadir.empty());
  std::ifstream fin(FLAGS_datadir);
  ASSERT_TRUE(fin.is_open()) << "cannot open " << FLAGS_datadir;

  auto resDeque = PrepareOutput(FLAGS_truth_data);
  auto predictor = InitPredictor();

  // Queried once, outside the sample loop.
  const auto input_names = predictor->GetInputNames();
  ASSERT_GE(input_names.size(), 4u);

  // A compile-time constant keeps the i4 buffer a standard fixed-size array
  // rather than an initialized variable-length array.
  constexpr int max_seq_len = 128;
  const int run_batch = 1;
  // Fourth input: an all-zero buffer that is the same for every sample.
  const int32_t i4[max_seq_len] = {0};

  int lineno = 0;
  int correct_num = 0;
  std::string line;
  while (std::getline(fin, line)) {
    ++lineno;
    std::vector<std::string> fields;
    paddle::test::Split(line, ';', &fields);
    // Only the first two ';'-separated fields are consumed below.
    ASSERT_GE(fields.size(), 2u) << "malformed sample at line " << lineno;
    ASSERT_FALSE(resDeque.empty())
        << "no truth label left for sample at line " << lineno;

    const auto src_ids = ParseTensor<int32_t>(fields[0]);
    const auto sent_ids = ParseTensor<int32_t>(fields[1]);
    // Both tensors are copied using the same reshaped length.
    ASSERT_EQ(src_ids.size(), sent_ids.size())
        << "field length mismatch at line " << lineno;

    const int run_seq_len = static_cast<int>(src_ids.size());
    const int32_t i3[2] = {0, run_seq_len};

    // first input
    auto input_t1 = predictor->GetInputHandle(input_names[0]);
    input_t1->Reshape({run_seq_len});
    input_t1->CopyFromCpu(src_ids.data());

    // second input
    auto input_t2 = predictor->GetInputHandle(input_names[1]);
    input_t2->Reshape({run_seq_len});
    input_t2->CopyFromCpu(sent_ids.data());

    // third input
    auto input_t3 = predictor->GetInputHandle(input_names[2]);
    input_t3->Reshape({run_batch + 1});
    input_t3->CopyFromCpu(i3);

    // fourth input
    auto input_t4 = predictor->GetInputHandle(input_names[3]);
    input_t4->Reshape({1, max_seq_len, 1});
    input_t4->CopyFromCpu(i4);

    std::vector<float> out_data;
    run(predictor.get(), &out_data);

    // Predicted class is the position of the largest output value.
    const int maxPosition = static_cast<int>(
        std::max_element(out_data.begin(), out_data.end()) - out_data.begin());

    // Labels are stored as floats; compare in float like the implicit
    // int-to-float promotion would.
    if (static_cast<float>(maxPosition) == resDeque.front()) {
      correct_num += 1;
    }
    resDeque.pop_front();

    VLOG(2) << "predict result: " << maxPosition;
    for (auto r : out_data) {
      VLOG(2) << r;
    }
  }
  ASSERT_GT(correct_num,
            4741);  // total input 5010, int8 res should greater than 4741
  LOG(INFO) << "=== finish oss test ===";
}

} // namespace paddle_infer

// Entry point: initializes gtest and gflags, then runs the test suite only
// when IS_TRT_VERSION_GE(7200) holds. Otherwise nothing is run and the process
// exits with status 0.
int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
  ::google::ParseCommandLineFlags(&argc, &argv, true);
  int status = 0;
#if IS_TRT_VERSION_GE(7200)
  status = RUN_ALL_TESTS();
#endif
  return status;
}
79 changes: 79 additions & 0 deletions paddle/fluid/inference/tests/infer_ut/test_helper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <sstream>
#include <string>
#include <vector>

namespace paddle {
namespace test {

// Splits `str` on `sep` into `*pieces`, which is cleared first.
// Empty pieces before a separator are kept, but an empty piece after the last
// separator is dropped. `ignore_null` only affects an empty `str`: when false,
// a single empty string is stored; when true, `*pieces` stays empty.
static void split(const std::string &str, char sep,
                  std::vector<std::string> *pieces, bool ignore_null = true) {
  pieces->clear();
  if (str.empty()) {
    if (!ignore_null) {
      pieces->emplace_back();
    }
    return;
  }

  std::string::size_type begin = 0;
  for (std::string::size_type hit = str.find(sep, begin);
       hit != std::string::npos; hit = str.find(sep, begin)) {
    pieces->emplace_back(str, begin, hit - begin);
    begin = hit + 1;
  }
  // Remainder after the last separator, skipped when empty.
  if (begin < str.size()) {
    pieces->emplace_back(str, begin, std::string::npos);
  }
}

// Extracts the next value of type T from `*ss` into `*t` with operator>>.
template <typename T>
void GetValueFromStream(std::stringstream *ss, T *t) {
  (*ss) >> (*t);
}

// std::string specialization: copies the stream's whole buffer into `*t`
// instead of extracting a whitespace-delimited word.
// Declared inline because a full specialization is an ordinary function;
// without it, including this header from more than one translation unit
// produces duplicate definitions at link time.
template <>
inline void GetValueFromStream<std::string>(std::stringstream *ss,
                                            std::string *t) {
  *t = ss->str();
}

// Splits `line` on `sep`, converts each piece to T with GetValueFromStream and
// appends the results to `*v` (existing elements are kept). A piece is emitted
// at every separator, and once more at the end when trailing characters
// remain.
template <typename T>
void Split(const std::string &line, char sep, std::vector<T> *v) {
  std::stringstream token;
  T value;
  // Converts the buffered characters, stores the value and resets the buffer.
  auto flush = [&]() {
    GetValueFromStream<T>(&token, &value);
    v->push_back(std::move(value));
    token.str({});
    token.clear();
  };

  for (const char ch : line) {
    if (ch == sep) {
      flush();
    } else {
      token << ch;
    }
  }

  if (!token.str().empty()) {
    flush();
  }
}

} // namespace test
} // namespace paddle
Loading

0 comments on commit e8789c1

Please sign in to comment.