support ernie-int8 test and prune op attribute test #35890

Merged: 6 commits, Sep 22, 2021. Showing changes from all commits.
24 changes: 23 additions & 1 deletion paddle/fluid/inference/tests/infer_ut/CMakeLists.txt
@@ -93,20 +93,42 @@ if (USE_TENSORRT AND WITH_GPU)
file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
endif()
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
message(SEND_ERROR "Failed to detect TensorRT version.")
endif()
string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1"
TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1"
TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}")
  message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
-         "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
+         "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ")
include_directories("${TENSORRT_INCLUDE_DIR}")
link_directories("${TENSORRT_LIB_DIR}")
add_compile_definitions(NV_TENSORRT_MAJOR=${TENSORRT_MAJOR_VERSION})
add_compile_definitions(NV_TENSORRT_MINOR=${TENSORRT_MINOR_VERSION})
add_compile_definitions(NV_TENSORRT_PATCH=${TENSORRT_PATCH_VERSION})
add_compile_definitions(NV_TENSORRT_BUILD=${TENSORRT_BUILD_VERSION})
endif()

if(WITH_MKL)
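For reference, the string(REGEX MATCH ...) calls above scrape the version macros out of the TensorRT headers; the second file(READ ...) is a fallback for installs where the macros live in NvInferVersion.h rather than NvInfer.h. They match definitions of roughly the following shape (an illustrative excerpt; the values are examples, not pinned):

// NvInferVersion.h (illustrative excerpt; example values only)
#define NV_TENSORRT_MAJOR 7  // major version
#define NV_TENSORRT_MINOR 2  // minor version
#define NV_TENSORRT_PATCH 3  // patch version
#define NV_TENSORRT_BUILD 4  // build number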
39 changes: 39 additions & 0 deletions paddle/fluid/inference/tests/infer_ut/run.sh
@@ -115,6 +115,20 @@ for model_name in $unknown_download_list; do
download $url_prefix $model_name
done

# ERNIE int8 quant model with matmul
unknown_nlp_download_list='quant_post_model_xnli_predict_matmul'
for model_name in $unknown_nlp_download_list; do
url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/unknown/nlp"
download $url_prefix $model_name
done

# MobileNetV1 with prune op attribute
dev_class_download_list='MobileNetV1'
for model_name in $dev_class_download_list; do
url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2021-09-16/class"
download $url_prefix $model_name
done

function compile_test() {
mkdir -p ${build_dir}
cd ${build_dir}
@@ -255,6 +269,31 @@ if [ $? -ne 0 ]; then
EXIT_CODE=8
fi

printf "${YELLOW} start test_ernie_xnli_int8 ${NC} \n";
compile_test "test_ernie_xnli_int8"
ernie_qat_model="quant_post_model_xnli_predict_matmul"
${exe_dir}/test_ernie_xnli_int8 \
--modeldir=$DATA_DIR/$ernie_qat_model/$ernie_qat_model \
--datadir=$DATA_DIR/$ernie_qat_model/$ernie_qat_model/xnli_var_len \
--truth_data=$DATA_DIR/$ernie_qat_model/$ernie_qat_model/truth_data \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_ernie_xnli_int8.xml
if [ $? -ne 0 ]; then
echo "${RED} test_ernie_xnli_int8 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi

printf "${YELLOW} start test_mobilnetv1 ${NC} \n";
compile_test "test_mobilnetv1"
${exe_dir}/test_mobilnetv1 \
--modeldir=$DATA_DIR/MobileNetV1/MobileNetV1 \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_mobilnetv1.xml
if [ $? -ne 0 ]; then
echo "${RED} test_mobilnetv1 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi

set +x

test_suites=$(echo ${test_suite_list} | sed 's/:/ /g')
194 changes: 194 additions & 0 deletions paddle/fluid/inference/tests/infer_ut/test_ernie_xnli_int8.cc
@@ -0,0 +1,194 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "test_helper.h" // NOLINT
#include "test_suite.h" // NOLINT

DEFINE_string(modeldir, "", "Directory of the inference model.");
DEFINE_string(datadir, "", "Path of the input dataset file.");
DEFINE_string(truth_data, "", "Path of the ground-truth result file.");

namespace paddle_infer {

std::shared_ptr<Predictor> InitPredictor() {
Config config;
config.SetModel(FLAGS_modeldir + "/__model__",
FLAGS_modeldir + "/__params__");
config.EnableUseGpu(1000, 0);
// Open the memory optim.
config.EnableMemoryOptim();

int max_batch = 32;
int max_single_seq_len = 128;
int opt_single_seq_len = 64;
int min_batch_seq_len = 1;
int max_batch_seq_len = 512;
int opt_batch_seq_len = 256;

std::string input_name0 = "eval_placeholder_0";
std::string input_name1 = "eval_placeholder_1";
std::string input_name2 = "eval_placeholder_2";
std::string input_name3 = "eval_placeholder_3";

std::vector<int> min_shape = {min_batch_seq_len};
std::vector<int> max_shape = {max_batch_seq_len};
std::vector<int> opt_shape = {opt_batch_seq_len};
// Set the input's min, max, opt shape
std::map<std::string, std::vector<int>> min_input_shape = {
{input_name0, min_shape},
{input_name1, min_shape},
{input_name2, {1}},
{input_name3, {1, min_batch_seq_len, 1}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{input_name0, max_shape},
{input_name1, max_shape},
{input_name2, {max_batch + 1}},
{input_name3, {1, max_single_seq_len, 1}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{input_name0, opt_shape},
{input_name1, opt_shape},
{input_name2, {max_batch + 1}},
{input_name3, {1, opt_single_seq_len, 1}}};

  // run with int8 precision; the model is post-training quantized, so no
  // TensorRT calibration pass is needed (use_calib_mode = false)
  config.EnableTensorRtEngine(1 << 30, 1, 5, Config::Precision::kInt8, false,
                              false);
  // ernie varlen must be used with dynamic shape
  config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
                                opt_input_shape);
  // ernie varlen must be used with OSS
  config.EnableTensorRtOSS();

return CreatePredictor(config);
}

// Parse a space-separated tensor from a string field
template <typename T>
std::vector<T> ParseTensor(const std::string &field) {
  std::vector<T> mat;
  paddle::test::Split(field, ' ', &mat);

  return mat;
}

void run(Predictor *predictor, std::vector<float> *out_data) {
  CHECK(predictor->Run());

  auto output_names = predictor->GetOutputNames();
  auto output_t = predictor->GetOutputHandle(output_names[0]);
  std::vector<int> output_shape = output_t->shape();
  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                std::multiplies<int>());
  out_data->resize(out_num);
  output_t->CopyToCpu(out_data->data());
}

// Read the ground-truth labels from a file, one float per line
std::deque<float> PrepareOutput(const std::string &input_file) {
std::ifstream fin(input_file);
std::string line;
std::vector<std::string> buffer;
while (std::getline(fin, line)) {
buffer.emplace_back(line);
}
std::deque<float> resDeque(buffer.size());
std::transform(buffer.begin(), buffer.end(), resDeque.begin(),
[](const std::string &val) { return std::stof(val); });

return resDeque;
} // PrepareOutput

TEST(tensorrt_tester_ernie_xnli, oss_varlen_truth_data_int8) {
auto resDeque = PrepareOutput(FLAGS_truth_data);
auto predictor = InitPredictor();

ASSERT_FALSE(FLAGS_datadir.empty());
std::ifstream fin(FLAGS_datadir);
std::string line;

int lineno = 0;
  const int max_seq_len = 128;  // const so i4 below is a fixed-size array
const int run_batch = 1;
int correct_num = 0;
while (std::getline(fin, line)) {
std::vector<std::string> fields;
paddle::test::Split(line, ';', &fields);

auto src_ids = ParseTensor<int32_t>(fields[0]);
auto sent_ids = ParseTensor<int32_t>(fields[1]);
auto pos_ids = ParseTensor<int64_t>(fields[2]);

    int run_seq_len = src_ids.size();
    // i3 carries the cumulative sequence offsets expected by the varlen
    // plugin ({0, run_seq_len} for a single sequence); i4 is a zero-filled
    // placeholder sized to the maximum sequence length
    int32_t i3[2] = {0, run_seq_len};
    int32_t i4[max_seq_len] = {0};

auto input_names = predictor->GetInputNames();

// first input
auto input_t1 = predictor->GetInputHandle(input_names[0]);
input_t1->Reshape({run_seq_len});
input_t1->CopyFromCpu(src_ids.data());

// second input
auto input_t2 = predictor->GetInputHandle(input_names[1]);
input_t2->Reshape({run_seq_len});
input_t2->CopyFromCpu(sent_ids.data());

// third input
auto input_t3 = predictor->GetInputHandle(input_names[2]);
input_t3->Reshape({run_batch + 1});
input_t3->CopyFromCpu(i3);

// fourth input
auto input_t4 = predictor->GetInputHandle(input_names[3]);
input_t4->Reshape({1, max_seq_len, 1});
input_t4->CopyFromCpu(i4);

std::vector<float> out_data;
run(predictor.get(), &out_data);

lineno++;
    int maxPosition =
        std::max_element(out_data.begin(), out_data.end()) - out_data.begin();

if (maxPosition == resDeque[0]) {
correct_num += 1;
}
resDeque.pop_front();

VLOG(2) << "predict result: " << maxPosition;
for (auto r : out_data) {
VLOG(2) << r;
}
}
  ASSERT_GT(correct_num,
            4741);  // 5010 inputs in total; int8 accuracy should exceed
                    // 4741 correct predictions (about 94.6%)
LOG(INFO) << "=== finish oss test ===";
}

} // namespace paddle_infer

int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
::google::ParseCommandLineFlags(&argc, &argv, true);
#if IS_TRT_VERSION_GE(7200)  // varlen OSS path needs TensorRT >= 7.2
return RUN_ALL_TESTS();
#endif
return 0;
}
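The IS_TRT_VERSION_GE(7200) guard above skips the suite entirely on TensorRT builds older than 7.2. A minimal sketch of how such a macro can be assembled from the NV_TENSORRT_* compile definitions exported by the CMakeLists.txt change (the macro actually provided via test_suite.h may differ):

// Sketch only, assuming the NV_TENSORRT_* definitions from CMakeLists.txt:
// fold the components into one comparable number, e.g. 7.2.3.4 -> 7234,
// so IS_TRT_VERSION_GE(7200) reads as "TensorRT >= 7.2.0.0".
#define TRT_VERSION                                     \
  (NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
   NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD)
#define IS_TRT_VERSION_GE(version) ((TRT_VERSION) >= (version))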
79 changes: 79 additions & 0 deletions paddle/fluid/inference/tests/infer_ut/test_helper.h
@@ -0,0 +1,79 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <sstream>
#include <string>
#include <vector>

namespace paddle {
namespace test {

// Split a string on sep; an empty input yields no pieces unless
// ignore_null is false, and a trailing empty piece is always dropped
static void split(const std::string &str, char sep,
std::vector<std::string> *pieces, bool ignore_null = true) {
pieces->clear();
if (str.empty()) {
if (!ignore_null) {
pieces->push_back(str);
}
return;
}
size_t pos = 0;
size_t next = str.find(sep, pos);
while (next != std::string::npos) {
pieces->push_back(str.substr(pos, next - pos));
pos = next + 1;
next = str.find(sep, pos);
}
if (!str.substr(pos).empty()) {
pieces->push_back(str.substr(pos));
}
}

template <typename T>
void GetValueFromStream(std::stringstream *ss, T *t) {
(*ss) >> (*t);
}

// For strings, take the whole accumulated buffer (operator>> would stop
// at the first whitespace)
template <>
void GetValueFromStream<std::string>(std::stringstream *ss, std::string *t) {
*t = ss->str();
}

// Split a string into a vector<T>, parsing each sep-delimited field as T
template <typename T>
void Split(const std::string &line, char sep, std::vector<T> *v) {
std::stringstream ss;
T t;
for (auto c : line) {
if (c != sep) {
ss << c;
} else {
GetValueFromStream<T>(&ss, &t);
v->push_back(std::move(t));
ss.str({});
ss.clear();
}
}

if (!ss.str().empty()) {
GetValueFromStream<T>(&ss, &t);
v->push_back(std::move(t));
ss.str({});
ss.clear();
}
}

} // namespace test
} // namespace paddle
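A minimal usage sketch of the helpers above (a hypothetical standalone program; main() and the literals are for illustration only), mirroring how test_ernie_xnli_int8.cc first splits each dataset line on ';' and then parses each field with ParseTensor:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include "test_helper.h"  // paddle::test::Split

int main() {
  // split one record into its semicolon-separated fields
  std::vector<std::string> fields;
  paddle::test::Split("1 2 3;4 5 6", ';', &fields);  // {"1 2 3", "4 5 6"}

  // parse the first field into integer token ids
  std::vector<int32_t> src_ids;
  paddle::test::Split(fields[0], ' ', &src_ids);  // {1, 2, 3}

  std::cout << fields.size() << " fields, first has " << src_ids.size()
            << " ids" << std::endl;  // prints: 2 fields, first has 3 ids
  return 0;
}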