Skip to content

Commit

Permalink
Add StringTensor (PaddlePaddle#39830)
Browse files Browse the repository at this point in the history
* add string tensor and case convert kernels

* Add strings empty kernel; Reorganize the structure of case convert kernel

* Add string infermeta

* Update mutable_data of string tensor

* rename kernel name

* add string copy tmp

* Fix strings copy device bug

* add utf8 gpu converter

* add string tensor c++ api

* Remove mutable_data of string tensor

* update string tensor interface

* remove charcases_flag.h

* remove some fluid headers

* Add make_ddim

* __HIPCC__ -> PADDLE_WITH_HIP

* remove fluid headers

* fix cpu compile

* remove std::hash

* Fix cudaMalloc

* Remove strings/impl directory

* Fix infrt/get_phi_kernel_info.py;Add custom_kernels deps

* Add empty kernel test

* Remove some comments

* Modify lower/upper api encoding type: string->bool

* STRING->PSTRING; Add CreateInferLikeMeta

* Add code gen for C++ String API

* remove strings_api_utils.h

* Add ignore file (strings_api.h, strings_api.cc)

* update strings gen script

* change args order of case convert kernels

* Add comments for pstring, StringTensor

* cpstring_internal.h -> cpstring_impl.h

* Update according to comments:

1. Remove fluid headers
2. paddle::platform::errors -> phi::errors
3. Use 'place.GetType() == phi::AllocationType::GPU' instead of 'paddle::platform::is_cpu_space()'
4. Use camel code style

* Remove all singletons in strings kernels

* fix rocm compile

* Fix py3 compile

* Fix c++ coverage

* 1. Add pstring proto type
2. Add StringTensor debug info
3. Rename case_convert_kernel to strings_lower_upper
4. Remove serialize/deserialize strings kernel

* DataLayout::PSTRING -> DataLayout::PSTRING_UNION

* Register pstring data type

* Fix strings api gen

* Fix dense tensor register pstring dtype

* Fix error messages

* remove line

* add pstring unittest

* remove test string api unittest

* remove empty line

* Remove some headers to decrease the size of executable file
  • Loading branch information
joey12300 authored Mar 26, 2022
1 parent 3b89542 commit 0695e1a
Show file tree
Hide file tree
Showing 62 changed files with 7,679 additions and 24 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ paddle/phi/api/backward/backward_api.h
paddle/phi/api/backward/sparse_bw_api.h
paddle/phi/api/include/api.h
paddle/phi/api/include/sparse_api.h
paddle/phi/api/include/strings_api.h
paddle/phi/api/lib/api.cc
paddle/phi/api/lib/dygraph_api.*
paddle/phi/api/lib/backward_api.cc
paddle/phi/api/lib/sparse_api.cc
paddle/phi/api/lib/strings_api.cc
paddle/phi/api/lib/sparse_bw_api.cc
paddle/phi/extension.h
paddle/phi/include/*
Expand Down
10 changes: 10 additions & 0 deletions paddle/fluid/framework/convert_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ paddle::experimental::DataType TransToPhiDataType(
return DataType::BFLOAT16;
case paddle::framework::proto::VarType::BOOL:
return DataType::BOOL;
case paddle::framework::proto::VarType::PSTRING:
return DataType::PSTRING;
default:
return DataType::UNDEFINED;
}
Expand Down Expand Up @@ -81,6 +83,8 @@ paddle::framework::proto::VarType::Type TransToProtoVarType(
return paddle::framework::proto::VarType::BF16;
case DataType::BOOL:
return paddle::framework::proto::VarType::BOOL;
case DataType::PSTRING:
return paddle::framework::proto::VarType::PSTRING;
default:
PADDLE_THROW(paddle::platform::errors::Unimplemented(
"Unsupported data type `%s` when casting it into "
Expand Down Expand Up @@ -117,6 +121,8 @@ size_t DataTypeSize(DataType dtype) {
return sizeof(paddle::platform::complex<float>);
case DataType::COMPLEX128:
return sizeof(paddle::platform::complex<double>);
case DataType::PSTRING:
return sizeof(paddle::platform::pstring);
default:
return 0;
}
Expand Down Expand Up @@ -145,6 +151,8 @@ DataType String2DataType(const std::string& str) {
return DataType::COMPLEX64;
} else if (str == "complex128") {
return DataType::COMPLEX128;
} else if (str == "pstring") {
return DataType::PSTRING;
} else if (str == "bfloat16") {
return DataType::BFLOAT16;
} else {
Expand Down Expand Up @@ -176,6 +184,8 @@ std::string DataType2String(DataType dtype) {
return "complex64";
case DataType::COMPLEX128:
return "complex128";
case DataType::PSTRING:
return "pstring";
case DataType::BFLOAT16:
return "bfloat16";
default:
Expand Down
5 changes: 4 additions & 1 deletion paddle/fluid/framework/data_type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@

#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/phi/common/pstring.h"

using float16 = paddle::platform::float16;
using bfloat16 = paddle::platform::bfloat16;
using pstring = phi::dtype::pstring;

namespace paddle {
namespace framework {
Expand Down Expand Up @@ -58,7 +60,8 @@ static DataTypeMap* InitDataTypeMap() {
RegisterType<cc_type>(retv, proto_type, #cc_type)

_ForEachDataType_(RegType);

// Register pstring individually
RegType(pstring, proto::VarType::PSTRING);
#undef RegType
return retv;
}
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/framework/framework.proto
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ message VarType {
STRINGS = 26;
VOCAB = 27;
FEED_LIST = 28;
// The data type of phi::StringTensor
PSTRING = 29;
}

required Type type = 1;
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ add_subdirectory(tools)
add_subdirectory(tests)

# make an unity target for compile deps
set(PHI_DEPS convert_utils dense_tensor phi_context kernel_factory kernel_context arg_map_context infermeta lod_utils op_compat_infos sparse_csr_tensor sparse_coo_tensor)
set(PHI_DEPS convert_utils dense_tensor phi_context kernel_factory kernel_context arg_map_context infermeta lod_utils op_compat_infos sparse_csr_tensor sparse_coo_tensor string_tensor)
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(PHI_DEPS ${PHI_DEPS} ${phi_kernels})

Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/api/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
add_subdirectory(lib)
cc_library(phi_api SRCS all.cc DEPS phi_function_api phi_bw_function_api sparse_api sparse_bw_api)
cc_library(phi_api SRCS all.cc DEPS phi_function_api phi_bw_function_api sparse_api sparse_bw_api strings_api)
25 changes: 23 additions & 2 deletions paddle/phi/api/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ set(sparse_bw_api_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/sparse_bw_a
set(sparse_bw_api_header_file_tmp ${sparse_bw_api_header_file}.tmp)
set(sparse_bw_api_source_file_tmp ${sparse_bw_api_source_file}.tmp)

# strings api file
# Paths used by the strings API code generation step:
#   - strings_api_gen_file / strings_api_yaml_file: the generator script and
#     the YAML file passed to it.
#   - strings_api_header_file / strings_api_source_file: the final header and
#     source file locations.
#   - *_tmp: the same paths with a ".tmp" suffix appended.
set(strings_api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/strings_api_gen.py)
set(strings_api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/strings_api.yaml)
set(strings_api_header_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/include/strings_api.h)
set(strings_api_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/strings_api.cc)
set(strings_api_header_file_tmp ${strings_api_header_file}.tmp)
set(strings_api_source_file_tmp ${strings_api_source_file}.tmp)

# wrapped infermeta file
set(wrapped_infermeta_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/wrapped_infermeta_gen.py)
set(api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api.yaml)
Expand Down Expand Up @@ -114,6 +122,19 @@ add_custom_command(
DEPENDS ${sparse_bw_api_yaml_file} ${sparse_bw_api_gen_file} ${api_gen_base} ${api_gen_file} ${sparse_api_gen_file} ${bw_api_gen_file}
VERBATIM)

# generate strings api
# Runs the strings API generator against the YAML spec, writing the header and
# source to their ".tmp" paths, then copies each onto the real output path with
# copy_if_different so an unchanged result does not touch the final files.
add_custom_command(
  OUTPUT ${strings_api_header_file} ${strings_api_source_file}
  COMMAND ${PYTHON_EXECUTABLE} ${strings_api_gen_file}
                 --api_yaml_path ${strings_api_yaml_file}
                 --api_header_path ${strings_api_header_file_tmp}
                 --api_source_path ${strings_api_source_file_tmp}
  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_header_file_tmp} ${strings_api_header_file}
  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_source_file_tmp} ${strings_api_source_file}
  # The message names both copied outputs (the source-file variable was
  # previously misspelled and expanded to an empty string).
  COMMENT "copy_if_different ${strings_api_header_file} ${strings_api_source_file}"
  DEPENDS ${strings_api_yaml_file} ${strings_api_gen_file} ${api_gen_base} ${api_gen_file}
  VERBATIM)

# generate dygraph(intermediate) api
add_custom_command(
OUTPUT ${dygraph_api_header_file} ${dygraph_api_source_file}
Expand Down Expand Up @@ -152,5 +173,5 @@ cc_library(phi_bw_function_api SRCS ${bw_api_source_file} DEPS phi_tensor_raw ph
cc_library(sparse_api SRCS ${sparse_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api_custom_impl)
cc_library(sparse_bw_api SRCS ${sparse_bw_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api sparse_api_custom_impl)
cc_library(phi_dygraph_api SRCS ${dygraph_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_data_transform phi_function_api sparse_api)

cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api api_gen_utils kernel_dispatch infermeta sparse_api)
cc_library(strings_api SRCS ${strings_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils)
cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api api_gen_utils kernel_dispatch infermeta sparse_api strings_api)
1 change: 1 addition & 0 deletions paddle/phi/api/lib/api_declare.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ limitations under the License. */

// PD_DECLARE_API(Math);
// PD_DECLARE_API(SparseApi);
// PD_DECLARE_API(StringsApi);
23 changes: 23 additions & 0 deletions paddle/phi/api/lib/api_gen_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(
return nullptr;
}

// Returns the impl of `tensor` viewed as a phi::StringTensor. The conversion
// is a std::dynamic_pointer_cast, so the result is an empty pointer when the
// impl is not a phi::StringTensor.
std::shared_ptr<phi::StringTensor> TensorToStringTensor(const Tensor& tensor) {
  const auto& impl = tensor.impl();
  return std::dynamic_pointer_cast<phi::StringTensor>(impl);
}

/* ----------------- for infer_meta --------------------- */

phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor) {
Expand Down Expand Up @@ -92,6 +96,10 @@ paddle::optional<phi::MetaTensor> MakeMetaTensor(
return {paddle::none};
}

// Builds a phi::MetaTensor from the given string tensor for use by
// infer_meta functions.
phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor) {
  phi::MetaTensor meta(tensor);
  return meta;
}

/* ------------------ for output ----------------------- */

phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) {
Expand Down Expand Up @@ -148,5 +156,20 @@ phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type) {
return out->impl().get();
}

// Prepares `out` as the output of a strings kernel and returns its impl as a
// raw pointer.
//
// When `out` is not initialized, `type` is TensorType::STRING_TENSOR and
// `out` has no impl yet, a default-constructed phi::StringTensor is installed
// as the impl first. In every case the function returns out->impl().get().
// `backend` is not read here.
phi::TensorBase* SetStringsKernelOutput(Backend backend,
                                        Tensor* out,
                                        TensorType type) {
  const bool needs_string_impl = !out->initialized() &&
                                 type == TensorType::STRING_TENSOR &&
                                 out->impl() == nullptr;
  if (needs_string_impl) {
    auto fresh_impl = std::make_shared<phi::StringTensor>();
    out->set_impl(fresh_impl);
  }
  return out->impl().get();
}

} // namespace experimental
} // namespace paddle
11 changes: 10 additions & 1 deletion paddle/phi/api/lib/api_gen_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@ limitations under the License. */
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"
#include "paddle/phi/core/string_tensor.h"

namespace paddle {
namespace experimental {

enum class TensorType { DENSE_TENSOR, SPARSE_CSR, SPARSE_COO };
enum class TensorType { DENSE_TENSOR, SPARSE_CSR, SPARSE_COO, STRING_TENSOR };

/* ------------------ for input ----------------------- */

Expand All @@ -43,6 +44,8 @@ std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(const Tensor& tensor);
std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(
const paddle::optional<Tensor>& tensor);

// Returns the impl of `tensor` cast to phi::StringTensor (the definition uses
// std::dynamic_pointer_cast, so the result is empty if the cast fails).
std::shared_ptr<phi::StringTensor> TensorToStringTensor(const Tensor& tensor);

/* ----------------- for infer_meta --------------------- */

phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor);
Expand All @@ -58,6 +61,8 @@ phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor);
paddle::optional<phi::MetaTensor> MakeMetaTensor(
const paddle::optional<const phi::SelectedRows&>& tensor);

// Builds a phi::MetaTensor from a phi::StringTensor.
phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor);

/* ------------------ for output ----------------------- */

phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out);
Expand All @@ -70,5 +75,9 @@ phi::SelectedRows* SetSelectedRowsKernelOutput(Backend backend, Tensor* out);

phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type);

// Returns the impl of `out` as a raw pointer. If `out` is uninitialized,
// `type` is TensorType::STRING_TENSOR and `out` has no impl, a new
// phi::StringTensor is installed as the impl before returning.
phi::TensorBase* SetStringsKernelOutput(Backend backend,
Tensor* out,
TensorType type);

} // namespace experimental
} // namespace paddle
2 changes: 1 addition & 1 deletion paddle/phi/api/lib/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
cc_library(phi_api_utils SRCS storage.cc tensor_utils.cc DEPS
tensor_base convert_utils dense_tensor lod_tensor selected_rows_utils place var_type_traits scalar)
tensor_base convert_utils dense_tensor lod_tensor selected_rows_utils place var_type_traits scalar string_tensor)
Loading

0 comments on commit 0695e1a

Please sign in to comment.