From 4923b8d72d39a4189ca7c7b9e20359d6ba527a10 Mon Sep 17 00:00:00 2001 From: Mike Kruskal <62662355+mkruskal-google@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:21:21 -0700 Subject: [PATCH] Fix string_type bugs in edition 2023 (#17211) * Fix a bug in which proto code uses ctype instead of string_type internally. We change InferLegacyProtoFeatures to set ctype based on string_type when string_type is set. We need to update CppGenerator::ValidateFeatures to allow both ctype and string_type to be set because it runs after InferLegacyProtoFeatures. PiperOrigin-RevId: 645480157 * Regenerate stale files * Fix bad merge where cpp_type isn't useable during build in 27.x * Infer string type feature from ctype pre-editions. This will allow internal code to simply check the feature value instead of checking both ctype and string_type. PiperOrigin-RevId: 625897380 --------- Co-authored-by: Protobuf Team Bot --- src/file_lists.cmake | 1 + src/google/protobuf/BUILD.bazel | 2 + src/google/protobuf/compiler/cpp/generator.cc | 16 +++- src/google/protobuf/descriptor.cc | 61 +++++++++++++++- src/google/protobuf/descriptor_unittest.cc | 73 ++++++++++++++++++- .../protobuf/unittest_string_type.proto | 16 ++++ 6 files changed, 164 insertions(+), 5 deletions(-) create mode 100644 src/google/protobuf/unittest_string_type.proto diff --git a/src/file_lists.cmake b/src/file_lists.cmake index 5190b7cb5012..4488f3596d1e 100644 --- a/src/file_lists.cmake +++ b/src/file_lists.cmake @@ -1081,6 +1081,7 @@ set(protobuf_test_protos_files ${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_proto3_lite.proto ${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_proto3_optional.proto ${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_retention.proto + ${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_string_type.proto ${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_string_view.proto ${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_well_known_types.proto ) diff --git a/src/google/protobuf/BUILD.bazel b/src/google/protobuf/BUILD.bazel index 358a2e94b6b1..6ac0549f5ceb 100644 --- a/src/google/protobuf/BUILD.bazel +++ b/src/google/protobuf/BUILD.bazel @@ -835,6 +835,7 @@ filegroup( "unittest_proto3_lite.proto", "unittest_proto3_optional.proto", "unittest_retention.proto", + "unittest_string_type.proto", "unittest_well_known_types.proto", ], visibility = ["//:__subpackages__"], @@ -925,6 +926,7 @@ proto_library( deps = [ ":any_proto", ":api_proto", + ":cpp_features_proto", ":descriptor_proto", ":duration_proto", ":empty_proto", diff --git a/src/google/protobuf/compiler/cpp/generator.cc b/src/google/protobuf/compiler/cpp/generator.cc index 6b166e74fb5e..8ee87e546d30 100644 --- a/src/google/protobuf/compiler/cpp/generator.cc +++ b/src/google/protobuf/compiler/cpp/generator.cc @@ -397,9 +397,19 @@ absl::Status CppGenerator::ValidateFeatures(const FileDescriptor* file) const { " specifies string_type=CORD which is not supported " "for extensions.")); } else if (field.options().has_ctype()) { - status = absl::FailedPreconditionError(absl::StrCat( - field.full_name(), - " specifies both string_type and ctype which is not supported.")); + // NOTE: this is just a sanity check. This case should never happen + // because descriptor builder makes string_type override ctype. + const FieldOptions::CType ctype = field.options().ctype(); + const pb::CppFeatures::StringType string_type = + unresolved_features.string_type(); + if ((ctype == FieldOptions::STRING && + string_type != pb::CppFeatures::STRING) || + (ctype == FieldOptions::CORD && + string_type != pb::CppFeatures::CORD)) { + status = absl::FailedPreconditionError( + absl::StrCat(field.full_name(), + " specifies inconsistent string_type and ctype.")); + } } } diff --git a/src/google/protobuf/descriptor.cc b/src/google/protobuf/descriptor.cc index ce5e0cc1660f..9b4a8fec66fc 100644 --- a/src/google/protobuf/descriptor.cc +++ b/src/google/protobuf/descriptor.cc @@ -3047,6 +3047,10 @@ void FieldDescriptor::CopyTo(FieldDescriptorProto* proto) const { if (&options() != &FieldOptions::default_instance()) { *proto->mutable_options() = options(); + if (proto_features_->GetExtension(pb::cpp).has_string_type()) { + // ctype must have been set in InferLegacyProtoFeatures so avoid copying. + proto->mutable_options()->clear_ctype(); + } } RestoreFeaturesToOptions(proto_features_, proto); @@ -5434,6 +5438,16 @@ static void InferLegacyProtoFeatures(const ProtoT& proto, static void InferLegacyProtoFeatures(const FieldDescriptorProto& proto, const FieldOptions& options, Edition edition, FeatureSet& features) { + if (!features.MutableExtension(pb::cpp)->has_string_type()) { + if (options.ctype() == FieldOptions::CORD) { + features.MutableExtension(pb::cpp)->set_string_type( + pb::CppFeatures::CORD); + } + } + + // Everything below is specifically for proto2/proto. + if (!IsLegacyEdition(edition)) return; + if (proto.label() == FieldDescriptorProto::LABEL_REQUIRED) { features.set_field_presence(FeatureSet::LEGACY_REQUIRED); } @@ -5450,6 +5464,25 @@ static void InferLegacyProtoFeatures(const FieldDescriptorProto& proto, } } +// TODO: we should update proto code to not need ctype to be set +// when string_type is set. +static void EnforceCTypeStringTypeConsistency( + Edition edition, uint8_t type, + const pb::CppFeatures& cpp_features, FieldOptions& options) { + if (&options == &FieldOptions::default_instance()) return; + if (edition < Edition::EDITION_2024 && + (type == FieldDescriptor::TYPE_STRING || + type == FieldDescriptor::TYPE_BYTES)) { + switch (cpp_features.string_type()) { + case pb::CppFeatures::CORD: + options.set_ctype(FieldOptions::CORD); + break; + default: + break; + } + } +} + template void DescriptorBuilder::ResolveFeaturesImpl( Edition edition, const typename DescriptorT::Proto& proto, @@ -5479,8 +5512,8 @@ void DescriptorBuilder::ResolveFeaturesImpl( AddError(descriptor->name(), proto, error_location, "Features are only valid under editions."); } - InferLegacyProtoFeatures(proto, *options, edition, base_features); } + InferLegacyProtoFeatures(proto, *options, edition, base_features); if (base_features.ByteSizeLong() == 0 && !force_merge) { // Nothing to merge, and we aren't forcing it. @@ -6067,6 +6100,24 @@ FileDescriptor* DescriptorBuilder::BuildFileImpl( iter != options_to_interpret_.end(); ++iter) { option_interpreter.InterpretNonExtensionOptions(&(*iter)); } + + // TODO: move this check back to generator.cc once we no longer + // need to set both ctype and string_type internally. + internal::VisitDescriptors( + *result, proto, + [&](const FieldDescriptor& field, const FieldDescriptorProto& proto) { + if (field.options_->has_ctype() && field.options_->features() + .GetExtension(pb::cpp) + .has_string_type()) { + AddError( + field.full_name(), proto, DescriptorPool::ErrorCollector::TYPE, + absl::StrFormat("Field %s specifies both string_type and ctype " + "which is not supported.", + field.full_name()) + .c_str()); + } + }); + // Handle feature resolution. This must occur after option interpretation, // but before validation. internal::VisitDescriptors( @@ -6084,6 +6135,14 @@ FileDescriptor* DescriptorBuilder::BuildFileImpl( alloc); }); + internal::VisitDescriptors(*result, [&](const FieldDescriptor& field) { + EnforceCTypeStringTypeConsistency( + field.file()->edition(), field.type_, + field.merged_features_->GetExtension(pb::cpp), + const_cast< // NOLINT(google3-runtime-proto-const-cast) + FieldOptions&>(*field.options_)); + }); + // Post-process cleanup for field features. internal::VisitDescriptors( *result, proto, diff --git a/src/google/protobuf/descriptor_unittest.cc b/src/google/protobuf/descriptor_unittest.cc index 97065c55cb4f..f5e2c69ab8d2 100644 --- a/src/google/protobuf/descriptor_unittest.cc +++ b/src/google/protobuf/descriptor_unittest.cc @@ -73,6 +73,7 @@ #include "google/protobuf/unittest_lazy_dependencies_custom_option.pb.h" #include "google/protobuf/unittest_lazy_dependencies_enum.pb.h" #include "google/protobuf/unittest_proto3_arena.pb.h" +#include "google/protobuf/unittest_string_type.pb.h" // Must be included last. @@ -7513,6 +7514,20 @@ TEST_F(FeaturesTest, Proto2Features) { } field { name: "utf8" number: 6 label: LABEL_REPEATED type: TYPE_STRING } field { name: "req" number: 7 label: LABEL_REQUIRED type: TYPE_INT32 } + field { + name: "cord" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_STRING + options { ctype: CORD } + } + field { + name: "piece" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_STRING + options { ctype: STRING_PIECE } + } } enum_type { name: "Foo2" @@ -7563,6 +7578,10 @@ TEST_F(FeaturesTest, Proto2Features) { Utf8CheckMode::kVerify); EXPECT_EQ(GetUtf8CheckMode(message->FindFieldByName("str"), /*is_lite=*/true), Utf8CheckMode::kNone); + EXPECT_EQ(GetCoreFeatures(message->FindFieldByName("cord")) + .GetExtension(pb::cpp) + .string_type(), + pb::CppFeatures::CORD); EXPECT_FALSE(field->is_packed()); EXPECT_FALSE(field->legacy_enum_field_treated_as_closed()); EXPECT_FALSE(HasPreservingUnknownEnumSemantics(field)); @@ -7815,6 +7834,58 @@ TEST_F(FeaturesTest, Edition2023Defaults) { pb::VALUE3); } +TEST_F(FeaturesTest, Edition2023InferredFeatures) { + FileDescriptorProto file_proto = ParseTextOrDie(R"pb( + name: "foo.proto" + syntax: "editions" + edition: EDITION_2023 + message_type { + name: "Foo" + field { name: "str" number: 1 label: LABEL_OPTIONAL type: TYPE_STRING } + field { + name: "cord" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + options { ctype: CORD } + } + field { + name: "piece" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + options { ctype: STRING_PIECE } + } + field { + name: "view" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_STRING + options { + features { + [pb.cpp] { string_type: VIEW } + } + } + } + } + )pb"); + + BuildDescriptorMessagesInTestPool(); + BuildFileInTestPool(pb::CppFeatures::GetDescriptor()->file()); + const FileDescriptor* file = ABSL_DIE_IF_NULL(pool_.BuildFile(file_proto)); + const Descriptor* message = file->message_type(0); + + EXPECT_EQ( + GetCoreFeatures(message->field(0)).GetExtension(pb::cpp).string_type(), + pb::CppFeatures::STRING); + EXPECT_EQ( + GetCoreFeatures(message->field(1)).GetExtension(pb::cpp).string_type(), + pb::CppFeatures::CORD); + EXPECT_EQ( + GetCoreFeatures(message->field(3)).GetExtension(pb::cpp).string_type(), + pb::CppFeatures::VIEW); +} + TEST_F(FeaturesTest, Edition2024Defaults) { FileDescriptorProto file_proto = ParseTextOrDie(R"pb( name: "foo.proto" @@ -9614,7 +9685,7 @@ TEST_F(FeaturesTest, EnumFeatureHelpers) { type_name: "FooOpen" options { features { - [pb.cpp] { legacy_closed_enum: true string_type: STRING } + [pb.cpp] { legacy_closed_enum: true } } } } diff --git a/src/google/protobuf/unittest_string_type.proto b/src/google/protobuf/unittest_string_type.proto new file mode 100644 index 000000000000..611588f19185 --- /dev/null +++ b/src/google/protobuf/unittest_string_type.proto @@ -0,0 +1,16 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +edition = "2023"; + +package protobuf_unittest; + +import "google/protobuf/cpp_features.proto"; + +message EntryProto { + bytes value = 3 [features.(pb.cpp).string_type = CORD]; +}