Skip to content

Commit

Permalink
Fix string_type bugs in edition 2023 (#17211)
Browse files Browse the repository at this point in the history
* Fix a bug in which proto code uses ctype instead of string_type internally.

We change InferLegacyProtoFeatures to set ctype based on string_type when string_type is set. We need to update CppGenerator::ValidateFeatures to allow both ctype and string_type to be set because it runs after InferLegacyProtoFeatures.

PiperOrigin-RevId: 645480157

* Regenerate stale files

* Fix bad merge where cpp_type isn't useable during build in 27.x

* Infer string type feature from ctype pre-editions.

This will allow internal code to simply check the feature value instead of checking both ctype and string_type.

PiperOrigin-RevId: 625897380

---------

Co-authored-by: Protobuf Team Bot <[email protected]>
  • Loading branch information
mkruskal-google and protobuf-github-bot authored Jun 24, 2024
1 parent b0a3c23 commit 4923b8d
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/file_lists.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,7 @@ set(protobuf_test_protos_files
${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_proto3_lite.proto
${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_proto3_optional.proto
${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_retention.proto
${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_string_type.proto
${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_string_view.proto
${protobuf_SOURCE_DIR}/src/google/protobuf/unittest_well_known_types.proto
)
Expand Down
2 changes: 2 additions & 0 deletions src/google/protobuf/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,7 @@ filegroup(
"unittest_proto3_lite.proto",
"unittest_proto3_optional.proto",
"unittest_retention.proto",
"unittest_string_type.proto",
"unittest_well_known_types.proto",
],
visibility = ["//:__subpackages__"],
Expand Down Expand Up @@ -925,6 +926,7 @@ proto_library(
deps = [
":any_proto",
":api_proto",
":cpp_features_proto",
":descriptor_proto",
":duration_proto",
":empty_proto",
Expand Down
16 changes: 13 additions & 3 deletions src/google/protobuf/compiler/cpp/generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -397,9 +397,19 @@ absl::Status CppGenerator::ValidateFeatures(const FileDescriptor* file) const {
" specifies string_type=CORD which is not supported "
"for extensions."));
} else if (field.options().has_ctype()) {
status = absl::FailedPreconditionError(absl::StrCat(
field.full_name(),
" specifies both string_type and ctype which is not supported."));
// NOTE: this is just a sanity check. This case should never happen
// because descriptor builder makes string_type override ctype.
const FieldOptions::CType ctype = field.options().ctype();
const pb::CppFeatures::StringType string_type =
unresolved_features.string_type();
if ((ctype == FieldOptions::STRING &&
string_type != pb::CppFeatures::STRING) ||
(ctype == FieldOptions::CORD &&
string_type != pb::CppFeatures::CORD)) {
status = absl::FailedPreconditionError(
absl::StrCat(field.full_name(),
" specifies inconsistent string_type and ctype."));
}
}
}

Expand Down
61 changes: 60 additions & 1 deletion src/google/protobuf/descriptor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3047,6 +3047,10 @@ void FieldDescriptor::CopyTo(FieldDescriptorProto* proto) const {

if (&options() != &FieldOptions::default_instance()) {
*proto->mutable_options() = options();
if (proto_features_->GetExtension(pb::cpp).has_string_type()) {
// ctype must have been set in InferLegacyProtoFeatures so avoid copying.
proto->mutable_options()->clear_ctype();
}
}

RestoreFeaturesToOptions(proto_features_, proto);
Expand Down Expand Up @@ -5434,6 +5438,16 @@ static void InferLegacyProtoFeatures(const ProtoT& proto,
static void InferLegacyProtoFeatures(const FieldDescriptorProto& proto,
const FieldOptions& options,
Edition edition, FeatureSet& features) {
if (!features.MutableExtension(pb::cpp)->has_string_type()) {
if (options.ctype() == FieldOptions::CORD) {
features.MutableExtension(pb::cpp)->set_string_type(
pb::CppFeatures::CORD);
}
}

// Everything below is specifically for proto2/proto.
if (!IsLegacyEdition(edition)) return;

if (proto.label() == FieldDescriptorProto::LABEL_REQUIRED) {
features.set_field_presence(FeatureSet::LEGACY_REQUIRED);
}
Expand All @@ -5450,6 +5464,25 @@ static void InferLegacyProtoFeatures(const FieldDescriptorProto& proto,
}
}

// TODO: we should update proto code to not need ctype to be set
// when string_type is set.
static void EnforceCTypeStringTypeConsistency(
Edition edition, uint8_t type,
const pb::CppFeatures& cpp_features, FieldOptions& options) {
if (&options == &FieldOptions::default_instance()) return;
if (edition < Edition::EDITION_2024 &&
(type == FieldDescriptor::TYPE_STRING ||
type == FieldDescriptor::TYPE_BYTES)) {
switch (cpp_features.string_type()) {
case pb::CppFeatures::CORD:
options.set_ctype(FieldOptions::CORD);
break;
default:
break;
}
}
}

template <class DescriptorT>
void DescriptorBuilder::ResolveFeaturesImpl(
Edition edition, const typename DescriptorT::Proto& proto,
Expand Down Expand Up @@ -5479,8 +5512,8 @@ void DescriptorBuilder::ResolveFeaturesImpl(
AddError(descriptor->name(), proto, error_location,
"Features are only valid under editions.");
}
InferLegacyProtoFeatures(proto, *options, edition, base_features);
}
InferLegacyProtoFeatures(proto, *options, edition, base_features);

if (base_features.ByteSizeLong() == 0 && !force_merge) {
// Nothing to merge, and we aren't forcing it.
Expand Down Expand Up @@ -6067,6 +6100,24 @@ FileDescriptor* DescriptorBuilder::BuildFileImpl(
iter != options_to_interpret_.end(); ++iter) {
option_interpreter.InterpretNonExtensionOptions(&(*iter));
}

// TODO: move this check back to generator.cc once we no longer
// need to set both ctype and string_type internally.
internal::VisitDescriptors(
*result, proto,
[&](const FieldDescriptor& field, const FieldDescriptorProto& proto) {
if (field.options_->has_ctype() && field.options_->features()
.GetExtension(pb::cpp)
.has_string_type()) {
AddError(
field.full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
absl::StrFormat("Field %s specifies both string_type and ctype "
"which is not supported.",
field.full_name())
.c_str());
}
});

// Handle feature resolution. This must occur after option interpretation,
// but before validation.
internal::VisitDescriptors(
Expand All @@ -6084,6 +6135,14 @@ FileDescriptor* DescriptorBuilder::BuildFileImpl(
alloc);
});

internal::VisitDescriptors(*result, [&](const FieldDescriptor& field) {
EnforceCTypeStringTypeConsistency(
field.file()->edition(), field.type_,
field.merged_features_->GetExtension(pb::cpp),
const_cast< // NOLINT(google3-runtime-proto-const-cast)
FieldOptions&>(*field.options_));
});

// Post-process cleanup for field features.
internal::VisitDescriptors(
*result, proto,
Expand Down
73 changes: 72 additions & 1 deletion src/google/protobuf/descriptor_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
#include "google/protobuf/unittest_lazy_dependencies_custom_option.pb.h"
#include "google/protobuf/unittest_lazy_dependencies_enum.pb.h"
#include "google/protobuf/unittest_proto3_arena.pb.h"
#include "google/protobuf/unittest_string_type.pb.h"


// Must be included last.
Expand Down Expand Up @@ -7513,6 +7514,20 @@ TEST_F(FeaturesTest, Proto2Features) {
}
field { name: "utf8" number: 6 label: LABEL_REPEATED type: TYPE_STRING }
field { name: "req" number: 7 label: LABEL_REQUIRED type: TYPE_INT32 }
field {
name: "cord"
number: 8
label: LABEL_OPTIONAL
type: TYPE_STRING
options { ctype: CORD }
}
field {
name: "piece"
number: 9
label: LABEL_OPTIONAL
type: TYPE_STRING
options { ctype: STRING_PIECE }
}
}
enum_type {
name: "Foo2"
Expand Down Expand Up @@ -7563,6 +7578,10 @@ TEST_F(FeaturesTest, Proto2Features) {
Utf8CheckMode::kVerify);
EXPECT_EQ(GetUtf8CheckMode(message->FindFieldByName("str"), /*is_lite=*/true),
Utf8CheckMode::kNone);
EXPECT_EQ(GetCoreFeatures(message->FindFieldByName("cord"))
.GetExtension(pb::cpp)
.string_type(),
pb::CppFeatures::CORD);
EXPECT_FALSE(field->is_packed());
EXPECT_FALSE(field->legacy_enum_field_treated_as_closed());
EXPECT_FALSE(HasPreservingUnknownEnumSemantics(field));
Expand Down Expand Up @@ -7815,6 +7834,58 @@ TEST_F(FeaturesTest, Edition2023Defaults) {
pb::VALUE3);
}

TEST_F(FeaturesTest, Edition2023InferredFeatures) {
FileDescriptorProto file_proto = ParseTextOrDie(R"pb(
name: "foo.proto"
syntax: "editions"
edition: EDITION_2023
message_type {
name: "Foo"
field { name: "str" number: 1 label: LABEL_OPTIONAL type: TYPE_STRING }
field {
name: "cord"
number: 2
label: LABEL_OPTIONAL
type: TYPE_STRING
options { ctype: CORD }
}
field {
name: "piece"
number: 3
label: LABEL_OPTIONAL
type: TYPE_STRING
options { ctype: STRING_PIECE }
}
field {
name: "view"
number: 4
label: LABEL_OPTIONAL
type: TYPE_STRING
options {
features {
[pb.cpp] { string_type: VIEW }
}
}
}
}
)pb");

BuildDescriptorMessagesInTestPool();
BuildFileInTestPool(pb::CppFeatures::GetDescriptor()->file());
const FileDescriptor* file = ABSL_DIE_IF_NULL(pool_.BuildFile(file_proto));
const Descriptor* message = file->message_type(0);

EXPECT_EQ(
GetCoreFeatures(message->field(0)).GetExtension(pb::cpp).string_type(),
pb::CppFeatures::STRING);
EXPECT_EQ(
GetCoreFeatures(message->field(1)).GetExtension(pb::cpp).string_type(),
pb::CppFeatures::CORD);
EXPECT_EQ(
GetCoreFeatures(message->field(3)).GetExtension(pb::cpp).string_type(),
pb::CppFeatures::VIEW);
}

TEST_F(FeaturesTest, Edition2024Defaults) {
FileDescriptorProto file_proto = ParseTextOrDie(R"pb(
name: "foo.proto"
Expand Down Expand Up @@ -9614,7 +9685,7 @@ TEST_F(FeaturesTest, EnumFeatureHelpers) {
type_name: "FooOpen"
options {
features {
[pb.cpp] { legacy_closed_enum: true string_type: STRING }
[pb.cpp] { legacy_closed_enum: true }
}
}
}
Expand Down
16 changes: 16 additions & 0 deletions src/google/protobuf/unittest_string_type.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

edition = "2023";

package protobuf_unittest;

import "google/protobuf/cpp_features.proto";

message EntryProto {
bytes value = 3 [features.(pb.cpp).string_type = CORD];
}

0 comments on commit 4923b8d

Please sign in to comment.