From 139ea4d38525281349e38bcfadce449a6990421c Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Tue, 27 Feb 2024 09:47:47 -0800 Subject: [PATCH] Tweak "group to property name" mapping for C#. Under editions, where fields using a delimited encoding have independent field names from type names, we want to use the specified field name. This change keeps the existing naming for properties generated from proto2 files, while improving the experience under editions. This introduces a C# incompatibility when upgrading a proto from proto2 to editions, but we anticipate this being a relatively rare problem. PiperOrigin-RevId: 610783407 --- .../compiler/csharp/csharp_generator.h | 1 + .../compiler/csharp/csharp_helpers.cc | 177 ++++++++++-------- 2 files changed, 105 insertions(+), 73 deletions(-) diff --git a/src/google/protobuf/compiler/csharp/csharp_generator.h b/src/google/protobuf/compiler/csharp/csharp_generator.h index 9bdc07087025a..cb955f3b78a4c 100644 --- a/src/google/protobuf/compiler/csharp/csharp_generator.h +++ b/src/google/protobuf/compiler/csharp/csharp_generator.h @@ -35,6 +35,7 @@ class PROTOC_EXPORT Generator : public CodeGenerator { GeneratorContext* generator_context, std::string* error) const override; uint64_t GetSupportedFeatures() const override; + using CodeGenerator::GetEdition; }; } // namespace csharp diff --git a/src/google/protobuf/compiler/csharp/csharp_helpers.cc b/src/google/protobuf/compiler/csharp/csharp_helpers.cc index 382d255018639..1d87a12c491c6 100644 --- a/src/google/protobuf/compiler/csharp/csharp_helpers.cc +++ b/src/google/protobuf/compiler/csharp/csharp_helpers.cc @@ -24,6 +24,7 @@ #include "absl/strings/string_view.h" #include "google/protobuf/compiler/csharp/csharp_enum_field.h" #include "google/protobuf/compiler/csharp/csharp_field_base.h" +#include "google/protobuf/compiler/csharp/csharp_generator.h" #include "google/protobuf/compiler/csharp/csharp_map_field.h" #include "google/protobuf/compiler/csharp/csharp_message_field.h" #include "google/protobuf/compiler/csharp/csharp_options.h" @@ -87,11 +88,11 @@ CSharpType GetCSharpType(FieldDescriptor::Type type) { // types are added. } ABSL_LOG(FATAL) << "Can't get here."; - return (CSharpType) -1; + return (CSharpType)-1; } -// Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty) -// into a PascalCase string. Precise rules implemented: +// Convert a string which is expected to be SHOUTY_CASE (but may not be +// *precisely* shouty) into a PascalCase string. Precise rules implemented: // Previous input character Current character Case // Any Non-alphanumeric Skipped @@ -124,12 +125,11 @@ std::string ShoutyToPascalCase(absl::string_view input) { return result; } -// Attempt to remove a prefix from a value, ignoring casing and skipping underscores. -// (foo, foo_bar) => bar - underscore after prefix is skipped -// (FOO, foo_bar) => bar - casing is ignored -// (foo_bar, foobarbaz) => baz - underscore in prefix is ignored -// (foobar, foo_barbaz) => baz - underscore in value is ignored -// (foo, bar) => bar - prefix isn't matched; return original value +// Attempt to remove a prefix from a value, ignoring casing and skipping +// underscores. (foo, foo_bar) => bar - underscore after prefix is skipped (FOO, +// foo_bar) => bar - casing is ignored (foo_bar, foobarbaz) => baz - underscore +// in prefix is ignored (foobar, foo_barbaz) => baz - underscore in value is +// ignored (foo, bar) => bar - prefix isn't matched; return original value std::string TryRemovePrefix(absl::string_view prefix, absl::string_view value) { // First normalize to a lower-case no-underscores prefix to match against std::string prefix_to_match = ""; @@ -142,13 +142,14 @@ std::string TryRemovePrefix(absl::string_view prefix, absl::string_view value) { // This keeps track of how much of value we've consumed size_t prefix_index, value_index; for (prefix_index = 0, value_index = 0; - prefix_index < prefix_to_match.size() && value_index < value.size(); - value_index++) { + prefix_index < prefix_to_match.size() && value_index < value.size(); + value_index++) { // Skip over underscores in the value if (value[value_index] == '_') { continue; } - if (absl::ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) { + if (absl::ascii_tolower(value[value_index]) != + prefix_to_match[prefix_index++]) { // Failed to match the prefix - bail out early. return std::string(value); } @@ -164,7 +165,8 @@ std::string TryRemovePrefix(absl::string_view prefix, absl::string_view value) { value_index++; } - // If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip. + // If there's nothing left (e.g. it was a prefix with only underscores + // afterwards), don't strip. if (value_index == value.size()) { return std::string(value); } @@ -181,8 +183,8 @@ std::string GetEnumValueName(absl::string_view enum_name, absl::string_view enum_value_name) { std::string stripped = TryRemovePrefix(enum_name, enum_value_name); std::string result = ShoutyToPascalCase(stripped); - // Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned - // string is a valid identifier. + // Just in case we have an enum name of FOO and a value of FOO_2... make sure + // the returned string is a valid identifier. if (absl::ascii_isdigit(result[0])) { return absl::StrCat("_", result); } @@ -237,11 +239,17 @@ std::string GetFullExtensionName(const FieldDescriptor* descriptor) { GetPropertyName(descriptor)); } -// Groups are hacky: The name of the field is just the lower-cased name -// of the group type. In C#, though, we would like to retain the original -// capitalization of the type name. +// Groups in proto2 are hacky: The name of the field is just the lower-cased +// name of the group type. In C#, though, we would like to retain the original +// capitalization of the type name. Fields with an encoding of "delimited" in +// editions are like groups, but have a real name, so we use that. This means +// upgrading a proto from proto2 to editions *can* be a breaking change for C#, +// but it's unlikely to cause significant issues (as C# has primarily been used +// with proto3, and even with proto2 groups, only some group names will cause +// compatibility issues). std::string GetFieldName(const FieldDescriptor* descriptor) { - if (descriptor->type() == FieldDescriptor::TYPE_GROUP) { + if (descriptor->type() == FieldDescriptor::TYPE_GROUP && + Generator::GetEdition(*descriptor->file()) == Edition::EDITION_PROTO2) { return descriptor->message_type()->name(); } else { return descriptor->name(); @@ -254,37 +262,32 @@ std::string GetFieldConstantName(const FieldDescriptor* field) { std::string GetPropertyName(const FieldDescriptor* descriptor) { // Names of members declared or overridden in the message. - static const auto& reserved_member_names = *new absl::flat_hash_set({ - "Types", - "Descriptor", - "Equals", - "ToString", - "GetHashCode", - "WriteTo", - "Clone", - "CalculateSize", - "MergeFrom", - "OnConstruction", - "Parser" - }); + static const auto& reserved_member_names = + *new absl::flat_hash_set( + {"Types", "Descriptor", "Equals", "ToString", "GetHashCode", + "WriteTo", "Clone", "CalculateSize", "MergeFrom", "OnConstruction", + "Parser"}); // TODO: consider introducing csharp_property_name field option std::string property_name = UnderscoresToPascalCase(GetFieldName(descriptor)); // Avoid either our own type name or reserved names. - // There are various ways of ending up with naming collisions, but we try to avoid obvious - // ones. In particular, we avoid the names of all the members we generate. - // Note that we *don't* add an underscore for MemberwiseClone or GetType. Those generate - // warnings, but not errors; changing the name now could be a breaking change. - if (property_name == descriptor->containing_type()->name() - || reserved_member_names.find(property_name) != reserved_member_names.end()) { + // There are various ways of ending up with naming collisions, but we try to + // avoid obvious ones. In particular, we avoid the names of all the members we + // generate. Note that we *don't* add an underscore for MemberwiseClone or + // GetType. Those generate warnings, but not errors; changing the name now + // could be a breaking change. + if (property_name == descriptor->containing_type()->name() || + reserved_member_names.find(property_name) != + reserved_member_names.end()) { absl::StrAppend(&property_name, "_"); } return property_name; } std::string GetOneofCaseName(const FieldDescriptor* descriptor) { - // The name in a oneof case enum is the same as for the property, but as we always have a "None" - // value as well, we need to reserve that by appending an underscore. + // The name in a oneof case enum is the same as for the property, but as we + // always have a "None" value as well, we need to reserve that by appending an + // underscore. std::string property_name = GetPropertyName(descriptor); return property_name == "None" ? "None_" : property_name; } @@ -294,29 +297,47 @@ std::string GetOneofCaseName(const FieldDescriptor* descriptor) { // returns -1. int GetFixedSize(FieldDescriptor::Type type) { switch (type) { - case FieldDescriptor::TYPE_INT32 : return -1; - case FieldDescriptor::TYPE_INT64 : return -1; - case FieldDescriptor::TYPE_UINT32 : return -1; - case FieldDescriptor::TYPE_UINT64 : return -1; - case FieldDescriptor::TYPE_SINT32 : return -1; - case FieldDescriptor::TYPE_SINT64 : return -1; - case FieldDescriptor::TYPE_FIXED32 : return internal::WireFormatLite::kFixed32Size; - case FieldDescriptor::TYPE_FIXED64 : return internal::WireFormatLite::kFixed64Size; - case FieldDescriptor::TYPE_SFIXED32: return internal::WireFormatLite::kSFixed32Size; - case FieldDescriptor::TYPE_SFIXED64: return internal::WireFormatLite::kSFixed64Size; - case FieldDescriptor::TYPE_FLOAT : return internal::WireFormatLite::kFloatSize; - case FieldDescriptor::TYPE_DOUBLE : return internal::WireFormatLite::kDoubleSize; - - case FieldDescriptor::TYPE_BOOL : return internal::WireFormatLite::kBoolSize; - case FieldDescriptor::TYPE_ENUM : return -1; - - case FieldDescriptor::TYPE_STRING : return -1; - case FieldDescriptor::TYPE_BYTES : return -1; - case FieldDescriptor::TYPE_GROUP : return -1; - case FieldDescriptor::TYPE_MESSAGE : return -1; - - // No default because we want the compiler to complain if any new - // types are added. + case FieldDescriptor::TYPE_INT32: + return -1; + case FieldDescriptor::TYPE_INT64: + return -1; + case FieldDescriptor::TYPE_UINT32: + return -1; + case FieldDescriptor::TYPE_UINT64: + return -1; + case FieldDescriptor::TYPE_SINT32: + return -1; + case FieldDescriptor::TYPE_SINT64: + return -1; + case FieldDescriptor::TYPE_FIXED32: + return internal::WireFormatLite::kFixed32Size; + case FieldDescriptor::TYPE_FIXED64: + return internal::WireFormatLite::kFixed64Size; + case FieldDescriptor::TYPE_SFIXED32: + return internal::WireFormatLite::kSFixed32Size; + case FieldDescriptor::TYPE_SFIXED64: + return internal::WireFormatLite::kSFixed64Size; + case FieldDescriptor::TYPE_FLOAT: + return internal::WireFormatLite::kFloatSize; + case FieldDescriptor::TYPE_DOUBLE: + return internal::WireFormatLite::kDoubleSize; + + case FieldDescriptor::TYPE_BOOL: + return internal::WireFormatLite::kBoolSize; + case FieldDescriptor::TYPE_ENUM: + return -1; + + case FieldDescriptor::TYPE_STRING: + return -1; + case FieldDescriptor::TYPE_BYTES: + return -1; + case FieldDescriptor::TYPE_GROUP: + return -1; + case FieldDescriptor::TYPE_MESSAGE: + return -1; + + // No default because we want the compiler to complain if any new + // types are added. } ABSL_LOG(FATAL) << "Can't get here."; return -1; @@ -373,41 +394,51 @@ FieldGeneratorBase* CreateFieldGenerator(const FieldDescriptor* descriptor, if (descriptor->is_map()) { return new MapFieldGenerator(descriptor, presenceIndex, options); } else { - return new RepeatedMessageFieldGenerator(descriptor, presenceIndex, options); + return new RepeatedMessageFieldGenerator(descriptor, presenceIndex, + options); } } else { if (IsWrapperType(descriptor)) { if (descriptor->real_containing_oneof()) { - return new WrapperOneofFieldGenerator(descriptor, presenceIndex, options); + return new WrapperOneofFieldGenerator(descriptor, presenceIndex, + options); } else { - return new WrapperFieldGenerator(descriptor, presenceIndex, options); + return new WrapperFieldGenerator(descriptor, presenceIndex, + options); } } else { if (descriptor->real_containing_oneof()) { - return new MessageOneofFieldGenerator(descriptor, presenceIndex, options); + return new MessageOneofFieldGenerator(descriptor, presenceIndex, + options); } else { - return new MessageFieldGenerator(descriptor, presenceIndex, options); + return new MessageFieldGenerator(descriptor, presenceIndex, + options); } } } case FieldDescriptor::TYPE_ENUM: if (descriptor->is_repeated()) { - return new RepeatedEnumFieldGenerator(descriptor, presenceIndex, options); + return new RepeatedEnumFieldGenerator(descriptor, presenceIndex, + options); } else { if (descriptor->real_containing_oneof()) { - return new EnumOneofFieldGenerator(descriptor, presenceIndex, options); + return new EnumOneofFieldGenerator(descriptor, presenceIndex, + options); } else { return new EnumFieldGenerator(descriptor, presenceIndex, options); } } default: if (descriptor->is_repeated()) { - return new RepeatedPrimitiveFieldGenerator(descriptor, presenceIndex, options); + return new RepeatedPrimitiveFieldGenerator(descriptor, presenceIndex, + options); } else { if (descriptor->real_containing_oneof()) { - return new PrimitiveOneofFieldGenerator(descriptor, presenceIndex, options); + return new PrimitiveOneofFieldGenerator(descriptor, presenceIndex, + options); } else { - return new PrimitiveFieldGenerator(descriptor, presenceIndex, options); + return new PrimitiveFieldGenerator(descriptor, presenceIndex, + options); } } }