From ef4898fc5e8be4fe43ca9a51709a4097ff028c8d Mon Sep 17 00:00:00 2001 From: Thomas Van Lenten Date: Fri, 12 Jul 2024 10:05:33 -0700 Subject: [PATCH] [ObjC] Improve parsing validations Pass through the expected end marker and validate it so calling code can't forget to do the validations. PiperOrigin-RevId: 651809006 --- objectivec/GPBCodedInputStream.m | 9 +- objectivec/GPBMessage.m | 107 +++++++++--------- objectivec/GPBMessage_PackagePrivate.h | 10 +- objectivec/GPBUnknownFieldSet.m | 4 - .../GPBUnknownFieldSet_PackagePrivate.h | 2 - 5 files changed, 61 insertions(+), 71 deletions(-) diff --git a/objectivec/GPBCodedInputStream.m b/objectivec/GPBCodedInputStream.m index ae1124cef03e1..c39648b588326 100644 --- a/objectivec/GPBCodedInputStream.m +++ b/objectivec/GPBCodedInputStream.m @@ -429,9 +429,9 @@ - (void)readGroup:(int32_t)fieldNumber extensionRegistry:(id)extensionRegistry { CheckRecursionLimit(&state_); ++state_.recursionDepth; - [message mergeFromCodedInputStream:self extensionRegistry:extensionRegistry]; - GPBCodedInputStreamCheckLastTagWas(&state_, - GPBWireFormatMakeTag(fieldNumber, GPBWireFormatEndGroup)); + [message mergeFromCodedInputStream:self + extensionRegistry:extensionRegistry + endingTag:GPBWireFormatMakeTag(fieldNumber, GPBWireFormatEndGroup)]; --state_.recursionDepth; } @@ -452,8 +452,7 @@ - (void)readMessage:(GPBMessage *)message size_t length2 = (size_t)length; // Cast safe on 32bit because of CheckFieldSize() above. size_t oldLimit = GPBCodedInputStreamPushLimit(&state_, length2); ++state_.recursionDepth; - [message mergeFromCodedInputStream:self extensionRegistry:extensionRegistry]; - GPBCodedInputStreamCheckLastTagWas(&state_, 0); + [message mergeFromCodedInputStream:self extensionRegistry:extensionRegistry endingTag:0]; --state_.recursionDepth; GPBCodedInputStreamPopLimit(&state_, oldLimit); } diff --git a/objectivec/GPBMessage.m b/objectivec/GPBMessage.m index b2c3ba77c4fbd..d29d0f3348bdd 100644 --- a/objectivec/GPBMessage.m +++ b/objectivec/GPBMessage.m @@ -746,7 +746,9 @@ static void DecodeSingleValueFromInputStream(GPBExtensionDescriptor *extension, if (GPBExtensionIsWireFormat(description)) { // For MessageSet fields the message length will have already been // read. - [targetMessage mergeFromCodedInputStream:input extensionRegistry:extensionRegistry]; + [targetMessage mergeFromCodedInputStream:input + extensionRegistry:extensionRegistry + endingTag:0]; } else { [input readMessage:targetMessage extensionRegistry:extensionRegistry]; } @@ -1027,7 +1029,7 @@ - (instancetype)initWithCodedInputStream:(GPBCodedInputStream *)input error:(NSError **)errorPtr { if ((self = [self init])) { @try { - [self mergeFromCodedInputStream:input extensionRegistry:extensionRegistry]; + [self mergeFromCodedInputStream:input extensionRegistry:extensionRegistry endingTag:0]; if (errorPtr) { *errorPtr = nil; } @@ -2078,8 +2080,7 @@ - (void)clearExtension:(GPBExtensionDescriptor *)extension { - (void)mergeFromData:(NSData *)data extensionRegistry:(id)extensionRegistry { GPBCodedInputStream *input = [[GPBCodedInputStream alloc] initWithData:data]; @try { - [self mergeFromCodedInputStream:input extensionRegistry:extensionRegistry]; - [input checkLastTagWas:0]; + [self mergeFromCodedInputStream:input extensionRegistry:extensionRegistry endingTag:0]; } @finally { [input release]; } @@ -2090,7 +2091,7 @@ - (BOOL)mergeFromData:(NSData *)data error:(NSError **)errorPtr { GPBCodedInputStream *input = [[GPBCodedInputStream alloc] initWithData:data]; @try { - [self mergeFromCodedInputStream:input extensionRegistry:extensionRegistry]; + [self mergeFromCodedInputStream:input extensionRegistry:extensionRegistry endingTag:0]; [input checkLastTagWas:0]; if (errorPtr) { *errorPtr = nil; @@ -2227,38 +2228,36 @@ - (void)parseMessageSet:(GPBCodedInputStream *)input } } -- (BOOL)parseUnknownField:(GPBCodedInputStream *)input +- (void)parseUnknownField:(GPBCodedInputStream *)input extensionRegistry:(id)extensionRegistry tag:(uint32_t)tag { - GPBWireFormat wireType = GPBWireFormatGetTagWireType(tag); int32_t fieldNumber = GPBWireFormatGetTagFieldNumber(tag); - GPBDescriptor *descriptor = [self descriptor]; GPBExtensionDescriptor *extension = [extensionRegistry extensionForDescriptor:descriptor fieldNumber:fieldNumber]; if (extension == nil) { if (descriptor.wireFormat && GPBWireFormatMessageSetItemTag == tag) { [self parseMessageSet:input extensionRegistry:extensionRegistry]; - return YES; + return; } } else { + GPBWireFormat wireType = GPBWireFormatGetTagWireType(tag); if (extension.wireType == wireType) { ExtensionMergeFromInputStream(extension, extension.packable, input, extensionRegistry, self); - return YES; + return; } // Primitive, repeated types can be packed on unpacked on the wire, and are // parsed either way. if ([extension isRepeated] && !GPBDataTypeIsObject(extension->description_->dataType) && (extension.alternateWireType == wireType)) { ExtensionMergeFromInputStream(extension, !extension.packable, input, extensionRegistry, self); - return YES; + return; } } - if ([GPBUnknownFieldSet isFieldTag:tag]) { - GPBUnknownFieldSet *unknownFields = GetOrMakeUnknownFields(self); - return [unknownFields mergeFieldFrom:tag input:input]; - } else { - return NO; + GPBUnknownFieldSet *unknownFields = GetOrMakeUnknownFields(self); + if (![unknownFields mergeFieldFrom:tag input:input]) { + [NSException raise:NSInternalInconsistencyException + format:@"Internal Error: Unable to parse unknown field %u", tag]; } } @@ -2465,7 +2464,12 @@ static void MergeRepeatedNotPackedFieldFromCodedInputStream( } - (void)mergeFromCodedInputStream:(GPBCodedInputStream *)input - extensionRegistry:(id)extensionRegistry { + extensionRegistry:(id)extensionRegistry + endingTag:(uint32_t)endingTag { +#if defined(DEBUG) && DEBUG + NSAssert(endingTag == 0 || GPBWireFormatGetTagWireType(endingTag) == GPBWireFormatEndGroup, + @"endingTag should have been an endGroup tag"); +#endif // DEBUG GPBDescriptor *descriptor = [self descriptor]; GPBCodedInputStreamState *state = &input->state_; uint32_t tag = 0; @@ -2475,8 +2479,11 @@ - (void)mergeFromCodedInputStream:(GPBCodedInputStream *)input while (YES) { BOOL merged = NO; tag = GPBCodedInputStreamReadTag(state); - if (tag == 0) { - break; // Reached end. + if (tag == endingTag || tag == 0) { + // If we got to the end (tag zero), when we were expecting the end group, this will + // raise the error. + GPBCodedInputStreamCheckLastTagWas(state, endingTag); + return; } for (NSUInteger i = 0; i < numFields; ++i) { if (startingIndex >= numFields) startingIndex = 0; @@ -2514,46 +2521,37 @@ - (void)mergeFromCodedInputStream:(GPBCodedInputStream *)input } } // for(i < numFields) - if (!merged && (tag != 0)) { - // Primitive, repeated types can be packed on unpacked on the wire, and - // are parsed either way. The above loop covered tag in the preferred - // for, so this need to check the alternate form. - for (NSUInteger i = 0; i < numFields; ++i) { - if (startingIndex >= numFields) startingIndex = 0; - GPBFieldDescriptor *fieldDescriptor = fields[startingIndex]; - if ((fieldDescriptor.fieldType == GPBFieldTypeRepeated) && - !GPBFieldDataTypeIsObject(fieldDescriptor) && - (GPBFieldAlternateTag(fieldDescriptor) == tag)) { - BOOL alternateIsPacked = !fieldDescriptor.isPackable; - if (alternateIsPacked) { - MergeRepeatedPackedFieldFromCodedInputStream(self, fieldDescriptor, input); - // Well formed protos will only have a repeated field that is - // packed once, advance the starting index to the next field. - startingIndex += 1; - } else { - MergeRepeatedNotPackedFieldFromCodedInputStream(self, fieldDescriptor, input, - extensionRegistry); - } - merged = YES; - break; - } else { + if (merged) continue; // On to the next tag + + // Primitive, repeated types can be packed or unpacked on the wire, and + // are parsed either way. The above loop covered tag in the preferred + // for, so this need to check the alternate form. + for (NSUInteger i = 0; i < numFields; ++i) { + if (startingIndex >= numFields) startingIndex = 0; + GPBFieldDescriptor *fieldDescriptor = fields[startingIndex]; + if ((fieldDescriptor.fieldType == GPBFieldTypeRepeated) && + !GPBFieldDataTypeIsObject(fieldDescriptor) && + (GPBFieldAlternateTag(fieldDescriptor) == tag)) { + BOOL alternateIsPacked = !fieldDescriptor.isPackable; + if (alternateIsPacked) { + MergeRepeatedPackedFieldFromCodedInputStream(self, fieldDescriptor, input); + // Well formed protos will only have a repeated field that is + // packed once, advance the starting index to the next field. startingIndex += 1; + } else { + MergeRepeatedNotPackedFieldFromCodedInputStream(self, fieldDescriptor, input, + extensionRegistry); } + merged = YES; + break; + } else { + startingIndex += 1; } } - if (!merged) { - if (tag == 0) { - // zero signals EOF / limit reached - return; - } else { - if (![self parseUnknownField:input extensionRegistry:extensionRegistry tag:tag]) { - // it's an endgroup tag - return; - } - } - } // if(!merged) + if (merged) continue; // On to the next tag + [self parseUnknownField:input extensionRegistry:extensionRegistry tag:tag]; } // while(YES) } @@ -3554,8 +3552,7 @@ - (instancetype)initWithCoder:(NSCoder *)aDecoder { if (data.length) { GPBCodedInputStream *input = [[GPBCodedInputStream alloc] initWithData:data]; @try { - [self mergeFromCodedInputStream:input extensionRegistry:nil]; - [input checkLastTagWas:0]; + [self mergeFromCodedInputStream:input extensionRegistry:nil endingTag:0]; } @finally { [input release]; } diff --git a/objectivec/GPBMessage_PackagePrivate.h b/objectivec/GPBMessage_PackagePrivate.h index e562e5dabbd61..65a1a7c5bad1f 100644 --- a/objectivec/GPBMessage_PackagePrivate.h +++ b/objectivec/GPBMessage_PackagePrivate.h @@ -47,15 +47,15 @@ typedef struct GPBMessage_Storage *GPBMessage_StoragePtr; // Parses a message of this type from the input and merges it with this // message. // +// `endingTag` should be zero if expected to consume to the end of input, but if +// the input is supposed to be a Group, it should be the endgroup tag to look for. +// // Warning: This does not verify that all required fields are present in // the input message. -// Note: The caller should call -// -[CodedInputStream checkLastTagWas:] after calling this to -// verify that the last tag seen was the appropriate end-group tag, -// or zero for EOF. // NOTE: This will throw if there is an error while parsing. - (void)mergeFromCodedInputStream:(GPBCodedInputStream *)input - extensionRegistry:(id)extensionRegistry; + extensionRegistry:(id)extensionRegistry + endingTag:(uint32_t)endingTag; - (void)addUnknownMapEntry:(int32_t)fieldNum value:(NSData *)data; diff --git a/objectivec/GPBUnknownFieldSet.m b/objectivec/GPBUnknownFieldSet.m index 768734070dc3f..5fe8ce03c4840 100644 --- a/objectivec/GPBUnknownFieldSet.m +++ b/objectivec/GPBUnknownFieldSet.m @@ -212,10 +212,6 @@ - (NSData *)data { return data; } -+ (BOOL)isFieldTag:(int32_t)tag { - return GPBWireFormatGetTagWireType(tag) != GPBWireFormatEndGroup; -} - - (void)addField:(GPBUnknownField *)field { int32_t number = [field number]; checkNumber(number); diff --git a/objectivec/GPBUnknownFieldSet_PackagePrivate.h b/objectivec/GPBUnknownFieldSet_PackagePrivate.h index 37cb61b3b74e5..7de20ed0cf13c 100644 --- a/objectivec/GPBUnknownFieldSet_PackagePrivate.h +++ b/objectivec/GPBUnknownFieldSet_PackagePrivate.h @@ -14,8 +14,6 @@ @interface GPBUnknownFieldSet () -+ (BOOL)isFieldTag:(int32_t)tag; - - (NSData *)data; - (size_t)serializedSize;