From b5fe79a4cacca4ee905a27102647e8fe7a21560f Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 27 Feb 2020 15:08:50 -0600 Subject: [PATCH 01/26] Update ref assembly for building netstandard2.0. Index and Range APIs are netcoreapp3.0+ only. Includes System.Text.Rune for netstandard2.0, since that is currently only netcoreapp3.0. --- .../ref/System.Utf8String.Experimental.cs | 33 -------- .../ref/System.Utf8String.Experimental.csproj | 23 +++++- ...stem.Utf8String.Experimental.netcoreapp.cs | 55 ++++++++++++++ ...tem.Utf8String.Experimental.netstandard.cs | 76 +++++++++++++++++++ 4 files changed, 151 insertions(+), 36 deletions(-) create mode 100644 src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs create mode 100644 src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netstandard.cs diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs index a6a2339b633fb..92da1a3a8fc3a 100644 --- a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs @@ -55,15 +55,11 @@ public static partial class Utf8Extensions public static System.ReadOnlySpan AsBytes(this System.Utf8String? text, int start) { throw null; } public static System.ReadOnlySpan AsBytes(this System.Utf8String? text, int start, int length) { throw null; } public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text) { throw null; } - public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, System.Index startIndex) { throw null; } public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, int start) { throw null; } public static System.ReadOnlyMemory AsMemory(this System.Utf8String? 
text, int start, int length) { throw null; } - public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, System.Range range) { throw null; } public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text) { throw null; } - public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, System.Index startIndex) { throw null; } public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, int start) { throw null; } public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, int start, int length) { throw null; } - public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, System.Range range) { throw null; } public static System.Text.Utf8Span AsSpan(this System.Utf8String? text) { throw null; } public static System.Text.Utf8Span AsSpan(this System.Utf8String? text, int start) { throw null; } public static System.Text.Utf8Span AsSpan(this System.Utf8String? text, int start, int length) { throw null; } @@ -96,10 +92,8 @@ public Utf8String(string value) { } public bool Contains(System.Text.Rune value, System.StringComparison comparison) { throw null; } public bool Contains(System.Utf8String value) { throw null; } public bool Contains(System.Utf8String value, System.StringComparison comparison) { throw null; } - public static System.Utf8String Create(int length, TState state, System.Buffers.SpanAction action) { throw null; } public static System.Utf8String CreateFromRelaxed(System.ReadOnlySpan buffer) { throw null; } public static System.Utf8String CreateFromRelaxed(System.ReadOnlySpan buffer) { throw null; } - public static System.Utf8String CreateRelaxed(int length, TState state, System.Buffers.SpanAction action) { throw null; } public bool EndsWith(char value) { throw null; } public bool EndsWith(char value, System.StringComparison comparison) { throw null; } public bool EndsWith(System.Text.Rune value) { throw null; } @@ -144,7 +138,6 @@ public Utf8String(string 
value) { } public bool StartsWith(System.Text.Rune value, System.StringComparison comparison) { throw null; } public bool StartsWith(System.Utf8String value) { throw null; } public bool StartsWith(System.Utf8String value, System.StringComparison comparison) { throw null; } - public System.Utf8String this[System.Range range] { get { throw null; } } public byte[] ToByteArray() { throw null; } public char[] ToCharArray() { throw null; } public System.Utf8String ToLower(System.Globalization.CultureInfo culture) { throw null; } @@ -157,20 +150,7 @@ public Utf8String(string value) { } public System.Utf8String TrimStart() { throw null; } public static bool TryCreateFrom(System.ReadOnlySpan buffer, [System.Diagnostics.CodeAnalysis.NotNullWhenAttribute(true)] out System.Utf8String? value) { throw null; } public static bool TryCreateFrom(System.ReadOnlySpan buffer, [System.Diagnostics.CodeAnalysis.NotNullWhenAttribute(true)] out System.Utf8String? value) { throw null; } - public bool TryFind(char value, out System.Range range) { throw null; } - public bool TryFind(char value, System.StringComparison comparisonType, out System.Range range) { throw null; } - public bool TryFind(System.Text.Rune value, out System.Range range) { throw null; } - public bool TryFind(System.Text.Rune value, System.StringComparison comparisonType, out System.Range range) { throw null; } - public bool TryFind(System.Utf8String value, out System.Range range) { throw null; } - public bool TryFind(System.Utf8String value, System.StringComparison comparisonType, out System.Range range) { throw null; } - public bool TryFindLast(char value, out System.Range range) { throw null; } - public bool TryFindLast(char value, System.StringComparison comparisonType, out System.Range range) { throw null; } - public bool TryFindLast(System.Text.Rune value, out System.Range range) { throw null; } - public bool TryFindLast(System.Text.Rune value, System.StringComparison comparisonType, out System.Range range) { throw 
null; } - public bool TryFindLast(System.Utf8String value, out System.Range range) { throw null; } - public bool TryFindLast(System.Utf8String value, System.StringComparison comparisonType, out System.Range range) { throw null; } public static System.Utf8String UnsafeCreateWithoutValidation(System.ReadOnlySpan utf8Contents) { throw null; } - public static System.Utf8String UnsafeCreateWithoutValidation(int length, TState state, System.Buffers.SpanAction action) { throw null; } public readonly partial struct ByteEnumerable : System.Collections.Generic.IEnumerable { private readonly object _dummy; @@ -326,7 +306,6 @@ public readonly ref partial struct Utf8Span public int Normalize(System.Span destination, System.Text.NormalizationForm normalizationForm = System.Text.NormalizationForm.FormC) { throw null; } public static bool operator !=(System.Text.Utf8Span left, System.Text.Utf8Span right) { throw null; } public static bool operator ==(System.Text.Utf8Span left, System.Text.Utf8Span right) { throw null; } - public System.Text.Utf8Span this[System.Range range] { get { throw null; } } public SplitResult Split(char separator, System.Utf8StringSplitOptions options = System.Utf8StringSplitOptions.None) { throw null; } public SplitResult Split(System.Text.Rune separator, System.Utf8StringSplitOptions options = System.Utf8StringSplitOptions.None) { throw null; } public SplitResult Split(System.Text.Utf8Span separator, System.Utf8StringSplitOptions options = System.Utf8StringSplitOptions.None) { throw null; } @@ -364,18 +343,6 @@ public readonly ref partial struct Utf8Span public System.Utf8String ToUpperInvariant() { throw null; } public int ToUpperInvariant(System.Span destination) { throw null; } public System.Utf8String ToUtf8String() { throw null; } - public bool TryFind(char value, out System.Range range) { throw null; } - public bool TryFind(char value, System.StringComparison comparisonType, out System.Range range) { throw null; } - public bool 
TryFind(System.Text.Rune value, out System.Range range) { throw null; } - public bool TryFind(System.Text.Rune value, System.StringComparison comparisonType, out System.Range range) { throw null; } - public bool TryFind(System.Text.Utf8Span value, out System.Range range) { throw null; } - public bool TryFind(System.Text.Utf8Span value, System.StringComparison comparisonType, out System.Range range) { throw null; } - public bool TryFindLast(char value, out System.Range range) { throw null; } - public bool TryFindLast(char value, System.StringComparison comparisonType, out System.Range range) { throw null; } - public bool TryFindLast(System.Text.Rune value, out System.Range range) { throw null; } - public bool TryFindLast(System.Text.Rune value, System.StringComparison comparisonType, out System.Range range) { throw null; } - public bool TryFindLast(System.Text.Utf8Span value, out System.Range range) { throw null; } - public bool TryFindLast(System.Text.Utf8Span value, System.StringComparison comparisonType, out System.Range range) { throw null; } public static System.Text.Utf8Span UnsafeCreateWithoutValidation(System.ReadOnlySpan buffer) { throw null; } public readonly ref struct CharEnumerable { diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj index 8356230578e6c..236be98487247 100644 --- a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj @@ -1,15 +1,32 @@ - + true $(NoWarn);0809;0618 - $(NetCoreAppCurrent) + netstandard2.0;netcoreapp3.0;$(NetCoreAppCurrent) + true enable - + + + + + + + + + + + + + + + + + diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs 
b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs new file mode 100644 index 0000000000000..3c976cc41bfce --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs @@ -0,0 +1,55 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// ------------------------------------------------------------------------------ +// Changes to this file must follow the https://aka.ms/api-review process. +// ------------------------------------------------------------------------------ + +namespace System +{ + public static partial class Utf8Extensions + { + public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, System.Index startIndex) { throw null; } + public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, System.Range range) { throw null; } + public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, System.Index startIndex) { throw null; } + public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? 
text, System.Range range) { throw null; } + } + public sealed partial class Utf8String : System.IComparable, System.IEquatable + { + public static System.Utf8String Create(int length, TState state, System.Buffers.SpanAction action) { throw null; } + public static System.Utf8String CreateRelaxed(int length, TState state, System.Buffers.SpanAction action) { throw null; } + public System.Utf8String this[System.Range range] { get { throw null; } } + public bool TryFind(char value, out System.Range range) { throw null; } + public bool TryFind(char value, System.StringComparison comparisonType, out System.Range range) { throw null; } + public bool TryFind(System.Text.Rune value, out System.Range range) { throw null; } + public bool TryFind(System.Text.Rune value, System.StringComparison comparisonType, out System.Range range) { throw null; } + public bool TryFind(System.Utf8String value, out System.Range range) { throw null; } + public bool TryFind(System.Utf8String value, System.StringComparison comparisonType, out System.Range range) { throw null; } + public bool TryFindLast(char value, out System.Range range) { throw null; } + public bool TryFindLast(char value, System.StringComparison comparisonType, out System.Range range) { throw null; } + public bool TryFindLast(System.Text.Rune value, out System.Range range) { throw null; } + public bool TryFindLast(System.Text.Rune value, System.StringComparison comparisonType, out System.Range range) { throw null; } + public bool TryFindLast(System.Utf8String value, out System.Range range) { throw null; } + public bool TryFindLast(System.Utf8String value, System.StringComparison comparisonType, out System.Range range) { throw null; } + public static System.Utf8String UnsafeCreateWithoutValidation(int length, TState state, System.Buffers.SpanAction action) { throw null; } + } +} +namespace System.Text +{ + public readonly ref partial struct Utf8Span + { + public System.Text.Utf8Span this[System.Range range] { get { throw null; } 
} + public bool TryFind(char value, out System.Range range) { throw null; } + public bool TryFind(char value, System.StringComparison comparisonType, out System.Range range) { throw null; } + public bool TryFind(System.Text.Rune value, out System.Range range) { throw null; } + public bool TryFind(System.Text.Rune value, System.StringComparison comparisonType, out System.Range range) { throw null; } + public bool TryFind(System.Text.Utf8Span value, out System.Range range) { throw null; } + public bool TryFind(System.Text.Utf8Span value, System.StringComparison comparisonType, out System.Range range) { throw null; } + public bool TryFindLast(char value, out System.Range range) { throw null; } + public bool TryFindLast(char value, System.StringComparison comparisonType, out System.Range range) { throw null; } + public bool TryFindLast(System.Text.Rune value, out System.Range range) { throw null; } + public bool TryFindLast(System.Text.Rune value, System.StringComparison comparisonType, out System.Range range) { throw null; } + public bool TryFindLast(System.Text.Utf8Span value, out System.Range range) { throw null; } + public bool TryFindLast(System.Text.Utf8Span value, System.StringComparison comparisonType, out System.Range range) { throw null; } + } +} diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netstandard.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netstandard.cs new file mode 100644 index 0000000000000..0cf292727a56f --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netstandard.cs @@ -0,0 +1,76 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// ------------------------------------------------------------------------------ +// Changes to this file must follow the https://aka.ms/api-review process. +// ------------------------------------------------------------------------------ + +namespace System.Text +{ + public readonly partial struct Rune : System.IComparable, System.IEquatable + { + private readonly int _dummyPrimitive; + public Rune(char ch) { throw null; } + public Rune(char highSurrogate, char lowSurrogate) { throw null; } + public Rune(int value) { throw null; } + [System.CLSCompliantAttribute(false)] + public Rune(uint value) { throw null; } + public bool IsAscii { get { throw null; } } + public bool IsBmp { get { throw null; } } + public int Plane { get { throw null; } } + public static System.Text.Rune ReplacementChar { get { throw null; } } + public int Utf16SequenceLength { get { throw null; } } + public int Utf8SequenceLength { get { throw null; } } + public int Value { get { throw null; } } + public int CompareTo(System.Text.Rune other) { throw null; } + public static System.Buffers.OperationStatus DecodeFromUtf16(System.ReadOnlySpan source, out System.Text.Rune result, out int charsConsumed) { throw null; } + public static System.Buffers.OperationStatus DecodeFromUtf8(System.ReadOnlySpan source, out System.Text.Rune result, out int bytesConsumed) { throw null; } + public static System.Buffers.OperationStatus DecodeLastFromUtf16(System.ReadOnlySpan source, out System.Text.Rune result, out int charsConsumed) { throw null; } + public static System.Buffers.OperationStatus DecodeLastFromUtf8(System.ReadOnlySpan source, out System.Text.Rune value, out int bytesConsumed) { throw null; } + public int EncodeToUtf16(System.Span destination) { throw null; } + public int EncodeToUtf8(System.Span destination) { throw null; } + public override bool Equals(object? 
obj) { throw null; } + public bool Equals(System.Text.Rune other) { throw null; } + public override int GetHashCode() { throw null; } + public static double GetNumericValue(System.Text.Rune value) { throw null; } + public static System.Text.Rune GetRuneAt(string input, int index) { throw null; } + public static System.Globalization.UnicodeCategory GetUnicodeCategory(System.Text.Rune value) { throw null; } + public static bool IsControl(System.Text.Rune value) { throw null; } + public static bool IsDigit(System.Text.Rune value) { throw null; } + public static bool IsLetter(System.Text.Rune value) { throw null; } + public static bool IsLetterOrDigit(System.Text.Rune value) { throw null; } + public static bool IsLower(System.Text.Rune value) { throw null; } + public static bool IsNumber(System.Text.Rune value) { throw null; } + public static bool IsPunctuation(System.Text.Rune value) { throw null; } + public static bool IsSeparator(System.Text.Rune value) { throw null; } + public static bool IsSymbol(System.Text.Rune value) { throw null; } + public static bool IsUpper(System.Text.Rune value) { throw null; } + public static bool IsValid(int value) { throw null; } + [System.CLSCompliantAttribute(false)] + public static bool IsValid(uint value) { throw null; } + public static bool IsWhiteSpace(System.Text.Rune value) { throw null; } + public static bool operator ==(System.Text.Rune left, System.Text.Rune right) { throw null; } + public static explicit operator System.Text.Rune(char ch) { throw null; } + public static explicit operator System.Text.Rune(int value) { throw null; } + [System.CLSCompliantAttribute(false)] + public static explicit operator System.Text.Rune(uint value) { throw null; } + public static bool operator >(System.Text.Rune left, System.Text.Rune right) { throw null; } + public static bool operator >=(System.Text.Rune left, System.Text.Rune right) { throw null; } + public static bool operator !=(System.Text.Rune left, System.Text.Rune right) { throw 
null; } + public static bool operator <(System.Text.Rune left, System.Text.Rune right) { throw null; } + public static bool operator <=(System.Text.Rune left, System.Text.Rune right) { throw null; } + public static System.Text.Rune ToLower(System.Text.Rune value, System.Globalization.CultureInfo culture) { throw null; } + public static System.Text.Rune ToLowerInvariant(System.Text.Rune value) { throw null; } + public override string ToString() { throw null; } + public static System.Text.Rune ToUpper(System.Text.Rune value, System.Globalization.CultureInfo culture) { throw null; } + public static System.Text.Rune ToUpperInvariant(System.Text.Rune value) { throw null; } + public static bool TryCreate(char highSurrogate, char lowSurrogate, out System.Text.Rune result) { throw null; } + public static bool TryCreate(char ch, out System.Text.Rune result) { throw null; } + public static bool TryCreate(int value, out System.Text.Rune result) { throw null; } + [System.CLSCompliantAttribute(false)] + public static bool TryCreate(uint value, out System.Text.Rune result) { throw null; } + public bool TryEncodeToUtf16(System.Span destination, out int charsWritten) { throw null; } + public bool TryEncodeToUtf8(System.Span destination, out int bytesWritten) { throw null; } + public static bool TryGetRuneAt(string input, int index, out System.Text.Rune value) { throw null; } + } +} From 9a2168d4b0f23c9388ddded4165bdc8de62ed232 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Fri, 28 Feb 2020 10:15:41 -0600 Subject: [PATCH 02/26] Move Utf8String code from coreclr to libraries --- .../System.Private.CoreLib/System.Private.CoreLib.csproj | 3 +++ .../src/System.Private.CoreLib.Shared.projitems | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj index 653e61bf4d8aa..a147e8d3bbe8e 100644 --- 
a/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj +++ b/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj @@ -298,9 +298,12 @@ Common\Interop\Windows\OleAut32\Interop.SysAllocStringByteLen.cs +<<<<<<< HEAD +======= +>>>>>>> Move Utf8String code from coreclr to libraries diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 62e69dd3b3663..8c1b6b731815c 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1827,4 +1827,8 @@ - \ No newline at end of file +<<<<<<< HEAD + +======= + +>>>>>>> Move Utf8String code from coreclr to libraries From f296c30a5bf3935af56e1fe0cb6af98bf36baa5e Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Fri, 28 Feb 2020 16:08:47 -0600 Subject: [PATCH 03/26] Get Rune working on netstandard2.0 --- .../src/System/Char8.cs | 2 +- .../src/System/Text/Rune.cs | 76 ++++++++++-- .../src/Resources/Strings.resx | 9 ++ .../src/System.Utf8String.Experimental.csproj | 112 +++++++++++++++++- .../System/Globalization/GlobalizationMode.cs | 11 ++ .../src/System/Text/ThrowHelper.cs | 55 +++++++++ 6 files changed, 248 insertions(+), 17 deletions(-) create mode 100644 src/libraries/System.Utf8String.Experimental/src/System/Globalization/GlobalizationMode.cs create mode 100644 src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs diff --git a/src/libraries/System.Private.CoreLib/src/System/Char8.cs b/src/libraries/System.Private.CoreLib/src/System/Char8.cs index ba22e91944b83..70ecdd0dda408 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Char8.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Char8.cs @@ -5,7 +5,7 @@ namespace System { /// - /// Represents a UTF-8 code unit, the elemental type of . 
+ /// Represents a UTF-8 code unit, the elemental type of TODO cref Utf8String. /// public readonly struct Char8 : IComparable, IEquatable { diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs index fd3ac737eae97..e0e1a4e25fc1b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs @@ -20,6 +20,10 @@ namespace System.Text [DebuggerDisplay("{DebuggerDisplay,nq}")] public readonly struct Rune : IComparable, IEquatable { + private const char HIGH_SURROGATE_START = '\ud800'; + private const char LOW_SURROGATE_START = '\udc00'; + private const int HIGH_SURROGATE_RANGE = 0x3FF; + private const byte IsWhiteSpaceFlag = 0x80; private const byte IsLetterOrDigitFlag = 0x40; private const byte UnicodeCategoryMask = 0x1F; @@ -175,10 +179,10 @@ private Rune(uint scalarValue, bool unused) /// public int Value => (int)_value; - private static Rune ChangeCaseCultureAware(Rune rune, TextInfo textInfo, bool toUpper) + private static Rune ChangeCaseCultureAware(Rune rune, CultureInfo culture, bool toUpper) { Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller."); - Debug.Assert(textInfo != null, "This should've been checked by the caller."); + Debug.Assert(culture != null, "This should've been checked by the caller."); Span original = stackalloc char[2]; // worst case scenario = 2 code units (for a surrogate pair) Span modified = stackalloc char[2]; // case change should preserve UTF-16 code unit count @@ -187,14 +191,25 @@ private static Rune ChangeCaseCultureAware(Rune rune, TextInfo textInfo, bool to original = original.Slice(0, charCount); modified = modified.Slice(0, charCount); +#if CORECLR if (toUpper) { - textInfo.ChangeCaseToUpper(original, modified); + culture!.TextInfo.ChangeCaseToUpper(original, modified); } else { - textInfo.ChangeCaseToLower(original, modified); + 
culture!.TextInfo.ChangeCaseToLower(original, modified); } +#else + if (toUpper) + { + MemoryExtensions.ToUpper(original, modified, culture); + } + else + { + MemoryExtensions.ToLower(original, modified, culture); + } +#endif // We use simple case folding rules, which disallows moving between the BMP and supplementary // planes when performing a case conversion. The helper methods which reconstruct a Rune @@ -827,6 +842,7 @@ private static int ReadRuneFromString(string input, int index) /// public override string ToString() { +#if CORECLR if (IsBmp) { return string.CreateFromChar((char)_value); @@ -836,6 +852,25 @@ public override string ToString() UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(_value, out char high, out char low); return string.CreateFromChar(high, low); } +#else + if (IsBmp) + { + + return ((char)_value).ToString(); + } + else + { + Span buffer = stackalloc char[2]; + UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(_value, out buffer[0], out buffer[1]); + unsafe + { + fixed (char* pBuffer = buffer) + { + return new string(pBuffer, 0, 2); + } + } + } +#endif } /// @@ -865,17 +900,17 @@ public static bool TryCreate(char highSurrogate, char lowSurrogate, out Rune res // First, extend both to 32 bits, then calculate the offset of // each candidate surrogate char from the start of its range. - uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START; - uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START; + uint highSurrogateOffset = (uint)highSurrogate - HIGH_SURROGATE_START; + uint lowSurrogateOffset = (uint)lowSurrogate - LOW_SURROGATE_START; // This is a single comparison which allows us to check both for validity at once since // both the high surrogate range and the low surrogate range are the same length. // If the comparison fails, we call to a helper method to throw the correct exception message. 
- if ((highSurrogateOffset | lowSurrogateOffset) <= CharUnicodeInfo.HIGH_SURROGATE_RANGE) + if ((highSurrogateOffset | lowSurrogateOffset) <= HIGH_SURROGATE_RANGE) { // The 0x40u << 10 below is to account for uuuuu = wwww + 1 in the surrogate encoding. - result = UnsafeCreate((highSurrogateOffset << 10) + ((uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START) + (0x40u << 10)); + result = UnsafeCreate((highSurrogateOffset << 10) + ((uint)lowSurrogate - LOW_SURROGATE_START) + (0x40u << 10)); return true; } else @@ -1070,7 +1105,12 @@ public static double GetNumericValue(Rune value) else { // not an ASCII char; fall back to globalization table +#if CORECLR return CharUnicodeInfo.GetNumericValue(value.Value); +#else + // TODO: figure out a better way than allocating here + return CharUnicodeInfo.GetNumericValue(value.ToString(), 0); +#endif } } @@ -1089,7 +1129,12 @@ public static UnicodeCategory GetUnicodeCategory(Rune value) private static UnicodeCategory GetUnicodeCategoryNonAscii(Rune value) { Debug.Assert(!value.IsAscii, "Shouldn't use this non-optimized code path for ASCII characters."); +#if CORECLR return CharUnicodeInfo.GetUnicodeCategory(value.Value); +#else + // TODO: figure out a better way than allocating here + return CharUnicodeInfo.GetUnicodeCategory(value.ToString(), 0); +#endif } // Returns true iff this Unicode category represents a letter @@ -1240,7 +1285,12 @@ public static bool IsWhiteSpace(Rune value) // Only BMP code points can be white space, so only call into CharUnicodeInfo // if the incoming value is within the BMP. 
- return value.IsBmp && CharUnicodeInfo.GetIsWhiteSpace((char)value._value); + return value.IsBmp && +#if CORECLR + CharUnicodeInfo.GetIsWhiteSpace((char)value._value); +#else + char.IsWhiteSpace((char)value._value); +#endif } public static Rune ToLower(Rune value, CultureInfo culture) @@ -1259,7 +1309,7 @@ public static Rune ToLower(Rune value, CultureInfo culture) return ToLowerInvariant(value); } - return ChangeCaseCultureAware(value, culture!.TextInfo, toUpper: false); + return ChangeCaseCultureAware(value, culture, toUpper: false); } public static Rune ToLowerInvariant(Rune value) @@ -1283,7 +1333,7 @@ public static Rune ToLowerInvariant(Rune value) // Non-ASCII data requires going through the case folding tables. - return ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: false); + return ChangeCaseCultureAware(value, CultureInfo.InvariantCulture, toUpper: false); } public static Rune ToUpper(Rune value, CultureInfo culture) @@ -1302,7 +1352,7 @@ public static Rune ToUpper(Rune value, CultureInfo culture) return ToUpperInvariant(value); } - return ChangeCaseCultureAware(value, culture!.TextInfo, toUpper: true); + return ChangeCaseCultureAware(value, culture, toUpper: true); } public static Rune ToUpperInvariant(Rune value) @@ -1326,7 +1376,7 @@ public static Rune ToUpperInvariant(Rune value) // Non-ASCII data requires going through the case folding tables. 
- return ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: true); + return ChangeCaseCultureAware(value, CultureInfo.InvariantCulture, toUpper: true); } } } diff --git a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx index 1af7de150c99c..d75675748c34a 100644 --- a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx +++ b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx @@ -117,4 +117,13 @@ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + Index was out of range. Must be non-negative and less than the size of the collection. + + + Cannot extract a Unicode scalar value from the specified index in the input. + + + Destination is too short. + \ No newline at end of file diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index fb6e385f0988b..13b9b5031918e 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -1,15 +1,121 @@ - + true +<<<<<<< HEAD true $(NetCoreAppCurrent)-Windows_NT;$(NetCoreAppCurrent)-Unix enable +======= + true + + netstandard2.0;$(NetCoreAppCurrent)-Windows_NT + enable + $(DefineConstants);FEATURE_UTF8STRING + + false +>>>>>>> Get Rune working on netstandard2.0 - + + - + + + + + System\Char8.cs + + + System\Text\UnicodeDebug.cs + + + System\Text\UnicodeUtility.cs + + + System\Text\Unicode\Utf16Utility.cs + + + System\Text\Rune.cs + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Globalization/GlobalizationMode.cs 
b/src/libraries/System.Utf8String.Experimental/src/System/Globalization/GlobalizationMode.cs new file mode 100644 index 0000000000000..2f653ee75e970 --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/src/System/Globalization/GlobalizationMode.cs @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Globalization +{ + internal static partial class GlobalizationMode + { + internal static bool Invariant { get; } = false; // TODO: should we enable this? + } +} diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs b/src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs new file mode 100644 index 0000000000000..ed319b71c5693 --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs @@ -0,0 +1,55 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System +{ + internal static class ThrowHelper + { + internal static void ThrowArgumentNullException(ExceptionArgument argument) { throw CreateArgumentNullException(argument); } + [MethodImpl(MethodImplOptions.NoInlining)] + private static Exception CreateArgumentNullException(ExceptionArgument argument) { return new ArgumentNullException(argument.ToString()); } + + internal static void ThrowArgumentOutOfRangeException(ExceptionArgument argument) { throw CreateArgumentOutOfRangeException(argument); } + [MethodImpl(MethodImplOptions.NoInlining)] + private static Exception CreateArgumentOutOfRangeException(ExceptionArgument argument) { return new ArgumentOutOfRangeException(argument.ToString()); } + + [DoesNotReturn] + internal static void ThrowArgumentException_DestinationTooShort() + { + throw new ArgumentException(SR.Argument_DestinationTooShort, "destination"); + } + + [DoesNotReturn] + internal static void ThrowArgumentException_CannotExtractScalar(ExceptionArgument argument) + { + throw new ArgumentException(SR.Argument_CannotExtractScalar, argument.ToString()); + } + + internal static void ThrowArgumentOutOfRange_IndexException() + { + throw GetArgumentOutOfRangeException(ExceptionArgument.index, + SR.ArgumentOutOfRange_Index); + } + + private static ArgumentOutOfRangeException GetArgumentOutOfRangeException(ExceptionArgument argument, string resource) + { + return new ArgumentOutOfRangeException(argument.ToString(), resource); + } + } + + // + // The convention for this enum is using the argument name as the enum name + // + internal enum ExceptionArgument + { + ch, + culture, + index, + input, + value, + } +} From aa1e4c82ca9e54fb42dd5d7713dc84bafefcf6ab Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Fri, 28 Feb 2020 21:56:09 -0600 Subject: [PATCH 04/26] Get Utf8Span compiling on netstandard2.0. 
--- .../System.Private.CoreLib.Shared.projitems | 1 + .../src/System/Numerics/BitOperations.cs | 2 - .../src/System/Text/ASCIIUtility.cs | 1 - .../Text/Unicode/Utf8Utility.Helpers.cs | 1 - .../Text/Unicode/Utf8Utility.Validation.cs | 2 +- .../Text/Unicode/Utf8Utility.WhiteSpace.cs | 103 ++++++++++++++---- .../src/System/Text/Unicode/Utf8Utility.cs | 84 +++++++------- .../src/System/Text/Utf8Span.cs | 100 +++++++++-------- .../src/System/Text/Utf8Span.netcoreapp.cs | 43 ++++++++ .../src/System/Text/Utf8StringComparer.cs | 78 ++++++------- .../src/Resources/Strings.resx | 6 + .../src/System.Utf8String.Experimental.csproj | 48 +++++--- .../Runtime/Intrinsics/Intrinsics.Shims.cs | 92 ++++++++++++++++ .../src/System/Text/ThrowHelper.cs | 5 + src/libraries/shims/ApiCompat.proj | 11 -- 15 files changed, 402 insertions(+), 175 deletions(-) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs create mode 100644 src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 8c1b6b731815c..1cc55a46be446 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1824,6 +1824,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs index 5510659216d30..8385687e86d51 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs @@ -6,8 +6,6 @@ using System.Runtime.InteropServices; using System.Runtime.Intrinsics.X86; -using Internal.Runtime.CompilerServices; - // Some routines inspired by 
the Stanford Bit Twiddling Hacks by Sean Eron Anderson: // http://graphics.stanford.edu/~seander/bithacks.html diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs index b1f59d3b46090..63d392c9112be 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs @@ -7,7 +7,6 @@ using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; -using Internal.Runtime.CompilerServices; #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types #if TARGET_64BIT diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs index 5cdf7574e82a3..2a199f94a1681 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs @@ -6,7 +6,6 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; -using Internal.Runtime.CompilerServices; namespace System.Text.Unicode { diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs index 9f721d5408db5..ac9215c2e348b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs @@ -5,7 +5,7 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.Intrinsics.X86; -using Internal.Runtime.CompilerServices; +using System.Runtime.CompilerServices; #pragma warning disable SA1121 // explicitly using type aliases instead of built-in 
types #if TARGET_64BIT diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.cs index 373f764efd7d0..57e65f71fe39d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.cs @@ -3,9 +3,10 @@ // See the LICENSE file in the project root for more information. using System.Runtime.InteropServices; -using Internal.Runtime.CompilerServices; +using System.Runtime.CompilerServices; #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types +#if CORECLR #if TARGET_64BIT using nint = System.Int64; using nuint = System.UInt64; @@ -13,11 +14,16 @@ using nint = System.Int32; using nuint = System.UInt32; #endif +#else +using nint = System.Int64; +using nuint = System.UInt64; +#endif namespace System.Text.Unicode { internal static partial class Utf8Utility { +#if CORECLR /// /// Returns the index in where the first non-whitespace character /// appears, or the input length if the data contains only whitespace characters. @@ -40,12 +46,12 @@ private static nuint GetIndexOfFirstNonWhiteSpaceChar(ref byte utf8Data, nuint l // Very quick check: see if the byte is in the range [ 21 .. 7F ]. // If so, we can skip the more expensive logic later in this method. - if ((sbyte)Unsafe.AddByteOffset(ref utf8Data, i) > (sbyte)0x20) + if ((sbyte)Unsafe.AddByteOffset(ref utf8Data, (IntPtr)i) > (sbyte)0x20) //TODO: remove IntPtr cast { break; } - uint possibleAsciiByte = Unsafe.AddByteOffset(ref utf8Data, i); + uint possibleAsciiByte = Unsafe.AddByteOffset(ref utf8Data, (IntPtr)i); //TODO: remove IntPtr cast if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) { // The simple comparison failed. 
Let's read the actual byte value, @@ -76,34 +82,31 @@ private static nuint GetIndexOfFirstNonWhiteSpaceChar(ref byte utf8Data, nuint l return i; } - +#else /// - /// Returns the index in where the trailing whitespace sequence - /// begins, or 0 if the data contains only whitespace characters, or the span length if the - /// data does not end with any whitespace characters. + /// Returns the index in where the first non-whitespace character + /// appears, or the input length if the data contains only whitespace characters. /// - public static int GetIndexOfTrailingWhiteSpaceSequence(ReadOnlySpan utf8Data) - { - return (int)GetIndexOfTrailingWhiteSpaceSequence(ref MemoryMarshal.GetReference(utf8Data), (uint)utf8Data.Length); - } - - private static nuint GetIndexOfTrailingWhiteSpaceSequence(ref byte utf8Data, nuint length) + public static int GetIndexOfFirstNonWhiteSpaceChar(ReadOnlySpan utf8Data) { // This method is optimized for the case where the input data is ASCII, and if the // data does need to be trimmed it's likely that only a relatively small number of // bytes will be trimmed. - while (length > 0) + int i = 0; + int length = utf8Data.Length; + + while (i < length) { // Very quick check: see if the byte is in the range [ 21 .. 7F ]. // If so, we can skip the more expensive logic later in this method. - if ((sbyte)Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, length), -1) > (sbyte)0x20) + if (utf8Data[i] > (sbyte)0x20) { break; } - uint possibleAsciiByte = Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, length), -1); + uint possibleAsciiByte = utf8Data[i]; if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) { // The simple comparison failed. 
Let's read the actual byte value, @@ -112,7 +115,7 @@ private static nuint GetIndexOfTrailingWhiteSpaceSequence(ref byte utf8Data, nui if (Rune.IsWhiteSpace(Rune.UnsafeCreate(possibleAsciiByte))) { - length--; + i++; continue; } } @@ -121,10 +124,10 @@ private static nuint GetIndexOfTrailingWhiteSpaceSequence(ref byte utf8Data, nui // Not ASCII data. Go back to the slower "decode the entire scalar" // code path, then compare it against our Unicode tables. - Rune.DecodeLastFromUtf8(new ReadOnlySpan(ref utf8Data, (int)length), out Rune decodedRune, out int bytesConsumed); + Rune.DecodeFromUtf8(utf8Data.Slice(i), out Rune decodedRune, out int bytesConsumed); if (Rune.IsWhiteSpace(decodedRune)) { - length -= (uint)bytesConsumed; + i += bytesConsumed; continue; } } @@ -132,7 +135,67 @@ private static nuint GetIndexOfTrailingWhiteSpaceSequence(ref byte utf8Data, nui break; // If we got here, we saw a non-whitespace subsequence. } - return length; + return i; } +#endif + + //TODO: eerhardt + ///// + ///// Returns the index in where the trailing whitespace sequence + ///// begins, or 0 if the data contains only whitespace characters, or the span length if the + ///// data does not end with any whitespace characters. + ///// + //public static int GetIndexOfTrailingWhiteSpaceSequence(ReadOnlySpan utf8Data) + //{ + // return (int)GetIndexOfTrailingWhiteSpaceSequence(ref MemoryMarshal.GetReference(utf8Data), (uint)utf8Data.Length); + //} + + //private static nuint GetIndexOfTrailingWhiteSpaceSequence(ref byte utf8Data, nuint length) + //{ + // // This method is optimized for the case where the input data is ASCII, and if the + // // data does need to be trimmed it's likely that only a relatively small number of + // // bytes will be trimmed. + + // while (length > 0) + // { + // // Very quick check: see if the byte is in the range [ 21 .. 7F ]. + // // If so, we can skip the more expensive logic later in this method. 
+ + // if ((sbyte)Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, (IntPtr)length), -1) > (sbyte)0x20) //TODO: remove IntPtr cast + // { + // break; + // } + + // uint possibleAsciiByte = Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, (IntPtr)length), -1); //TODO: remove IntPtr cast + // if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) + // { + // // The simple comparison failed. Let's read the actual byte value, + // // and if it's ASCII we can delegate to Rune's inlined method + // // implementation. + + // if (Rune.IsWhiteSpace(Rune.UnsafeCreate(possibleAsciiByte))) + // { + // length--; + // continue; + // } + // } + // else + // { + // // Not ASCII data. Go back to the slower "decode the entire scalar" + // // code path, then compare it against our Unicode tables. + + // Rune.DecodeLastFromUtf8(new ReadOnlySpan(ref utf8Data, (int)length), out Rune decodedRune, out int bytesConsumed); + // if (Rune.IsWhiteSpace(decodedRune)) + // { + // length -= (uint)bytesConsumed; + // continue; + // } + // } + + // break; // If we got here, we saw a non-whitespace subsequence. 
+ // } + + // return length; + //} } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs index bb6853b072e58..1ad517a4599ab 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs @@ -7,7 +7,6 @@ using System.IO; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using Internal.Runtime.CompilerServices; namespace System.Text.Unicode { @@ -58,55 +57,56 @@ public static unsafe bool IsWellFormedUtf8(ReadOnlySpan utf8Data) } } - /// - /// Returns if it is null or contains only well-formed UTF-8 data; - /// otherwises allocates a new instance containing the same data as - /// but where all invalid UTF-8 sequences have been replaced - /// with U+FFFD. - /// - public static Utf8String ValidateAndFixupUtf8String(Utf8String value) - { - if (value.Length == 0) - { - return value; - } + //TODO: eerhardt + ///// + ///// Returns if it is null or contains only well-formed UTF-8 data; + ///// otherwises allocates a new instance containing the same data as + ///// but where all invalid UTF-8 sequences have been replaced + ///// with U+FFFD. + ///// + //public static Utf8String ValidateAndFixupUtf8String(Utf8String value) + //{ + // if (value.Length == 0) + // { + // return value; + // } - ReadOnlySpan valueAsBytes = value.AsBytes(); + // ReadOnlySpan valueAsBytes = value.AsBytes(); - int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _); - if (idxOfFirstInvalidData < 0) - { - return value; - } + // int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _); + // if (idxOfFirstInvalidData < 0) + // { + // return value; + // } - // TODO_UTF8STRING: Replace this with the faster implementation once it's available. 
- // (The faster implementation is in the dev/utf8string_bak branch currently.) + // // TODO_UTF8STRING: Replace this with the faster implementation once it's available. + // // (The faster implementation is in the dev/utf8string_bak branch currently.) - MemoryStream memStream = new MemoryStream(); - memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData)); + // MemoryStream memStream = new MemoryStream(); + // memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData)); - valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData); - do - { - if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done) - { - // Valid scalar value - copy data as-is to MemoryStream - memStream.Write(valueAsBytes.Slice(0, bytesConsumed)); - } - else - { - // Invalid scalar value - copy U+FFFD to MemoryStream - memStream.Write(ReplacementCharSequence); - } + // valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData); + // do + // { + // if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done) + // { + // // Valid scalar value - copy data as-is to MemoryStream + // memStream.Write(valueAsBytes.Slice(0, bytesConsumed)); + // } + // else + // { + // // Invalid scalar value - copy U+FFFD to MemoryStream + // memStream.Write(ReplacementCharSequence); + // } - valueAsBytes = valueAsBytes.Slice(bytesConsumed); - } while (!valueAsBytes.IsEmpty); + // valueAsBytes = valueAsBytes.Slice(bytesConsumed); + // } while (!valueAsBytes.IsEmpty); - bool success = memStream.TryGetBuffer(out ArraySegment memStreamBuffer); - Debug.Assert(success, "Couldn't get underlying MemoryStream buffer."); + // bool success = memStream.TryGetBuffer(out ArraySegment memStreamBuffer); + // Debug.Assert(success, "Couldn't get underlying MemoryStream buffer."); - return Utf8String.UnsafeCreateWithoutValidation(memStreamBuffer); - } + // return Utf8String.UnsafeCreateWithoutValidation(memStreamBuffer); + //} #endif // FEATURE_UTF8STRING } } diff 
--git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs index 5c9ba2c589a59..c5b69f065bc7d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs @@ -8,11 +8,12 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text.Unicode; -using Internal.Runtime.CompilerServices; +//using Internal.Runtime.CompilerServices; #pragma warning disable 0809 //warning CS0809: Obsolete member 'Utf8Span.Equals(object)' overrides non-obsolete member 'object.Equals(object)' #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types +#if CORECLR #if TARGET_64BIT using nint = System.Int64; using nuint = System.UInt64; @@ -20,20 +21,25 @@ using nint = System.Int32; using nuint = System.UInt32; #endif +#else +using nint = System.Int64; +using nuint = System.UInt64; +#endif namespace System.Text { [StructLayout(LayoutKind.Auto)] public readonly ref partial struct Utf8Span { - /// - /// Creates a from an existing instance. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Utf8Span(Utf8String? value) - { - Bytes = Utf8Extensions.AsBytes(value); - } + //TODO: eerhardt + ///// + ///// Creates a from an existing instance. + ///// + //[MethodImpl(MethodImplOptions.AggressiveInlining)] + //public Utf8Span(Utf8String? value) + //{ + // Bytes = Utf8Extensions.AsBytes(value); + //} /// /// Ctor for internal use only. Caller _must_ validate both invariants hold: @@ -63,31 +69,6 @@ private Utf8Span(ReadOnlySpan rawData) /// public int Length => Bytes.Length; - public Utf8Span this[Range range] - { - get - { - (int offset, int length) = range.GetOffsetAndLength(Length); - - // Check for a split across a multi-byte subsequence on the way out. - // Reminder: Unlike Utf8String, we can't safely dereference past the end of the span. 
- - ref byte newRef = ref DangerousGetMutableReference(offset); - if (length > 0 && Utf8Utility.IsUtf8ContinuationByte(newRef)) - { - Utf8String.ThrowImproperStringSplit(); - } - - int endIdx = offset + length; - if (endIdx < Length && Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(endIdx))) - { - Utf8String.ThrowImproperStringSplit(); - } - - return UnsafeCreateWithoutValidation(new ReadOnlySpan(ref newRef, length)); - } - } - /// /// Returns a mutable reference to the first byte of this /// (or, if this is empty, to where the first byte would be). @@ -117,7 +98,7 @@ internal ref byte DangerousGetMutableReference(nuint index) // Allow retrieving references to just past the end of the span (but shouldn't dereference this). Debug.Assert(index <= (uint)Length, "Caller should've performed bounds checking."); - return ref Unsafe.AddByteOffset(ref DangerousGetMutableReference(), index); + return ref Unsafe.AddByteOffset(ref DangerousGetMutableReference(), (IntPtr)index); //TODO: nuint - remove cast } public bool IsEmptyOrWhiteSpace() => (Utf8Utility.GetIndexOfFirstNonWhiteSpaceChar(Bytes) == Length); @@ -156,7 +137,11 @@ public override int GetHashCode() // UTF-8 textual data, not over arbitrary binary sequences. ulong seed = Marvin.DefaultSeed; +#if CORECLR return Marvin.ComputeHash32(ref MemoryMarshal.GetReference(Bytes), (uint)Length /* in bytes */, (uint)seed, (uint)(seed >> 32)); +#else + return Marvin.ComputeHash32(Bytes, seed); +#endif } public int GetHashCode(StringComparison comparison) @@ -204,7 +189,7 @@ public bool IsNormalized(NormalizationForm normalizationForm = NormalizationForm /// /// Gets an immutable reference that can be used in a statement. Unlike - /// , the resulting reference is not guaranteed to be null-terminated. + /// TODO cref="Utf8String"/>, the resulting reference is not guaranteed to be null-terminated. /// /// /// If this instance is empty, returns . 
Dereferencing @@ -225,7 +210,20 @@ public override string ToString() // TODO_UTF8STRING: Since we know the underlying data is immutable, well-formed UTF-8, // we can perform transcoding using an optimized code path that skips all safety checks. +#if CORECLR || NETCOREAPP return Encoding.UTF8.GetString(Bytes); +#else + byte[] buffer = ArrayPool.Shared.Rent(Length); + try + { + Bytes.CopyTo(buffer); + return Encoding.UTF8.GetString(buffer, 0, Length); + } + finally + { + ArrayPool.Shared.Return(buffer); + } +#endif } /// @@ -253,23 +251,39 @@ internal unsafe string ToStringNoReplacement() int utf16CharCount = Length + utf16CodeUnitCountAdjustment; Debug.Assert(utf16CharCount <= Length && utf16CharCount >= 0); +#if CORECLR || NETCOREAPP // TODO_UTF8STRING: Can we call string.FastAllocate directly? - return string.Create(utf16CharCount, (pbData: (IntPtr)pData, cbData: Length), (chars, state) => { OperationStatus status = Utf8.ToUtf16(new ReadOnlySpan((byte*)state.pbData, state.cbData), chars, out _, out _, replaceInvalidSequences: false); Debug.Assert(status == OperationStatus.Done, "Did somebody mutate this Utf8String instance unexpectedly?"); }); +#else + char[] buffer = ArrayPool.Shared.Rent(utf16CharCount); + try + { + fixed (char* pBuffer = buffer) + { + Encoding.UTF8.GetChars(pData, Length, pBuffer, utf16CharCount); + return new string(pBuffer, 0, utf16CharCount); + } + } + finally + { + ArrayPool.Shared.Return(buffer); + } +#endif } } - public Utf8String ToUtf8String() - { - // TODO_UTF8STRING: Since we know the underlying data is immutable, well-formed UTF-8, - // we can perform transcoding using an optimized code path that skips all safety checks. + //TODO eerhardt + //public Utf8String ToUtf8String() + //{ + // // TODO_UTF8STRING: Since we know the underlying data is immutable, well-formed UTF-8, + // // we can perform transcoding using an optimized code path that skips all safety checks. 
- return Utf8String.UnsafeCreateWithoutValidation(Bytes); - } + // return Utf8String.UnsafeCreateWithoutValidation(Bytes); + //} /// /// Wraps a instance around the provided , diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs new file mode 100644 index 0000000000000..111060238f554 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs @@ -0,0 +1,43 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Buffers; +using System.ComponentModel; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text.Unicode; +using Internal.Runtime.CompilerServices; + +namespace System.Text +{ + [StructLayout(LayoutKind.Auto)] + public readonly ref partial struct Utf8Span + { + public Utf8Span this[Range range] + { + get + { + (int offset, int length) = range.GetOffsetAndLength(Length); + + // Check for a split across a multi-byte subsequence on the way out. + // Reminder: Unlike Utf8String, we can't safely dereference past the end of the span. 
+ + ref byte newRef = ref DangerousGetMutableReference(offset); + if (length > 0 && Utf8Utility.IsUtf8ContinuationByte(newRef)) + { + Utf8String.ThrowImproperStringSplit(); + } + + int endIdx = offset + length; + if (endIdx < Length && Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(endIdx))) + { + Utf8String.ThrowImproperStringSplit(); + } + + return UnsafeCreateWithoutValidation(new ReadOnlySpan(ref newRef, length)); + } + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs index 6a264b4cc02e0..de9fe7f338d0c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs @@ -8,7 +8,7 @@ namespace System.Text { - internal abstract class Utf8StringComparer : IComparer, IEqualityComparer + internal abstract class Utf8StringComparer //TODO eerhardt : IComparer, IEqualityComparer { // Nobody except for nested classes can create instances of this type. private Utf8StringComparer() { } @@ -46,19 +46,19 @@ public static Utf8StringComparer FromComparison(StringComparison comparisonType) }; } - public abstract int Compare(Utf8String? x, Utf8String? y); + //public abstract int Compare(Utf8String? x, Utf8String? y); public abstract int Compare(Utf8Span x, Utf8Span y); - public abstract bool Equals(Utf8String? x, Utf8String? y); + //public abstract bool Equals(Utf8String? x, Utf8String? 
y); public abstract bool Equals(Utf8Span x, Utf8Span y); #pragma warning disable CS8614 // Remove warning disable when nullable attributes are respected - public abstract int GetHashCode(Utf8String obj); + //public abstract int GetHashCode(Utf8String obj); #pragma warning restore CS8614 public abstract int GetHashCode(Utf8Span obj); private sealed class CultureAwareComparer : Utf8StringComparer { - internal static readonly CultureAwareComparer Invariant = new CultureAwareComparer(CompareInfo.Invariant, CompareOptions.None); - internal static readonly CultureAwareComparer InvariantIgnoreCase = new CultureAwareComparer(CompareInfo.Invariant, CompareOptions.IgnoreCase); + internal static readonly CultureAwareComparer Invariant = new CultureAwareComparer(CultureInfo.InvariantCulture.CompareInfo, CompareOptions.None); + internal static readonly CultureAwareComparer InvariantIgnoreCase = new CultureAwareComparer(CultureInfo.InvariantCulture.CompareInfo, CompareOptions.IgnoreCase); private readonly CompareInfo _compareInfo; private readonly CompareOptions _options; @@ -71,12 +71,12 @@ internal CultureAwareComparer(CompareInfo compareInfo, CompareOptions options) _options = options; } - public override int Compare(Utf8String? x, Utf8String? y) - { - // TODO_UTF8STRING: Avoid the allocations below. + //public override int Compare(Utf8String? x, Utf8String? y) + //{ + // // TODO_UTF8STRING: Avoid the allocations below. - return _compareInfo.Compare(x?.ToString(), y?.ToString(), _options); - } + // return _compareInfo.Compare(x?.ToString(), y?.ToString(), _options); + //} public override int Compare(Utf8Span x, Utf8Span y) { @@ -85,15 +85,15 @@ public override int Compare(Utf8Span x, Utf8Span y) return _compareInfo.Compare(x.ToString(), y.ToString(), _options); } - public override bool Equals(Utf8String? x, Utf8String? y) => Compare(x, y) == 0; + //public override bool Equals(Utf8String? x, Utf8String? 
y) => Compare(x, y) == 0; public override bool Equals(Utf8Span x, Utf8Span y) => Compare(x, y) == 0; - public override int GetHashCode(Utf8String? obj) - { - // TODO_UTF8STRING: Avoid the allocations below. + //public override int GetHashCode(Utf8String? obj) + //{ + // // TODO_UTF8STRING: Avoid the allocations below. - return (obj is null) ? 0 : _compareInfo.GetHashCode(obj.ToString(), _options); - } + // return (obj is null) ? 0 : _compareInfo.GetHashCode(obj.ToString(), _options); + //} public override int GetHashCode(Utf8Span obj) { @@ -110,12 +110,12 @@ private sealed class OrdinalComparer : Utf8StringComparer // All accesses must be through the static factory. private OrdinalComparer() { } - public override int Compare(Utf8String? x, Utf8String? y) - { - // TODO_UTF8STRING: Avoid the allocations below. + //public override int Compare(Utf8String? x, Utf8String? y) + //{ + // // TODO_UTF8STRING: Avoid the allocations below. - return string.CompareOrdinal(x?.ToString(), y?.ToString()); - } + // return string.CompareOrdinal(x?.ToString(), y?.ToString()); + //} public override int Compare(Utf8Span x, Utf8Span y) { @@ -124,9 +124,9 @@ public override int Compare(Utf8Span x, Utf8Span y) return string.CompareOrdinal(x.ToString(), y.ToString()); } - public override bool Equals(Utf8String? x, Utf8String? y) => Utf8String.Equals(x, y); + //public override bool Equals(Utf8String? x, Utf8String? y) => Utf8String.Equals(x, y); public override bool Equals(Utf8Span x, Utf8Span y) => Utf8Span.Equals(x, y); - public override int GetHashCode(Utf8String obj) => obj.GetHashCode(); + //public override int GetHashCode(Utf8String obj) => obj.GetHashCode(); public override int GetHashCode(Utf8Span obj) => obj.GetHashCode(); } @@ -137,12 +137,12 @@ private sealed class OrdinalIgnoreCaseComparer : Utf8StringComparer // All accesses must be through the static factory. private OrdinalIgnoreCaseComparer() { } - public override int Compare(Utf8String? x, Utf8String? 
y) - { - // TODO_UTF8STRING: Avoid the allocations below. + //public override int Compare(Utf8String? x, Utf8String? y) + //{ + // // TODO_UTF8STRING: Avoid the allocations below. - return StringComparer.OrdinalIgnoreCase.Compare(x?.ToString(), y?.ToString()); - } + // return StringComparer.OrdinalIgnoreCase.Compare(x?.ToString(), y?.ToString()); + //} public override int Compare(Utf8Span x, Utf8Span y) { @@ -151,12 +151,12 @@ public override int Compare(Utf8Span x, Utf8Span y) return StringComparer.OrdinalIgnoreCase.Compare(x.ToString(), y.ToString()); } - public override bool Equals(Utf8String? x, Utf8String? y) - { - // TODO_UTF8STRING: Avoid the allocations below. + //public override bool Equals(Utf8String? x, Utf8String? y) + //{ + // // TODO_UTF8STRING: Avoid the allocations below. - return StringComparer.OrdinalIgnoreCase.Equals(x?.ToString(), y?.ToString()); - } + // return StringComparer.OrdinalIgnoreCase.Equals(x?.ToString(), y?.ToString()); + //} public override bool Equals(Utf8Span x, Utf8Span y) { @@ -165,12 +165,12 @@ public override bool Equals(Utf8Span x, Utf8Span y) return StringComparer.OrdinalIgnoreCase.Equals(x.ToString(), y.ToString()); } - public override int GetHashCode(Utf8String obj) - { - // TODO_UTF8STRING: Avoid the allocations below. + //public override int GetHashCode(Utf8String obj) + //{ + // // TODO_UTF8STRING: Avoid the allocations below. 
- return StringComparer.OrdinalIgnoreCase.GetHashCode(obj.ToString()); - } + // return StringComparer.OrdinalIgnoreCase.GetHashCode(obj.ToString()); + //} public override int GetHashCode(Utf8Span obj) { diff --git a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx index d75675748c34a..e6a62fd36ce32 100644 --- a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx +++ b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx @@ -126,4 +126,10 @@ Destination is too short. + + The string comparison type passed in is currently not supported. + + + Cannot call Utf8Span.Equals(object). Use Equals(Utf8Span) or operator == instead. + \ No newline at end of file diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index 13b9b5031918e..a1fba3855afcb 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -22,7 +22,11 @@ + + + Common\System\Marvin.cs + System\Char8.cs @@ -38,6 +42,34 @@ System\Text\Rune.cs + + System\Text\Utf8StringComparer.cs + + + System\Numerics\BitOperations.cs + + + System\Text\ASCIIUtility.cs + + + System\Text\ASCIIUtility.Helpers.cs + + + System\Text\Unicode\Utf8Utility.cs + + + System\Text\Unicode\Utf8Utility.Helpers.cs + + + System\Text\Unicode\Utf8Utility.Validation.cs + + + System\Text\Unicode\Utf8Utility.WhiteSpace.cs + + + + System\Text\Utf8Span.cs + - - - + diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs new file mode 100644 index 0000000000000..0fe792f8757d4 --- /dev/null +++ 
b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs @@ -0,0 +1,92 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Runtime.Intrinsics +{ + internal static class Vector128 + { + public static Vector128 Create(short value) => throw new PlatformNotSupportedException(); + public static Vector128 Create(ushort value) => throw new PlatformNotSupportedException(); + public static Vector128 AsByte(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); + public static Vector128 AsInt16(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); + public static Vector128 AsUInt16(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); + public static Vector128 AsUInt32(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); + public static Vector128 AsUInt64(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); + } + internal readonly struct Vector128 + where T : struct + { + public static Vector128 Zero => throw new PlatformNotSupportedException(); + } +} + +namespace System.Runtime.Intrinsics.X86 +{ + internal abstract class Bmi1 + { + public abstract class X64 + { + public static bool IsSupported { get; } = false; + public static ulong TrailingZeroCount(ulong value) => throw new PlatformNotSupportedException(); + } + public static bool IsSupported { get; } = false; + public static uint TrailingZeroCount(uint value) => throw new PlatformNotSupportedException(); + } + internal abstract class Lzcnt + { + public abstract class X64 + { + public static bool IsSupported { get; } = false; + public static ulong LeadingZeroCount(ulong value) => throw new PlatformNotSupportedException(); + } + + public static bool 
IsSupported { get; } = false; + public static uint LeadingZeroCount(uint value) => throw new PlatformNotSupportedException(); + } + internal abstract class Popcnt + { + public abstract class X64 + { + public static bool IsSupported { get; } = false; + public static ulong PopCount(ulong value) => throw new PlatformNotSupportedException(); + } + public static bool IsSupported { get; } = false; + public static uint PopCount(uint value) => throw new PlatformNotSupportedException(); + } + + internal abstract class Sse2 + { + public abstract class X64 + { + public static bool IsSupported { get; } = false; + public static Vector128 ConvertScalarToVector128UInt64(ulong value) => throw new PlatformNotSupportedException(); + public static ulong ConvertToUInt64(Vector128 value) => throw new PlatformNotSupportedException(); + } + public static bool IsSupported { get; } = false; + public static Vector128 AddSaturate(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); + public static Vector128 ConvertScalarToVector128UInt32(uint value) => throw new PlatformNotSupportedException(); + public static uint ConvertToUInt32(Vector128 value) => throw new PlatformNotSupportedException(); + public static unsafe Vector128 LoadAlignedVector128(byte* address) => throw new PlatformNotSupportedException(); + public static unsafe Vector128 LoadAlignedVector128(ushort* address) => throw new PlatformNotSupportedException(); + public static unsafe Vector128 LoadVector128(byte* address) => throw new PlatformNotSupportedException(); + public static unsafe Vector128 LoadVector128(short* address) => throw new PlatformNotSupportedException(); + public static unsafe Vector128 LoadVector128(ushort* address) => throw new PlatformNotSupportedException(); + public static int MoveMask(Vector128 value) => throw new PlatformNotSupportedException(); + public static Vector128 Or(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); + public static Vector128 
Or(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); + public static Vector128 PackUnsignedSaturate(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); + public static unsafe void Store(byte* address, Vector128 source) => throw new PlatformNotSupportedException(); + public static unsafe void StoreAligned(byte* address, Vector128 source) => throw new PlatformNotSupportedException(); + public static unsafe void StoreScalar(ulong* address, Vector128 source) => throw new PlatformNotSupportedException(); + public static Vector128 UnpackHigh(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); + public static Vector128 UnpackLow(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); + } + + internal abstract class Sse41 + { + public static bool IsSupported { get; } = false; + public static bool TestZ(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); + public static bool TestZ(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); + } +} diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs b/src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs index ed319b71c5693..db6276c21d66b 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs @@ -17,6 +17,11 @@ internal static class ThrowHelper [MethodImpl(MethodImplOptions.NoInlining)] private static Exception CreateArgumentOutOfRangeException(ExceptionArgument argument) { return new ArgumentOutOfRangeException(argument.ToString()); } + [DoesNotReturn] + internal static void ThrowInvalidOperationException() { throw CreateInvalidOperationException(); } + [MethodImpl(MethodImplOptions.NoInlining)] + private static Exception CreateInvalidOperationException() { return new InvalidOperationException(); } + 
[DoesNotReturn] internal static void ThrowArgumentException_DestinationTooShort() { diff --git a/src/libraries/shims/ApiCompat.proj b/src/libraries/shims/ApiCompat.proj index a340a3d1a5aec..29ec65df3065e 100644 --- a/src/libraries/shims/ApiCompat.proj +++ b/src/libraries/shims/ApiCompat.proj @@ -85,17 +85,6 @@ - - - - - - - From a52f01be45e4cff8d5edacf0adc16c388e5cb5df Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Mon, 2 Mar 2020 17:54:00 -0600 Subject: [PATCH 05/26] Get Utf8String building on netstandard2.0 --- .../System.Private.CoreLib.csproj | 2 + .../src/System/Utf8Extensions.CoreCLR.cs | 23 +++ .../src/System/Utf8String.CoreCLR.cs | 29 +++ .../System.Private.CoreLib.Shared.projitems | 3 +- .../src/System/Numerics/BitOperations.cs | 4 + .../src/System/Text/ASCIIUtility.cs | 4 + .../Text/Unicode/Utf8Utility.Helpers.cs | 4 + .../Text/Unicode/Utf8Utility.Validation.cs | 4 + ...s => Utf8Utility.WhiteSpace.netcoreapp.cs} | 103 ++--------- .../Utf8Utility.WhiteSpace.netstandard.cs | 119 ++++++++++++ .../src/System/Text/Unicode/Utf8Utility.cs | 107 ++++++----- .../src/System/Text/Utf8Span.Comparison.cs | 8 + .../src/System/Text/Utf8Span.cs | 35 ++-- .../src/System/Text/Utf8Span.netcoreapp.cs | 1 - .../src/System/Text/Utf8StringComparer.cs | 74 ++++---- .../src/System/Utf8Extensions.cs | 20 +- .../src/System/Utf8String.Construction.cs | 156 ++-------------- .../src/System/Utf8String.cs | 75 ++++---- .../src/System/Utf8String.netcoreapp.cs | 175 ++++++++++++++++++ .../src/Resources/Strings.resx | 6 + .../src/System.Utf8String.Experimental.csproj | 34 ++-- .../src/System/IO/Utf8StringStream.cs | 13 +- .../src/System/Net/Http/Utf8StringContent.cs | 27 +++ .../Runtime/Intrinsics/Intrinsics.Shims.cs | 1 - .../src/System/Text/ThrowHelper.cs | 1 + .../src/System/Utf8Extensions.Portable.cs | 22 +++ .../src/System/Utf8String.Portable.cs | 109 +++++++++++ 27 files changed, 765 insertions(+), 394 deletions(-) create mode 100644 
src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs rename src/libraries/System.Private.CoreLib/src/System/Text/Unicode/{Utf8Utility.WhiteSpace.cs => Utf8Utility.WhiteSpace.netcoreapp.cs} (54%) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netstandard.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Utf8String.netcoreapp.cs create mode 100644 src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs create mode 100644 src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs diff --git a/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj index a147e8d3bbe8e..94d9e7e217fa0 100644 --- a/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj +++ b/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj @@ -279,6 +279,8 @@ + + diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs new file mode 100644 index 0000000000000..ef0fbe9334398 --- /dev/null +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs @@ -0,0 +1,23 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Runtime.CompilerServices; + +namespace System +{ + public static partial class Utf8Extensions + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ReadOnlySpan CreateSpan(Utf8String text) => + new ReadOnlySpan(ref text.DangerousGetMutableReference(), text.Length); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ReadOnlySpan CreateSpan(Utf8String text, int start) => + new ReadOnlySpan(ref text.DangerousGetMutableReference(start), text.Length - start); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ReadOnlySpan CreateSpan(Utf8String text, int start, int length) => + new ReadOnlySpan(ref text.DangerousGetMutableReference(start), length); + } +} diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Utf8String.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Utf8String.CoreCLR.cs index f8ab2a5140b16..9a04b332b24ed 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Utf8String.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Utf8String.CoreCLR.cs @@ -263,6 +263,7 @@ private Utf8String Ctor(string value) return Ctor(value.AsSpan()); } +<<<<<<< HEAD /* * METHODS */ @@ -280,6 +281,15 @@ internal ReadOnlySpan AsBytesSkipNullCheck() int length = Length; return new ReadOnlySpan(ref DangerousGetMutableReference(), length); } +======= + /// + /// Returns a mutable reference to the first byte of this + /// (or the null terminator if the string is empty). + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal ref byte DangerousGetMutableReference() => ref Unsafe.AsRef(in _firstByte); +>>>>>>> Get Utf8String building on netstandard2.0 /// /// Returns a mutable that can be used to populate this @@ -296,6 +306,7 @@ internal Span DangerousGetMutableSpan() } /// +<<<<<<< HEAD /// Returns a mutable reference to the first byte of this /// (or the null terminator if the string is empty). 
/// @@ -304,6 +315,8 @@ internal Span DangerousGetMutableSpan() internal ref byte DangerousGetMutableReference() => ref Unsafe.AsRef(in _firstByte); /// +======= +>>>>>>> Get Utf8String building on netstandard2.0 /// Gets an immutable reference that can be used in a statement. The resulting /// reference can be pinned and used as a null-terminated LPCUTF8STR. /// @@ -313,9 +326,25 @@ internal Span DangerousGetMutableSpan() [EditorBrowsable(EditorBrowsableState.Never)] // for compiler use only public ref readonly byte GetPinnableReference() => ref _firstByte; +<<<<<<< HEAD /* * HELPER METHODS */ +======= + /// + /// Similar to , but skips the null check on the input. + /// Throws a if the input is null. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal ReadOnlySpan AsBytesSkipNullCheck() + { + // By dereferencing Length first, the JIT will skip the null check that normally precedes + // most instance method calls, and it'll use the field dereference as the null check. + + int length = Length; + return new ReadOnlySpan(ref DangerousGetMutableReference(), length); + } +>>>>>>> Get Utf8String building on netstandard2.0 /// /// Creates a new zero-initialized instance of the specified length. 
Actual storage allocated is "length + 1" bytes diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 1cc55a46be446..d84f7e117cfd9 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -861,7 +861,7 @@ - + @@ -1817,6 +1817,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs index 8385687e86d51..3aac79a25563b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs @@ -6,6 +6,10 @@ using System.Runtime.InteropServices; using System.Runtime.Intrinsics.X86; +#if SYSTEM_PRIVATE_CORELIB +using Internal.Runtime.CompilerServices; +#endif + // Some routines inspired by the Stanford Bit Twiddling Hacks by Sean Eron Anderson: // http://graphics.stanford.edu/~seander/bithacks.html diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs index 63d392c9112be..4f70613a3eef0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs @@ -8,6 +8,10 @@ using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; +#if SYSTEM_PRIVATE_CORELIB +using Internal.Runtime.CompilerServices; +#endif + #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types #if TARGET_64BIT using nint = System.Int64; diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs index 
2a199f94a1681..4021195ff0646 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs @@ -7,6 +7,10 @@ using System.Numerics; using System.Runtime.CompilerServices; +#if SYSTEM_PRIVATE_CORELIB +using Internal.Runtime.CompilerServices; +#endif + namespace System.Text.Unicode { internal static partial class Utf8Utility diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs index ac9215c2e348b..d218f4483e7f4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs @@ -7,6 +7,10 @@ using System.Runtime.Intrinsics.X86; using System.Runtime.CompilerServices; +#if SYSTEM_PRIVATE_CORELIB +using Internal.Runtime.CompilerServices; +#endif + #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types #if TARGET_64BIT using nint = System.Int64; diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netcoreapp.cs similarity index 54% rename from src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.cs rename to src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netcoreapp.cs index 57e65f71fe39d..cb98eaf6ea415 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netcoreapp.cs @@ -3,10 +3,9 @@ // See the LICENSE file in the project root for more information. 
using System.Runtime.InteropServices; -using System.Runtime.CompilerServices; +using Internal.Runtime.CompilerServices; #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types -#if CORECLR #if TARGET_64BIT using nint = System.Int64; using nuint = System.UInt64; @@ -14,16 +13,11 @@ using nint = System.Int32; using nuint = System.UInt32; #endif -#else -using nint = System.Int64; -using nuint = System.UInt64; -#endif namespace System.Text.Unicode { internal static partial class Utf8Utility { -#if CORECLR /// /// Returns the index in where the first non-whitespace character /// appears, or the input length if the data contains only whitespace characters. @@ -46,12 +40,12 @@ private static nuint GetIndexOfFirstNonWhiteSpaceChar(ref byte utf8Data, nuint l // Very quick check: see if the byte is in the range [ 21 .. 7F ]. // If so, we can skip the more expensive logic later in this method. - if ((sbyte)Unsafe.AddByteOffset(ref utf8Data, (IntPtr)i) > (sbyte)0x20) //TODO: remove IntPtr cast + if ((sbyte)Unsafe.AddByteOffset(ref utf8Data, i) > (sbyte)0x20) //TODO: remove IntPtr cast { break; } - uint possibleAsciiByte = Unsafe.AddByteOffset(ref utf8Data, (IntPtr)i); //TODO: remove IntPtr cast + uint possibleAsciiByte = Unsafe.AddByteOffset(ref utf8Data, i); //TODO: remove IntPtr cast if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) { // The simple comparison failed. Let's read the actual byte value, @@ -82,31 +76,34 @@ private static nuint GetIndexOfFirstNonWhiteSpaceChar(ref byte utf8Data, nuint l return i; } -#else + /// - /// Returns the index in where the first non-whitespace character - /// appears, or the input length if the data contains only whitespace characters. + /// Returns the index in where the trailing whitespace sequence + /// begins, or 0 if the data contains only whitespace characters, or the span length if the + /// data does not end with any whitespace characters. 
/// - public static int GetIndexOfFirstNonWhiteSpaceChar(ReadOnlySpan utf8Data) + public static int GetIndexOfTrailingWhiteSpaceSequence(ReadOnlySpan utf8Data) + { + return (int)GetIndexOfTrailingWhiteSpaceSequence(ref MemoryMarshal.GetReference(utf8Data), (uint)utf8Data.Length); + } + + private static nuint GetIndexOfTrailingWhiteSpaceSequence(ref byte utf8Data, nuint length) { // This method is optimized for the case where the input data is ASCII, and if the // data does need to be trimmed it's likely that only a relatively small number of // bytes will be trimmed. - int i = 0; - int length = utf8Data.Length; - - while (i < length) + while (length > 0) { // Very quick check: see if the byte is in the range [ 21 .. 7F ]. // If so, we can skip the more expensive logic later in this method. - if (utf8Data[i] > (sbyte)0x20) + if ((sbyte)Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, (IntPtr)length), -1) > (sbyte)0x20) //TODO: remove IntPtr cast { break; } - uint possibleAsciiByte = utf8Data[i]; + uint possibleAsciiByte = Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, (IntPtr)length), -1); //TODO: remove IntPtr cast if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) { // The simple comparison failed. Let's read the actual byte value, @@ -115,7 +112,7 @@ public static int GetIndexOfFirstNonWhiteSpaceChar(ReadOnlySpan utf8Data) if (Rune.IsWhiteSpace(Rune.UnsafeCreate(possibleAsciiByte))) { - i++; + length--; continue; } } @@ -124,10 +121,10 @@ public static int GetIndexOfFirstNonWhiteSpaceChar(ReadOnlySpan utf8Data) // Not ASCII data. Go back to the slower "decode the entire scalar" // code path, then compare it against our Unicode tables. 
- Rune.DecodeFromUtf8(utf8Data.Slice(i), out Rune decodedRune, out int bytesConsumed); + Rune.DecodeLastFromUtf8(new ReadOnlySpan(ref utf8Data, (int)length), out Rune decodedRune, out int bytesConsumed); if (Rune.IsWhiteSpace(decodedRune)) { - i += bytesConsumed; + length -= (uint)bytesConsumed; continue; } } @@ -135,67 +132,7 @@ public static int GetIndexOfFirstNonWhiteSpaceChar(ReadOnlySpan utf8Data) break; // If we got here, we saw a non-whitespace subsequence. } - return i; + return length; } -#endif - - //TODO: eerhardt - ///// - ///// Returns the index in where the trailing whitespace sequence - ///// begins, or 0 if the data contains only whitespace characters, or the span length if the - ///// data does not end with any whitespace characters. - ///// - //public static int GetIndexOfTrailingWhiteSpaceSequence(ReadOnlySpan utf8Data) - //{ - // return (int)GetIndexOfTrailingWhiteSpaceSequence(ref MemoryMarshal.GetReference(utf8Data), (uint)utf8Data.Length); - //} - - //private static nuint GetIndexOfTrailingWhiteSpaceSequence(ref byte utf8Data, nuint length) - //{ - // // This method is optimized for the case where the input data is ASCII, and if the - // // data does need to be trimmed it's likely that only a relatively small number of - // // bytes will be trimmed. - - // while (length > 0) - // { - // // Very quick check: see if the byte is in the range [ 21 .. 7F ]. - // // If so, we can skip the more expensive logic later in this method. - - // if ((sbyte)Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, (IntPtr)length), -1) > (sbyte)0x20) //TODO: remove IntPtr cast - // { - // break; - // } - - // uint possibleAsciiByte = Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, (IntPtr)length), -1); //TODO: remove IntPtr cast - // if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) - // { - // // The simple comparison failed. Let's read the actual byte value, - // // and if it's ASCII we can delegate to Rune's inlined method - // // implementation. 
- - // if (Rune.IsWhiteSpace(Rune.UnsafeCreate(possibleAsciiByte))) - // { - // length--; - // continue; - // } - // } - // else - // { - // // Not ASCII data. Go back to the slower "decode the entire scalar" - // // code path, then compare it against our Unicode tables. - - // Rune.DecodeLastFromUtf8(new ReadOnlySpan(ref utf8Data, (int)length), out Rune decodedRune, out int bytesConsumed); - // if (Rune.IsWhiteSpace(decodedRune)) - // { - // length -= (uint)bytesConsumed; - // continue; - // } - // } - - // break; // If we got here, we saw a non-whitespace subsequence. - // } - - // return length; - //} } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netstandard.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netstandard.cs new file mode 100644 index 0000000000000..8e09579a5757c --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netstandard.cs @@ -0,0 +1,119 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Text.Unicode +{ + internal static partial class Utf8Utility + { + /// + /// Returns the index in where the first non-whitespace character + /// appears, or the input length if the data contains only whitespace characters. + /// + public static int GetIndexOfFirstNonWhiteSpaceChar(ReadOnlySpan utf8Data) + { + // This method is optimized for the case where the input data is ASCII, and if the + // data does need to be trimmed it's likely that only a relatively small number of + // bytes will be trimmed. + + int i = 0; + int length = utf8Data.Length; + + while (i < length) + { + // Very quick check: see if the byte is in the range [ 21 .. 7F ]. + // If so, we can skip the more expensive logic later in this method. 
+ + if ((sbyte)utf8Data[i] > (sbyte)0x20) // signed compare: bytes >= 0x80 are negative and must fall through to the decode path + { + break; + } + + uint possibleAsciiByte = utf8Data[i]; + if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) + { + // The simple comparison failed. Let's read the actual byte value, + // and if it's ASCII we can delegate to Rune's inlined method + // implementation. + + if (Rune.IsWhiteSpace(Rune.UnsafeCreate(possibleAsciiByte))) + { + i++; + continue; + } + } + else + { + // Not ASCII data. Go back to the slower "decode the entire scalar" + // code path, then compare it against our Unicode tables. + + Rune.DecodeFromUtf8(utf8Data.Slice(i), out Rune decodedRune, out int bytesConsumed); + if (Rune.IsWhiteSpace(decodedRune)) + { + i += bytesConsumed; + continue; + } + } + + break; // If we got here, we saw a non-whitespace subsequence. + } + + return i; + } + + /// + /// Returns the index in where the trailing whitespace sequence + /// begins, or 0 if the data contains only whitespace characters, or the span length if the + /// data does not end with any whitespace characters. + /// + public static int GetIndexOfTrailingWhiteSpaceSequence(ReadOnlySpan utf8Data) + { + // This method is optimized for the case where the input data is ASCII, and if the + // data does need to be trimmed it's likely that only a relatively small number of + // bytes will be trimmed. + + int length = utf8Data.Length; + + while (length > 0) + { + // Very quick check: see if the byte is in the range [ 21 .. 7F ]. + // If so, we can skip the more expensive logic later in this method. + + if ((sbyte)utf8Data[length - 1] > (sbyte)0x20) + { + break; + } + + uint possibleAsciiByte = utf8Data[length - 1]; + if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) + { + // The simple comparison failed. Let's read the actual byte value, + // and if it's ASCII we can delegate to Rune's inlined method + // implementation. + + if (Rune.IsWhiteSpace(Rune.UnsafeCreate(possibleAsciiByte))) + { + length--; + continue; + } + } + else + { + // Not ASCII data.
Go back to the slower "decode the entire scalar" + // code path, then compare it against our Unicode tables. + + Rune.DecodeLastFromUtf8(utf8Data.Slice(0, length), out Rune decodedRune, out int bytesConsumed); // decode the last scalar of the untrimmed prefix [0, length), not the bytes after it + if (Rune.IsWhiteSpace(decodedRune)) + { + length -= bytesConsumed; + continue; + } + } + + break; // If we got here, we saw a non-whitespace subsequence. + } + + return length; + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs index 1ad517a4599ab..247013a1f9076 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs @@ -8,6 +8,10 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if SYSTEM_PRIVATE_CORELIB +using Internal.Runtime.CompilerServices; +#endif + namespace System.Text.Unicode { internal static partial class Utf8Utility @@ -57,56 +61,57 @@ public static unsafe bool IsWellFormedUtf8(ReadOnlySpan utf8Data) } } - //TODO: eerhardt - ///// - ///// Returns if it is null or contains only well-formed UTF-8 data; - ///// otherwises allocates a new instance containing the same data as - ///// but where all invalid UTF-8 sequences have been replaced - ///// with U+FFFD. - ///// - //public static Utf8String ValidateAndFixupUtf8String(Utf8String value) - //{ - // if (value.Length == 0) - // { - // return value; - // } - - // ReadOnlySpan valueAsBytes = value.AsBytes(); - - // int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _); - // if (idxOfFirstInvalidData < 0) - // { - // return value; - // } - - // // TODO_UTF8STRING: Replace this with the faster implementation once it's available. - // // (The faster implementation is in the dev/utf8string_bak branch currently.)
- - // MemoryStream memStream = new MemoryStream(); - // memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData)); - - // valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData); - // do - // { - // if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done) - // { - // // Valid scalar value - copy data as-is to MemoryStream - // memStream.Write(valueAsBytes.Slice(0, bytesConsumed)); - // } - // else - // { - // // Invalid scalar value - copy U+FFFD to MemoryStream - // memStream.Write(ReplacementCharSequence); - // } - - // valueAsBytes = valueAsBytes.Slice(bytesConsumed); - // } while (!valueAsBytes.IsEmpty); - - // bool success = memStream.TryGetBuffer(out ArraySegment memStreamBuffer); - // Debug.Assert(success, "Couldn't get underlying MemoryStream buffer."); - - // return Utf8String.UnsafeCreateWithoutValidation(memStreamBuffer); - //} + /// + /// Returns if it is null or contains only well-formed UTF-8 data; + /// otherwises allocates a new instance containing the same data as + /// but where all invalid UTF-8 sequences have been replaced + /// with U+FFFD. + /// + public static Utf8String ValidateAndFixupUtf8String(Utf8String value) + { + if (value.Length == 0) + { + return value; + } + + ReadOnlySpan valueAsBytes = value.AsBytes(); + + int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _); + if (idxOfFirstInvalidData < 0) + { + return value; + } + + // TODO_UTF8STRING: Replace this with the faster implementation once it's available. + // (The faster implementation is in the dev/utf8string_bak branch currently.) 
+ + MemoryStream memStream = new MemoryStream(); +#if CORECLR || NETCOREAPP // TODO: eerhardt + memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData)); + + valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData); + do + { + if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done) + { + // Valid scalar value - copy data as-is to MemoryStream + memStream.Write(valueAsBytes.Slice(0, bytesConsumed)); + } + else + { + // Invalid scalar value - copy U+FFFD to MemoryStream + memStream.Write(ReplacementCharSequence); + } + + valueAsBytes = valueAsBytes.Slice(bytesConsumed); + } while (!valueAsBytes.IsEmpty); +#endif + + bool success = memStream.TryGetBuffer(out ArraySegment memStreamBuffer); + Debug.Assert(success, "Couldn't get underlying MemoryStream buffer."); + + return Utf8String.UnsafeCreateWithoutValidation(memStreamBuffer); + } #endif // FEATURE_UTF8STRING - } + } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs index 3abd979f43389..1145d21785ba0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs @@ -68,7 +68,11 @@ public bool Contains(Rune value, StringComparison comparison) { // TODO_UTF8STRING: Optimize me to avoid allocations. +#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP return this.ToString().Contains(value.ToString(), comparison); +#else + return this.ToString().IndexOf(value.ToString(), comparison) >= 0; +#endif } /// @@ -88,7 +92,11 @@ public bool Contains(Utf8Span value, StringComparison comparison) { // TODO_UTF8STRING: Optimize me to avoid allocations. 
+#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP return this.ToString().Contains(value.ToString(), comparison); +#else + return this.ToString().IndexOf(value.ToString(), comparison) != -1; +#endif } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs index c5b69f065bc7d..352093fd8093e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs @@ -8,7 +8,10 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text.Unicode; -//using Internal.Runtime.CompilerServices; + +#if SYSTEM_PRIVATE_CORELIB +using Internal.Runtime.CompilerServices; +#endif #pragma warning disable 0809 //warning CS0809: Obsolete member 'Utf8Span.Equals(object)' overrides non-obsolete member 'object.Equals(object)' @@ -31,15 +34,14 @@ namespace System.Text [StructLayout(LayoutKind.Auto)] public readonly ref partial struct Utf8Span { - //TODO: eerhardt - ///// - ///// Creates a from an existing instance. - ///// - //[MethodImpl(MethodImplOptions.AggressiveInlining)] - //public Utf8Span(Utf8String? value) - //{ - // Bytes = Utf8Extensions.AsBytes(value); - //} + /// + /// Creates a from an existing instance. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Utf8Span(Utf8String? value) + { + Bytes = Utf8Extensions.AsBytes(value); + } /// /// Ctor for internal use only. Caller _must_ validate both invariants hold: @@ -276,14 +278,13 @@ internal unsafe string ToStringNoReplacement() } } - //TODO eerhardt - //public Utf8String ToUtf8String() - //{ - // // TODO_UTF8STRING: Since we know the underlying data is immutable, well-formed UTF-8, - // // we can perform transcoding using an optimized code path that skips all safety checks. 
+ public Utf8String ToUtf8String() + { + // TODO_UTF8STRING: Since we know the underlying data is immutable, well-formed UTF-8, + // we can perform transcoding using an optimized code path that skips all safety checks. - // return Utf8String.UnsafeCreateWithoutValidation(Bytes); - //} + return Utf8String.UnsafeCreateWithoutValidation(Bytes); + } /// /// Wraps a instance around the provided , diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs index 111060238f554..d6b06d6b11cb7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs @@ -12,7 +12,6 @@ namespace System.Text { - [StructLayout(LayoutKind.Auto)] public readonly ref partial struct Utf8Span { public Utf8Span this[Range range] diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs index de9fe7f338d0c..7b477053fa506 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs @@ -8,7 +8,7 @@ namespace System.Text { - internal abstract class Utf8StringComparer //TODO eerhardt : IComparer, IEqualityComparer + internal abstract class Utf8StringComparer : IComparer, IEqualityComparer { // Nobody except for nested classes can create instances of this type. private Utf8StringComparer() { } @@ -46,12 +46,12 @@ public static Utf8StringComparer FromComparison(StringComparison comparisonType) }; } - //public abstract int Compare(Utf8String? x, Utf8String? y); + public abstract int Compare(Utf8String? x, Utf8String? y); public abstract int Compare(Utf8Span x, Utf8Span y); - //public abstract bool Equals(Utf8String? x, Utf8String? y); + public abstract bool Equals(Utf8String? 
x, Utf8String? y); public abstract bool Equals(Utf8Span x, Utf8Span y); #pragma warning disable CS8614 // Remove warning disable when nullable attributes are respected - //public abstract int GetHashCode(Utf8String obj); + public abstract int GetHashCode(Utf8String obj); #pragma warning restore CS8614 public abstract int GetHashCode(Utf8Span obj); @@ -71,12 +71,12 @@ internal CultureAwareComparer(CompareInfo compareInfo, CompareOptions options) _options = options; } - //public override int Compare(Utf8String? x, Utf8String? y) - //{ - // // TODO_UTF8STRING: Avoid the allocations below. + public override int Compare(Utf8String? x, Utf8String? y) + { + // TODO_UTF8STRING: Avoid the allocations below. - // return _compareInfo.Compare(x?.ToString(), y?.ToString(), _options); - //} + return _compareInfo.Compare(x?.ToString(), y?.ToString(), _options); + } public override int Compare(Utf8Span x, Utf8Span y) { @@ -85,15 +85,15 @@ public override int Compare(Utf8Span x, Utf8Span y) return _compareInfo.Compare(x.ToString(), y.ToString(), _options); } - //public override bool Equals(Utf8String? x, Utf8String? y) => Compare(x, y) == 0; + public override bool Equals(Utf8String? x, Utf8String? y) => Compare(x, y) == 0; public override bool Equals(Utf8Span x, Utf8Span y) => Compare(x, y) == 0; - //public override int GetHashCode(Utf8String? obj) - //{ - // // TODO_UTF8STRING: Avoid the allocations below. + public override int GetHashCode(Utf8String? obj) + { + // TODO_UTF8STRING: Avoid the allocations below. - // return (obj is null) ? 0 : _compareInfo.GetHashCode(obj.ToString(), _options); - //} + return (obj is null) ? 0 : _compareInfo.GetHashCode(obj.ToString(), _options); + } public override int GetHashCode(Utf8Span obj) { @@ -110,12 +110,12 @@ private sealed class OrdinalComparer : Utf8StringComparer // All accesses must be through the static factory. private OrdinalComparer() { } - //public override int Compare(Utf8String? x, Utf8String? 
y) - //{ - // // TODO_UTF8STRING: Avoid the allocations below. + public override int Compare(Utf8String? x, Utf8String? y) + { + // TODO_UTF8STRING: Avoid the allocations below. - // return string.CompareOrdinal(x?.ToString(), y?.ToString()); - //} + return string.CompareOrdinal(x?.ToString(), y?.ToString()); + } public override int Compare(Utf8Span x, Utf8Span y) { @@ -124,9 +124,9 @@ public override int Compare(Utf8Span x, Utf8Span y) return string.CompareOrdinal(x.ToString(), y.ToString()); } - //public override bool Equals(Utf8String? x, Utf8String? y) => Utf8String.Equals(x, y); + public override bool Equals(Utf8String? x, Utf8String? y) => Utf8String.Equals(x, y); public override bool Equals(Utf8Span x, Utf8Span y) => Utf8Span.Equals(x, y); - //public override int GetHashCode(Utf8String obj) => obj.GetHashCode(); + public override int GetHashCode(Utf8String obj) => obj.GetHashCode(); public override int GetHashCode(Utf8Span obj) => obj.GetHashCode(); } @@ -137,12 +137,12 @@ private sealed class OrdinalIgnoreCaseComparer : Utf8StringComparer // All accesses must be through the static factory. private OrdinalIgnoreCaseComparer() { } - //public override int Compare(Utf8String? x, Utf8String? y) - //{ - // // TODO_UTF8STRING: Avoid the allocations below. + public override int Compare(Utf8String? x, Utf8String? y) + { + // TODO_UTF8STRING: Avoid the allocations below. - // return StringComparer.OrdinalIgnoreCase.Compare(x?.ToString(), y?.ToString()); - //} + return StringComparer.OrdinalIgnoreCase.Compare(x?.ToString(), y?.ToString()); + } public override int Compare(Utf8Span x, Utf8Span y) { @@ -151,12 +151,12 @@ public override int Compare(Utf8Span x, Utf8Span y) return StringComparer.OrdinalIgnoreCase.Compare(x.ToString(), y.ToString()); } - //public override bool Equals(Utf8String? x, Utf8String? y) - //{ - // // TODO_UTF8STRING: Avoid the allocations below. + public override bool Equals(Utf8String? x, Utf8String? 
y) + { + // TODO_UTF8STRING: Avoid the allocations below. - // return StringComparer.OrdinalIgnoreCase.Equals(x?.ToString(), y?.ToString()); - //} + return StringComparer.OrdinalIgnoreCase.Equals(x?.ToString(), y?.ToString()); + } public override bool Equals(Utf8Span x, Utf8Span y) { @@ -165,12 +165,12 @@ public override bool Equals(Utf8Span x, Utf8Span y) return StringComparer.OrdinalIgnoreCase.Equals(x.ToString(), y.ToString()); } - //public override int GetHashCode(Utf8String obj) - //{ - // // TODO_UTF8STRING: Avoid the allocations below. + public override int GetHashCode(Utf8String obj) + { + // TODO_UTF8STRING: Avoid the allocations below. - // return StringComparer.OrdinalIgnoreCase.GetHashCode(obj.ToString()); - //} + return StringComparer.OrdinalIgnoreCase.GetHashCode(obj.ToString()); + } public override int GetHashCode(Utf8Span obj) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs index 9a7d37b885034..86b310410f60f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs @@ -9,7 +9,7 @@ namespace System { - public static class Utf8Extensions + public static partial class Utf8Extensions { /// /// Projects as a . @@ -30,7 +30,7 @@ public static ReadOnlySpan AsBytes(this Utf8String? text) if (text is null) return default; - return new ReadOnlySpan(ref text.DangerousGetMutableReference(), text.Length); + return CreateSpan(text); } /// @@ -55,7 +55,7 @@ public static ReadOnlySpan AsBytes(this Utf8String? text, int start) if ((uint)start > (uint)text.Length) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); - return new ReadOnlySpan(ref text.DangerousGetMutableReference(start), text.Length - start); + return CreateSpan(text, start); } /// @@ -87,7 +87,7 @@ public static ReadOnlySpan AsBytes(this Utf8String? 
text, int start, int l ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); #endif - return new ReadOnlySpan(ref text.DangerousGetMutableReference(start), length); + return CreateSpan(text, start, length); } /// @@ -135,10 +135,10 @@ public static Utf8Span AsSpan(this Utf8String? text, int start) if (Utf8Utility.IsUtf8ContinuationByte(text.DangerousGetMutableReference(start))) { - Utf8String.ThrowImproperStringSplit(); + // TODO: eerhardt Utf8String.ThrowImproperStringSplit(); } - return Utf8Span.UnsafeCreateWithoutValidation(new ReadOnlySpan(ref text.DangerousGetMutableReference(start), text.Length - start)); + return Utf8Span.UnsafeCreateWithoutValidation(CreateSpan(text, start)); } /// @@ -180,12 +180,12 @@ public static Utf8Span AsSpan(this Utf8String? text, int start, int length) if (Utf8Utility.IsUtf8ContinuationByte(text.DangerousGetMutableReference(start)) || Utf8Utility.IsUtf8ContinuationByte(text.DangerousGetMutableReference(start + length))) { - Utf8String.ThrowImproperStringSplit(); + // TODO: eerhardt Utf8String.ThrowImproperStringSplit(); } - return Utf8Span.UnsafeCreateWithoutValidation(new ReadOnlySpan(ref text.DangerousGetMutableReference(start), length)); + return Utf8Span.UnsafeCreateWithoutValidation(CreateSpan(text, start, length)); } - +/* // TODO eerhardt /// Creates a new over the portion of the target . /// The target . /// Returns default when is null. @@ -389,7 +389,7 @@ public static ReadOnlyMemory AsMemoryBytes(this Utf8String? text, Range ra (int start, int length) = range.GetOffsetAndLength(text.Length); return new ReadOnlyMemory(text, start, length); } - +*/ /// /// Returns a representation of this instance. 
/// diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs index aedea4d935f03..71595b40daf32 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs @@ -13,10 +13,15 @@ namespace System { public sealed partial class Utf8String { +#if CORECLR // TODO: eerhardt private const int MAX_STACK_TRANSCODE_CHAR_COUNT = 128; // For values beyond U+FFFF, it's 4 UTF-8 bytes per 2 UTF-16 chars (2:1 ratio) private const int MAX_UTF8_BYTES_PER_UTF16_CHAR = 3; +<<<<<<< HEAD +======= +#endif +>>>>>>> Get Utf8String building on netstandard2.0 /* * STATIC FACTORIES @@ -48,7 +53,11 @@ public static bool TryCreateFrom(ReadOnlySpan buffer, [NotNullWhen(true)] // Create and populate the Utf8String instance. Utf8String newString = FastAllocateSkipZeroInit(buffer.Length); +#if CORECLR Buffer.Memmove(ref newString.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)buffer.Length); +#else + buffer.CopyTo(newString.DangerousGetMutableSpan()); +#endif // Now perform validation. // Reminder: Perform validation over the copy, not over the source. @@ -65,6 +74,7 @@ public static bool TryCreateFrom(ReadOnlySpan buffer, [NotNullWhen(true)] } } +#if CORECLR // TODO: eerhardt /// /// Creates a instance from existing UTF-16 data, transcoding the /// existing data to UTF-8 upon creation. @@ -90,6 +100,7 @@ public static bool TryCreateFrom(ReadOnlySpan buffer, [NotNullWhen(true)] value = CreateFromUtf16Common(buffer, replaceInvalidSequences: false); return !(value is null); } +#endif /// /// Creates a instance from existing UTF-8 data. @@ -111,13 +122,18 @@ public static Utf8String CreateFromRelaxed(ReadOnlySpan buffer) // Create and populate the Utf8String instance. 
Utf8String newString = FastAllocateSkipZeroInit(buffer.Length); +#if CORECLR Buffer.Memmove(ref newString.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)buffer.Length); +#else + buffer.CopyTo(newString.DangerousGetMutableSpan()); +#endif // Now perform validation & fixup. return Utf8Utility.ValidateAndFixupUtf8String(newString); } +#if CORECLR // TODO: eerhardt /// /// Creates a instance from existing UTF-16 data. /// @@ -145,6 +161,7 @@ public static Utf8String CreateFromRelaxed(ReadOnlySpan buffer) return newString; } +#endif internal static Utf8String CreateFromRune(Rune value) { @@ -169,6 +186,7 @@ internal static Utf8String CreateFromRune(Rune value) } } +#if CORECLR // TODO: eerhardt // Returns 'null' if the input buffer does not represent well-formed UTF-16 data and 'replaceInvalidSequences' is false. private static Utf8String? CreateFromUtf16Common(ReadOnlySpan value, bool replaceInvalidSequences) { @@ -255,98 +273,7 @@ internal static Utf8String CreateFromRune(Rune value) return newString; } - - /// - /// Creates a new instance, allowing the provided delegate to populate the - /// instance data of the returned object. - /// - /// Type of the state object provided to . - /// The length, in bytes, of the instance to create. - /// The state object to provide to . - /// The callback which will be invoked to populate the returned . - /// - /// Thrown if populates the buffer with ill-formed UTF-8 data. - /// - /// - /// The runtime will perform UTF-8 validation over the contents provided by the delegate. - /// If an invalid UTF-8 subsequence is detected, an exception is thrown. 
- /// - public static Utf8String Create(int length, TState state, SpanAction action) - { - if (length < 0) - { - ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); - } - - if (action is null) - { - ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); - } - - if (length == 0) - { - return Empty; // special-case empty input - } - - // Create and populate the Utf8String instance. - // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. - - Utf8String newString = FastAllocate(length); - action(newString.DangerousGetMutableSpan(), state); - - // Now perform validation. - - if (!Utf8Utility.IsWellFormedUtf8(newString.AsBytes())) - { - throw new ArgumentException( - message: SR.Utf8String_CallbackProvidedMalformedData, - paramName: nameof(action)); - } - - return newString; - } - - /// - /// Creates a new instance, allowing the provided delegate to populate the - /// instance data of the returned object. - /// - /// Type of the state object provided to . - /// The length, in bytes, of the instance to create. - /// The state object to provide to . - /// The callback which will be invoked to populate the returned . - /// - /// The runtime will perform UTF-8 validation over the contents provided by the delegate. - /// If an invalid UTF-8 subsequence is detected, the invalid subsequence is replaced with - /// in the returned instance. This could result in the returned instance - /// having a different byte length than specified by the parameter. - /// - public static Utf8String CreateRelaxed(int length, TState state, SpanAction action) - { - if (length < 0) - { - ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); - } - - if (action is null) - { - ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); - } - - if (length == 0) - { - return Empty; // special-case empty input - } - - // Create and populate the Utf8String instance. 
- // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. - - Utf8String newString = FastAllocate(length); - action(newString.DangerousGetMutableSpan(), state); - - // Now perform validation and fixup. - - return Utf8Utility.ValidateAndFixupUtf8String(newString); - } +#endif /// /// Creates a new instance populated with a copy of the provided contents. @@ -379,51 +306,6 @@ public static Utf8String UnsafeCreateWithoutValidation(ReadOnlySpan utf8Co return newString; } - /// - /// Creates a new instance, allowing the provided delegate to populate the - /// instance data of the returned object. Please see remarks for important safety information about - /// this method. - /// - /// Type of the state object provided to . - /// The length, in bytes, of the instance to create. - /// The state object to provide to . - /// The callback which will be invoked to populate the returned . - /// - /// This factory method can be used as an optimization to skip the validation step that - /// normally performs. The contract - /// of this method requires that populate the buffer with well-formed UTF-8 - /// data, as contractually guarantees that it contains only well-formed UTF-8 data, - /// and runtime instability could occur if a caller violates this guarantee. - /// - public static Utf8String UnsafeCreateWithoutValidation(int length, TState state, SpanAction action) - { - if (length < 0) - { - ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); - } - - if (action is null) - { - ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); - } - - if (length == 0) - { - return Empty; // special-case empty input - } - - // Create and populate the Utf8String instance. - // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. 
- - Utf8String newString = FastAllocate(length); - action(newString.DangerousGetMutableSpan(), state); - - // The line below is removed entirely in release builds. - - Debug.Assert(Utf8Utility.IsWellFormedUtf8(newString.AsBytes()), "Callback populated the buffer with ill-formed UTF-8 data."); - - return newString; - } /* * HELPER METHODS diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs index 7d9511d6f818d..bdd7dedf91587 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs @@ -8,7 +8,10 @@ using System.Runtime.CompilerServices; using System.Text; using System.Text.Unicode; + +#if CORECLR using Internal.Runtime.CompilerServices; +#endif #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types #if TARGET_64BIT @@ -53,6 +56,7 @@ public sealed partial class Utf8String : IComparable, IEquatable new Utf8Span(value); /* +<<<<<<< HEAD * INDEXERS */ @@ -73,6 +77,8 @@ public Utf8String this[Range range] } /* +======= +>>>>>>> Get Utf8String building on netstandard2.0 * METHODS */ @@ -122,7 +128,11 @@ internal ref byte DangerousGetMutableReference(nuint index) // Allow retrieving references to the null terminator. Debug.Assert(index <= (uint)Length, "Caller should've performed bounds checking."); +#if CORECLR return ref Unsafe.AddByteOffset(ref DangerousGetMutableReference(), index); +#else + return ref Unsafe.AddByteOffset(ref DangerousGetMutableReference(), (IntPtr)index); // TODO: IntPtr cast? +#endif } /// @@ -149,7 +159,11 @@ public bool Equals(Utf8String? 
value) return !(value is null) && this.Length == value.Length +#if CORECLR && SpanHelpers.SequenceEqual(ref this.DangerousGetMutableReference(), ref value.DangerousGetMutableReference(), (uint)Length); +#else + && this.DangerousGetMutableSpan().SequenceEqual(value.DangerousGetMutableSpan()); +#endif } /// @@ -174,7 +188,11 @@ public static bool Equals(Utf8String? left, Utf8String? right) return !(left is null) && !(right is null) && left.Length == right.Length +#if CORECLR && SpanHelpers.SequenceEqual(ref left.DangerousGetMutableReference(), ref right.DangerousGetMutableReference(), (uint)left.Length); +#else + && left.DangerousGetMutableSpan().SequenceEqual(right.DangerousGetMutableSpan()); +#endif } /// @@ -196,7 +214,11 @@ public override int GetHashCode() // TODO_UTF8STRING: Consider whether this should use a different seed than String.GetHashCode. ulong seed = Marvin.DefaultSeed; +#if CORECLR return Marvin.ComputeHash32(ref DangerousGetMutableReference(), (uint)_length /* in bytes */, (uint)seed, (uint)(seed >> 32)); +#else + return Marvin.ComputeHash32(_bytes, seed); +#endif } /// @@ -238,10 +260,11 @@ public static bool IsNullOrWhiteSpace([NotNullWhen(false)] Utf8String? value) return (value is null) || value.AsSpan().IsEmptyOrWhiteSpace(); } - /// - /// Returns the entire as an array of UTF-8 bytes. - /// - public byte[] ToByteArray() => this.AsSpanSkipNullCheck().ToByteArray(); + // TODO: eerhardt - need Utf8Span.Conversion.cs + ///// + ///// Returns the entire as an array of UTF-8 bytes.GetPinnableReference + ///// + //public byte[] ToByteArray() => this.AsSpanSkipNullCheck().ToByteArray(); /// /// Converts this instance to a . @@ -250,44 +273,20 @@ public override string ToString() { // TODO_UTF8STRING: Optimize the call below, potentially by avoiding the two-pass. +#if CORECLR || NETCOREAPP return Encoding.UTF8.GetString(this.AsBytesSkipNullCheck()); - } - - /// - /// Converts this instance to a . 
- /// - /// - /// This routine throws if the underlying instance - /// contains invalid UTF-8 data. - /// - internal unsafe string ToStringNoReplacement() - { - // TODO_UTF8STRING: Optimize the call below, potentially by avoiding the two-pass. - - int utf16CharCount; - - fixed (byte* pData = &_firstByte) +#else + byte[] buffer = ArrayPool.Shared.Rent(Length); + try { - byte* pFirstInvalidByte = Utf8Utility.GetPointerToFirstInvalidByte(pData, this.Length, out int utf16CodeUnitCountAdjustment, out _); - if (pFirstInvalidByte != pData + (uint)this.Length) - { - // Saw bad UTF-8 data. - // TODO_UTF8STRING: Throw a better exception below? - - ThrowHelper.ThrowInvalidOperationException(); - } - - utf16CharCount = this.Length + utf16CodeUnitCountAdjustment; - Debug.Assert(utf16CharCount <= this.Length && utf16CharCount >= 0); + _bytes.CopyTo(buffer.AsSpan()); + return Encoding.UTF8.GetString(buffer, 0, Length); } - - // TODO_UTF8STRING: Can we call string.FastAllocate directly? - - return string.Create(utf16CharCount, this, (chars, thisObj) => + finally { - OperationStatus status = Utf8.ToUtf16(thisObj.AsBytes(), chars, out _, out _, replaceInvalidSequences: false); - Debug.Assert(status == OperationStatus.Done, "Did somebody mutate this Utf8String instance unexpectedly?"); - }); + ArrayPool.Shared.Return(buffer); + } +#endif } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.netcoreapp.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.netcoreapp.cs new file mode 100644 index 0000000000000..341b75aae444d --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.netcoreapp.cs @@ -0,0 +1,175 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Buffers; +using System.ComponentModel; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Text; +using System.Text.Unicode; + +namespace System +{ + public sealed partial class Utf8String + { + /* + * INDEXERS + */ + + public Utf8String this[Range range] + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get + { + // The two lines immediately below provide no bounds checking. + // The Substring method we call will both perform a bounds check + // and check for an improper split across a multi-byte subsequence. + + int startIdx = range.Start.GetOffset(Length); + int endIdx = range.End.GetOffset(Length); + + return Substring(startIdx, endIdx - startIdx); + } + } + + /// + /// Creates a new instance, allowing the provided delegate to populate the + /// instance data of the returned object. + /// + /// Type of the state object provided to . + /// The length, in bytes, of the instance to create. + /// The state object to provide to . + /// The callback which will be invoked to populate the returned . + /// + /// Thrown if populates the buffer with ill-formed UTF-8 data. + /// + /// + /// The runtime will perform UTF-8 validation over the contents provided by the delegate. + /// If an invalid UTF-8 subsequence is detected, an exception is thrown. + /// + public static Utf8String Create(int length, TState state, SpanAction action) + { + if (length < 0) + { + ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); + } + + if (action is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); + } + + if (length == 0) + { + return Empty; // special-case empty input + } + + // Create and populate the Utf8String instance. + // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. 
+ + Utf8String newString = FastAllocate(length); + action(newString.DangerousGetMutableSpan(), state); + + // Now perform validation. + + if (!Utf8Utility.IsWellFormedUtf8(newString.AsBytes())) + { + throw new ArgumentException( + message: SR.Utf8String_CallbackProvidedMalformedData, + paramName: nameof(action)); + } + + return newString; + } + + /// + /// Creates a new instance, allowing the provided delegate to populate the + /// instance data of the returned object. + /// + /// Type of the state object provided to . + /// The length, in bytes, of the instance to create. + /// The state object to provide to . + /// The callback which will be invoked to populate the returned . + /// + /// The runtime will perform UTF-8 validation over the contents provided by the delegate. + /// If an invalid UTF-8 subsequence is detected, the invalid subsequence is replaced with + /// in the returned instance. This could result in the returned instance + /// having a different byte length than specified by the parameter. + /// + public static Utf8String CreateRelaxed(int length, TState state, SpanAction action) + { + if (length < 0) + { + ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); + } + + if (action is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); + } + + if (length == 0) + { + return Empty; // special-case empty input + } + + // Create and populate the Utf8String instance. + // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. + + Utf8String newString = FastAllocate(length); + action(newString.DangerousGetMutableSpan(), state); + + // Now perform validation and fixup. + + return Utf8Utility.ValidateAndFixupUtf8String(newString); + } + + /// + /// Creates a new instance, allowing the provided delegate to populate the + /// instance data of the returned object. Please see remarks for important safety information about + /// this method. 
+ /// + /// Type of the state object provided to . + /// The length, in bytes, of the instance to create. + /// The state object to provide to . + /// The callback which will be invoked to populate the returned . + /// + /// This factory method can be used as an optimization to skip the validation step that + /// normally performs. The contract + /// of this method requires that populate the buffer with well-formed UTF-8 + /// data, as contractually guarantees that it contains only well-formed UTF-8 data, + /// and runtime instability could occur if a caller violates this guarantee. + /// + public static Utf8String UnsafeCreateWithoutValidation(int length, TState state, SpanAction action) + { + if (length < 0) + { + ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); + } + + if (action is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); + } + + if (length == 0) + { + return Empty; // special-case empty input + } + + // Create and populate the Utf8String instance. + // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. + + Utf8String newString = FastAllocate(length); + action(newString.DangerousGetMutableSpan(), state); + + // The line below is removed entirely in release builds. + + Debug.Assert(Utf8Utility.IsWellFormedUtf8(newString.AsBytes()), "Callback populated the buffer with ill-formed UTF-8 data."); + + return newString; + } + } +} diff --git a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx index e6a62fd36ce32..d9472723eea42 100644 --- a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx +++ b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx @@ -132,4 +132,10 @@ Cannot call Utf8Span.Equals(object). Use Equals(Utf8Span) or operator == instead. + + The input buffer contained ill-formed UTF-16 data. 
+ + + The input buffer contained ill-formed UTF-8 data. + \ No newline at end of file diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index a1fba3855afcb..0a0c4eae5f9bc 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -24,6 +24,8 @@ + + Common\System\Marvin.cs @@ -63,27 +65,30 @@ System\Text\Unicode\Utf8Utility.Validation.cs - - System\Text\Unicode\Utf8Utility.WhiteSpace.cs + + System\Text\Unicode\Utf8Utility.WhiteSpace.netstandard.cs + + + System\Utf8StringSplitOptions.cs System\Text\Utf8Span.cs - System\Utf8String.Construction.cs - + - System\Text\Utf8Span.Enumeration.cs - + + + System\Text\Utf8Span.Searching.cs + --> diff --git a/src/libraries/System.Utf8String.Experimental/src/System/IO/Utf8StringStream.cs b/src/libraries/System.Utf8String.Experimental/src/System/IO/Utf8StringStream.cs index 39bf7ebf469b3..1fd9ef1c1197c 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/IO/Utf8StringStream.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/IO/Utf8StringStream.cs @@ -57,7 +57,11 @@ public override int Read(byte[] buffer, int offset, int count) return Read(new Span(buffer, offset, count)); } - public override int Read(Span buffer) + public +#if NETCOREAPP + override +#endif + int Read(Span buffer) { ReadOnlySpan contentToWrite = _content.AsBytes(_position); if (buffer.Length < contentToWrite.Length) @@ -76,10 +80,12 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel return Task.FromResult(Read(new Span(buffer, offset, count))); } +#if NETCOREAPP public override ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) { return new ValueTask(Read(buffer.Span)); } +#endif public override int 
ReadByte() { @@ -122,12 +128,15 @@ public override long Seek(long offset, SeekOrigin origin) public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException(); +#if NETCOREAPP public override void Write(ReadOnlySpan buffer) => throw new NotSupportedException(); +#endif public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) => throw new NotSupportedException(); +#if NETCOREAPP public override ValueTask WriteAsync(ReadOnlyMemory buffer, CancellationToken cancellationToken = default) => throw new NotSupportedException(); - +#endif public override void WriteByte(byte value) => throw new NotSupportedException(); } } diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs index 5f7ab00395b15..cf4a9f39959cd 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs @@ -2,8 +2,11 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +#nullable enable +using System.Buffers; using System.IO; using System.Net.Http.Headers; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; @@ -40,11 +43,35 @@ public Utf8StringContent(Utf8String content, string? mediaType) protected override Task CreateContentReadStreamAsync() => Task.FromResult(new Utf8StringStream(_content)); +#if NETCOREAPP protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context) => SerializeToStreamAsync(stream, context, default); protected override Task SerializeToStreamAsync(Stream stream, TransportContext? 
context, CancellationToken cancellationToken) => stream.WriteAsync(_content.AsMemoryBytes(), cancellationToken).AsTask(); +#else + protected async override Task SerializeToStreamAsync(Stream stream, TransportContext? context) + { + ReadOnlyMemory buffer = _content.AsMemoryBytes(); + if (MemoryMarshal.TryGetArray(buffer, out ArraySegment array)) + { + await stream.WriteAsync(array.Array, array.Offset, array.Count).ConfigureAwait(false); + } + else + { + byte[] localBuffer = ArrayPool.Shared.Rent(buffer.Length); + try + { + buffer.Span.CopyTo(localBuffer); + await stream.WriteAsync(localBuffer, 0, buffer.Length).ConfigureAwait(false); + } + finally + { + ArrayPool.Shared.Return(localBuffer); + } + } + } +#endif protected override bool TryComputeLength(out long length) { diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs index 0fe792f8757d4..34d8051bb85b6 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs @@ -40,7 +40,6 @@ public abstract class X64 public static bool IsSupported { get; } = false; public static ulong LeadingZeroCount(ulong value) => throw new PlatformNotSupportedException(); } - public static bool IsSupported { get; } = false; public static uint LeadingZeroCount(uint value) => throw new PlatformNotSupportedException(); } diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs b/src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs index db6276c21d66b..8d5a72fc3fc3f 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Text/ThrowHelper.cs @@ -56,5 +56,6 @@ internal enum ExceptionArgument index, 
input, value, + start, } } diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs new file mode 100644 index 0000000000000..bead1fa36aef8 --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs @@ -0,0 +1,22 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; + +namespace System +{ + public static partial class Utf8Extensions + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ReadOnlySpan CreateSpan(Utf8String text) => text.DangerousGetMutableSpan(); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ReadOnlySpan CreateSpan(Utf8String text, int start) => + text.DangerousGetMutableSpan().Slice(start); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ReadOnlySpan CreateSpan(Utf8String text, int start, int length) => + text.DangerousGetMutableSpan().Slice(start, length); + } +} diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs new file mode 100644 index 0000000000000..c4f18866b34cc --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs @@ -0,0 +1,109 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.ComponentModel; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace System +{ + public sealed partial class Utf8String + { + private static ReadOnlySpan s_EmptyRef => new byte[] { 0x00 }; + private readonly byte[] _bytes; + + /// + /// Returns the length (in UTF-8 code units, or s) of this instance. + /// + public int Length => _bytes.Length - 1; // -1 because the bytes are always null-terminated + + public Utf8String(ReadOnlySpan value) + { + _bytes = Array.Empty(); //TODO: eerhardt //TODO: eerhardt + } + + public Utf8String(byte[] value, int startIndex, int length) + { + _bytes = Array.Empty(); //TODO: eerhardt + } + + [CLSCompliant(false)] + public unsafe Utf8String(byte* value) + { + _bytes = Array.Empty(); //TODO: eerhardt + } + + public Utf8String(ReadOnlySpan value) + { + _bytes = Array.Empty(); //TODO: eerhardt + } + + public Utf8String(char[] value, int startIndex, int length) + { + _bytes = Array.Empty(); //TODO: eerhardt + } + + [CLSCompliant(false)] + public unsafe Utf8String(char* value) + { + _bytes = Array.Empty(); //TODO: eerhardt + } + + public Utf8String(string value) + { + _bytes = Array.Empty(); //TODO: eerhardt + } + + private Utf8String(byte[] bytes) + { + _bytes = bytes; + } + + /// + /// Returns a mutable reference to the first byte of this + /// (or the null terminator if the string is empty). + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal ref byte DangerousGetMutableReference() => + ref MemoryMarshal.GetReference(_bytes.Length > 0 ? _bytes.AsSpan() : s_EmptyRef); + + /// + /// Returns a mutable that can be used to populate this + /// instance. Only to be used during construction. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal Span DangerousGetMutableSpan() => _bytes; + + /// + /// Gets an immutable reference that can be used in a statement. The resulting + /// reference can be pinned and used as a null-terminated LPCUTF8STR. 
+ /// + /// + /// If this instance is empty, returns a reference to the null terminator. + /// + [EditorBrowsable(EditorBrowsableState.Never)] // for compiler use only + public ref readonly byte GetPinnableReference() => ref _bytes.AsSpan().GetPinnableReference(); + + /// + /// Similar to , but skips the null check on the input. + /// Throws a if the input is null. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal ReadOnlySpan AsBytesSkipNullCheck() => _bytes; + + /// + /// Creates a new zero-initialized instance of the specified length. Actual storage allocated is "length + 1" bytes + /// because instances are null-terminated. + /// + /// + /// The implementation of this method checks its input argument for overflow. + /// + private static Utf8String FastAllocate(int length) + { + // just simulate a "fast allocate", since this is portable + return new Utf8String(new byte[length + 1]); + } + } +} From ee192250d875118535bdc959165f78c10ba90ce0 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Tue, 3 Mar 2020 12:11:29 -0600 Subject: [PATCH 06/26] Get Utf8StringContent building on netstandard2.0 again. 
--- .../src/System/Utf8Extensions.CoreCLR.cs | 4 ++++ .../src/System/Utf8Extensions.cs | 12 +++++++----- .../src/System.Utf8String.Experimental.csproj | 3 +-- .../src/System/Utf8Extensions.Portable.cs | 4 ++++ .../src/System/Utf8String.Portable.cs | 4 ++++ 5 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs index ef0fbe9334398..6e9199abb8466 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs @@ -19,5 +19,9 @@ private static ReadOnlySpan CreateSpan(Utf8String text, int start) => [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ReadOnlySpan CreateSpan(Utf8String text, int start, int length) => new ReadOnlySpan(ref text.DangerousGetMutableReference(start), length); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ReadOnlyMemory CreateMemoryBytes(Utf8String text, int start, int length) => + new ReadOnlyMemory(text, start, length); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs index 86b310410f60f..14cddf23ff1d1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs @@ -287,6 +287,7 @@ public static ReadOnlyMemory AsMemory(this Utf8String? text, Range range) (int start, int length) = range.GetOffsetAndLength(text.Length); return new ReadOnlyMemory(text, start, length); } +*/ /// Creates a new over the portion of the target . /// The target . @@ -296,7 +297,7 @@ public static ReadOnlyMemory AsMemoryBytes(this Utf8String? 
text) if (text is null) return default; - return new ReadOnlyMemory(text, 0, text.Length); + return CreateMemoryBytes(text, 0, text.Length); } /// Creates a new over the portion of the target . @@ -318,9 +319,9 @@ public static ReadOnlyMemory AsMemoryBytes(this Utf8String? text, int star if ((uint)start > (uint)text.Length) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); - return new ReadOnlyMemory(text, start, text.Length - start); + return CreateMemoryBytes(text, start, text.Length - start); } - +/* // TODO eerhardt /// Creates a new over the portion of the target . /// The target . /// The index at which to begin this slice. @@ -340,6 +341,7 @@ public static ReadOnlyMemory AsMemoryBytes(this Utf8String? text, Index st return new ReadOnlyMemory(text, actualIndex, text.Length - actualIndex); } +*/ /// Creates a new over the portion of the target . /// The target . @@ -367,9 +369,9 @@ public static ReadOnlyMemory AsMemoryBytes(this Utf8String? text, int star ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); #endif - return new ReadOnlyMemory(text, start, length); + return CreateMemoryBytes(text, start, length); } - +/* // TODO eerhardt /// Creates a new over the portion of the target . /// The target . /// The range used to indicate the start and length of the sliced string. 
diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index 0a0c4eae5f9bc..a6d118651478f 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -15,8 +15,7 @@ false >>>>>>> Get Rune working on netstandard2.0 - - + diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs index bead1fa36aef8..fca325f2ff4d3 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs @@ -18,5 +18,9 @@ private static ReadOnlySpan CreateSpan(Utf8String text, int start) => [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ReadOnlySpan CreateSpan(Utf8String text, int start, int length) => text.DangerousGetMutableSpan().Slice(start, length); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ReadOnlyMemory CreateMemoryBytes(Utf8String text, int start, int length) => + text.CreateMemoryBytes(start, length); } } diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs index c4f18866b34cc..fef907166c626 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs @@ -105,5 +105,9 @@ private static Utf8String FastAllocate(int length) // just simulate a "fast allocate", since this is portable return new Utf8String(new byte[length + 1]); } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal 
ReadOnlyMemory CreateMemoryBytes(int start, int length) => + _bytes.AsMemory(start, length); } } From 8193573dfa6947d2a7d2f8133a128ca2f5538e04 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Tue, 3 Mar 2020 12:50:13 -0600 Subject: [PATCH 07/26] Compile Utf8String/Span.Conversion.cs for netstandard. --- .../src/System/Text/Unicode/Utf8.cs | 11 ++++++- .../Text/Unicode/Utf8Utility.Transcoding.cs | 4 +++ .../src/System/Text/Utf8Span.Conversion.cs | 32 +++++++++++++++---- .../src/System/Utf8String.cs | 9 +++--- .../src/System.Utf8String.Experimental.csproj | 14 +++++--- .../Runtime/Intrinsics/Intrinsics.Shims.cs | 6 ++++ .../src/System/Utf8String.Portable.cs | 2 +- 7 files changed, 61 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8.cs index 1904df7dbf5d5..7992cd52a0f79 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8.cs @@ -4,12 +4,21 @@ using System.Buffers; using System.Diagnostics; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; + +#if SYSTEM_PRIVATE_CORELIB using Internal.Runtime.CompilerServices; +#endif namespace System.Text.Unicode { - public static class Utf8 +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + static class Utf8 { /* * OperationStatus-based APIs for transcoding of chunked data. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs index b9d7fd305b609..bf6f31f1b4f72 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs @@ -6,9 +6,13 @@ using System.Buffers.Binary; using System.Diagnostics; using System.Numerics; +using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; + +#if SYSTEM_PRIVATE_CORELIB using Internal.Runtime.CompilerServices; +#endif #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types #if TARGET_64BIT diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs index 48754b0097b3c..ad6586b61714c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs @@ -43,7 +43,11 @@ public int Normalize(Span destination, NormalizationForm normalizationForm { // TODO_UTF8STRING: Reduce allocations in this code path. 
- ReadOnlySpan normalized = this.ToString().Normalize(normalizationForm); + ReadOnlySpan normalized = this.ToString().Normalize(normalizationForm) +#if NETSTANDARD + .AsSpan() +#endif + ; OperationStatus status = Utf8.FromUtf16(normalized, destination, out int _, out int bytesWritten, replaceInvalidSequences: false, isFinalBlock: true); Debug.Assert(status == OperationStatus.Done || status == OperationStatus.DestinationTooSmall, "Normalize shouldn't have produced malformed Unicode string."); @@ -81,7 +85,7 @@ public unsafe char[] ToCharArray() Debug.Assert(pbUtf8Invalid == pbUtf8 + this.Length, "Invalid UTF-8 data seen in buffer."); char[] asUtf16 = new char[this.Length + utf16CodeUnitCountAdjustment]; - fixed (char* pbUtf16 = &MemoryMarshal.GetArrayDataReference(asUtf16)) + fixed (char* pbUtf16 = asUtf16) { OperationStatus status = Utf8Utility.TranscodeToUtf16(pbUtf8, this.Length, pbUtf16, asUtf16.Length, out byte* pbUtf8End, out char* pchUtf16End); Debug.Assert(status == OperationStatus.Done, "The buffer changed out from under us unexpectedly?"); @@ -158,7 +162,11 @@ public int ToLower(Span destination, CultureInfo culture) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.culture); } - ReadOnlySpan asLower = this.ToString().ToLower(culture); + ReadOnlySpan asLower = this.ToString().ToLower(culture) +#if NETSTANDARD + .AsSpan() +#endif + ; OperationStatus status = Utf8.FromUtf16(asLower, destination, out int _, out int bytesWritten, replaceInvalidSequences: false, isFinalBlock: true); Debug.Assert(status == OperationStatus.Done || status == OperationStatus.DestinationTooSmall, "ToLower shouldn't have produced malformed Unicode string."); @@ -206,7 +214,11 @@ public int ToLowerInvariant(Span destination) { // TODO_UTF8STRING: Avoid intermediate allocations. 
- ReadOnlySpan asLowerInvariant = this.ToString().ToLowerInvariant(); + ReadOnlySpan asLowerInvariant = this.ToString().ToLowerInvariant() +#if NETSTANDARD + .AsSpan() +#endif + ; OperationStatus status = Utf8.FromUtf16(asLowerInvariant, destination, out int _, out int bytesWritten, replaceInvalidSequences: false, isFinalBlock: true); Debug.Assert(status == OperationStatus.Done || status == OperationStatus.DestinationTooSmall, "ToLowerInvariant shouldn't have produced malformed Unicode string."); @@ -262,7 +274,11 @@ public int ToUpper(Span destination, CultureInfo culture) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.culture); } - ReadOnlySpan asUpper = this.ToString().ToUpper(culture); + ReadOnlySpan asUpper = this.ToString().ToUpper(culture) +#if NETSTANDARD + .AsSpan() +#endif + ; OperationStatus status = Utf8.FromUtf16(asUpper, destination, out int _, out int bytesWritten, replaceInvalidSequences: false, isFinalBlock: true); Debug.Assert(status == OperationStatus.Done || status == OperationStatus.DestinationTooSmall, "ToUpper shouldn't have produced malformed Unicode string."); @@ -310,7 +326,11 @@ public int ToUpperInvariant(Span destination) { // TODO_UTF8STRING: Avoid intermediate allocations. 
- ReadOnlySpan asUpperInvariant = this.ToString().ToUpperInvariant(); + ReadOnlySpan asUpperInvariant = this.ToString().ToUpperInvariant() +#if NETSTANDARD + .AsSpan() +#endif + ; OperationStatus status = Utf8.FromUtf16(asUpperInvariant, destination, out int _, out int bytesWritten, replaceInvalidSequences: false, isFinalBlock: true); Debug.Assert(status == OperationStatus.Done || status == OperationStatus.DestinationTooSmall, "ToUpperInvariant shouldn't have produced malformed Unicode string."); diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs index bdd7dedf91587..141eddd4e861c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs @@ -260,11 +260,10 @@ public static bool IsNullOrWhiteSpace([NotNullWhen(false)] Utf8String? value) return (value is null) || value.AsSpan().IsEmptyOrWhiteSpace(); } - // TODO: eerhardt - need Utf8Span.Conversion.cs - ///// - ///// Returns the entire as an array of UTF-8 bytes.GetPinnableReference - ///// - //public byte[] ToByteArray() => this.AsSpanSkipNullCheck().ToByteArray(); + /// + /// Returns the entire as an array of UTF-8 bytes.GetPinnableReference + /// + public byte[] ToByteArray() => this.AsSpanSkipNullCheck().ToByteArray(); /// /// Converts this instance to a . 
diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index a6d118651478f..1bc68a762e3b8 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -55,6 +55,9 @@ System\Text\ASCIIUtility.Helpers.cs + + System\Text\Unicode\Utf8.cs + System\Text\Unicode\Utf8Utility.cs @@ -64,6 +67,9 @@ System\Text\Unicode\Utf8Utility.Validation.cs + + System\Text\Unicode\Utf8Utility.Transcoding.cs + System\Text\Unicode\Utf8Utility.WhiteSpace.netstandard.cs @@ -87,10 +93,10 @@ System\Utf8String.Construction.cs - + System\Text\Utf8Span.Enumeration.cs diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs index 34d8051bb85b6..2ff672158474d 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs @@ -8,11 +8,13 @@ internal static class Vector128 { public static Vector128 Create(short value) => throw new PlatformNotSupportedException(); public static Vector128 Create(ushort value) => throw new PlatformNotSupportedException(); + public static Vector128 CreateScalarUnsafe(ulong value) => throw new PlatformNotSupportedException(); public static Vector128 AsByte(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsInt16(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsUInt16(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsUInt32(this Vector128 
vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsUInt64(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); + public static T GetElement(this Vector128 vector, int index) where T : struct => throw new PlatformNotSupportedException(); } internal readonly struct Vector128 where T : struct @@ -84,6 +86,10 @@ public abstract class X64 internal abstract class Sse41 { + public abstract class X64 + { + public static bool IsSupported { get; } = false; + } public static bool IsSupported { get; } = false; public static bool TestZ(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); public static bool TestZ(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs index fef907166c626..0b1c1cfaf4c71 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs @@ -20,7 +20,7 @@ public sealed partial class Utf8String public Utf8String(ReadOnlySpan value) { - _bytes = Array.Empty(); //TODO: eerhardt //TODO: eerhardt + _bytes = Array.Empty(); //TODO: eerhardt } public Utf8String(byte[] value, int startIndex, int length) From 4e2615ab1268fa570d17a1e61a3fadfee6de2d83 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Tue, 3 Mar 2020 15:11:23 -0600 Subject: [PATCH 08/26] Build Utf8String Comparison and Enumeration for netstandard. 
--- .../Text/Unicode/Utf16Utility.Validation.cs | 4 ++++ .../src/System/Text/Utf8Span.Comparison.cs | 2 +- .../src/System/Utf8String.Comparison.cs | 19 ++++++++++++++++++- .../src/System/Utf8String.Construction.cs | 10 +++------- .../src/System.Utf8String.Experimental.csproj | 14 ++++++++++---- .../Runtime/Intrinsics/Intrinsics.Shims.cs | 8 ++++++++ 6 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs index be733006d599a..caffdbf92e003 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs @@ -5,8 +5,12 @@ using System.Diagnostics; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; +using System.Runtime.CompilerServices; using System.Numerics; + +#if SYSTEM_PRIVATE_CORELIB using Internal.Runtime.CompilerServices; +#endif #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types #if TARGET_64BIT diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs index 1145d21785ba0..a44f14402857f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs @@ -95,7 +95,7 @@ public bool Contains(Utf8Span value, StringComparison comparison) #if SYSTEM_PRIVATE_CORELIB || NETCOREAPP return this.ToString().Contains(value.ToString(), comparison); #else - return this.ToString().IndexOf(value.ToString(), comparison) != -1; + return this.ToString().IndexOf(value.ToString(), comparison) >= 0; #endif } diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs 
b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs index 2d5007e76293b..0417789ae498c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs @@ -58,7 +58,11 @@ public static bool AreEquivalent(Utf8String? utf8Text, string? utf16Text) return false; } - return AreEquivalentOrdinalSkipShortCircuitingChecks(utf8Text.AsBytes(), utf16Text); + return AreEquivalentOrdinalSkipShortCircuitingChecks(utf8Text.AsBytes(), utf16Text +#if NETSTANDARD + .AsSpan() +#endif + ); } /// @@ -172,9 +176,14 @@ public bool Contains(Rune value) Span runeBytes = stackalloc byte[Utf8Utility.MaxBytesPerScalar]; int runeBytesWritten = value.EncodeToUtf8(runeBytes); +#if SYSTEM_PRIVATE_CORELIB return SpanHelpers.IndexOf( ref DangerousGetMutableReference(), Length, ref MemoryMarshal.GetReference(runeBytes), runeBytesWritten) >= 0; +#else + return DangerousGetMutableSpan() + .IndexOf(runeBytes.Slice(0, runeBytesWritten)) >= 0; +#endif } /// @@ -185,7 +194,11 @@ public bool Contains(Rune value, StringComparison comparison) { // TODO_UTF8STRING: Optimize me to avoid allocations. +#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP return ToString().Contains(value.ToString(), comparison); +#else + return ToString().IndexOf(value.ToString(), comparison) >= 0; +#endif } /// @@ -215,7 +228,11 @@ public bool Contains(Utf8String value, StringComparison comparison) // TODO_UTF8STRING: Optimize me to avoid allocations. 
+#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP return ToString().Contains(value.ToString(), comparison); +#else + return ToString().IndexOf(value.ToString(), comparison) >= 0; +#endif } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs index 71595b40daf32..83bbaca71f847 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs @@ -13,15 +13,17 @@ namespace System { public sealed partial class Utf8String { -#if CORECLR // TODO: eerhardt private const int MAX_STACK_TRANSCODE_CHAR_COUNT = 128; // For values beyond U+FFFF, it's 4 UTF-8 bytes per 2 UTF-16 chars (2:1 ratio) private const int MAX_UTF8_BYTES_PER_UTF16_CHAR = 3; <<<<<<< HEAD +<<<<<<< HEAD ======= #endif >>>>>>> Get Utf8String building on netstandard2.0 +======= +>>>>>>> Build Utf8String Comparison and Enumeration for netstandard. /* * STATIC FACTORIES @@ -74,7 +76,6 @@ public static bool TryCreateFrom(ReadOnlySpan buffer, [NotNullWhen(true)] } } -#if CORECLR // TODO: eerhardt /// /// Creates a instance from existing UTF-16 data, transcoding the /// existing data to UTF-8 upon creation. @@ -100,7 +101,6 @@ public static bool TryCreateFrom(ReadOnlySpan buffer, [NotNullWhen(true)] value = CreateFromUtf16Common(buffer, replaceInvalidSequences: false); return !(value is null); } -#endif /// /// Creates a instance from existing UTF-8 data. @@ -133,7 +133,6 @@ public static Utf8String CreateFromRelaxed(ReadOnlySpan buffer) return Utf8Utility.ValidateAndFixupUtf8String(newString); } -#if CORECLR // TODO: eerhardt /// /// Creates a instance from existing UTF-16 data. 
/// @@ -161,7 +160,6 @@ public static Utf8String CreateFromRelaxed(ReadOnlySpan buffer) return newString; } -#endif internal static Utf8String CreateFromRune(Rune value) { @@ -186,7 +184,6 @@ internal static Utf8String CreateFromRune(Rune value) } } -#if CORECLR // TODO: eerhardt // Returns 'null' if the input buffer does not represent well-formed UTF-16 data and 'replaceInvalidSequences' is false. private static Utf8String? CreateFromUtf16Common(ReadOnlySpan value, bool replaceInvalidSequences) { @@ -273,7 +270,6 @@ internal static Utf8String CreateFromRune(Rune value) return newString; } -#endif /// /// Creates a new instance populated with a copy of the provided contents. diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index 1bc68a762e3b8..316978e5d877f 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -28,6 +28,9 @@ Common\System\Marvin.cs + + Common\System\NotImplemented.cs + System\Char8.cs @@ -40,6 +43,9 @@ System\Text\Unicode\Utf16Utility.cs + + System\Text\Unicode\Utf16Utility.Validation.cs + System\Text\Rune.cs @@ -87,19 +93,19 @@ System\Utf8String.cs - + System\Utf8String.Construction.cs System\Utf8String.Conversion.cs - - - + + + System\Text\Utf8Span.cs + System\Text\Utf8Span.Comparison.cs @@ -131,16 +130,12 @@ System\Text\Utf8Span.Enumeration.cs - - - - - + + System\Text\Utf8Span.Searching.cs + diff --git a/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs b/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs index 94cd1c281015c..5d4f6b128e3ea 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs @@ -9,10 +9,19 @@ namespace 
System { internal static class ThrowHelper { + [DoesNotReturn] + internal static void ThrowArgumentException(string resource, ExceptionArgument argument) + { + throw new ArgumentException(resource, argument.ToString()); + } + internal static void ThrowArgumentNullException(ExceptionArgument argument) { throw CreateArgumentNullException(argument); } [MethodImpl(MethodImplOptions.NoInlining)] private static Exception CreateArgumentNullException(ExceptionArgument argument) { return new ArgumentNullException(argument.ToString()); } + [DoesNotReturn] + internal static void ThrowArgumentOutOfRangeException() { throw new ArgumentOutOfRangeException(); } + internal static void ThrowArgumentOutOfRangeException(ExceptionArgument argument) { throw CreateArgumentOutOfRangeException(argument); } [MethodImpl(MethodImplOptions.NoInlining)] private static Exception CreateArgumentOutOfRangeException(ExceptionArgument argument) { return new ArgumentOutOfRangeException(argument.ToString()); } @@ -59,6 +68,7 @@ private static ArgumentOutOfRangeException GetArgumentOutOfRangeException(Except internal enum ExceptionArgument { ch, + comparisonType, culture, index, input, From 414c89099c70f1fde9e53876da4b6ae20fa8f81a Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Wed, 4 Mar 2020 12:25:54 -0600 Subject: [PATCH 11/26] Moving code around so it lines up with the ref assemblies. Getting the full build working again. 
--- .../src/System/Utf8Extensions.CoreCLR.cs | 102 ++++++++++++++++ .../src/System/Utf8Extensions.cs | 114 +----------------- ...System.Utf8String.Experimental.Forwards.cs | 7 ++ .../ref/System.Utf8String.Experimental.cs | 3 - .../ref/System.Utf8String.Experimental.csproj | 4 + ...stem.Utf8String.Experimental.netcoreapp.cs | 5 - ...tem.Utf8String.Experimental.netcoreapp5.cs | 18 +++ .../src/System.Utf8String.Experimental.csproj | 3 + .../src/System/ThrowHelper.cs | 1 + src/libraries/shims/ApiCompat.proj | 11 ++ 10 files changed, 151 insertions(+), 117 deletions(-) create mode 100644 src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Forwards.cs create mode 100644 src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.cs diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs index 6e9199abb8466..7f0d20d70816c 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs @@ -8,6 +8,108 @@ namespace System { public static partial class Utf8Extensions { + /// Creates a new over the portion of the target . + /// The target . + /// Returns default when is null. + public static ReadOnlyMemory AsMemory(this Utf8String? text) + { + if (text is null) + return default; + + return new ReadOnlyMemory(text, 0, text.Length); + } + + /// Creates a new over the portion of the target . + /// The target . + /// The index at which to begin this slice. + /// Returns default when is null. + /// + /// Thrown when the specified index is not in range (<0 or >text.Length). + /// + public static ReadOnlyMemory AsMemory(this Utf8String? 
text, int start) + { + if (text is null) + { + if (start != 0) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); + return default; + } + + if ((uint)start > (uint)text.Length) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); + + return new ReadOnlyMemory(text, start, text.Length - start); + } + + /// Creates a new over the portion of the target . + /// The target . + /// The index at which to begin this slice. + public static ReadOnlyMemory AsMemory(this Utf8String? text, Index startIndex) + { + if (text is null) + { + if (!startIndex.Equals(Index.Start)) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text); + + return default; + } + + int actualIndex = startIndex.GetOffset(text.Length); + if ((uint)actualIndex > (uint)text.Length) + ThrowHelper.ThrowArgumentOutOfRangeException(); + + return new ReadOnlyMemory(text, actualIndex, text.Length - actualIndex); + } + + /// Creates a new over the portion of the target . + /// The target . + /// The index at which to begin this slice. + /// The desired length for the slice (exclusive). + /// Returns default when is null. + /// + /// Thrown when the specified index or is not in range. + /// + public static ReadOnlyMemory AsMemory(this Utf8String? text, int start, int length) + { + if (text is null) + { + if (start != 0 || length != 0) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); + return default; + } + +#if TARGET_64BIT + // See comment in Span.Slice for how this works. + if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)text.Length) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); +#else + if ((uint)start > (uint)text.Length || (uint)length > (uint)(text.Length - start)) + ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); +#endif + + return new ReadOnlyMemory(text, start, length); + } + + /// Creates a new over the portion of the target . + /// The target . 
+ /// The range used to indicate the start and length of the sliced string. + public static ReadOnlyMemory AsMemory(this Utf8String? text, Range range) + { + if (text is null) + { + Index startIndex = range.Start; + Index endIndex = range.End; + + if (!startIndex.Equals(Index.Start) || !endIndex.Equals(Index.Start)) + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text); + + return default; + } + + (int start, int length) = range.GetOffsetAndLength(text.Length); + return new ReadOnlyMemory(text, start, length); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ReadOnlySpan CreateSpan(Utf8String text) => new ReadOnlySpan(ref text.DangerousGetMutableReference(), text.Length); diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs index a258c44f83946..2613506abc402 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.cs @@ -185,109 +185,6 @@ public static Utf8Span AsSpan(this Utf8String? text, int start, int length) return Utf8Span.UnsafeCreateWithoutValidation(CreateSpan(text, start, length)); } -/* // TODO eerhardt - /// Creates a new over the portion of the target . - /// The target . - /// Returns default when is null. - public static ReadOnlyMemory AsMemory(this Utf8String? text) - { - if (text is null) - return default; - - return new ReadOnlyMemory(text, 0, text.Length); - } - - /// Creates a new over the portion of the target . - /// The target . - /// The index at which to begin this slice. - /// Returns default when is null. - /// - /// Thrown when the specified index is not in range (<0 or >text.Length). - /// - public static ReadOnlyMemory AsMemory(this Utf8String? 
text, int start) - { - if (text is null) - { - if (start != 0) - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); - return default; - } - - if ((uint)start > (uint)text.Length) - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); - - return new ReadOnlyMemory(text, start, text.Length - start); - } - - /// Creates a new over the portion of the target . - /// The target . - /// The index at which to begin this slice. - public static ReadOnlyMemory AsMemory(this Utf8String? text, Index startIndex) - { - if (text is null) - { - if (!startIndex.Equals(Index.Start)) - ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text); - - return default; - } - - int actualIndex = startIndex.GetOffset(text.Length); - if ((uint)actualIndex > (uint)text.Length) - ThrowHelper.ThrowArgumentOutOfRangeException(); - - return new ReadOnlyMemory(text, actualIndex, text.Length - actualIndex); - } - - /// Creates a new over the portion of the target . - /// The target . - /// The index at which to begin this slice. - /// The desired length for the slice (exclusive). - /// Returns default when is null. - /// - /// Thrown when the specified index or is not in range. - /// - public static ReadOnlyMemory AsMemory(this Utf8String? text, int start, int length) - { - if (text is null) - { - if (start != 0 || length != 0) - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); - return default; - } - -#if TARGET_64BIT - // See comment in Span.Slice for how this works. - if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)text.Length) - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); -#else - if ((uint)start > (uint)text.Length || (uint)length > (uint)(text.Length - start)) - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start); -#endif - - return new ReadOnlyMemory(text, start, length); - } - - /// Creates a new over the portion of the target . - /// The target . 
- /// The range used to indicate the start and length of the sliced string. - public static ReadOnlyMemory AsMemory(this Utf8String? text, Range range) - { - if (text is null) - { - Index startIndex = range.Start; - Index endIndex = range.End; - - if (!startIndex.Equals(Index.Start) || !endIndex.Equals(Index.Start)) - ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text); - - return default; - } - - (int start, int length) = range.GetOffsetAndLength(text.Length); - return new ReadOnlyMemory(text, start, length); - } -*/ /// Creates a new over the portion of the target . /// The target . @@ -321,7 +218,7 @@ public static ReadOnlyMemory AsMemoryBytes(this Utf8String? text, int star return CreateMemoryBytes(text, start, text.Length - start); } -/* // TODO eerhardt + /// Creates a new over the portion of the target . /// The target . /// The index at which to begin this slice. @@ -339,9 +236,8 @@ public static ReadOnlyMemory AsMemoryBytes(this Utf8String? text, Index st if ((uint)actualIndex > (uint)text.Length) ThrowHelper.ThrowArgumentOutOfRangeException(); - return new ReadOnlyMemory(text, actualIndex, text.Length - actualIndex); + return CreateMemoryBytes(text, actualIndex, text.Length - actualIndex); } -*/ /// Creates a new over the portion of the target . /// The target . @@ -371,7 +267,7 @@ public static ReadOnlyMemory AsMemoryBytes(this Utf8String? text, int star return CreateMemoryBytes(text, start, length); } -/* // TODO eerhardt + /// Creates a new over the portion of the target . /// The target . /// The range used to indicate the start and length of the sliced string. @@ -389,9 +285,9 @@ public static ReadOnlyMemory AsMemoryBytes(this Utf8String? text, Range ra } (int start, int length) = range.GetOffsetAndLength(text.Length); - return new ReadOnlyMemory(text, start, length); + return CreateMemoryBytes(text, start, length); } -*/ + /// /// Returns a representation of this instance. 
/// diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Forwards.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Forwards.cs new file mode 100644 index 0000000000000..67c2ae89d5393 --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Forwards.cs @@ -0,0 +1,7 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +[assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Index))] +[assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Range))] +[assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Text.Rune))] diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs index f6ece6ab9fd19..795d73f913651 100644 --- a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs @@ -54,9 +54,6 @@ public static partial class Utf8Extensions public static System.ReadOnlySpan AsBytes(this System.Utf8String? text) { throw null; } public static System.ReadOnlySpan AsBytes(this System.Utf8String? text, int start) { throw null; } public static System.ReadOnlySpan AsBytes(this System.Utf8String? text, int start, int length) { throw null; } - public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text) { throw null; } - public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, int start) { throw null; } - public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, int start, int length) { throw null; } public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? 
text) { throw null; } public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, int start) { throw null; } public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, System.Index startIndex) { throw null; } diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj index 236be98487247..bf1c7bece927d 100644 --- a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj @@ -14,8 +14,12 @@ + + + + diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs index a57b47eea0b9c..5b0733e695ff0 100644 --- a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs @@ -7,11 +7,6 @@ namespace System { - public static partial class Utf8Extensions - { - public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, System.Index startIndex) { throw null; } - public static System.ReadOnlyMemory AsMemory(this System.Utf8String? 
text, System.Range range) { throw null; } - } public sealed partial class Utf8String : System.IComparable, System.IEquatable { public static System.Utf8String Create(int length, TState state, System.Buffers.SpanAction action) { throw null; } diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.cs new file mode 100644 index 0000000000000..bc2c0ff22f6d8 --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.cs @@ -0,0 +1,18 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// ------------------------------------------------------------------------------ +// Changes to this file must follow the https://aka.ms/api-review process. +// ------------------------------------------------------------------------------ + +namespace System +{ + public static partial class Utf8Extensions + { + public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text) { throw null; } + public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, System.Index startIndex) { throw null; } + public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, int start) { throw null; } + public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, int start, int length) { throw null; } + public static System.ReadOnlyMemory AsMemory(this System.Utf8String? 
text, System.Range range) { throw null; } + } +} diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index c66b6ef7ae573..2998f48326ad1 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -11,9 +11,12 @@ netstandard2.0;$(NetCoreAppCurrent)-Windows_NT enable $(DefineContants);FEATURE_UTF8STRING +<<<<<<< HEAD false >>>>>>> Get Rune working on netstandard2.0 +======= +>>>>>>> Moving code around so it lines up with the ref assemblies. diff --git a/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs b/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs index 5d4f6b128e3ea..359029d78e088 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs @@ -74,6 +74,7 @@ internal enum ExceptionArgument input, length, start, + text, value, } } diff --git a/src/libraries/shims/ApiCompat.proj b/src/libraries/shims/ApiCompat.proj index 29ec65df3065e..a340a3d1a5aec 100644 --- a/src/libraries/shims/ApiCompat.proj +++ b/src/libraries/shims/ApiCompat.proj @@ -85,6 +85,17 @@ + + + + + + + From f5b7269f782fda5fb966596ae63c5d19a334bedf Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Wed, 4 Mar 2020 16:35:14 -0600 Subject: [PATCH 12/26] Build for netcoreapp3.0. Get the package building. 
--- .../System.Private.CoreLib.Shared.projitems | 2 +- .../src/System/Numerics/BitOperations.cs | 7 ++- ...p.cs => Utf8Utility.WhiteSpace.CoreLib.cs} | 8 +-- ...s => Utf8Utility.WhiteSpace.NonCoreLib.cs} | 4 +- .../src/System/Text/Utf8Span.Manipulation.cs | 4 ++ .../src/System/Utf8String.Manipulation.cs | 4 ++ .../System.Utf8String.Experimental.pkgproj | 2 +- .../src/Resources/Strings.resx | 3 + .../src/System.Utf8String.Experimental.csproj | 55 ++++++++++++------- .../src/System/ThrowHelper.cs | 9 +++ 10 files changed, 70 insertions(+), 28 deletions(-) rename src/libraries/System.Private.CoreLib/src/System/Text/Unicode/{Utf8Utility.WhiteSpace.netcoreapp.cs => Utf8Utility.WhiteSpace.CoreLib.cs} (95%) rename src/libraries/System.Private.CoreLib/src/System/Text/Unicode/{Utf8Utility.WhiteSpace.netstandard.cs => Utf8Utility.WhiteSpace.NonCoreLib.cs} (96%) diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index d84f7e117cfd9..89cb461992c68 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -861,7 +861,7 @@ - + diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs index 3aac79a25563b..2506043f6f721 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs @@ -20,7 +20,12 @@ namespace System.Numerics /// The methods use hardware intrinsics when available on the underlying platform, /// otherwise they use optimized software fallbacks. 
/// - public static class BitOperations +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + static class BitOperations { // C# no-alloc optimization that directly wraps the data section of the dll (similar to string constants) // https://github.com/dotnet/roslyn/pull/24621 diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netcoreapp.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.CoreLib.cs similarity index 95% rename from src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netcoreapp.cs rename to src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.CoreLib.cs index cb98eaf6ea415..373f764efd7d0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netcoreapp.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.CoreLib.cs @@ -40,12 +40,12 @@ private static nuint GetIndexOfFirstNonWhiteSpaceChar(ref byte utf8Data, nuint l // Very quick check: see if the byte is in the range [ 21 .. 7F ]. // If so, we can skip the more expensive logic later in this method. - if ((sbyte)Unsafe.AddByteOffset(ref utf8Data, i) > (sbyte)0x20) //TODO: remove IntPtr cast + if ((sbyte)Unsafe.AddByteOffset(ref utf8Data, i) > (sbyte)0x20) { break; } - uint possibleAsciiByte = Unsafe.AddByteOffset(ref utf8Data, i); //TODO: remove IntPtr cast + uint possibleAsciiByte = Unsafe.AddByteOffset(ref utf8Data, i); if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) { // The simple comparison failed. Let's read the actual byte value, @@ -98,12 +98,12 @@ private static nuint GetIndexOfTrailingWhiteSpaceSequence(ref byte utf8Data, nui // Very quick check: see if the byte is in the range [ 21 .. 7F ]. // If so, we can skip the more expensive logic later in this method. 
- if ((sbyte)Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, (IntPtr)length), -1) > (sbyte)0x20) //TODO: remove IntPtr cast + if ((sbyte)Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, length), -1) > (sbyte)0x20) { break; } - uint possibleAsciiByte = Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, (IntPtr)length), -1); //TODO: remove IntPtr cast + uint possibleAsciiByte = Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, length), -1); if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) { // The simple comparison failed. Let's read the actual byte value, diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netstandard.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.NonCoreLib.cs similarity index 96% rename from src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netstandard.cs rename to src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.NonCoreLib.cs index 8e09579a5757c..86972da375612 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.netstandard.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.NonCoreLib.cs @@ -36,7 +36,7 @@ public static int GetIndexOfFirstNonWhiteSpaceChar(ReadOnlySpan utf8Data) // and if it's ASCII we can delegate to Rune's inlined method // implementation. - if (Rune.IsWhiteSpace(Rune.UnsafeCreate(possibleAsciiByte))) + if (Rune.IsWhiteSpace(new Rune(possibleAsciiByte))) { i++; continue; @@ -91,7 +91,7 @@ public static int GetIndexOfTrailingWhiteSpaceSequence(ReadOnlySpan utf8Da // and if it's ASCII we can delegate to Rune's inlined method // implementation. 
- if (Rune.IsWhiteSpace(Rune.UnsafeCreate(possibleAsciiByte))) + if (Rune.IsWhiteSpace(new Rune(possibleAsciiByte))) { length--; continue; diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Manipulation.cs index f679b14da76d0..a538e46e02bb3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Manipulation.cs @@ -431,7 +431,11 @@ internal readonly bool DeconstructHelper(in Utf8Span source, out Utf8Span firstI if (SearchRune >= 0) { +#if SYSTEM_PRIVATE_CORELIB || NETSTANDARD2_0 wasMatchFound = searchSpan.TryFind(Rune.UnsafeCreate((uint)SearchRune), out matchRange); +#else + wasMatchFound = searchSpan.TryFind(new Rune((uint)SearchRune), out matchRange); +#endif } else { diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs index fcef008daf655..18a577a6cc693 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs @@ -656,7 +656,11 @@ internal readonly bool DeconstructHelper(in Utf8Span source, out Utf8Span firstI int searchRune = SearchRune; // local copy so as to avoid struct tearing if (searchRune >= 0) { +#if SYSTEM_PRIVATE_CORELIB || NETSTANDARD2_0 wasMatchFound = searchSpan.TryFind(Rune.UnsafeCreate((uint)searchRune), out matchRange); +#else + wasMatchFound = searchSpan.TryFind(new Rune((uint)searchRune), out matchRange); +#endif } else { diff --git a/src/libraries/System.Utf8String.Experimental/pkg/System.Utf8String.Experimental.pkgproj b/src/libraries/System.Utf8String.Experimental/pkg/System.Utf8String.Experimental.pkgproj index 53280e1329e0f..1577c85ef1902 100644 --- 
a/src/libraries/System.Utf8String.Experimental/pkg/System.Utf8String.Experimental.pkgproj +++ b/src/libraries/System.Utf8String.Experimental/pkg/System.Utf8String.Experimental.pkgproj @@ -3,7 +3,7 @@ - netcoreapp5.0; + net461;netcoreapp2.0;uap10.0.16299;$(AllXamarinFrameworks) diff --git a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx index a323e92dc439f..8b82fef51b2bd 100644 --- a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx +++ b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx @@ -147,6 +147,9 @@ Cannot call Utf8Span.Equals(object). Use Equals(Utf8Span) or operator == instead. + + The callback populated its buffer with ill-formed UTF-8 data. Callbacks are required to populate the buffer only with well-formed UTF-8 data. + Cannot create the desired substring because it would split a multi-byte UTF-8 subsequence. diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index 2998f48326ad1..a8b26adfbca57 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -1,14 +1,17 @@  true +<<<<<<< HEAD <<<<<<< HEAD true $(NetCoreAppCurrent)-Windows_NT;$(NetCoreAppCurrent)-Unix enable ======= +======= + $(NoWarn);3019 +>>>>>>> Build for netcoreapp3.0. 
true - - netstandard2.0;$(NetCoreAppCurrent)-Windows_NT + netstandard2.0;netcoreapp3.0;$(NetCoreAppCurrent)-Windows_NT;$(NetCoreAppCurrent)-Unix enable $(DefineContants);FEATURE_UTF8STRING <<<<<<< HEAD @@ -23,6 +26,28 @@ + + System\Index.cs + + + System\Numerics\BitOperations.cs + + + System\Range.cs + + + System\Text\Rune.cs + + + System\Text\Unicode\Utf8.cs + + + + + System\Utf8String.netcoreapp.cs + + + @@ -37,12 +62,6 @@ System\Char8.cs - - System\Index.cs - - - System\Range.cs - System\Text\TrimType.cs @@ -58,15 +77,9 @@ System\Text\Unicode\Utf16Utility.Validation.cs - - System\Text\Rune.cs - System\Text\Utf8StringComparer.cs - - System\Numerics\BitOperations.cs - System\Numerics\Hashing\HashHelpers.cs @@ -76,9 +89,6 @@ System\Text\ASCIIUtility.Helpers.cs - - System\Text\Unicode\Utf8.cs - System\Text\Unicode\Utf8Utility.cs @@ -91,8 +101,8 @@ System\Text\Unicode\Utf8Utility.Transcoding.cs - - System\Text\Unicode\Utf8Utility.WhiteSpace.netstandard.cs + + System\Text\Unicode\Utf8Utility.WhiteSpace.NonCoreLib.cs System\Utf8StringSplitOptions.cs @@ -148,10 +158,17 @@ + + + + + + + diff --git a/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs b/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs index 359029d78e088..983dcd397b299 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs @@ -15,6 +15,7 @@ internal static void ThrowArgumentException(string resource, ExceptionArgument a throw new ArgumentException(resource, argument.ToString()); } + [DoesNotReturn] internal static void ThrowArgumentNullException(ExceptionArgument argument) { throw CreateArgumentNullException(argument); } [MethodImpl(MethodImplOptions.NoInlining)] private static Exception CreateArgumentNullException(ExceptionArgument argument) { return new ArgumentNullException(argument.ToString()); } @@ -33,6 +34,13 @@ internal static void 
ThrowValueArgumentOutOfRange_NeedNonNegNumException() SR.ArgumentOutOfRange_NeedNonNegNum); } + [DoesNotReturn] + internal static void ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum() + { + throw GetArgumentOutOfRangeException(ExceptionArgument.length, + SR.ArgumentOutOfRange_NeedNonNegNum); + } + [DoesNotReturn] internal static void ThrowInvalidOperationException() { throw CreateInvalidOperationException(); } [MethodImpl(MethodImplOptions.NoInlining)] @@ -67,6 +75,7 @@ private static ArgumentOutOfRangeException GetArgumentOutOfRangeException(Except // internal enum ExceptionArgument { + action, ch, comparisonType, culture, From 6c6fd97949f075ddc1341cb2c0803050263ecd1c Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Wed, 4 Mar 2020 18:39:30 -0600 Subject: [PATCH 13/26] Get tests building on netfx --- ...ystem.Utf8String.Experimental.Tests.csproj | 9 +- .../tests/System/BoundedUtf8Span.cs | 5 + .../tests/System/MemoryTests.cs | 56 --------- .../tests/System/MemoryTests.netcoreapp.cs | 72 ++++++++++++ .../tests/System/RangeEqualityComparer.cs | 4 + .../tests/System/Utf8ExtensionsTests.cs | 62 ++-------- .../System/Utf8ExtensionsTests.netcoreapp.cs | 77 +++++++++++++ .../tests/System/Utf8SpanTests.Conversion.cs | 2 +- .../tests/System/Utf8SpanTests.TestData.cs | 4 +- .../tests/System/Utf8SpanTests.cs | 2 +- .../tests/System/Utf8StringTests.Ctor.cs | 100 +--------------- .../System/Utf8StringTests.Ctor.netcoreapp.cs | 108 ++++++++++++++++++ .../tests/System/Utf8TestUtilities.cs | 15 ++- 13 files changed, 299 insertions(+), 217 deletions(-) create mode 100644 src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netcoreapp.cs create mode 100644 src/libraries/System.Utf8String.Experimental/tests/System/Utf8ExtensionsTests.netcoreapp.cs create mode 100644 src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.netcoreapp.cs diff --git 
a/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj b/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj index 669fe610acf22..30b912642e976 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj +++ b/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj @@ -1,8 +1,8 @@ - + true true - $(NetCoreAppCurrent) + $(NetCoreAppCurrent);$(NetFrameworkCurrent) true true @@ -38,4 +38,9 @@ + + + + + \ No newline at end of file diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/BoundedUtf8Span.cs b/src/libraries/System.Utf8String.Experimental/tests/System/BoundedUtf8Span.cs index d8a1c4641dbb4..8eda11d8b6b0d 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/BoundedUtf8Span.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/BoundedUtf8Span.cs @@ -17,6 +17,11 @@ public sealed class BoundedUtf8Span : IDisposable { private readonly BoundedMemory _boundedMemory; + public BoundedUtf8Span(string utf16Data, PoisonPagePlacement placement = PoisonPagePlacement.After) + : this(utf16Data.AsSpan(), placement) + { + } + public BoundedUtf8Span(ReadOnlySpan utf16Data, PoisonPagePlacement placement = PoisonPagePlacement.After) : this(u8(utf16Data.ToString()).AsBytes(), placement) { diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.cs b/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.cs index 8e7086a66f1b1..bacf672d87d4f 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.cs @@ -23,15 +23,6 @@ public static void MemoryMarshal_TryGetArrayOfByte_Utf8String() Assert.True(default(ArraySegment).Equals(segment)); } - [Fact] - public static void MemoryMarshal_TryGetArrayOfChar8_Utf8String() - { - 
ReadOnlyMemory rom = u8("Hello").AsMemory(); - - Assert.False(MemoryMarshal.TryGetArray(rom, out ArraySegment segment)); - Assert.True(default(ArraySegment).Equals(segment)); - } - [Fact] public static unsafe void MemoryOfByte_WithUtf8String_Pin() { @@ -56,30 +47,6 @@ public static void MemoryOfByte_WithUtf8String_ToString() Assert.Equal("System.Memory[5]", Unsafe.As, Memory>(ref rom).ToString()); } - [Fact] - public static unsafe void MemoryOfChar8_WithUtf8String_Pin() - { - Utf8String theString = u8("Hello"); - ReadOnlyMemory rom = theString.AsMemory(); - MemoryHandle memHandle = default; - try - { - memHandle = Unsafe.As, Memory>(ref rom).Pin(); - Assert.True(memHandle.Pointer == Unsafe.AsPointer(ref Unsafe.AsRef(in theString.GetPinnableReference()))); - } - finally - { - memHandle.Dispose(); - } - } - - [Fact] - public static void MemoryOfChar8_WithUtf8String_ToString() - { - ReadOnlyMemory rom = u8("Hello").AsMemory(); - Assert.Equal("Hello", Unsafe.As, Memory>(ref rom).ToString()); - } - [Fact] public static unsafe void ReadOnlyMemoryOfByte_WithUtf8String_Pin() { @@ -103,29 +70,6 @@ public static void ReadOnlyMemoryOfByte_WithUtf8String_ToString() Assert.Equal("System.ReadOnlyMemory[5]", u8("Hello").AsMemoryBytes().ToString()); } - [Fact] - public static unsafe void ReadOnlyMemoryOfChar8_WithUtf8String_Pin() - { - Utf8String theString = u8("Hello"); - ReadOnlyMemory rom = theString.AsMemory(); - MemoryHandle memHandle = default; - try - { - memHandle = rom.Pin(); - Assert.True(memHandle.Pointer == Unsafe.AsPointer(ref Unsafe.AsRef(in theString.GetPinnableReference()))); - } - finally - { - memHandle.Dispose(); - } - } - - [Fact] - public static void ReadOnlyMemoryOfChar8_WithUtf8String_ToString() - { - Assert.Equal("Hello", u8("Hello").AsMemory().ToString()); - } - [Fact] public static void ReadOnlySpanOfByte_ToString() { diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netcoreapp.cs 
b/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netcoreapp.cs new file mode 100644 index 0000000000000..f1f8b4366d8ad --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netcoreapp.cs @@ -0,0 +1,72 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Buffers; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using Xunit; + +using static System.Tests.Utf8TestUtilities; + +namespace System.Tests +{ + public partial class MemoryTests + { + [Fact] + public static void MemoryMarshal_TryGetArrayOfChar8_Utf8String() + { + ReadOnlyMemory rom = u8("Hello").AsMemory(); + + Assert.False(MemoryMarshal.TryGetArray(rom, out ArraySegment segment)); + Assert.True(default(ArraySegment).Equals(segment)); + } + + [Fact] + public static unsafe void MemoryOfChar8_WithUtf8String_Pin() + { + Utf8String theString = u8("Hello"); + ReadOnlyMemory rom = theString.AsMemory(); + MemoryHandle memHandle = default; + try + { + memHandle = Unsafe.As, Memory>(ref rom).Pin(); + Assert.True(memHandle.Pointer == Unsafe.AsPointer(ref Unsafe.AsRef(in theString.GetPinnableReference()))); + } + finally + { + memHandle.Dispose(); + } + } + + [Fact] + public static void MemoryOfChar8_WithUtf8String_ToString() + { + ReadOnlyMemory rom = u8("Hello").AsMemory(); + Assert.Equal("Hello", Unsafe.As, Memory>(ref rom).ToString()); + } + + [Fact] + public static unsafe void ReadOnlyMemoryOfChar8_WithUtf8String_Pin() + { + Utf8String theString = u8("Hello"); + ReadOnlyMemory rom = theString.AsMemory(); + MemoryHandle memHandle = default; + try + { + memHandle = rom.Pin(); + Assert.True(memHandle.Pointer == Unsafe.AsPointer(ref Unsafe.AsRef(in theString.GetPinnableReference()))); + } + finally + { + memHandle.Dispose(); + } + } + + [Fact] + public static void 
ReadOnlyMemoryOfChar8_WithUtf8String_ToString() + { + Assert.Equal("Hello", u8("Hello").AsMemory().ToString()); + } + } +} diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/RangeEqualityComparer.cs b/src/libraries/System.Utf8String.Experimental/tests/System/RangeEqualityComparer.cs index 8cc393896345b..7cd1ba3c726cd 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/RangeEqualityComparer.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/RangeEqualityComparer.cs @@ -32,7 +32,11 @@ public bool Equals(Range x, Range y) public int GetHashCode(Range obj) { (int offset, int length) = obj.GetOffsetAndLength(_length); +#if NETCOREAPP return HashCode.Combine(offset, length); +#else + return Tuple.Create(offset, length).GetHashCode(); +#endif } } } diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8ExtensionsTests.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8ExtensionsTests.cs index 5b15679b006be..39b7a91a6cf81 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8ExtensionsTests.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8ExtensionsTests.cs @@ -24,20 +24,12 @@ public unsafe void AsBytes_FromSpan_Default() // Next, an empty but non-default span should become an empty but non-default span. Assert.True(new ReadOnlySpan((void*)0x12345, 0) == new ReadOnlySpan((void*)0x12345, 0).AsBytes()); - - // Finally, a span wrapping data should become a span wrapping that same data. 
- - Utf8String theString = u8("Hello"); - Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in theString.GetPinnableReference()), 5) == (theString.AsMemory().Span).AsBytes()); } [Fact] public void AsBytes_FromUtf8String() { Assert.True(default(ReadOnlySpan) == ((Utf8String)null).AsBytes()); - - Utf8String theString = u8("Hello"); - Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in theString.GetPinnableReference()), 5) == theString.AsBytes()); } [Fact] @@ -76,56 +68,18 @@ public void AsBytes_FromUtf8String_WithStartAndLength_ArgOutOfRange() } [Fact] - public void AsMemory_FromUtf8String() - { - Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemory())); - - Utf8String theString = u8("Hello"); - Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As(ref Unsafe.AsRef(in theString.GetPinnableReference())), 5) == theString.AsMemory().Span); - } - - [Fact] - public void AsMemory_FromUtf8String_WithStart() - { - Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemory(0))); - Assert.True(u8("Hello").AsMemory(5).IsEmpty); - - SpanAssert.Equal(new Char8[] { (Char8)'e', (Char8)'l', (Char8)'l', (Char8)'o' }, u8("Hello").AsMemory(1).Span); - } - - [Fact] - public void AsMemory_FromUtf8String_WithStart_ArgOutOfRange() - { - Assert.Throws("start", () => ((Utf8String)null).AsMemory(1)); - Assert.Throws("start", () => u8("Hello").AsMemory(-1)); - Assert.Throws("start", () => u8("Hello").AsMemory(6)); - } - - [Fact] - public void AsMemory_FromUtf8String_WithStartAndLength() - { - Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemory(0, 0))); - Assert.True(u8("Hello").AsMemory(5, 0).IsEmpty); - - SpanAssert.Equal(new Char8[] { (Char8)'e', (Char8)'l', (Char8)'l' }, u8("Hello").AsMemory(1, 3).Span); - } - - [Fact] - public void AsMemory_FromUtf8String_WithStartAndLength_ArgOutOfRange() - { - Assert.Throws("start", () => ((Utf8String)null).AsMemory(0, 1)); - Assert.Throws("start", () => 
((Utf8String)null).AsMemory(1, 0)); - Assert.Throws("start", () => u8("Hello").AsMemory(5, 1)); - Assert.Throws("start", () => u8("Hello").AsMemory(4, -2)); - } - - [Fact] - public void AsMemoryBytes_FromUtf8String() + public unsafe void AsMemoryBytes_FromUtf8String() { Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemoryBytes())); Utf8String theString = u8("Hello"); - Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in theString.GetPinnableReference()), 5) == theString.AsMemoryBytes().Span); + fixed (byte* pTheString = theString) + { + fixed (byte* pTheStringAsMemoryBytes = theString.AsMemoryBytes().Span) + { + Assert.True(pTheString == pTheStringAsMemoryBytes); + } + } } [Fact] diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8ExtensionsTests.netcoreapp.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8ExtensionsTests.netcoreapp.cs new file mode 100644 index 0000000000000..9833e922e391b --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8ExtensionsTests.netcoreapp.cs @@ -0,0 +1,77 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using Xunit; + +using static System.Tests.Utf8TestUtilities; + +namespace System.Tests +{ + public partial class Utf8ExtensionsTests + { + [Fact] + public void AsBytes_FromSpan_Default_netcoreapp() + { + // a span wrapping data should become a span wrapping that same data. 
+ + Utf8String theString = u8("Hello"); + + Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in theString.GetPinnableReference()), 5) == (theString.AsMemory().Span).AsBytes()); + } + + [Fact] + public void AsBytes_FromUtf8String_netcoreapp() + { + Utf8String theString = u8("Hello"); + Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in theString.GetPinnableReference()), 5) == theString.AsBytes()); + } + + [Fact] + public void AsMemory_FromUtf8String() + { + Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemory())); + + Utf8String theString = u8("Hello"); + Assert.True(MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As(ref Unsafe.AsRef(in theString.GetPinnableReference())), 5) == theString.AsMemory().Span); + } + + [Fact] + public void AsMemory_FromUtf8String_WithStart() + { + Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemory(0))); + Assert.True(u8("Hello").AsMemory(5).IsEmpty); + + SpanAssert.Equal(new Char8[] { (Char8)'e', (Char8)'l', (Char8)'l', (Char8)'o' }, u8("Hello").AsMemory(1).Span); + } + + [Fact] + public void AsMemory_FromUtf8String_WithStart_ArgOutOfRange() + { + Assert.Throws("start", () => ((Utf8String)null).AsMemory(1)); + Assert.Throws("start", () => u8("Hello").AsMemory(-1)); + Assert.Throws("start", () => u8("Hello").AsMemory(6)); + } + + [Fact] + public void AsMemory_FromUtf8String_WithStartAndLength() + { + Assert.True(default(ReadOnlyMemory).Equals(((Utf8String)null).AsMemory(0, 0))); + Assert.True(u8("Hello").AsMemory(5, 0).IsEmpty); + + SpanAssert.Equal(new Char8[] { (Char8)'e', (Char8)'l', (Char8)'l' }, u8("Hello").AsMemory(1, 3).Span); + } + + [Fact] + public void AsMemory_FromUtf8String_WithStartAndLength_ArgOutOfRange() + { + Assert.Throws("start", () => ((Utf8String)null).AsMemory(0, 1)); + Assert.Throws("start", () => ((Utf8String)null).AsMemory(1, 0)); + Assert.Throws("start", () => u8("Hello").AsMemory(5, 1)); + Assert.Throws("start", () => u8("Hello").AsMemory(4, -2)); + } + 
} +} diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Conversion.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Conversion.cs index bd18fdd177fbf..9427c59340f33 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Conversion.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Conversion.cs @@ -42,7 +42,7 @@ public static void Normalize(string utf16Source, string utf16Expected, Normaliza { byte[] dest = new byte[bufferLength]; Assert.Equal(utf8Normalized.Length, utf8Source.Normalize(dest, normalizationForm)); - Utf8Span normalizedSpan = Utf8Span.UnsafeCreateWithoutValidation(dest[..utf8Normalized.Length]); + Utf8Span normalizedSpan = Utf8Span.UnsafeCreateWithoutValidation(dest.AsSpan().Slice(0, utf8Normalized.Length)); Assert.True(utf8Normalized.AsSpan() == normalizedSpan); // ordinal equality Assert.True(normalizedSpan.IsNormalized(normalizationForm)); } diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.TestData.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.TestData.cs index e5ee9b31e8c18..10c13247ca12d 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.TestData.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.TestData.cs @@ -125,7 +125,7 @@ private static bool TryParseSearchTermAsRune(object searchTerm, out Rune parsed) } else if (searchTerm is string str) { - if (Rune.DecodeFromUtf16(str, out parsed, out int charsConsumed) == OperationStatus.Done + if (Rune.DecodeFromUtf16(str.AsSpan(), out parsed, out int charsConsumed) == OperationStatus.Done && charsConsumed == str.Length) { return true; @@ -161,7 +161,7 @@ private static bool TryParseSearchTermAsUtf8String(object searchTerm, out ustrin } else if (searchTerm is string str) { - if (ustring.TryCreateFrom(str, out parsed)) + if 
(ustring.TryCreateFrom(str.AsSpan(), out parsed)) { return true; } diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.cs index 93148cb652951..ab1b528d49674 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.cs @@ -287,7 +287,7 @@ public static void ToCharsTest(string expected) { using BoundedMemory boundedMemory = BoundedMemory.Allocate(i); Assert.Equal(expected.Length, span.ToChars(boundedMemory.Span)); - Assert.True(boundedMemory.Span.Slice(0, expected.Length).SequenceEqual(expected)); + Assert.True(boundedMemory.Span.Slice(0, expected.Length).SequenceEqual(expected.AsSpan())); } } diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs index 4b5e278faa5d6..a7c7a9c5a1bbb 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs @@ -255,76 +255,11 @@ public static void Ctor_NonValidating_FromByteSpan() Assert.Equal(u8("xyz"), actual); } - [Fact] - public static void Ctor_NonValidating_FromDelegate() - { - object expectedState = new object(); - SpanAction spanAction = (span, actualState) => - { - Assert.Same(expectedState, actualState); - Assert.NotEqual(0, span.Length); // shouldn't have been called for a zero-length span - - for (int i = 0; i < span.Length; i++) - { - Assert.Equal(0, span[i]); // should've been zero-inited - span[i] = (byte)('a' + (i % 26)); // writes "abc...xyzabc...xyz..." 
- } - }; - - ArgumentException exception = Assert.Throws(() => Utf8String.UnsafeCreateWithoutValidation(-1, expectedState, spanAction)); - Assert.Equal("length", exception.ParamName); - - exception = Assert.Throws(() => Utf8String.UnsafeCreateWithoutValidation(10, expectedState, action: null)); - Assert.Equal("action", exception.ParamName); - - Assert.Same(Utf8String.Empty, Utf8String.UnsafeCreateWithoutValidation(0, expectedState, spanAction)); - - Assert.Equal(u8("abcde"), Utf8String.UnsafeCreateWithoutValidation(5, expectedState, spanAction)); - } - - [Fact] - public static void Ctor_Validating_FromDelegate() - { - object expectedState = new object(); - SpanAction spanAction = (span, actualState) => - { - Assert.Same(expectedState, actualState); - Assert.NotEqual(0, span.Length); // shouldn't have been called for a zero-length span - - for (int i = 0; i < span.Length; i++) - { - Assert.Equal(0, span[i]); // should've been zero-inited - span[i] = (byte)('a' + (i % 26)); // writes "abc...xyzabc...xyz..." 
- } - }; - - ArgumentException exception = Assert.Throws(() => Utf8String.Create(-1, expectedState, spanAction)); - Assert.Equal("length", exception.ParamName); - - exception = Assert.Throws(() => Utf8String.Create(10, expectedState, action: null)); - Assert.Equal("action", exception.ParamName); - - Assert.Same(Utf8String.Empty, Utf8String.Create(0, expectedState, spanAction)); - - Assert.Equal(u8("abcde"), Utf8String.Create(5, expectedState, spanAction)); - } - - [Fact] - public static void Ctor_Validating_FromDelegate_ThrowsIfDelegateProvidesInvalidData() - { - SpanAction spanAction = (span, actualState) => - { - span[0] = 0xFF; // never a valid UTF-8 byte - }; - - Assert.Throws(() => Utf8String.Create(10, new object(), spanAction)); - } - [Fact] public static void Ctor_CreateFromRelaxed_Utf16() { Assert.Same(Utf8String.Empty, Utf8String.CreateFromRelaxed(ReadOnlySpan.Empty)); - Assert.Equal(u8("xy\uFFFDz"), Utf8String.CreateFromRelaxed("xy\ud800z")); + Assert.Equal(u8("xy\uFFFDz"), Utf8String.CreateFromRelaxed("xy\ud800z".AsSpan())); } [Fact] @@ -334,33 +269,6 @@ public static void Ctor_CreateFromRelaxed_Utf8() Assert.Equal(u8("xy\uFFFDz"), Utf8String.CreateFromRelaxed(new byte[] { (byte)'x', (byte)'y', 0xF4, 0x80, 0x80, (byte)'z' })); } - [Fact] - public static void Ctor_CreateRelaxed_FromDelegate() - { - object expectedState = new object(); - SpanAction spanAction = (span, actualState) => - { - Assert.Same(expectedState, actualState); - Assert.NotEqual(0, span.Length); // shouldn't have been called for a zero-length span - - for (int i = 0; i < span.Length; i++) - { - Assert.Equal(0, span[i]); // should've been zero-inited - span[i] = 0xFF; // never a valid UTF-8 byte - } - }; - - ArgumentException exception = Assert.Throws(() => Utf8String.CreateRelaxed(-1, expectedState, spanAction)); - Assert.Equal("length", exception.ParamName); - - exception = Assert.Throws(() => Utf8String.CreateRelaxed(10, expectedState, action: null)); - Assert.Equal("action", 
exception.ParamName); - - Assert.Same(Utf8String.Empty, Utf8String.CreateRelaxed(0, expectedState, spanAction)); - - Assert.Equal(u8("\uFFFD\uFFFD"), Utf8String.CreateRelaxed(2, expectedState, spanAction)); - } - [Fact] public static void Ctor_TryCreateFrom_Utf8() { @@ -399,17 +307,17 @@ public static void Ctor_TryCreateFrom_Utf16() // Well-formed ASCII contents - Assert.True(Utf8String.TryCreateFrom("Hello", out value)); + Assert.True(Utf8String.TryCreateFrom("Hello".AsSpan(), out value)); Assert.Equal(u8("Hello"), value); // Well-formed non-ASCII contents - Assert.True(Utf8String.TryCreateFrom("\U0001F47D", out value)); // U+1F47D EXTRATERRESTRIAL ALIEN + Assert.True(Utf8String.TryCreateFrom("\U0001F47D".AsSpan(), out value)); // U+1F47D EXTRATERRESTRIAL ALIEN Assert.Equal(u8("\U0001F47D"), value); // Ill-formed contents - Assert.False(Utf8String.TryCreateFrom("\uD800x", out value)); + Assert.False(Utf8String.TryCreateFrom("\uD800x".AsSpan(), out value)); Assert.Null(value); } } diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.netcoreapp.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.netcoreapp.cs new file mode 100644 index 0000000000000..5012fbe9435e3 --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.netcoreapp.cs @@ -0,0 +1,108 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Buffers; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using Xunit; + +using static System.Tests.Utf8TestUtilities; + +namespace System.Tests +{ + public unsafe partial class Utf8StringTests + { + [Fact] + public static void Ctor_NonValidating_FromDelegate() + { + object expectedState = new object(); + SpanAction spanAction = (span, actualState) => + { + Assert.Same(expectedState, actualState); + Assert.NotEqual(0, span.Length); // shouldn't have been called for a zero-length span + + for (int i = 0; i < span.Length; i++) + { + Assert.Equal(0, span[i]); // should've been zero-inited + span[i] = (byte)('a' + (i % 26)); // writes "abc...xyzabc...xyz..." + } + }; + + ArgumentException exception = Assert.Throws(() => Utf8String.UnsafeCreateWithoutValidation(-1, expectedState, spanAction)); + Assert.Equal("length", exception.ParamName); + + exception = Assert.Throws(() => Utf8String.UnsafeCreateWithoutValidation(10, expectedState, action: null)); + Assert.Equal("action", exception.ParamName); + + Assert.Same(Utf8String.Empty, Utf8String.UnsafeCreateWithoutValidation(0, expectedState, spanAction)); + + Assert.Equal(u8("abcde"), Utf8String.UnsafeCreateWithoutValidation(5, expectedState, spanAction)); + } + + [Fact] + public static void Ctor_Validating_FromDelegate() + { + object expectedState = new object(); + SpanAction spanAction = (span, actualState) => + { + Assert.Same(expectedState, actualState); + Assert.NotEqual(0, span.Length); // shouldn't have been called for a zero-length span + + for (int i = 0; i < span.Length; i++) + { + Assert.Equal(0, span[i]); // should've been zero-inited + span[i] = (byte)('a' + (i % 26)); // writes "abc...xyzabc...xyz..." 
+ } + }; + + ArgumentException exception = Assert.Throws(() => Utf8String.Create(-1, expectedState, spanAction)); + Assert.Equal("length", exception.ParamName); + + exception = Assert.Throws(() => Utf8String.Create(10, expectedState, action: null)); + Assert.Equal("action", exception.ParamName); + + Assert.Same(Utf8String.Empty, Utf8String.Create(0, expectedState, spanAction)); + + Assert.Equal(u8("abcde"), Utf8String.Create(5, expectedState, spanAction)); + } + + [Fact] + public static void Ctor_Validating_FromDelegate_ThrowsIfDelegateProvidesInvalidData() + { + SpanAction spanAction = (span, actualState) => + { + span[0] = 0xFF; // never a valid UTF-8 byte + }; + + Assert.Throws(() => Utf8String.Create(10, new object(), spanAction)); + } + + [Fact] + public static void Ctor_CreateRelaxed_FromDelegate() + { + object expectedState = new object(); + SpanAction spanAction = (span, actualState) => + { + Assert.Same(expectedState, actualState); + Assert.NotEqual(0, span.Length); // shouldn't have been called for a zero-length span + + for (int i = 0; i < span.Length; i++) + { + Assert.Equal(0, span[i]); // should've been zero-inited + span[i] = 0xFF; // never a valid UTF-8 byte + } + }; + + ArgumentException exception = Assert.Throws(() => Utf8String.CreateRelaxed(-1, expectedState, spanAction)); + Assert.Equal("length", exception.ParamName); + + exception = Assert.Throws(() => Utf8String.CreateRelaxed(10, expectedState, action: null)); + Assert.Equal("action", exception.ParamName); + + Assert.Same(Utf8String.Empty, Utf8String.CreateRelaxed(0, expectedState, spanAction)); + + Assert.Equal(u8("\uFFFD\uFFFD"), Utf8String.CreateRelaxed(2, expectedState, spanAction)); + } + } +} diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8TestUtilities.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8TestUtilities.cs index 591dd8f5f0668..1cbbf25a06ea0 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8TestUtilities.cs 
+++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8TestUtilities.cs @@ -33,12 +33,17 @@ public unsafe static bool IsNull(this Utf8Span span) return Unsafe.AreSame(ref Unsafe.AsRef(null), ref MemoryMarshal.GetReference(span.Bytes)); } + /// + /// Parses an expression of the form "a..b" and returns a . + /// + public static Range ParseRangeExpr(string expression) => ParseRangeExpr(expression.AsSpan()); + /// /// Parses an expression of the form "a..b" and returns a . /// public static Range ParseRangeExpr(ReadOnlySpan expression) { - int idxOfDots = expression.IndexOf("..", StringComparison.Ordinal); + int idxOfDots = expression.IndexOf("..".AsSpan(), StringComparison.Ordinal); if (idxOfDots < 0) { goto Error; @@ -57,7 +62,7 @@ public static Range ParseRangeExpr(ReadOnlySpan expression) firstPart = firstPart[1..]; } - if (!int.TryParse(firstPart, NumberStyles.AllowLeadingWhite | NumberStyles.AllowTrailingWhite, CultureInfo.InvariantCulture, out int startIndex)) + if (!int.TryParse(firstPart.ToString(), NumberStyles.AllowLeadingWhite | NumberStyles.AllowTrailingWhite, CultureInfo.InvariantCulture, out int startIndex)) { goto Error; } @@ -78,7 +83,7 @@ public static Range ParseRangeExpr(ReadOnlySpan expression) secondPart = secondPart[1..]; } - if (!int.TryParse(secondPart, NumberStyles.AllowLeadingWhite | NumberStyles.AllowTrailingWhite, CultureInfo.InvariantCulture, out int endIndex)) + if (!int.TryParse(secondPart.ToString(), NumberStyles.AllowLeadingWhite | NumberStyles.AllowTrailingWhite, CultureInfo.InvariantCulture, out int endIndex)) { goto Error; } @@ -147,14 +152,14 @@ public static Utf8String u8(string str) MemoryStream memStream = new MemoryStream(); - Span utf8Bytes = stackalloc byte[4]; // 4 UTF-8 code units is the largest any scalar value can be encoded as + byte[] utf8Bytes = new byte[4]; // 4 UTF-8 code units is the largest any scalar value can be encoded as int index = 0; while (index < str.Length) { if (Rune.TryGetRuneAt(str, index, 
out Rune value) && value.TryEncodeToUtf8(utf8Bytes, out int bytesWritten)) { - memStream.Write(utf8Bytes.Slice(0, bytesWritten)); + memStream.Write(utf8Bytes, 0, bytesWritten); index += value.Utf16SequenceLength; } else From 545e3d24107380cae5f16543da3523e854ef51cf Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 5 Mar 2020 17:40:33 -0600 Subject: [PATCH 14/26] Fix up merge --- .../System.Private.CoreLib.csproj | 4 --- .../src/System/Utf8String.CoreCLR.cs | 29 ------------------- .../System.Private.CoreLib.Shared.projitems | 4 --- .../src/System/Utf8String.Construction.cs | 7 ----- .../src/System.Utf8String.Experimental.csproj | 14 --------- 5 files changed, 58 deletions(-) diff --git a/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj index 94d9e7e217fa0..416252b005cef 100644 --- a/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj +++ b/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj @@ -280,7 +280,6 @@ - @@ -300,12 +299,9 @@ Common\Interop\Windows\OleAut32\Interop.SysAllocStringByteLen.cs -<<<<<<< HEAD -======= ->>>>>>> Move Utf8String code from coreclr to libraries diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Utf8String.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Utf8String.CoreCLR.cs index 9a04b332b24ed..f8ab2a5140b16 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Utf8String.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Utf8String.CoreCLR.cs @@ -263,7 +263,6 @@ private Utf8String Ctor(string value) return Ctor(value.AsSpan()); } -<<<<<<< HEAD /* * METHODS */ @@ -281,15 +280,6 @@ internal ReadOnlySpan AsBytesSkipNullCheck() int length = Length; return new ReadOnlySpan(ref DangerousGetMutableReference(), length); } -======= - /// - /// Returns a mutable reference to the first byte of this - /// (or the null terminator if the string is empty). 
- /// - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal ref byte DangerousGetMutableReference() => ref Unsafe.AsRef(in _firstByte); ->>>>>>> Get Utf8String building on netstandard2.0 /// /// Returns a mutable that can be used to populate this @@ -306,7 +296,6 @@ internal Span DangerousGetMutableSpan() } /// -<<<<<<< HEAD /// Returns a mutable reference to the first byte of this /// (or the null terminator if the string is empty). /// @@ -315,8 +304,6 @@ internal Span DangerousGetMutableSpan() internal ref byte DangerousGetMutableReference() => ref Unsafe.AsRef(in _firstByte); /// -======= ->>>>>>> Get Utf8String building on netstandard2.0 /// Gets an immutable reference that can be used in a statement. The resulting /// reference can be pinned and used as a null-terminated LPCUTF8STR. /// @@ -326,25 +313,9 @@ internal Span DangerousGetMutableSpan() [EditorBrowsable(EditorBrowsableState.Never)] // for compiler use only public ref readonly byte GetPinnableReference() => ref _firstByte; -<<<<<<< HEAD /* * HELPER METHODS */ -======= - /// - /// Similar to , but skips the null check on the input. - /// Throws a if the input is null. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal ReadOnlySpan AsBytesSkipNullCheck() - { - // By dereferencing Length first, the JIT will skip the null check that normally precedes - // most instance method calls, and it'll use the field dereference as the null check. - - int length = Length; - return new ReadOnlySpan(ref DangerousGetMutableReference(), length); - } ->>>>>>> Get Utf8String building on netstandard2.0 /// /// Creates a new zero-initialized instance of the specified length. 
Actual storage allocated is "length + 1" bytes diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 89cb461992c68..b81c1109bd2fe 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1829,8 +1829,4 @@ -<<<<<<< HEAD -======= - ->>>>>>> Move Utf8String code from coreclr to libraries diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs index 83bbaca71f847..d83931cdc8d75 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs @@ -17,13 +17,6 @@ public sealed partial class Utf8String // For values beyond U+FFFF, it's 4 UTF-8 bytes per 2 UTF-16 chars (2:1 ratio) private const int MAX_UTF8_BYTES_PER_UTF16_CHAR = 3; -<<<<<<< HEAD -<<<<<<< HEAD -======= -#endif ->>>>>>> Get Utf8String building on netstandard2.0 -======= ->>>>>>> Build Utf8String Comparison and Enumeration for netstandard. /* * STATIC FACTORIES diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index a8b26adfbca57..6485852d02a37 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -1,25 +1,11 @@  true -<<<<<<< HEAD -<<<<<<< HEAD - true - $(NetCoreAppCurrent)-Windows_NT;$(NetCoreAppCurrent)-Unix - enable -======= -======= $(NoWarn);3019 ->>>>>>> Build for netcoreapp3.0. 
true netstandard2.0;netcoreapp3.0;$(NetCoreAppCurrent)-Windows_NT;$(NetCoreAppCurrent)-Unix enable $(DefineContants);FEATURE_UTF8STRING -<<<<<<< HEAD - - false ->>>>>>> Get Rune working on netstandard2.0 -======= ->>>>>>> Moving code around so it lines up with the ref assemblies. From 5963e60a9d85aebf6370f778735fc009f6a90644 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Fri, 6 Mar 2020 13:51:09 -0600 Subject: [PATCH 15/26] Get Utf8String constructors working on netstandard. --- .../src/System/Utf8String.Construction.cs | 88 +++++++++++++++++++ .../src/System/Utf8String.cs | 9 -- .../src/System/Utf8String.Portable.cs | 18 +++- 3 files changed, 104 insertions(+), 11 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs index d83931cdc8d75..4f1b9446beeea 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs @@ -264,6 +264,93 @@ internal static Utf8String CreateFromRune(Rune value) return newString; } + // Returns 'null' if the input buffer does not represent well-formed UTF-16 data and 'replaceInvalidSequences' is false. + private static byte[]? CreateBufferFromUtf16Common(ReadOnlySpan value, bool replaceInvalidSequences) + { + // Shortcut: Since we expect most strings to be small-ish, first try a one-pass + // operation where we transcode directly on to the stack and then copy the validated + // data into the new Utf8String instance. It's still O(n), but it should have a smaller + // constant factor than a typical "count + transcode" combo. 
+ + OperationStatus status; + byte[] newBuffer; + + if (value.Length <= MAX_STACK_TRANSCODE_CHAR_COUNT /* in chars */) + { + if (value.IsEmpty) + { + return Array.Empty(); + } + + Span scratch = stackalloc byte[MAX_STACK_TRANSCODE_CHAR_COUNT * MAX_UTF8_BYTES_PER_UTF16_CHAR]; // largest possible expansion, as explained below + status = Utf8.FromUtf16(value, scratch, out _, out int scratchBytesWritten, replaceInvalidSequences); + Debug.Assert(status == OperationStatus.Done || status == OperationStatus.InvalidData); + + if (status == OperationStatus.InvalidData) + { + return null; + } + + // At this point we know transcoding succeeded, so the original input data was well-formed. + // We'll memcpy the scratch buffer into the new Utf8String instance, which is very fast. + + newBuffer = new byte[scratchBytesWritten]; + scratch.Slice(0, scratchBytesWritten).CopyTo(newBuffer); + return newBuffer; + } + + // First, determine how many UTF-8 bytes we'll need in order to represent this data. + // This also checks the input data for well-formedness. + + long utf8CodeUnitCountAdjustment; + + unsafe + { + fixed (char* pChars = &MemoryMarshal.GetReference(value)) + { + if (Utf16Utility.GetPointerToFirstInvalidChar(pChars, value.Length, out utf8CodeUnitCountAdjustment, out int _) != (pChars + (uint)value.Length)) + { + return null; + } + } + } + + // The max possible expansion transcoding UTF-16 to UTF-8 is that each input char corresponds + // to 3 UTF-8 bytes. This is most common in CJK languages. Since the input buffer could be + // up to int.MaxValue elements in length, we need to use a 64-bit value to hold the total + // required UTF-8 byte length. However, the VM places restrictions on how large a Utf8String + // instance can be, and the maximum allowed element count is just under int.MaxValue. (This + // mirrors the restrictions already in place for System.String.) 
The VM will throw an + // OutOfMemoryException if anybody tries to create a Utf8String instance larger than that, + // so if we detect any sort of overflow we'll end up passing int.MaxValue down to the allocation + // routine. This normalizes the OutOfMemoryException the caller sees. + + long totalUtf8BytesRequired = (uint)value.Length + utf8CodeUnitCountAdjustment; + if (totalUtf8BytesRequired > int.MaxValue) + { + totalUtf8BytesRequired = int.MaxValue; + } + + // We can get away with FastAllocateSkipZeroInit here because we're not going to return the + // new Utf8String instance to the caller if we don't overwrite every byte of the buffer. + + newBuffer = new byte[(int)totalUtf8BytesRequired]; + + // Now transcode the UTF-16 input into the newly allocated Utf8String's buffer. We can't call the + // "skip validation" transcoder because the caller could've mutated the input buffer between the + // initial counting step and the transcoding step below. + + status = Utf8.FromUtf16(value, newBuffer, out _, out int bytesWritten, replaceInvalidSequences: false); + if (status != OperationStatus.Done || bytesWritten != newBuffer.Length) + { + // Did somebody mutate our input buffer? Shouldn't be any other way this could happen. + + return null; + } + + return newBuffer; + } + /// /// Creates a new instance populated with a copy of the provided contents. /// Please see remarks for important safety information about this method. @@ -287,6 +374,7 @@ public static Utf8String UnsafeCreateWithoutValidation(ReadOnlySpan utf8Co Utf8String newString = FastAllocateSkipZeroInit(utf8Contents.Length); utf8Contents.CopyTo(newString.DangerousGetMutableSpan()); + // TODO_UTF8STRING: Zero-init was skipped above, but we didn't null-terminate the string // The line below is removed entirely in release builds. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs index 8479affa99b35..58a923ee1d1e4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs @@ -56,10 +56,6 @@ public sealed partial class Utf8String : IComparable, IEquatable new Utf8Span(value); /* -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Everything building * INDEXERS */ @@ -80,11 +76,6 @@ public Utf8String this[Range range] } /* -<<<<<<< HEAD -======= ->>>>>>> Get Utf8String building on netstandard2.0 -======= ->>>>>>> Everything building * METHODS */ diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs index 0b1c1cfaf4c71..30b3bd75bea72 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs @@ -16,7 +16,7 @@ public sealed partial class Utf8String /// /// Returns the length (in UTF-8 code units, or s) of this instance. /// - public int Length => _bytes.Length - 1; // -1 because the bytes are always null-terminated + public int Length => _bytes.Length > 0 ? - 1 : 0; // -1 because the bytes are always null-terminated public Utf8String(ReadOnlySpan value) { @@ -52,7 +52,21 @@ public unsafe Utf8String(char* value) public Utf8String(string value) { - _bytes = Array.Empty(); //TODO: eerhardt + if (value is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); + } + + _bytes = CreateBufferFromUtf16Common(value.AsSpan(), replaceInvalidSequences: false); + + if (_bytes is null) + { + // Input buffer contained invalid UTF-16 data. 
+ + throw new ArgumentException( + message: SR.Utf8String_InputContainedMalformedUtf16, + paramName: nameof(value)); + } } private Utf8String(byte[] bytes) From f6b917c2cc6e8f8941a04592f95b1ef00134ce79 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Sat, 7 Mar 2020 07:43:58 -0600 Subject: [PATCH 16/26] Get some tests running on net472 --- .../src/System/Utf8String.Comparison.cs | 2 +- .../src/System/Utf8String.Construction.cs | 4 ++-- .../src/System/Utf8String.Manipulation.cs | 4 ++-- .../src/System/Utf8String.cs | 6 +++--- .../ref/System.Utf8String.Experimental.cs | 1 - ...stem.Utf8String.Experimental.netcoreapp5.cs | 7 +++++++ .../src/System/Utf8Extensions.Portable.cs | 6 +++--- .../src/System/Utf8String.Portable.cs | 18 +++++++++++++++--- .../tests/System/Utf8SpanTests.Conversion.cs | 4 ++-- 9 files changed, 35 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs index 0417789ae498c..8f98055613d35 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs @@ -181,7 +181,7 @@ public bool Contains(Rune value) ref DangerousGetMutableReference(), Length, ref MemoryMarshal.GetReference(runeBytes), runeBytesWritten) >= 0; #else - return DangerousGetMutableSpan() + return GetSpan() .IndexOf(runeBytes.Slice(0, runeBytesWritten)) >= 0; #endif } diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs index 4f1b9446beeea..e975f2bec8567 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs @@ -294,7 +294,7 @@ internal static Utf8String CreateFromRune(Rune value) // At this point we know transcoding succeeded, so 
the original input data was well-formed. // We'll memcpy the scratch buffer into the new Utf8String instance, which is very fast. - newBuffer = new byte[scratchBytesWritten]; + newBuffer = new byte[scratchBytesWritten + 1]; // null-terminated scratch.Slice(0, scratchBytesWritten).CopyTo(newBuffer); return newBuffer; } @@ -334,7 +334,7 @@ internal static Utf8String CreateFromRune(Rune value) // We can get away with FastAllocateSkipZeroInit here because we're not going to return the // new Utf8String instance to the caller if we don't overwrite every byte of the buffer. - newBuffer = new byte[(int)totalUtf8BytesRequired]; + newBuffer = new byte[(int)totalUtf8BytesRequired + 1]; // null-terminated // Now transcode the UTF-16 input into the newly allocated Utf8String's buffer. We can't call the // "skip validation" transcoder because the caller could've mutated the input buffer between the diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs index 18a577a6cc693..4a6e4625d7ecf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs @@ -68,7 +68,7 @@ private Utf8String InternalSubstring(int startIndex, int length) #if SYSTEM_PRIVATE_CORELIB Buffer.Memmove(ref newString.DangerousGetMutableReference(), ref this.DangerousGetMutableReference(startIndex), (uint)length); #else - this.DangerousGetMutableSpan().Slice(startIndex, length).CopyTo(newString.DangerousGetMutableSpan()); + this.GetSpan().Slice(startIndex, length).CopyTo(newString.DangerousGetMutableSpan()); #endif return newString; @@ -101,7 +101,7 @@ private Utf8String InternalSubstringWithoutCorrectnessChecks(int startIndex, int #if SYSTEM_PRIVATE_CORELIB Buffer.Memmove(ref newString.DangerousGetMutableReference(), ref this.DangerousGetMutableReference(startIndex), (uint)length); #else - 
this.DangerousGetMutableSpan().Slice(startIndex, length).CopyTo(newString.DangerousGetMutableSpan()); + this.GetSpan().Slice(startIndex, length).CopyTo(newString.DangerousGetMutableSpan()); #endif return newString; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs index 58a923ee1d1e4..3790026484f6f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs @@ -159,7 +159,7 @@ public bool Equals(Utf8String? value) #if CORECLR && SpanHelpers.SequenceEqual(ref this.DangerousGetMutableReference(), ref value.DangerousGetMutableReference(), (uint)Length); #else - && this.DangerousGetMutableSpan().SequenceEqual(value.DangerousGetMutableSpan()); + && this.GetSpan().SequenceEqual(value.GetSpan()); #endif } @@ -188,7 +188,7 @@ public static bool Equals(Utf8String? left, Utf8String? right) #if CORECLR && SpanHelpers.SequenceEqual(ref left.DangerousGetMutableReference(), ref right.DangerousGetMutableReference(), (uint)left.Length); #else - && left.DangerousGetMutableSpan().SequenceEqual(right.DangerousGetMutableSpan()); + && left.GetSpan().SequenceEqual(right.GetSpan()); #endif } @@ -275,7 +275,7 @@ public override string ToString() byte[] buffer = ArrayPool.Shared.Rent(Length); try { - _bytes.CopyTo(buffer.AsSpan()); + _bytes.AsSpan(0, Length).CopyTo(buffer.AsSpan()); return Encoding.UTF8.GetString(buffer, 0, Length); } finally diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs index 795d73f913651..a471001f0569c 100644 --- a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs @@ -269,7 +269,6 @@ public Utf8StringContent(System.Utf8String content) { } public 
Utf8StringContent(System.Utf8String content, string? mediaType) { } protected override System.Threading.Tasks.Task CreateContentReadStreamAsync() { throw null; } protected override System.Threading.Tasks.Task SerializeToStreamAsync(System.IO.Stream stream, System.Net.TransportContext? context) { throw null; } - protected override System.Threading.Tasks.Task SerializeToStreamAsync(System.IO.Stream stream, System.Net.TransportContext? context, System.Threading.CancellationToken cancellationToken) { throw null; } protected override bool TryComputeLength(out long length) { throw null; } } } diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.cs index bc2c0ff22f6d8..d65563d272f25 100644 --- a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.cs +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.cs @@ -16,3 +16,10 @@ public static partial class Utf8Extensions public static System.ReadOnlyMemory AsMemory(this System.Utf8String? text, System.Range range) { throw null; } } } +namespace System.Net.Http +{ + public sealed partial class Utf8StringContent : System.Net.Http.HttpContent + { + protected override System.Threading.Tasks.Task SerializeToStreamAsync(System.IO.Stream stream, System.Net.TransportContext? 
context, System.Threading.CancellationToken cancellationToken) { throw null; } + } +} diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs index fca325f2ff4d3..84a03a37d84a5 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8Extensions.Portable.cs @@ -9,15 +9,15 @@ namespace System public static partial class Utf8Extensions { [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static ReadOnlySpan CreateSpan(Utf8String text) => text.DangerousGetMutableSpan(); + private static ReadOnlySpan CreateSpan(Utf8String text) => text.GetSpan(); [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ReadOnlySpan CreateSpan(Utf8String text, int start) => - text.DangerousGetMutableSpan().Slice(start); + text.GetSpan().Slice(start); [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ReadOnlySpan CreateSpan(Utf8String text, int start, int length) => - text.DangerousGetMutableSpan().Slice(start, length); + text.GetSpan().Slice(start, length); [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ReadOnlyMemory CreateMemoryBytes(Utf8String text, int start, int length) => diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs index 30b3bd75bea72..22e037f867982 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. 
using System.ComponentModel; +using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -16,7 +17,7 @@ public sealed partial class Utf8String /// /// Returns the length (in UTF-8 code units, or s) of this instance. /// - public int Length => _bytes.Length > 0 ? - 1 : 0; // -1 because the bytes are always null-terminated + public int Length => _bytes.Length == 0 ? 0 : _bytes.Length - 1; // -1 because the bytes are always null-terminated public Utf8String(ReadOnlySpan value) { @@ -81,14 +82,25 @@ private Utf8String(byte[] bytes) /// [MethodImpl(MethodImplOptions.AggressiveInlining)] internal ref byte DangerousGetMutableReference() => - ref MemoryMarshal.GetReference(_bytes.Length > 0 ? _bytes.AsSpan() : s_EmptyRef); + ref MemoryMarshal.GetReference(Length > 0 ? _bytes.AsSpan() : s_EmptyRef); /// /// Returns a mutable that can be used to populate this /// instance. Only to be used during construction. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal Span DangerousGetMutableSpan() => _bytes; + internal Span DangerousGetMutableSpan() + { + Debug.Assert(Length > 0, $"This should only ever be called on a non-empty {nameof(Utf8String)}."); + return _bytes.AsSpan(0, Length); + } + + /// + /// Returns a for this + /// instance. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal ReadOnlySpan GetSpan() => Length > 0 ? _bytes.AsSpan(0, Length) : s_EmptyRef.Slice(0, 0); /// /// Gets an immutable reference that can be used in a statement. 
The resulting diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Conversion.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Conversion.cs index 9427c59340f33..4057a855e9edd 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Conversion.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Conversion.cs @@ -159,7 +159,7 @@ static void RunTest(string testData, string expected, CultureInfo culture) foreach (int bufferSize in new[] { expectedUtf8.Length, expectedUtf8.Length + 1 }) { - byte[] buffer = new byte[expectedUtf8.Length]; + byte[] buffer = new byte[bufferSize]; if (culture is null) { @@ -170,7 +170,7 @@ static void RunTest(string testData, string expected, CultureInfo culture) Assert.Equal(expectedUtf8.Length, inputSpan.ToUpper(buffer, culture)); } - Assert.True(expectedUtf8.AsBytes().SequenceEqual(buffer)); + Assert.True(expectedUtf8.AsBytes().SequenceEqual(buffer.AsSpan(0, expectedUtf8.Length))); } } From 7c6e17d2c7126c3f4f066397aa2e87a7912424fd Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Sat, 7 Mar 2020 08:32:08 -0600 Subject: [PATCH 17/26] More tests running on netfx --- eng/referenceFromRuntime.targets | 2 +- .../Utf8Utility.WhiteSpace.NonCoreLib.cs | 4 +- .../src/System/Utf8String.Portable.cs | 2 +- ...ystem.Utf8String.Experimental.Tests.csproj | 3 ++ .../tests/System/MemoryTests.cs | 23 ---------- .../tests/System/MemoryTests.netcoreapp.cs | 23 ++++++++++ .../tests/System/MemoryTests.netfx.cs | 43 +++++++++++++++++++ 7 files changed, 73 insertions(+), 27 deletions(-) create mode 100644 src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netfx.cs diff --git a/eng/referenceFromRuntime.targets b/eng/referenceFromRuntime.targets index c428eb61d5807..8666173afdf46 100644 --- a/eng/referenceFromRuntime.targets +++ b/eng/referenceFromRuntime.targets @@ -106,7 +106,7 @@ - diff --git 
a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.NonCoreLib.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.NonCoreLib.cs index 86972da375612..214f01a66e908 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.NonCoreLib.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.WhiteSpace.NonCoreLib.cs @@ -24,7 +24,7 @@ public static int GetIndexOfFirstNonWhiteSpaceChar(ReadOnlySpan utf8Data) // Very quick check: see if the byte is in the range [ 21 .. 7F ]. // If so, we can skip the more expensive logic later in this method. - if (utf8Data[i] > (sbyte)0x20) + if ((sbyte)utf8Data[i] > (sbyte)0x20) { break; } @@ -102,7 +102,7 @@ public static int GetIndexOfTrailingWhiteSpaceSequence(ReadOnlySpan utf8Da // Not ASCII data. Go back to the slower "decode the entire scalar" // code path, then compare it against our Unicode tables. - Rune.DecodeLastFromUtf8(utf8Data.Slice(length), out Rune decodedRune, out int bytesConsumed); + Rune.DecodeLastFromUtf8(utf8Data.Slice(0, length), out Rune decodedRune, out int bytesConsumed); if (Rune.IsWhiteSpace(decodedRune)) { length -= bytesConsumed; diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs index 22e037f867982..ee4e676b314e4 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs @@ -117,7 +117,7 @@ internal Span DangerousGetMutableSpan() /// Throws a if the input is null. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal ReadOnlySpan AsBytesSkipNullCheck() => _bytes; + internal ReadOnlySpan AsBytesSkipNullCheck() => GetSpan(); /// /// Creates a new zero-initialized instance of the specified length. 
Actual storage allocated is "length + 1" bytes diff --git a/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj b/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj index 30b912642e976..cd50a39144860 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj +++ b/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj @@ -43,4 +43,7 @@ + + + \ No newline at end of file diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.cs b/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.cs index bacf672d87d4f..665bc978285c5 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.cs @@ -14,15 +14,6 @@ namespace System.Tests [SkipOnMono("The features from System.Utf8String.Experimental namespace are experimental.")] public partial class MemoryTests { - [Fact] - public static void MemoryMarshal_TryGetArrayOfByte_Utf8String() - { - ReadOnlyMemory rom = u8("Hello").AsMemoryBytes(); - - Assert.False(MemoryMarshal.TryGetArray(rom, out ArraySegment segment)); - Assert.True(default(ArraySegment).Equals(segment)); - } - [Fact] public static unsafe void MemoryOfByte_WithUtf8String_Pin() { @@ -77,25 +68,11 @@ public static void ReadOnlySpanOfByte_ToString() Assert.Equal("System.ReadOnlySpan[2]", span.ToString()); } - [Fact] - public static void ReadOnlySpanOfChar8_ToString() - { - ReadOnlySpan span = stackalloc Char8[] { (Char8)'H', (Char8)'i' }; - Assert.Equal("Hi", span.ToString()); - } - [Fact] public static void SpanOfByte_ToString() { Span span = stackalloc byte[] { (byte)'H', (byte)'i' }; Assert.Equal("System.Span[2]", span.ToString()); } - - [Fact] - public static void SpanOfChar8_ToString() - { - Span span = stackalloc Char8[] { (Char8)'H', (Char8)'i' }; 
- Assert.Equal("Hi", span.ToString()); - } } } diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netcoreapp.cs b/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netcoreapp.cs index f1f8b4366d8ad..4a080cdc6b19e 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netcoreapp.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netcoreapp.cs @@ -13,6 +13,15 @@ namespace System.Tests { public partial class MemoryTests { + [Fact] + public static void MemoryMarshal_TryGetArrayOfByte_Utf8String() + { + ReadOnlyMemory rom = u8("Hello").AsMemoryBytes(); + + Assert.False(MemoryMarshal.TryGetArray(rom, out ArraySegment segment)); + Assert.True(default(ArraySegment).Equals(segment)); + } + [Fact] public static void MemoryMarshal_TryGetArrayOfChar8_Utf8String() { @@ -68,5 +77,19 @@ public static void ReadOnlyMemoryOfChar8_WithUtf8String_ToString() { Assert.Equal("Hello", u8("Hello").AsMemory().ToString()); } + + [Fact] + public static void ReadOnlySpanOfChar8_ToString() + { + ReadOnlySpan span = stackalloc Char8[] { (Char8)'H', (Char8)'i' }; + Assert.Equal("Hi", span.ToString()); + } + + [Fact] + public static void SpanOfChar8_ToString() + { + Span span = stackalloc Char8[] { (Char8)'H', (Char8)'i' }; + Assert.Equal("Hi", span.ToString()); + } } } diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netfx.cs b/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netfx.cs new file mode 100644 index 0000000000000..8238c0a08be56 --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/tests/System/MemoryTests.netfx.cs @@ -0,0 +1,43 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Runtime.InteropServices; +using Xunit; + +using static System.Tests.Utf8TestUtilities; + +namespace System.Tests +{ + public partial class MemoryTests + { + [Fact] + public static void MemoryMarshal_TryGetArrayOfByte_Utf8String() + { + ReadOnlyMemory rom = u8("Hello").AsMemoryBytes(); + + Assert.True(MemoryMarshal.TryGetArray(rom, out ArraySegment segment)); + Assert.NotNull(segment.Array); + Assert.Equal(0, segment.Offset); + Assert.Equal(5, segment.Count); + } + + [Fact] + public static void ReadOnlySpanOfChar8_ToString() + { + // unable to override ReadOnlySpan.ToString on netfx + + ReadOnlySpan span = stackalloc Char8[] { (Char8)'H', (Char8)'i' }; + Assert.Equal("System.ReadOnlySpan[2]", span.ToString()); + } + + [Fact] + public static void SpanOfChar8_ToString() + { + // unable to override Span.ToString on netfx + + Span span = stackalloc Char8[] { (Char8)'H', (Char8)'i' }; + Assert.Equal("System.Span[2]", span.ToString()); + } + } +} From 5c2aba599ba46d5b8b307b5f3944aaf1246af8c0 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Sun, 8 Mar 2020 17:44:23 -0500 Subject: [PATCH 18/26] Get all tests running on netfx. 
--- .../src/System/Text/Unicode/Utf8Utility.cs | 36 +++++- .../src/System/Text/Utf8Span.Searching.cs | 9 +- .../src/System/Utf8String.Construction.cs | 4 +- .../src/System/Utf8String.cs | 2 +- .../src/Resources/Strings.resx | 3 + .../src/System/Utf8String.Portable.cs | 115 ++++++++++++++++-- ...ystem.Utf8String.Experimental.Tests.csproj | 2 + .../tests/System/ReflectionTests.cs | 9 -- .../System/ReflectionTests.netcoreapp.cs | 21 ++++ .../tests/System/ReflectionTests.netfx.cs | 21 ++++ .../Utf8SpanTests.Searching.TestData.cs | 4 + .../tests/System/Utf8StringTests.Ctor.cs | 28 +++-- .../tests/System/Utf8StringTests.cs | 5 + 13 files changed, 227 insertions(+), 32 deletions(-) create mode 100644 src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netcoreapp.cs create mode 100644 src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netfx.cs diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs index 247013a1f9076..76d5bb4799ae3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs @@ -25,7 +25,11 @@ internal static partial class Utf8Utility /// /// The UTF-8 representation of . /// +#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP private static ReadOnlySpan ReplacementCharSequence => new byte[] { 0xEF, 0xBF, 0xBD }; +#else // NETSTANDARD + private static readonly byte[] ReplacementCharSequence = new byte[] { 0xEF, 0xBF, 0xBD }; +#endif /// /// Returns the byte index in where the first invalid UTF-8 sequence begins, @@ -86,7 +90,7 @@ public static Utf8String ValidateAndFixupUtf8String(Utf8String value) // (The faster implementation is in the dev/utf8string_bak branch currently.) 
MemoryStream memStream = new MemoryStream(); -#if CORECLR || NETCOREAPP // TODO: eerhardt +#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData)); valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData); @@ -105,6 +109,36 @@ public static Utf8String ValidateAndFixupUtf8String(Utf8String value) valueAsBytes = valueAsBytes.Slice(bytesConsumed); } while (!valueAsBytes.IsEmpty); +#else + if (!MemoryMarshal.TryGetArray(value.AsMemoryBytes(), out ArraySegment valueArraySegment)) + { + Debug.Fail("Utf8String on netstandard should always be backed by an array."); + } + + memStream.Write(valueArraySegment.Array, valueArraySegment.Offset, idxOfFirstInvalidData); + + valueArraySegment = new ArraySegment( + valueArraySegment.Array, + idxOfFirstInvalidData, + valueArraySegment.Count - idxOfFirstInvalidData); + do + { + if (Rune.DecodeFromUtf8(valueArraySegment, out _, out int bytesConsumed) == OperationStatus.Done) + { + // Valid scalar value - copy data as-is to MemoryStream + memStream.Write(valueArraySegment.Array, valueArraySegment.Offset, bytesConsumed); + } + else + { + // Invalid scalar value - copy U+FFFD to MemoryStream + memStream.Write(ReplacementCharSequence, 0, ReplacementCharSequence.Length); + } + + valueArraySegment = new ArraySegment( + valueArraySegment.Array, + valueArraySegment.Offset + bytesConsumed, + valueArraySegment.Count - bytesConsumed); + } while (valueArraySegment.Count > 0); #endif bool success = memStream.TryGetBuffer(out ArraySegment memStreamBuffer); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs index 6ebe789d0c083..178238747d391 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs @@ -276,7 +276,14 @@ private unsafe bool TryFind(Utf8Span value, StringComparison 
comparisonType, out idx = compareInfo.IndexOf(thisTranscodedToUtf16, otherTranscodedToUtf16, 0, thisTranscodedToUtf16.Length, compareOptions, &matchLength, fromBeginning); } #else - idx = compareInfo.IndexOf(thisTranscodedToUtf16, otherTranscodedToUtf16, 0, thisTranscodedToUtf16.Length, compareOptions); + if (fromBeginning) + { + idx = compareInfo.IndexOf(thisTranscodedToUtf16, otherTranscodedToUtf16, 0, thisTranscodedToUtf16.Length, compareOptions); + } + else + { + idx = compareInfo.LastIndexOf(thisTranscodedToUtf16, otherTranscodedToUtf16, thisTranscodedToUtf16.Length, thisTranscodedToUtf16.Length, compareOptions); + } // TODO_UTF8STRING: matchLength is not correct here. Need to figure this out outside of CoreLib. matchLength = otherTranscodedToUtf16.Length; #endif diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs index e975f2bec8567..9b28bb6aafe78 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs @@ -264,6 +264,7 @@ internal static Utf8String CreateFromRune(Rune value) return newString; } +#if !SYSTEM_PRIVATE_CORELIB // Returns 'null' if the input buffer does not represent well-formed UTF-16 data and 'replaceInvalidSequences' is false. private static byte[]? CreateBufferFromUtf16Common(ReadOnlySpan value, bool replaceInvalidSequences) { @@ -279,7 +280,7 @@ internal static Utf8String CreateFromRune(Rune value) { if (value.IsEmpty) { - return Array.Empty(); + return Utf8String.Empty._bytes; } Span scratch = stackalloc byte[MAX_STACK_TRANSCODE_CHAR_COUNT * MAX_UTF8_BYTES_PER_UTF16_CHAR]; // largest possible expansion, as explained below @@ -350,6 +351,7 @@ internal static Utf8String CreateFromRune(Rune value) return newBuffer; } +#endif /// /// Creates a new instance populated with a copy of the provided contents. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs index 3790026484f6f..89aa5d7054d04 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs @@ -275,7 +275,7 @@ public override string ToString() byte[] buffer = ArrayPool.Shared.Rent(Length); try { - _bytes.AsSpan(0, Length).CopyTo(buffer.AsSpan()); + GetSpan().CopyTo(buffer.AsSpan()); return Encoding.UTF8.GetString(buffer, 0, Length); } finally diff --git a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx index 8b82fef51b2bd..b7b4320da1d12 100644 --- a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx +++ b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx @@ -141,6 +141,9 @@ Illegal enum value: {0}. + + The string must be null-terminated. + The string comparison type passed in is currently not supported. 
diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs index ee4e676b314e4..49ef41d22bdb2 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs @@ -4,14 +4,15 @@ using System.ComponentModel; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Text.Unicode; namespace System { public sealed partial class Utf8String { - private static ReadOnlySpan s_EmptyRef => new byte[] { 0x00 }; private readonly byte[] _bytes; /// @@ -21,34 +22,54 @@ public sealed partial class Utf8String public Utf8String(ReadOnlySpan value) { - _bytes = Array.Empty(); //TODO: eerhardt + _bytes = InitializeBuffer(value); } public Utf8String(byte[] value, int startIndex, int length) { - _bytes = Array.Empty(); //TODO: eerhardt + if (value is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); + } + + _bytes = InitializeBuffer(new ReadOnlySpan(value, startIndex, length)); } [CLSCompliant(false)] public unsafe Utf8String(byte* value) { - _bytes = Array.Empty(); //TODO: eerhardt + if (value is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); + } + + _bytes = InitializeBuffer(new ReadOnlySpan(value, strlen(value))); } public Utf8String(ReadOnlySpan value) { - _bytes = Array.Empty(); //TODO: eerhardt + _bytes = InitializeBuffer(value); } public Utf8String(char[] value, int startIndex, int length) { - _bytes = Array.Empty(); //TODO: eerhardt + if (value is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); + } + + _bytes = InitializeBuffer(new ReadOnlySpan(value, startIndex, length)); } [CLSCompliant(false)] public unsafe Utf8String(char* value) { - _bytes = Array.Empty(); //TODO: 
eerhardt + if (value == null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); + } + + _bytes = InitializeBuffer(new ReadOnlySpan(value, wcslen(value))); } public Utf8String(string value) @@ -58,9 +79,39 @@ public Utf8String(string value) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.value); } - _bytes = CreateBufferFromUtf16Common(value.AsSpan(), replaceInvalidSequences: false); + _bytes = InitializeBuffer(value.AsSpan()); + } + + private static byte[] InitializeBuffer(ReadOnlySpan value) + { + if (value.IsEmpty) + { + return Empty._bytes; + } + + // Create and populate the Utf8String buffer. + + byte[] newBuffer = AllocateBuffer(value.Length); + value.CopyTo(newBuffer); - if (_bytes is null) + // Now perform validation. + // Reminder: Perform validation over the copy, not over the source. + + if (!Utf8Utility.IsWellFormedUtf8(newBuffer)) + { + throw new ArgumentException( + message: SR.Utf8String_InputContainedMalformedUtf8, + paramName: nameof(value)); + } + + return newBuffer; + } + + private static byte[] InitializeBuffer(ReadOnlySpan value) + { + byte[] newBuffer = CreateBufferFromUtf16Common(value, replaceInvalidSequences: false); + + if (newBuffer is null) { // Input buffer contained invalid UTF-16 data. @@ -68,6 +119,8 @@ public Utf8String(string value) message: SR.Utf8String_InputContainedMalformedUtf16, paramName: nameof(value)); } + + return newBuffer; } private Utf8String(byte[] bytes) @@ -82,7 +135,7 @@ private Utf8String(byte[] bytes) /// [MethodImpl(MethodImplOptions.AggressiveInlining)] internal ref byte DangerousGetMutableReference() => - ref MemoryMarshal.GetReference(Length > 0 ? _bytes.AsSpan() : s_EmptyRef); + ref MemoryMarshal.GetReference(_bytes.AsSpan()); /// /// Returns a mutable that can be used to populate this @@ -100,7 +153,7 @@ internal Span DangerousGetMutableSpan() /// instance. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal ReadOnlySpan GetSpan() => Length > 0 ? 
_bytes.AsSpan(0, Length) : s_EmptyRef.Slice(0, 0); + internal ReadOnlySpan GetSpan() => _bytes.AsSpan(0, Length); /// /// Gets an immutable reference that can be used in a statement. The resulting @@ -129,7 +182,45 @@ internal Span DangerousGetMutableSpan() private static Utf8String FastAllocate(int length) { // just simulate a "fast allocate", since this is portable - return new Utf8String(new byte[length + 1]); + return new Utf8String(AllocateBuffer(length)); + } + + private static byte[] AllocateBuffer(int length) + { + // Actual storage allocated is "length + 1" bytes because instances are null-terminated. + return new byte[length + 1]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static unsafe int wcslen(char* ptr) + { + // IndexOf processes memory in aligned chunks, and thus it won't crash even if it accesses memory beyond the null terminator. + int length = new ReadOnlySpan(ptr, int.MaxValue).IndexOf('\0'); + if (length < 0) + { + ThrowMustBeNullTerminatedString(); + } + + return length; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe int strlen(byte* ptr) + { + // IndexOf processes memory in aligned chunks, and thus it won't crash even if it accesses memory beyond the null terminator. 
+ int length = new ReadOnlySpan(ptr, int.MaxValue).IndexOf((byte)'\0'); + if (length < 0) + { + ThrowMustBeNullTerminatedString(); + } + + return length; + } + + [DoesNotReturn] + private static void ThrowMustBeNullTerminatedString() + { + throw new ArgumentException(SR.Arg_MustBeNullTerminatedString); } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj b/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj index cd50a39144860..3ecdaeeda2d3e 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj +++ b/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj @@ -40,10 +40,12 @@ + + \ No newline at end of file diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.cs b/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.cs index e45e8ddca07cc..5f5d9d71fab1a 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.cs @@ -24,14 +24,5 @@ public static void ActivatorCreateInstance_CannotCallParameterlessCtor() { Assert.Throws(() => Activator.CreateInstance(typeof(Utf8String))); } - - [Fact] - public static void FormatterServices_GetUninitializedObject_Throws() - { - // Like String, shouldn't be able to create an uninitialized Utf8String. 
- - Assert.Throws(() => FormatterServices.GetSafeUninitializedObject(typeof(Utf8String))); - Assert.Throws(() => FormatterServices.GetUninitializedObject(typeof(Utf8String))); - } } } diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netcoreapp.cs b/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netcoreapp.cs new file mode 100644 index 0000000000000..4dc36558d6c1e --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netcoreapp.cs @@ -0,0 +1,21 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.Serialization; +using Xunit; + +namespace System.Tests +{ + public partial class ReflectionTests + { + [Fact] + public static void FormatterServices_GetUninitializedObject_Throws() + { + // Like String, shouldn't be able to create an uninitialized Utf8String. + + Assert.Throws(() => FormatterServices.GetSafeUninitializedObject(typeof(Utf8String))); + Assert.Throws(() => FormatterServices.GetUninitializedObject(typeof(Utf8String))); + } + } +} diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netfx.cs b/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netfx.cs new file mode 100644 index 0000000000000..7289065493e4e --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netfx.cs @@ -0,0 +1,21 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Runtime.Serialization; +using Xunit; + +namespace System.Tests +{ + public partial class ReflectionTests + { + [Fact] + public static void FormatterServices_GetUninitializedObject_DoesntThrow() + { + // when OOB, we are unable to prevent FormatterServices from creating an uninitialized Utf8String + + Assert.NotNull(FormatterServices.GetSafeUninitializedObject(typeof(Utf8String))); + Assert.NotNull(FormatterServices.GetUninitializedObject(typeof(Utf8String))); + } + } +} diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.TestData.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.TestData.cs index 19ccfd41518a1..2b77ccf123511 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.TestData.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.TestData.cs @@ -499,6 +499,7 @@ private static IEnumerable TryFindData_All() ExpectedFirstMatch = null, ExpectedLastMatch = null, }, +#if !NETFRAMEWORK new TryFindTestData { // Turkish I, case-sensitive @@ -509,6 +510,7 @@ private static IEnumerable TryFindData_All() ExpectedFirstMatch = 0..1, ExpectedLastMatch = 0..1, }, +#endif new TryFindTestData { // Turkish I, case-insensitive @@ -529,6 +531,7 @@ private static IEnumerable TryFindData_All() ExpectedFirstMatch = null, ExpectedLastMatch = null, }, +#if !NETFRAMEWORK new TryFindTestData { // denormalized forms, case-sensitive @@ -549,6 +552,7 @@ private static IEnumerable TryFindData_All() ExpectedFirstMatch = 3..6, ExpectedLastMatch = ^3.., }, +#endif }; return testDataEntries; diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs index a7c7a9c5a1bbb..a0142b60dba73 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs +++ 
b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs @@ -17,7 +17,7 @@ public unsafe partial class Utf8StringTests public static void Ctor_ByteArrayOffset_Empty_ReturnsEmpty() { byte[] inputData = new byte[] { (byte)'H', (byte)'e', (byte)'l', (byte)'l', (byte)'o' }; - Assert.Same(Utf8String.Empty, new Utf8String(inputData, 3, 0)); + AssertSameAsEmpty(new Utf8String(inputData, 3, 0)); } [Fact] @@ -67,7 +67,7 @@ public static void Ctor_BytePointer_Empty_ReturnsEmpty() using (BoundedMemory boundedMemory = BoundedMemory.AllocateFromExistingData(inputData)) { - Assert.Same(Utf8String.Empty, new Utf8String((byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)))); + AssertSameAsEmpty(new Utf8String((byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)))); } } @@ -96,7 +96,7 @@ public static void Ctor_BytePointer_InvalidData_Throws() [Fact] public static void Ctor_ByteSpan_Empty_ReturnsEmpty() { - Assert.Same(Utf8String.Empty, new Utf8String(ReadOnlySpan.Empty)); + AssertSameAsEmpty(new Utf8String(ReadOnlySpan.Empty)); } [Fact] @@ -121,7 +121,7 @@ public static void Ctor_ByteSpan_InvalidData_Throws() public static void Ctor_CharArrayOffset_Empty_ReturnsEmpty() { char[] inputData = "H\U00012345ello".ToCharArray(); // ok to have an empty slice in the middle of a multi-byte subsequence - Assert.Same(Utf8String.Empty, new Utf8String(inputData, 3, 0)); + AssertSameAsEmpty(new Utf8String(inputData, 3, 0)); } [Fact] @@ -171,7 +171,7 @@ public static void Ctor_CharPointer_Empty_ReturnsEmpty() using (BoundedMemory boundedMemory = BoundedMemory.AllocateFromExistingData(inputData)) { - Assert.Same(Utf8String.Empty, new Utf8String((char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)))); + AssertSameAsEmpty(new Utf8String((char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(boundedMemory.Span)))); } } @@ -200,7 +200,7 @@ public static void Ctor_CharPointer_InvalidData_Throws() [Fact] public 
static void Ctor_CharSpan_Empty_ReturnsEmpty() { - Assert.Same(Utf8String.Empty, new Utf8String(ReadOnlySpan.Empty)); + AssertSameAsEmpty(new Utf8String(ReadOnlySpan.Empty)); } [Fact] @@ -231,7 +231,7 @@ public static void Ctor_String_Null_Throws() [Fact] public static void Ctor_String_Empty_ReturnsEmpty() { - Assert.Same(Utf8String.Empty, new Utf8String(string.Empty)); + AssertSameAsEmpty(new Utf8String(string.Empty)); } [Fact] @@ -320,5 +320,19 @@ public static void Ctor_TryCreateFrom_Utf16() Assert.False(Utf8String.TryCreateFrom("\uD800x".AsSpan(), out value)); Assert.Null(value); } + + private static void AssertSameAsEmpty(Utf8String value) + { +#if NETFRAMEWORK + // When OOB, we can't change the actual object returned from a constructor. + // So just assert the underlying "_bytes" is the same. + Assert.Equal(0, value.Length); + Assert.True(Unsafe.AreSame( + ref Unsafe.AsRef(in Utf8String.Empty.GetPinnableReference()), + ref Unsafe.AsRef(in value.GetPinnableReference()))); +#else + Assert.Same(Utf8String.Empty, new Utf8String(ReadOnlySpan.Empty)); +#endif + } } } diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.cs index 12b8ad587d488..260382383a68a 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.cs @@ -234,7 +234,12 @@ public static void IsNullOrWhiteSpace(string input, bool expected) [Fact] public static void ToByteArray_Empty() { +#if NETFRAMEWORK + // An empty Span.ToArray doesn't return Array.Empty on netfx + Assert.Equal(Array.Empty(), Utf8String.Empty.ToByteArray()); +#else Assert.Same(Array.Empty(), Utf8String.Empty.ToByteArray()); +#endif } [Fact] From 7daf535984e522c7a6cfcd29034740d263af995d Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Sun, 8 Mar 2020 21:02:04 -0500 Subject: [PATCH 19/26] Fix build for 
netcoreapp3.0, which doesn't have SerializeToStreamAsync with cancellation. --- .../src/System.Utf8String.Experimental.csproj | 5 +++ .../src/System/Net/Http/Utf8StringContent.cs | 36 +------------------ .../Net/Http/Utf8StringContent.netcoreapp3.cs | 19 ++++++++++ .../Net/Http/Utf8StringContent.netcoreapp5.cs | 22 ++++++++++++ .../Net/Http/Utf8StringContent.netstandard.cs | 36 +++++++++++++++++++ .../src/System/Utf8String.Portable.cs | 2 +- 6 files changed, 84 insertions(+), 36 deletions(-) create mode 100644 src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp3.cs create mode 100644 src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp5.cs create mode 100644 src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netstandard.cs diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index 6485852d02a37..a2f0fb448fd28 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -12,6 +12,7 @@ + System\Index.cs @@ -29,10 +30,14 @@ + System\Utf8String.netcoreapp.cs + + + diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs index cf4a9f39959cd..25b86308e99e8 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs @@ -2,17 +2,13 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
-#nullable enable -using System.Buffers; using System.IO; using System.Net.Http.Headers; -using System.Runtime.InteropServices; -using System.Threading; using System.Threading.Tasks; namespace System.Net.Http { - public sealed class Utf8StringContent : HttpContent + public sealed partial class Utf8StringContent : HttpContent { private const string DefaultMediaType = "text/plain"; @@ -43,36 +39,6 @@ public Utf8StringContent(Utf8String content, string? mediaType) protected override Task CreateContentReadStreamAsync() => Task.FromResult(new Utf8StringStream(_content)); -#if NETCOREAPP - protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context) => - SerializeToStreamAsync(stream, context, default); - - protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context, CancellationToken cancellationToken) => - stream.WriteAsync(_content.AsMemoryBytes(), cancellationToken).AsTask(); -#else - protected async override Task SerializeToStreamAsync(Stream stream, TransportContext? 
context) - { - ReadOnlyMemory buffer = _content.AsMemoryBytes(); - if (MemoryMarshal.TryGetArray(buffer, out ArraySegment array)) - { - await stream.WriteAsync(array.Array, array.Offset, array.Count).ConfigureAwait(false); - } - else - { - byte[] localBuffer = ArrayPool.Shared.Rent(buffer.Length); - try - { - buffer.Span.CopyTo(localBuffer); - await stream.WriteAsync(localBuffer, 0, buffer.Length).ConfigureAwait(false); - } - finally - { - ArrayPool.Shared.Return(localBuffer); - } - } - } -#endif - protected override bool TryComputeLength(out long length) { length = _content.Length; diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp3.cs b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp3.cs new file mode 100644 index 0000000000000..76f16c5b0e401 --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp3.cs @@ -0,0 +1,19 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Buffers; +using System.IO; +using System.Net.Http.Headers; +using System.Runtime.InteropServices; +using System.Threading; +using System.Threading.Tasks; + +namespace System.Net.Http +{ + public sealed partial class Utf8StringContent : HttpContent + { + protected override Task SerializeToStreamAsync(Stream stream, TransportContext? 
context) => + stream.WriteAsync(_content.AsMemoryBytes()).AsTask(); + } +} diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp5.cs b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp5.cs new file mode 100644 index 0000000000000..537c5d53ce97e --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp5.cs @@ -0,0 +1,22 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Buffers; +using System.IO; +using System.Net.Http.Headers; +using System.Runtime.InteropServices; +using System.Threading; +using System.Threading.Tasks; + +namespace System.Net.Http +{ + public sealed partial class Utf8StringContent : HttpContent + { + protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context) => + SerializeToStreamAsync(stream, context, default); + + protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context, CancellationToken cancellationToken) => + stream.WriteAsync(_content.AsMemoryBytes(), cancellationToken).AsTask(); + } +} diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netstandard.cs b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netstandard.cs new file mode 100644 index 0000000000000..c3a4fefd5c631 --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netstandard.cs @@ -0,0 +1,36 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Buffers; +using System.IO; +using System.Runtime.InteropServices; +using System.Threading.Tasks; + +namespace System.Net.Http +{ + public sealed partial class Utf8StringContent : HttpContent + { + protected async override Task SerializeToStreamAsync(Stream stream, TransportContext? context) + { + ReadOnlyMemory buffer = _content.AsMemoryBytes(); + if (MemoryMarshal.TryGetArray(buffer, out ArraySegment array)) + { + await stream.WriteAsync(array.Array, array.Offset, array.Count).ConfigureAwait(false); + } + else + { + byte[] localBuffer = ArrayPool.Shared.Rent(buffer.Length); + try + { + buffer.Span.CopyTo(localBuffer); + await stream.WriteAsync(localBuffer, 0, buffer.Length).ConfigureAwait(false); + } + finally + { + ArrayPool.Shared.Return(localBuffer); + } + } + } + } +} diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs index 49ef41d22bdb2..4755570d1643b 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs @@ -109,7 +109,7 @@ private static byte[] InitializeBuffer(ReadOnlySpan value) private static byte[] InitializeBuffer(ReadOnlySpan value) { - byte[] newBuffer = CreateBufferFromUtf16Common(value, replaceInvalidSequences: false); + byte[]? newBuffer = CreateBufferFromUtf16Common(value, replaceInvalidSequences: false); if (newBuffer is null) { From 889c9a22b0e96e8ed9a20d8ac96a7f47d64f2077 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Mon, 9 Mar 2020 15:01:45 -0500 Subject: [PATCH 20/26] Add netstandard2.1 support to Utf8String.Experimental. 
--- .../System.Private.CoreLib.Shared.projitems | 2 - .../src/System/Text/Utf8Span.netcoreapp.cs | 19 --- .../src/System/Utf8String.Construction.cs | 140 ++++++++++++++++ .../src/System/Utf8String.netcoreapp.cs | 155 ------------------ ...System.Utf8String.Experimental.Forwards.cs | 6 + .../System.Utf8String.Experimental.Range.cs | 42 +++++ ...=> System.Utf8String.Experimental.Rune.cs} | 35 ---- .../ref/System.Utf8String.Experimental.cs | 11 +- .../ref/System.Utf8String.Experimental.csproj | 14 +- ...stem.Utf8String.Experimental.netcoreapp.cs | 16 -- ....Utf8String.Experimental.netcoreapp5.0.cs} | 0 .../src/System.Utf8String.Experimental.csproj | 25 +-- .../src/System/IO/Utf8StringStream.cs | 8 +- .../src/System/Net/Http/Utf8StringContent.cs | 36 ++++ .../Net/Http/Utf8StringContent.netcoreapp3.cs | 19 --- .../Net/Http/Utf8StringContent.netcoreapp5.cs | 22 --- .../Net/Http/Utf8StringContent.netstandard.cs | 36 ---- 17 files changed, 260 insertions(+), 326 deletions(-) delete mode 100644 src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs delete mode 100644 src/libraries/System.Private.CoreLib/src/System/Utf8String.netcoreapp.cs create mode 100644 src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Range.cs rename src/libraries/System.Utf8String.Experimental/ref/{System.Utf8String.Experimental.netstandard.cs => System.Utf8String.Experimental.Rune.cs} (76%) delete mode 100644 src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs rename src/libraries/System.Utf8String.Experimental/ref/{System.Utf8String.Experimental.netcoreapp5.cs => System.Utf8String.Experimental.netcoreapp5.0.cs} (100%) delete mode 100644 src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp3.cs delete mode 100644 src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp5.cs delete mode 100644 
src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netstandard.cs diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index b81c1109bd2fe..be9f0641bb87c 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1817,7 +1817,6 @@ - @@ -1825,7 +1824,6 @@ - diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs deleted file mode 100644 index 1c98ffd98c124..0000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.netcoreapp.cs +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using System.Buffers; -using System.ComponentModel; -using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Text.Unicode; -using Internal.Runtime.CompilerServices; - -namespace System.Text -{ - public readonly ref partial struct Utf8Span - { - // TODO: eerhardt delete me - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs index 9b28bb6aafe78..cbf736bc6473c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs @@ -353,6 +353,146 @@ internal static Utf8String CreateFromRune(Rune value) } #endif +#if !NETSTANDARD2_0 + /// + /// Creates a new instance, allowing the provided delegate to populate the + /// instance data of the returned object. + /// + /// Type of the state object provided to . + /// The length, in bytes, of the instance to create. + /// The state object to provide to . + /// The callback which will be invoked to populate the returned . + /// + /// Thrown if populates the buffer with ill-formed UTF-8 data. + /// + /// + /// The runtime will perform UTF-8 validation over the contents provided by the delegate. + /// If an invalid UTF-8 subsequence is detected, an exception is thrown. + /// + public static Utf8String Create(int length, TState state, SpanAction action) + { + if (length < 0) + { + ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); + } + + if (action is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); + } + + if (length == 0) + { + return Empty; // special-case empty input + } + + // Create and populate the Utf8String instance. + // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. 
+ + Utf8String newString = FastAllocate(length); + action(newString.DangerousGetMutableSpan(), state); + + // Now perform validation. + + if (!Utf8Utility.IsWellFormedUtf8(newString.AsBytes())) + { + throw new ArgumentException( + message: SR.Utf8String_CallbackProvidedMalformedData, + paramName: nameof(action)); + } + + return newString; + } + + /// + /// Creates a new instance, allowing the provided delegate to populate the + /// instance data of the returned object. + /// + /// Type of the state object provided to . + /// The length, in bytes, of the instance to create. + /// The state object to provide to . + /// The callback which will be invoked to populate the returned . + /// + /// The runtime will perform UTF-8 validation over the contents provided by the delegate. + /// If an invalid UTF-8 subsequence is detected, the invalid subsequence is replaced with + /// in the returned instance. This could result in the returned instance + /// having a different byte length than specified by the parameter. + /// + public static Utf8String CreateRelaxed(int length, TState state, SpanAction action) + { + if (length < 0) + { + ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); + } + + if (action is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); + } + + if (length == 0) + { + return Empty; // special-case empty input + } + + // Create and populate the Utf8String instance. + // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. + + Utf8String newString = FastAllocate(length); + action(newString.DangerousGetMutableSpan(), state); + + // Now perform validation and fixup. + + return Utf8Utility.ValidateAndFixupUtf8String(newString); + } + + /// + /// Creates a new instance, allowing the provided delegate to populate the + /// instance data of the returned object. Please see remarks for important safety information about + /// this method. 
+ /// + /// Type of the state object provided to . + /// The length, in bytes, of the instance to create. + /// The state object to provide to . + /// The callback which will be invoked to populate the returned . + /// + /// This factory method can be used as an optimization to skip the validation step that + /// normally performs. The contract + /// of this method requires that populate the buffer with well-formed UTF-8 + /// data, as contractually guarantees that it contains only well-formed UTF-8 data, + /// and runtime instability could occur if a caller violates this guarantee. + /// + public static Utf8String UnsafeCreateWithoutValidation(int length, TState state, SpanAction action) + { + if (length < 0) + { + ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); + } + + if (action is null) + { + ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); + } + + if (length == 0) + { + return Empty; // special-case empty input + } + + // Create and populate the Utf8String instance. + // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. + + Utf8String newString = FastAllocate(length); + action(newString.DangerousGetMutableSpan(), state); + + // The line below is removed entirely in release builds. + + Debug.Assert(Utf8Utility.IsWellFormedUtf8(newString.AsBytes()), "Callback populated the buffer with ill-formed UTF-8 data."); + + return newString; + } +#endif // !NETSTANDARD2_0 + /// /// Creates a new instance populated with a copy of the provided contents. /// Please see remarks for important safety information about this method. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.netcoreapp.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.netcoreapp.cs deleted file mode 100644 index a034024e5b582..0000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.netcoreapp.cs +++ /dev/null @@ -1,155 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.Buffers; -using System.ComponentModel; -using System.Diagnostics; -using System.Diagnostics.CodeAnalysis; -using System.Runtime.CompilerServices; -using System.Text; -using System.Text.Unicode; - -namespace System -{ - public sealed partial class Utf8String - { - /// - /// Creates a new instance, allowing the provided delegate to populate the - /// instance data of the returned object. - /// - /// Type of the state object provided to . - /// The length, in bytes, of the instance to create. - /// The state object to provide to . - /// The callback which will be invoked to populate the returned . - /// - /// Thrown if populates the buffer with ill-formed UTF-8 data. - /// - /// - /// The runtime will perform UTF-8 validation over the contents provided by the delegate. - /// If an invalid UTF-8 subsequence is detected, an exception is thrown. - /// - public static Utf8String Create(int length, TState state, SpanAction action) - { - if (length < 0) - { - ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); - } - - if (action is null) - { - ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); - } - - if (length == 0) - { - return Empty; // special-case empty input - } - - // Create and populate the Utf8String instance. - // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. 
- - Utf8String newString = FastAllocate(length); - action(newString.DangerousGetMutableSpan(), state); - - // Now perform validation. - - if (!Utf8Utility.IsWellFormedUtf8(newString.AsBytes())) - { - throw new ArgumentException( - message: SR.Utf8String_CallbackProvidedMalformedData, - paramName: nameof(action)); - } - - return newString; - } - - /// - /// Creates a new instance, allowing the provided delegate to populate the - /// instance data of the returned object. - /// - /// Type of the state object provided to . - /// The length, in bytes, of the instance to create. - /// The state object to provide to . - /// The callback which will be invoked to populate the returned . - /// - /// The runtime will perform UTF-8 validation over the contents provided by the delegate. - /// If an invalid UTF-8 subsequence is detected, the invalid subsequence is replaced with - /// in the returned instance. This could result in the returned instance - /// having a different byte length than specified by the parameter. - /// - public static Utf8String CreateRelaxed(int length, TState state, SpanAction action) - { - if (length < 0) - { - ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); - } - - if (action is null) - { - ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); - } - - if (length == 0) - { - return Empty; // special-case empty input - } - - // Create and populate the Utf8String instance. - // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. - - Utf8String newString = FastAllocate(length); - action(newString.DangerousGetMutableSpan(), state); - - // Now perform validation and fixup. - - return Utf8Utility.ValidateAndFixupUtf8String(newString); - } - - /// - /// Creates a new instance, allowing the provided delegate to populate the - /// instance data of the returned object. Please see remarks for important safety information about - /// this method. 
- /// - /// Type of the state object provided to . - /// The length, in bytes, of the instance to create. - /// The state object to provide to . - /// The callback which will be invoked to populate the returned . - /// - /// This factory method can be used as an optimization to skip the validation step that - /// normally performs. The contract - /// of this method requires that populate the buffer with well-formed UTF-8 - /// data, as contractually guarantees that it contains only well-formed UTF-8 data, - /// and runtime instability could occur if a caller violates this guarantee. - /// - public static Utf8String UnsafeCreateWithoutValidation(int length, TState state, SpanAction action) - { - if (length < 0) - { - ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum(); - } - - if (action is null) - { - ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action); - } - - if (length == 0) - { - return Empty; // special-case empty input - } - - // Create and populate the Utf8String instance. - // Can't use FastAllocateSkipZeroInit here because we're handing the raw buffer to user code. - - Utf8String newString = FastAllocate(length); - action(newString.DangerousGetMutableSpan(), state); - - // The line below is removed entirely in release builds. - - Debug.Assert(Utf8Utility.IsWellFormedUtf8(newString.AsBytes()), "Callback populated the buffer with ill-formed UTF-8 data."); - - return newString; - } - } -} diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Forwards.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Forwards.cs index 67c2ae89d5393..865608e4ac863 100644 --- a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Forwards.cs +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Forwards.cs @@ -2,6 +2,12 @@ // The .NET Foundation licenses this file to you under the MIT license. 
// See the LICENSE file in the project root for more information. +#if !NETSTANDARD2_0 [assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Index))] [assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Range))] + +#if !NETSTANDARD2_1 [assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Text.Rune))] +#endif // !NETSTANDARD2_1 + +#endif // !NETSTANDARD2_0 diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Range.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Range.cs new file mode 100644 index 0000000000000..58cadf8efc353 --- /dev/null +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Range.cs @@ -0,0 +1,42 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// ------------------------------------------------------------------------------ +// Changes to this file must follow the https://aka.ms/api-review process. +// ------------------------------------------------------------------------------ + +namespace System +{ + public readonly partial struct Index : System.IEquatable + { + private readonly int _dummyPrimitive; + public Index(int value, bool fromEnd = false) { throw null; } + public static System.Index End { get { throw null; } } + public bool IsFromEnd { get { throw null; } } + public static System.Index Start { get { throw null; } } + public int Value { get { throw null; } } + public bool Equals(System.Index other) { throw null; } + public override bool Equals(object? 
value) { throw null; } + public static System.Index FromEnd(int value) { throw null; } + public static System.Index FromStart(int value) { throw null; } + public override int GetHashCode() { throw null; } + public int GetOffset(int length) { throw null; } + public static implicit operator System.Index(int value) { throw null; } + public override string ToString() { throw null; } + } + public readonly partial struct Range : System.IEquatable + { + private readonly int _dummyPrimitive; + public Range(System.Index start, System.Index end) { throw null; } + public static System.Range All { get { throw null; } } + public System.Index End { get { throw null; } } + public System.Index Start { get { throw null; } } + public static System.Range EndAt(System.Index end) { throw null; } + public override bool Equals(object? value) { throw null; } + public bool Equals(System.Range other) { throw null; } + public override int GetHashCode() { throw null; } + public (int Offset, int Length) GetOffsetAndLength(int length) { throw null; } + public static System.Range StartAt(System.Index start) { throw null; } + public override string ToString() { throw null; } + } +} diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netstandard.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Rune.cs similarity index 76% rename from src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netstandard.cs rename to src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Rune.cs index 4f7f31dc3cead..0cf292727a56f 100644 --- a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netstandard.cs +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.Rune.cs @@ -5,41 +5,6 @@ // Changes to this file must follow the https://aka.ms/api-review process. 
// ------------------------------------------------------------------------------ -namespace System -{ - public readonly partial struct Index : System.IEquatable - { - private readonly int _dummyPrimitive; - public Index(int value, bool fromEnd = false) { throw null; } - public static System.Index End { get { throw null; } } - public bool IsFromEnd { get { throw null; } } - public static System.Index Start { get { throw null; } } - public int Value { get { throw null; } } - public bool Equals(System.Index other) { throw null; } - public override bool Equals(object? value) { throw null; } - public static System.Index FromEnd(int value) { throw null; } - public static System.Index FromStart(int value) { throw null; } - public override int GetHashCode() { throw null; } - public int GetOffset(int length) { throw null; } - public static implicit operator System.Index(int value) { throw null; } - public override string ToString() { throw null; } - } - public readonly partial struct Range : System.IEquatable - { - private readonly int _dummyPrimitive; - public Range(System.Index start, System.Index end) { throw null; } - public static System.Range All { get { throw null; } } - public System.Index End { get { throw null; } } - public System.Index Start { get { throw null; } } - public static System.Range EndAt(System.Index end) { throw null; } - public override bool Equals(object? 
value) { throw null; } - public bool Equals(System.Range other) { throw null; } - public override int GetHashCode() { throw null; } - public (int Offset, int Length) GetOffsetAndLength(int length) { throw null; } - public static System.Range StartAt(System.Index start) { throw null; } - public override string ToString() { throw null; } - } -} namespace System.Text { public readonly partial struct Rune : System.IComparable, System.IEquatable diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs index a471001f0569c..4a906ba12ca44 100644 --- a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.cs @@ -55,8 +55,8 @@ public static partial class Utf8Extensions public static System.ReadOnlySpan AsBytes(this System.Utf8String? text, int start) { throw null; } public static System.ReadOnlySpan AsBytes(this System.Utf8String? text, int start, int length) { throw null; } public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text) { throw null; } - public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, int start) { throw null; } public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, System.Index startIndex) { throw null; } + public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, int start) { throw null; } public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, int start, int length) { throw null; } public static System.ReadOnlyMemory AsMemoryBytes(this System.Utf8String? text, System.Range range) { throw null; } public static System.Text.Utf8Span AsSpan(this System.Utf8String? 
text) { throw null; } @@ -91,8 +91,14 @@ public Utf8String(string value) { } public bool Contains(System.Text.Rune value, System.StringComparison comparison) { throw null; } public bool Contains(System.Utf8String value) { throw null; } public bool Contains(System.Utf8String value, System.StringComparison comparison) { throw null; } +#if !NETSTANDARD2_0 + public static System.Utf8String Create(int length, TState state, System.Buffers.SpanAction action) { throw null; } +#endif public static System.Utf8String CreateFromRelaxed(System.ReadOnlySpan buffer) { throw null; } public static System.Utf8String CreateFromRelaxed(System.ReadOnlySpan buffer) { throw null; } +#if !NETSTANDARD2_0 + public static System.Utf8String CreateRelaxed(int length, TState state, System.Buffers.SpanAction action) { throw null; } +#endif public bool EndsWith(char value) { throw null; } public bool EndsWith(char value, System.StringComparison comparison) { throw null; } public bool EndsWith(System.Text.Rune value) { throw null; } @@ -163,6 +169,9 @@ public Utf8String(string value) { } public bool TryFindLast(System.Utf8String value, out System.Range range) { throw null; } public bool TryFindLast(System.Utf8String value, System.StringComparison comparisonType, out System.Range range) { throw null; } public static System.Utf8String UnsafeCreateWithoutValidation(System.ReadOnlySpan utf8Contents) { throw null; } +#if !NETSTANDARD2_0 + public static System.Utf8String UnsafeCreateWithoutValidation(int length, TState state, System.Buffers.SpanAction action) { throw null; } +#endif public readonly partial struct ByteEnumerable : System.Collections.Generic.IEnumerable { private readonly object _dummy; diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj index bf1c7bece927d..7932a85d40138 100644 --- 
a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.csproj @@ -3,22 +3,22 @@ true $(NoWarn);0809;0618 - netstandard2.0;netcoreapp3.0;$(NetCoreAppCurrent) - true + netstandard2.0;netstandard2.1;netcoreapp3.0;$(NetCoreAppCurrent) enable + - + + - - - + + - + diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs deleted file mode 100644 index 5b0733e695ff0..0000000000000 --- a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp.cs +++ /dev/null @@ -1,16 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. -// ------------------------------------------------------------------------------ -// Changes to this file must follow the https://aka.ms/api-review process. 
-// ------------------------------------------------------------------------------ - -namespace System -{ - public sealed partial class Utf8String : System.IComparable, System.IEquatable - { - public static System.Utf8String Create(int length, TState state, System.Buffers.SpanAction action) { throw null; } - public static System.Utf8String CreateRelaxed(int length, TState state, System.Buffers.SpanAction action) { throw null; } - public static System.Utf8String UnsafeCreateWithoutValidation(int length, TState state, System.Buffers.SpanAction action) { throw null; } - } -} diff --git a/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.cs b/src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.0.cs similarity index 100% rename from src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.cs rename to src/libraries/System.Utf8String.Experimental/ref/System.Utf8String.Experimental.netcoreapp5.0.cs diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index a2f0fb448fd28..6f34f9bf0594d 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -2,8 +2,8 @@ true $(NoWarn);3019 - true - netstandard2.0;netcoreapp3.0;$(NetCoreAppCurrent)-Windows_NT;$(NetCoreAppCurrent)-Unix + true + netstandard2.0;netstandard2.1;netcoreapp3.0;$(NetCoreAppCurrent)-Windows_NT;$(NetCoreAppCurrent)-Unix enable $(DefineContants);FEATURE_UTF8STRING @@ -12,7 +12,6 @@ - System\Index.cs @@ -29,14 +28,16 @@ System\Text\Unicode\Utf8.cs - - - - System\Utf8String.netcoreapp.cs + + + System\Numerics\BitOperations.cs + + + System\Text\Rune.cs + + + System\Text\Unicode\Utf8.cs - - - @@ -147,6 +148,10 @@ + + + + diff --git 
a/src/libraries/System.Utf8String.Experimental/src/System/IO/Utf8StringStream.cs b/src/libraries/System.Utf8String.Experimental/src/System/IO/Utf8StringStream.cs index 1fd9ef1c1197c..39577742f56a1 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/IO/Utf8StringStream.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/IO/Utf8StringStream.cs @@ -58,7 +58,7 @@ public override int Read(byte[] buffer, int offset, int count) } public -#if NETCOREAPP +#if !NETSTANDARD2_0 override #endif int Read(Span buffer) @@ -80,7 +80,7 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel return Task.FromResult(Read(new Span(buffer, offset, count))); } -#if NETCOREAPP +#if !NETSTANDARD2_0 public override ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) { return new ValueTask(Read(buffer.Span)); @@ -128,13 +128,13 @@ public override long Seek(long offset, SeekOrigin origin) public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException(); -#if NETCOREAPP +#if !NETSTANDARD2_0 public override void Write(ReadOnlySpan buffer) => throw new NotSupportedException(); #endif public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) => throw new NotSupportedException(); -#if NETCOREAPP +#if !NETSTANDARD2_0 public override ValueTask WriteAsync(ReadOnlyMemory buffer, CancellationToken cancellationToken = default) => throw new NotSupportedException(); #endif public override void WriteByte(byte value) => throw new NotSupportedException(); diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs index 25b86308e99e8..cab4ec11c8de9 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs +++ 
b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs @@ -2,8 +2,11 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Buffers; using System.IO; using System.Net.Http.Headers; +using System.Runtime.InteropServices; +using System.Threading; using System.Threading.Tasks; namespace System.Net.Http @@ -39,6 +42,39 @@ public Utf8StringContent(Utf8String content, string? mediaType) protected override Task CreateContentReadStreamAsync() => Task.FromResult(new Utf8StringStream(_content)); +#if NETSTANDARD2_0 + protected async override Task SerializeToStreamAsync(Stream stream, TransportContext? context) + { + ReadOnlyMemory buffer = _content.AsMemoryBytes(); + if (MemoryMarshal.TryGetArray(buffer, out ArraySegment array)) + { + await stream.WriteAsync(array.Array, array.Offset, array.Count).ConfigureAwait(false); + } + else + { + byte[] localBuffer = ArrayPool.Shared.Rent(buffer.Length); + try + { + buffer.Span.CopyTo(localBuffer); + await stream.WriteAsync(localBuffer, 0, buffer.Length).ConfigureAwait(false); + } + finally + { + ArrayPool.Shared.Return(localBuffer); + } + } + } +#elif NETSTANDARD2_1 || NETCOREAPP3_0 + protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context) => + stream.WriteAsync(_content.AsMemoryBytes()).AsTask(); +#else + protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context) => + SerializeToStreamAsync(stream, context, default); + + protected override Task SerializeToStreamAsync(Stream stream, TransportContext? 
context, CancellationToken cancellationToken) => + stream.WriteAsync(_content.AsMemoryBytes(), cancellationToken).AsTask(); +#endif + protected override bool TryComputeLength(out long length) { length = _content.Length; diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp3.cs b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp3.cs deleted file mode 100644 index 76f16c5b0e401..0000000000000 --- a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp3.cs +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.Buffers; -using System.IO; -using System.Net.Http.Headers; -using System.Runtime.InteropServices; -using System.Threading; -using System.Threading.Tasks; - -namespace System.Net.Http -{ - public sealed partial class Utf8StringContent : HttpContent - { - protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context) => - stream.WriteAsync(_content.AsMemoryBytes()).AsTask(); - } -} diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp5.cs b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp5.cs deleted file mode 100644 index 537c5d53ce97e..0000000000000 --- a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netcoreapp5.cs +++ /dev/null @@ -1,22 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using System.Buffers; -using System.IO; -using System.Net.Http.Headers; -using System.Runtime.InteropServices; -using System.Threading; -using System.Threading.Tasks; - -namespace System.Net.Http -{ - public sealed partial class Utf8StringContent : HttpContent - { - protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context) => - SerializeToStreamAsync(stream, context, default); - - protected override Task SerializeToStreamAsync(Stream stream, TransportContext? context, CancellationToken cancellationToken) => - stream.WriteAsync(_content.AsMemoryBytes(), cancellationToken).AsTask(); - } -} diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netstandard.cs b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netstandard.cs deleted file mode 100644 index c3a4fefd5c631..0000000000000 --- a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.netstandard.cs +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.Buffers; -using System.IO; -using System.Runtime.InteropServices; -using System.Threading.Tasks; - -namespace System.Net.Http -{ - public sealed partial class Utf8StringContent : HttpContent - { - protected async override Task SerializeToStreamAsync(Stream stream, TransportContext? 
context) - { - ReadOnlyMemory buffer = _content.AsMemoryBytes(); - if (MemoryMarshal.TryGetArray(buffer, out ArraySegment array)) - { - await stream.WriteAsync(array.Array, array.Offset, array.Count).ConfigureAwait(false); - } - else - { - byte[] localBuffer = ArrayPool.Shared.Rent(buffer.Length); - try - { - buffer.Span.CopyTo(localBuffer); - await stream.WriteAsync(localBuffer, 0, buffer.Length).ConfigureAwait(false); - } - finally - { - ArrayPool.Shared.Return(localBuffer); - } - } - } - } -} From b418ed2adeca2e6410389ea93b9b98c8d816bd53 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Mon, 9 Mar 2020 17:08:47 -0500 Subject: [PATCH 21/26] Clean up to prepare for review. --- .../System.Private.CoreLib.csproj | 1 - .../System.Private.CoreLib.Shared.projitems | 1 + .../src/System/Char8.cs | 2 +- .../src/System/Index.cs | 7 ++--- .../src/System/Range.cs | 21 +++----------- .../src/System/Text/Rune.cs | 20 ++++--------- .../src/System/Text/Unicode/Utf8Utility.cs | 6 ++-- .../src/System/Text/Utf8Span.Comparison.cs | 4 +-- .../src/System/Text/Utf8Span.Conversion.cs | 10 +++---- .../src/System/Text/Utf8Span.Manipulation.cs | 6 ++-- .../src/System/Text/Utf8Span.cs | 29 ++++++++++--------- .../src/System/Utf8Extensions.CoreLib.cs} | 0 .../src/System/Utf8String.Comparison.cs | 6 ++-- .../src/System/Utf8String.Construction.cs | 6 ++-- .../src/System/Utf8String.Manipulation.cs | 6 ++-- .../src/System/Utf8String.cs | 29 +++++++++---------- .../src/System.Utf8String.Experimental.csproj | 6 ++-- .../src/System/Net/Http/Utf8StringContent.cs | 2 +- .../src/System/Utf8String.Portable.cs | 9 +++--- 19 files changed, 73 insertions(+), 98 deletions(-) rename src/{coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs => libraries/System.Private.CoreLib/src/System/Utf8Extensions.CoreLib.cs} (100%) diff --git a/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj index 
416252b005cef..653e61bf4d8aa 100644 --- a/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj +++ b/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj @@ -279,7 +279,6 @@ - diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index be9f0641bb87c..1963af0025c7e 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1811,6 +1811,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Char8.cs b/src/libraries/System.Private.CoreLib/src/System/Char8.cs index 70ecdd0dda408..ba22e91944b83 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Char8.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Char8.cs @@ -5,7 +5,7 @@ namespace System { /// - /// Represents a UTF-8 code unit, the elemental type of TODO cref Utf8String. + /// Represents a UTF-8 code unit, the elemental type of <see cref="Utf8String"/>. 
/// public readonly struct Char8 : IComparable, IEquatable { diff --git a/src/libraries/System.Private.CoreLib/src/System/Index.cs b/src/libraries/System.Private.CoreLib/src/System/Index.cs index fb8e2e8aa68fe..22225ac0bc2ed 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Index.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Index.cs @@ -141,17 +141,14 @@ public override string ToString() private string ToStringFromEnd() { -#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP +#if !NETSTANDARD2_0 Span span = stackalloc char[11]; // 1 for ^ and 10 for longest possible uint value bool formatted = ((uint)Value).TryFormat(span.Slice(1), out int charsWritten); Debug.Assert(formatted); span[0] = '^'; return new string(span.Slice(0, charsWritten + 1)); #else - var builder = new StringBuilder(11); - builder.Append('^'); - builder.Append((uint)Value); - return builder.ToString(); + return '^' + Value.ToString(); #endif } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Range.cs b/src/libraries/System.Private.CoreLib/src/System/Range.cs index 5ae73b4efd49d..200b7de8f67a3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Range.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Range.cs @@ -6,7 +6,7 @@ using System.Runtime.CompilerServices; using System.Text; -#if !SYSTEM_PRIVATE_CORELIB && !NETCOREAPP +#if NETSTANDARD2_0 using System.Numerics.Hashing; #endif @@ -52,7 +52,7 @@ value is Range r && /// Returns the hash code for this instance. public override int GetHashCode() { -#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP +#if !NETSTANDARD2_0 return HashCode.Combine(Start.GetHashCode(), End.GetHashCode()); #else return HashHelpers.Combine(Start.GetHashCode(), End.GetHashCode()); @@ -62,7 +62,7 @@ public override int GetHashCode() /// Converts the value of the current Range object to its equivalent string representation. 
public override string ToString() { -#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP +#if !NETSTANDARD2_0 Span span = stackalloc char[2 + (2 * 11)]; // 2 for "..", then for each index 1 for '^' and 10 for longest possible uint int pos = 0; @@ -88,20 +88,7 @@ public override string ToString() return new string(span.Slice(0, pos)); #else - var builder = new StringBuilder(2 + (2 * 11)); // 2 for "..", then for each index 1 for '^' and 10 for longest possible uint - if (Start.IsFromEnd) - { - builder.Append('^'); - } - builder.Append((uint)Start.Value); - builder.Append('.'); - builder.Append('.'); - if (End.IsFromEnd) - { - builder.Append('^'); - } - builder.Append((uint)End.Value); - return builder.ToString(); + return Start.ToString() + ".." + End.ToString(); #endif } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs index e0e1a4e25fc1b..0d000011dba2a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs @@ -191,7 +191,7 @@ private static Rune ChangeCaseCultureAware(Rune rune, CultureInfo culture, bool original = original.Slice(0, charCount); modified = modified.Slice(0, charCount); -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB if (toUpper) { culture!.TextInfo.ChangeCaseToUpper(original, modified); @@ -842,7 +842,7 @@ private static int ReadRuneFromString(string input, int index) /// public override string ToString() { -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB if (IsBmp) { return string.CreateFromChar((char)_value); @@ -862,13 +862,7 @@ public override string ToString() { Span buffer = stackalloc char[2]; UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(_value, out buffer[0], out buffer[1]); - unsafe - { - fixed (char* pBuffer = buffer) - { - return new string(pBuffer, 0, 2); - } - } + return buffer.ToString(); } #endif } @@ -1105,10 +1099,9 @@ public static double GetNumericValue(Rune value) 
else { // not an ASCII char; fall back to globalization table -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB return CharUnicodeInfo.GetNumericValue(value.Value); #else - // TODO: figure out a better way than allocating here return CharUnicodeInfo.GetNumericValue(value.ToString(), 0); #endif } @@ -1129,10 +1122,9 @@ public static UnicodeCategory GetUnicodeCategory(Rune value) private static UnicodeCategory GetUnicodeCategoryNonAscii(Rune value) { Debug.Assert(!value.IsAscii, "Shouldn't use this non-optimized code path for ASCII characters."); -#if CORECLR +#if !NETSTANDARD2_0 return CharUnicodeInfo.GetUnicodeCategory(value.Value); #else - // TODO: figure out a better way than allocating here return CharUnicodeInfo.GetUnicodeCategory(value.ToString(), 0); #endif } @@ -1286,7 +1278,7 @@ public static bool IsWhiteSpace(Rune value) // if the incoming value is within the BMP. return value.IsBmp && -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB CharUnicodeInfo.GetIsWhiteSpace((char)value._value); #else char.IsWhiteSpace((char)value._value); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs index 76d5bb4799ae3..7d489a5a0ad88 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs @@ -25,9 +25,9 @@ internal static partial class Utf8Utility /// /// The UTF-8 representation of . /// -#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP +#if !NETSTANDARD2_0 private static ReadOnlySpan ReplacementCharSequence => new byte[] { 0xEF, 0xBF, 0xBD }; -#else // NETSTANDARD +#else private static readonly byte[] ReplacementCharSequence = new byte[] { 0xEF, 0xBF, 0xBD }; #endif @@ -90,7 +90,7 @@ public static Utf8String ValidateAndFixupUtf8String(Utf8String value) // (The faster implementation is in the dev/utf8string_bak branch currently.) 
MemoryStream memStream = new MemoryStream(); -#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP +#if !NETSTANDARD2_0 memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData)); valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs index a44f14402857f..72f20864f09e9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Comparison.cs @@ -68,7 +68,7 @@ public bool Contains(Rune value, StringComparison comparison) { // TODO_UTF8STRING: Optimize me to avoid allocations. -#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP +#if !NETSTANDARD2_0 return this.ToString().Contains(value.ToString(), comparison); #else return this.ToString().IndexOf(value.ToString(), comparison) >= 0; @@ -92,7 +92,7 @@ public bool Contains(Utf8Span value, StringComparison comparison) { // TODO_UTF8STRING: Optimize me to avoid allocations. -#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP +#if !NETSTANDARD2_0 return this.ToString().Contains(value.ToString(), comparison); #else return this.ToString().IndexOf(value.ToString(), comparison) >= 0; diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs index ad6586b61714c..9868be8f406b2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs @@ -44,7 +44,7 @@ public int Normalize(Span destination, NormalizationForm normalizationForm // TODO_UTF8STRING: Reduce allocations in this code path. 
ReadOnlySpan normalized = this.ToString().Normalize(normalizationForm) -#if NETSTANDARD +#if NETSTANDARD2_0 .AsSpan() #endif ; @@ -163,7 +163,7 @@ public int ToLower(Span destination, CultureInfo culture) } ReadOnlySpan asLower = this.ToString().ToLower(culture) -#if NETSTANDARD +#if NETSTANDARD2_0 .AsSpan() #endif ; @@ -215,7 +215,7 @@ public int ToLowerInvariant(Span destination) // TODO_UTF8STRING: Avoid intermediate allocations. ReadOnlySpan asLowerInvariant = this.ToString().ToLowerInvariant() -#if NETSTANDARD +#if NETSTANDARD2_0 .AsSpan() #endif ; @@ -275,7 +275,7 @@ public int ToUpper(Span destination, CultureInfo culture) } ReadOnlySpan asUpper = this.ToString().ToUpper(culture) -#if NETSTANDARD +#if NETSTANDARD2_0 .AsSpan() #endif ; @@ -327,7 +327,7 @@ public int ToUpperInvariant(Span destination) // TODO_UTF8STRING: Avoid intermediate allocations. ReadOnlySpan asUpperInvariant = this.ToString().ToUpperInvariant() -#if NETSTANDARD +#if NETSTANDARD2_0 .AsSpan() #endif ; diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Manipulation.cs index a538e46e02bb3..2317de908ab37 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Manipulation.cs @@ -431,10 +431,10 @@ internal readonly bool DeconstructHelper(in Utf8Span source, out Utf8Span firstI if (SearchRune >= 0) { -#if SYSTEM_PRIVATE_CORELIB || NETSTANDARD2_0 - wasMatchFound = searchSpan.TryFind(Rune.UnsafeCreate((uint)SearchRune), out matchRange); -#else +#if NETCOREAPP3_0 wasMatchFound = searchSpan.TryFind(new Rune((uint)SearchRune), out matchRange); +#else + wasMatchFound = searchSpan.TryFind(Rune.UnsafeCreate((uint)SearchRune), out matchRange); #endif } else diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs 
index e754b664a9589..ec481eac0f0a3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs @@ -16,7 +16,7 @@ #pragma warning disable 0809 //warning CS0809: Obsolete member 'Utf8Span.Equals(object)' overrides non-obsolete member 'object.Equals(object)' #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB #if TARGET_64BIT using nint = System.Int64; using nuint = System.UInt64; @@ -129,7 +129,11 @@ internal ref byte DangerousGetMutableReference(nuint index) // Allow retrieving references to just past the end of the span (but shouldn't dereference this). Debug.Assert(index <= (uint)Length, "Caller should've performed bounds checking."); - return ref Unsafe.AddByteOffset(ref DangerousGetMutableReference(), (IntPtr)index); //TODO: nuint - remove cast +#if SYSTEM_PRIVATE_CORELIB + return ref Unsafe.AddByteOffset(ref DangerousGetMutableReference(), index); +#else + return ref Unsafe.AddByteOffset(ref DangerousGetMutableReference(), (IntPtr)index); +#endif } public bool IsEmptyOrWhiteSpace() => (Utf8Utility.GetIndexOfFirstNonWhiteSpaceChar(Bytes) == Length); @@ -168,7 +172,7 @@ public override int GetHashCode() // UTF-8 textual data, not over arbitrary binary sequences. ulong seed = Marvin.DefaultSeed; -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB return Marvin.ComputeHash32(ref MemoryMarshal.GetReference(Bytes), (uint)Length /* in bytes */, (uint)seed, (uint)(seed >> 32)); #else return Marvin.ComputeHash32(Bytes, seed); @@ -220,7 +224,7 @@ public bool IsNormalized(NormalizationForm normalizationForm = NormalizationForm /// /// Gets an immutable reference that can be used in a statement. Unlike - /// TODO cref="Utf8String"/>, the resulting reference is not guaranteed to be null-terminated. + /// , the resulting reference is not guaranteed to be null-terminated. /// /// /// If this instance is empty, returns . 
Dereferencing @@ -241,18 +245,15 @@ public override string ToString() // TODO_UTF8STRING: Since we know the underlying data is immutable, well-formed UTF-8, // we can perform transcoding using an optimized code path that skips all safety checks. -#if CORECLR || NETCOREAPP +#if !NETSTANDARD2_0 return Encoding.UTF8.GetString(Bytes); #else - byte[] buffer = ArrayPool.Shared.Rent(Length); - try - { - Bytes.CopyTo(buffer); - return Encoding.UTF8.GetString(buffer, 0, Length); - } - finally + unsafe { - ArrayPool.Shared.Return(buffer); + fixed (byte* pBytes = Bytes) + { + return Encoding.UTF8.GetString(pBytes, Length); + } } #endif } @@ -282,7 +283,7 @@ internal unsafe string ToStringNoReplacement() int utf16CharCount = Length + utf16CodeUnitCountAdjustment; Debug.Assert(utf16CharCount <= Length && utf16CharCount >= 0); -#if CORECLR || NETCOREAPP +#if !NETSTANDARD2_0 // TODO_UTF8STRING: Can we call string.FastAllocate directly? return string.Create(utf16CharCount, (pbData: (IntPtr)pData, cbData: Length), (chars, state) => { diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.CoreLib.cs similarity index 100% rename from src/coreclr/src/System.Private.CoreLib/src/System/Utf8Extensions.CoreCLR.cs rename to src/libraries/System.Private.CoreLib/src/System/Utf8Extensions.CoreLib.cs diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs index 8f98055613d35..de5acc5e1bd91 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs @@ -59,7 +59,7 @@ public static bool AreEquivalent(Utf8String? utf8Text, string? 
utf16Text) } return AreEquivalentOrdinalSkipShortCircuitingChecks(utf8Text.AsBytes(), utf16Text -#if NETSTANDARD +#if NETSTANDARD2_0 .AsSpan() #endif ); @@ -194,7 +194,7 @@ public bool Contains(Rune value, StringComparison comparison) { // TODO_UTF8STRING: Optimize me to avoid allocations. -#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP +#if !NETSTANDARD2_0 return ToString().Contains(value.ToString(), comparison); #else return ToString().IndexOf(value.ToString(), comparison) >= 0; @@ -228,7 +228,7 @@ public bool Contains(Utf8String value, StringComparison comparison) // TODO_UTF8STRING: Optimize me to avoid allocations. -#if SYSTEM_PRIVATE_CORELIB || NETCOREAPP +#if !NETSTANDARD2_0 return ToString().Contains(value.ToString(), comparison); #else return ToString().IndexOf(value.ToString(), comparison) >= 0; diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs index cbf736bc6473c..c6bc878abd909 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs @@ -48,7 +48,7 @@ public static bool TryCreateFrom(ReadOnlySpan buffer, [NotNullWhen(true)] // Create and populate the Utf8String instance. Utf8String newString = FastAllocateSkipZeroInit(buffer.Length); -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB Buffer.Memmove(ref newString.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)buffer.Length); #else buffer.CopyTo(newString.DangerousGetMutableSpan()); @@ -115,7 +115,7 @@ public static Utf8String CreateFromRelaxed(ReadOnlySpan buffer) // Create and populate the Utf8String instance. 
Utf8String newString = FastAllocateSkipZeroInit(buffer.Length); -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB Buffer.Memmove(ref newString.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)buffer.Length); #else buffer.CopyTo(newString.DangerousGetMutableSpan()); @@ -351,7 +351,7 @@ internal static Utf8String CreateFromRune(Rune value) return newBuffer; } -#endif +#endif // !SYSTEM_PRIVATE_CORELIB #if !NETSTANDARD2_0 /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs index 4a6e4625d7ecf..ac66e0e0668a3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs @@ -656,10 +656,10 @@ internal readonly bool DeconstructHelper(in Utf8Span source, out Utf8Span firstI int searchRune = SearchRune; // local copy so as to avoid struct tearing if (searchRune >= 0) { -#if SYSTEM_PRIVATE_CORELIB || NETSTANDARD2_0 - wasMatchFound = searchSpan.TryFind(Rune.UnsafeCreate((uint)searchRune), out matchRange); -#else +#if NETCOREAPP3_0 wasMatchFound = searchSpan.TryFind(new Rune((uint)searchRune), out matchRange); +#else + wasMatchFound = searchSpan.TryFind(Rune.UnsafeCreate((uint)searchRune), out matchRange); #endif } else diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs index 89aa5d7054d04..5b6c9b6b07743 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs @@ -9,7 +9,7 @@ using System.Text; using System.Text.Unicode; -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB using Internal.Runtime.CompilerServices; #endif @@ -125,10 +125,10 @@ internal ref byte DangerousGetMutableReference(nuint index) // Allow retrieving references to the null terminator. 
Debug.Assert(index <= (uint)Length, "Caller should've performed bounds checking."); -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB return ref Unsafe.AddByteOffset(ref DangerousGetMutableReference(), index); #else - return ref Unsafe.AddByteOffset(ref DangerousGetMutableReference(), (IntPtr)index); // TODO: IntPtr cast? + return ref Unsafe.AddByteOffset(ref DangerousGetMutableReference(), (IntPtr)index); #endif } @@ -156,7 +156,7 @@ public bool Equals(Utf8String? value) return !(value is null) && this.Length == value.Length -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB && SpanHelpers.SequenceEqual(ref this.DangerousGetMutableReference(), ref value.DangerousGetMutableReference(), (uint)Length); #else && this.GetSpan().SequenceEqual(value.GetSpan()); @@ -185,7 +185,7 @@ public static bool Equals(Utf8String? left, Utf8String? right) return !(left is null) && !(right is null) && left.Length == right.Length -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB && SpanHelpers.SequenceEqual(ref left.DangerousGetMutableReference(), ref right.DangerousGetMutableReference(), (uint)left.Length); #else && left.GetSpan().SequenceEqual(right.GetSpan()); @@ -211,7 +211,7 @@ public override int GetHashCode() // TODO_UTF8STRING: Consider whether this should use a different seed than String.GetHashCode. ulong seed = Marvin.DefaultSeed; -#if CORECLR +#if SYSTEM_PRIVATE_CORELIB return Marvin.ComputeHash32(ref DangerousGetMutableReference(), (uint)_length /* in bytes */, (uint)seed, (uint)(seed >> 32)); #else return Marvin.ComputeHash32(_bytes, seed); @@ -258,7 +258,7 @@ public static bool IsNullOrWhiteSpace([NotNullWhen(false)] Utf8String? value) } /// - /// Returns the entire as an array of UTF-8 bytes.GetPinnableReference + /// Returns the entire as an array of UTF-8 bytes. /// public byte[] ToByteArray() => this.AsSpanSkipNullCheck().ToByteArray(); @@ -269,18 +269,15 @@ public override string ToString() { // TODO_UTF8STRING: Optimize the call below, potentially by avoiding the two-pass. 
-#if CORECLR || NETCOREAPP +#if !NETSTANDARD2_0 return Encoding.UTF8.GetString(this.AsBytesSkipNullCheck()); #else - byte[] buffer = ArrayPool.Shared.Rent(Length); - try + unsafe { - GetSpan().CopyTo(buffer.AsSpan()); - return Encoding.UTF8.GetString(buffer, 0, Length); - } - finally - { - ArrayPool.Shared.Return(buffer); + fixed (byte* pBytes = this.AsBytesSkipNullCheck()) + { + return Encoding.UTF8.GetString(pBytes, Length); + } } #endif } diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index 6f34f9bf0594d..73f1738a1a75c 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -18,6 +18,9 @@ System\Numerics\BitOperations.cs + + System\Numerics\Hashing\HashHelpers.cs + System\Range.cs @@ -72,9 +75,6 @@ System\Text\Utf8StringComparer.cs - - System\Numerics\Hashing\HashHelpers.cs - System\Text\ASCIIUtility.cs diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs index cab4ec11c8de9..a248e95d9488b 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs @@ -11,7 +11,7 @@ namespace System.Net.Http { - public sealed partial class Utf8StringContent : HttpContent + public sealed class Utf8StringContent : HttpContent { private const string DefaultMediaType = "text/plain"; diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs index 4755570d1643b..a870b8102bbcb 100644 --- 
a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs @@ -123,6 +123,7 @@ private static byte[] InitializeBuffer(ReadOnlySpan value) return newBuffer; } + // This should only be called from FastAllocate private Utf8String(byte[] bytes) { _bytes = bytes; @@ -172,6 +173,10 @@ internal Span DangerousGetMutableSpan() [MethodImpl(MethodImplOptions.AggressiveInlining)] internal ReadOnlySpan AsBytesSkipNullCheck() => GetSpan(); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal ReadOnlyMemory CreateMemoryBytes(int start, int length) => + _bytes.AsMemory(start, length); + /// /// Creates a new zero-initialized instance of the specified length. Actual storage allocated is "length + 1" bytes /// because instances are null-terminated. @@ -222,9 +227,5 @@ private static void ThrowMustBeNullTerminatedString() { throw new ArgumentException(SR.Arg_MustBeNullTerminatedString); } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal ReadOnlyMemory CreateMemoryBytes(int start, int length) => - _bytes.AsMemory(start, length); } } From aafb1d8ea8521ab521e6374b568738ce9b55a7dc Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Tue, 10 Mar 2020 18:27:45 -0500 Subject: [PATCH 22/26] Address PR feedback --- eng/referenceFromRuntime.targets | 2 +- .../src/System/Index.cs | 1 - .../src/System/Range.cs | 1 - .../src/System/Text/Rune.cs | 76 +++- .../src/System/Text/Utf8Span.Conversion.cs | 30 +- .../src/System/Text/Utf8Span.Searching.cs | 21 +- .../src/System/Text/Utf8Span.cs | 5 + .../src/System/Utf8String.Comparison.cs | 6 +- .../src/System/Utf8String.cs | 5 + .../src/Resources/Strings.resx | 3 + .../src/System/ThrowHelper.cs | 6 + .../Utf8SpanTests.Searching.TestData.cs | 4 - .../tests/System/Utf8SpanTests.Searching.cs | 324 ++++++++++-------- .../tests/System/Utf8StringTests.Searching.cs | 324 ++++++++++-------- .../tests/System/Utf8TestUtilities.cs | 5 
+ 15 files changed, 479 insertions(+), 334 deletions(-) diff --git a/eng/referenceFromRuntime.targets b/eng/referenceFromRuntime.targets index 8666173afdf46..c428eb61d5807 100644 --- a/eng/referenceFromRuntime.targets +++ b/eng/referenceFromRuntime.targets @@ -106,7 +106,7 @@ - diff --git a/src/libraries/System.Private.CoreLib/src/System/Index.cs b/src/libraries/System.Private.CoreLib/src/System/Index.cs index 22225ac0bc2ed..1da3a691c7c76 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Index.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Index.cs @@ -4,7 +4,6 @@ using System.Diagnostics; using System.Runtime.CompilerServices; -using System.Text; namespace System { diff --git a/src/libraries/System.Private.CoreLib/src/System/Range.cs b/src/libraries/System.Private.CoreLib/src/System/Range.cs index 200b7de8f67a3..cdf937092fa52 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Range.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Range.cs @@ -4,7 +4,6 @@ using System.Diagnostics; using System.Runtime.CompilerServices; -using System.Text; #if NETSTANDARD2_0 using System.Numerics.Hashing; diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs index 0d000011dba2a..ffb324ecefff9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs @@ -20,9 +20,9 @@ namespace System.Text [DebuggerDisplay("{DebuggerDisplay,nq}")] public readonly struct Rune : IComparable, IEquatable { - private const char HIGH_SURROGATE_START = '\ud800'; - private const char LOW_SURROGATE_START = '\udc00'; - private const int HIGH_SURROGATE_RANGE = 0x3FF; + private const char HighSurrogateStart = '\ud800'; + private const char LowSurrogateStart = '\udc00'; + private const int HighSurrogateRange = 0x3FF; private const byte IsWhiteSpaceFlag = 0x80; private const byte IsLetterOrDigitFlag = 0x40; @@ 
-179,10 +179,11 @@ private Rune(uint scalarValue, bool unused) /// public int Value => (int)_value; - private static Rune ChangeCaseCultureAware(Rune rune, CultureInfo culture, bool toUpper) +#if SYSTEM_PRIVATE_CORELIB + private static Rune ChangeCaseCultureAware(Rune rune, TextInfo textInfo, bool toUpper) { Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller."); - Debug.Assert(culture != null, "This should've been checked by the caller."); + Debug.Assert(textInfo != null, "This should've been checked by the caller."); Span original = stackalloc char[2]; // worst case scenario = 2 code units (for a surrogate pair) Span modified = stackalloc char[2]; // case change should preserve UTF-16 code unit count @@ -191,16 +192,41 @@ private static Rune ChangeCaseCultureAware(Rune rune, CultureInfo culture, bool original = original.Slice(0, charCount); modified = modified.Slice(0, charCount); -#if SYSTEM_PRIVATE_CORELIB if (toUpper) { - culture!.TextInfo.ChangeCaseToUpper(original, modified); + textInfo.ChangeCaseToUpper(original, modified); + } + else + { + textInfo.ChangeCaseToLower(original, modified); + } + + // We use simple case folding rules, which disallows moving between the BMP and supplementary + // planes when performing a case conversion. The helper methods which reconstruct a Rune + // contain debug asserts for this condition. 
+ + if (rune.IsBmp) + { + return UnsafeCreate(modified[0]); } else { - culture!.TextInfo.ChangeCaseToLower(original, modified); + return UnsafeCreate(UnicodeUtility.GetScalarFromUtf16SurrogatePair(modified[0], modified[1])); } + } #else + private static Rune ChangeCaseCultureAware(Rune rune, CultureInfo culture, bool toUpper) + { + Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller."); + Debug.Assert(culture != null, "This should've been checked by the caller."); + + Span original = stackalloc char[2]; // worst case scenario = 2 code units (for a surrogate pair) + Span modified = stackalloc char[2]; // case change should preserve UTF-16 code unit count + + int charCount = rune.EncodeToUtf16(original); + original = original.Slice(0, charCount); + modified = modified.Slice(0, charCount); + if (toUpper) { MemoryExtensions.ToUpper(original, modified, culture); @@ -209,7 +235,6 @@ private static Rune ChangeCaseCultureAware(Rune rune, CultureInfo culture, bool { MemoryExtensions.ToLower(original, modified, culture); } -#endif // We use simple case folding rules, which disallows moving between the BMP and supplementary // planes when performing a case conversion. The helper methods which reconstruct a Rune @@ -224,6 +249,7 @@ private static Rune ChangeCaseCultureAware(Rune rune, CultureInfo culture, bool return UnsafeCreate(UnicodeUtility.GetScalarFromUtf16SurrogatePair(modified[0], modified[1])); } } +#endif public int CompareTo(Rune other) => _value.CompareTo(other._value); @@ -894,17 +920,17 @@ public static bool TryCreate(char highSurrogate, char lowSurrogate, out Rune res // First, extend both to 32 bits, then calculate the offset of // each candidate surrogate char from the start of its range. 
- uint highSurrogateOffset = (uint)highSurrogate - HIGH_SURROGATE_START; - uint lowSurrogateOffset = (uint)lowSurrogate - LOW_SURROGATE_START; + uint highSurrogateOffset = (uint)highSurrogate - HighSurrogateStart; + uint lowSurrogateOffset = (uint)lowSurrogate - LowSurrogateStart; // This is a single comparison which allows us to check both for validity at once since // both the high surrogate range and the low surrogate range are the same length. // If the comparison fails, we call to a helper method to throw the correct exception message. - if ((highSurrogateOffset | lowSurrogateOffset) <= HIGH_SURROGATE_RANGE) + if ((highSurrogateOffset | lowSurrogateOffset) <= HighSurrogateRange) { // The 0x40u << 10 below is to account for uuuuu = wwww + 1 in the surrogate encoding. - result = UnsafeCreate((highSurrogateOffset << 10) + ((uint)lowSurrogate - LOW_SURROGATE_START) + (0x40u << 10)); + result = UnsafeCreate((highSurrogateOffset << 10) + ((uint)lowSurrogate - LowSurrogateStart) + (0x40u << 10)); return true; } else @@ -1102,6 +1128,10 @@ public static double GetNumericValue(Rune value) #if SYSTEM_PRIVATE_CORELIB return CharUnicodeInfo.GetNumericValue(value.Value); #else + if (value.IsBmp) + { + return CharUnicodeInfo.GetNumericValue((char)value._value); + } return CharUnicodeInfo.GetNumericValue(value.ToString(), 0); #endif } @@ -1125,6 +1155,10 @@ private static UnicodeCategory GetUnicodeCategoryNonAscii(Rune value) #if !NETSTANDARD2_0 return CharUnicodeInfo.GetUnicodeCategory(value.Value); #else + if (value.IsBmp) + { + return CharUnicodeInfo.GetUnicodeCategory((char)value._value); + } return CharUnicodeInfo.GetUnicodeCategory(value.ToString(), 0); #endif } @@ -1301,7 +1335,11 @@ public static Rune ToLower(Rune value, CultureInfo culture) return ToLowerInvariant(value); } +#if SYSTEM_PRIVATE_CORELIB + return ChangeCaseCultureAware(value, culture.TextInfo, toUpper: false); +#else return ChangeCaseCultureAware(value, culture, toUpper: false); +#endif } public static 
Rune ToLowerInvariant(Rune value) @@ -1325,7 +1363,11 @@ public static Rune ToLowerInvariant(Rune value) // Non-ASCII data requires going through the case folding tables. +#if SYSTEM_PRIVATE_CORELIB + return ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: false); +#else return ChangeCaseCultureAware(value, CultureInfo.InvariantCulture, toUpper: false); +#endif } public static Rune ToUpper(Rune value, CultureInfo culture) @@ -1344,7 +1386,11 @@ public static Rune ToUpper(Rune value, CultureInfo culture) return ToUpperInvariant(value); } +#if SYSTEM_PRIVATE_CORELIB + return ChangeCaseCultureAware(value, culture.TextInfo, toUpper: true); +#else return ChangeCaseCultureAware(value, culture, toUpper: true); +#endif } public static Rune ToUpperInvariant(Rune value) @@ -1368,7 +1414,11 @@ public static Rune ToUpperInvariant(Rune value) // Non-ASCII data requires going through the case folding tables. +#if SYSTEM_PRIVATE_CORELIB + return ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: true); +#else return ChangeCaseCultureAware(value, CultureInfo.InvariantCulture, toUpper: true); +#endif } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs index 9868be8f406b2..51f0840e10412 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Conversion.cs @@ -43,11 +43,7 @@ public int Normalize(Span destination, NormalizationForm normalizationForm { // TODO_UTF8STRING: Reduce allocations in this code path. 
- ReadOnlySpan normalized = this.ToString().Normalize(normalizationForm) -#if NETSTANDARD2_0 - .AsSpan() -#endif - ; + ReadOnlySpan normalized = this.ToString().Normalize(normalizationForm).AsSpan(); OperationStatus status = Utf8.FromUtf16(normalized, destination, out int _, out int bytesWritten, replaceInvalidSequences: false, isFinalBlock: true); Debug.Assert(status == OperationStatus.Done || status == OperationStatus.DestinationTooSmall, "Normalize shouldn't have produced malformed Unicode string."); @@ -162,11 +158,7 @@ public int ToLower(Span destination, CultureInfo culture) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.culture); } - ReadOnlySpan asLower = this.ToString().ToLower(culture) -#if NETSTANDARD2_0 - .AsSpan() -#endif - ; + ReadOnlySpan asLower = this.ToString().ToLower(culture).AsSpan(); OperationStatus status = Utf8.FromUtf16(asLower, destination, out int _, out int bytesWritten, replaceInvalidSequences: false, isFinalBlock: true); Debug.Assert(status == OperationStatus.Done || status == OperationStatus.DestinationTooSmall, "ToLower shouldn't have produced malformed Unicode string."); @@ -214,11 +206,7 @@ public int ToLowerInvariant(Span destination) { // TODO_UTF8STRING: Avoid intermediate allocations. 
- ReadOnlySpan asLowerInvariant = this.ToString().ToLowerInvariant() -#if NETSTANDARD2_0 - .AsSpan() -#endif - ; + ReadOnlySpan asLowerInvariant = this.ToString().ToLowerInvariant().AsSpan(); OperationStatus status = Utf8.FromUtf16(asLowerInvariant, destination, out int _, out int bytesWritten, replaceInvalidSequences: false, isFinalBlock: true); Debug.Assert(status == OperationStatus.Done || status == OperationStatus.DestinationTooSmall, "ToLowerInvariant shouldn't have produced malformed Unicode string."); @@ -274,11 +262,7 @@ public int ToUpper(Span destination, CultureInfo culture) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.culture); } - ReadOnlySpan asUpper = this.ToString().ToUpper(culture) -#if NETSTANDARD2_0 - .AsSpan() -#endif - ; + ReadOnlySpan asUpper = this.ToString().ToUpper(culture).AsSpan(); OperationStatus status = Utf8.FromUtf16(asUpper, destination, out int _, out int bytesWritten, replaceInvalidSequences: false, isFinalBlock: true); Debug.Assert(status == OperationStatus.Done || status == OperationStatus.DestinationTooSmall, "ToUpper shouldn't have produced malformed Unicode string."); @@ -326,11 +310,7 @@ public int ToUpperInvariant(Span destination) { // TODO_UTF8STRING: Avoid intermediate allocations. 
- ReadOnlySpan asUpperInvariant = this.ToString().ToUpperInvariant() -#if NETSTANDARD2_0 - .AsSpan() -#endif - ; + ReadOnlySpan asUpperInvariant = this.ToString().ToUpperInvariant().AsSpan(); OperationStatus status = Utf8.FromUtf16(asUpperInvariant, destination, out int _, out int bytesWritten, replaceInvalidSequences: false, isFinalBlock: true); Debug.Assert(status == OperationStatus.Done || status == OperationStatus.DestinationTooSmall, "ToUpperInvariant shouldn't have produced malformed Unicode string."); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs index 178238747d391..ea295dfa26c4d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.Searching.cs @@ -221,7 +221,7 @@ private unsafe bool TryFind(Utf8Span value, StringComparison comparisonType, out // TODO_UTF8STRING: We should take advantage of the property described above to avoid the UTF-16 // transcoding step entirely. - if (compareOptions != CompareOptions.None) + if (compareOptions == CompareOptions.None) { return (fromBeginning) ? TryFind(value, out range) @@ -239,7 +239,11 @@ private unsafe bool TryFind(Utf8Span value, StringComparison comparisonType, out case StringComparison.OrdinalIgnoreCase: // TODO_UTF8STRING: Can probably optimize this case. 
+#if SYSTEM_PRIVATE_CORELIB + compareInfo = CompareInfo.Invariant; +#else compareInfo = CultureInfo.InvariantCulture.CompareInfo; +#endif break; case StringComparison.CurrentCulture: @@ -249,7 +253,11 @@ private unsafe bool TryFind(Utf8Span value, StringComparison comparisonType, out default: Debug.Assert(comparisonType == StringComparison.InvariantCulture || comparisonType == StringComparison.InvariantCultureIgnoreCase); +#if SYSTEM_PRIVATE_CORELIB + compareInfo = CompareInfo.Invariant; +#else compareInfo = CultureInfo.InvariantCulture.CompareInfo; +#endif break; } } @@ -276,6 +284,8 @@ private unsafe bool TryFind(Utf8Span value, StringComparison comparisonType, out idx = compareInfo.IndexOf(thisTranscodedToUtf16, otherTranscodedToUtf16, 0, thisTranscodedToUtf16.Length, compareOptions, &matchLength, fromBeginning); } #else + Debug.Assert(comparisonType == StringComparison.OrdinalIgnoreCase); + if (fromBeginning) { idx = compareInfo.IndexOf(thisTranscodedToUtf16, otherTranscodedToUtf16, 0, thisTranscodedToUtf16.Length, compareOptions); @@ -284,7 +294,6 @@ private unsafe bool TryFind(Utf8Span value, StringComparison comparisonType, out { idx = compareInfo.LastIndexOf(thisTranscodedToUtf16, otherTranscodedToUtf16, thisTranscodedToUtf16.Length, thisTranscodedToUtf16.Length, compareOptions); } - // TODO_UTF8STRING: matchLength is not correct here. Need to figure this out outside of CoreLib. matchLength = otherTranscodedToUtf16.Length; #endif @@ -529,6 +538,14 @@ private static void CheckStringComparison(StringComparison comparisonType) { ThrowHelper.ThrowArgumentException(SR.NotSupported_StringComparison, ExceptionArgument.comparisonType); } + + // There's no API that would allow getting the correct match length + // for other StringComparisons. 
+ if (comparisonType != StringComparison.Ordinal && + comparisonType != StringComparison.OrdinalIgnoreCase) + { + ThrowHelper.ThrowNotSupportedException(SR.Utf8Span_TryFindOnlySupportsOrdinal); + } #endif } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs index ec481eac0f0a3..117e7ef479e28 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs @@ -248,6 +248,11 @@ public override string ToString() #if !NETSTANDARD2_0 return Encoding.UTF8.GetString(Bytes); #else + if (IsEmpty) + { + return string.Empty; + } + unsafe { fixed (byte* pBytes = Bytes) diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs index de5acc5e1bd91..273df67eae1cc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Comparison.cs @@ -58,11 +58,7 @@ public static bool AreEquivalent(Utf8String? utf8Text, string? 
utf16Text) return false; } - return AreEquivalentOrdinalSkipShortCircuitingChecks(utf8Text.AsBytes(), utf16Text -#if NETSTANDARD2_0 - .AsSpan() -#endif - ); + return AreEquivalentOrdinalSkipShortCircuitingChecks(utf8Text.AsBytes(), utf16Text.AsSpan()); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs index 5b6c9b6b07743..10c0dbe1ef41a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs @@ -272,6 +272,11 @@ public override string ToString() #if !NETSTANDARD2_0 return Encoding.UTF8.GetString(this.AsBytesSkipNullCheck()); #else + if (Length == 0) + { + return string.Empty; + } + unsafe { fixed (byte* pBytes = this.AsBytesSkipNullCheck()) diff --git a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx index b7b4320da1d12..a02a1757a6902 100644 --- a/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx +++ b/src/libraries/System.Utf8String.Experimental/src/Resources/Strings.resx @@ -150,6 +150,9 @@ Cannot call Utf8Span.Equals(object). Use Equals(Utf8Span) or operator == instead. + + UTF-8 searching only supports StringComparison Ordinal and OrdinalIgnoreCase on this platform. + The callback populated its buffer with ill-formed UTF-8 data. Callbacks are required to populate the buffer only with well-formed UTF-8 data. 
diff --git a/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs b/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs index 983dcd397b299..4d3a949111697 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/ThrowHelper.cs @@ -68,6 +68,12 @@ private static ArgumentOutOfRangeException GetArgumentOutOfRangeException(Except { return new ArgumentOutOfRangeException(argument.ToString(), resource); } + + [DoesNotReturn] + internal static void ThrowNotSupportedException(string message) + { + throw new NotSupportedException(message); + } } // diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.TestData.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.TestData.cs index 2b77ccf123511..19ccfd41518a1 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.TestData.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.TestData.cs @@ -499,7 +499,6 @@ private static IEnumerable TryFindData_All() ExpectedFirstMatch = null, ExpectedLastMatch = null, }, -#if !NETFRAMEWORK new TryFindTestData { // Turkish I, case-sensitive @@ -510,7 +509,6 @@ private static IEnumerable TryFindData_All() ExpectedFirstMatch = 0..1, ExpectedLastMatch = 0..1, }, -#endif new TryFindTestData { // Turkish I, case-insensitive @@ -531,7 +529,6 @@ private static IEnumerable TryFindData_All() ExpectedFirstMatch = null, ExpectedLastMatch = null, }, -#if !NETFRAMEWORK new TryFindTestData { // denormalized forms, case-sensitive @@ -552,7 +549,6 @@ private static IEnumerable TryFindData_All() ExpectedFirstMatch = 3..6, ExpectedLastMatch = ^3.., }, -#endif }; return testDataEntries; diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.cs 
b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.cs index a46da8a6c29db..36b1842f48a7e 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8SpanTests.Searching.cs @@ -96,57 +96,71 @@ public static void TryFind_Char_WithComparison(ustring source, char searchTerm, CultureInfo.CurrentCulture = currentCulture; } - // First, search forward - - bool wasFound = searchSpan.TryFind(searchTerm, comparison, out Range actualForwardMatch); - Assert.Equal(expectedForwardMatch.HasValue, wasFound); - - if (wasFound) - { - AssertRangesEqual(searchSpan.Length, expectedForwardMatch.Value, actualForwardMatch); - } - - // Also check Contains / StartsWith / SplitOn - - Assert.Equal(wasFound, searchSpan.Contains(searchTerm, comparison)); - Assert.Equal(wasFound && searchSpan.Bytes[..actualForwardMatch.Start].IsEmpty, searchSpan.StartsWith(searchTerm, comparison)); - - (var before, var after) = searchSpan.SplitOn(searchTerm, comparison); - if (wasFound) - { - Assert.True(searchSpan.Bytes[..actualForwardMatch.Start] == before.Bytes); // check for referential equality - Assert.True(searchSpan.Bytes[actualForwardMatch.End..] 
== after.Bytes); // check for referential equality - } - else - { - Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality - Assert.True(after.IsNull()); - } - - // Now search backward - - wasFound = searchSpan.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); - Assert.Equal(expectedBackwardMatch.HasValue, wasFound); - - if (wasFound) + if (IsTryFindSupported(comparison)) { - AssertRangesEqual(searchSpan.Length, expectedBackwardMatch.Value, actualBackwardMatch); - } - - // Also check EndsWith / SplitOnLast - - Assert.Equal(wasFound && searchSpan.Bytes[actualBackwardMatch.End..].IsEmpty, searchSpan.EndsWith(searchTerm, comparison)); - - (before, after) = searchSpan.SplitOnLast(searchTerm, comparison); - if (wasFound) - { - Assert.True(searchSpan.Bytes[..actualBackwardMatch.Start] == before.Bytes); // check for referential equality - Assert.True(searchSpan.Bytes[actualBackwardMatch.End..] == after.Bytes); // check for referential equality + // First, search forward + + bool wasFound = searchSpan.TryFind(searchTerm, comparison, out Range actualForwardMatch); + Assert.Equal(expectedForwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(searchSpan.Length, expectedForwardMatch.Value, actualForwardMatch); + } + + // Also check Contains / StartsWith / SplitOn + + Assert.Equal(wasFound, searchSpan.Contains(searchTerm, comparison)); + Assert.Equal(wasFound && searchSpan.Bytes[..actualForwardMatch.Start].IsEmpty, searchSpan.StartsWith(searchTerm, comparison)); + + (var before, var after) = searchSpan.SplitOn(searchTerm, comparison); + if (wasFound) + { + Assert.True(searchSpan.Bytes[..actualForwardMatch.Start] == before.Bytes); // check for referential equality + Assert.True(searchSpan.Bytes[actualForwardMatch.End..] 
== after.Bytes); // check for referential equality + } + else + { + Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality + Assert.True(after.IsNull()); + } + + // Now search backward + + wasFound = searchSpan.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); + Assert.Equal(expectedBackwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(searchSpan.Length, expectedBackwardMatch.Value, actualBackwardMatch); + } + + // Also check EndsWith / SplitOnLast + + Assert.Equal(wasFound && searchSpan.Bytes[actualBackwardMatch.End..].IsEmpty, searchSpan.EndsWith(searchTerm, comparison)); + + (before, after) = searchSpan.SplitOnLast(searchTerm, comparison); + if (wasFound) + { + Assert.True(searchSpan.Bytes[..actualBackwardMatch.Start] == before.Bytes); // check for referential equality + Assert.True(searchSpan.Bytes[actualBackwardMatch.End..] == after.Bytes); // check for referential equality + } + else + { + Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality + Assert.True(after.IsNull()); + } } else { - Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality - Assert.True(after.IsNull()); + Assert.Throws(() => boundedSpan.Span.TryFind(searchTerm, comparison, out var _)); + Assert.Throws(() => boundedSpan.Span.TryFindLast(searchTerm, comparison, out var _)); + Assert.Throws(() => boundedSpan.Span.SplitOn(searchTerm, comparison)); + Assert.Throws(() => boundedSpan.Span.SplitOnLast(searchTerm, comparison)); + + Assert.Equal(expectedForwardMatch.HasValue, searchSpan.Contains(searchTerm, comparison)); + Assert.Equal(expectedForwardMatch.HasValue && searchSpan.Bytes[..expectedForwardMatch.Value.Start].IsEmpty, searchSpan.StartsWith(searchTerm, comparison)); + Assert.Equal(expectedBackwardMatch.HasValue && searchSpan.Bytes[expectedBackwardMatch.Value.End..].IsEmpty, searchSpan.EndsWith(searchTerm, comparison)); } }); } @@ -229,57 +243,71 @@ public static void 
TryFind_Rune_WithComparison(ustring source, Rune searchTerm, CultureInfo.CurrentCulture = currentCulture; } - // First, search forward - - bool wasFound = searchSpan.TryFind(searchTerm, comparison, out Range actualForwardMatch); - Assert.Equal(expectedForwardMatch.HasValue, wasFound); - - if (wasFound) - { - AssertRangesEqual(searchSpan.Length, expectedForwardMatch.Value, actualForwardMatch); - } - - // Also check Contains / StartsWith / SplitOn - - Assert.Equal(wasFound, searchSpan.Contains(searchTerm, comparison)); - Assert.Equal(wasFound && searchSpan.Bytes[..actualForwardMatch.Start].IsEmpty, searchSpan.StartsWith(searchTerm, comparison)); - - (var before, var after) = searchSpan.SplitOn(searchTerm, comparison); - if (wasFound) - { - Assert.True(searchSpan.Bytes[..actualForwardMatch.Start] == before.Bytes); // check for referential equality - Assert.True(searchSpan.Bytes[actualForwardMatch.End..] == after.Bytes); // check for referential equality - } - else - { - Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality - Assert.True(after.IsNull()); - } - - // Now search backward - - wasFound = searchSpan.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); - Assert.Equal(expectedBackwardMatch.HasValue, wasFound); - - if (wasFound) + if (IsTryFindSupported(comparison)) { - AssertRangesEqual(searchSpan.Length, expectedBackwardMatch.Value, actualBackwardMatch); - } - - // Also check EndsWith / SplitOnLast - - Assert.Equal(wasFound && searchSpan.Bytes[actualBackwardMatch.End..].IsEmpty, searchSpan.EndsWith(searchTerm, comparison)); - - (before, after) = searchSpan.SplitOnLast(searchTerm, comparison); - if (wasFound) - { - Assert.True(searchSpan.Bytes[..actualBackwardMatch.Start] == before.Bytes); // check for referential equality - Assert.True(searchSpan.Bytes[actualBackwardMatch.End..] 
== after.Bytes); // check for referential equality + // First, search forward + + bool wasFound = searchSpan.TryFind(searchTerm, comparison, out Range actualForwardMatch); + Assert.Equal(expectedForwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(searchSpan.Length, expectedForwardMatch.Value, actualForwardMatch); + } + + // Also check Contains / StartsWith / SplitOn + + Assert.Equal(wasFound, searchSpan.Contains(searchTerm, comparison)); + Assert.Equal(wasFound && searchSpan.Bytes[..actualForwardMatch.Start].IsEmpty, searchSpan.StartsWith(searchTerm, comparison)); + + (var before, var after) = searchSpan.SplitOn(searchTerm, comparison); + if (wasFound) + { + Assert.True(searchSpan.Bytes[..actualForwardMatch.Start] == before.Bytes); // check for referential equality + Assert.True(searchSpan.Bytes[actualForwardMatch.End..] == after.Bytes); // check for referential equality + } + else + { + Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality + Assert.True(after.IsNull()); + } + + // Now search backward + + wasFound = searchSpan.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); + Assert.Equal(expectedBackwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(searchSpan.Length, expectedBackwardMatch.Value, actualBackwardMatch); + } + + // Also check EndsWith / SplitOnLast + + Assert.Equal(wasFound && searchSpan.Bytes[actualBackwardMatch.End..].IsEmpty, searchSpan.EndsWith(searchTerm, comparison)); + + (before, after) = searchSpan.SplitOnLast(searchTerm, comparison); + if (wasFound) + { + Assert.True(searchSpan.Bytes[..actualBackwardMatch.Start] == before.Bytes); // check for referential equality + Assert.True(searchSpan.Bytes[actualBackwardMatch.End..] 
== after.Bytes); // check for referential equality + } + else + { + Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality + Assert.True(after.IsNull()); + } } else { - Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality - Assert.True(after.IsNull()); + Assert.Throws(() => boundedSpan.Span.TryFind(searchTerm, comparison, out var _)); + Assert.Throws(() => boundedSpan.Span.TryFindLast(searchTerm, comparison, out var _)); + Assert.Throws(() => boundedSpan.Span.SplitOn(searchTerm, comparison)); + Assert.Throws(() => boundedSpan.Span.SplitOnLast(searchTerm, comparison)); + + Assert.Equal(expectedForwardMatch.HasValue, searchSpan.Contains(searchTerm, comparison)); + Assert.Equal(expectedForwardMatch.HasValue && searchSpan.Bytes[..expectedForwardMatch.Value.Start].IsEmpty, searchSpan.StartsWith(searchTerm, comparison)); + Assert.Equal(expectedBackwardMatch.HasValue && searchSpan.Bytes[expectedBackwardMatch.Value.End..].IsEmpty, searchSpan.EndsWith(searchTerm, comparison)); } }); } @@ -362,57 +390,71 @@ public static void TryFind_Utf8Span_WithComparison(ustring source, ustring searc CultureInfo.CurrentCulture = currentCulture; } - // First, search forward - - bool wasFound = searchSpan.TryFind(searchTerm, comparison, out Range actualForwardMatch); - Assert.Equal(expectedForwardMatch.HasValue, wasFound); - - if (wasFound) - { - AssertRangesEqual(searchSpan.Length, expectedForwardMatch.Value, actualForwardMatch); - } - - // Also check Contains / StartsWith / SplitOn - - Assert.Equal(wasFound, searchSpan.Contains(searchTerm, comparison)); - Assert.Equal(wasFound && searchSpan.Bytes[..actualForwardMatch.Start].IsEmpty, searchSpan.StartsWith(searchTerm, comparison)); - - (var before, var after) = searchSpan.SplitOn(searchTerm, comparison); - if (wasFound) - { - Assert.True(searchSpan.Bytes[..actualForwardMatch.Start] == before.Bytes); // check for referential equality - Assert.True(searchSpan.Bytes[actualForwardMatch.End..] 
== after.Bytes); // check for referential equality - } - else - { - Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality - Assert.True(after.IsNull()); - } - - // Now search backward - - wasFound = searchSpan.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); - Assert.Equal(expectedBackwardMatch.HasValue, wasFound); - - if (wasFound) - { - AssertRangesEqual(searchSpan.Length, expectedBackwardMatch.Value, actualBackwardMatch); - } - - // Also check EndsWith / SplitOnLast - - Assert.Equal(wasFound && searchSpan.Bytes[actualBackwardMatch.End..].IsEmpty, searchSpan.EndsWith(searchTerm, comparison)); - - (before, after) = searchSpan.SplitOnLast(searchTerm, comparison); - if (wasFound) + if (IsTryFindSupported(comparison)) { - Assert.True(searchSpan.Bytes[..actualBackwardMatch.Start] == before.Bytes); // check for referential equality - Assert.True(searchSpan.Bytes[actualBackwardMatch.End..] == after.Bytes); // check for referential equality + // First, search forward + + bool wasFound = searchSpan.TryFind(searchTerm, comparison, out Range actualForwardMatch); + Assert.Equal(expectedForwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(searchSpan.Length, expectedForwardMatch.Value, actualForwardMatch); + } + + // Also check Contains / StartsWith / SplitOn + + Assert.Equal(wasFound, searchSpan.Contains(searchTerm, comparison)); + Assert.Equal(wasFound && searchSpan.Bytes[..actualForwardMatch.Start].IsEmpty, searchSpan.StartsWith(searchTerm, comparison)); + + (var before, var after) = searchSpan.SplitOn(searchTerm, comparison); + if (wasFound) + { + Assert.True(searchSpan.Bytes[..actualForwardMatch.Start] == before.Bytes); // check for referential equality + Assert.True(searchSpan.Bytes[actualForwardMatch.End..] 
== after.Bytes); // check for referential equality + } + else + { + Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality + Assert.True(after.IsNull()); + } + + // Now search backward + + wasFound = searchSpan.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); + Assert.Equal(expectedBackwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(searchSpan.Length, expectedBackwardMatch.Value, actualBackwardMatch); + } + + // Also check EndsWith / SplitOnLast + + Assert.Equal(wasFound && searchSpan.Bytes[actualBackwardMatch.End..].IsEmpty, searchSpan.EndsWith(searchTerm, comparison)); + + (before, after) = searchSpan.SplitOnLast(searchTerm, comparison); + if (wasFound) + { + Assert.True(searchSpan.Bytes[..actualBackwardMatch.Start] == before.Bytes); // check for referential equality + Assert.True(searchSpan.Bytes[actualBackwardMatch.End..] == after.Bytes); // check for referential equality + } + else + { + Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality + Assert.True(after.IsNull()); + } } else { - Assert.True(searchSpan.Bytes == before.Bytes); // check for reference equality - Assert.True(after.IsNull()); + Assert.Throws(() => boundedSpan.Span.TryFind(searchTerm, comparison, out var _)); + Assert.Throws(() => boundedSpan.Span.TryFindLast(searchTerm, comparison, out var _)); + Assert.Throws(() => boundedSpan.Span.SplitOn(searchTerm, comparison)); + Assert.Throws(() => boundedSpan.Span.SplitOnLast(searchTerm, comparison)); + + Assert.Equal(expectedForwardMatch.HasValue, searchSpan.Contains(searchTerm, comparison)); + Assert.Equal(expectedForwardMatch.HasValue && searchSpan.Bytes[..expectedForwardMatch.Value.Start].IsEmpty, searchSpan.StartsWith(searchTerm, comparison)); + Assert.Equal(expectedBackwardMatch.HasValue && searchSpan.Bytes[expectedBackwardMatch.Value.End..].IsEmpty, searchSpan.EndsWith(searchTerm, comparison)); } }); } diff --git 
a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Searching.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Searching.cs index 573ffd34f8149..245877de18b0d 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Searching.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Searching.cs @@ -104,57 +104,71 @@ public static void TryFind_Char_WithComparison(ustring source, char searchTerm, CultureInfo.CurrentCulture = currentCulture; } - // First, search forward - - bool wasFound = source.TryFind(searchTerm, comparison, out Range actualForwardMatch); - Assert.Equal(expectedForwardMatch.HasValue, wasFound); - - if (wasFound) - { - AssertRangesEqual(source.Length, expectedForwardMatch.Value, actualForwardMatch); - } - - // Also check Contains / StartsWith / SplitOn - - Assert.Equal(wasFound, source.Contains(searchTerm, comparison)); - Assert.Equal(wasFound && source[..actualForwardMatch.Start].Length == 0, source.StartsWith(searchTerm, comparison)); - - (var before, var after) = source.SplitOn(searchTerm, comparison); - if (wasFound) - { - Assert.Equal(source[..actualForwardMatch.Start], before); - Assert.Equal(source[actualForwardMatch.End..], after); - } - else - { - Assert.Same(source, before); // check for reference equality - Assert.Null(after); - } - - // Now search backward - - wasFound = source.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); - Assert.Equal(expectedBackwardMatch.HasValue, wasFound); - - if (wasFound) + if (IsTryFindSupported(comparison)) { - AssertRangesEqual(source.Length, expectedBackwardMatch.Value, actualBackwardMatch); - } - - // Also check EndsWith / SplitOnLast - - Assert.Equal(wasFound && source[actualBackwardMatch.End..].Length == 0, source.EndsWith(searchTerm, comparison)); - - (before, after) = source.SplitOnLast(searchTerm, comparison); - if (wasFound) - { - 
Assert.Equal(source[..actualBackwardMatch.Start], before); - Assert.Equal(source[actualBackwardMatch.End..], after); + // First, search forward + + bool wasFound = source.TryFind(searchTerm, comparison, out Range actualForwardMatch); + Assert.Equal(expectedForwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(source.Length, expectedForwardMatch.Value, actualForwardMatch); + } + + // Also check Contains / StartsWith / SplitOn + + Assert.Equal(wasFound, source.Contains(searchTerm, comparison)); + Assert.Equal(wasFound && source[..actualForwardMatch.Start].Length == 0, source.StartsWith(searchTerm, comparison)); + + (var before, var after) = source.SplitOn(searchTerm, comparison); + if (wasFound) + { + Assert.Equal(source[..actualForwardMatch.Start], before); + Assert.Equal(source[actualForwardMatch.End..], after); + } + else + { + Assert.Same(source, before); // check for reference equality + Assert.Null(after); + } + + // Now search backward + + wasFound = source.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); + Assert.Equal(expectedBackwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(source.Length, expectedBackwardMatch.Value, actualBackwardMatch); + } + + // Also check EndsWith / SplitOnLast + + Assert.Equal(wasFound && source[actualBackwardMatch.End..].Length == 0, source.EndsWith(searchTerm, comparison)); + + (before, after) = source.SplitOnLast(searchTerm, comparison); + if (wasFound) + { + Assert.Equal(source[..actualBackwardMatch.Start], before); + Assert.Equal(source[actualBackwardMatch.End..], after); + } + else + { + Assert.Same(source, before); // check for reference equality + Assert.Null(after); + } } else { - Assert.Same(source, before); // check for reference equality - Assert.Null(after); + Assert.Throws(() => source.TryFind(searchTerm, comparison, out var _)); + Assert.Throws(() => source.TryFindLast(searchTerm, comparison, out var _)); + Assert.Throws(() => source.SplitOn(searchTerm, 
comparison)); + Assert.Throws(() => source.SplitOnLast(searchTerm, comparison)); + + Assert.Equal(expectedForwardMatch.HasValue, source.Contains(searchTerm, comparison)); + Assert.Equal(expectedForwardMatch.HasValue && source[..expectedForwardMatch.Value.Start].Length == 0, source.StartsWith(searchTerm, comparison)); + Assert.Equal(expectedBackwardMatch.HasValue && source[expectedBackwardMatch.Value.End..].Length == 0, source.EndsWith(searchTerm, comparison)); } }); } @@ -243,57 +257,71 @@ public static void TryFind_Rune_WithComparison(ustring source, Rune searchTerm, CultureInfo.CurrentCulture = currentCulture; } - // First, search forward - - bool wasFound = source.TryFind(searchTerm, comparison, out Range actualForwardMatch); - Assert.Equal(expectedForwardMatch.HasValue, wasFound); - - if (wasFound) - { - AssertRangesEqual(source.Length, expectedForwardMatch.Value, actualForwardMatch); - } - - // Also check Contains / StartsWith / SplitOn - - Assert.Equal(wasFound, source.Contains(searchTerm, comparison)); - Assert.Equal(wasFound && source[..actualForwardMatch.Start].Length == 0, source.StartsWith(searchTerm, comparison)); - - (var before, var after) = source.SplitOn(searchTerm, comparison); - if (wasFound) - { - Assert.Equal(source[..actualForwardMatch.Start], before); - Assert.Equal(source[actualForwardMatch.End..], after); - } - else - { - Assert.Same(source, before); // check for reference equality - Assert.Null(after); - } - - // Now search backward - - wasFound = source.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); - Assert.Equal(expectedBackwardMatch.HasValue, wasFound); - - if (wasFound) + if (IsTryFindSupported(comparison)) { - AssertRangesEqual(source.Length, expectedBackwardMatch.Value, actualBackwardMatch); - } - - // Also check EndsWith / SplitOnLast - - Assert.Equal(wasFound && source[actualBackwardMatch.End..].Length == 0, source.EndsWith(searchTerm, comparison)); - - (before, after) = source.SplitOnLast(searchTerm, 
comparison); - if (wasFound) - { - Assert.Equal(source[..actualBackwardMatch.Start], before); - Assert.Equal(source[actualBackwardMatch.End..], after); + // First, search forward + + bool wasFound = source.TryFind(searchTerm, comparison, out Range actualForwardMatch); + Assert.Equal(expectedForwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(source.Length, expectedForwardMatch.Value, actualForwardMatch); + } + + // Also check Contains / StartsWith / SplitOn + + Assert.Equal(wasFound, source.Contains(searchTerm, comparison)); + Assert.Equal(wasFound && source[..actualForwardMatch.Start].Length == 0, source.StartsWith(searchTerm, comparison)); + + (var before, var after) = source.SplitOn(searchTerm, comparison); + if (wasFound) + { + Assert.Equal(source[..actualForwardMatch.Start], before); + Assert.Equal(source[actualForwardMatch.End..], after); + } + else + { + Assert.Same(source, before); // check for reference equality + Assert.Null(after); + } + + // Now search backward + + wasFound = source.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); + Assert.Equal(expectedBackwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(source.Length, expectedBackwardMatch.Value, actualBackwardMatch); + } + + // Also check EndsWith / SplitOnLast + + Assert.Equal(wasFound && source[actualBackwardMatch.End..].Length == 0, source.EndsWith(searchTerm, comparison)); + + (before, after) = source.SplitOnLast(searchTerm, comparison); + if (wasFound) + { + Assert.Equal(source[..actualBackwardMatch.Start], before); + Assert.Equal(source[actualBackwardMatch.End..], after); + } + else + { + Assert.Same(source, before); // check for reference equality + Assert.Null(after); + } } else { - Assert.Same(source, before); // check for reference equality - Assert.Null(after); + Assert.Throws(() =>source.TryFind(searchTerm, comparison, out var _)); + Assert.Throws(() =>source.TryFindLast(searchTerm, comparison, out var _)); + 
Assert.Throws(() =>source.SplitOn(searchTerm, comparison)); + Assert.Throws(() =>source.SplitOnLast(searchTerm, comparison)); + + Assert.Equal(expectedForwardMatch.HasValue, source.Contains(searchTerm, comparison)); + Assert.Equal(expectedForwardMatch.HasValue && source[..expectedForwardMatch.Value.Start].Length == 0, source.StartsWith(searchTerm, comparison)); + Assert.Equal(expectedBackwardMatch.HasValue && source[expectedBackwardMatch.Value.End..].Length == 0, source.EndsWith(searchTerm, comparison)); } }); } @@ -382,57 +410,71 @@ public static void TryFind_Utf8String_WithComparison(ustring source, ustring sea CultureInfo.CurrentCulture = currentCulture; } - // First, search forward - - bool wasFound = source.TryFind(searchTerm, comparison, out Range actualForwardMatch); - Assert.Equal(expectedForwardMatch.HasValue, wasFound); - - if (wasFound) - { - AssertRangesEqual(source.Length, expectedForwardMatch.Value, actualForwardMatch); - } - - // Also check Contains / StartsWith / SplitOn - - Assert.Equal(wasFound, source.Contains(searchTerm, comparison)); - Assert.Equal(wasFound && source[..actualForwardMatch.Start].Length == 0, source.StartsWith(searchTerm, comparison)); - - (var before, var after) = source.SplitOn(searchTerm, comparison); - if (wasFound) - { - Assert.Equal(source[..actualForwardMatch.Start], before); - Assert.Equal(source[actualForwardMatch.End..], after); - } - else - { - Assert.Same(source, before); // check for reference equality - Assert.Null(after); - } - - // Now search backward - - wasFound = source.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); - Assert.Equal(expectedBackwardMatch.HasValue, wasFound); - - if (wasFound) - { - AssertRangesEqual(source.Length, expectedBackwardMatch.Value, actualBackwardMatch); - } - - // Also check EndsWith / SplitOnLast - - Assert.Equal(wasFound && source[actualBackwardMatch.End..].Length == 0, source.EndsWith(searchTerm, comparison)); - - (before, after) = source.SplitOnLast(searchTerm, 
comparison); - if (wasFound) + if (IsTryFindSupported(comparison)) { - Assert.Equal(source[..actualBackwardMatch.Start], before); - Assert.Equal(source[actualBackwardMatch.End..], after); + // First, search forward + + bool wasFound = source.TryFind(searchTerm, comparison, out Range actualForwardMatch); + Assert.Equal(expectedForwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(source.Length, expectedForwardMatch.Value, actualForwardMatch); + } + + // Also check Contains / StartsWith / SplitOn + + Assert.Equal(wasFound, source.Contains(searchTerm, comparison)); + Assert.Equal(wasFound && source[..actualForwardMatch.Start].Length == 0, source.StartsWith(searchTerm, comparison)); + + (var before, var after) = source.SplitOn(searchTerm, comparison); + if (wasFound) + { + Assert.Equal(source[..actualForwardMatch.Start], before); + Assert.Equal(source[actualForwardMatch.End..], after); + } + else + { + Assert.Same(source, before); // check for reference equality + Assert.Null(after); + } + + // Now search backward + + wasFound = source.TryFindLast(searchTerm, comparison, out Range actualBackwardMatch); + Assert.Equal(expectedBackwardMatch.HasValue, wasFound); + + if (wasFound) + { + AssertRangesEqual(source.Length, expectedBackwardMatch.Value, actualBackwardMatch); + } + + // Also check EndsWith / SplitOnLast + + Assert.Equal(wasFound && source[actualBackwardMatch.End..].Length == 0, source.EndsWith(searchTerm, comparison)); + + (before, after) = source.SplitOnLast(searchTerm, comparison); + if (wasFound) + { + Assert.Equal(source[..actualBackwardMatch.Start], before); + Assert.Equal(source[actualBackwardMatch.End..], after); + } + else + { + Assert.Same(source, before); // check for reference equality + Assert.Null(after); + } } else { - Assert.Same(source, before); // check for reference equality - Assert.Null(after); + Assert.Throws(() => source.TryFind(searchTerm, comparison, out var _)); + Assert.Throws(() => source.TryFindLast(searchTerm, 
comparison, out var _)); + Assert.Throws(() => source.SplitOn(searchTerm, comparison)); + Assert.Throws(() => source.SplitOnLast(searchTerm, comparison)); + + Assert.Equal(expectedForwardMatch.HasValue, source.Contains(searchTerm, comparison)); + Assert.Equal(expectedForwardMatch.HasValue && source[..expectedForwardMatch.Value.Start].Length == 0, source.StartsWith(searchTerm, comparison)); + Assert.Equal(expectedBackwardMatch.HasValue && source[expectedBackwardMatch.Value.End..].Length == 0, source.EndsWith(searchTerm, comparison)); } }); } diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8TestUtilities.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8TestUtilities.cs index 1cbbf25a06ea0..b1a5ba73db81b 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8TestUtilities.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8TestUtilities.cs @@ -200,5 +200,10 @@ public static bool IsEmpty(this Range range, int length) (_, int actualLength) = range.GetOffsetAndLength(length); return (actualLength == 0); } + + public static bool IsTryFindSupported(StringComparison comparison) => + !PlatformDetection.IsNetFramework || + comparison == StringComparison.Ordinal || + comparison == StringComparison.OrdinalIgnoreCase; } } From 735317d53100671949359806288d5e241f4f9e2e Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Tue, 10 Mar 2020 21:34:48 -0500 Subject: [PATCH 23/26] Make Intrinsics IsSupported be const fields. 
--- .../src/System/Range.cs | 1 - .../src/System/Text/Rune.cs | 1 - .../src/System/Text/Unicode/Utf8Utility.cs | 2 +- .../src/System/Utf8String.Construction.cs | 1 - .../src/System.Utf8String.Experimental.csproj | 4 +++- .../Runtime/Intrinsics/Intrinsics.Shims.cs | 20 +++++++++---------- 6 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Range.cs b/src/libraries/System.Private.CoreLib/src/System/Range.cs index cdf937092fa52..f2c5493818e65 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Range.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Range.cs @@ -89,7 +89,6 @@ public override string ToString() #else return Start.ToString() + ".." + End.ToString(); #endif - } /// Create a Range object starting from start index to the end of the collection. diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs index ffb324ecefff9..3a703d661ff78 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs @@ -881,7 +881,6 @@ public override string ToString() #else if (IsBmp) { - return ((char)_value).ToString(); } else diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs index 7d489a5a0ad88..78de994a2091d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs @@ -147,5 +147,5 @@ public static Utf8String ValidateAndFixupUtf8String(Utf8String value) return Utf8String.UnsafeCreateWithoutValidation(memStreamBuffer); } #endif // FEATURE_UTF8STRING - } + } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs 
b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs index c6bc878abd909..03a3d466b7dfa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs @@ -525,7 +525,6 @@ public static Utf8String UnsafeCreateWithoutValidation(ReadOnlySpan utf8Co return newString; } - /* * HELPER METHODS */ diff --git a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj index 73f1738a1a75c..dd5adc8a69195 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj +++ b/src/libraries/System.Utf8String.Experimental/src/System.Utf8String.Experimental.csproj @@ -1,7 +1,9 @@  true - $(NoWarn);3019 + + + $(NoWarn);CS3019;CS0162 true netstandard2.0;netstandard2.1;netcoreapp3.0;$(NetCoreAppCurrent)-Windows_NT;$(NetCoreAppCurrent)-Unix enable diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs index 97a73737ec7b7..12fba80e1a98f 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs @@ -30,30 +30,30 @@ internal abstract class Bmi1 { public abstract class X64 { - public static bool IsSupported { get; } = false; + public const bool IsSupported = false; public static ulong TrailingZeroCount(ulong value) => throw new PlatformNotSupportedException(); } - public static bool IsSupported { get; } = false; + public const bool IsSupported = false; public static uint TrailingZeroCount(uint value) => throw new PlatformNotSupportedException(); } internal abstract class Lzcnt { public abstract class X64 { - public 
static bool IsSupported { get; } = false; + public const bool IsSupported = false; public static ulong LeadingZeroCount(ulong value) => throw new PlatformNotSupportedException(); } - public static bool IsSupported { get; } = false; + public const bool IsSupported = false; public static uint LeadingZeroCount(uint value) => throw new PlatformNotSupportedException(); } internal abstract class Popcnt { public abstract class X64 { - public static bool IsSupported { get; } = false; + public const bool IsSupported = false; public static ulong PopCount(ulong value) => throw new PlatformNotSupportedException(); } - public static bool IsSupported { get; } = false; + public const bool IsSupported = false; public static uint PopCount(uint value) => throw new PlatformNotSupportedException(); } @@ -61,11 +61,11 @@ internal abstract class Sse2 { public abstract class X64 { - public static bool IsSupported { get; } = false; + public const bool IsSupported = false; public static Vector128 ConvertScalarToVector128UInt64(ulong value) => throw new PlatformNotSupportedException(); public static ulong ConvertToUInt64(Vector128 value) => throw new PlatformNotSupportedException(); } - public static bool IsSupported { get; } = false; + public const bool IsSupported = false; public static Vector128 Add(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); public static Vector128 AddSaturate(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); public static Vector128 AndNot(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); @@ -95,9 +95,9 @@ internal abstract class Sse41 { public abstract class X64 { - public static bool IsSupported { get; } = false; + public const bool IsSupported = false; } - public static bool IsSupported { get; } = false; + public const bool IsSupported = false; public static Vector128 Min(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); public static bool 
TestZ(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); public static bool TestZ(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); From 6cef31b488dd9580715d57ef61bdd41b5360ad6a Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Fri, 13 Mar 2020 15:19:17 -0500 Subject: [PATCH 24/26] Address PR feedback --- .../src/System/Text/Utf8StringComparer.cs | 5 +++++ .../src/System/Utf8String.Construction.cs | 6 +++--- .../src/System/Net/Http/Utf8StringContent.cs | 14 +++++-------- .../src/System/Utf8String.Portable.cs | 11 +++++++++- ...ystem.Utf8String.Experimental.Tests.csproj | 1 - .../tests/System/ReflectionTests.netfx.cs | 21 ------------------- .../tests/System/Utf8StringTests.Ctor.cs | 2 +- 7 files changed, 24 insertions(+), 36 deletions(-) delete mode 100644 src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netfx.cs diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs index 7b477053fa506..0911fd77725a5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8StringComparer.cs @@ -57,8 +57,13 @@ public static Utf8StringComparer FromComparison(StringComparison comparisonType) private sealed class CultureAwareComparer : Utf8StringComparer { +#if SYSTEM_PRIVATE_CORELIB + internal static readonly CultureAwareComparer Invariant = new CultureAwareComparer(CompareInfo.Invariant, CompareOptions.None); + internal static readonly CultureAwareComparer InvariantIgnoreCase = new CultureAwareComparer(CompareInfo.Invariant, CompareOptions.IgnoreCase); +#else internal static readonly CultureAwareComparer Invariant = new CultureAwareComparer(CultureInfo.InvariantCulture.CompareInfo, CompareOptions.None); internal static readonly CultureAwareComparer InvariantIgnoreCase = new 
CultureAwareComparer(CultureInfo.InvariantCulture.CompareInfo, CompareOptions.IgnoreCase); +#endif private readonly CompareInfo _compareInfo; private readonly CompareOptions _options; diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs index 03a3d466b7dfa..ad52c4279b85b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs @@ -327,9 +327,9 @@ internal static Utf8String CreateFromRune(Rune value) // routine. This normalizes the OutOfMemoryException the caller sees. long totalUtf8BytesRequired = (uint)value.Length + utf8CodeUnitCountAdjustment; - if (totalUtf8BytesRequired > int.MaxValue) + if (totalUtf8BytesRequired >= int.MaxValue) { - totalUtf8BytesRequired = int.MaxValue; + totalUtf8BytesRequired = int.MaxValue - 1; } // We can get away with FastAllocateSkipZeroInit here because we're not going to return the @@ -341,7 +341,7 @@ internal static Utf8String CreateFromRune(Rune value) // "skip validation" transcoder because the caller could've mutated the input buffer between the // initial counting step and the transcoding step below. - status = Utf8.FromUtf16(value, newBuffer, out _, out int bytesWritten, replaceInvalidSequences: false); + status = Utf8.FromUtf16(value, newBuffer.AsSpan(0, newBuffer.Length - 1), out _, out int bytesWritten, replaceInvalidSequences: false); if (status != OperationStatus.Done || bytesWritten != newBuffer.Length) { // Did somebody mutate our input buffer? Shouldn't be any other way this could happen. 
diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs index a248e95d9488b..3db8f03c8fc18 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Net/Http/Utf8StringContent.cs @@ -53,15 +53,11 @@ protected async override Task SerializeToStreamAsync(Stream stream, TransportCon else { byte[] localBuffer = ArrayPool.Shared.Rent(buffer.Length); - try - { - buffer.Span.CopyTo(localBuffer); - await stream.WriteAsync(localBuffer, 0, buffer.Length).ConfigureAwait(false); - } - finally - { - ArrayPool.Shared.Return(localBuffer); - } + buffer.Span.CopyTo(localBuffer); + + await stream.WriteAsync(localBuffer, 0, buffer.Length).ConfigureAwait(false); + + ArrayPool.Shared.Return(localBuffer); } } #elif NETSTANDARD2_1 || NETCOREAPP3_0 diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs index a870b8102bbcb..edbe70c2011a7 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Utf8String.Portable.cs @@ -18,7 +18,7 @@ public sealed partial class Utf8String /// /// Returns the length (in UTF-8 code units, or s) of this instance. /// - public int Length => _bytes.Length == 0 ? 0 : _bytes.Length - 1; // -1 because the bytes are always null-terminated + public int Length => _bytes.Length - 1; // -1 because the bytes are always null-terminated public Utf8String(ReadOnlySpan value) { @@ -192,6 +192,15 @@ private static Utf8String FastAllocate(int length) private static byte[] AllocateBuffer(int length) { + Debug.Assert(length > 0); + + if (length == int.MaxValue) + { + // Ensure we don't overflow below. 
The VM will throw an OutOfMemoryException + // if we try to create a byte[] this large anyway. + length = int.MaxValue - 1; + } + // Actual storage allocated is "length + 1" bytes because instances are null-terminated. return new byte[length + 1]; } diff --git a/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj b/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj index 3ecdaeeda2d3e..e571bbc5877b6 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj +++ b/src/libraries/System.Utf8String.Experimental/tests/System.Utf8String.Experimental.Tests.csproj @@ -46,6 +46,5 @@ - \ No newline at end of file diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netfx.cs b/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netfx.cs deleted file mode 100644 index 7289065493e4e..0000000000000 --- a/src/libraries/System.Utf8String.Experimental/tests/System/ReflectionTests.netfx.cs +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using System.Runtime.Serialization; -using Xunit; - -namespace System.Tests -{ - public partial class ReflectionTests - { - [Fact] - public static void FormatterServices_GetUninitializedObject_DoesntThrow() - { - // when OOB, we are unable to prevent FormatterServices from creating an uninitialized Utf8String - - Assert.NotNull(FormatterServices.GetSafeUninitializedObject(typeof(Utf8String))); - Assert.NotNull(FormatterServices.GetUninitializedObject(typeof(Utf8String))); - } - } -} diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs index a0142b60dba73..53e73e7ab4f41 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs @@ -331,7 +331,7 @@ private static void AssertSameAsEmpty(Utf8String value) ref Unsafe.AsRef(in Utf8String.Empty.GetPinnableReference()), ref Unsafe.AsRef(in value.GetPinnableReference()))); #else - Assert.Same(Utf8String.Empty, new Utf8String(ReadOnlySpan.Empty)); + Assert.Same(Utf8String.Empty, value); #endif } } From d2d54419b6e4ada0ca54bd576beab4f55277c8e6 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Fri, 13 Mar 2020 16:29:20 -0500 Subject: [PATCH 25/26] Ensure nint and nuint are set correctly when building Utf8String outside of CoreLib. 
--- .../src/System/Text/ASCIIUtility.cs | 9 +++++++-- .../src/System/Text/Unicode/Utf16Utility.Validation.cs | 9 +++++++-- .../src/System/Text/Unicode/Utf8Utility.Transcoding.cs | 9 +++++++-- .../src/System/Text/Unicode/Utf8Utility.Validation.cs | 9 +++++++-- .../System.Private.CoreLib/src/System/Text/Utf8Span.cs | 2 +- .../src/System/Utf8String.Enumeration.cs | 9 --------- .../System.Private.CoreLib/src/System/Utf8String.cs | 5 +++++ 7 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs index 4f70613a3eef0..c0bdbf03f7059 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs @@ -13,6 +13,7 @@ #endif #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types +#if SYSTEM_PRIVATE_CORELIB #if TARGET_64BIT using nint = System.Int64; using nuint = System.UInt64; @@ -20,18 +21,22 @@ using nint = System.Int32; using nuint = System.UInt32; #endif // TARGET_64BIT +#else +using nint = System.Int64; // https://github.com/dotnet/runtime/issues/33575 - use long/ulong outside of corelib until the compiler supports it +using nuint = System.UInt64; +#endif namespace System.Text { internal static partial class ASCIIUtility { -#if DEBUG +#if DEBUG && SYSTEM_PRIVATE_CORELIB static ASCIIUtility() { Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); } -#endif // DEBUG +#endif // DEBUG && SYSTEM_PRIVATE_CORELIB [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool AllBytesInUInt64AreAscii(ulong value) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs 
b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs index caffdbf92e003..8d23f74cfe89b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs @@ -13,6 +13,7 @@ #endif #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types +#if SYSTEM_PRIVATE_CORELIB #if TARGET_64BIT using nint = System.Int64; using nuint = System.UInt64; @@ -20,18 +21,22 @@ using nint = System.Int32; using nuint = System.UInt32; #endif // TARGET_64BIT +#else +using nint = System.Int64; // https://github.com/dotnet/runtime/issues/33575 - use long/ulong outside of corelib until the compiler supports it +using nuint = System.UInt64; +#endif namespace System.Text.Unicode { internal static unsafe partial class Utf16Utility { -#if DEBUG +#if DEBUG && SYSTEM_PRIVATE_CORELIB static Utf16Utility() { Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); } -#endif // DEBUG +#endif // DEBUG && SYSTEM_PRIVATE_CORELIB // Returns &inputBuffer[inputLength] if the input buffer is valid. 
/// diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs index bf6f31f1b4f72..a1288394d7045 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs @@ -15,6 +15,7 @@ #endif #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types +#if SYSTEM_PRIVATE_CORELIB #if TARGET_64BIT using nint = System.Int64; using nuint = System.UInt64; @@ -22,12 +23,16 @@ using nint = System.Int32; using nuint = System.UInt32; #endif // TARGET_64BIT +#else +using nint = System.Int64; // https://github.com/dotnet/runtime/issues/33575 - use long/ulong outside of corelib until the compiler supports it +using nuint = System.UInt64; +#endif namespace System.Text.Unicode { internal static unsafe partial class Utf8Utility { -#if DEBUG +#if DEBUG && SYSTEM_PRIVATE_CORELIB static Utf8Utility() { Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); @@ -35,7 +40,7 @@ static Utf8Utility() _ValidateAdditionalNIntDefinitions(); } -#endif // DEBUG +#endif // DEBUG && SYSTEM_PRIVATE_CORELIB // On method return, pInputBufferRemaining and pOutputBufferRemaining will both point to where // the next byte would have been consumed from / the next char would have been written to. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs index d218f4483e7f4..137b6775c8944 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs @@ -12,6 +12,7 @@ #endif #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types +#if SYSTEM_PRIVATE_CORELIB #if TARGET_64BIT using nint = System.Int64; using nuint = System.UInt64; @@ -19,18 +20,22 @@ using nint = System.Int32; using nuint = System.UInt32; #endif // TARGET_64BIT +#else +using nint = System.Int64; // https://github.com/dotnet/runtime/issues/33575 - use long/ulong outside of corelib until the compiler supports it +using nuint = System.UInt64; +#endif namespace System.Text.Unicode { internal static unsafe partial class Utf8Utility { -#if DEBUG +#if DEBUG && SYSTEM_PRIVATE_CORELIB private static void _ValidateAdditionalNIntDefinitions() { Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); } -#endif // DEBUG +#endif // DEBUG && SYSTEM_PRIVATE_CORELIB // Returns &inputBuffer[inputLength] if the input buffer is valid. 
/// diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs index 117e7ef479e28..eb6cdb6527d41 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Utf8Span.cs @@ -25,7 +25,7 @@ using nuint = System.UInt32; #endif #else -using nint = System.Int64; +using nint = System.Int64; // https://github.com/dotnet/runtime/issues/33575 - use long/ulong outside of corelib until the compiler supports it using nuint = System.UInt64; #endif diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Enumeration.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Enumeration.cs index 2ae5ced65f567..9b2ea648539b2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Enumeration.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Enumeration.cs @@ -8,15 +8,6 @@ using System.Diagnostics; using System.Text; -#pragma warning disable SA1121 // explicitly using type aliases instead of built-in types -#if TARGET_64BIT -using nint = System.Int64; -using nuint = System.UInt64; -#else -using nint = System.Int32; -using nuint = System.UInt32; -#endif - namespace System { public sealed partial class Utf8String diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs index 10c0dbe1ef41a..e6cba4d2e4c2b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.cs @@ -14,6 +14,7 @@ #endif #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types +#if SYSTEM_PRIVATE_CORELIB #if TARGET_64BIT using nint = System.Int64; using nuint = System.UInt64; @@ -21,6 +22,10 @@ using nint = System.Int32; using nuint = System.UInt32; #endif +#else +using nint = System.Int64; // 
https://github.com/dotnet/runtime/issues/33575 - use long/ulong outside of corelib until the compiler supports it +using nuint = System.UInt64; +#endif namespace System { From 9e682d109174dc6c1c196483ed4a85edbb4843b4 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Fri, 13 Mar 2020 23:34:22 -0500 Subject: [PATCH 26/26] Fix bug in construction from long UTF16 string. --- .../src/System/Utf8String.Construction.cs | 2 +- .../tests/System/Utf8StringTests.Ctor.cs | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs index ad52c4279b85b..5ef0f5cfbafa9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Utf8String.Construction.cs @@ -342,7 +342,7 @@ internal static Utf8String CreateFromRune(Rune value) // initial counting step and the transcoding step below. status = Utf8.FromUtf16(value, newBuffer.AsSpan(0, newBuffer.Length - 1), out _, out int bytesWritten, replaceInvalidSequences: false); - if (status != OperationStatus.Done || bytesWritten != newBuffer.Length) + if (status != OperationStatus.Done || bytesWritten != newBuffer.Length - 1) { // Did somebody mutate our input buffer? Shouldn't be any other way this could happen. 
diff --git a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs index 53e73e7ab4f41..6776d5abf4e88 100644 --- a/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs +++ b/src/libraries/System.Utf8String.Experimental/tests/System/Utf8StringTests.Ctor.cs @@ -240,6 +240,13 @@ public static void Ctor_String_ValidData_ReturnsOriginalContents() Assert.Equal(u8("Hello"), new Utf8String("Hello")); } + [Fact] + public static void Ctor_String_Long_ReturnsOriginalContents() + { + string longString = new string('a', 500); + Assert.Equal(u8(longString), new Utf8String(longString)); + } + [Fact] public static void Ctor_String_InvalidData_Throws() {