Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow Utf8String package to work on netstandard 2.0 #33357

Merged
merged 26 commits into from
Mar 14, 2020
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
b5fe79a
Update ref assembly for building netstandard2.0.
eerhardt Feb 27, 2020
9a2168d
Move Utf8String code from coreclr to libraries
eerhardt Feb 28, 2020
f296c30
Get Rune working on netstandard2.0
eerhardt Feb 28, 2020
aa1e4c8
Get Utf8Span compiling on netstandard2.0.
eerhardt Feb 29, 2020
a52f01b
Get Utf8String building on netstandard2.0
eerhardt Mar 2, 2020
ee19225
Get Utf8StringContent building on netstandard2.0 again.
eerhardt Mar 3, 2020
8193573
Compile Utf8String/Span.Conversion.cs for netstandard.
eerhardt Mar 3, 2020
4e2615a
Build Utf8String Comparison and Enumeration for netstandard.
eerhardt Mar 3, 2020
cea9703
Enable Index and Range on netstandard2.0
eerhardt Mar 4, 2020
97b89d0
Everything building
eerhardt Mar 4, 2020
414c890
Moving code around so it lines up with the ref assemblies.
eerhardt Mar 4, 2020
f5b7269
Build for netcoreapp3.0.
eerhardt Mar 4, 2020
6c6fd97
Get tests building on netfx
eerhardt Mar 5, 2020
545e3d2
Fix up merge
eerhardt Mar 5, 2020
5963e60
Get Utf8String constructors working on netstandard.
eerhardt Mar 6, 2020
f6b917c
Get some tests running on net472
eerhardt Mar 7, 2020
7c6e17d
More tests running on netfx
eerhardt Mar 7, 2020
5c2aba5
Get all tests running on netfx.
eerhardt Mar 8, 2020
7daf535
Fix build for netcoreapp3.0, which doesn't have SerializeToStreamAsyn…
eerhardt Mar 9, 2020
889c9a2
Add netstandard2.1 support to Utf8String.Experimental.
eerhardt Mar 9, 2020
b418ed2
Clean up to prepare for review.
eerhardt Mar 9, 2020
aafb1d8
Address PR feedback
eerhardt Mar 10, 2020
735317d
Make Intrinsics IsSupported be const fields.
eerhardt Mar 11, 2020
6cef31b
Address PR feedback
eerhardt Mar 13, 2020
d2d5441
Ensure nint and nuint are set correctly when building Utf8String outs…
eerhardt Mar 13, 2020
9e682d1
Fix bug in construction from long UTF16 string.
eerhardt Mar 14, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion eng/referenceFromRuntime.targets
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@
<ReferencePath Include="@(_filteredReferencePathFromRuntimeByFileName->'%(ReferencePath)')" Condition="'%(_filteredReferencePathFromRuntimeByFileName.Aliases)' != ''" Aliases="" />
</ItemGroup>

<Error Condition="'@(_missingReferenceFromRuntime)' != ''"
<Error Condition="'@(_missingReferenceFromRuntime)' != '' and '$(DesignTimeBuild)' != 'true'"
eerhardt marked this conversation as resolved.
Show resolved Hide resolved
Text="Could not resolve ReferenceFromRuntime item(s) '%(_missingReferenceFromRuntime.OriginalReferenceFromRuntime)' from '$(RuntimeProjectFile)'." />
</Target>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf8Utility.Helpers.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf8Utility.Transcoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf8Utility.Validation.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf8Utility.WhiteSpace.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf8Utility.WhiteSpace.CoreLib.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\UnicodeDebug.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\UnicodeEncoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\UnicodeUtility.cs" />
Expand Down Expand Up @@ -1811,6 +1811,7 @@
<ItemGroup Condition="'$(FeatureUtf8String)' == 'true'">
<Compile Include="$(MSBuildThisFileDirectory)System\Char8.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Utf8Extensions.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Utf8Extensions.CoreLib.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Utf8String.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Utf8String.Comparison.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Utf8String.Construction.cs" />
Expand All @@ -1827,4 +1828,4 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Utf8Span.Searching.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Utf8StringComparer.cs" />
</ItemGroup>
</Project>
</Project>
5 changes: 5 additions & 0 deletions src/libraries/System.Private.CoreLib/src/System/Index.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text;
eerhardt marked this conversation as resolved.
Show resolved Hide resolved

namespace System
{
Expand Down Expand Up @@ -140,11 +141,15 @@ public override string ToString()

// Builds the string form of a from-end index: a '^' character followed by the
// decimal digits of Value. Two implementations are selected at compile time by
// the NETSTANDARD2_0 symbol.
private string ToStringFromEnd()
{
#if !NETSTANDARD2_0
// Span-based path: format into a stack buffer, leaving slot 0 free for the '^' prefix.
Span<char> span = stackalloc char[11]; // 1 for ^ and 10 for longest possible uint value
bool formatted = ((uint)Value).TryFormat(span.Slice(1), out int charsWritten);
// The buffer is sized for the longest uint, so formatting is expected to succeed.
Debug.Assert(formatted);
span[0] = '^';
// charsWritten counts only the digits; add 1 to include the '^' prefix.
return new string(span.Slice(0, charsWritten + 1));
#else
// NOTE(review): presumably the span-based TryFormat / string(span) APIs used above are
// not available to this target, so fall back to plain concatenation - confirm.
return '^' + Value.ToString();
#endif
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86;

#if SYSTEM_PRIVATE_CORELIB
using Internal.Runtime.CompilerServices;
#endif

// Some routines inspired by the Stanford Bit Twiddling Hacks by Sean Eron Anderson:
// http://graphics.stanford.edu/~seander/bithacks.html
Expand All @@ -18,7 +20,12 @@ namespace System.Numerics
/// The methods use hardware intrinsics when available on the underlying platform,
/// otherwise they use optimized software fallbacks.
/// </summary>
public static class BitOperations
#if SYSTEM_PRIVATE_CORELIB
public
#else
internal
#endif
static class BitOperations
{
// C# no-alloc optimization that directly wraps the data section of the dll (similar to string constants)
// https://github.com/dotnet/roslyn/pull/24621
Expand Down
14 changes: 14 additions & 0 deletions src/libraries/System.Private.CoreLib/src/System/Range.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text;

#if NETSTANDARD2_0
using System.Numerics.Hashing;
#endif

namespace System
{
Expand Down Expand Up @@ -47,12 +52,17 @@ value is Range r &&
/// <summary>Returns the hash code for this instance.</summary>
/// <remarks>
/// The hash is derived from the hash codes of <c>Start</c> and <c>End</c>. The combiner is
/// chosen at compile time: <c>HashCode.Combine</c> normally, <c>HashHelpers.Combine</c> when
/// NETSTANDARD2_0 is defined.
/// </remarks>
public override int GetHashCode()
{
#if !NETSTANDARD2_0
return HashCode.Combine(Start.GetHashCode(), End.GetHashCode());
#else
// NOTE(review): presumably System.HashCode is not available to this target, hence the
// HashHelpers fallback - confirm. The two branches call different combiners, so the
// resulting value should not be assumed to match across target frameworks.
return HashHelpers.Combine(Start.GetHashCode(), End.GetHashCode());
#endif
}

/// <summary>Converts the value of the current Range object to its equivalent string representation.</summary>
public override string ToString()
{
#if !NETSTANDARD2_0
Span<char> span = stackalloc char[2 + (2 * 11)]; // 2 for "..", then for each index 1 for '^' and 10 for longest possible uint
int pos = 0;

Expand All @@ -77,6 +87,10 @@ public override string ToString()
pos += charsWritten;

return new string(span.Slice(0, pos));
#else
return Start.ToString() + ".." + End.ToString();
#endif

}

/// <summary>Create a Range object starting from start index to the end of the collection.</summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

#if SYSTEM_PRIVATE_CORELIB
using Internal.Runtime.CompilerServices;
#endif

#pragma warning disable SA1121 // explicitly using type aliases instead of built-in types
#if TARGET_64BIT
Expand Down
68 changes: 55 additions & 13 deletions src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ namespace System.Text
[DebuggerDisplay("{DebuggerDisplay,nq}")]
public readonly struct Rune : IComparable<Rune>, IEquatable<Rune>
{
private const char HIGH_SURROGATE_START = '\ud800';
private const char LOW_SURROGATE_START = '\udc00';
private const int HIGH_SURROGATE_RANGE = 0x3FF;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use PascalCase for such consts?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can. I copied this from existing code in CharUnicodeInfo. Do you think I should change all the existing occurrences to PascalCase as well?

https://github.com/dotnet/runtime/search?l=C%23&q=HIGH_SURROGATE_START


private const byte IsWhiteSpaceFlag = 0x80;
private const byte IsLetterOrDigitFlag = 0x40;
private const byte UnicodeCategoryMask = 0x1F;
Expand Down Expand Up @@ -175,10 +179,10 @@ private Rune(uint scalarValue, bool unused)
/// </summary>
public int Value => (int)_value;

private static Rune ChangeCaseCultureAware(Rune rune, TextInfo textInfo, bool toUpper)
private static Rune ChangeCaseCultureAware(Rune rune, CultureInfo culture, bool toUpper)
eerhardt marked this conversation as resolved.
Show resolved Hide resolved
{
Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller.");
Debug.Assert(textInfo != null, "This should've been checked by the caller.");
Debug.Assert(culture != null, "This should've been checked by the caller.");

Span<char> original = stackalloc char[2]; // worst case scenario = 2 code units (for a surrogate pair)
Span<char> modified = stackalloc char[2]; // case change should preserve UTF-16 code unit count
Expand All @@ -187,14 +191,25 @@ private static Rune ChangeCaseCultureAware(Rune rune, TextInfo textInfo, bool to
original = original.Slice(0, charCount);
modified = modified.Slice(0, charCount);

#if SYSTEM_PRIVATE_CORELIB
if (toUpper)
{
culture!.TextInfo.ChangeCaseToUpper(original, modified);
}
else
{
culture!.TextInfo.ChangeCaseToLower(original, modified);
}
eerhardt marked this conversation as resolved.
Show resolved Hide resolved
#else
if (toUpper)
{
textInfo.ChangeCaseToUpper(original, modified);
MemoryExtensions.ToUpper(original, modified, culture);
}
else
{
textInfo.ChangeCaseToLower(original, modified);
MemoryExtensions.ToLower(original, modified, culture);
}
#endif

// We use simple case folding rules, which disallows moving between the BMP and supplementary
// planes when performing a case conversion. The helper methods which reconstruct a Rune
Expand Down Expand Up @@ -827,6 +842,7 @@ private static int ReadRuneFromString(string input, int index)
/// </summary>
public override string ToString()
{
#if SYSTEM_PRIVATE_CORELIB
if (IsBmp)
{
return string.CreateFromChar((char)_value);
Expand All @@ -836,6 +852,19 @@ public override string ToString()
UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(_value, out char high, out char low);
return string.CreateFromChar(high, low);
}
#else
if (IsBmp)
{

return ((char)_value).ToString();
}
else
{
Span<char> buffer = stackalloc char[2];
UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(_value, out buffer[0], out buffer[1]);
return buffer.ToString();
}
#endif
}

/// <summary>
Expand Down Expand Up @@ -865,17 +894,17 @@ public static bool TryCreate(char highSurrogate, char lowSurrogate, out Rune res
// First, extend both to 32 bits, then calculate the offset of
// each candidate surrogate char from the start of its range.

uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;
uint highSurrogateOffset = (uint)highSurrogate - HIGH_SURROGATE_START;
uint lowSurrogateOffset = (uint)lowSurrogate - LOW_SURROGATE_START;

// This is a single comparison which allows us to check both for validity at once since
// both the high surrogate range and the low surrogate range are the same length.
// If the comparison fails, we call to a helper method to throw the correct exception message.

if ((highSurrogateOffset | lowSurrogateOffset) <= CharUnicodeInfo.HIGH_SURROGATE_RANGE)
if ((highSurrogateOffset | lowSurrogateOffset) <= HIGH_SURROGATE_RANGE)
{
// The 0x40u << 10 below is to account for uuuuu = wwww + 1 in the surrogate encoding.
result = UnsafeCreate((highSurrogateOffset << 10) + ((uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START) + (0x40u << 10));
result = UnsafeCreate((highSurrogateOffset << 10) + ((uint)lowSurrogate - LOW_SURROGATE_START) + (0x40u << 10));
return true;
}
else
Expand Down Expand Up @@ -1070,7 +1099,11 @@ public static double GetNumericValue(Rune value)
else
{
// not an ASCII char; fall back to globalization table
#if SYSTEM_PRIVATE_CORELIB
return CharUnicodeInfo.GetNumericValue(value.Value);
#else
return CharUnicodeInfo.GetNumericValue(value.ToString(), 0);
eerhardt marked this conversation as resolved.
Show resolved Hide resolved
#endif
}
}

Expand All @@ -1089,7 +1122,11 @@ public static UnicodeCategory GetUnicodeCategory(Rune value)
private static UnicodeCategory GetUnicodeCategoryNonAscii(Rune value)
{
Debug.Assert(!value.IsAscii, "Shouldn't use this non-optimized code path for ASCII characters.");
#if !NETSTANDARD2_0
return CharUnicodeInfo.GetUnicodeCategory(value.Value);
#else
return CharUnicodeInfo.GetUnicodeCategory(value.ToString(), 0);
eerhardt marked this conversation as resolved.
Show resolved Hide resolved
#endif
}

// Returns true iff this Unicode category represents a letter
Expand Down Expand Up @@ -1240,7 +1277,12 @@ public static bool IsWhiteSpace(Rune value)
// Only BMP code points can be white space, so only call into CharUnicodeInfo
// if the incoming value is within the BMP.

return value.IsBmp && CharUnicodeInfo.GetIsWhiteSpace((char)value._value);
return value.IsBmp &&
#if SYSTEM_PRIVATE_CORELIB
CharUnicodeInfo.GetIsWhiteSpace((char)value._value);
#else
char.IsWhiteSpace((char)value._value);
#endif
}

public static Rune ToLower(Rune value, CultureInfo culture)
Expand All @@ -1259,7 +1301,7 @@ public static Rune ToLower(Rune value, CultureInfo culture)
return ToLowerInvariant(value);
}

return ChangeCaseCultureAware(value, culture!.TextInfo, toUpper: false);
return ChangeCaseCultureAware(value, culture, toUpper: false);
}

public static Rune ToLowerInvariant(Rune value)
Expand All @@ -1283,7 +1325,7 @@ public static Rune ToLowerInvariant(Rune value)

// Non-ASCII data requires going through the case folding tables.

return ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: false);
return ChangeCaseCultureAware(value, CultureInfo.InvariantCulture, toUpper: false);
}

public static Rune ToUpper(Rune value, CultureInfo culture)
Expand All @@ -1302,7 +1344,7 @@ public static Rune ToUpper(Rune value, CultureInfo culture)
return ToUpperInvariant(value);
}

return ChangeCaseCultureAware(value, culture!.TextInfo, toUpper: true);
return ChangeCaseCultureAware(value, culture, toUpper: true);
}

public static Rune ToUpperInvariant(Rune value)
Expand All @@ -1326,7 +1368,7 @@ public static Rune ToUpperInvariant(Rune value)

// Non-ASCII data requires going through the case folding tables.

return ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: true);
return ChangeCaseCultureAware(value, CultureInfo.InvariantCulture, toUpper: true);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
using System.Diagnostics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Runtime.CompilerServices;
using System.Numerics;

#if SYSTEM_PRIVATE_CORELIB
using Internal.Runtime.CompilerServices;
#endif

#pragma warning disable SA1121 // explicitly using type aliases instead of built-in types
#if TARGET_64BIT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,21 @@

using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

#if SYSTEM_PRIVATE_CORELIB
using Internal.Runtime.CompilerServices;
#endif

namespace System.Text.Unicode
{
public static class Utf8
#if SYSTEM_PRIVATE_CORELIB
public
#else
internal
#endif
static class Utf8
{
/*
* OperationStatus-based APIs for transcoding of chunked data.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;

#if SYSTEM_PRIVATE_CORELIB
using Internal.Runtime.CompilerServices;
#endif

namespace System.Text.Unicode
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@
using System.Buffers.Binary;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

#if SYSTEM_PRIVATE_CORELIB
using Internal.Runtime.CompilerServices;
#endif

#pragma warning disable SA1121 // explicitly using type aliases instead of built-in types
#if TARGET_64BIT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
using System.Diagnostics;
using System.Numerics;
using System.Runtime.Intrinsics.X86;
using System.Runtime.CompilerServices;

#if SYSTEM_PRIVATE_CORELIB
using Internal.Runtime.CompilerServices;
#endif

#pragma warning disable SA1121 // explicitly using type aliases instead of built-in types
#if TARGET_64BIT
Expand Down
Loading