Skip to content

Commit

Permalink
Add DecodeString(ReadOnlySpan) (#144)
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastienros authored Jul 23, 2024
1 parent fab5f77 commit c479e5e
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 65 deletions.
80 changes: 47 additions & 33 deletions src/Parlot/Character.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using System.Buffers;
using System.Runtime.CompilerServices;

namespace Parlot
Expand All @@ -19,40 +20,45 @@ public static partial class Character
/// <summary>
/// Returns <c>true</c> when <paramref name="ch"/> is one of the ASCII digits '0' through '9'
/// (delegates to <c>IsInRange</c>).
/// </summary>
public static bool IsDecimalDigit(char ch) => IsInRange(ch, '0', '9');

/// <summary>
/// Returns <c>true</c> when <paramref name="ch"/> lies between <paramref name="min"/> and
/// <paramref name="max"/>, both bounds inclusive.
/// </summary>
/// <remarks>
/// The subtraction is carried out as <c>uint</c>, so (in the default unchecked context) a value
/// below <paramref name="min"/> wraps around to a large number and one comparison covers both bounds.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsInRange(char ch, char min, char max) => ch - (uint) min <= max - (uint) min;
// NOTE(review): the line above and the line below are the same declaration with different cast
// spacing; they look like the before/after sides of a diff hunk. Only one can remain in a compilable file.
public static bool IsInRange(char ch, char min, char max) => ch - (uint)min <= max - (uint)min;

/// <summary>
/// Returns the result of <c>HexConverter.IsHexChar</c> for <paramref name="ch"/>.
/// </summary>
public static bool IsHexDigit(char ch) => HexConverter.IsHexChar(ch);

/// <summary>
/// Looks up <paramref name="ch"/> in the <c>_characterData</c> table and returns <c>true</c> when
/// the entry has any bit of <c>CharacterMask.IdentifierStart</c> set.
/// </summary>
public static bool IsIdentifierStart(char ch)
{
// NOTE(review): the two returns differ only in cast spacing and look like the before/after
// sides of a diff hunk; the second one is unreachable as written.
return (_characterData[ch] & (byte) CharacterMask.IdentifierStart) != 0;
return (_characterData[ch] & (byte)CharacterMask.IdentifierStart) != 0;
}

/// <summary>
/// Looks up <paramref name="ch"/> in the <c>_characterData</c> table and returns <c>true</c> when
/// the entry has any bit of <c>CharacterMask.IdentifierPart</c> set.
/// </summary>
public static bool IsIdentifierPart(char ch)
{
// NOTE(review): the two returns differ only in cast spacing and look like the before/after
// sides of a diff hunk; the second one is unreachable as written.
return (_characterData[ch] & (byte) CharacterMask.IdentifierPart) != 0;
return (_characterData[ch] & (byte)CharacterMask.IdentifierPart) != 0;
}

/// <summary>
/// Looks up <paramref name="ch"/> in the <c>_characterData</c> table and returns <c>true</c> when
/// the entry has any bit of <c>CharacterMask.WhiteSpace</c> set.
/// </summary>
public static bool IsWhiteSpace(char ch)
{
// NOTE(review): the two returns differ only in cast spacing and look like the before/after
// sides of a diff hunk; the second one is unreachable as written.
return (_characterData[ch] & (byte) CharacterMask.WhiteSpace) != 0;
return (_characterData[ch] & (byte)CharacterMask.WhiteSpace) != 0;
}

/// <summary>
/// Looks up <paramref name="ch"/> in the <c>_characterData</c> table and returns <c>true</c> when
/// the entry has any bit of <c>CharacterMask.WhiteSpaceOrNewLine</c> set.
/// </summary>
public static bool IsWhiteSpaceOrNewLine(char ch)
{
// NOTE(review): the two returns differ only in cast spacing and look like the before/after
// sides of a diff hunk; the second one is unreachable as written.
return (_characterData[ch] & (byte) CharacterMask.WhiteSpaceOrNewLine) != 0;
return (_characterData[ch] & (byte)CharacterMask.WhiteSpaceOrNewLine) != 0;
}

/// <summary>
/// Returns <c>true</c> when <paramref name="ch"/> is a line feed ('\n'), a carriage return ('\r')
/// or a vertical tab ('\v').
/// </summary>
public static bool IsNewLine(char ch) => ch is '\n' or '\r' or '\v';

/// <summary>
/// String overload: slices <paramref name="text"/> starting at <paramref name="index"/> and forwards
/// to the <c>ScanHexEscape(ReadOnlySpan&lt;char&gt;, out int)</c> overload, which sets
/// <paramref name="length"/> and produces the returned character.
/// </summary>
public static char ScanHexEscape(string text, int index, out int length)
{
// NOTE(review): `lastIndex` is never read in this method as shown. This line looks like the
// pre-change side of the diff; the span overload computes its own `lastIndex`.
var lastIndex = Math.Min(4 + index, text.Length - 1);
return ScanHexEscape(text.AsSpan(index), out length);
}

public static char ScanHexEscape(ReadOnlySpan<char> text, out int length)
{
var lastIndex = Math.Min(4, text.Length - 1);
var code = 0;

length = 0;

for (var i = index + 1; i < lastIndex + 1; i++)
for (var i = 1; i < lastIndex + 1; i++)
{
var d = text[i];

Expand All @@ -68,37 +74,32 @@ public static char ScanHexEscape(string text, int index, out int length)
return (char)code;
}

public static TextSpan DecodeString(string s) => DecodeString(new TextSpan(s));

public static TextSpan DecodeString(TextSpan span)
public static ReadOnlySpan<char> DecodeString(ReadOnlySpan<char> span)
{
// Nothing to do if the string doesn't have any escape char
if (string.IsNullOrEmpty(span.Buffer) || span.Buffer.AsSpan(span.Offset, span.Length).IndexOf('\\') == -1)
if (span.IsEmpty || span.IndexOf('\\') == -1)
{
return span;
}

#if NET6_0_OR_GREATER
var result = string.Create(span.Length, span, static (chars, source) =>
#else
var result = "".Create(span.Length, span, static (chars, source) =>
#endif
{
// The assumption is that the new string will be shorter since escapes results are smaller than their source
// The assumption is that the new string will be shorter since escapes results are smaller than their source
char[]? rentedBuffer = null;
Span<char> buffer = span.Length <= 128
? stackalloc char[span.Length]
: (rentedBuffer = ArrayPool<char>.Shared.Rent(span.Length));

try
{
var dataIndex = 0;
var buffer = source.Buffer!;
var start = source.Offset;
var end = source.Offset + source.Length;

for (var i = start; i < end; i++)
for (var i = 0; i < span.Length; i++)
{
var c = buffer[i];
var c = span[i];

if (c == '\\')
{
i++;
c = buffer[i];
c = span[i];

switch (c)
{
Expand All @@ -114,31 +115,44 @@ public static TextSpan DecodeString(TextSpan span)
case 't': c = '\t'; break;
case 'v': c = '\v'; break;
case 'u':
c = Character.ScanHexEscape(buffer, i, out var length);
c = Character.ScanHexEscape(span[i..], out var length);
i += length;
break;
case 'x':
c = Character.ScanHexEscape(buffer, i, out length);
c = Character.ScanHexEscape(span[i..], out length);
i += length;
break;
}
}

chars[dataIndex++] = c;
buffer[dataIndex++] = c;
}

chars[dataIndex++] = '\0';
});
var result = buffer[..dataIndex].ToString().AsSpan();

for (var i = result.Length - 1; i >= 0; i--)
return result;
}
finally
{
if (result[i] != '\0')
if (rentedBuffer != null)
{
return new TextSpan(result, 0, i + 1);
ArrayPool<char>.Shared.Return(rentedBuffer);
}
}
}

/// <summary>
/// Convenience overload: wraps <paramref name="s"/> in a new <c>TextSpan</c> and passes it to the
/// <c>DecodeString(TextSpan)</c> overload.
/// </summary>
public static TextSpan DecodeString(string s) => DecodeString(new TextSpan(s));

/// <summary>
/// Decodes backslash escapes in <paramref name="span"/>. The input is returned unchanged when its
/// buffer is null or empty, or when the range it covers contains no backslash.
/// </summary>
public static TextSpan DecodeString(TextSpan span)
{
// Nothing to do if the string doesn't have any escape char
if (string.IsNullOrEmpty(span.Buffer) || span.Buffer.AsSpan(span.Offset, span.Length).IndexOf('\\') == -1)
{
return span;
}

// Delegate to the ReadOnlySpan<char> overload and wrap the decoded text in a new TextSpan.
return new TextSpan(DecodeString(span.Span).ToString());

// NOTE(review): unreachable, and `result` is not declared anywhere in this method as shown;
// presumably the pre-change side of the diff.
return new TextSpan(result);
}

// Returns HexConverter.FromChar(ch); presumably the numeric value of a hex digit — confirm
// what FromChar yields for non-hex input before relying on it.
private static int HexValue(char ch) => HexConverter.FromChar(ch);
Expand Down
20 changes: 10 additions & 10 deletions src/Parlot/Fluent/StringLiteral.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,19 @@ public sealed class StringLiteral : Parser<TextSpan>, ICompilable, ISeekable
static readonly char[] SingleQuotes = ['\''];
static readonly char[] DoubleQuotes = ['\"'];
static readonly char[] SingleOrDoubleQuotes = ['\'', '\"'];

private readonly StringLiteralQuotes _quotes;

/// <summary>
/// Stores the accepted quote style in <c>_quotes</c> and sets <c>ExpectedChars</c> to the matching
/// quote-character array; any other enum value yields an empty collection.
/// </summary>
public StringLiteral(StringLiteralQuotes quotes)
{
_quotes = quotes;

// NOTE(review): the switch expression below appears twice with identical arms and only one
// closing `};`. This looks like the before/after sides of a whitespace-only diff hunk; a
// compilable file keeps a single copy.
ExpectedChars = _quotes switch
{
StringLiteralQuotes.Single => SingleQuotes,
StringLiteralQuotes.Double => DoubleQuotes,
StringLiteralQuotes.SingleOrDouble => SingleOrDoubleQuotes,
_ => []
ExpectedChars = _quotes switch
{
StringLiteralQuotes.Single => SingleQuotes,
StringLiteralQuotes.Double => DoubleQuotes,
StringLiteralQuotes.SingleOrDouble => SingleOrDoubleQuotes,
_ => []
};
}

Expand Down Expand Up @@ -107,10 +107,10 @@ public CompilationResult Compile(CompilationContext context)
[end],
Expression.Assign(end, context.Offset()),
Expression.Assign(result.Success, Expression.Constant(true, typeof(bool))),
context.DiscardResult
context.DiscardResult
? Expression.Empty()
: Expression.Assign(result.Value,
Expression.Call(_decodeStringMethodInfo,
: Expression.Assign(result.Value,
Expression.Call(_decodeStringMethodInfo,
context.NewTextSpan(
context.Buffer(),
Expression.Add(start, Expression.Constant(1)),
Expand Down
2 changes: 1 addition & 1 deletion src/Parlot/Fluent/TextLiteral.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public override bool Parse(ParseContext context, ref ParseResult<string> result)
if (cursor.Match(Text.AsSpan(), _comparisonType))
{
var start = cursor.Offset;

if (_hasNewLines)
{
cursor.Advance(Text.Length);
Expand Down
31 changes: 13 additions & 18 deletions src/Parlot/Scanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ public bool ReadDecimal(bool allowLeadingSign, bool allowDecimalSeparator, bool
{
if (Cursor.Current == '-' || Cursor.Current == '+')
{
Cursor.AdvanceNoNewLines(1);
Cursor.AdvanceNoNewLines(1);
}
}

Expand Down Expand Up @@ -259,7 +259,7 @@ public bool ReadInteger(out ReadOnlySpan<char> result)
return false;
}

Cursor.AdvanceNoNewLines(next);
Cursor.AdvanceNoNewLines(next);
result = Buffer.AsSpan(Cursor.Offset - next, next);

return true;
Expand Down Expand Up @@ -379,7 +379,7 @@ public bool ReadAnyOf(ReadOnlySpan<char> chars, StringComparison comparisonType,
var current = Cursor.Buffer.AsSpan(Cursor.Offset, 1);

var index = chars.IndexOf(current, comparisonType);

if (index == -1)
{
result = [];
Expand Down Expand Up @@ -522,17 +522,15 @@ public bool ReadQuotedString(out ReadOnlySpan<char> result)
private bool ReadQuotedString(char quoteChar, out ReadOnlySpan<char> result)
{
var startChar = Cursor.Current;
var start = Cursor.Position;

if (startChar != quoteChar)
{
result = [];
return false;
}

// Fast path if there aren't any escape char until next quote
var startOffset = Cursor.Offset + 1;

var nextQuote = Cursor.Buffer.AsSpan(startOffset).IndexOf(startChar);
var nextQuote = Cursor.Span.Slice(1).IndexOf(startChar);

if (nextQuote == -1)
{
Expand All @@ -541,18 +539,14 @@ private bool ReadQuotedString(char quoteChar, out ReadOnlySpan<char> result)
return false;
}

var start = Cursor.Position;

Cursor.Advance();

var nextEscape = Cursor.Buffer.AsSpan(startOffset, nextQuote).IndexOf('\\');
var nextEscape = Cursor.Span.IndexOf('\\');

// If the next escape if not before the next quote, we can return the string as-is
if (nextEscape == -1)
// If the next escape is not before the next quote, we can return the string as-is
if (nextEscape == -1 || nextEscape > nextQuote)
{
Cursor.Advance(nextQuote + 1);
Cursor.Advance(nextQuote + 2); // include start quote

result = Buffer.AsSpan(start.Offset, Cursor.Offset - start.Offset);
result = Cursor.Buffer.AsSpan().Slice(start.Offset, nextQuote + 2);
return true;
}

Expand Down Expand Up @@ -672,7 +666,8 @@ private bool ReadQuotedString(char quoteChar, out ReadOnlySpan<char> result)

if (Cursor.Match(startChar))
{
Cursor.Advance(nextEscape + 1);
// Read end quote
Cursor.Advance(1);
break;
}
else if (nextEscape == -1)
Expand All @@ -684,7 +679,7 @@ private bool ReadQuotedString(char quoteChar, out ReadOnlySpan<char> result)
}
}

result = Buffer.AsSpan(start.Offset, Cursor.Offset - start.Offset);
result = Cursor.Buffer.AsSpan()[start.Offset..Cursor.Offset];

return true;
}
Expand Down
5 changes: 2 additions & 3 deletions test/Parlot.Tests/CharacterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,13 @@ public class CharacterTests
[InlineData(" \u03B2 ", " β ")]
[InlineData(" \\a ", " \a ")]
[InlineData(" \\0hello ", " \0hello ")]

public void ShouldDescodeString(string text, string expected)
public void ShouldDecodeString(string text, string expected)
{
Assert.Equal(expected, Character.DecodeString(new TextSpan(text)).ToString());
}

[Fact]
public void ShouldDescodeStringInBuffer()
public void ShouldDecodeStringInBuffer()
{
var span = new TextSpan(" a\\nbc ", 3, 5);
Assert.Equal("a\nbc", Character.DecodeString(span).ToString());
Expand Down
1 change: 1 addition & 0 deletions test/Parlot.Tests/ScannerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public void ShouldReadEscapedStringWithMatchingQuotes(string text, string expect
[InlineData("'Lorem \\u1234 ipsum'", "'Lorem \\u1234 ipsum'")]
[InlineData("'Lorem \\xabcd ipsum'", "'Lorem \\xabcd ipsum'")]
[InlineData("'\\a ding'", "'\\a ding'")]
[InlineData("'Lorem ipsum' \\xabcd", "'Lorem ipsum'")]
public void ShouldReadStringWithEscapes(string text, string expected)
{
Scanner s = new(text);
Expand Down

0 comments on commit c479e5e

Please sign in to comment.