diff --git a/Directory.Packages.props b/Directory.Packages.props
index 4e2bdea802f..2c7e57038aa 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -113,7 +113,7 @@
-
+
diff --git a/src/Compiler/Directory.Packages.props b/src/Compiler/Directory.Packages.props
deleted file mode 100644
index 7154276a074..00000000000
--- a/src/Compiler/Directory.Packages.props
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
- <_RoslynPackageVersion>4.9.2
- <_RoslynPackageVersion Condition="'$(DotNetBuildFromSource)' == 'true'">$(MicrosoftCodeAnalysisCommonPackageVersion)
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpAutoCompleteTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpAutoCompleteTest.cs
index ed3d64f00a1..95dcafb8b3c 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpAutoCompleteTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpAutoCompleteTest.cs
@@ -9,7 +9,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpAutoCompleteTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpAutoCompleteTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void FunctionsDirectiveAutoCompleteAtEOF()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpBlockTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpBlockTest.cs
index be9741e66d2..7361b50c9b4 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpBlockTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpBlockTest.cs
@@ -7,7 +7,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void CSharpBlock_SingleLineControlFlowStatement_Error()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpErrorTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpErrorTest.cs
index d7f288bef2c..0940f13c115 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpErrorTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpErrorTest.cs
@@ -9,7 +9,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpErrorTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpErrorTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void HandlesQuotesAfterTransition()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpExplicitExpressionTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpExplicitExpressionTest.cs
index 8e6e0138d07..24ecb3f04dd 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpExplicitExpressionTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpExplicitExpressionTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpExplicitExpressionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpExplicitExpressionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void ShouldOutputZeroLengthCodeSpanIfExplicitExpressionIsEmpty()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpFunctionsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpFunctionsTest.cs
index be769bbc679..455c103d2ab 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpFunctionsTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpFunctionsTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpFunctionsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpFunctionsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void Functions_SingleLineControlFlowStatement_Error()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpImplicitExpressionTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpImplicitExpressionTest.cs
index 9c597311385..d7ade8c07a8 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpImplicitExpressionTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpImplicitExpressionTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpImplicitExpressionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpImplicitExpressionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void ParsesNullConditionalOperatorImplicitExpression_Bracket1()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpNestedStatementsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpNestedStatementsTest.cs
index 4baa3d137e8..c26c9b7cce2 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpNestedStatementsTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpNestedStatementsTest.cs
@@ -7,7 +7,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpNestedStatementsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpNestedStatementsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void NestedSimpleStatement()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpRazorCommentsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpRazorCommentsTest.cs
index 87e79fea8db..e63d1b670f8 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpRazorCommentsTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpRazorCommentsTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpRazorCommentsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpRazorCommentsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void UnterminatedRazorComment()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpReservedWordsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpReservedWordsTest.cs
index 1c69574cb94..a049155bda0 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpReservedWordsTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpReservedWordsTest.cs
@@ -7,7 +7,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpReservedWordsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpReservedWordsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void ReservedWord()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSectionTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSectionTest.cs
index be7c99f3a94..4dd97349b23 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSectionTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSectionTest.cs
@@ -10,7 +10,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpSectionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpSectionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void CapturesNewlineImmediatelyFollowing()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSpecialBlockTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSpecialBlockTest.cs
index 9e7b33aa5ac..8d8b36127bf 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSpecialBlockTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSpecialBlockTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpSpecialBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpSpecialBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void NonKeywordStatementInCodeBlockIsHandledCorrectly()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpStatementTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpStatementTest.cs
index 7e324bb19e6..b6cc6096228 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpStatementTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpStatementTest.cs
@@ -16,7 +16,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy;
// * Tests for various types of nested statements
// * Comment tests
-public class CSharpStatementTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpStatementTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void ForStatement()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTemplateTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTemplateTest.cs
index 51a49891627..75d999bc96b 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTemplateTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTemplateTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpTemplateTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpTemplateTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void HandlesSingleLineTemplate()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpToMarkupSwitchTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpToMarkupSwitchTest.cs
index 5ca8a54f47d..562d5304c61 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpToMarkupSwitchTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpToMarkupSwitchTest.cs
@@ -9,7 +9,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpToMarkupSwitchTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpToMarkupSwitchTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void SingleAngleBracketDoesNotCauseSwitchIfOuterBlockIsTerminated()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerCommentTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerCommentTest.cs
new file mode 100644
index 00000000000..760f272ba7d
--- /dev/null
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerCommentTest.cs
@@ -0,0 +1,96 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+using Xunit;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+/// <summary>Tokenizer tests for Razor comments (@* ... *@) and for C# single-line and multi-line comments.</summary>
+public class CSharpTokenizerCommentTest : CSharpTokenizerTestBase
+{
+    private new SyntaxToken IgnoreRemaining { get { return (SyntaxToken)base.IgnoreRemaining; } }
+
+    // Unterminated Razor comment ending in '*': with no closing "*@", everything after the
+    // opening "@*" is expected as one RazorCommentLiteral, trailing star included.
+    [Fact]
+    public void Next_Ignores_Star_At_EOF_In_RazorComment()
+        => TestTokenizer(
+            "@* Foo * Bar * Baz *",
+            SyntaxFactory.Token(SyntaxKind.RazorCommentTransition, "@"),
+            SyntaxFactory.Token(SyntaxKind.RazorCommentStar, "*"),
+            SyntaxFactory.Token(SyntaxKind.RazorCommentLiteral, " Foo * Bar * Baz *"));
+
+    // Stars inside the comment body that are not followed by '@' stay in the literal; only the
+    // final "*@" is expected as RazorCommentStar followed by RazorCommentTransition.
+    [Fact]
+    public void Next_Ignores_Star_Without_Trailing_At()
+        => TestTokenizer(
+            "@* Foo * Bar * Baz *@",
+            SyntaxFactory.Token(SyntaxKind.RazorCommentTransition, "@"),
+            SyntaxFactory.Token(SyntaxKind.RazorCommentStar, "*"),
+            SyntaxFactory.Token(SyntaxKind.RazorCommentLiteral, " Foo * Bar * Baz "),
+            SyntaxFactory.Token(SyntaxKind.RazorCommentStar, "*"),
+            SyntaxFactory.Token(SyntaxKind.RazorCommentTransition, "@"));
+
+    // A complete Razor comment is expected as five tokens:
+    // transition, star, literal body, star, transition.
+    [Fact]
+    public void Next_Returns_RazorComment_Token_For_Entire_Razor_Comment()
+        => TestTokenizer(
+            "@* Foo Bar Baz *@",
+            SyntaxFactory.Token(SyntaxKind.RazorCommentTransition, "@"),
+            SyntaxFactory.Token(SyntaxKind.RazorCommentStar, "*"),
+            SyntaxFactory.Token(SyntaxKind.RazorCommentLiteral, " Foo Bar Baz "),
+            SyntaxFactory.Token(SyntaxKind.RazorCommentStar, "*"),
+            SyntaxFactory.Token(SyntaxKind.RazorCommentTransition, "@"));
+
+    // A "//" comment that runs to the end of the input is a single CSharpComment token.
+    [Fact]
+    public void Next_Returns_Comment_Token_For_Entire_Single_Line_Comment()
+        => TestTokenizer("// Foo Bar Baz", SyntaxFactory.Token(SyntaxKind.CSharpComment, "// Foo Bar Baz"));
+
+    // The "//" comment token stops before the newline; the rest of the input is left to IgnoreRemaining.
+    [Fact]
+    public void Single_Line_Comment_Is_Terminated_By_Newline()
+        => TestTokenizer("// Foo Bar Baz\na",
+            SyntaxFactory.Token(SyntaxKind.CSharpComment, "// Foo Bar Baz"), IgnoreRemaining);
+
+    // A "/* */" pair inside a "//" comment is just part of that comment's text.
+    [Fact]
+    public void Multi_Line_Comment_In_Single_Line_Comment_Has_No_Effect()
+        => TestTokenizer("// Foo/*Bar*/ Baz\na",
+            SyntaxFactory.Token(SyntaxKind.CSharpComment, "// Foo/*Bar*/ Baz"), IgnoreRemaining);
+
+    // A "/* ... */" comment may span newlines and is expected as a single CSharpComment token.
+    [Fact]
+    public void Next_Returns_Comment_Token_For_Entire_Multi_Line_Comment()
+        => TestTokenizer("/* Foo\nBar\nBaz */",
+            SyntaxFactory.Token(SyntaxKind.CSharpComment, "/* Foo\nBar\nBaz */"));
+
+    // The multi-line comment token ends at "*/"; the trailing "a" is not part of it.
+    [Fact]
+    public void Multi_Line_Comment_Is_Terminated_By_End_Sequence()
+        => TestTokenizer("/* Foo\nBar\nBaz */a",
+            SyntaxFactory.Token(SyntaxKind.CSharpComment, "/* Foo\nBar\nBaz */"), IgnoreRemaining);
+
+    // Without a closing "*/", the comment token is expected to extend to the end of the input.
+    [Fact]
+    public void Unterminated_Multi_Line_Comment_Captures_To_EOF()
+        => TestTokenizer("/* Foo\nBar\nBaz",
+            SyntaxFactory.Token(SyntaxKind.CSharpComment, "/* Foo\nBar\nBaz"), IgnoreRemaining);
+
+    // Comments do not nest: the token ends at the first "*/" even though a second "/*" was opened.
+    [Fact]
+    public void Nested_Multi_Line_Comments_Terminated_At_First_End_Sequence()
+        => TestTokenizer("/* Foo/*\nBar\nBaz*/ */",
+            SyntaxFactory.Token(SyntaxKind.CSharpComment, "/* Foo/*\nBar\nBaz*/"), IgnoreRemaining);
+
+    // A '*' that is not immediately followed by '/' does not close the comment; it ends at the later "*/".
+    [Fact]
+    public void Nested_Multi_Line_Comments_Terminated_At_Full_End_Sequence()
+        => TestTokenizer("/* Foo\nBar\nBaz* */",
+            SyntaxFactory.Token(SyntaxKind.CSharpComment, "/* Foo\nBar\nBaz* */"), IgnoreRemaining);
+}
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerIdentifierTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerIdentifierTest.cs
new file mode 100644
index 00000000000..33565d554a3
--- /dev/null
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerIdentifierTest.cs
@@ -0,0 +1,172 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+using Xunit;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+/// <summary>Tokenizer tests for C# identifiers and keywords.</summary>
+public class CSharpTokenizerIdentifierTest : CSharpTokenizerTestBase
+{
+    [Fact]
+    public void Simple_Identifier_Is_Recognized()
+    {
+        TestTokenizer("foo", SyntaxFactory.Token(SyntaxKind.Identifier, "foo"));
+    }
+
+    [Fact]
+    public void Identifier_Starting_With_Underscore_Is_Recognized()
+    {
+        TestTokenizer("_foo", SyntaxFactory.Token(SyntaxKind.Identifier, "_foo"));
+    }
+
+    [Fact]
+    public void Identifier_Can_Contain_Digits()
+    {
+        TestTokenizer("foo4", SyntaxFactory.Token(SyntaxKind.Identifier, "foo4"));
+    }
+
+    [Fact]
+    public void Identifier_Can_Start_With_Titlecase_Letter()
+    {
+        TestTokenizer("ῼfoo", SyntaxFactory.Token(SyntaxKind.Identifier, "ῼfoo"));
+    }
+
+    [Fact]
+    public void Identifier_Can_Start_With_Letter_Modifier()
+    {
+        TestTokenizer("ᵊfoo", SyntaxFactory.Token(SyntaxKind.Identifier, "ᵊfoo"));
+    }
+
+    [Fact]
+    public void Identifier_Can_Start_With_Other_Letter()
+    {
+        TestTokenizer("ƻfoo", SyntaxFactory.Token(SyntaxKind.Identifier, "ƻfoo"));
+    }
+
+    [Fact]
+    public void Identifier_Can_Start_With_Number_Letter()
+    {
+        TestTokenizer("Ⅽool", SyntaxFactory.Token(SyntaxKind.Identifier, "Ⅽool"));
+    }
+
+    [Fact]
+    public void Identifier_Can_Contain_Non_Spacing_Mark()
+    {
+        TestTokenizer("foo\u0300", SyntaxFactory.Token(SyntaxKind.Identifier, "foo\u0300"));
+    }
+
+    [Fact]
+    public void Identifier_Can_Contain_Spacing_Combining_Mark()
+    {
+        TestTokenizer("fooः", SyntaxFactory.Token(SyntaxKind.Identifier, "fooः"));
+    }
+
+    [Fact]
+    public void Identifier_Can_Contain_Non_English_Digit()
+    {
+        TestTokenizer("foo١", SyntaxFactory.Token(SyntaxKind.Identifier, "foo١"));
+    }
+
+    [Fact]
+    public void Identifier_Can_Contain_Connector_Punctuation()
+    {
+        TestTokenizer("foo‿bar", SyntaxFactory.Token(SyntaxKind.Identifier, "foo‿bar"));
+    }
+
+    // The format character is U+200D ZERO WIDTH JOINER (Unicode category Cf). It is written as an
+    // escape because a raw format character is invisible in source and is easily lost or stripped.
+    [Fact]
+    public void Identifier_Can_Contain_Format_Character()
+        => TestTokenizer("foo\u200Dbar", SyntaxFactory.Token(SyntaxKind.Identifier, "foo\u200Dbar"));
+
+    [Fact]
+    public void Keywords_Are_Recognized_As_Keyword_Tokens()
+    {
+        TestKeyword("abstract");
+        TestKeyword("byte");
+        TestKeyword("class");
+        TestKeyword("delegate");
+        TestKeyword("event");
+        TestKeyword("fixed");
+        TestKeyword("if");
+        TestKeyword("internal");
+        TestKeyword("new");
+        TestKeyword("override");
+        TestKeyword("readonly");
+        TestKeyword("short");
+        TestKeyword("struct");
+        TestKeyword("try");
+        TestKeyword("unsafe");
+        TestKeyword("volatile");
+        TestKeyword("as");
+        TestKeyword("do");
+        TestKeyword("is");
+        TestKeyword("params");
+        TestKeyword("ref");
+        TestKeyword("switch");
+        TestKeyword("ushort");
+        TestKeyword("while");
+        TestKeyword("case");
+        TestKeyword("const");
+        TestKeyword("explicit");
+        TestKeyword("float");
+        TestKeyword("null");
+        TestKeyword("sizeof");
+        TestKeyword("typeof");
+        TestKeyword("implicit");
+        TestKeyword("private");
+        TestKeyword("this");
+        TestKeyword("using");
+        TestKeyword("extern");
+        TestKeyword("return");
+        TestKeyword("stackalloc");
+        TestKeyword("uint");
+        TestKeyword("base");
+        TestKeyword("catch");
+        TestKeyword("continue");
+        TestKeyword("double");
+        TestKeyword("for");
+        TestKeyword("in");
+        TestKeyword("lock");
+        TestKeyword("object");
+        TestKeyword("protected");
+        TestKeyword("static");
+        TestKeyword("false");
+        TestKeyword("public");
+        TestKeyword("sbyte");
+        TestKeyword("throw");
+        TestKeyword("virtual");
+        TestKeyword("decimal");
+        TestKeyword("else");
+        TestKeyword("operator");
+        TestKeyword("string");
+        TestKeyword("ulong");
+        TestKeyword("bool");
+        TestKeyword("char");
+        TestKeyword("default");
+        TestKeyword("foreach");
+        TestKeyword("long");
+        TestKeyword("void");
+        TestKeyword("enum");
+        TestKeyword("finally");
+        TestKeyword("int");
+        TestKeyword("out");
+        TestKeyword("sealed");
+        TestKeyword("true");
+        TestKeyword("goto");
+        TestKeyword("unchecked");
+        TestKeyword("interface");
+        TestKeyword("break");
+        TestKeyword("checked");
+        TestKeyword("namespace");
+        TestKeyword("when");
+    }
+
+    // Runs the tokenizer on the given text and expects exactly one Keyword token with that text.
+    private void TestKeyword(string keyword)
+        => TestTokenizer(keyword, SyntaxFactory.Token(SyntaxKind.Keyword, keyword));
+}
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerLiteralTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerLiteralTest.cs
new file mode 100644
index 00000000000..a096d208049
--- /dev/null
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerLiteralTest.cs
@@ -0,0 +1,289 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using System;
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+using Xunit;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+/// <summary>Tokenizer tests for integer, real, character, string and verbatim string literals.</summary>
+public class CSharpTokenizerLiteralTest : CSharpTokenizerTestBase
+{
+    private new SyntaxToken IgnoreRemaining => (SyntaxToken)base.IgnoreRemaining;
+
+    [Fact]
+    public void Simple_Integer_Literal_Is_Recognized()
+    {
+        TestSingleToken("01189998819991197253", SyntaxKind.IntegerLiteral);
+    }
+
+    [Fact]
+    public void Integer_Type_Suffix_Is_Recognized()
+    {
+        TestSingleToken("42U", SyntaxKind.IntegerLiteral);
+        TestSingleToken("42u", SyntaxKind.IntegerLiteral);
+
+        TestSingleToken("42L", SyntaxKind.IntegerLiteral);
+        TestSingleToken("42l", SyntaxKind.IntegerLiteral);
+
+        TestSingleToken("42UL", SyntaxKind.IntegerLiteral);
+        TestSingleToken("42Ul", SyntaxKind.IntegerLiteral);
+
+        TestSingleToken("42uL", SyntaxKind.IntegerLiteral);
+        TestSingleToken("42ul", SyntaxKind.IntegerLiteral);
+
+        TestSingleToken("42LU", SyntaxKind.IntegerLiteral);
+        TestSingleToken("42Lu", SyntaxKind.IntegerLiteral);
+
+        TestSingleToken("42lU", SyntaxKind.IntegerLiteral);
+        TestSingleToken("42lu", SyntaxKind.IntegerLiteral);
+    }
+
+    [Fact]
+    public void Trailing_Letter_Is_Not_Part_Of_Integer_Literal_If_Not_Type_Sufix()
+    {
+        TestTokenizer("42a", SyntaxFactory.Token(SyntaxKind.IntegerLiteral, "42"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void Simple_Hex_Literal_Is_Recognized()
+    {
+        TestSingleToken("0x0123456789ABCDEF", SyntaxKind.IntegerLiteral);
+    }
+
+    [Fact]
+    public void Integer_Type_Suffix_Is_Recognized_In_Hex_Literal()
+    {
+        TestSingleToken("0xDEADBEEFU", SyntaxKind.IntegerLiteral);
+        TestSingleToken("0xDEADBEEFu", SyntaxKind.IntegerLiteral);
+
+        TestSingleToken("0xDEADBEEFL", SyntaxKind.IntegerLiteral);
+        TestSingleToken("0xDEADBEEFl", SyntaxKind.IntegerLiteral);
+
+        TestSingleToken("0xDEADBEEFUL", SyntaxKind.IntegerLiteral);
+        TestSingleToken("0xDEADBEEFUl", SyntaxKind.IntegerLiteral);
+
+        TestSingleToken("0xDEADBEEFuL", SyntaxKind.IntegerLiteral);
+        TestSingleToken("0xDEADBEEFul", SyntaxKind.IntegerLiteral);
+
+        TestSingleToken("0xDEADBEEFLU", SyntaxKind.IntegerLiteral);
+        TestSingleToken("0xDEADBEEFLu", SyntaxKind.IntegerLiteral);
+
+        TestSingleToken("0xDEADBEEFlU", SyntaxKind.IntegerLiteral);
+        TestSingleToken("0xDEADBEEFlu", SyntaxKind.IntegerLiteral);
+    }
+
+    [Fact]
+    public void Trailing_Letter_Is_Not_Part_Of_Hex_Literal_If_Not_Type_Sufix()
+    {
+        TestTokenizer("0xDEADBEEFz", SyntaxFactory.Token(SyntaxKind.IntegerLiteral, "0xDEADBEEF"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void Dot_Followed_By_Non_Digit_Is_Not_Part_Of_Real_Literal()
+    {
+        TestTokenizer("3.a", SyntaxFactory.Token(SyntaxKind.IntegerLiteral, "3"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void Simple_Real_Literal_Is_Recognized()
+    {
+        TestTokenizer("3.14159", SyntaxFactory.Token(SyntaxKind.RealLiteral, "3.14159"));
+    }
+
+    [Fact]
+    public void Real_Literal_Between_Zero_And_One_Is_Recognized()
+    {
+        TestTokenizer(".14159", SyntaxFactory.Token(SyntaxKind.RealLiteral, ".14159"));
+    }
+
+    [Fact]
+    public void Integer_With_Real_Type_Suffix_Is_Recognized()
+    {
+        TestSingleToken("42F", SyntaxKind.RealLiteral);
+        TestSingleToken("42f", SyntaxKind.RealLiteral);
+        TestSingleToken("42D", SyntaxKind.RealLiteral);
+        TestSingleToken("42d", SyntaxKind.RealLiteral);
+        TestSingleToken("42M", SyntaxKind.RealLiteral);
+        TestSingleToken("42m", SyntaxKind.RealLiteral);
+    }
+
+    [Fact]
+    public void Integer_With_Exponent_Is_Recognized()
+    {
+        TestSingleToken("1e10", SyntaxKind.RealLiteral);
+        TestSingleToken("1E10", SyntaxKind.RealLiteral);
+        TestSingleToken("1e+10", SyntaxKind.RealLiteral);
+        TestSingleToken("1E+10", SyntaxKind.RealLiteral);
+        TestSingleToken("1e-10", SyntaxKind.RealLiteral);
+        TestSingleToken("1E-10", SyntaxKind.RealLiteral);
+    }
+
+    [Fact]
+    public void Real_Number_With_Type_Suffix_Is_Recognized()
+    {
+        TestSingleToken("3.14F", SyntaxKind.RealLiteral);
+        TestSingleToken("3.14f", SyntaxKind.RealLiteral);
+        TestSingleToken("3.14D", SyntaxKind.RealLiteral);
+        TestSingleToken("3.14d", SyntaxKind.RealLiteral);
+        TestSingleToken("3.14M", SyntaxKind.RealLiteral);
+        TestSingleToken("3.14m", SyntaxKind.RealLiteral);
+    }
+
+    [Fact]
+    public void Real_Number_With_Exponent_Is_Recognized()
+    {
+        TestSingleToken("3.14E10", SyntaxKind.RealLiteral);
+        TestSingleToken("3.14e10", SyntaxKind.RealLiteral);
+        TestSingleToken("3.14E+10", SyntaxKind.RealLiteral);
+        TestSingleToken("3.14e+10", SyntaxKind.RealLiteral);
+        TestSingleToken("3.14E-10", SyntaxKind.RealLiteral);
+        TestSingleToken("3.14e-10", SyntaxKind.RealLiteral);
+    }
+
+    [Fact]
+    public void Real_Number_With_Exponent_And_Type_Suffix_Is_Recognized()
+    {
+        TestSingleToken("3.14E+10F", SyntaxKind.RealLiteral);
+    }
+
+    [Fact]
+    public void Single_Character_Literal_Is_Recognized()
+    {
+        TestSingleToken("'f'", SyntaxKind.CharacterLiteral);
+    }
+
+    [Fact]
+    public void Multi_Character_Literal_Is_Recognized()
+    {
+        TestSingleToken("'foo'", SyntaxKind.CharacterLiteral);
+    }
+
+    [Fact]
+    public void Character_Literal_Is_Terminated_By_EOF_If_Unterminated()
+    {
+        TestSingleToken("'foo bar", SyntaxKind.CharacterLiteral);
+    }
+
+    [Fact]
+    public void Character_Literal_Not_Terminated_By_Escaped_Quote()
+    {
+        TestSingleToken("'foo\\'bar'", SyntaxKind.CharacterLiteral);
+    }
+
+    [Fact]
+    public void Character_Literal_Is_Terminated_By_EOL_If_Unterminated()
+    {
+        TestTokenizer("'foo\n", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void Character_Literal_Terminated_By_EOL_Even_When_Last_Char_Is_Slash()
+    {
+        TestTokenizer("'foo\\\n", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo\\"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void Character_Literal_Terminated_By_EOL_Even_When_Last_Char_Is_Slash_And_Followed_By_Stuff()
+    {
+        TestTokenizer("'foo\\\nflarg", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo\\"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void Character_Literal_Terminated_By_CRLF_Even_When_Last_Char_Is_Slash()
+    {
+        TestTokenizer("'foo\\\r\n", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo\\"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void Character_Literal_Terminated_By_CRLF_Even_When_Last_Char_Is_Slash_And_Followed_By_Stuff()
+    {
+        TestTokenizer("'foo\\\r\nflarg", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo\\"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void Character_Literal_Allows_Escaped_Escape()
+    {
+        TestTokenizer("'foo\\\\'blah", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo\\\\'"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void String_Literal_Is_Recognized()
+    {
+        TestSingleToken("\"foo\"", SyntaxKind.StringLiteral);
+    }
+
+    [Fact]
+    public void String_Literal_Is_Terminated_By_EOF_If_Unterminated()
+    {
+        TestSingleToken("\"foo bar", SyntaxKind.StringLiteral);
+    }
+
+    [Fact]
+    public void String_Literal_Not_Terminated_By_Escaped_Quote()
+    {
+        TestSingleToken("\"foo\\\"bar\"", SyntaxKind.StringLiteral);
+    }
+
+    [Fact]
+    public void String_Literal_Is_Terminated_By_EOL_If_Unterminated()
+    {
+        TestTokenizer("\"foo\n", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void String_Literal_Terminated_By_EOL_Even_When_Last_Char_Is_Slash()
+    {
+        TestTokenizer("\"foo\\\n", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo\\"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void String_Literal_Terminated_By_EOL_Even_When_Last_Char_Is_Slash_And_Followed_By_Stuff()
+    {
+        TestTokenizer("\"foo\\\nflarg", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo\\"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void String_Literal_Terminated_By_CRLF_Even_When_Last_Char_Is_Slash()
+    {
+        TestTokenizer("\"foo\\\r\n", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo\\"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void String_Literal_Terminated_By_CRLF_Even_When_Last_Char_Is_Slash_And_Followed_By_Stuff()
+    {
+        TestTokenizer("\"foo\\\r\nflarg", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo\\"), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void String_Literal_Allows_Escaped_Escape()
+    {
+        TestTokenizer("\"foo\\\\\"blah", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo\\\\\""), IgnoreRemaining);
+    }
+
+    [Fact]
+    public void Verbatim_String_Literal_Can_Contain_Newlines()
+    {
+        TestSingleToken("@\"foo\nbar\nbaz\"", SyntaxKind.StringLiteral);
+    }
+
+    [Fact]
+    public void Verbatim_String_Literal_Not_Terminated_By_Escaped_Double_Quote()
+    {
+        TestSingleToken("@\"foo\"\"bar\"", SyntaxKind.StringLiteral);
+    }
+
+    // In a verbatim string a backslash is an ordinary character, so the quote right after it ends the literal.
+    [Fact]
+    public void Verbatim_String_Literal_Is_Terminated_By_Slash_Double_Quote()
+        => TestTokenizer("@\"foo\\\"bar\"", SyntaxFactory.Token(SyntaxKind.StringLiteral, "@\"foo\\\""), IgnoreRemaining);
+
+    [Fact]
+    public void Verbatim_String_Literal_Is_Terminated_By_EOF()
+    {
+        TestSingleToken("@\"foo", SyntaxKind.StringLiteral);
+    }
+}
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerOperatorsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerOperatorsTest.cs
new file mode 100644
index 00000000000..8582c3b3da8
--- /dev/null
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerOperatorsTest.cs
@@ -0,0 +1,298 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+using Xunit;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+// Operator and punctuation coverage for the tokenizer created by CSharpTokenizerTestBase.
+// Unless noted otherwise, each test feeds one operator and expects exactly one token of the
+// named kind whose content is the full input text.
+public class CSharpTokenizerOperatorsTest : CSharpTokenizerTestBase
+{
+    [Fact]
+    public void LeftBrace_Is_Recognized()
+        => TestSingleToken("{", SyntaxKind.LeftBrace);
+
+    [Fact]
+    public void Plus_Is_Recognized()
+        => TestSingleToken("+", SyntaxKind.Plus);
+
+    [Fact]
+    public void Assign_Is_Recognized()
+        => TestSingleToken("=", SyntaxKind.Assign);
+
+    [Fact]
+    public void Arrow_Is_Recognized()
+        => TestSingleToken("->", SyntaxKind.Arrow);
+
+    [Fact]
+    public void AndAssign_Is_Recognized()
+        => TestSingleToken("&=", SyntaxKind.AndAssign);
+
+    [Fact]
+    public void RightBrace_Is_Recognized()
+        => TestSingleToken("}", SyntaxKind.RightBrace);
+
+    [Fact]
+    public void Minus_Is_Recognized()
+        => TestSingleToken("-", SyntaxKind.Minus);
+
+    [Fact]
+    public void LessThan_Is_Recognized()
+        => TestSingleToken("<", SyntaxKind.LessThan);
+
+    [Fact]
+    public void Equals_Is_Recognized()
+        => TestSingleToken("==", SyntaxKind.Equals);
+
+    [Fact]
+    public void OrAssign_Is_Recognized()
+        => TestSingleToken("|=", SyntaxKind.OrAssign);
+
+    [Fact]
+    public void LeftBracket_Is_Recognized()
+        => TestSingleToken("[", SyntaxKind.LeftBracket);
+
+    [Fact]
+    public void Star_Is_Recognized()
+        => TestSingleToken("*", SyntaxKind.Star);
+
+    [Fact]
+    public void GreaterThan_Is_Recognized()
+        => TestSingleToken(">", SyntaxKind.GreaterThan);
+
+    [Fact]
+    public void NotEqual_Is_Recognized()
+        => TestSingleToken("!=", SyntaxKind.NotEqual);
+
+    [Fact]
+    public void XorAssign_Is_Recognized()
+        => TestSingleToken("^=", SyntaxKind.XorAssign);
+
+    [Fact]
+    public void RightBracket_Is_Recognized()
+        => TestSingleToken("]", SyntaxKind.RightBracket);
+
+    [Fact]
+    public void Slash_Is_Recognized()
+        => TestSingleToken("/", SyntaxKind.Slash);
+
+    [Fact]
+    public void QuestionMark_Is_Recognized()
+        => TestSingleToken("?", SyntaxKind.QuestionMark);
+
+    [Fact]
+    public void LessThanEqual_Is_Recognized()
+        => TestSingleToken("<=", SyntaxKind.LessThanEqual);
+
+    [Fact]
+    public void LeftShift_Is_Not_Specially_Recognized()
+    {
+        // "<<" is expected to come back as two separate LessThan tokens.
+        var first = SyntaxFactory.Token(SyntaxKind.LessThan, "<");
+        var second = SyntaxFactory.Token(SyntaxKind.LessThan, "<");
+
+        TestTokenizer("<<", first, second);
+    }
+
+    [Fact]
+    public void LeftParen_Is_Recognized()
+        => TestSingleToken("(", SyntaxKind.LeftParenthesis);
+
+    [Fact]
+    public void Modulo_Is_Recognized()
+        => TestSingleToken("%", SyntaxKind.Modulo);
+
+    [Fact]
+    public void NullCoalesce_Is_Recognized()
+        => TestSingleToken("??", SyntaxKind.NullCoalesce);
+
+    [Fact]
+    public void GreaterThanEqual_Is_Recognized()
+        => TestSingleToken(">=", SyntaxKind.GreaterThanEqual);
+
+    // NOTE(review): "=>" is asserted to be GreaterThanEqual, the same kind as ">=" above.
+    // Presumably this mirrors how the tokenizer under test classifies "=>" - confirm before
+    // changing the expected kind.
+    [Fact]
+    public void EqualGreaterThan_Is_Recognized()
+        => TestSingleToken("=>", SyntaxKind.GreaterThanEqual);
+
+    [Fact]
+    public void RightParen_Is_Recognized()
+        => TestSingleToken(")", SyntaxKind.RightParenthesis);
+
+    [Fact]
+    public void And_Is_Recognized()
+        => TestSingleToken("&", SyntaxKind.And);
+
+    [Fact]
+    public void DoubleColon_Is_Recognized()
+        => TestSingleToken("::", SyntaxKind.DoubleColon);
+
+    [Fact]
+    public void PlusAssign_Is_Recognized()
+        => TestSingleToken("+=", SyntaxKind.PlusAssign);
+
+    [Fact]
+    public void Semicolon_Is_Recognized()
+        => TestSingleToken(";", SyntaxKind.Semicolon);
+
+    [Fact]
+    public void Tilde_Is_Recognized()
+        => TestSingleToken("~", SyntaxKind.Tilde);
+
+    [Fact]
+    public void DoubleOr_Is_Recognized()
+        => TestSingleToken("||", SyntaxKind.DoubleOr);
+
+    [Fact]
+    public void ModuloAssign_Is_Recognized()
+        => TestSingleToken("%=", SyntaxKind.ModuloAssign);
+
+    [Fact]
+    public void Colon_Is_Recognized()
+        => TestSingleToken(":", SyntaxKind.Colon);
+
+    [Fact]
+    public void Not_Is_Recognized()
+        => TestSingleToken("!", SyntaxKind.Not);
+
+    [Fact]
+    public void DoubleAnd_Is_Recognized()
+        => TestSingleToken("&&", SyntaxKind.DoubleAnd);
+
+    [Fact]
+    public void DivideAssign_Is_Recognized()
+        => TestSingleToken("/=", SyntaxKind.DivideAssign);
+
+    [Fact]
+    public void Comma_Is_Recognized()
+        => TestSingleToken(",", SyntaxKind.Comma);
+
+    [Fact]
+    public void Xor_Is_Recognized()
+        => TestSingleToken("^", SyntaxKind.Xor);
+
+    [Fact]
+    public void Decrement_Is_Recognized()
+        => TestSingleToken("--", SyntaxKind.Decrement);
+
+    [Fact]
+    public void MultiplyAssign_Is_Recognized()
+        => TestSingleToken("*=", SyntaxKind.MultiplyAssign);
+
+    [Fact]
+    public void Dot_Is_Recognized()
+        => TestSingleToken(".", SyntaxKind.Dot);
+
+    [Fact]
+    public void Or_Is_Recognized()
+        => TestSingleToken("|", SyntaxKind.Or);
+
+    [Fact]
+    public void Increment_Is_Recognized()
+        => TestSingleToken("++", SyntaxKind.Increment);
+
+    [Fact]
+    public void MinusAssign_Is_Recognized()
+        => TestSingleToken("-=", SyntaxKind.MinusAssign);
+
+    [Fact]
+    public void RightShift_Is_Not_Specially_Recognized()
+    {
+        // ">>" is expected to come back as two separate GreaterThan tokens.
+        var first = SyntaxFactory.Token(SyntaxKind.GreaterThan, ">");
+        var second = SyntaxFactory.Token(SyntaxKind.GreaterThan, ">");
+
+        TestTokenizer(">>", first, second);
+    }
+
+    [Fact]
+    public void Hash_Is_Recognized()
+        => TestSingleToken("#", SyntaxKind.Hash);
+}
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTest.cs
new file mode 100644
index 00000000000..96b675eee3c
--- /dev/null
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTest.cs
@@ -0,0 +1,108 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+using Xunit;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+// General tokenizer behaviour: end of input, the recognised newline sequences, whitespace
+// runs and the '@' transition character.
+public class CSharpTokenizerTest : CSharpTokenizerTestBase
+{
+    // Re-exposes the base sentinel as a SyntaxToken so it can be passed in the expected-token list.
+    private new SyntaxToken IgnoreRemaining => (SyntaxToken)base.IgnoreRemaining;
+
+    // Builds the expected NewLine token for the given newline sequence.
+    private static SyntaxToken NewLineToken(string content)
+        => SyntaxFactory.Token(SyntaxKind.NewLine, content);
+
+    [Fact]
+    public void Next_Returns_Null_When_EOF_Reached()
+        => TestTokenizer("");
+
+    [Fact]
+    public void Next_Returns_Newline_Token_For_Single_CR()
+        => TestTokenizer("\r\ra", NewLineToken("\r"), NewLineToken("\r"), IgnoreRemaining);
+
+    [Fact]
+    public void Next_Returns_Newline_Token_For_Single_LF()
+        => TestTokenizer("\n\na", NewLineToken("\n"), NewLineToken("\n"), IgnoreRemaining);
+
+    // NEL: Unicode "Next Line" U+0085
+    [Fact]
+    public void Next_Returns_Newline_Token_For_Single_NEL()
+        => TestTokenizer("\u0085\u0085a", NewLineToken("\u0085"), NewLineToken("\u0085"), IgnoreRemaining);
+
+    // Unicode "Line Separator" U+2028
+    [Fact]
+    public void Next_Returns_Newline_Token_For_Single_Line_Separator()
+        => TestTokenizer("\u2028\u2028a", NewLineToken("\u2028"), NewLineToken("\u2028"), IgnoreRemaining);
+
+    // Unicode "Paragraph Separator" U+2029
+    [Fact]
+    public void Next_Returns_Newline_Token_For_Single_Paragraph_Separator()
+        => TestTokenizer("\u2029\u2029a", NewLineToken("\u2029"), NewLineToken("\u2029"), IgnoreRemaining);
+
+    // Each CR LF pair must be reported as one NewLine token, not two.
+    [Fact]
+    public void Next_Returns_Single_Newline_Token_For_CRLF()
+        => TestTokenizer("\r\n\r\na", NewLineToken("\r\n"), NewLineToken("\r\n"), IgnoreRemaining);
+
+    [Fact]
+    public void Next_Returns_Token_For_Whitespace_Characters()
+    {
+        // A run of mixed whitespace characters is one Whitespace token; the line feed
+        // splits it from the trailing space.
+        var leading = SyntaxFactory.Token(SyntaxKind.Whitespace, " \f\t\u000B ");
+        var trailing = SyntaxFactory.Token(SyntaxKind.Whitespace, " ");
+
+        TestTokenizer(" \f\t\u000B \n ", leading, NewLineToken("\n"), trailing);
+    }
+
+    [Fact]
+    public void Transition_Is_Recognized()
+        => TestSingleToken("@", SyntaxKind.Transition);
+
+    [Fact]
+    public void Transition_Is_Recognized_As_SingleCharacter()
+    {
+        // '@' must be its own token even when immediately followed by another character.
+        var transition = SyntaxFactory.Token(SyntaxKind.Transition, "@");
+        var openParen = SyntaxFactory.Token(SyntaxKind.LeftParenthesis, "(");
+
+        TestTokenizer("@(", transition, openParen);
+    }
+}
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTestBase.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTestBase.cs
new file mode 100644
index 00000000000..24e4207828e
--- /dev/null
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTestBase.cs
@@ -0,0 +1,28 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+// Base class for the legacy C# tokenizer tests: supplies the tokenizer under test and the
+// "ignore remaining" sentinel to TokenizerTestBase.
+public abstract class CSharpTokenizerTestBase : TokenizerTestBase
+{
+    // Single shared Marker token with empty content, handed out as the sentinel.
+    private static readonly SyntaxToken _ignoreRemaining = SyntaxFactory.Token(SyntaxKind.Marker, string.Empty);
+
+    internal override object IgnoreRemaining => _ignoreRemaining;
+
+    // Every test in this hierarchy runs against NativeCSharpTokenizer.
+    internal override object CreateTokenizer(SeekableTextReader source)
+        => new NativeCSharpTokenizer(source);
+
+    // Asserts that the whole of <paramref name="text"/> tokenizes to exactly one token of
+    // <paramref name="expectedTokenKind"/> whose content is the input itself.
+    internal void TestSingleToken(string text, SyntaxKind expectedTokenKind)
+    {
+        var expectedToken = SyntaxFactory.Token(expectedTokenKind, text);
+        TestTokenizer(text, expectedToken);
+    }
+}
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpVerbatimBlockTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpVerbatimBlockTest.cs
index 417c629f3b7..69539f1cd39 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpVerbatimBlockTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpVerbatimBlockTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpVerbatimBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpVerbatimBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void VerbatimBlock()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpWhitespaceHandlingTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpWhitespaceHandlingTest.cs
index 11c10a7a6b1..92e801c6467 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpWhitespaceHandlingTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpWhitespaceHandlingTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class CSharpWhitespaceHandlingTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class CSharpWhitespaceHandlingTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void StmtBlockDoesNotAcceptTrailingNewlineIfTheyAreSignificantToAncestor()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlAttributeTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlAttributeTest.cs
index 5e48c72dfb6..121ac4c683f 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlAttributeTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlAttributeTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class HtmlAttributeTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class HtmlAttributeTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void SymbolBoundAttributes_BeforeEqualWhitespace1()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlBlockTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlBlockTest.cs
index f6d0cff15b1..0986583dc18 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlBlockTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlBlockTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class HtmlBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class HtmlBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void HandlesUnbalancedTripleDashHTMLComments()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlDocumentTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlDocumentTest.cs
index 16b2b477d75..8eca7bc7ee3 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlDocumentTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlDocumentTest.cs
@@ -9,7 +9,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class HtmlDocumentTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class HtmlDocumentTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
private static readonly TestFile Nested1000 = TestFile.Create("TestFiles/nested-1000.html", typeof(HtmlDocumentTest));
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlErrorTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlErrorTest.cs
index c810ff73a7b..b1a625d0217 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlErrorTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlErrorTest.cs
@@ -7,7 +7,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class HtmlErrorTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class HtmlErrorTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void AllowsInvalidTagNamesAsLongAsParserCanIdentifyEndTag()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlTagsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlTagsTest.cs
index e412c1daede..79260e77932 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlTagsTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlTagsTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class HtmlTagsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class HtmlTagsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
private static readonly string[] VoidElementNames = new[]
{
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlToCodeSwitchTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlToCodeSwitchTest.cs
index 0354b26bd0d..62b644043e1 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlToCodeSwitchTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlToCodeSwitchTest.cs
@@ -10,7 +10,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class HtmlToCodeSwitchTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class HtmlToCodeSwitchTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void SwitchesWhenCharacterBeforeSwapIsNonAlphanumeric()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/ImplicitExpressionEditHandlerTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/ImplicitExpressionEditHandlerTest.cs
index 3e5c2feb2f0..18b1bbc547d 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/ImplicitExpressionEditHandlerTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/ImplicitExpressionEditHandlerTest.cs
@@ -446,7 +446,7 @@ public void IsAcceptableInsertionInBalancedParenthesis_BalancedParenthesis_Retur
private static Syntax.MarkupTextLiteralSyntax GetSyntaxNode(SourceLocation start, string content)
{
var builder = SyntaxListBuilder.Create();
- var tokens = CSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray();
+ var tokens = NativeCSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray();
foreach (var token in tokens)
{
builder.Add(token);
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/MarkupElementGroupingTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/MarkupElementGroupingTest.cs
index a540864e811..54f9fcae75f 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/MarkupElementGroupingTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/MarkupElementGroupingTest.cs
@@ -7,7 +7,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class MarkupElementGroupingTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class MarkupElementGroupingTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void Handles_ValidTags()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/RazorDirectivesTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/RazorDirectivesTest.cs
index 1584fc43772..07c6895fbc5 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/RazorDirectivesTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/RazorDirectivesTest.cs
@@ -10,7 +10,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class RazorDirectivesTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class RazorDirectivesTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void DirectiveDescriptor_FileScopedMultipleOccurring_CanHaveDuplicates()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TagHelperRewritingTestBase.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TagHelperRewritingTestBase.cs
index f286244cf20..60e14438737 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TagHelperRewritingTestBase.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TagHelperRewritingTestBase.cs
@@ -9,7 +9,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class TagHelperRewritingTestBase() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class TagHelperRewritingTestBase() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
internal void RunParseTreeRewriterTest(string documentContent, params string[] tagNames)
{
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TokenizerTestBase.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TokenizerTestBase.cs
new file mode 100644
index 00000000000..8cb2474ea70
--- /dev/null
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TokenizerTestBase.cs
@@ -0,0 +1,73 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using System;
+using System.Diagnostics;
+using System.Globalization;
+using System.Text;
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+using Xunit;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+// Shared harness for tokenizer tests: runs a tokenizer over an input string and compares the
+// produced tokens, in order, with an expected list.
+public abstract class TokenizerTestBase
+{
+    // Sentinel object; when it appears in the expected list (compared by reference), every
+    // token from that position onwards is accepted without being checked.
+    internal abstract object IgnoreRemaining { get; }
+
+    // Creates the tokenizer under test; the result is cast to Tokenizer by TestTokenizer.
+    internal abstract object CreateTokenizer(SeekableTextReader source);
+
+    // Tokenizes <paramref name="input"/> and fails the test if the tokens differ from
+    // <paramref name="expectedSymbols"/>. The assertion message lists one line per token:
+    // "P:" for a match or an ignored token, "F:" for a mismatch, a surplus or a missing token.
+    internal void TestTokenizer(string input, params SyntaxToken[] expectedSymbols)
+    {
+        var passed = true;
+        var log = new StringBuilder();
+        using (var reader = new SeekableTextReader(input, filePath: null))
+        {
+            var tokenizer = (Tokenizer)CreateTokenizer(reader);
+            var index = 0;
+            for (var actual = tokenizer.NextToken(); actual != null; actual = tokenizer.NextToken())
+            {
+                // The tokenizer produced more tokens than were expected.
+                if (index >= expectedSymbols.Length)
+                {
+                    log.AppendFormat(CultureInfo.InvariantCulture, "F: Expected: << Nothing >>; Actual: {0}", actual).AppendLine();
+                    passed = false;
+                    continue;
+                }
+
+                var expected = expectedSymbols[index];
+
+                // The index is deliberately not advanced here, so the sentinel keeps matching
+                // every remaining token.
+                if (ReferenceEquals(expected, IgnoreRemaining))
+                {
+                    log.AppendFormat(CultureInfo.InvariantCulture, "P: Ignored |{0}|", actual).AppendLine();
+                    continue;
+                }
+
+                if (expected.IsEquivalentTo(actual))
+                {
+                    log.AppendFormat(CultureInfo.InvariantCulture, "P: Expected: {0}", expected).AppendLine();
+                }
+                else
+                {
+                    log.AppendFormat(CultureInfo.InvariantCulture, "F: Expected: {0}; Actual: {1}", expected, actual).AppendLine();
+                    passed = false;
+                }
+
+                index++;
+            }
+
+            // Input ran out while expected tokens were still pending (and the next pending
+            // entry is not the sentinel): report each one as missing.
+            var hasUnmatchedExpectations =
+                index < expectedSymbols.Length &&
+                !ReferenceEquals(expectedSymbols[index], IgnoreRemaining);
+            if (hasUnmatchedExpectations)
+            {
+                passed = false;
+                while (index < expectedSymbols.Length)
+                {
+                    log.AppendFormat(CultureInfo.InvariantCulture, "F: Expected: {0}; Actual: << None >>", expectedSymbols[index]).AppendLine();
+                    index++;
+                }
+            }
+        }
+
+        Assert.True(passed, Environment.NewLine + log.ToString());
+
+        // Braces are doubled because WriteTraceLine treats its first argument as a format string.
+        WriteTraceLine(log.Replace("{", "{{").Replace("}", "}}").ToString());
+    }
+
+    // Writes a formatted line to the trace listeners; calls are compiled in only when
+    // PARSER_TRACE is defined.
+    [Conditional("PARSER_TRACE")]
+    private static void WriteTraceLine(string format, params object[] args)
+    {
+        var message = string.Format(CultureInfo.InvariantCulture, format, args);
+        Trace.WriteLine(message);
+    }
+}
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/WhiteSpaceRewriterTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/WhiteSpaceRewriterTest.cs
index 18837bc17b7..b17b71c2b86 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/WhiteSpaceRewriterTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/WhiteSpaceRewriterTest.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-public class WhiteSpaceRewriterTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true)
+public class WhiteSpaceRewriterTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true)
{
[Fact]
public void Moves_Whitespace_Preceeding_ExpressionBlock_To_Parent_Block()
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/DirectiveTokenEditHandlerTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/DirectiveTokenEditHandlerTest.cs
index 897d5b682ec..c06b2d354b3 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/DirectiveTokenEditHandlerTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/DirectiveTokenEditHandlerTest.cs
@@ -65,7 +65,7 @@ private static CSharpStatementLiteralSyntax GetSyntaxNode(DirectiveTokenEditHand
{
using var _ = SyntaxListBuilderPool.GetPooledBuilder(out var builder);
- var tokens = CSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray();
+ var tokens = NativeCSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray();
foreach (var token in tokens)
{
builder.Add((SyntaxToken)token.CreateRed());
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpLanguageCharacteristicsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpLanguageCharacteristicsTest.cs
index 08e4b0bed9d..f01565053a6 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpLanguageCharacteristicsTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpLanguageCharacteristicsTest.cs
@@ -13,7 +13,7 @@ public class CSharpLanguageCharacteristicsTest
public void GetSample_RightShiftAssign_ReturnsCorrectToken()
{
// Arrange & Act
- var token = CSharpLanguageCharacteristics.Instance.GetSample(SyntaxKind.RightShiftAssign);
+ var token = NativeCSharpLanguageCharacteristics.Instance.GetSample(SyntaxKind.RightShiftAssign);
// Assert
Assert.Equal(">>=", token);
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpTokenizerTestBase.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpTokenizerTestBase.cs
index ceb4986d631..d4528c1d8d7 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpTokenizerTestBase.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpTokenizerTestBase.cs
@@ -18,7 +18,7 @@ internal override object IgnoreRemaining
internal override object CreateTokenizer(SeekableTextReader source)
{
- return new CSharpTokenizer(source);
+ return new RoslynCSharpTokenizer(source);
}
internal void TestSingleToken(string text, SyntaxKind expectedTokenKind)
diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CodeBlockEditHandlerTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CodeBlockEditHandlerTest.cs
index 8ab3a089e6a..4e9f7392d21 100644
--- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CodeBlockEditHandlerTest.cs
+++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CodeBlockEditHandlerTest.cs
@@ -283,7 +283,7 @@ private static SyntaxNode GetSpan(SourceLocation start, string content)
{
using var _ = SyntaxListBuilderPool.GetPooledBuilder(out var builder);
- var tokens = CSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray();
+ var tokens = NativeCSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray();
foreach (var token in tokens)
{
builder.Add((SyntaxToken)token.CreateRed());
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpCodeParser.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpCodeParser.cs
index 851ebd29ef8..a37a09e6c21 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpCodeParser.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpCodeParser.cs
@@ -89,7 +89,11 @@ public CSharpCodeParser(ParserContext context)
}
public CSharpCodeParser(IEnumerable directives, ParserContext context)
- : base(context.ParseLeadingDirectives ? FirstDirectiveCSharpLanguageCharacteristics.Instance : CSharpLanguageCharacteristics.Instance, context)
+ : base(context.ParseLeadingDirectives
+ ? FirstDirectiveCSharpLanguageCharacteristics.Instance
+ : context.UseRoslynTokenizer
+ ? RoslynCSharpLanguageCharacteristics.Instance
+ : NativeCSharpLanguageCharacteristics.Instance, context)
{
if (directives == null)
{
@@ -1981,7 +1985,7 @@ private bool TryParseKeyword(
ref readonly PooledArrayBuilder whitespace,
CSharpTransitionSyntax? transition)
{
- var result = CSharpTokenizer.GetTokenKeyword(CurrentToken);
+ var result = _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken);
Debug.Assert(CurrentToken.Kind == SyntaxKind.Keyword && result.HasValue);
if (_keywordParserMap.TryGetValue(result!.Value, out var handler))
{
@@ -1998,7 +2002,7 @@ private bool TryParseKeyword(
private bool TryParseKeyword(in SyntaxListBuilder builder)
{
- var result = CSharpTokenizer.GetTokenKeyword(CurrentToken);
+ var result = _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken);
Debug.Assert(CurrentToken.Kind == SyntaxKind.Keyword && result.HasValue);
if (_keywordParserMap.TryGetValue(result!.Value, out var handler))
{
@@ -2011,7 +2015,7 @@ private bool TryParseKeyword(in SyntaxListBuilder builder)
private bool AtBooleanLiteral()
{
- return CSharpTokenizer.GetTokenKeyword(CurrentToken) is CSharpSyntaxKind.TrueKeyword or CSharpSyntaxKind.FalseKeyword;
+ return _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken) is CSharpSyntaxKind.TrueKeyword or CSharpSyntaxKind.FalseKeyword;
}
private void ParseAwaitExpression(SyntaxListBuilder builder, CSharpTransitionSyntax? transition)
@@ -2065,7 +2069,7 @@ private void ParseConditionalBlock(in SyntaxListBuilder builder
builder.Add(transition);
}
- var block = new Block(CurrentToken, CurrentStart);
+ var block = new Block(GetBlockName(CurrentToken), CurrentStart);
ParseConditionalBlock(builder, block);
if (topLevel)
{
@@ -2147,7 +2151,7 @@ private void ParseExpectedCodeBlock(in SyntaxListBuilder builde
private void ParseUnconditionalBlock(in SyntaxListBuilder builder)
{
Assert(SyntaxKind.Keyword);
- var block = new Block(CurrentToken, CurrentStart);
+ var block = new Block(GetBlockName(CurrentToken), CurrentStart);
AcceptAndMoveNext();
AcceptWhile(IsSpacingTokenIncludingNewLinesAndComments);
ParseExpectedCodeBlock(builder, block);
@@ -2162,7 +2166,7 @@ private void ParseCaseStatement(SyntaxListBuilder builder, CSha
// If it does, just accept it and let the compiler complain.
builder.Add(transition);
}
- var result = CSharpTokenizer.GetTokenKeyword(CurrentToken);
+ var result = _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken);
Debug.Assert(result is CSharpSyntaxKind.CaseKeyword or CSharpSyntaxKind.DefaultKeyword);
AcceptAndMoveNext();
while (EnsureCurrent() && CurrentToken.Kind != SyntaxKind.Colon)
@@ -2205,7 +2209,7 @@ private void ParseAfterIfClause(SyntaxListBuilder builder)
if (At(CSharpSyntaxKind.ElseKeyword))
{
Accept(in whitespace);
- Assert(CSharpSyntaxKind.ElseKeyword);
+ Assert(CSharpSyntaxKind.ElseKeyword);
ParseElseClause(builder);
}
else
@@ -2223,7 +2227,7 @@ private void ParseElseClause(in SyntaxListBuilder builder)
{
return;
}
- var block = new Block(CurrentToken, CurrentStart);
+ var block = new Block(GetBlockName(CurrentToken), CurrentStart);
AcceptAndMoveNext();
AcceptWhile(IsSpacingTokenIncludingNewLinesAndComments);
@@ -2291,7 +2295,7 @@ private void ParseFilterableCatchBlock(in SyntaxListBuilder bui
{
Assert(CSharpSyntaxKind.CatchKeyword);
- var block = new Block(CurrentToken, CurrentStart);
+ var block = new Block(GetBlockName(CurrentToken), CurrentStart);
// Accept "catch"
AcceptAndMoveNext();
@@ -2367,7 +2371,7 @@ private void ParseUsingKeyword(SyntaxListBuilder builder, CShar
{
Assert(CSharpSyntaxKind.UsingKeyword);
var topLevel = transition != null;
- var block = new Block(CurrentToken, CurrentStart);
+ var block = new Block(GetBlockName(CurrentToken), CurrentStart);
var usingToken = EatCurrentToken();
using var whitespaceOrComments = new PooledArrayBuilder();
ReadWhile(IsSpacingTokenIncludingComments, ref whitespaceOrComments.AsRef());
@@ -2913,7 +2917,7 @@ private void ParseEmbeddedTransition(in SyntaxListBuilder build
[Conditional("DEBUG")]
internal void Assert(CSharpSyntaxKind expectedKeyword)
{
- var result = CSharpTokenizer.GetTokenKeyword(CurrentToken);
+ var result = _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken);
Debug.Assert(CurrentToken.Kind == SyntaxKind.Keyword &&
result.HasValue &&
result.Value == expectedKeyword);
@@ -2921,12 +2925,22 @@ internal void Assert(CSharpSyntaxKind expectedKeyword)
protected internal bool At(CSharpSyntaxKind keyword)
{
- var result = CSharpTokenizer.GetTokenKeyword(CurrentToken);
+ var result = _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken);
return At(SyntaxKind.Keyword) &&
result.HasValue &&
result.Value == keyword;
}
+    // Name for a Block opened at `token`: the text CSharpSyntaxFacts.GetText yields when the
+    // tokenizer reports a keyword kind for a Keyword token, otherwise the token's own content.
+    private string GetBlockName(SyntaxToken token)
+    {
+        var keyword = _tokenizer.Tokenizer.GetTokenKeyword(token);
+        var isKeyword = keyword is not CSharpSyntaxKind.None and { } && token.Kind == SyntaxKind.Keyword;
+
+        return isKeyword ? CSharpSyntaxFacts.GetText(keyword.Value) : token.Content;
+    }
+
protected class Block
{
public Block(string name, SourceLocation start)
@@ -2935,23 +2949,8 @@ public Block(string name, SourceLocation start)
Start = start;
}
- public Block(SyntaxToken token, SourceLocation start)
- : this(GetName(token), start)
- {
- }
-
public string Name { get; set; }
public SourceLocation Start { get; set; }
-
- private static string GetName(SyntaxToken token)
- {
- var result = CSharpTokenizer.GetTokenKeyword(token);
- if (result is not CSharpSyntaxKind.None and { } value && token.Kind == SyntaxKind.Keyword)
- {
- return CSharpSyntaxFacts.GetText(value);
- }
- return token.Content;
- }
}
internal class ParsedDirective
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpTokenizer.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpTokenizer.cs
index 66bdabb8cef..b3bcc915005 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpTokenizer.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpTokenizer.cs
@@ -1,789 +1,16 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
-#nullable disable
-
-using System;
-using System.Collections.Generic;
-using System.Collections.Frozen;
-using System.Diagnostics;
using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
-using Microsoft.CodeAnalysis.CSharp;
-
-using SyntaxFactory = Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax.SyntaxFactory;
using CSharpSyntaxKind = Microsoft.CodeAnalysis.CSharp.SyntaxKind;
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-internal class CSharpTokenizer : Tokenizer
+internal abstract class CSharpTokenizer : Tokenizer
{
- private readonly Dictionary> _operatorHandlers;
-
- private static readonly FrozenDictionary _keywords = (new[] {
- CSharpSyntaxKind.AwaitKeyword,
- CSharpSyntaxKind.AbstractKeyword,
- CSharpSyntaxKind.ByteKeyword,
- CSharpSyntaxKind.ClassKeyword,
- CSharpSyntaxKind.DelegateKeyword,
- CSharpSyntaxKind.EventKeyword,
- CSharpSyntaxKind.FixedKeyword,
- CSharpSyntaxKind.IfKeyword,
- CSharpSyntaxKind.InternalKeyword,
- CSharpSyntaxKind.NewKeyword,
- CSharpSyntaxKind.OverrideKeyword,
- CSharpSyntaxKind.ReadOnlyKeyword,
- CSharpSyntaxKind.ShortKeyword,
- CSharpSyntaxKind.StructKeyword,
- CSharpSyntaxKind.TryKeyword,
- CSharpSyntaxKind.UnsafeKeyword,
- CSharpSyntaxKind.VolatileKeyword,
- CSharpSyntaxKind.AsKeyword,
- CSharpSyntaxKind.DoKeyword,
- CSharpSyntaxKind.IsKeyword,
- CSharpSyntaxKind.ParamsKeyword,
- CSharpSyntaxKind.RefKeyword,
- CSharpSyntaxKind.SwitchKeyword,
- CSharpSyntaxKind.UShortKeyword,
- CSharpSyntaxKind.WhileKeyword,
- CSharpSyntaxKind.CaseKeyword,
- CSharpSyntaxKind.ConstKeyword,
- CSharpSyntaxKind.ExplicitKeyword,
- CSharpSyntaxKind.FloatKeyword,
- CSharpSyntaxKind.NullKeyword,
- CSharpSyntaxKind.SizeOfKeyword,
- CSharpSyntaxKind.TypeOfKeyword,
- CSharpSyntaxKind.ImplicitKeyword,
- CSharpSyntaxKind.PrivateKeyword,
- CSharpSyntaxKind.ThisKeyword,
- CSharpSyntaxKind.UsingKeyword,
- CSharpSyntaxKind.ExternKeyword,
- CSharpSyntaxKind.ReturnKeyword,
- CSharpSyntaxKind.StackAllocKeyword,
- CSharpSyntaxKind.UIntKeyword,
- CSharpSyntaxKind.BaseKeyword,
- CSharpSyntaxKind.CatchKeyword,
- CSharpSyntaxKind.ContinueKeyword,
- CSharpSyntaxKind.DoubleKeyword,
- CSharpSyntaxKind.ForKeyword,
- CSharpSyntaxKind.InKeyword,
- CSharpSyntaxKind.LockKeyword,
- CSharpSyntaxKind.ObjectKeyword,
- CSharpSyntaxKind.ProtectedKeyword,
- CSharpSyntaxKind.StaticKeyword,
- CSharpSyntaxKind.FalseKeyword,
- CSharpSyntaxKind.PublicKeyword,
- CSharpSyntaxKind.SByteKeyword,
- CSharpSyntaxKind.ThrowKeyword,
- CSharpSyntaxKind.VirtualKeyword,
- CSharpSyntaxKind.DecimalKeyword,
- CSharpSyntaxKind.ElseKeyword,
- CSharpSyntaxKind.OperatorKeyword,
- CSharpSyntaxKind.StringKeyword,
- CSharpSyntaxKind.ULongKeyword,
- CSharpSyntaxKind.BoolKeyword,
- CSharpSyntaxKind.CharKeyword,
- CSharpSyntaxKind.DefaultKeyword,
- CSharpSyntaxKind.ForEachKeyword,
- CSharpSyntaxKind.LongKeyword,
- CSharpSyntaxKind.VoidKeyword,
- CSharpSyntaxKind.EnumKeyword,
- CSharpSyntaxKind.FinallyKeyword,
- CSharpSyntaxKind.IntKeyword,
- CSharpSyntaxKind.OutKeyword,
- CSharpSyntaxKind.SealedKeyword,
- CSharpSyntaxKind.TrueKeyword,
- CSharpSyntaxKind.GotoKeyword,
- CSharpSyntaxKind.UncheckedKeyword,
- CSharpSyntaxKind.InterfaceKeyword,
- CSharpSyntaxKind.BreakKeyword,
- CSharpSyntaxKind.CheckedKeyword,
- CSharpSyntaxKind.NamespaceKeyword,
- CSharpSyntaxKind.WhenKeyword,
- CSharpSyntaxKind.WhereKeyword }).ToFrozenDictionary(keySelector: k => SyntaxFacts.GetText(k));
-
- public CSharpTokenizer(SeekableTextReader source)
- : base(source)
- {
- base.CurrentState = StartState;
-
- _operatorHandlers = new Dictionary>()
- {
- { '-', MinusOperator },
- { '<', LessThanOperator },
- { '>', GreaterThanOperator },
- { '&', CreateTwoCharOperatorHandler(SyntaxKind.And, '=', SyntaxKind.AndAssign, '&', SyntaxKind.DoubleAnd) },
- { '|', CreateTwoCharOperatorHandler(SyntaxKind.Or, '=', SyntaxKind.OrAssign, '|', SyntaxKind.DoubleOr) },
- { '+', CreateTwoCharOperatorHandler(SyntaxKind.Plus, '=', SyntaxKind.PlusAssign, '+', SyntaxKind.Increment) },
- { '=', CreateTwoCharOperatorHandler(SyntaxKind.Assign, '=', SyntaxKind.Equals, '>', SyntaxKind.GreaterThanEqual) },
- { '!', CreateTwoCharOperatorHandler(SyntaxKind.Not, '=', SyntaxKind.NotEqual) },
- { '%', CreateTwoCharOperatorHandler(SyntaxKind.Modulo, '=', SyntaxKind.ModuloAssign) },
- { '*', CreateTwoCharOperatorHandler(SyntaxKind.Star, '=', SyntaxKind.MultiplyAssign) },
- { ':', CreateTwoCharOperatorHandler(SyntaxKind.Colon, ':', SyntaxKind.DoubleColon) },
- { '?', CreateTwoCharOperatorHandler(SyntaxKind.QuestionMark, '?', SyntaxKind.NullCoalesce) },
- { '^', CreateTwoCharOperatorHandler(SyntaxKind.Xor, '=', SyntaxKind.XorAssign) },
- { '(', () => SyntaxKind.LeftParenthesis },
- { ')', () => SyntaxKind.RightParenthesis },
- { '{', () => SyntaxKind.LeftBrace },
- { '}', () => SyntaxKind.RightBrace },
- { '[', () => SyntaxKind.LeftBracket },
- { ']', () => SyntaxKind.RightBracket },
- { ',', () => SyntaxKind.Comma },
- { ';', () => SyntaxKind.Semicolon },
- { '~', () => SyntaxKind.Tilde },
- { '#', () => SyntaxKind.Hash }
- };
- }
-
- protected override int StartState => (int)CSharpTokenizerState.Data;
-
- private new CSharpTokenizerState? CurrentState => (CSharpTokenizerState?)base.CurrentState;
-
- public override SyntaxKind RazorCommentKind => SyntaxKind.RazorCommentLiteral;
-
- public override SyntaxKind RazorCommentTransitionKind => SyntaxKind.RazorCommentTransition;
-
- public override SyntaxKind RazorCommentStarKind => SyntaxKind.RazorCommentStar;
-
- protected override StateResult Dispatch()
- {
- switch (CurrentState)
- {
- case CSharpTokenizerState.Data:
- return Data();
- case CSharpTokenizerState.BlockComment:
- return BlockComment();
- case CSharpTokenizerState.QuotedCharacterLiteral:
- return QuotedCharacterLiteral();
- case CSharpTokenizerState.QuotedStringLiteral:
- return QuotedStringLiteral();
- case CSharpTokenizerState.VerbatimStringLiteral:
- return VerbatimStringLiteral();
- case CSharpTokenizerState.AfterRazorCommentTransition:
- return AfterRazorCommentTransition();
- case CSharpTokenizerState.EscapedRazorCommentTransition:
- return EscapedRazorCommentTransition();
- case CSharpTokenizerState.RazorCommentBody:
- return RazorCommentBody();
- case CSharpTokenizerState.StarAfterRazorCommentBody:
- return StarAfterRazorCommentBody();
- case CSharpTokenizerState.AtTokenAfterRazorCommentBody:
- return AtTokenAfterRazorCommentBody();
- default:
- Debug.Fail("Invalid TokenizerState");
- return default(StateResult);
- }
- }
-
- // Optimize memory allocation by returning constants for the most frequent cases
- protected override string GetTokenContent(SyntaxKind type)
- {
- var tokenLength = Buffer.Length;
-
- if (tokenLength == 1)
- {
- switch (type)
- {
- case SyntaxKind.IntegerLiteral:
- switch (Buffer[0])
- {
- case '0':
- return "0";
- case '1':
- return "1";
- case '2':
- return "2";
- case '3':
- return "3";
- case '4':
- return "4";
- case '5':
- return "5";
- case '6':
- return "6";
- case '7':
- return "7";
- case '8':
- return "8";
- case '9':
- return "9";
- }
- break;
- case SyntaxKind.NewLine:
- if (Buffer[0] == '\n')
- {
- return "\n";
- }
- break;
- case SyntaxKind.Whitespace:
- if (Buffer[0] == ' ')
- {
- return " ";
- }
- if (Buffer[0] == '\t')
- {
- return "\t";
- }
- break;
- case SyntaxKind.Minus:
- return "-";
- case SyntaxKind.Not:
- return "!";
- case SyntaxKind.Modulo:
- return "%";
- case SyntaxKind.And:
- return "&";
- case SyntaxKind.LeftParenthesis:
- return "(";
- case SyntaxKind.RightParenthesis:
- return ")";
- case SyntaxKind.Star:
- return "*";
- case SyntaxKind.Comma:
- return ",";
- case SyntaxKind.Dot:
- return ".";
- case SyntaxKind.Slash:
- return "/";
- case SyntaxKind.Colon:
- return ":";
- case SyntaxKind.Semicolon:
- return ";";
- case SyntaxKind.QuestionMark:
- return "?";
- case SyntaxKind.RightBracket:
- return "]";
- case SyntaxKind.LeftBracket:
- return "[";
- case SyntaxKind.Xor:
- return "^";
- case SyntaxKind.LeftBrace:
- return "{";
- case SyntaxKind.Or:
- return "|";
- case SyntaxKind.RightBrace:
- return "}";
- case SyntaxKind.Tilde:
- return "~";
- case SyntaxKind.Plus:
- return "+";
- case SyntaxKind.LessThan:
- return "<";
- case SyntaxKind.Assign:
- return "=";
- case SyntaxKind.GreaterThan:
- return ">";
- case SyntaxKind.Hash:
- return "#";
- case SyntaxKind.Transition:
- return "@";
-
- }
- }
- else if (tokenLength == 2)
- {
- switch (type)
- {
- case SyntaxKind.NewLine:
- return "\r\n";
- case SyntaxKind.Arrow:
- return "->";
- case SyntaxKind.Decrement:
- return "--";
- case SyntaxKind.MinusAssign:
- return "-=";
- case SyntaxKind.NotEqual:
- return "!=";
- case SyntaxKind.ModuloAssign:
- return "%=";
- case SyntaxKind.AndAssign:
- return "&=";
- case SyntaxKind.DoubleAnd:
- return "&&";
- case SyntaxKind.MultiplyAssign:
- return "*=";
- case SyntaxKind.DivideAssign:
- return "/=";
- case SyntaxKind.DoubleColon:
- return "::";
- case SyntaxKind.NullCoalesce:
- return "??";
- case SyntaxKind.XorAssign:
- return "^=";
- case SyntaxKind.OrAssign:
- return "|=";
- case SyntaxKind.DoubleOr:
- return "||";
- case SyntaxKind.PlusAssign:
- return "+=";
- case SyntaxKind.Increment:
- return "++";
- case SyntaxKind.LessThanEqual:
- return "<=";
- case SyntaxKind.LeftShift:
- return "<<";
- case SyntaxKind.Equals:
- return "==";
- case SyntaxKind.GreaterThanEqual:
- if (Buffer[0] == '=')
- {
- return "=>";
- }
- return ">=";
- case SyntaxKind.RightShift:
- return ">>";
-
-
- }
- }
- else if (tokenLength == 3)
- {
- switch (type)
- {
- case SyntaxKind.LeftShiftAssign:
- return "<<=";
- case SyntaxKind.RightShiftAssign:
- return ">>=";
- }
- }
-
- return base.GetTokenContent(type);
- }
-
- protected override SyntaxToken CreateToken(string content, SyntaxKind kind, RazorDiagnostic[] errors)
- {
- return SyntaxFactory.Token(kind, content, errors);
- }
-
- private StateResult Data()
- {
- if (SyntaxFacts.IsNewLine(CurrentCharacter))
- {
- // CSharp Spec §2.3.1
- var checkTwoCharNewline = CurrentCharacter == '\r';
- TakeCurrent();
- if (checkTwoCharNewline && CurrentCharacter == '\n')
- {
- TakeCurrent();
- }
- return Stay(EndToken(SyntaxKind.NewLine));
- }
- else if (SyntaxFacts.IsWhitespace(CurrentCharacter))
- {
- // CSharp Spec §2.3.3
- TakeUntil(c => !SyntaxFacts.IsWhitespace(c));
- return Stay(EndToken(SyntaxKind.Whitespace));
- }
- else if (SyntaxFacts.IsIdentifierStartCharacter(CurrentCharacter))
- {
- return Identifier();
- }
- else if (char.IsDigit(CurrentCharacter))
- {
- return NumericLiteral();
- }
- switch (CurrentCharacter)
- {
- case '@':
- return AtToken();
- case '\'':
- TakeCurrent();
- return Transition(CSharpTokenizerState.QuotedCharacterLiteral);
- case '"':
- TakeCurrent();
- return Transition(CSharpTokenizerState.QuotedStringLiteral);
- case '.':
- if (char.IsDigit(Peek()))
- {
- return RealLiteral();
- }
- return Stay(Single(SyntaxKind.Dot));
- case '/':
- TakeCurrent();
- if (CurrentCharacter == '/')
- {
- TakeCurrent();
- return SingleLineComment();
- }
- else if (CurrentCharacter == '*')
- {
- TakeCurrent();
- return Transition(CSharpTokenizerState.BlockComment);
- }
- else if (CurrentCharacter == '=')
- {
- TakeCurrent();
- return Stay(EndToken(SyntaxKind.DivideAssign));
- }
- else
- {
- return Stay(EndToken(SyntaxKind.Slash));
- }
- default:
- return Stay(EndToken(Operator()));
- }
- }
-
- private StateResult AtToken()
- {
- TakeCurrent();
- if (CurrentCharacter == '"')
- {
- TakeCurrent();
- return Transition(CSharpTokenizerState.VerbatimStringLiteral);
- }
- else if (CurrentCharacter == '*')
- {
- return Transition(
- CSharpTokenizerState.AfterRazorCommentTransition,
- EndToken(SyntaxKind.RazorCommentTransition));
- }
- else if (CurrentCharacter == '@')
- {
- // Could be escaped comment transition
- return Transition(
- CSharpTokenizerState.EscapedRazorCommentTransition,
- EndToken(SyntaxKind.Transition));
- }
-
- return Stay(EndToken(SyntaxKind.Transition));
- }
-
- private StateResult EscapedRazorCommentTransition()
- {
- TakeCurrent();
- return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.Transition));
- }
-
- private SyntaxKind Operator()
- {
- var first = CurrentCharacter;
- TakeCurrent();
- Func handler;
- if (_operatorHandlers.TryGetValue(first, out handler))
- {
- return handler();
- }
- return SyntaxKind.Marker;
- }
-
- private SyntaxKind LessThanOperator()
- {
- if (CurrentCharacter == '=')
- {
- TakeCurrent();
- return SyntaxKind.LessThanEqual;
- }
- return SyntaxKind.LessThan;
- }
-
- private SyntaxKind GreaterThanOperator()
- {
- if (CurrentCharacter == '=')
- {
- TakeCurrent();
- return SyntaxKind.GreaterThanEqual;
- }
- return SyntaxKind.GreaterThan;
- }
-
- private SyntaxKind MinusOperator()
- {
- if (CurrentCharacter == '>')
- {
- TakeCurrent();
- return SyntaxKind.Arrow;
- }
- else if (CurrentCharacter == '-')
- {
- TakeCurrent();
- return SyntaxKind.Decrement;
- }
- else if (CurrentCharacter == '=')
- {
- TakeCurrent();
- return SyntaxKind.MinusAssign;
- }
- return SyntaxKind.Minus;
- }
-
- private Func CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char second, SyntaxKind typeIfBoth)
- {
- return () =>
- {
- if (CurrentCharacter == second)
- {
- TakeCurrent();
- return typeIfBoth;
- }
- return typeIfOnlyFirst;
- };
- }
-
- private Func CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char option1, SyntaxKind typeIfOption1, char option2, SyntaxKind typeIfOption2)
- {
- return () =>
- {
- if (CurrentCharacter == option1)
- {
- TakeCurrent();
- return typeIfOption1;
- }
- else if (CurrentCharacter == option2)
- {
- TakeCurrent();
- return typeIfOption2;
- }
- return typeIfOnlyFirst;
- };
- }
-
- private StateResult VerbatimStringLiteral()
- {
- TakeUntil(c => c == '"');
- if (CurrentCharacter == '"')
- {
- TakeCurrent();
- if (CurrentCharacter == '"')
- {
- TakeCurrent();
- // Stay in the literal, this is an escaped "
- return Stay();
- }
- }
- else if (EndOfFile)
- {
- CurrentErrors.Add(
- RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral(
- new SourceSpan(CurrentStart, contentLength: 1 /* end of file */)));
- }
- return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.StringLiteral));
- }
-
- private StateResult QuotedCharacterLiteral() => QuotedLiteral('\'', IsEndQuotedCharacterLiteral, SyntaxKind.CharacterLiteral);
-
- private StateResult QuotedStringLiteral() => QuotedLiteral('\"', IsEndQuotedStringLiteral, SyntaxKind.StringLiteral);
-
- private static readonly Func IsEndQuotedCharacterLiteral = static (c) => c == '\\' || c == '\'' || SyntaxFacts.IsNewLine(c);
- private static readonly Func IsEndQuotedStringLiteral = static (c) => c == '\\' || c == '\"' || SyntaxFacts.IsNewLine(c);
-
- private StateResult QuotedLiteral(char quote, Func isEndQuotedLiteral, SyntaxKind literalType)
- {
- TakeUntil(isEndQuotedLiteral);
- if (CurrentCharacter == '\\')
- {
- TakeCurrent(); // Take the '\'
-
- // If the next char is the same quote that started this
- if (CurrentCharacter == quote || CurrentCharacter == '\\')
- {
- TakeCurrent(); // Take it so that we don't prematurely end the literal.
- }
- return Stay();
- }
- else if (EndOfFile || SyntaxFacts.IsNewLine(CurrentCharacter))
- {
- CurrentErrors.Add(
- RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral(
- new SourceSpan(CurrentStart, contentLength: 1 /* " */)));
- }
- else
- {
- TakeCurrent(); // No-op if at EOF
- }
- return Transition(CSharpTokenizerState.Data, EndToken(literalType));
- }
-
- // CSharp Spec §2.3.2
- private StateResult BlockComment()
- {
- TakeUntil(c => c == '*');
- if (EndOfFile)
- {
- CurrentErrors.Add(
- RazorDiagnosticFactory.CreateParsing_BlockCommentNotTerminated(
- new SourceSpan(CurrentStart, contentLength: 1 /* end of file */)));
-
- return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment));
- }
- if (CurrentCharacter == '*')
- {
- TakeCurrent();
- if (CurrentCharacter == '/')
- {
- TakeCurrent();
- return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment));
- }
- }
- return Stay();
- }
-
- // CSharp Spec §2.3.2
- private StateResult SingleLineComment()
- {
- TakeUntil(c => SyntaxFacts.IsNewLine(c));
- return Stay(EndToken(SyntaxKind.CSharpComment));
- }
-
- // CSharp Spec §2.4.4
- private StateResult NumericLiteral()
- {
- if (TakeAll("0x", caseSensitive: true))
- {
- return HexLiteral();
- }
- else
- {
- return DecimalLiteral();
- }
- }
-
- private StateResult HexLiteral()
- {
- TakeUntil(c => !IsHexDigit(c));
- TakeIntegerSuffix();
- return Stay(EndToken(SyntaxKind.IntegerLiteral));
- }
-
- private StateResult DecimalLiteral()
- {
- TakeUntil(c => !Char.IsDigit(c));
- if (CurrentCharacter == '.' && Char.IsDigit(Peek()))
- {
- return RealLiteral();
- }
- else if (IsRealLiteralSuffix(CurrentCharacter) ||
- CurrentCharacter == 'E' || CurrentCharacter == 'e')
- {
- return RealLiteralExponentPart();
- }
- else
- {
- TakeIntegerSuffix();
- return Stay(EndToken(SyntaxKind.IntegerLiteral));
- }
- }
-
- private StateResult RealLiteralExponentPart()
- {
- if (CurrentCharacter == 'E' || CurrentCharacter == 'e')
- {
- TakeCurrent();
- if (CurrentCharacter == '+' || CurrentCharacter == '-')
- {
- TakeCurrent();
- }
- TakeUntil(c => !Char.IsDigit(c));
- }
- if (IsRealLiteralSuffix(CurrentCharacter))
- {
- TakeCurrent();
- }
- return Stay(EndToken(SyntaxKind.RealLiteral));
- }
-
- // CSharp Spec §2.4.4.3
- private StateResult RealLiteral()
+ protected CSharpTokenizer(SeekableTextReader source) : base(source)
{
- AssertCurrent('.');
- TakeCurrent();
- Debug.Assert(Char.IsDigit(CurrentCharacter));
- TakeUntil(c => !Char.IsDigit(c));
- return RealLiteralExponentPart();
}
- private void TakeIntegerSuffix()
- {
- if (Char.ToLowerInvariant(CurrentCharacter) == 'u')
- {
- TakeCurrent();
- if (Char.ToLowerInvariant(CurrentCharacter) == 'l')
- {
- TakeCurrent();
- }
- }
- else if (Char.ToLowerInvariant(CurrentCharacter) == 'l')
- {
- TakeCurrent();
- if (Char.ToLowerInvariant(CurrentCharacter) == 'u')
- {
- TakeCurrent();
- }
- }
- }
-
- // CSharp Spec §2.4.2
- private StateResult Identifier()
- {
- Debug.Assert(SyntaxFacts.IsIdentifierStartCharacter(CurrentCharacter));
- TakeCurrent();
- TakeUntil(c => !SyntaxFacts.IsIdentifierPartCharacter(c));
- SyntaxToken token = null;
- if (HaveContent)
- {
- var type = SyntaxKind.Identifier;
- var tokenContent = Buffer.ToString();
- if (_keywords.TryGetValue(tokenContent, value: out _))
- {
- type = SyntaxKind.Keyword;
- }
-
- token = SyntaxFactory.Token(type, tokenContent);
-
- Buffer.Clear();
- CurrentErrors.Clear();
- }
-
- return Stay(token);
- }
-
- private StateResult Transition(CSharpTokenizerState state)
- {
- return Transition((int)state, result: null);
- }
-
- private StateResult Transition(CSharpTokenizerState state, SyntaxToken result)
- {
- return Transition((int)state, result);
- }
-
- private static bool IsRealLiteralSuffix(char character)
- {
- return character == 'F' ||
- character == 'f' ||
- character == 'D' ||
- character == 'd' ||
- character == 'M' ||
- character == 'm';
- }
-
- private static bool IsHexDigit(char value)
- {
- return (value >= '0' && value <= '9') || (value >= 'A' && value <= 'F') || (value >= 'a' && value <= 'f');
- }
-
- internal static CSharpSyntaxKind? GetTokenKeyword(SyntaxToken token)
- {
- if (token != null && _keywords.TryGetValue(token.Content, out var keyword))
- {
- return keyword;
- }
-
- return null;
- }
-
- private enum CSharpTokenizerState
- {
- Data,
- BlockComment,
- QuotedCharacterLiteral,
- QuotedStringLiteral,
- VerbatimStringLiteral,
-
- // Razor Comments - need to be the same for HTML and CSharp
- AfterRazorCommentTransition = RazorCommentTokenizerState.AfterRazorCommentTransition,
- EscapedRazorCommentTransition = RazorCommentTokenizerState.EscapedRazorCommentTransition,
- RazorCommentBody = RazorCommentTokenizerState.RazorCommentBody,
- StarAfterRazorCommentBody = RazorCommentTokenizerState.StarAfterRazorCommentBody,
- AtTokenAfterRazorCommentBody = RazorCommentTokenizerState.AtTokenAfterRazorCommentBody,
- }
+ internal abstract CSharpSyntaxKind? GetTokenKeyword(SyntaxToken token);
}
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/DirectiveCSharpTokenizer.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/DirectiveCSharpTokenizer.cs
index 19f520be306..3d2d81c63f7 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/DirectiveCSharpTokenizer.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/DirectiveCSharpTokenizer.cs
@@ -8,7 +8,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-internal class DirectiveCSharpTokenizer(SeekableTextReader source) : CSharpTokenizer(source)
+internal class DirectiveCSharpTokenizer(SeekableTextReader source) : NativeCSharpTokenizer(source)
{
private bool _visitedFirstTokenStart;
private bool _visitedFirstTokenLineEnd;
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/FirstDirectiveCSharpLanguageCharacteristics.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/FirstDirectiveCSharpLanguageCharacteristics.cs
index 723deba420d..29360d399f3 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/FirstDirectiveCSharpLanguageCharacteristics.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/FirstDirectiveCSharpLanguageCharacteristics.cs
@@ -5,7 +5,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
-internal class FirstDirectiveCSharpLanguageCharacteristics : CSharpLanguageCharacteristics
+internal class FirstDirectiveCSharpLanguageCharacteristics : NativeCSharpLanguageCharacteristics
{
private FirstDirectiveCSharpLanguageCharacteristics()
{
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpLanguageCharacteristics.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpLanguageCharacteristics.cs
similarity index 94%
rename from src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpLanguageCharacteristics.cs
rename to src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpLanguageCharacteristics.cs
index 28008335175..a62f11cffc0 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpLanguageCharacteristics.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpLanguageCharacteristics.cs
@@ -10,7 +10,7 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;
// Removal of this type is tracked by https://github.com/dotnet/razor/issues/8445
-internal class CSharpLanguageCharacteristics : LanguageCharacteristics
+internal class NativeCSharpLanguageCharacteristics : LanguageCharacteristics
{
private static readonly Dictionary _tokenSamples = new Dictionary()
{
@@ -65,17 +65,17 @@ internal class CSharpLanguageCharacteristics : LanguageCharacteristics _instance;
+ public static NativeCSharpLanguageCharacteristics Instance => _instance;
public override CSharpTokenizer CreateTokenizer(SeekableTextReader source)
{
- return new CSharpTokenizer(source);
+ return new NativeCSharpTokenizer(source);
}
protected override SyntaxToken CreateToken(string content, SyntaxKind kind, RazorDiagnostic[] errors)
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpTokenizer.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpTokenizer.cs
new file mode 100644
index 00000000000..1b42a196411
--- /dev/null
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpTokenizer.cs
@@ -0,0 +1,793 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Frozen;
+using System.Diagnostics;
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+using Microsoft.CodeAnalysis.CSharp;
+
+using SyntaxFactory = Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax.SyntaxFactory;
+using CSharpSyntaxKind = Microsoft.CodeAnalysis.CSharp.SyntaxKind;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+/// <summary>
+/// This is the old tokenizer that was used in Razor. It natively implements tokenization of C#, rather than using Roslyn. It is maintained for
+/// backwards compatibility; users opt into it with a feature flag in their project file.
+/// </summary>
+internal class NativeCSharpTokenizer : CSharpTokenizer
+{
+ // Maps the first character of an operator or punctuation token to the handler that
+ // consumes any following characters and returns the resulting token kind.
+ // Populated in the constructor and consulted by Operator().
+ private readonly Dictionary> _operatorHandlers;
+
+ // Lookup from keyword text (as produced by SyntaxFacts.GetText) to the Roslyn syntax kind
+ // it was built from. Identifier() uses it to decide between Keyword and Identifier tokens,
+ // and GetTokenKeyword() uses it to recover the Roslyn kind of a token.
+ private static readonly FrozenDictionary _keywords = (new[] {
+ CSharpSyntaxKind.AwaitKeyword,
+ CSharpSyntaxKind.AbstractKeyword,
+ CSharpSyntaxKind.ByteKeyword,
+ CSharpSyntaxKind.ClassKeyword,
+ CSharpSyntaxKind.DelegateKeyword,
+ CSharpSyntaxKind.EventKeyword,
+ CSharpSyntaxKind.FixedKeyword,
+ CSharpSyntaxKind.IfKeyword,
+ CSharpSyntaxKind.InternalKeyword,
+ CSharpSyntaxKind.NewKeyword,
+ CSharpSyntaxKind.OverrideKeyword,
+ CSharpSyntaxKind.ReadOnlyKeyword,
+ CSharpSyntaxKind.ShortKeyword,
+ CSharpSyntaxKind.StructKeyword,
+ CSharpSyntaxKind.TryKeyword,
+ CSharpSyntaxKind.UnsafeKeyword,
+ CSharpSyntaxKind.VolatileKeyword,
+ CSharpSyntaxKind.AsKeyword,
+ CSharpSyntaxKind.DoKeyword,
+ CSharpSyntaxKind.IsKeyword,
+ CSharpSyntaxKind.ParamsKeyword,
+ CSharpSyntaxKind.RefKeyword,
+ CSharpSyntaxKind.SwitchKeyword,
+ CSharpSyntaxKind.UShortKeyword,
+ CSharpSyntaxKind.WhileKeyword,
+ CSharpSyntaxKind.CaseKeyword,
+ CSharpSyntaxKind.ConstKeyword,
+ CSharpSyntaxKind.ExplicitKeyword,
+ CSharpSyntaxKind.FloatKeyword,
+ CSharpSyntaxKind.NullKeyword,
+ CSharpSyntaxKind.SizeOfKeyword,
+ CSharpSyntaxKind.TypeOfKeyword,
+ CSharpSyntaxKind.ImplicitKeyword,
+ CSharpSyntaxKind.PrivateKeyword,
+ CSharpSyntaxKind.ThisKeyword,
+ CSharpSyntaxKind.UsingKeyword,
+ CSharpSyntaxKind.ExternKeyword,
+ CSharpSyntaxKind.ReturnKeyword,
+ CSharpSyntaxKind.StackAllocKeyword,
+ CSharpSyntaxKind.UIntKeyword,
+ CSharpSyntaxKind.BaseKeyword,
+ CSharpSyntaxKind.CatchKeyword,
+ CSharpSyntaxKind.ContinueKeyword,
+ CSharpSyntaxKind.DoubleKeyword,
+ CSharpSyntaxKind.ForKeyword,
+ CSharpSyntaxKind.InKeyword,
+ CSharpSyntaxKind.LockKeyword,
+ CSharpSyntaxKind.ObjectKeyword,
+ CSharpSyntaxKind.ProtectedKeyword,
+ CSharpSyntaxKind.StaticKeyword,
+ CSharpSyntaxKind.FalseKeyword,
+ CSharpSyntaxKind.PublicKeyword,
+ CSharpSyntaxKind.SByteKeyword,
+ CSharpSyntaxKind.ThrowKeyword,
+ CSharpSyntaxKind.VirtualKeyword,
+ CSharpSyntaxKind.DecimalKeyword,
+ CSharpSyntaxKind.ElseKeyword,
+ CSharpSyntaxKind.OperatorKeyword,
+ CSharpSyntaxKind.StringKeyword,
+ CSharpSyntaxKind.ULongKeyword,
+ CSharpSyntaxKind.BoolKeyword,
+ CSharpSyntaxKind.CharKeyword,
+ CSharpSyntaxKind.DefaultKeyword,
+ CSharpSyntaxKind.ForEachKeyword,
+ CSharpSyntaxKind.LongKeyword,
+ CSharpSyntaxKind.VoidKeyword,
+ CSharpSyntaxKind.EnumKeyword,
+ CSharpSyntaxKind.FinallyKeyword,
+ CSharpSyntaxKind.IntKeyword,
+ CSharpSyntaxKind.OutKeyword,
+ CSharpSyntaxKind.SealedKeyword,
+ CSharpSyntaxKind.TrueKeyword,
+ CSharpSyntaxKind.GotoKeyword,
+ CSharpSyntaxKind.UncheckedKeyword,
+ CSharpSyntaxKind.InterfaceKeyword,
+ CSharpSyntaxKind.BreakKeyword,
+ CSharpSyntaxKind.CheckedKeyword,
+ CSharpSyntaxKind.NamespaceKeyword,
+ CSharpSyntaxKind.WhenKeyword,
+ CSharpSyntaxKind.WhereKeyword }).ToFrozenDictionary(keySelector: k => SyntaxFacts.GetText(k));
+
+ // Creates a tokenizer over the given reader, places the state machine in its start state,
+ // and registers one handler per leading operator or punctuation character.
+ public NativeCSharpTokenizer(SeekableTextReader source)
+     : base(source)
+ {
+     base.CurrentState = StartState;
+
+     _operatorHandlers = new()
+     {
+         // Operators whose follow-up characters need dedicated logic.
+         ['-'] = MinusOperator,
+         ['<'] = LessThanOperator,
+         ['>'] = GreaterThanOperator,
+
+         // Operators with one or two possible second characters.
+         ['&'] = CreateTwoCharOperatorHandler(SyntaxKind.And, '=', SyntaxKind.AndAssign, '&', SyntaxKind.DoubleAnd),
+         ['|'] = CreateTwoCharOperatorHandler(SyntaxKind.Or, '=', SyntaxKind.OrAssign, '|', SyntaxKind.DoubleOr),
+         ['+'] = CreateTwoCharOperatorHandler(SyntaxKind.Plus, '=', SyntaxKind.PlusAssign, '+', SyntaxKind.Increment),
+         // Note that "=>" is reported with the GreaterThanEqual kind.
+         ['='] = CreateTwoCharOperatorHandler(SyntaxKind.Assign, '=', SyntaxKind.Equals, '>', SyntaxKind.GreaterThanEqual),
+         ['!'] = CreateTwoCharOperatorHandler(SyntaxKind.Not, '=', SyntaxKind.NotEqual),
+         ['%'] = CreateTwoCharOperatorHandler(SyntaxKind.Modulo, '=', SyntaxKind.ModuloAssign),
+         ['*'] = CreateTwoCharOperatorHandler(SyntaxKind.Star, '=', SyntaxKind.MultiplyAssign),
+         [':'] = CreateTwoCharOperatorHandler(SyntaxKind.Colon, ':', SyntaxKind.DoubleColon),
+         ['?'] = CreateTwoCharOperatorHandler(SyntaxKind.QuestionMark, '?', SyntaxKind.NullCoalesce),
+         ['^'] = CreateTwoCharOperatorHandler(SyntaxKind.Xor, '=', SyntaxKind.XorAssign),
+
+         // Single-character punctuation.
+         ['('] = () => SyntaxKind.LeftParenthesis,
+         [')'] = () => SyntaxKind.RightParenthesis,
+         ['{'] = () => SyntaxKind.LeftBrace,
+         ['}'] = () => SyntaxKind.RightBrace,
+         ['['] = () => SyntaxKind.LeftBracket,
+         [']'] = () => SyntaxKind.RightBracket,
+         [','] = () => SyntaxKind.Comma,
+         [';'] = () => SyntaxKind.Semicolon,
+         ['~'] = () => SyntaxKind.Tilde,
+         ['#'] = () => SyntaxKind.Hash,
+     };
+ }
+
+ // The state the tokenizer starts in: Data, converted to the int form the base class works with.
+ protected override int StartState => (int)CSharpTokenizerState.Data;
+
+ // Hides the base CurrentState so that code in this class sees it as a CSharpTokenizerState.
+ private new CSharpTokenizerState? CurrentState => (CSharpTokenizerState?)base.CurrentState;
+
+ // Token kind reported for the body of a Razor comment.
+ public override SyntaxKind RazorCommentKind => SyntaxKind.RazorCommentLiteral;
+
+ // Token kind reported for the transition character of a Razor comment.
+ public override SyntaxKind RazorCommentTransitionKind => SyntaxKind.RazorCommentTransition;
+
+ // Token kind reported for the star of a Razor comment.
+ public override SyntaxKind RazorCommentStarKind => SyntaxKind.RazorCommentStar;
+
+ // Runs the handler that belongs to the current tokenizer state and returns its result.
+ // An unrecognized state trips a debug failure and yields a default StateResult.
+ protected override StateResult Dispatch()
+ {
+     return CurrentState switch
+     {
+         CSharpTokenizerState.Data => Data(),
+         CSharpTokenizerState.BlockComment => BlockComment(),
+         CSharpTokenizerState.QuotedCharacterLiteral => QuotedCharacterLiteral(),
+         CSharpTokenizerState.QuotedStringLiteral => QuotedStringLiteral(),
+         CSharpTokenizerState.VerbatimStringLiteral => VerbatimStringLiteral(),
+         CSharpTokenizerState.AfterRazorCommentTransition => AfterRazorCommentTransition(),
+         CSharpTokenizerState.EscapedRazorCommentTransition => EscapedRazorCommentTransition(),
+         CSharpTokenizerState.RazorCommentBody => RazorCommentBody(),
+         CSharpTokenizerState.StarAfterRazorCommentBody => StarAfterRazorCommentBody(),
+         CSharpTokenizerState.AtTokenAfterRazorCommentBody => AtTokenAfterRazorCommentBody(),
+         _ => InvalidState(),
+     };
+
+     static StateResult InvalidState()
+     {
+         Debug.Fail("Invalid TokenizerState");
+         return default(StateResult);
+     }
+ }
+
+ // Returns the text of the token currently held in Buffer. To avoid allocating a new string
+ // for the most frequent short tokens (one to three characters), those are answered with
+ // constant strings; every other token is delegated to the base implementation.
+ protected override string GetTokenContent(SyntaxKind type)
+ {
+     var constant = Buffer.Length switch
+     {
+         1 => OneCharacterContent(),
+         2 => TwoCharacterContent(),
+         3 => ThreeCharacterContent(),
+         _ => null,
+     };
+
+     return constant ?? base.GetTokenContent(type);
+
+     // Buffer is only inspected for kinds whose text is not implied by the kind itself.
+     string OneCharacterContent() => type switch
+     {
+         SyntaxKind.IntegerLiteral => Buffer[0] switch
+         {
+             '0' => "0",
+             '1' => "1",
+             '2' => "2",
+             '3' => "3",
+             '4' => "4",
+             '5' => "5",
+             '6' => "6",
+             '7' => "7",
+             '8' => "8",
+             '9' => "9",
+             _ => null,
+         },
+         SyntaxKind.NewLine => Buffer[0] == '\n' ? "\n" : null,
+         SyntaxKind.Whitespace => Buffer[0] switch
+         {
+             ' ' => " ",
+             '\t' => "\t",
+             _ => null,
+         },
+         SyntaxKind.Minus => "-",
+         SyntaxKind.Not => "!",
+         SyntaxKind.Modulo => "%",
+         SyntaxKind.And => "&",
+         SyntaxKind.LeftParenthesis => "(",
+         SyntaxKind.RightParenthesis => ")",
+         SyntaxKind.Star => "*",
+         SyntaxKind.Comma => ",",
+         SyntaxKind.Dot => ".",
+         SyntaxKind.Slash => "/",
+         SyntaxKind.Colon => ":",
+         SyntaxKind.Semicolon => ";",
+         SyntaxKind.QuestionMark => "?",
+         SyntaxKind.RightBracket => "]",
+         SyntaxKind.LeftBracket => "[",
+         SyntaxKind.Xor => "^",
+         SyntaxKind.LeftBrace => "{",
+         SyntaxKind.Or => "|",
+         SyntaxKind.RightBrace => "}",
+         SyntaxKind.Tilde => "~",
+         SyntaxKind.Plus => "+",
+         SyntaxKind.LessThan => "<",
+         SyntaxKind.Assign => "=",
+         SyntaxKind.GreaterThan => ">",
+         SyntaxKind.Hash => "#",
+         SyntaxKind.Transition => "@",
+         _ => null,
+     };
+
+     string TwoCharacterContent() => type switch
+     {
+         SyntaxKind.NewLine => "\r\n",
+         SyntaxKind.Arrow => "->",
+         SyntaxKind.Decrement => "--",
+         SyntaxKind.MinusAssign => "-=",
+         SyntaxKind.NotEqual => "!=",
+         SyntaxKind.ModuloAssign => "%=",
+         SyntaxKind.AndAssign => "&=",
+         SyntaxKind.DoubleAnd => "&&",
+         SyntaxKind.MultiplyAssign => "*=",
+         SyntaxKind.DivideAssign => "/=",
+         SyntaxKind.DoubleColon => "::",
+         SyntaxKind.NullCoalesce => "??",
+         SyntaxKind.XorAssign => "^=",
+         SyntaxKind.OrAssign => "|=",
+         SyntaxKind.DoubleOr => "||",
+         SyntaxKind.PlusAssign => "+=",
+         SyntaxKind.Increment => "++",
+         SyntaxKind.LessThanEqual => "<=",
+         SyntaxKind.LeftShift => "<<",
+         SyntaxKind.Equals => "==",
+         // This kind is shared by ">=" and "=>", so the first character decides the text.
+         SyntaxKind.GreaterThanEqual => Buffer[0] == '=' ? "=>" : ">=",
+         SyntaxKind.RightShift => ">>",
+         _ => null,
+     };
+
+     string ThreeCharacterContent() => type switch
+     {
+         SyntaxKind.LeftShiftAssign => "<<=",
+         SyntaxKind.RightShiftAssign => ">>=",
+         _ => null,
+     };
+ }
+
+ // Builds a token of the given kind and content carrying the supplied diagnostics.
+ protected override SyntaxToken CreateToken(string content, SyntaxKind kind, RazorDiagnostic[] errors)
+     => SyntaxFactory.Token(kind, content, errors);
+
+ // Handler for the Data state. Classifies the current character and either produces a
+ // complete token, hands off to a specialised scanner (identifier, number, comment), or
+ // moves to the state that scans a quoted literal or block comment.
+ private StateResult Data()
+ {
+     var first = CurrentCharacter;
+
+     if (SyntaxFacts.IsNewLine(first))
+     {
+         // CSharp Spec §2.3.1: a '\r' directly followed by '\n' forms one newline token.
+         TakeCurrent();
+         if (first == '\r' && CurrentCharacter == '\n')
+         {
+             TakeCurrent();
+         }
+
+         return Stay(EndToken(SyntaxKind.NewLine));
+     }
+
+     if (SyntaxFacts.IsWhitespace(first))
+     {
+         // CSharp Spec §2.3.3
+         TakeUntil(static c => !SyntaxFacts.IsWhitespace(c));
+         return Stay(EndToken(SyntaxKind.Whitespace));
+     }
+
+     if (SyntaxFacts.IsIdentifierStartCharacter(first))
+     {
+         return Identifier();
+     }
+
+     if (char.IsDigit(first))
+     {
+         return NumericLiteral();
+     }
+
+     switch (first)
+     {
+         case '@':
+             return AtToken();
+
+         case '\'':
+             TakeCurrent();
+             return Transition(CSharpTokenizerState.QuotedCharacterLiteral);
+
+         case '"':
+             TakeCurrent();
+             return Transition(CSharpTokenizerState.QuotedStringLiteral);
+
+         case '.':
+             // A dot directly followed by a digit starts a real literal such as ".5".
+             return char.IsDigit(Peek())
+                 ? RealLiteral()
+                 : Stay(Single(SyntaxKind.Dot));
+
+         case '/':
+             return SlashOrComment();
+
+         default:
+             return Stay(EndToken(Operator()));
+     }
+
+     // Consumes the '/' and decides between a comment, "/=" and a plain slash.
+     StateResult SlashOrComment()
+     {
+         TakeCurrent();
+         switch (CurrentCharacter)
+         {
+             case '/':
+                 TakeCurrent();
+                 return SingleLineComment();
+
+             case '*':
+                 TakeCurrent();
+                 return Transition(CSharpTokenizerState.BlockComment);
+
+             case '=':
+                 TakeCurrent();
+                 return Stay(EndToken(SyntaxKind.DivideAssign));
+
+             default:
+                 return Stay(EndToken(SyntaxKind.Slash));
+         }
+     }
+ }
+
+ // Consumes an '@' and decides what it introduces based on the character that follows:
+ // a verbatim string, a Razor comment, a possibly escaped transition, or a plain transition.
+ private StateResult AtToken()
+ {
+     TakeCurrent();
+
+     switch (CurrentCharacter)
+     {
+         case '"':
+             // @" opens a verbatim string; the quote becomes part of the literal.
+             TakeCurrent();
+             return Transition(CSharpTokenizerState.VerbatimStringLiteral);
+
+         case '*':
+             // @* opens a Razor comment; the star is left for the next state.
+             return Transition(
+                 CSharpTokenizerState.AfterRazorCommentTransition,
+                 EndToken(SyntaxKind.RazorCommentTransition));
+
+         case '@':
+             // Could be escaped comment transition
+             return Transition(
+                 CSharpTokenizerState.EscapedRazorCommentTransition,
+                 EndToken(SyntaxKind.Transition));
+
+         default:
+             return Stay(EndToken(SyntaxKind.Transition));
+     }
+ }
+
+ // Consumes the current character, emits it as a Transition token, and returns to the Data state.
+ private StateResult EscapedRazorCommentTransition()
+ {
+     TakeCurrent();
+     var transitionToken = EndToken(SyntaxKind.Transition);
+     return Transition(CSharpTokenizerState.Data, transitionToken);
+ }
+
+ // Consumes the current character and lets the handler registered for it determine the
+ // token kind. Characters without a registered handler produce SyntaxKind.Marker.
+ private SyntaxKind Operator()
+ {
+     var lead = CurrentCharacter;
+     TakeCurrent();
+
+     return _operatorHandlers.TryGetValue(lead, out var handler)
+         ? handler()
+         : SyntaxKind.Marker;
+ }
+
+ // Called after '<' has been consumed: yields LessThanEqual when '=' follows (and consumes it),
+ // otherwise LessThan.
+ private SyntaxKind LessThanOperator()
+ {
+     if (CurrentCharacter != '=')
+     {
+         return SyntaxKind.LessThan;
+     }
+
+     TakeCurrent();
+     return SyntaxKind.LessThanEqual;
+ }
+
+ // Called after '>' has been consumed: yields GreaterThanEqual when '=' follows (and consumes it),
+ // otherwise GreaterThan.
+ private SyntaxKind GreaterThanOperator()
+ {
+     if (CurrentCharacter != '=')
+     {
+         return SyntaxKind.GreaterThan;
+     }
+
+     TakeCurrent();
+     return SyntaxKind.GreaterThanEqual;
+ }
+
+ // Called after '-' has been consumed: recognises "->", "--" and "-=" by the next character
+ // (consuming it), and falls back to a plain Minus otherwise.
+ private SyntaxKind MinusOperator()
+ {
+     SyntaxKind twoCharKind;
+     switch (CurrentCharacter)
+     {
+         case '>':
+             twoCharKind = SyntaxKind.Arrow;
+             break;
+         case '-':
+             twoCharKind = SyntaxKind.Decrement;
+             break;
+         case '=':
+             twoCharKind = SyntaxKind.MinusAssign;
+             break;
+         default:
+             return SyntaxKind.Minus;
+     }
+
+     TakeCurrent();
+     return twoCharKind;
+ }
+
+ // Builds a handler for an operator that has exactly one two-character form. When invoked
+ // (after the first character was consumed) the handler consumes 'second' and returns
+ // typeIfBoth if it is the current character, and returns typeIfOnlyFirst otherwise.
+ private Func CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char second, SyntaxKind typeIfBoth)
+ {
+     return Handle;
+
+     SyntaxKind Handle()
+     {
+         if (CurrentCharacter != second)
+         {
+             return typeIfOnlyFirst;
+         }
+
+         TakeCurrent();
+         return typeIfBoth;
+     }
+ }
+
+ // Builds a handler for an operator that has two alternative two-character forms. When
+ // invoked (after the first character was consumed) the handler consumes option1 or option2
+ // if one of them is the current character and returns the matching kind; option1 is
+ // checked first. If neither matches, typeIfOnlyFirst is returned and nothing is consumed.
+ private Func CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char option1, SyntaxKind typeIfOption1, char option2, SyntaxKind typeIfOption2)
+ {
+     return Handle;
+
+     SyntaxKind Handle()
+     {
+         SyntaxKind matched;
+         if (CurrentCharacter == option1)
+         {
+             matched = typeIfOption1;
+         }
+         else if (CurrentCharacter == option2)
+         {
+             matched = typeIfOption2;
+         }
+         else
+         {
+             return typeIfOnlyFirst;
+         }
+
+         TakeCurrent();
+         return matched;
+     }
+ }
+
+ // Handler for the VerbatimStringLiteral state. Scans up to the next double quote. A doubled
+ // quote is an escaped quote and keeps the tokenizer in this state; a single quote ends the
+ // literal. Reaching end of file first records an unterminated-string error. In both of the
+ // latter cases a StringLiteral token is emitted and the tokenizer returns to Data.
+ private StateResult VerbatimStringLiteral()
+ {
+     TakeUntil(static c => c == '"');
+
+     if (CurrentCharacter != '"')
+     {
+         if (EndOfFile)
+         {
+             var endOfFileSpan = new SourceSpan(CurrentStart, contentLength: 1 /* end of file */);
+             CurrentErrors.Add(
+                 RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral(endOfFileSpan));
+         }
+     }
+     else
+     {
+         TakeCurrent();
+         if (CurrentCharacter == '"')
+         {
+             // Stay in the literal, this is an escaped "
+             TakeCurrent();
+             return Stay();
+         }
+     }
+
+     var literal = EndToken(SyntaxKind.StringLiteral);
+     return Transition(CSharpTokenizerState.Data, literal);
+ }
+
+ // Handler for the QuotedCharacterLiteral state: scans a '...' literal via QuotedLiteral.
+ private StateResult QuotedCharacterLiteral() => QuotedLiteral('\'', IsEndQuotedCharacterLiteral, SyntaxKind.CharacterLiteral);
+
+ // Handler for the QuotedStringLiteral state: scans a "..." literal via QuotedLiteral.
+ private StateResult QuotedStringLiteral() => QuotedLiteral('\"', IsEndQuotedStringLiteral, SyntaxKind.StringLiteral);
+
+ // Stop predicates handed to QuotedLiteral: scanning pauses at a backslash, at the closing
+ // quote of the respective literal, or at a newline character. Kept in static fields so the
+ // same delegate instances are reused on every call.
+ private static readonly Func IsEndQuotedCharacterLiteral = static (c) => c == '\\' || c == '\'' || SyntaxFacts.IsNewLine(c);
+ private static readonly Func IsEndQuotedStringLiteral = static (c) => c == '\\' || c == '\"' || SyntaxFacts.IsNewLine(c);
+
+ // Shared scanner for character and regular string literals.
+ //   quote              - the delimiter that closes the literal.
+ //   isEndQuotedLiteral - predicate telling TakeUntil where to stop scanning.
+ //   literalType        - kind of the token emitted when the literal ends.
+ // Returns Stay() after handling a backslash so the next dispatch resumes scanning the same
+ // literal; otherwise emits the token and returns to the Data state.
+ private StateResult QuotedLiteral(char quote, Func isEndQuotedLiteral, SyntaxKind literalType)
+ {
+ TakeUntil(isEndQuotedLiteral);
+ if (CurrentCharacter == '\\')
+ {
+ TakeCurrent(); // Take the '\'
+
+ // If the backslash is followed by the closing quote or another backslash, consume that
+ // character too so it is treated as escaped rather than as a delimiter or a new escape.
+ if (CurrentCharacter == quote || CurrentCharacter == '\\')
+ {
+ TakeCurrent(); // Take it so that we don't prematurely end the literal.
+ }
+ return Stay();
+ }
+ else if (EndOfFile || SyntaxFacts.IsNewLine(CurrentCharacter))
+ {
+ // The literal was cut short by a newline or the end of the file. The same
+ // "unterminated string literal" diagnostic is used for both literal kinds.
+ CurrentErrors.Add(
+ RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral(
+ new SourceSpan(CurrentStart, contentLength: 1 /* " */)));
+ }
+ else
+ {
+ // Not a backslash, newline or end of file, so this consumes the closing quote.
+ TakeCurrent(); // No-op if at EOF
+ }
+ return Transition(CSharpTokenizerState.Data, EndToken(literalType));
+ }
+
+ // CSharp Spec §2.3.2
+ // Handler for the BlockComment state. Scans to the next '*'; if that star is followed by
+ // '/', the comment token is emitted and the tokenizer returns to Data, otherwise it stays
+ // in this state and keeps scanning. Hitting end of file first records a
+ // "block comment not terminated" error and emits what was read as the comment token.
+ private StateResult BlockComment()
+ {
+     TakeUntil(static c => c == '*');
+
+     if (EndOfFile)
+     {
+         var endOfFileSpan = new SourceSpan(CurrentStart, contentLength: 1 /* end of file */);
+         CurrentErrors.Add(
+             RazorDiagnosticFactory.CreateParsing_BlockCommentNotTerminated(endOfFileSpan));
+
+         return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment));
+     }
+
+     if (CurrentCharacter != '*')
+     {
+         return Stay();
+     }
+
+     TakeCurrent();
+     if (CurrentCharacter != '/')
+     {
+         // A lone star inside the comment; keep scanning.
+         return Stay();
+     }
+
+     TakeCurrent();
+     return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment));
+ }
+
+ // CSharp Spec §2.3.2
+ // Scans the rest of a "//" comment up to, but not including, the next newline character
+ // and emits it as a comment token.
+ private StateResult SingleLineComment()
+ {
+     TakeUntil(static c => SyntaxFacts.IsNewLine(c));
+     var comment = EndToken(SyntaxKind.CSharpComment);
+     return Stay(comment);
+ }
+
+ // CSharp Spec §2.4.4
+ // Scans a numeric literal: a leading "0x" (matched case-sensitively) selects the
+ // hexadecimal scanner, anything else the decimal scanner.
+ private StateResult NumericLiteral()
+     => TakeAll("0x", caseSensitive: true) ? HexLiteral() : DecimalLiteral();
+
+ // Scans the digits of a hexadecimal literal (the prefix was already consumed), then an
+ // optional integer suffix, and emits an IntegerLiteral token.
+ private StateResult HexLiteral()
+ {
+     TakeUntil(static c => !IsHexDigit(c));
+     TakeIntegerSuffix();
+
+     var literal = EndToken(SyntaxKind.IntegerLiteral);
+     return Stay(literal);
+ }
+
+ // Scans a run of decimal digits and decides what kind of literal it belongs to:
+ // a '.' followed by a digit continues as a real literal, an exponent marker or real-type
+ // suffix makes it a real literal as well, and anything else ends an integer literal
+ // (with an optional integer suffix).
+ private StateResult DecimalLiteral()
+ {
+     TakeUntil(static c => !char.IsDigit(c));
+
+     if (CurrentCharacter == '.' && char.IsDigit(Peek()))
+     {
+         return RealLiteral();
+     }
+
+     var next = CurrentCharacter;
+     if ((next is 'E' or 'e') || IsRealLiteralSuffix(next))
+     {
+         return RealLiteralExponentPart();
+     }
+
+     TakeIntegerSuffix();
+     return Stay(EndToken(SyntaxKind.IntegerLiteral));
+ }
+
+ // Finishes a real literal: consumes an optional exponent ('E' or 'e', an optional sign,
+ // then digits) followed by an optional real-type suffix, and emits a RealLiteral token.
+ private StateResult RealLiteralExponentPart()
+ {
+     if (CurrentCharacter is 'E' or 'e')
+     {
+         TakeCurrent();
+
+         if (CurrentCharacter is '+' or '-')
+         {
+             TakeCurrent();
+         }
+
+         TakeUntil(static c => !char.IsDigit(c));
+     }
+
+     if (IsRealLiteralSuffix(CurrentCharacter))
+     {
+         TakeCurrent();
+     }
+
+     var literal = EndToken(SyntaxKind.RealLiteral);
+     return Stay(literal);
+ }
+
+ // CSharp Spec §2.4.4.3
+ // Scans the fractional part of a real literal. Expects to be positioned on the '.', with
+ // a digit right after it, then continues with the exponent and suffix handling.
+ private StateResult RealLiteral()
+ {
+     AssertCurrent('.');
+     TakeCurrent();
+
+     Debug.Assert(char.IsDigit(CurrentCharacter));
+     TakeUntil(static c => !char.IsDigit(c));
+
+     return RealLiteralExponentPart();
+ }
+
+ // Consumes an optional integer suffix: 'u' or 'l' in either case, optionally followed by
+ // the other of the two letters (for example "UL" or "lu"). Consumes nothing when the
+ // current character is neither letter.
+ private void TakeIntegerSuffix()
+ {
+     var first = char.ToLowerInvariant(CurrentCharacter);
+     if (first != 'u' && first != 'l')
+     {
+         return;
+     }
+
+     TakeCurrent();
+
+     // Only the letter that was not seen yet may complete a two-letter suffix.
+     var counterpart = first == 'u' ? 'l' : 'u';
+     if (char.ToLowerInvariant(CurrentCharacter) == counterpart)
+     {
+         TakeCurrent();
+     }
+ }
+
+ // CSharp Spec §2.4.2
+ // Scans an identifier and emits it as a Keyword token when its text is in the keyword
+ // table, or as an Identifier token otherwise. The token is created directly from the
+ // buffer text (not through EndToken), after which the buffer and pending errors are cleared.
+ private StateResult Identifier()
+ {
+     Debug.Assert(SyntaxFacts.IsIdentifierStartCharacter(CurrentCharacter));
+     TakeCurrent();
+     TakeUntil(static c => !SyntaxFacts.IsIdentifierPartCharacter(c));
+
+     SyntaxToken identifier = null;
+     if (HaveContent)
+     {
+         var text = Buffer.ToString();
+         var kind = _keywords.ContainsKey(text) ? SyntaxKind.Keyword : SyntaxKind.Identifier;
+
+         identifier = SyntaxFactory.Token(kind, text);
+
+         Buffer.Clear();
+         CurrentErrors.Clear();
+     }
+
+     return Stay(identifier);
+ }
+
+ // Moves to the given state without emitting a token.
+ private StateResult Transition(CSharpTokenizerState state)
+     => Transition((int)state, result: null);
+
+ // Moves to the given state and emits the supplied token as the result of this step.
+ private StateResult Transition(CSharpTokenizerState state, SyntaxToken result)
+     => Transition((int)state, result);
+
+ // Returns true when the character is a real-literal type suffix: F, D or M in either case.
+ private static bool IsRealLiteralSuffix(char character)
+ {
+     switch (character)
+     {
+         case 'F':
+         case 'f':
+         case 'D':
+         case 'd':
+         case 'M':
+         case 'm':
+             return true;
+         default:
+             return false;
+     }
+ }
+
+ // Returns true for the ASCII hexadecimal digits 0-9, A-F and a-f.
+ private static bool IsHexDigit(char value)
+     => value is (>= '0' and <= '9') or (>= 'A' and <= 'F') or (>= 'a' and <= 'f');
+
+ // Returns the Roslyn keyword kind whose text equals the token's content, or null when the
+ // token is null or its content is not in the keyword table.
+ internal override CSharpSyntaxKind? GetTokenKeyword(SyntaxToken token)
+ {
+     if (token == null)
+     {
+         return null;
+     }
+
+     return _keywords.TryGetValue(token.Content, out var kind)
+         ? kind
+         : (CSharpSyntaxKind?)null;
+ }
+
+ // States of this tokenizer's state machine; Dispatch() maps each one to its handler method.
+ private enum CSharpTokenizerState
+ {
+ // Default state; also the start state (see StartState).
+ Data,
+ // Inside a /* ... */ comment.
+ BlockComment,
+ // Inside a '...' literal.
+ QuotedCharacterLiteral,
+ // Inside a "..." literal.
+ QuotedStringLiteral,
+ // Inside an @"..." literal.
+ VerbatimStringLiteral,
+
+ // Razor Comments - need to be the same for HTML and CSharp
+ // These members take their values from RazorCommentTokenizerState.
+ AfterRazorCommentTransition = RazorCommentTokenizerState.AfterRazorCommentTransition,
+ EscapedRazorCommentTransition = RazorCommentTokenizerState.EscapedRazorCommentTransition,
+ RazorCommentBody = RazorCommentTokenizerState.RazorCommentBody,
+ StarAfterRazorCommentBody = RazorCommentTokenizerState.StarAfterRazorCommentBody,
+ AtTokenAfterRazorCommentBody = RazorCommentTokenizerState.AtTokenAfterRazorCommentBody,
+ }
+}
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/ParserContext.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/ParserContext.cs
index b96ded3d778..3cebbf42426 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/ParserContext.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/ParserContext.cs
@@ -42,6 +42,8 @@ public ParserContext(RazorSourceDocument source, RazorParserOptions options)
public bool ParseLeadingDirectives { get; }
+ public bool UseRoslynTokenizer { get; }
+
public bool EnableSpanEditHandlers { get; }
public bool WhiteSpaceIsSignificantToAncestorBlock { get; set; }
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpLanguageCharacteristics.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpLanguageCharacteristics.cs
new file mode 100644
index 00000000000..9955a3a6154
--- /dev/null
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpLanguageCharacteristics.cs
@@ -0,0 +1,173 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using System.Collections.Generic;
+using System.Diagnostics;
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+// Removal of this type is tracked by https://github.com/dotnet/razor/issues/8445
+internal class RoslynCSharpLanguageCharacteristics : LanguageCharacteristics
+{
+ // Sample text for token kinds that have one fixed spelling (operators and punctuation).
+ // GetSample() consults this table first and falls back to resource strings for other kinds.
+ private static readonly Dictionary _tokenSamples = new Dictionary()
+ {
+ { SyntaxKind.Arrow, "->" },
+ { SyntaxKind.Minus, "-" },
+ { SyntaxKind.Decrement, "--" },
+ { SyntaxKind.MinusAssign, "-=" },
+ { SyntaxKind.NotEqual, "!=" },
+ { SyntaxKind.Not, "!" },
+ { SyntaxKind.Modulo, "%" },
+ { SyntaxKind.ModuloAssign, "%=" },
+ { SyntaxKind.AndAssign, "&=" },
+ { SyntaxKind.And, "&" },
+ { SyntaxKind.DoubleAnd, "&&" },
+ { SyntaxKind.LeftParenthesis, "(" },
+ { SyntaxKind.RightParenthesis, ")" },
+ { SyntaxKind.Star, "*" },
+ { SyntaxKind.MultiplyAssign, "*=" },
+ { SyntaxKind.Comma, "," },
+ { SyntaxKind.Dot, "." },
+ { SyntaxKind.Slash, "/" },
+ { SyntaxKind.DivideAssign, "/=" },
+ { SyntaxKind.DoubleColon, "::" },
+ { SyntaxKind.Colon, ":" },
+ { SyntaxKind.Semicolon, ";" },
+ { SyntaxKind.QuestionMark, "?" },
+ { SyntaxKind.NullCoalesce, "??" },
+ { SyntaxKind.RightBracket, "]" },
+ { SyntaxKind.LeftBracket, "[" },
+ { SyntaxKind.XorAssign, "^=" },
+ { SyntaxKind.Xor, "^" },
+ { SyntaxKind.LeftBrace, "{" },
+ { SyntaxKind.OrAssign, "|=" },
+ { SyntaxKind.DoubleOr, "||" },
+ { SyntaxKind.Or, "|" },
+ { SyntaxKind.RightBrace, "}" },
+ { SyntaxKind.Tilde, "~" },
+ { SyntaxKind.Plus, "+" },
+ { SyntaxKind.PlusAssign, "+=" },
+ { SyntaxKind.Increment, "++" },
+ { SyntaxKind.LessThan, "<" },
+ { SyntaxKind.LessThanEqual, "<=" },
+ { SyntaxKind.LeftShift, "<<" },
+ { SyntaxKind.LeftShiftAssign, "<<=" },
+ { SyntaxKind.Assign, "=" },
+ { SyntaxKind.Equals, "==" },
+ { SyntaxKind.GreaterThan, ">" },
+ { SyntaxKind.GreaterThanEqual, ">=" },
+ { SyntaxKind.RightShift, ">>" },
+ { SyntaxKind.RightShiftAssign, ">>=" },
+ { SyntaxKind.Hash, "#" },
+ { SyntaxKind.Transition, "@" },
+ };
+
+ // The shared instance handed out by the Instance property.
+ private static readonly RoslynCSharpLanguageCharacteristics _instance = new RoslynCSharpLanguageCharacteristics();
+
+ // Protected, so instances can only be created by this class or by derived types.
+ protected RoslynCSharpLanguageCharacteristics()
+ {
+ }
+
+ // Returns the shared instance of this class.
+ public static RoslynCSharpLanguageCharacteristics Instance => _instance;
+
+ // Creates the Roslyn-based C# tokenizer over the given source reader.
+ public override CSharpTokenizer CreateTokenizer(SeekableTextReader source)
+     => new RoslynCSharpTokenizer(source);
+
+ // Builds a token of the given kind and content carrying the supplied diagnostics.
+ protected override SyntaxToken CreateToken(string content, SyntaxKind kind, RazorDiagnostic[] errors)
+     => SyntaxFactory.Token(kind, content, errors);
+
+ // Returns sample text for a token kind: the fixed spelling for operators and punctuation
+ // listed in the sample table, otherwise a resource string for the token category, and the
+ // "unknown token" resource string when the kind is not recognised.
+ public override string GetSample(SyntaxKind kind)
+ {
+     if (_tokenSamples.TryGetValue(kind, out var fixedSample))
+     {
+         return fixedSample;
+     }
+
+     return kind switch
+     {
+         SyntaxKind.Identifier => Resources.CSharpToken_Identifier,
+         SyntaxKind.Keyword => Resources.CSharpToken_Keyword,
+         SyntaxKind.IntegerLiteral => Resources.CSharpToken_IntegerLiteral,
+         SyntaxKind.NewLine => Resources.CSharpToken_Newline,
+         SyntaxKind.Whitespace => Resources.CSharpToken_Whitespace,
+         SyntaxKind.CSharpComment => Resources.CSharpToken_Comment,
+         SyntaxKind.RealLiteral => Resources.CSharpToken_RealLiteral,
+         SyntaxKind.CharacterLiteral => Resources.CSharpToken_CharacterLiteral,
+         SyntaxKind.StringLiteral => Resources.CSharpToken_StringLiteral,
+         _ => Resources.Token_Unknown,
+     };
+ }
+
+ // Creates a Marker token with empty content.
+ public override SyntaxToken CreateMarkerToken()
+     => SyntaxFactory.Token(SyntaxKind.Marker, string.Empty);
+
+ // Translates a language-neutral KnownTokenType into the corresponding SyntaxKind.
+ // Types without a mapping here translate to SyntaxKind.Marker.
+ public override SyntaxKind GetKnownTokenType(KnownTokenType type)
+ {
+     return type switch
+     {
+         KnownTokenType.Identifier => SyntaxKind.Identifier,
+         KnownTokenType.Keyword => SyntaxKind.Keyword,
+         KnownTokenType.NewLine => SyntaxKind.NewLine,
+         KnownTokenType.Whitespace => SyntaxKind.Whitespace,
+         KnownTokenType.Transition => SyntaxKind.Transition,
+         KnownTokenType.CommentStart => SyntaxKind.RazorCommentTransition,
+         KnownTokenType.CommentStar => SyntaxKind.RazorCommentStar,
+         KnownTokenType.CommentBody => SyntaxKind.RazorCommentLiteral,
+         _ => SyntaxKind.Marker,
+     };
+ }
+
+ // Returns the counterpart of a bracket kind: each opening brace, bracket, parenthesis or
+ // angle bracket maps to its closing kind and vice versa. Any other kind trips a debug
+ // failure and yields SyntaxKind.Marker.
+ public override SyntaxKind FlipBracket(SyntaxKind bracket)
+ {
+     return bracket switch
+     {
+         // Opening -> closing.
+         SyntaxKind.LeftBrace => SyntaxKind.RightBrace,
+         SyntaxKind.LeftBracket => SyntaxKind.RightBracket,
+         SyntaxKind.LeftParenthesis => SyntaxKind.RightParenthesis,
+         SyntaxKind.LessThan => SyntaxKind.GreaterThan,
+
+         // Closing -> opening.
+         SyntaxKind.RightBrace => SyntaxKind.LeftBrace,
+         SyntaxKind.RightBracket => SyntaxKind.LeftBracket,
+         SyntaxKind.RightParenthesis => SyntaxKind.LeftParenthesis,
+         SyntaxKind.GreaterThan => SyntaxKind.LessThan,
+
+         _ => NotABracket(),
+     };
+
+     static SyntaxKind NotABracket()
+     {
+         Debug.Fail("FlipBracket must be called with a bracket character");
+         return SyntaxKind.Marker;
+     }
+ }
+}
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpTokenizer.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpTokenizer.cs
new file mode 100644
index 00000000000..8a1336a2183
--- /dev/null
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpTokenizer.cs
@@ -0,0 +1,789 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Frozen;
+using System.Diagnostics;
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+using Microsoft.CodeAnalysis.CSharp;
+
+using SyntaxFactory = Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax.SyntaxFactory;
+using CSharpSyntaxKind = Microsoft.CodeAnalysis.CSharp.SyntaxKind;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+internal class RoslynCSharpTokenizer : CSharpTokenizer
+{
+    // Maps the first character of an operator or punctuation token to a handler that consumes any
+    // further characters belonging to the token and reports the resulting kind.
+    private readonly Dictionary<char, Func<SyntaxKind>> _operatorHandlers;
+
+    // Keyword text (as returned by SyntaxFacts.GetText) mapped to its Roslyn syntax kind.
+    // Used both to classify identifiers as keywords and to recover the keyword kind of a token.
+    private static readonly FrozenDictionary<string, CSharpSyntaxKind> _keywords = (new[] {
+        CSharpSyntaxKind.AwaitKeyword,
+        CSharpSyntaxKind.AbstractKeyword,
+        CSharpSyntaxKind.ByteKeyword,
+        CSharpSyntaxKind.ClassKeyword,
+        CSharpSyntaxKind.DelegateKeyword,
+        CSharpSyntaxKind.EventKeyword,
+        CSharpSyntaxKind.FixedKeyword,
+        CSharpSyntaxKind.IfKeyword,
+        CSharpSyntaxKind.InternalKeyword,
+        CSharpSyntaxKind.NewKeyword,
+        CSharpSyntaxKind.OverrideKeyword,
+        CSharpSyntaxKind.ReadOnlyKeyword,
+        CSharpSyntaxKind.ShortKeyword,
+        CSharpSyntaxKind.StructKeyword,
+        CSharpSyntaxKind.TryKeyword,
+        CSharpSyntaxKind.UnsafeKeyword,
+        CSharpSyntaxKind.VolatileKeyword,
+        CSharpSyntaxKind.AsKeyword,
+        CSharpSyntaxKind.DoKeyword,
+        CSharpSyntaxKind.IsKeyword,
+        CSharpSyntaxKind.ParamsKeyword,
+        CSharpSyntaxKind.RefKeyword,
+        CSharpSyntaxKind.SwitchKeyword,
+        CSharpSyntaxKind.UShortKeyword,
+        CSharpSyntaxKind.WhileKeyword,
+        CSharpSyntaxKind.CaseKeyword,
+        CSharpSyntaxKind.ConstKeyword,
+        CSharpSyntaxKind.ExplicitKeyword,
+        CSharpSyntaxKind.FloatKeyword,
+        CSharpSyntaxKind.NullKeyword,
+        CSharpSyntaxKind.SizeOfKeyword,
+        CSharpSyntaxKind.TypeOfKeyword,
+        CSharpSyntaxKind.ImplicitKeyword,
+        CSharpSyntaxKind.PrivateKeyword,
+        CSharpSyntaxKind.ThisKeyword,
+        CSharpSyntaxKind.UsingKeyword,
+        CSharpSyntaxKind.ExternKeyword,
+        CSharpSyntaxKind.ReturnKeyword,
+        CSharpSyntaxKind.StackAllocKeyword,
+        CSharpSyntaxKind.UIntKeyword,
+        CSharpSyntaxKind.BaseKeyword,
+        CSharpSyntaxKind.CatchKeyword,
+        CSharpSyntaxKind.ContinueKeyword,
+        CSharpSyntaxKind.DoubleKeyword,
+        CSharpSyntaxKind.ForKeyword,
+        CSharpSyntaxKind.InKeyword,
+        CSharpSyntaxKind.LockKeyword,
+        CSharpSyntaxKind.ObjectKeyword,
+        CSharpSyntaxKind.ProtectedKeyword,
+        CSharpSyntaxKind.StaticKeyword,
+        CSharpSyntaxKind.FalseKeyword,
+        CSharpSyntaxKind.PublicKeyword,
+        CSharpSyntaxKind.SByteKeyword,
+        CSharpSyntaxKind.ThrowKeyword,
+        CSharpSyntaxKind.VirtualKeyword,
+        CSharpSyntaxKind.DecimalKeyword,
+        CSharpSyntaxKind.ElseKeyword,
+        CSharpSyntaxKind.OperatorKeyword,
+        CSharpSyntaxKind.StringKeyword,
+        CSharpSyntaxKind.ULongKeyword,
+        CSharpSyntaxKind.BoolKeyword,
+        CSharpSyntaxKind.CharKeyword,
+        CSharpSyntaxKind.DefaultKeyword,
+        CSharpSyntaxKind.ForEachKeyword,
+        CSharpSyntaxKind.LongKeyword,
+        CSharpSyntaxKind.VoidKeyword,
+        CSharpSyntaxKind.EnumKeyword,
+        CSharpSyntaxKind.FinallyKeyword,
+        CSharpSyntaxKind.IntKeyword,
+        CSharpSyntaxKind.OutKeyword,
+        CSharpSyntaxKind.SealedKeyword,
+        CSharpSyntaxKind.TrueKeyword,
+        CSharpSyntaxKind.GotoKeyword,
+        CSharpSyntaxKind.UncheckedKeyword,
+        CSharpSyntaxKind.InterfaceKeyword,
+        CSharpSyntaxKind.BreakKeyword,
+        CSharpSyntaxKind.CheckedKeyword,
+        CSharpSyntaxKind.NamespaceKeyword,
+        CSharpSyntaxKind.WhenKeyword,
+        CSharpSyntaxKind.WhereKeyword }).ToFrozenDictionary(keySelector: k => SyntaxFacts.GetText(k));
+
+    /// <summary>
+    /// Creates a tokenizer over <paramref name="source"/>, puts it in its start state, and registers
+    /// the per-character operator and punctuation handlers.
+    /// </summary>
+    /// <param name="source">The text reader the tokenizer consumes characters from.</param>
+    public RoslynCSharpTokenizer(SeekableTextReader source)
+        : base(source)
+    {
+        base.CurrentState = StartState;
+
+        _operatorHandlers = new Dictionary<char, Func<SyntaxKind>>()
+        {
+            { '-', MinusOperator },
+            { '<', LessThanOperator },
+            { '>', GreaterThanOperator },
+            { '&', CreateTwoCharOperatorHandler(SyntaxKind.And, '=', SyntaxKind.AndAssign, '&', SyntaxKind.DoubleAnd) },
+            { '|', CreateTwoCharOperatorHandler(SyntaxKind.Or, '=', SyntaxKind.OrAssign, '|', SyntaxKind.DoubleOr) },
+            { '+', CreateTwoCharOperatorHandler(SyntaxKind.Plus, '=', SyntaxKind.PlusAssign, '+', SyntaxKind.Increment) },
+            // Note: "=>" is reported as GreaterThanEqual; GetTokenContent tells it apart from ">="
+            // by looking at the first buffered character.
+            { '=', CreateTwoCharOperatorHandler(SyntaxKind.Assign, '=', SyntaxKind.Equals, '>', SyntaxKind.GreaterThanEqual) },
+            { '!', CreateTwoCharOperatorHandler(SyntaxKind.Not, '=', SyntaxKind.NotEqual) },
+            { '%', CreateTwoCharOperatorHandler(SyntaxKind.Modulo, '=', SyntaxKind.ModuloAssign) },
+            { '*', CreateTwoCharOperatorHandler(SyntaxKind.Star, '=', SyntaxKind.MultiplyAssign) },
+            { ':', CreateTwoCharOperatorHandler(SyntaxKind.Colon, ':', SyntaxKind.DoubleColon) },
+            { '?', CreateTwoCharOperatorHandler(SyntaxKind.QuestionMark, '?', SyntaxKind.NullCoalesce) },
+            { '^', CreateTwoCharOperatorHandler(SyntaxKind.Xor, '=', SyntaxKind.XorAssign) },
+            { '(', () => SyntaxKind.LeftParenthesis },
+            { ')', () => SyntaxKind.RightParenthesis },
+            { '{', () => SyntaxKind.LeftBrace },
+            { '}', () => SyntaxKind.RightBrace },
+            { '[', () => SyntaxKind.LeftBracket },
+            { ']', () => SyntaxKind.RightBracket },
+            { ',', () => SyntaxKind.Comma },
+            { ';', () => SyntaxKind.Semicolon },
+            { '~', () => SyntaxKind.Tilde },
+            { '#', () => SyntaxKind.Hash }
+        };
+    }
+
+    /// <summary>The state the tokenizer starts in: <see cref="CSharpTokenizerState.Data"/>.</summary>
+    protected override int StartState
+    {
+        get { return (int)CSharpTokenizerState.Data; }
+    }
+
+    /// <summary>The base class's current state, viewed as a <see cref="CSharpTokenizerState"/>.</summary>
+    private new CSharpTokenizerState? CurrentState
+    {
+        get { return (CSharpTokenizerState?)base.CurrentState; }
+    }
+
+    /// <summary>Kind used for the body of a Razor comment.</summary>
+    public override SyntaxKind RazorCommentKind
+    {
+        get { return SyntaxKind.RazorCommentLiteral; }
+    }
+
+    /// <summary>Kind used for the transition character of a Razor comment.</summary>
+    public override SyntaxKind RazorCommentTransitionKind
+    {
+        get { return SyntaxKind.RazorCommentTransition; }
+    }
+
+    /// <summary>Kind used for the star character of a Razor comment.</summary>
+    public override SyntaxKind RazorCommentStarKind
+    {
+        get { return SyntaxKind.RazorCommentStar; }
+    }
+
+    /// <summary>
+    /// Runs the handler that corresponds to the current tokenizer state.
+    /// </summary>
+    /// <returns>
+    /// The handler's result. For a state with no handler a debug failure is raised and the default
+    /// <see cref="StateResult"/> is returned.
+    /// </returns>
+    protected override StateResult Dispatch()
+    {
+        return CurrentState switch
+        {
+            CSharpTokenizerState.Data => Data(),
+            CSharpTokenizerState.BlockComment => BlockComment(),
+            CSharpTokenizerState.QuotedCharacterLiteral => QuotedCharacterLiteral(),
+            CSharpTokenizerState.QuotedStringLiteral => QuotedStringLiteral(),
+            CSharpTokenizerState.VerbatimStringLiteral => VerbatimStringLiteral(),
+            CSharpTokenizerState.AfterRazorCommentTransition => AfterRazorCommentTransition(),
+            CSharpTokenizerState.EscapedRazorCommentTransition => EscapedRazorCommentTransition(),
+            CSharpTokenizerState.RazorCommentBody => RazorCommentBody(),
+            CSharpTokenizerState.StarAfterRazorCommentBody => StarAfterRazorCommentBody(),
+            CSharpTokenizerState.AtTokenAfterRazorCommentBody => AtTokenAfterRazorCommentBody(),
+            _ => InvalidState(),
+        };
+
+        static StateResult InvalidState()
+        {
+            Debug.Fail("Invalid TokenizerState");
+            return default(StateResult);
+        }
+    }
+
+    // Optimize memory allocation by returning constants for the most frequent cases
+    /// <summary>
+    /// Returns the text of the token currently held in the buffer, using a constant string for
+    /// common one-, two- and three-character tokens and deferring to the base implementation otherwise.
+    /// </summary>
+    /// <param name="type">The kind of the token being completed.</param>
+    protected override string GetTokenContent(SyntaxKind type)
+    {
+        // null means "no constant applies"; the base implementation produces the content then.
+        var constant = Buffer.Length switch
+        {
+            1 => OneCharacter(type, Buffer[0]),
+            2 => TwoCharacters(type, Buffer[0]),
+            3 => ThreeCharacters(type),
+            _ => null,
+        };
+
+        return constant ?? base.GetTokenContent(type);
+
+        static string OneCharacter(SyntaxKind kind, char first) => kind switch
+        {
+            SyntaxKind.IntegerLiteral => first switch
+            {
+                '0' => "0",
+                '1' => "1",
+                '2' => "2",
+                '3' => "3",
+                '4' => "4",
+                '5' => "5",
+                '6' => "6",
+                '7' => "7",
+                '8' => "8",
+                '9' => "9",
+                _ => null,
+            },
+            SyntaxKind.NewLine => first == '\n' ? "\n" : null,
+            SyntaxKind.Whitespace => first switch
+            {
+                ' ' => " ",
+                '\t' => "\t",
+                _ => null,
+            },
+            SyntaxKind.Minus => "-",
+            SyntaxKind.Not => "!",
+            SyntaxKind.Modulo => "%",
+            SyntaxKind.And => "&",
+            SyntaxKind.LeftParenthesis => "(",
+            SyntaxKind.RightParenthesis => ")",
+            SyntaxKind.Star => "*",
+            SyntaxKind.Comma => ",",
+            SyntaxKind.Dot => ".",
+            SyntaxKind.Slash => "/",
+            SyntaxKind.Colon => ":",
+            SyntaxKind.Semicolon => ";",
+            SyntaxKind.QuestionMark => "?",
+            SyntaxKind.RightBracket => "]",
+            SyntaxKind.LeftBracket => "[",
+            SyntaxKind.Xor => "^",
+            SyntaxKind.LeftBrace => "{",
+            SyntaxKind.Or => "|",
+            SyntaxKind.RightBrace => "}",
+            SyntaxKind.Tilde => "~",
+            SyntaxKind.Plus => "+",
+            SyntaxKind.LessThan => "<",
+            SyntaxKind.Assign => "=",
+            SyntaxKind.GreaterThan => ">",
+            SyntaxKind.Hash => "#",
+            SyntaxKind.Transition => "@",
+            _ => null,
+        };
+
+        static string TwoCharacters(SyntaxKind kind, char first) => kind switch
+        {
+            SyntaxKind.NewLine => "\r\n",
+            SyntaxKind.Arrow => "->",
+            SyntaxKind.Decrement => "--",
+            SyntaxKind.MinusAssign => "-=",
+            SyntaxKind.NotEqual => "!=",
+            SyntaxKind.ModuloAssign => "%=",
+            SyntaxKind.AndAssign => "&=",
+            SyntaxKind.DoubleAnd => "&&",
+            SyntaxKind.MultiplyAssign => "*=",
+            SyntaxKind.DivideAssign => "/=",
+            SyntaxKind.DoubleColon => "::",
+            SyntaxKind.NullCoalesce => "??",
+            SyntaxKind.XorAssign => "^=",
+            SyntaxKind.OrAssign => "|=",
+            SyntaxKind.DoubleOr => "||",
+            SyntaxKind.PlusAssign => "+=",
+            SyntaxKind.Increment => "++",
+            SyntaxKind.LessThanEqual => "<=",
+            SyntaxKind.LeftShift => "<<",
+            SyntaxKind.Equals => "==",
+            // This kind is shared by two spellings; the first character tells them apart.
+            SyntaxKind.GreaterThanEqual => first == '=' ? "=>" : ">=",
+            SyntaxKind.RightShift => ">>",
+            _ => null,
+        };
+
+        static string ThreeCharacters(SyntaxKind kind) => kind switch
+        {
+            SyntaxKind.LeftShiftAssign => "<<=",
+            SyntaxKind.RightShiftAssign => ">>=",
+            _ => null,
+        };
+    }
+
+    /// <summary>
+    /// Builds a token of the given kind and content, carrying the supplied diagnostics.
+    /// </summary>
+    protected override SyntaxToken CreateToken(string content, SyntaxKind kind, RazorDiagnostic[] errors)
+        => SyntaxFactory.Token(kind, content, errors);
+
+    /// <summary>
+    /// Handler for the <see cref="CSharpTokenizerState.Data"/> state: inspects the current character and
+    /// either completes a token or moves to the state that handles the construct that starts here.
+    /// </summary>
+    private StateResult Data()
+    {
+        var first = CurrentCharacter;
+
+        if (SyntaxFacts.IsNewLine(first))
+        {
+            // CSharp Spec §2.3.1
+            TakeCurrent();
+
+            // "\r\n" is a single new-line token.
+            if (first == '\r' && CurrentCharacter == '\n')
+            {
+                TakeCurrent();
+            }
+
+            return Stay(EndToken(SyntaxKind.NewLine));
+        }
+
+        if (SyntaxFacts.IsWhitespace(first))
+        {
+            // CSharp Spec §2.3.3
+            TakeUntil(static c => !SyntaxFacts.IsWhitespace(c));
+            return Stay(EndToken(SyntaxKind.Whitespace));
+        }
+
+        if (SyntaxFacts.IsIdentifierStartCharacter(first))
+        {
+            return Identifier();
+        }
+
+        if (char.IsDigit(first))
+        {
+            return NumericLiteral();
+        }
+
+        switch (first)
+        {
+            case '@':
+                return AtToken();
+
+            case '\'':
+                TakeCurrent();
+                return Transition(CSharpTokenizerState.QuotedCharacterLiteral);
+
+            case '"':
+                TakeCurrent();
+                return Transition(CSharpTokenizerState.QuotedStringLiteral);
+
+            case '.':
+                // A dot directly followed by a digit starts a real literal such as ".5".
+                if (char.IsDigit(Peek()))
+                {
+                    return RealLiteral();
+                }
+
+                return Stay(Single(SyntaxKind.Dot));
+
+            case '/':
+                TakeCurrent();
+
+                // The character after the slash decides between comments, "/=" and a plain slash.
+                switch (CurrentCharacter)
+                {
+                    case '/':
+                        TakeCurrent();
+                        return SingleLineComment();
+
+                    case '*':
+                        TakeCurrent();
+                        return Transition(CSharpTokenizerState.BlockComment);
+
+                    case '=':
+                        TakeCurrent();
+                        return Stay(EndToken(SyntaxKind.DivideAssign));
+
+                    default:
+                        return Stay(EndToken(SyntaxKind.Slash));
+                }
+
+            default:
+                return Stay(EndToken(Operator()));
+        }
+    }
+
+    /// <summary>
+    /// Consumes an '@' and decides, from the character that follows it, whether it starts a verbatim
+    /// string, a Razor comment, a possibly escaped transition, or is a plain transition token.
+    /// </summary>
+    private StateResult AtToken()
+    {
+        TakeCurrent();
+
+        switch (CurrentCharacter)
+        {
+            case '"':
+                TakeCurrent();
+                return Transition(CSharpTokenizerState.VerbatimStringLiteral);
+
+            case '*':
+                return Transition(
+                    CSharpTokenizerState.AfterRazorCommentTransition,
+                    EndToken(SyntaxKind.RazorCommentTransition));
+
+            case '@':
+                // Could be escaped comment transition
+                return Transition(
+                    CSharpTokenizerState.EscapedRazorCommentTransition,
+                    EndToken(SyntaxKind.Transition));
+
+            default:
+                return Stay(EndToken(SyntaxKind.Transition));
+        }
+    }
+
+    /// <summary>
+    /// Consumes the current character, emits it as a transition token, and returns to the
+    /// <see cref="CSharpTokenizerState.Data"/> state.
+    /// </summary>
+    private StateResult EscapedRazorCommentTransition()
+    {
+        TakeCurrent();
+        var transitionToken = EndToken(SyntaxKind.Transition);
+        return Transition(CSharpTokenizerState.Data, transitionToken);
+    }
+
+    /// <summary>
+    /// Consumes the current character and, when a handler is registered for it, lets that handler
+    /// consume any following characters and decide the token kind.
+    /// </summary>
+    /// <returns>
+    /// The kind reported by the handler, or <see cref="SyntaxKind.Marker"/> when no handler is
+    /// registered for the character.
+    /// </returns>
+    private SyntaxKind Operator()
+    {
+        var first = CurrentCharacter;
+        TakeCurrent();
+
+        return _operatorHandlers.TryGetValue(first, out var handler)
+            ? handler()
+            : SyntaxKind.Marker;
+    }
+
+    /// <summary>
+    /// Called after a '&lt;' has been consumed; also consumes a following '=' when present.
+    /// </summary>
+    private SyntaxKind LessThanOperator()
+    {
+        if (CurrentCharacter != '=')
+        {
+            return SyntaxKind.LessThan;
+        }
+
+        TakeCurrent();
+        return SyntaxKind.LessThanEqual;
+    }
+
+    /// <summary>
+    /// Called after a '&gt;' has been consumed; also consumes a following '=' when present.
+    /// </summary>
+    private SyntaxKind GreaterThanOperator()
+    {
+        if (CurrentCharacter != '=')
+        {
+            return SyntaxKind.GreaterThan;
+        }
+
+        TakeCurrent();
+        return SyntaxKind.GreaterThanEqual;
+    }
+
+    /// <summary>
+    /// Called after a '-' has been consumed; recognizes "->", "--" and "-=" and otherwise reports a
+    /// plain minus.
+    /// </summary>
+    private SyntaxKind MinusOperator()
+    {
+        switch (CurrentCharacter)
+        {
+            case '>':
+                TakeCurrent();
+                return SyntaxKind.Arrow;
+
+            case '-':
+                TakeCurrent();
+                return SyntaxKind.Decrement;
+
+            case '=':
+                TakeCurrent();
+                return SyntaxKind.MinusAssign;
+
+            default:
+                return SyntaxKind.Minus;
+        }
+    }
+
+    /// <summary>
+    /// Builds a handler for an operator that is either one character long or that character followed
+    /// by <paramref name="second"/>.
+    /// </summary>
+    /// <param name="typeIfOnlyFirst">Kind reported when the next character is not <paramref name="second"/>.</param>
+    /// <param name="second">The character that completes the two-character operator.</param>
+    /// <param name="typeIfBoth">Kind reported when <paramref name="second"/> follows; it is consumed.</param>
+    private Func<SyntaxKind> CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char second, SyntaxKind typeIfBoth)
+    {
+        return () =>
+        {
+            if (CurrentCharacter == second)
+            {
+                TakeCurrent();
+                return typeIfBoth;
+            }
+
+            return typeIfOnlyFirst;
+        };
+    }
+
+    /// <summary>
+    /// Builds a handler for an operator whose first character may be followed by one of two
+    /// alternative second characters.
+    /// </summary>
+    /// <param name="typeIfOnlyFirst">Kind reported when neither option follows.</param>
+    /// <param name="option1">First candidate second character; checked before <paramref name="option2"/>.</param>
+    /// <param name="typeIfOption1">Kind reported when <paramref name="option1"/> follows; it is consumed.</param>
+    /// <param name="option2">Second candidate second character.</param>
+    /// <param name="typeIfOption2">Kind reported when <paramref name="option2"/> follows; it is consumed.</param>
+    private Func<SyntaxKind> CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char option1, SyntaxKind typeIfOption1, char option2, SyntaxKind typeIfOption2)
+    {
+        return () =>
+        {
+            if (CurrentCharacter == option1)
+            {
+                TakeCurrent();
+                return typeIfOption1;
+            }
+            else if (CurrentCharacter == option2)
+            {
+                TakeCurrent();
+                return typeIfOption2;
+            }
+
+            return typeIfOnlyFirst;
+        };
+    }
+
+    /// <summary>
+    /// Handler for the body of a verbatim string: reads up to the next double quote, treats a doubled
+    /// quote as an escape, and reports an unterminated literal when the end of the file is reached.
+    /// </summary>
+    private StateResult VerbatimStringLiteral()
+    {
+        TakeUntil(static c => c == '"');
+
+        if (CurrentCharacter != '"')
+        {
+            if (EndOfFile)
+            {
+                CurrentErrors.Add(
+                    RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral(
+                        new SourceSpan(CurrentStart, contentLength: 1 /* end of file */)));
+            }
+        }
+        else
+        {
+            TakeCurrent();
+
+            if (CurrentCharacter == '"')
+            {
+                // Stay in the literal, this is an escaped "
+                TakeCurrent();
+                return Stay();
+            }
+        }
+
+        return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.StringLiteral));
+    }
+
+    /// <summary>Handler for the body of a single-quoted character literal.</summary>
+    private StateResult QuotedCharacterLiteral()
+    {
+        return QuotedLiteral('\'', IsEndQuotedCharacterLiteral, SyntaxKind.CharacterLiteral);
+    }
+
+    /// <summary>Handler for the body of a double-quoted (non-verbatim) string literal.</summary>
+    private StateResult QuotedStringLiteral()
+    {
+        return QuotedLiteral('\"', IsEndQuotedStringLiteral, SyntaxKind.StringLiteral);
+    }
+
+    // Stop characters while scanning a quoted literal: a backslash (escape), the closing quote, or a new line.
+    private static readonly Func<char, bool> IsEndQuotedCharacterLiteral = static (c) => c == '\\' || c == '\'' || SyntaxFacts.IsNewLine(c);
+    private static readonly Func<char, bool> IsEndQuotedStringLiteral = static (c) => c == '\\' || c == '\"' || SyntaxFacts.IsNewLine(c);
+
+    /// <summary>
+    /// Shared handler for quoted character and string literals.
+    /// </summary>
+    /// <param name="quote">The quote character that delimits the literal.</param>
+    /// <param name="isEndQuotedLiteral">Predicate identifying the characters at which scanning pauses.</param>
+    /// <param name="literalType">Kind of the token produced when the literal ends.</param>
+    /// <returns>
+    /// A result that stays in the current state after an escape sequence, otherwise a transition back
+    /// to <see cref="CSharpTokenizerState.Data"/> carrying the literal token.
+    /// </returns>
+    private StateResult QuotedLiteral(char quote, Func<char, bool> isEndQuotedLiteral, SyntaxKind literalType)
+    {
+        TakeUntil(isEndQuotedLiteral);
+        if (CurrentCharacter == '\\')
+        {
+            TakeCurrent(); // Take the '\'
+
+            // If the next char is the same quote that started this (or another backslash),
+            // take it so that we don't prematurely end the literal.
+            if (CurrentCharacter == quote || CurrentCharacter == '\\')
+            {
+                TakeCurrent();
+            }
+
+            return Stay();
+        }
+        else if (EndOfFile || SyntaxFacts.IsNewLine(CurrentCharacter))
+        {
+            // The literal ran into the end of the line or file without a closing quote.
+            CurrentErrors.Add(
+                RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral(
+                    new SourceSpan(CurrentStart, contentLength: 1 /* " */)));
+        }
+        else
+        {
+            // Not an escape, a new line, or end of file: with the predicates used in this file
+            // the remaining stop character is the closing quote.
+            TakeCurrent();
+        }
+
+        return Transition(CSharpTokenizerState.Data, EndToken(literalType));
+    }
+
+    // CSharp Spec §2.3.2
+    /// <summary>
+    /// Handler for the body of a block comment: scans to the next '*', ends the comment on "*/", and
+    /// reports an unterminated comment when the end of the file is reached first.
+    /// </summary>
+    private StateResult BlockComment()
+    {
+        TakeUntil(static c => c == '*');
+
+        if (EndOfFile)
+        {
+            CurrentErrors.Add(
+                RazorDiagnosticFactory.CreateParsing_BlockCommentNotTerminated(
+                    new SourceSpan(CurrentStart, contentLength: 1 /* end of file */)));
+
+            return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment));
+        }
+
+        if (CurrentCharacter != '*')
+        {
+            return Stay();
+        }
+
+        TakeCurrent();
+
+        // A star that is not followed by a slash is part of the comment body.
+        if (CurrentCharacter != '/')
+        {
+            return Stay();
+        }
+
+        TakeCurrent();
+        return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment));
+    }
+
+    // CSharp Spec §2.3.2
+    /// <summary>
+    /// Reads up to (not including) the next new-line character and emits the text as a comment token.
+    /// </summary>
+    private StateResult SingleLineComment()
+    {
+        TakeUntil(static c => SyntaxFacts.IsNewLine(c));
+        var comment = EndToken(SyntaxKind.CSharpComment);
+        return Stay(comment);
+    }
+
+    // CSharp Spec §2.4.4
+    /// <summary>
+    /// Reads a numeric literal: hexadecimal when the text starts with "0x", decimal otherwise.
+    /// </summary>
+    private StateResult NumericLiteral()
+        => TakeAll("0x", caseSensitive: true) ? HexLiteral() : DecimalLiteral();
+
+    /// <summary>
+    /// Reads the hexadecimal digits and optional integer suffix that follow a consumed "0x" prefix.
+    /// </summary>
+    private StateResult HexLiteral()
+    {
+        TakeUntil(static c => !IsHexDigit(c));
+        TakeIntegerSuffix();
+
+        var literal = EndToken(SyntaxKind.IntegerLiteral);
+        return Stay(literal);
+    }
+
+    /// <summary>
+    /// Reads a run of decimal digits and then continues as a real literal (fraction, exponent or real
+    /// suffix present) or finishes as an integer literal with an optional integer suffix.
+    /// </summary>
+    private StateResult DecimalLiteral()
+    {
+        TakeUntil(static c => !char.IsDigit(c));
+
+        // Only treat '.' as a decimal point when a digit follows it.
+        if (CurrentCharacter == '.' && char.IsDigit(Peek()))
+        {
+            return RealLiteral();
+        }
+
+        var next = CurrentCharacter;
+        if (IsRealLiteralSuffix(next) || (next is 'E' or 'e'))
+        {
+            return RealLiteralExponentPart();
+        }
+
+        TakeIntegerSuffix();
+        return Stay(EndToken(SyntaxKind.IntegerLiteral));
+    }
+
+    /// <summary>
+    /// Reads the optional exponent (with optional sign and digits) and the optional real-type suffix,
+    /// then emits the buffered text as a real literal.
+    /// </summary>
+    private StateResult RealLiteralExponentPart()
+    {
+        if (CurrentCharacter is 'E' or 'e')
+        {
+            TakeCurrent();
+
+            if (CurrentCharacter is '+' or '-')
+            {
+                TakeCurrent();
+            }
+
+            TakeUntil(static c => !char.IsDigit(c));
+        }
+
+        if (IsRealLiteralSuffix(CurrentCharacter))
+        {
+            TakeCurrent();
+        }
+
+        return Stay(EndToken(SyntaxKind.RealLiteral));
+    }
+
+    // CSharp Spec §2.4.4.3
+    /// <summary>
+    /// Reads the fractional part of a real literal. Expects the current character to be the '.'
+    /// and the character after it to be a digit.
+    /// </summary>
+    private StateResult RealLiteral()
+    {
+        AssertCurrent('.');
+        TakeCurrent();
+
+        Debug.Assert(char.IsDigit(CurrentCharacter));
+        TakeUntil(static c => !char.IsDigit(c));
+
+        return RealLiteralExponentPart();
+    }
+
+    /// <summary>
+    /// Consumes an optional integer suffix: 'u' or 'l' in either case, optionally followed by the
+    /// other of the two letters.
+    /// </summary>
+    private void TakeIntegerSuffix()
+    {
+        var first = char.ToLowerInvariant(CurrentCharacter);
+        if (first != 'u' && first != 'l')
+        {
+            return;
+        }
+
+        TakeCurrent();
+
+        // After 'u' only 'l' may follow, and after 'l' only 'u'.
+        var partner = first == 'u' ? 'l' : 'u';
+        if (char.ToLowerInvariant(CurrentCharacter) == partner)
+        {
+            TakeCurrent();
+        }
+    }
+
+    // CSharp Spec §2.4.2
+    /// <summary>
+    /// Reads an identifier and emits it as a keyword token when its text is in the keyword table,
+    /// otherwise as an identifier token.
+    /// </summary>
+    private StateResult Identifier()
+    {
+        Debug.Assert(SyntaxFacts.IsIdentifierStartCharacter(CurrentCharacter));
+        TakeCurrent();
+        TakeUntil(static c => !SyntaxFacts.IsIdentifierPartCharacter(c));
+
+        SyntaxToken result = null;
+        if (HaveContent)
+        {
+            var text = Buffer.ToString();
+            var kind = _keywords.ContainsKey(text) ? SyntaxKind.Keyword : SyntaxKind.Identifier;
+
+            result = SyntaxFactory.Token(kind, text);
+
+            // The token is built here directly, so the buffer and pending errors are reset by hand.
+            Buffer.Clear();
+            CurrentErrors.Clear();
+        }
+
+        return Stay(result);
+    }
+
+    /// <summary>Moves to <paramref name="state"/> without emitting a token.</summary>
+    private StateResult Transition(CSharpTokenizerState state)
+        => Transition((int)state, result: null);
+
+    /// <summary>Moves to <paramref name="state"/> and hands back <paramref name="result"/> as the emitted token.</summary>
+    private StateResult Transition(CSharpTokenizerState state, SyntaxToken result)
+        => Transition((int)state, result);
+
+    /// <summary>
+    /// Returns true for the real-literal suffix letters F, D and M in either case.
+    /// </summary>
+    private static bool IsRealLiteralSuffix(char character)
+        => character is 'F' or 'f' or 'D' or 'd' or 'M' or 'm';
+
+    /// <summary>
+    /// Returns true when <paramref name="value"/> is 0-9, A-F or a-f.
+    /// </summary>
+    private static bool IsHexDigit(char value)
+        => value is (>= '0' and <= '9') or (>= 'A' and <= 'F') or (>= 'a' and <= 'f');
+
+    /// <summary>
+    /// Looks up the keyword kind for a token by its content.
+    /// </summary>
+    /// <param name="token">The token to inspect; may be null.</param>
+    /// <returns>The keyword kind, or null when the token is null or its content is not in the keyword table.</returns>
+    internal override CSharpSyntaxKind? GetTokenKeyword(SyntaxToken token)
+    {
+        if (token == null)
+        {
+            return null;
+        }
+
+        return _keywords.TryGetValue(token.Content, out var keyword)
+            ? keyword
+            : (CSharpSyntaxKind?)null;
+    }
+
+    /// <summary>
+    /// States of the tokenizer's state machine; each value has a handler in <c>Dispatch</c>.
+    /// </summary>
+    private enum CSharpTokenizerState
+    {
+        // Default state: between tokens.
+        Data,
+
+        // Inside a block comment.
+        BlockComment,
+
+        // Inside a single-quoted character literal.
+        QuotedCharacterLiteral,
+
+        // Inside a double-quoted string literal.
+        QuotedStringLiteral,
+
+        // Inside a verbatim string literal.
+        VerbatimStringLiteral,
+
+        // Razor Comments - need to be the same for HTML and CSharp
+        AfterRazorCommentTransition = RazorCommentTokenizerState.AfterRazorCommentTransition,
+        EscapedRazorCommentTransition = RazorCommentTokenizerState.EscapedRazorCommentTransition,
+        RazorCommentBody = RazorCommentTokenizerState.RazorCommentBody,
+        StarAfterRazorCommentBody = RazorCommentTokenizerState.StarAfterRazorCommentBody,
+        AtTokenAfterRazorCommentBody = RazorCommentTokenizerState.AtTokenAfterRazorCommentBody,
+    }
+}
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/TokenizerBackedParser.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/TokenizerBackedParser.cs
index f8aaf38ec1e..45e06cf5341 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/TokenizerBackedParser.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/TokenizerBackedParser.cs
@@ -17,7 +17,7 @@ internal abstract class TokenizerBackedParser : ParserBase
protected delegate void SpanContextConfigActionWithPreviousConfig(SpanEditHandlerBuilder? editHandlerBuilder, ref ISpanChunkGenerator? chunkGenerator, SpanContextConfigAction? previousConfig);
private readonly SyntaxListPool _pool = new SyntaxListPool();
- private readonly TokenizerView _tokenizer;
+ protected readonly TokenizerView _tokenizer;
private SyntaxListBuilder? _tokenBuilder;
protected SpanEditHandlerBuilder? editHandlerBuilder;
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptions.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptions.cs
index 972647ce900..a949bd6bd94 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptions.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptions.cs
@@ -16,6 +16,7 @@ public static RazorParserOptions CreateDefault()
Array.Empty(),
designTime: false,
parseLeadingDirectives: false,
+ useRoslynTokenizer: false,
version: RazorLanguageVersion.Latest,
fileKind: FileKinds.Legacy,
enableSpanEditHandlers: false);
@@ -59,16 +60,22 @@ public static RazorParserOptions CreateDesignTime(Action
public bool ParseLeadingDirectives { get; }
+ public bool UseRoslynTokenizer { get; }
+
public RazorLanguageVersion Version { get; } = RazorLanguageVersion.Latest;
internal string FileKind { get; }
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptionsBuilder.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptionsBuilder.cs
index 0fac8bc30af..13a2524420f 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptionsBuilder.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptionsBuilder.cs
@@ -42,13 +42,15 @@ internal RazorParserOptionsBuilder(bool designTime, RazorLanguageVersion version
public bool ParseLeadingDirectives { get; set; }
+ public bool UseRoslynTokenizer { get; set; }
+
public RazorLanguageVersion LanguageVersion { get; }
internal bool EnableSpanEditHandlers { get; set; }
public RazorParserOptions Build()
{
- return new RazorParserOptions(Directives.ToArray(), DesignTime, ParseLeadingDirectives, LanguageVersion, FileKind ?? FileKinds.Legacy, EnableSpanEditHandlers);
+ return new RazorParserOptions(Directives.ToArray(), DesignTime, ParseLeadingDirectives, UseRoslynTokenizer, LanguageVersion, FileKind ?? FileKinds.Legacy, EnableSpanEditHandlers);
}
public void SetDesignTime(bool designTime)
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/ConfigureRazorParserOptions.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/ConfigureRazorParserOptions.cs
new file mode 100644
index 00000000000..0f9fdeed63b
--- /dev/null
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/ConfigureRazorParserOptions.cs
@@ -0,0 +1,16 @@
+// Copyright (c) .NET Foundation. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using Microsoft.AspNetCore.Razor.Language;
+
+namespace Microsoft.NET.Sdk.Razor.SourceGenerators;
+
+/// <summary>
+/// Engine feature that copies a tokenizer choice onto the parser options being built.
+/// </summary>
+/// <param name="useRoslynTokenizer">Value assigned to <see cref="RazorParserOptionsBuilder.UseRoslynTokenizer"/>.</param>
+internal class ConfigureRazorParserOptions(bool useRoslynTokenizer) : RazorEngineFeatureBase, IConfigureRazorParserOptionsFeature
+{
+    public int Order { get; set; }
+
+    /// <summary>Sets <see cref="RazorParserOptionsBuilder.UseRoslynTokenizer"/> on <paramref name="options"/>.</summary>
+    public void Configure(RazorParserOptionsBuilder options)
+        => options.UseRoslynTokenizer = useRoslynTokenizer;
+}
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerationOptions.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerationOptions.cs
index 412dba45047..ec490b85ddd 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerationOptions.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerationOptions.cs
@@ -33,6 +33,8 @@ internal sealed record RazorSourceGenerationOptions
///
internal string? TestSuppressUniqueIds { get; set; }
+ internal bool UseRoslynTokenizer { get; set; } = true;
+
public override int GetHashCode() => Configuration.GetHashCode();
}
}
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.Helpers.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.Helpers.cs
index bc0e302982d..a67af975b48 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.Helpers.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.Helpers.cs
@@ -2,7 +2,6 @@
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System.Collections.Generic;
-using System.Collections.Immutable;
using System.Text;
using Microsoft.AspNetCore.Mvc.Razor.Extensions;
using Microsoft.AspNetCore.Razor.Language;
@@ -55,6 +54,7 @@ private static RazorProjectEngine GetDeclarationProjectEngine(
options.SuppressChecksum = true;
options.SupportLocalizedComponentNames = razorSourceGeneratorOptions.SupportLocalizedComponentNames;
}));
+ b.Features.Add(new ConfigureRazorParserOptions(razorSourceGeneratorOptions.UseRoslynTokenizer));
b.SetRootNamespace(razorSourceGeneratorOptions.RootNamespace);
@@ -109,6 +109,7 @@ private static SourceGeneratorProjectEngine GetGenerationProjectEngine(
options.SuppressUniqueIds = razorSourceGeneratorOptions.TestSuppressUniqueIds;
options.SuppressAddComponentParameter = !isAddComponentParameterAvailable;
}));
+ b.Features.Add(new ConfigureRazorParserOptions(razorSourceGeneratorOptions.UseRoslynTokenizer));
CompilerFeatures.Register(b);
RazorExtensions.Register(b);
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.RazorProviders.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.RazorProviders.cs
index 7daf0ba4abf..aecd149ff3c 100644
--- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.RazorProviders.cs
+++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.RazorProviders.cs
@@ -18,7 +18,7 @@ public partial class RazorSourceGenerator
{
var ((options, parseOptions), isSuppressed) = pair;
var globalOptions = options.GlobalOptions;
-
+
if (isSuppressed)
{
return default;
@@ -44,6 +44,10 @@ public partial class RazorSourceGenerator
var razorConfiguration = new RazorConfiguration(razorLanguageVersion, configurationName ?? "default", Extensions: [], UseConsolidatedMvcViews: true);
+ // We use the new tokenizer by default
+ var useRazorTokenizer = !parseOptions.Features.TryGetValue("use-razor-tokenizer", out var useRazorTokenizerValue)
+ || !string.Equals(useRazorTokenizerValue, "false", StringComparison.OrdinalIgnoreCase);
+
var razorSourceGenerationOptions = new RazorSourceGenerationOptions()
{
Configuration = razorConfiguration,
@@ -52,6 +56,7 @@ public partial class RazorSourceGenerator
SupportLocalizedComponentNames = supportLocalizedComponentNames == "true",
CSharpLanguageVersion = ((CSharpParseOptions)parseOptions).LanguageVersion,
TestSuppressUniqueIds = _testSuppressUniqueIds,
+ UseRoslynTokenizer = useRazorTokenizer,
};
return (razorSourceGenerationOptions, diagnostic);
diff --git a/src/Razor/src/Microsoft.VisualStudio.LegacyEditor.Razor/Parsing/VisualStudioRazorParser.cs b/src/Razor/src/Microsoft.VisualStudio.LegacyEditor.Razor/Parsing/VisualStudioRazorParser.cs
index 57964527885..ebc69bd57fa 100644
--- a/src/Razor/src/Microsoft.VisualStudio.LegacyEditor.Razor/Parsing/VisualStudioRazorParser.cs
+++ b/src/Razor/src/Microsoft.VisualStudio.LegacyEditor.Razor/Parsing/VisualStudioRazorParser.cs
@@ -614,6 +614,7 @@ internal class VisualStudioEnableTagHelpersFeature : RazorEngineFeatureBase, ICo
public void Configure(RazorParserOptionsBuilder options)
{
options.EnableSpanEditHandlers = true;
+ options.UseRoslynTokenizer = false;
}
}
diff --git a/src/Razor/test/Microsoft.AspNetCore.Razor.Test.Common.Tooling/Language/Legacy/ToolingParserTestBase.cs b/src/Razor/test/Microsoft.AspNetCore.Razor.Test.Common.Tooling/Language/Legacy/ToolingParserTestBase.cs
index 64c89e2de7a..5cbba5150c3 100644
--- a/src/Razor/test/Microsoft.AspNetCore.Razor.Test.Common.Tooling/Language/Legacy/ToolingParserTestBase.cs
+++ b/src/Razor/test/Microsoft.AspNetCore.Razor.Test.Common.Tooling/Language/Legacy/ToolingParserTestBase.cs
@@ -253,6 +253,7 @@ internal static RazorParserOptions CreateParserOptions(
directives.ToArray(),
designTime,
parseLeadingDirectives: false,
+ useRoslynTokenizer: false,
version: version,
fileKind: fileKind,
enableSpanEditHandlers)
diff --git a/src/Shared/Microsoft.AspNetCore.Razor.Test.Common/Language/Legacy/ParserTestBase.cs b/src/Shared/Microsoft.AspNetCore.Razor.Test.Common/Language/Legacy/ParserTestBase.cs
index 3d38abc1518..f4982d0d51e 100644
--- a/src/Shared/Microsoft.AspNetCore.Razor.Test.Common/Language/Legacy/ParserTestBase.cs
+++ b/src/Shared/Microsoft.AspNetCore.Razor.Test.Common/Language/Legacy/ParserTestBase.cs
@@ -26,11 +26,13 @@ public abstract class ParserTestBase : IParserTest
// UTF-8 with BOM
private static readonly Encoding _baselineEncoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: true);
private readonly bool _validateSpanEditHandlers;
+ private readonly bool _useLegacyTokenizer;
- internal ParserTestBase(TestProject.Layer layer, bool validateSpanEditHandlers = false)
+ internal ParserTestBase(TestProject.Layer layer, bool validateSpanEditHandlers = false, bool useLegacyTokenizer = false)
{
TestProjectRoot = TestProject.GetProjectDirectory(GetType(), layer);
_validateSpanEditHandlers = validateSpanEditHandlers;
+ _useLegacyTokenizer = useLegacyTokenizer;
}
///
@@ -196,7 +198,7 @@ internal virtual RazorSyntaxTree ParseDocument(RazorLanguageVersion version, str
var source = TestRazorSourceDocument.Create(document, filePath: null, relativePath: null, normalizeNewLines: true);
- var options = CreateParserOptions(version, directives, designTime, _validateSpanEditHandlers, featureFlags, fileKind);
+ var options = CreateParserOptions(version, directives, designTime, _validateSpanEditHandlers, _useLegacyTokenizer, featureFlags, fileKind);
var context = new ParserContext(source, options);
var codeParser = new CSharpCodeParser(directives, context);
@@ -257,6 +259,7 @@ internal static RazorParserOptions CreateParserOptions(
IEnumerable directives,
bool designTime,
bool enableSpanEditHandlers,
+ bool useLegacyTokenizer,
RazorParserFeatureFlags featureFlags = null,
string fileKind = null)
{
@@ -265,6 +268,7 @@ internal static RazorParserOptions CreateParserOptions(
directives.ToArray(),
designTime,
parseLeadingDirectives: false,
+ useRoslynTokenizer: !useLegacyTokenizer,
version,
fileKind,
enableSpanEditHandlers)