diff --git a/Directory.Packages.props b/Directory.Packages.props index 4e2bdea802f..2c7e57038aa 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -113,7 +113,7 @@ - + diff --git a/src/Compiler/Directory.Packages.props b/src/Compiler/Directory.Packages.props deleted file mode 100644 index 7154276a074..00000000000 --- a/src/Compiler/Directory.Packages.props +++ /dev/null @@ -1,23 +0,0 @@ - - - <_RoslynPackageVersion>4.9.2 - <_RoslynPackageVersion Condition="'$(DotNetBuildFromSource)' == 'true'">$(MicrosoftCodeAnalysisCommonPackageVersion) - - - - - - - - - - - - - diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpAutoCompleteTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpAutoCompleteTest.cs index ed3d64f00a1..95dcafb8b3c 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpAutoCompleteTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpAutoCompleteTest.cs @@ -9,7 +9,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpAutoCompleteTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpAutoCompleteTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void FunctionsDirectiveAutoCompleteAtEOF() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpBlockTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpBlockTest.cs index be9741e66d2..7361b50c9b4 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpBlockTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpBlockTest.cs @@ -7,7 +7,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, 
validateSpanEditHandlers: true) +public class CSharpBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void CSharpBlock_SingleLineControlFlowStatement_Error() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpErrorTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpErrorTest.cs index d7f288bef2c..0940f13c115 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpErrorTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpErrorTest.cs @@ -9,7 +9,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpErrorTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpErrorTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void HandlesQuotesAfterTransition() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpExplicitExpressionTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpExplicitExpressionTest.cs index 8e6e0138d07..24ecb3f04dd 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpExplicitExpressionTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpExplicitExpressionTest.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpExplicitExpressionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpExplicitExpressionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void ShouldOutputZeroLengthCodeSpanIfExplicitExpressionIsEmpty() diff --git 
a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpFunctionsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpFunctionsTest.cs index be769bbc679..455c103d2ab 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpFunctionsTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpFunctionsTest.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpFunctionsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpFunctionsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void Functions_SingleLineControlFlowStatement_Error() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpImplicitExpressionTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpImplicitExpressionTest.cs index 9c597311385..d7ade8c07a8 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpImplicitExpressionTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpImplicitExpressionTest.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpImplicitExpressionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpImplicitExpressionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void ParsesNullConditionalOperatorImplicitExpression_Bracket1() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpNestedStatementsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpNestedStatementsTest.cs index 4baa3d137e8..c26c9b7cce2 100644 --- 
a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpNestedStatementsTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpNestedStatementsTest.cs @@ -7,7 +7,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpNestedStatementsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpNestedStatementsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void NestedSimpleStatement() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpRazorCommentsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpRazorCommentsTest.cs index 87e79fea8db..e63d1b670f8 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpRazorCommentsTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpRazorCommentsTest.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpRazorCommentsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpRazorCommentsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void UnterminatedRazorComment() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpReservedWordsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpReservedWordsTest.cs index 1c69574cb94..a049155bda0 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpReservedWordsTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpReservedWordsTest.cs @@ -7,7 +7,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpReservedWordsTest() : 
ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpReservedWordsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void ReservedWord() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSectionTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSectionTest.cs index be7c99f3a94..4dd97349b23 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSectionTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSectionTest.cs @@ -10,7 +10,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpSectionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpSectionTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void CapturesNewlineImmediatelyFollowing() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSpecialBlockTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSpecialBlockTest.cs index 9e7b33aa5ac..8d8b36127bf 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSpecialBlockTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpSpecialBlockTest.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpSpecialBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpSpecialBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void NonKeywordStatementInCodeBlockIsHandledCorrectly() diff --git 
a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpStatementTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpStatementTest.cs index 7e324bb19e6..b6cc6096228 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpStatementTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpStatementTest.cs @@ -16,7 +16,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; // * Tests for various types of nested statements // * Comment tests -public class CSharpStatementTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpStatementTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void ForStatement() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTemplateTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTemplateTest.cs index 51a49891627..75d999bc96b 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTemplateTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTemplateTest.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpTemplateTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpTemplateTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void HandlesSingleLineTemplate() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpToMarkupSwitchTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpToMarkupSwitchTest.cs index 5ca8a54f47d..562d5304c61 100644 --- 
a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpToMarkupSwitchTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpToMarkupSwitchTest.cs @@ -9,7 +9,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpToMarkupSwitchTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpToMarkupSwitchTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void SingleAngleBracketDoesNotCauseSwitchIfOuterBlockIsTerminated() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerCommentTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerCommentTest.cs new file mode 100644 index 00000000000..760f272ba7d --- /dev/null +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerCommentTest.cs @@ -0,0 +1,96 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#nullable disable + +using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax; +using Xunit; + +namespace Microsoft.AspNetCore.Razor.Language.Legacy; + +public class CSharpTokenizerCommentTest : CSharpTokenizerTestBase +{ + private new SyntaxToken IgnoreRemaining => (SyntaxToken)base.IgnoreRemaining; + + [Fact] + public void Next_Ignores_Star_At_EOF_In_RazorComment() + { + TestTokenizer( + "@* Foo * Bar * Baz *", + SyntaxFactory.Token(SyntaxKind.RazorCommentTransition, "@"), + SyntaxFactory.Token(SyntaxKind.RazorCommentStar, "*"), + SyntaxFactory.Token(SyntaxKind.RazorCommentLiteral, " Foo * Bar * Baz *")); + } + + [Fact] + public void Next_Ignores_Star_Without_Trailing_At() + { + TestTokenizer( + "@* Foo * Bar * Baz *@", + SyntaxFactory.Token(SyntaxKind.RazorCommentTransition, "@"), + SyntaxFactory.Token(SyntaxKind.RazorCommentStar, "*"), + SyntaxFactory.Token(SyntaxKind.RazorCommentLiteral, " Foo * Bar * Baz "), + SyntaxFactory.Token(SyntaxKind.RazorCommentStar, "*"), + SyntaxFactory.Token(SyntaxKind.RazorCommentTransition, "@")); + } + + [Fact] + public void Next_Returns_RazorComment_Token_For_Entire_Razor_Comment() + { + TestTokenizer( + "@* Foo Bar Baz *@", + SyntaxFactory.Token(SyntaxKind.RazorCommentTransition, "@"), + SyntaxFactory.Token(SyntaxKind.RazorCommentStar, "*"), + SyntaxFactory.Token(SyntaxKind.RazorCommentLiteral, " Foo Bar Baz "), + SyntaxFactory.Token(SyntaxKind.RazorCommentStar, "*"), + SyntaxFactory.Token(SyntaxKind.RazorCommentTransition, "@")); + } + + [Fact] + public void Next_Returns_Comment_Token_For_Entire_Single_Line_Comment() + { + TestTokenizer("// Foo Bar Baz", SyntaxFactory.Token(SyntaxKind.CSharpComment, "// Foo Bar Baz")); + } + + [Fact] + public void Single_Line_Comment_Is_Terminated_By_Newline() + { + TestTokenizer("// Foo Bar Baz\na", SyntaxFactory.Token(SyntaxKind.CSharpComment, "// Foo Bar Baz"), IgnoreRemaining); + } + + [Fact] + public void Multi_Line_Comment_In_Single_Line_Comment_Has_No_Effect() + { + 
TestTokenizer("// Foo/*Bar*/ Baz\na", SyntaxFactory.Token(SyntaxKind.CSharpComment, "// Foo/*Bar*/ Baz"), IgnoreRemaining); + } + + [Fact] + public void Next_Returns_Comment_Token_For_Entire_Multi_Line_Comment() + { + TestTokenizer("/* Foo\nBar\nBaz */", SyntaxFactory.Token(SyntaxKind.CSharpComment, "/* Foo\nBar\nBaz */")); + } + + [Fact] + public void Multi_Line_Comment_Is_Terminated_By_End_Sequence() + { + TestTokenizer("/* Foo\nBar\nBaz */a", SyntaxFactory.Token(SyntaxKind.CSharpComment, "/* Foo\nBar\nBaz */"), IgnoreRemaining); + } + + [Fact] + public void Unterminated_Multi_Line_Comment_Captures_To_EOF() + { + TestTokenizer("/* Foo\nBar\nBaz", SyntaxFactory.Token(SyntaxKind.CSharpComment, "/* Foo\nBar\nBaz"), IgnoreRemaining); + } + + [Fact] + public void Nested_Multi_Line_Comments_Terminated_At_First_End_Sequence() + { + TestTokenizer("/* Foo/*\nBar\nBaz*/ */", SyntaxFactory.Token(SyntaxKind.CSharpComment, "/* Foo/*\nBar\nBaz*/"), IgnoreRemaining); + } + + [Fact] + public void Nested_Multi_Line_Comments_Terminated_At_Full_End_Sequence() + { + TestTokenizer("/* Foo\nBar\nBaz* */", SyntaxFactory.Token(SyntaxKind.CSharpComment, "/* Foo\nBar\nBaz* */"), IgnoreRemaining); + } +} diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerIdentifierTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerIdentifierTest.cs new file mode 100644 index 00000000000..33565d554a3 --- /dev/null +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerIdentifierTest.cs @@ -0,0 +1,172 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#nullable disable + +using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax; +using Xunit; + +namespace Microsoft.AspNetCore.Razor.Language.Legacy; + +public class CSharpTokenizerIdentifierTest : CSharpTokenizerTestBase +{ + [Fact] + public void Simple_Identifier_Is_Recognized() + { + TestTokenizer("foo", SyntaxFactory.Token(SyntaxKind.Identifier, "foo")); + } + + [Fact] + public void Identifier_Starting_With_Underscore_Is_Recognized() + { + TestTokenizer("_foo", SyntaxFactory.Token(SyntaxKind.Identifier, "_foo")); + } + + [Fact] + public void Identifier_Can_Contain_Digits() + { + TestTokenizer("foo4", SyntaxFactory.Token(SyntaxKind.Identifier, "foo4")); + } + + [Fact] + public void Identifier_Can_Start_With_Titlecase_Letter() + { + TestTokenizer("ῼfoo", SyntaxFactory.Token(SyntaxKind.Identifier, "ῼfoo")); + } + + [Fact] + public void Identifier_Can_Start_With_Letter_Modifier() + { + TestTokenizer("ᵊfoo", SyntaxFactory.Token(SyntaxKind.Identifier, "ᵊfoo")); + } + + [Fact] + public void Identifier_Can_Start_With_Other_Letter() + { + TestTokenizer("ƻfoo", SyntaxFactory.Token(SyntaxKind.Identifier, "ƻfoo")); + } + + [Fact] + public void Identifier_Can_Start_With_Number_Letter() + { + TestTokenizer("Ⅽool", SyntaxFactory.Token(SyntaxKind.Identifier, "Ⅽool")); + } + + [Fact] + public void Identifier_Can_Contain_Non_Spacing_Mark() + { + TestTokenizer("foo\u0300", SyntaxFactory.Token(SyntaxKind.Identifier, "foo\u0300")); + } + + [Fact] + public void Identifier_Can_Contain_Spacing_Combining_Mark() + { + TestTokenizer("fooः", SyntaxFactory.Token(SyntaxKind.Identifier, "fooः")); + } + + [Fact] + public void Identifier_Can_Contain_Non_English_Digit() + { + TestTokenizer("foo١", SyntaxFactory.Token(SyntaxKind.Identifier, "foo١")); + } + + [Fact] + public void Identifier_Can_Contain_Connector_Punctuation() + { + TestTokenizer("foo‿bar", SyntaxFactory.Token(SyntaxKind.Identifier, "foo‿bar")); + } + + [Fact] + public void Identifier_Can_Contain_Format_Character() + { + 
TestTokenizer("foo؃bar", SyntaxFactory.Token(SyntaxKind.Identifier, "foo؃bar")); + } + + [Fact] + public void Keywords_Are_Recognized_As_Keyword_Tokens() + { + TestKeyword("abstract"); + TestKeyword("byte"); + TestKeyword("class"); + TestKeyword("delegate"); + TestKeyword("event"); + TestKeyword("fixed"); + TestKeyword("if"); + TestKeyword("internal"); + TestKeyword("new"); + TestKeyword("override"); + TestKeyword("readonly"); + TestKeyword("short"); + TestKeyword("struct"); + TestKeyword("try"); + TestKeyword("unsafe"); + TestKeyword("volatile"); + TestKeyword("as"); + TestKeyword("do"); + TestKeyword("is"); + TestKeyword("params"); + TestKeyword("ref"); + TestKeyword("switch"); + TestKeyword("ushort"); + TestKeyword("while"); + TestKeyword("case"); + TestKeyword("const"); + TestKeyword("explicit"); + TestKeyword("float"); + TestKeyword("null"); + TestKeyword("sizeof"); + TestKeyword("typeof"); + TestKeyword("implicit"); + TestKeyword("private"); + TestKeyword("this"); + TestKeyword("using"); + TestKeyword("extern"); + TestKeyword("return"); + TestKeyword("stackalloc"); + TestKeyword("uint"); + TestKeyword("base"); + TestKeyword("catch"); + TestKeyword("continue"); + TestKeyword("double"); + TestKeyword("for"); + TestKeyword("in"); + TestKeyword("lock"); + TestKeyword("object"); + TestKeyword("protected"); + TestKeyword("static"); + TestKeyword("false"); + TestKeyword("public"); + TestKeyword("sbyte"); + TestKeyword("throw"); + TestKeyword("virtual"); + TestKeyword("decimal"); + TestKeyword("else"); + TestKeyword("operator"); + TestKeyword("string"); + TestKeyword("ulong"); + TestKeyword("bool"); + TestKeyword("char"); + TestKeyword("default"); + TestKeyword("foreach"); + TestKeyword("long"); + TestKeyword("void"); + TestKeyword("enum"); + TestKeyword("finally"); + TestKeyword("int"); + TestKeyword("out"); + TestKeyword("sealed"); + TestKeyword("true"); + TestKeyword("goto"); + TestKeyword("unchecked"); + TestKeyword("interface"); + TestKeyword("break"); + 
TestKeyword("checked"); + TestKeyword("namespace"); + TestKeyword("when"); + } + + private void TestKeyword(string keyword) + { + TestTokenizer(keyword, SyntaxFactory.Token(SyntaxKind.Keyword, keyword)); + } +} diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerLiteralTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerLiteralTest.cs new file mode 100644 index 00000000000..a096d208049 --- /dev/null +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerLiteralTest.cs @@ -0,0 +1,289 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#nullable disable + +using System; +using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax; +using Xunit; + +namespace Microsoft.AspNetCore.Razor.Language.Legacy; + +public class CSharpTokenizerLiteralTest : CSharpTokenizerTestBase +{ + private new SyntaxToken IgnoreRemaining => (SyntaxToken)base.IgnoreRemaining; + + [Fact] + public void Simple_Integer_Literal_Is_Recognized() + { + TestSingleToken("01189998819991197253", SyntaxKind.IntegerLiteral); + } + + [Fact] + public void Integer_Type_Suffix_Is_Recognized() + { + TestSingleToken("42U", SyntaxKind.IntegerLiteral); + TestSingleToken("42u", SyntaxKind.IntegerLiteral); + + TestSingleToken("42L", SyntaxKind.IntegerLiteral); + TestSingleToken("42l", SyntaxKind.IntegerLiteral); + + TestSingleToken("42UL", SyntaxKind.IntegerLiteral); + TestSingleToken("42Ul", SyntaxKind.IntegerLiteral); + + TestSingleToken("42uL", SyntaxKind.IntegerLiteral); + TestSingleToken("42ul", SyntaxKind.IntegerLiteral); + + TestSingleToken("42LU", SyntaxKind.IntegerLiteral); + TestSingleToken("42Lu", SyntaxKind.IntegerLiteral); + + TestSingleToken("42lU", SyntaxKind.IntegerLiteral); + TestSingleToken("42lu", SyntaxKind.IntegerLiteral); + } + + [Fact] + public void 
Trailing_Letter_Is_Not_Part_Of_Integer_Literal_If_Not_Type_Sufix() + { + TestTokenizer("42a", SyntaxFactory.Token(SyntaxKind.IntegerLiteral, "42"), IgnoreRemaining); + } + + [Fact] + public void Simple_Hex_Literal_Is_Recognized() + { + TestSingleToken("0x0123456789ABCDEF", SyntaxKind.IntegerLiteral); + } + + [Fact] + public void Integer_Type_Suffix_Is_Recognized_In_Hex_Literal() + { + TestSingleToken("0xDEADBEEFU", SyntaxKind.IntegerLiteral); + TestSingleToken("0xDEADBEEFu", SyntaxKind.IntegerLiteral); + + TestSingleToken("0xDEADBEEFL", SyntaxKind.IntegerLiteral); + TestSingleToken("0xDEADBEEFl", SyntaxKind.IntegerLiteral); + + TestSingleToken("0xDEADBEEFUL", SyntaxKind.IntegerLiteral); + TestSingleToken("0xDEADBEEFUl", SyntaxKind.IntegerLiteral); + + TestSingleToken("0xDEADBEEFuL", SyntaxKind.IntegerLiteral); + TestSingleToken("0xDEADBEEFul", SyntaxKind.IntegerLiteral); + + TestSingleToken("0xDEADBEEFLU", SyntaxKind.IntegerLiteral); + TestSingleToken("0xDEADBEEFLu", SyntaxKind.IntegerLiteral); + + TestSingleToken("0xDEADBEEFlU", SyntaxKind.IntegerLiteral); + TestSingleToken("0xDEADBEEFlu", SyntaxKind.IntegerLiteral); + } + + [Fact] + public void Trailing_Letter_Is_Not_Part_Of_Hex_Literal_If_Not_Type_Sufix() + { + TestTokenizer("0xDEADBEEFz", SyntaxFactory.Token(SyntaxKind.IntegerLiteral, "0xDEADBEEF"), IgnoreRemaining); + } + + [Fact] + public void Dot_Followed_By_Non_Digit_Is_Not_Part_Of_Real_Literal() + { + TestTokenizer("3.a", SyntaxFactory.Token(SyntaxKind.IntegerLiteral, "3"), IgnoreRemaining); + } + + [Fact] + public void Simple_Real_Literal_Is_Recognized() + { + TestTokenizer("3.14159", SyntaxFactory.Token(SyntaxKind.RealLiteral, "3.14159")); + } + + [Fact] + public void Real_Literal_Between_Zero_And_One_Is_Recognized() + { + TestTokenizer(".14159", SyntaxFactory.Token(SyntaxKind.RealLiteral, ".14159")); + } + + [Fact] + public void Integer_With_Real_Type_Suffix_Is_Recognized() + { + TestSingleToken("42F", SyntaxKind.RealLiteral); + TestSingleToken("42f", 
SyntaxKind.RealLiteral); + TestSingleToken("42D", SyntaxKind.RealLiteral); + TestSingleToken("42d", SyntaxKind.RealLiteral); + TestSingleToken("42M", SyntaxKind.RealLiteral); + TestSingleToken("42m", SyntaxKind.RealLiteral); + } + + [Fact] + public void Integer_With_Exponent_Is_Recognized() + { + TestSingleToken("1e10", SyntaxKind.RealLiteral); + TestSingleToken("1E10", SyntaxKind.RealLiteral); + TestSingleToken("1e+10", SyntaxKind.RealLiteral); + TestSingleToken("1E+10", SyntaxKind.RealLiteral); + TestSingleToken("1e-10", SyntaxKind.RealLiteral); + TestSingleToken("1E-10", SyntaxKind.RealLiteral); + } + + [Fact] + public void Real_Number_With_Type_Suffix_Is_Recognized() + { + TestSingleToken("3.14F", SyntaxKind.RealLiteral); + TestSingleToken("3.14f", SyntaxKind.RealLiteral); + TestSingleToken("3.14D", SyntaxKind.RealLiteral); + TestSingleToken("3.14d", SyntaxKind.RealLiteral); + TestSingleToken("3.14M", SyntaxKind.RealLiteral); + TestSingleToken("3.14m", SyntaxKind.RealLiteral); + } + + [Fact] + public void Real_Number_With_Exponent_Is_Recognized() + { + TestSingleToken("3.14E10", SyntaxKind.RealLiteral); + TestSingleToken("3.14e10", SyntaxKind.RealLiteral); + TestSingleToken("3.14E+10", SyntaxKind.RealLiteral); + TestSingleToken("3.14e+10", SyntaxKind.RealLiteral); + TestSingleToken("3.14E-10", SyntaxKind.RealLiteral); + TestSingleToken("3.14e-10", SyntaxKind.RealLiteral); + } + + [Fact] + public void Real_Number_With_Exponent_And_Type_Suffix_Is_Recognized() + { + TestSingleToken("3.14E+10F", SyntaxKind.RealLiteral); + } + + [Fact] + public void Single_Character_Literal_Is_Recognized() + { + TestSingleToken("'f'", SyntaxKind.CharacterLiteral); + } + + [Fact] + public void Multi_Character_Literal_Is_Recognized() + { + TestSingleToken("'foo'", SyntaxKind.CharacterLiteral); + } + + [Fact] + public void Character_Literal_Is_Terminated_By_EOF_If_Unterminated() + { + TestSingleToken("'foo bar", SyntaxKind.CharacterLiteral); + } + + [Fact] + public void 
Character_Literal_Not_Terminated_By_Escaped_Quote() + { + TestSingleToken("'foo\\'bar'", SyntaxKind.CharacterLiteral); + } + + [Fact] + public void Character_Literal_Is_Terminated_By_EOL_If_Unterminated() + { + TestTokenizer("'foo\n", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo"), IgnoreRemaining); + } + + [Fact] + public void Character_Literal_Terminated_By_EOL_Even_When_Last_Char_Is_Slash() + { + TestTokenizer("'foo\\\n", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo\\"), IgnoreRemaining); + } + + [Fact] + public void Character_Literal_Terminated_By_EOL_Even_When_Last_Char_Is_Slash_And_Followed_By_Stuff() + { + TestTokenizer("'foo\\\nflarg", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo\\"), IgnoreRemaining); + } + + [Fact] + public void Character_Literal_Terminated_By_CRLF_Even_When_Last_Char_Is_Slash() + { + TestTokenizer("'foo\\\r\n", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo\\"), IgnoreRemaining); + } + + [Fact] + public void Character_Literal_Terminated_By_CRLF_Even_When_Last_Char_Is_Slash_And_Followed_By_Stuff() + { + TestTokenizer($"'foo\\\r\nflarg", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo\\"), IgnoreRemaining); + } + + [Fact] + public void Character_Literal_Allows_Escaped_Escape() + { + TestTokenizer("'foo\\\\'blah", SyntaxFactory.Token(SyntaxKind.CharacterLiteral, "'foo\\\\'"), IgnoreRemaining); + } + + [Fact] + public void String_Literal_Is_Recognized() + { + TestSingleToken("\"foo\"", SyntaxKind.StringLiteral); + } + + [Fact] + public void String_Literal_Is_Terminated_By_EOF_If_Unterminated() + { + TestSingleToken("\"foo bar", SyntaxKind.StringLiteral); + } + + [Fact] + public void String_Literal_Not_Terminated_By_Escaped_Quote() + { + TestSingleToken("\"foo\\\"bar\"", SyntaxKind.StringLiteral); + } + + [Fact] + public void String_Literal_Is_Terminated_By_EOL_If_Unterminated() + { + TestTokenizer("\"foo\n", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo"), IgnoreRemaining); + } + + [Fact] + 
public void String_Literal_Terminated_By_EOL_Even_When_Last_Char_Is_Slash() + { + TestTokenizer("\"foo\\\n", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo\\"), IgnoreRemaining); + } + + [Fact] + public void String_Literal_Terminated_By_EOL_Even_When_Last_Char_Is_Slash_And_Followed_By_Stuff() + { + TestTokenizer("\"foo\\\nflarg", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo\\"), IgnoreRemaining); + } + + [Fact] + public void String_Literal_Terminated_By_CRLF_Even_When_Last_Char_Is_Slash() + { + TestTokenizer("\"foo\\\r\n", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo\\"), IgnoreRemaining); + } + + [Fact] + public void String_Literal_Terminated_By_CRLF_Even_When_Last_Char_Is_Slash_And_Followed_By_Stuff() + { + TestTokenizer($"\"foo\\\r\nflarg", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo\\"), IgnoreRemaining); + } + + [Fact] + public void String_Literal_Allows_Escaped_Escape() + { + TestTokenizer("\"foo\\\\\"blah", SyntaxFactory.Token(SyntaxKind.StringLiteral, "\"foo\\\\\""), IgnoreRemaining); + } + + [Fact] + public void Verbatim_String_Literal_Can_Contain_Newlines() + { + TestSingleToken("@\"foo\nbar\nbaz\"", SyntaxKind.StringLiteral); + } + + [Fact] + public void Verbatim_String_Literal_Not_Terminated_By_Escaped_Double_Quote() + { + TestSingleToken("@\"foo\"\"bar\"", SyntaxKind.StringLiteral); + } + + [Fact] + public void Verbatim_String_Literal_Is_Terminated_By_Slash_Double_Quote() + { + TestTokenizer("@\"foo\\\"bar\"", SyntaxFactory.Token(SyntaxKind.StringLiteral, "@\"foo\\\""), IgnoreRemaining); + } + + [Fact] + public void Verbatim_String_Literal_Is_Terminated_By_EOF() + { + TestSingleToken("@\"foo", SyntaxKind.StringLiteral); + } +} diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerOperatorsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerOperatorsTest.cs new file mode 100644 index 00000000000..8582c3b3da8 --- /dev/null +++ 
b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerOperatorsTest.cs @@ -0,0 +1,298 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#nullable disable + +using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax; +using Xunit; + +namespace Microsoft.AspNetCore.Razor.Language.Legacy; + +public class CSharpTokenizerOperatorsTest : CSharpTokenizerTestBase +{ + [Fact] + public void LeftBrace_Is_Recognized() + { + TestSingleToken("{", SyntaxKind.LeftBrace); + } + + [Fact] + public void Plus_Is_Recognized() + { + TestSingleToken("+", SyntaxKind.Plus); + } + + [Fact] + public void Assign_Is_Recognized() + { + TestSingleToken("=", SyntaxKind.Assign); + } + + [Fact] + public void Arrow_Is_Recognized() + { + TestSingleToken("->", SyntaxKind.Arrow); + } + + [Fact] + public void AndAssign_Is_Recognized() + { + TestSingleToken("&=", SyntaxKind.AndAssign); + } + + [Fact] + public void RightBrace_Is_Recognized() + { + TestSingleToken("}", SyntaxKind.RightBrace); + } + + [Fact] + public void Minus_Is_Recognized() + { + TestSingleToken("-", SyntaxKind.Minus); + } + + [Fact] + public void LessThan_Is_Recognized() + { + TestSingleToken("<", SyntaxKind.LessThan); + } + + [Fact] + public void Equals_Is_Recognized() + { + TestSingleToken("==", SyntaxKind.Equals); + } + + [Fact] + public void OrAssign_Is_Recognized() + { + TestSingleToken("|=", SyntaxKind.OrAssign); + } + + [Fact] + public void LeftBracket_Is_Recognized() + { + TestSingleToken("[", SyntaxKind.LeftBracket); + } + + [Fact] + public void Star_Is_Recognized() + { + TestSingleToken("*", SyntaxKind.Star); + } + + [Fact] + public void GreaterThan_Is_Recognized() + { + TestSingleToken(">", SyntaxKind.GreaterThan); + } + + [Fact] + public void NotEqual_Is_Recognized() + { + TestSingleToken("!=", SyntaxKind.NotEqual); + } + + [Fact] + public void XorAssign_Is_Recognized() + { + TestSingleToken("^=", 
SyntaxKind.XorAssign); + } + + [Fact] + public void RightBracket_Is_Recognized() + { + TestSingleToken("]", SyntaxKind.RightBracket); + } + + [Fact] + public void Slash_Is_Recognized() + { + TestSingleToken("/", SyntaxKind.Slash); + } + + [Fact] + public void QuestionMark_Is_Recognized() + { + TestSingleToken("?", SyntaxKind.QuestionMark); + } + + [Fact] + public void LessThanEqual_Is_Recognized() + { + TestSingleToken("<=", SyntaxKind.LessThanEqual); + } + + [Fact] + public void LeftShift_Is_Not_Specially_Recognized() + { + TestTokenizer("<<", + SyntaxFactory.Token(SyntaxKind.LessThan, "<"), + SyntaxFactory.Token(SyntaxKind.LessThan, "<")); + } + + [Fact] + public void LeftParen_Is_Recognized() + { + TestSingleToken("(", SyntaxKind.LeftParenthesis); + } + + [Fact] + public void Modulo_Is_Recognized() + { + TestSingleToken("%", SyntaxKind.Modulo); + } + + [Fact] + public void NullCoalesce_Is_Recognized() + { + TestSingleToken("??", SyntaxKind.NullCoalesce); + } + + [Fact] + public void GreaterThanEqual_Is_Recognized() + { + TestSingleToken(">=", SyntaxKind.GreaterThanEqual); + } + + [Fact] + public void EqualGreaterThan_Is_Recognized() + { + TestSingleToken("=>", SyntaxKind.GreaterThanEqual); + } + + [Fact] + public void RightParen_Is_Recognized() + { + TestSingleToken(")", SyntaxKind.RightParenthesis); + } + + [Fact] + public void And_Is_Recognized() + { + TestSingleToken("&", SyntaxKind.And); + } + + [Fact] + public void DoubleColon_Is_Recognized() + { + TestSingleToken("::", SyntaxKind.DoubleColon); + } + + [Fact] + public void PlusAssign_Is_Recognized() + { + TestSingleToken("+=", SyntaxKind.PlusAssign); + } + + [Fact] + public void Semicolon_Is_Recognized() + { + TestSingleToken(";", SyntaxKind.Semicolon); + } + + [Fact] + public void Tilde_Is_Recognized() + { + TestSingleToken("~", SyntaxKind.Tilde); + } + + [Fact] + public void DoubleOr_Is_Recognized() + { + TestSingleToken("||", SyntaxKind.DoubleOr); + } + + [Fact] + public void ModuloAssign_Is_Recognized() 
+ { + TestSingleToken("%=", SyntaxKind.ModuloAssign); + } + + [Fact] + public void Colon_Is_Recognized() + { + TestSingleToken(":", SyntaxKind.Colon); + } + + [Fact] + public void Not_Is_Recognized() + { + TestSingleToken("!", SyntaxKind.Not); + } + + [Fact] + public void DoubleAnd_Is_Recognized() + { + TestSingleToken("&&", SyntaxKind.DoubleAnd); + } + + [Fact] + public void DivideAssign_Is_Recognized() + { + TestSingleToken("/=", SyntaxKind.DivideAssign); + } + + [Fact] + public void Comma_Is_Recognized() + { + TestSingleToken(",", SyntaxKind.Comma); + } + + [Fact] + public void Xor_Is_Recognized() + { + TestSingleToken("^", SyntaxKind.Xor); + } + + [Fact] + public void Decrement_Is_Recognized() + { + TestSingleToken("--", SyntaxKind.Decrement); + } + + [Fact] + public void MultiplyAssign_Is_Recognized() + { + TestSingleToken("*=", SyntaxKind.MultiplyAssign); + } + + [Fact] + public void Dot_Is_Recognized() + { + TestSingleToken(".", SyntaxKind.Dot); + } + + [Fact] + public void Or_Is_Recognized() + { + TestSingleToken("|", SyntaxKind.Or); + } + + [Fact] + public void Increment_Is_Recognized() + { + TestSingleToken("++", SyntaxKind.Increment); + } + + [Fact] + public void MinusAssign_Is_Recognized() + { + TestSingleToken("-=", SyntaxKind.MinusAssign); + } + + [Fact] + public void RightShift_Is_Not_Specially_Recognized() + { + TestTokenizer(">>", + SyntaxFactory.Token(SyntaxKind.GreaterThan, ">"), + SyntaxFactory.Token(SyntaxKind.GreaterThan, ">")); + } + + [Fact] + public void Hash_Is_Recognized() + { + TestSingleToken("#", SyntaxKind.Hash); + } +} diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTest.cs new file mode 100644 index 00000000000..96b675eee3c --- /dev/null +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTest.cs @@ -0,0 +1,108 @@ +// Licensed to the .NET Foundation under one 
or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#nullable disable + +using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax; +using Xunit; + +namespace Microsoft.AspNetCore.Razor.Language.Legacy; + +public class CSharpTokenizerTest : CSharpTokenizerTestBase +{ + private new SyntaxToken IgnoreRemaining => (SyntaxToken)base.IgnoreRemaining; + + [Fact] + public void Next_Returns_Null_When_EOF_Reached() + { + TestTokenizer(""); + } + + [Fact] + public void Next_Returns_Newline_Token_For_Single_CR() + { + TestTokenizer( + "\r\ra", + SyntaxFactory.Token(SyntaxKind.NewLine, "\r"), + SyntaxFactory.Token(SyntaxKind.NewLine, "\r"), + IgnoreRemaining); + } + + [Fact] + public void Next_Returns_Newline_Token_For_Single_LF() + { + TestTokenizer( + "\n\na", + SyntaxFactory.Token(SyntaxKind.NewLine, "\n"), + SyntaxFactory.Token(SyntaxKind.NewLine, "\n"), + IgnoreRemaining); + } + + [Fact] + public void Next_Returns_Newline_Token_For_Single_NEL() + { + // NEL: Unicode "Next Line" U+0085 + TestTokenizer( + "\u0085\u0085a", + SyntaxFactory.Token(SyntaxKind.NewLine, "\u0085"), + SyntaxFactory.Token(SyntaxKind.NewLine, "\u0085"), + IgnoreRemaining); + } + + [Fact] + public void Next_Returns_Newline_Token_For_Single_Line_Separator() + { + // Unicode "Line Separator" U+2028 + TestTokenizer( + "\u2028\u2028a", + SyntaxFactory.Token(SyntaxKind.NewLine, "\u2028"), + SyntaxFactory.Token(SyntaxKind.NewLine, "\u2028"), + IgnoreRemaining); + } + + [Fact] + public void Next_Returns_Newline_Token_For_Single_Paragraph_Separator() + { + // Unicode "Paragraph Separator" U+2029 + TestTokenizer( + "\u2029\u2029a", + SyntaxFactory.Token(SyntaxKind.NewLine, "\u2029"), + SyntaxFactory.Token(SyntaxKind.NewLine, "\u2029"), + IgnoreRemaining); + } + + [Fact] + public void Next_Returns_Single_Newline_Token_For_CRLF() + { + TestTokenizer( + "\r\n\r\na", + SyntaxFactory.Token(SyntaxKind.NewLine, "\r\n"), + SyntaxFactory.Token(SyntaxKind.NewLine, 
"\r\n"), + IgnoreRemaining); + } + + [Fact] + public void Next_Returns_Token_For_Whitespace_Characters() + { + TestTokenizer( + " \f\t\u000B \n ", + SyntaxFactory.Token(SyntaxKind.Whitespace, " \f\t\u000B "), + SyntaxFactory.Token(SyntaxKind.NewLine, "\n"), + SyntaxFactory.Token(SyntaxKind.Whitespace, " ")); + } + + [Fact] + public void Transition_Is_Recognized() + { + TestSingleToken("@", SyntaxKind.Transition); + } + + [Fact] + public void Transition_Is_Recognized_As_SingleCharacter() + { + TestTokenizer( + "@(", + SyntaxFactory.Token(SyntaxKind.Transition, "@"), + SyntaxFactory.Token(SyntaxKind.LeftParenthesis, "(")); + } +} diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTestBase.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTestBase.cs new file mode 100644 index 00000000000..24e4207828e --- /dev/null +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpTokenizerTestBase.cs @@ -0,0 +1,28 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#nullable disable + +using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax; + +namespace Microsoft.AspNetCore.Razor.Language.Legacy; + +public abstract class CSharpTokenizerTestBase : TokenizerTestBase +{ + private static readonly SyntaxToken _ignoreRemaining = SyntaxFactory.Token(SyntaxKind.Marker, string.Empty); + + internal override object IgnoreRemaining + { + get { return _ignoreRemaining; } + } + + internal override object CreateTokenizer(SeekableTextReader source) + { + return new NativeCSharpTokenizer(source); + } + + internal void TestSingleToken(string text, SyntaxKind expectedTokenKind) + { + TestTokenizer(text, SyntaxFactory.Token(expectedTokenKind, text)); + } +} diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpVerbatimBlockTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpVerbatimBlockTest.cs index 417c629f3b7..69539f1cd39 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpVerbatimBlockTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpVerbatimBlockTest.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpVerbatimBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpVerbatimBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void VerbatimBlock() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpWhitespaceHandlingTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpWhitespaceHandlingTest.cs index 11c10a7a6b1..92e801c6467 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpWhitespaceHandlingTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/CSharpWhitespaceHandlingTest.cs @@ -8,7 +8,7 @@ 
namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class CSharpWhitespaceHandlingTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class CSharpWhitespaceHandlingTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void StmtBlockDoesNotAcceptTrailingNewlineIfTheyAreSignificantToAncestor() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlAttributeTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlAttributeTest.cs index 5e48c72dfb6..121ac4c683f 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlAttributeTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlAttributeTest.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class HtmlAttributeTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class HtmlAttributeTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void SymbolBoundAttributes_BeforeEqualWhitespace1() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlBlockTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlBlockTest.cs index f6d0cff15b1..0986583dc18 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlBlockTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlBlockTest.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class HtmlBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class HtmlBlockTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void 
HandlesUnbalancedTripleDashHTMLComments() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlDocumentTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlDocumentTest.cs index 16b2b477d75..8eca7bc7ee3 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlDocumentTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlDocumentTest.cs @@ -9,7 +9,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class HtmlDocumentTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class HtmlDocumentTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { private static readonly TestFile Nested1000 = TestFile.Create("TestFiles/nested-1000.html", typeof(HtmlDocumentTest)); diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlErrorTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlErrorTest.cs index c810ff73a7b..b1a625d0217 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlErrorTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlErrorTest.cs @@ -7,7 +7,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class HtmlErrorTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class HtmlErrorTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void AllowsInvalidTagNamesAsLongAsParserCanIdentifyEndTag() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlTagsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlTagsTest.cs index e412c1daede..79260e77932 100644 --- 
a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlTagsTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlTagsTest.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class HtmlTagsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class HtmlTagsTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { private static readonly string[] VoidElementNames = new[] { diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlToCodeSwitchTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlToCodeSwitchTest.cs index 0354b26bd0d..62b644043e1 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlToCodeSwitchTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/HtmlToCodeSwitchTest.cs @@ -10,7 +10,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class HtmlToCodeSwitchTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class HtmlToCodeSwitchTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void SwitchesWhenCharacterBeforeSwapIsNonAlphanumeric() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/ImplicitExpressionEditHandlerTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/ImplicitExpressionEditHandlerTest.cs index 3e5c2feb2f0..18b1bbc547d 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/ImplicitExpressionEditHandlerTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/ImplicitExpressionEditHandlerTest.cs @@ -446,7 +446,7 @@ public void IsAcceptableInsertionInBalancedParenthesis_BalancedParenthesis_Retur private static 
Syntax.MarkupTextLiteralSyntax GetSyntaxNode(SourceLocation start, string content) { var builder = SyntaxListBuilder.Create(); - var tokens = CSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray(); + var tokens = NativeCSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray(); foreach (var token in tokens) { builder.Add(token); diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/MarkupElementGroupingTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/MarkupElementGroupingTest.cs index a540864e811..54f9fcae75f 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/MarkupElementGroupingTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/MarkupElementGroupingTest.cs @@ -7,7 +7,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class MarkupElementGroupingTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class MarkupElementGroupingTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void Handles_ValidTags() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/RazorDirectivesTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/RazorDirectivesTest.cs index 1584fc43772..07c6895fbc5 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/RazorDirectivesTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/RazorDirectivesTest.cs @@ -10,7 +10,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class RazorDirectivesTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class RazorDirectivesTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void 
DirectiveDescriptor_FileScopedMultipleOccurring_CanHaveDuplicates() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TagHelperRewritingTestBase.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TagHelperRewritingTestBase.cs index f286244cf20..60e14438737 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TagHelperRewritingTestBase.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TagHelperRewritingTestBase.cs @@ -9,7 +9,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class TagHelperRewritingTestBase() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class TagHelperRewritingTestBase() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { internal void RunParseTreeRewriterTest(string documentContent, params string[] tagNames) { diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TokenizerTestBase.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TokenizerTestBase.cs new file mode 100644 index 00000000000..8cb2474ea70 --- /dev/null +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/TokenizerTestBase.cs @@ -0,0 +1,73 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#nullable disable + +using System; +using System.Diagnostics; +using System.Globalization; +using System.Text; +using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax; +using Xunit; + +namespace Microsoft.AspNetCore.Razor.Language.Legacy; + +public abstract class TokenizerTestBase +{ + internal abstract object IgnoreRemaining { get; } + internal abstract object CreateTokenizer(SeekableTextReader source); + + internal void TestTokenizer(string input, params SyntaxToken[] expectedSymbols) + { + // Arrange + var success = true; + var output = new StringBuilder(); + using (var source = new SeekableTextReader(input, filePath: null)) + { + var tokenizer = (Tokenizer)CreateTokenizer(source); + var counter = 0; + SyntaxToken current = null; + while ((current = tokenizer.NextToken()) != null) + { + if (counter >= expectedSymbols.Length) + { + output.AppendLine(string.Format(CultureInfo.InvariantCulture, "F: Expected: << Nothing >>; Actual: {0}", current)); + success = false; + } + else if (ReferenceEquals(expectedSymbols[counter], IgnoreRemaining)) + { + output.AppendLine(string.Format(CultureInfo.InvariantCulture, "P: Ignored |{0}|", current)); + } + else + { + if (!expectedSymbols[counter].IsEquivalentTo(current)) + { + output.AppendLine(string.Format(CultureInfo.InvariantCulture, "F: Expected: {0}; Actual: {1}", expectedSymbols[counter], current)); + success = false; + } + else + { + output.AppendLine(string.Format(CultureInfo.InvariantCulture, "P: Expected: {0}", expectedSymbols[counter])); + } + counter++; + } + } + if (counter < expectedSymbols.Length && !ReferenceEquals(expectedSymbols[counter], IgnoreRemaining)) + { + success = false; + for (; counter < expectedSymbols.Length; counter++) + { + output.AppendLine(string.Format(CultureInfo.InvariantCulture, "F: Expected: {0}; Actual: << None >>", expectedSymbols[counter])); + } + } + } + Assert.True(success, Environment.NewLine + output.ToString()); + WriteTraceLine(output.Replace("{", "{{").Replace("}", 
"}}").ToString()); + } + + [Conditional("PARSER_TRACE")] + private static void WriteTraceLine(string format, params object[] args) + { + Trace.WriteLine(string.Format(CultureInfo.InvariantCulture, format, args)); + } +} diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/WhiteSpaceRewriterTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/WhiteSpaceRewriterTest.cs index 18837bc17b7..b17b71c2b86 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/WhiteSpaceRewriterTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/legacyTest/Legacy/WhiteSpaceRewriterTest.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -public class WhiteSpaceRewriterTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true) +public class WhiteSpaceRewriterTest() : ParserTestBase(layer: TestProject.Layer.Compiler, validateSpanEditHandlers: true, useLegacyTokenizer: true) { [Fact] public void Moves_Whitespace_Preceeding_ExpressionBlock_To_Parent_Block() diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/DirectiveTokenEditHandlerTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/DirectiveTokenEditHandlerTest.cs index 897d5b682ec..c06b2d354b3 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/DirectiveTokenEditHandlerTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/DirectiveTokenEditHandlerTest.cs @@ -65,7 +65,7 @@ private static CSharpStatementLiteralSyntax GetSyntaxNode(DirectiveTokenEditHand { using var _ = SyntaxListBuilderPool.GetPooledBuilder(out var builder); - var tokens = CSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray(); + var tokens = NativeCSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray(); foreach (var token in tokens) { builder.Add((SyntaxToken)token.CreateRed()); diff --git 
a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpLanguageCharacteristicsTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpLanguageCharacteristicsTest.cs index 08e4b0bed9d..f01565053a6 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpLanguageCharacteristicsTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpLanguageCharacteristicsTest.cs @@ -13,7 +13,7 @@ public class CSharpLanguageCharacteristicsTest public void GetSample_RightShiftAssign_ReturnsCorrectToken() { // Arrange & Act - var token = CSharpLanguageCharacteristics.Instance.GetSample(SyntaxKind.RightShiftAssign); + var token = NativeCSharpLanguageCharacteristics.Instance.GetSample(SyntaxKind.RightShiftAssign); // Assert Assert.Equal(">>=", token); diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpTokenizerTestBase.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpTokenizerTestBase.cs index ceb4986d631..d4528c1d8d7 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpTokenizerTestBase.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CSharpTokenizerTestBase.cs @@ -18,7 +18,7 @@ internal override object IgnoreRemaining internal override object CreateTokenizer(SeekableTextReader source) { - return new CSharpTokenizer(source); + return new RoslynCSharpTokenizer(source); } internal void TestSingleToken(string text, SyntaxKind expectedTokenKind) diff --git a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CodeBlockEditHandlerTest.cs b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CodeBlockEditHandlerTest.cs index 8ab3a089e6a..4e9f7392d21 100644 --- a/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CodeBlockEditHandlerTest.cs +++ b/src/Compiler/Microsoft.AspNetCore.Razor.Language/test/Legacy/CodeBlockEditHandlerTest.cs @@ -283,7 +283,7 @@ private static SyntaxNode 
GetSpan(SourceLocation start, string content) { using var _ = SyntaxListBuilderPool.GetPooledBuilder(out var builder); - var tokens = CSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray(); + var tokens = NativeCSharpLanguageCharacteristics.Instance.TokenizeString(content).ToArray(); foreach (var token in tokens) { builder.Add((SyntaxToken)token.CreateRed()); diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpCodeParser.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpCodeParser.cs index 851ebd29ef8..a37a09e6c21 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpCodeParser.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpCodeParser.cs @@ -89,7 +89,11 @@ public CSharpCodeParser(ParserContext context) } public CSharpCodeParser(IEnumerable directives, ParserContext context) - : base(context.ParseLeadingDirectives ? FirstDirectiveCSharpLanguageCharacteristics.Instance : CSharpLanguageCharacteristics.Instance, context) + : base(context.ParseLeadingDirectives + ? FirstDirectiveCSharpLanguageCharacteristics.Instance + : context.UseRoslynTokenizer + ? RoslynCSharpLanguageCharacteristics.Instance + : NativeCSharpLanguageCharacteristics.Instance, context) { if (directives == null) { @@ -1981,7 +1985,7 @@ private bool TryParseKeyword( ref readonly PooledArrayBuilder whitespace, CSharpTransitionSyntax? 
transition) { - var result = CSharpTokenizer.GetTokenKeyword(CurrentToken); + var result = _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken); Debug.Assert(CurrentToken.Kind == SyntaxKind.Keyword && result.HasValue); if (_keywordParserMap.TryGetValue(result!.Value, out var handler)) { @@ -1998,7 +2002,7 @@ private bool TryParseKeyword( private bool TryParseKeyword(in SyntaxListBuilder builder) { - var result = CSharpTokenizer.GetTokenKeyword(CurrentToken); + var result = _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken); Debug.Assert(CurrentToken.Kind == SyntaxKind.Keyword && result.HasValue); if (_keywordParserMap.TryGetValue(result!.Value, out var handler)) { @@ -2011,7 +2015,7 @@ private bool TryParseKeyword(in SyntaxListBuilder builder) private bool AtBooleanLiteral() { - return CSharpTokenizer.GetTokenKeyword(CurrentToken) is CSharpSyntaxKind.TrueKeyword or CSharpSyntaxKind.FalseKeyword; + return _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken) is CSharpSyntaxKind.TrueKeyword or CSharpSyntaxKind.FalseKeyword; } private void ParseAwaitExpression(SyntaxListBuilder builder, CSharpTransitionSyntax? transition) @@ -2065,7 +2069,7 @@ private void ParseConditionalBlock(in SyntaxListBuilder builder builder.Add(transition); } - var block = new Block(CurrentToken, CurrentStart); + var block = new Block(GetBlockName(CurrentToken), CurrentStart); ParseConditionalBlock(builder, block); if (topLevel) { @@ -2147,7 +2151,7 @@ private void ParseExpectedCodeBlock(in SyntaxListBuilder builde private void ParseUnconditionalBlock(in SyntaxListBuilder builder) { Assert(SyntaxKind.Keyword); - var block = new Block(CurrentToken, CurrentStart); + var block = new Block(GetBlockName(CurrentToken), CurrentStart); AcceptAndMoveNext(); AcceptWhile(IsSpacingTokenIncludingNewLinesAndComments); ParseExpectedCodeBlock(builder, block); @@ -2162,7 +2166,7 @@ private void ParseCaseStatement(SyntaxListBuilder builder, CSha // If it does, just accept it and let the compiler complain. 
builder.Add(transition); } - var result = CSharpTokenizer.GetTokenKeyword(CurrentToken); + var result = _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken); Debug.Assert(result is CSharpSyntaxKind.CaseKeyword or CSharpSyntaxKind.DefaultKeyword); AcceptAndMoveNext(); while (EnsureCurrent() && CurrentToken.Kind != SyntaxKind.Colon) @@ -2205,7 +2209,7 @@ private void ParseAfterIfClause(SyntaxListBuilder builder) if (At(CSharpSyntaxKind.ElseKeyword)) { Accept(in whitespace); - Assert(CSharpSyntaxKind.ElseKeyword); + Assert(CSharpSyntaxKind.ElseKeyword); ParseElseClause(builder); } else @@ -2223,7 +2227,7 @@ private void ParseElseClause(in SyntaxListBuilder builder) { return; } - var block = new Block(CurrentToken, CurrentStart); + var block = new Block(GetBlockName(CurrentToken), CurrentStart); AcceptAndMoveNext(); AcceptWhile(IsSpacingTokenIncludingNewLinesAndComments); @@ -2291,7 +2295,7 @@ private void ParseFilterableCatchBlock(in SyntaxListBuilder bui { Assert(CSharpSyntaxKind.CatchKeyword); - var block = new Block(CurrentToken, CurrentStart); + var block = new Block(GetBlockName(CurrentToken), CurrentStart); // Accept "catch" AcceptAndMoveNext(); @@ -2367,7 +2371,7 @@ private void ParseUsingKeyword(SyntaxListBuilder builder, CShar { Assert(CSharpSyntaxKind.UsingKeyword); var topLevel = transition != null; - var block = new Block(CurrentToken, CurrentStart); + var block = new Block(GetBlockName(CurrentToken), CurrentStart); var usingToken = EatCurrentToken(); using var whitespaceOrComments = new PooledArrayBuilder(); ReadWhile(IsSpacingTokenIncludingComments, ref whitespaceOrComments.AsRef()); @@ -2913,7 +2917,7 @@ private void ParseEmbeddedTransition(in SyntaxListBuilder build [Conditional("DEBUG")] internal void Assert(CSharpSyntaxKind expectedKeyword) { - var result = CSharpTokenizer.GetTokenKeyword(CurrentToken); + var result = _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken); Debug.Assert(CurrentToken.Kind == SyntaxKind.Keyword && result.HasValue && 
result.Value == expectedKeyword); @@ -2921,12 +2925,22 @@ internal void Assert(CSharpSyntaxKind expectedKeyword) protected internal bool At(CSharpSyntaxKind keyword) { - var result = CSharpTokenizer.GetTokenKeyword(CurrentToken); + var result = _tokenizer.Tokenizer.GetTokenKeyword(CurrentToken); return At(SyntaxKind.Keyword) && result.HasValue && result.Value == keyword; } + private string GetBlockName(SyntaxToken token) + { + var result = _tokenizer.Tokenizer.GetTokenKeyword(token); + if (result is not CSharpSyntaxKind.None and { } value && token.Kind == SyntaxKind.Keyword) + { + return CSharpSyntaxFacts.GetText(value); + } + return token.Content; + } + protected class Block { public Block(string name, SourceLocation start) @@ -2935,23 +2949,8 @@ public Block(string name, SourceLocation start) Start = start; } - public Block(SyntaxToken token, SourceLocation start) - : this(GetName(token), start) - { - } - public string Name { get; set; } public SourceLocation Start { get; set; } - - private static string GetName(SyntaxToken token) - { - var result = CSharpTokenizer.GetTokenKeyword(token); - if (result is not CSharpSyntaxKind.None and { } value && token.Kind == SyntaxKind.Keyword) - { - return CSharpSyntaxFacts.GetText(value); - } - return token.Content; - } } internal class ParsedDirective diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpTokenizer.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpTokenizer.cs index 66bdabb8cef..b3bcc915005 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpTokenizer.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpTokenizer.cs @@ -1,789 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-#nullable disable - -using System; -using System.Collections.Generic; -using System.Collections.Frozen; -using System.Diagnostics; using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax; -using Microsoft.CodeAnalysis.CSharp; - -using SyntaxFactory = Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax.SyntaxFactory; using CSharpSyntaxKind = Microsoft.CodeAnalysis.CSharp.SyntaxKind; namespace Microsoft.AspNetCore.Razor.Language.Legacy; -internal class CSharpTokenizer : Tokenizer +internal abstract class CSharpTokenizer : Tokenizer { - private readonly Dictionary> _operatorHandlers; - - private static readonly FrozenDictionary _keywords = (new[] { - CSharpSyntaxKind.AwaitKeyword, - CSharpSyntaxKind.AbstractKeyword, - CSharpSyntaxKind.ByteKeyword, - CSharpSyntaxKind.ClassKeyword, - CSharpSyntaxKind.DelegateKeyword, - CSharpSyntaxKind.EventKeyword, - CSharpSyntaxKind.FixedKeyword, - CSharpSyntaxKind.IfKeyword, - CSharpSyntaxKind.InternalKeyword, - CSharpSyntaxKind.NewKeyword, - CSharpSyntaxKind.OverrideKeyword, - CSharpSyntaxKind.ReadOnlyKeyword, - CSharpSyntaxKind.ShortKeyword, - CSharpSyntaxKind.StructKeyword, - CSharpSyntaxKind.TryKeyword, - CSharpSyntaxKind.UnsafeKeyword, - CSharpSyntaxKind.VolatileKeyword, - CSharpSyntaxKind.AsKeyword, - CSharpSyntaxKind.DoKeyword, - CSharpSyntaxKind.IsKeyword, - CSharpSyntaxKind.ParamsKeyword, - CSharpSyntaxKind.RefKeyword, - CSharpSyntaxKind.SwitchKeyword, - CSharpSyntaxKind.UShortKeyword, - CSharpSyntaxKind.WhileKeyword, - CSharpSyntaxKind.CaseKeyword, - CSharpSyntaxKind.ConstKeyword, - CSharpSyntaxKind.ExplicitKeyword, - CSharpSyntaxKind.FloatKeyword, - CSharpSyntaxKind.NullKeyword, - CSharpSyntaxKind.SizeOfKeyword, - CSharpSyntaxKind.TypeOfKeyword, - CSharpSyntaxKind.ImplicitKeyword, - CSharpSyntaxKind.PrivateKeyword, - CSharpSyntaxKind.ThisKeyword, - CSharpSyntaxKind.UsingKeyword, - CSharpSyntaxKind.ExternKeyword, - CSharpSyntaxKind.ReturnKeyword, - CSharpSyntaxKind.StackAllocKeyword, - 
CSharpSyntaxKind.UIntKeyword, - CSharpSyntaxKind.BaseKeyword, - CSharpSyntaxKind.CatchKeyword, - CSharpSyntaxKind.ContinueKeyword, - CSharpSyntaxKind.DoubleKeyword, - CSharpSyntaxKind.ForKeyword, - CSharpSyntaxKind.InKeyword, - CSharpSyntaxKind.LockKeyword, - CSharpSyntaxKind.ObjectKeyword, - CSharpSyntaxKind.ProtectedKeyword, - CSharpSyntaxKind.StaticKeyword, - CSharpSyntaxKind.FalseKeyword, - CSharpSyntaxKind.PublicKeyword, - CSharpSyntaxKind.SByteKeyword, - CSharpSyntaxKind.ThrowKeyword, - CSharpSyntaxKind.VirtualKeyword, - CSharpSyntaxKind.DecimalKeyword, - CSharpSyntaxKind.ElseKeyword, - CSharpSyntaxKind.OperatorKeyword, - CSharpSyntaxKind.StringKeyword, - CSharpSyntaxKind.ULongKeyword, - CSharpSyntaxKind.BoolKeyword, - CSharpSyntaxKind.CharKeyword, - CSharpSyntaxKind.DefaultKeyword, - CSharpSyntaxKind.ForEachKeyword, - CSharpSyntaxKind.LongKeyword, - CSharpSyntaxKind.VoidKeyword, - CSharpSyntaxKind.EnumKeyword, - CSharpSyntaxKind.FinallyKeyword, - CSharpSyntaxKind.IntKeyword, - CSharpSyntaxKind.OutKeyword, - CSharpSyntaxKind.SealedKeyword, - CSharpSyntaxKind.TrueKeyword, - CSharpSyntaxKind.GotoKeyword, - CSharpSyntaxKind.UncheckedKeyword, - CSharpSyntaxKind.InterfaceKeyword, - CSharpSyntaxKind.BreakKeyword, - CSharpSyntaxKind.CheckedKeyword, - CSharpSyntaxKind.NamespaceKeyword, - CSharpSyntaxKind.WhenKeyword, - CSharpSyntaxKind.WhereKeyword }).ToFrozenDictionary(keySelector: k => SyntaxFacts.GetText(k)); - - public CSharpTokenizer(SeekableTextReader source) - : base(source) - { - base.CurrentState = StartState; - - _operatorHandlers = new Dictionary>() - { - { '-', MinusOperator }, - { '<', LessThanOperator }, - { '>', GreaterThanOperator }, - { '&', CreateTwoCharOperatorHandler(SyntaxKind.And, '=', SyntaxKind.AndAssign, '&', SyntaxKind.DoubleAnd) }, - { '|', CreateTwoCharOperatorHandler(SyntaxKind.Or, '=', SyntaxKind.OrAssign, '|', SyntaxKind.DoubleOr) }, - { '+', CreateTwoCharOperatorHandler(SyntaxKind.Plus, '=', SyntaxKind.PlusAssign, '+', 
SyntaxKind.Increment) }, - { '=', CreateTwoCharOperatorHandler(SyntaxKind.Assign, '=', SyntaxKind.Equals, '>', SyntaxKind.GreaterThanEqual) }, - { '!', CreateTwoCharOperatorHandler(SyntaxKind.Not, '=', SyntaxKind.NotEqual) }, - { '%', CreateTwoCharOperatorHandler(SyntaxKind.Modulo, '=', SyntaxKind.ModuloAssign) }, - { '*', CreateTwoCharOperatorHandler(SyntaxKind.Star, '=', SyntaxKind.MultiplyAssign) }, - { ':', CreateTwoCharOperatorHandler(SyntaxKind.Colon, ':', SyntaxKind.DoubleColon) }, - { '?', CreateTwoCharOperatorHandler(SyntaxKind.QuestionMark, '?', SyntaxKind.NullCoalesce) }, - { '^', CreateTwoCharOperatorHandler(SyntaxKind.Xor, '=', SyntaxKind.XorAssign) }, - { '(', () => SyntaxKind.LeftParenthesis }, - { ')', () => SyntaxKind.RightParenthesis }, - { '{', () => SyntaxKind.LeftBrace }, - { '}', () => SyntaxKind.RightBrace }, - { '[', () => SyntaxKind.LeftBracket }, - { ']', () => SyntaxKind.RightBracket }, - { ',', () => SyntaxKind.Comma }, - { ';', () => SyntaxKind.Semicolon }, - { '~', () => SyntaxKind.Tilde }, - { '#', () => SyntaxKind.Hash } - }; - } - - protected override int StartState => (int)CSharpTokenizerState.Data; - - private new CSharpTokenizerState? 
CurrentState => (CSharpTokenizerState?)base.CurrentState; - - public override SyntaxKind RazorCommentKind => SyntaxKind.RazorCommentLiteral; - - public override SyntaxKind RazorCommentTransitionKind => SyntaxKind.RazorCommentTransition; - - public override SyntaxKind RazorCommentStarKind => SyntaxKind.RazorCommentStar; - - protected override StateResult Dispatch() - { - switch (CurrentState) - { - case CSharpTokenizerState.Data: - return Data(); - case CSharpTokenizerState.BlockComment: - return BlockComment(); - case CSharpTokenizerState.QuotedCharacterLiteral: - return QuotedCharacterLiteral(); - case CSharpTokenizerState.QuotedStringLiteral: - return QuotedStringLiteral(); - case CSharpTokenizerState.VerbatimStringLiteral: - return VerbatimStringLiteral(); - case CSharpTokenizerState.AfterRazorCommentTransition: - return AfterRazorCommentTransition(); - case CSharpTokenizerState.EscapedRazorCommentTransition: - return EscapedRazorCommentTransition(); - case CSharpTokenizerState.RazorCommentBody: - return RazorCommentBody(); - case CSharpTokenizerState.StarAfterRazorCommentBody: - return StarAfterRazorCommentBody(); - case CSharpTokenizerState.AtTokenAfterRazorCommentBody: - return AtTokenAfterRazorCommentBody(); - default: - Debug.Fail("Invalid TokenizerState"); - return default(StateResult); - } - } - - // Optimize memory allocation by returning constants for the most frequent cases - protected override string GetTokenContent(SyntaxKind type) - { - var tokenLength = Buffer.Length; - - if (tokenLength == 1) - { - switch (type) - { - case SyntaxKind.IntegerLiteral: - switch (Buffer[0]) - { - case '0': - return "0"; - case '1': - return "1"; - case '2': - return "2"; - case '3': - return "3"; - case '4': - return "4"; - case '5': - return "5"; - case '6': - return "6"; - case '7': - return "7"; - case '8': - return "8"; - case '9': - return "9"; - } - break; - case SyntaxKind.NewLine: - if (Buffer[0] == '\n') - { - return "\n"; - } - break; - case 
SyntaxKind.Whitespace: - if (Buffer[0] == ' ') - { - return " "; - } - if (Buffer[0] == '\t') - { - return "\t"; - } - break; - case SyntaxKind.Minus: - return "-"; - case SyntaxKind.Not: - return "!"; - case SyntaxKind.Modulo: - return "%"; - case SyntaxKind.And: - return "&"; - case SyntaxKind.LeftParenthesis: - return "("; - case SyntaxKind.RightParenthesis: - return ")"; - case SyntaxKind.Star: - return "*"; - case SyntaxKind.Comma: - return ","; - case SyntaxKind.Dot: - return "."; - case SyntaxKind.Slash: - return "/"; - case SyntaxKind.Colon: - return ":"; - case SyntaxKind.Semicolon: - return ";"; - case SyntaxKind.QuestionMark: - return "?"; - case SyntaxKind.RightBracket: - return "]"; - case SyntaxKind.LeftBracket: - return "["; - case SyntaxKind.Xor: - return "^"; - case SyntaxKind.LeftBrace: - return "{"; - case SyntaxKind.Or: - return "|"; - case SyntaxKind.RightBrace: - return "}"; - case SyntaxKind.Tilde: - return "~"; - case SyntaxKind.Plus: - return "+"; - case SyntaxKind.LessThan: - return "<"; - case SyntaxKind.Assign: - return "="; - case SyntaxKind.GreaterThan: - return ">"; - case SyntaxKind.Hash: - return "#"; - case SyntaxKind.Transition: - return "@"; - - } - } - else if (tokenLength == 2) - { - switch (type) - { - case SyntaxKind.NewLine: - return "\r\n"; - case SyntaxKind.Arrow: - return "->"; - case SyntaxKind.Decrement: - return "--"; - case SyntaxKind.MinusAssign: - return "-="; - case SyntaxKind.NotEqual: - return "!="; - case SyntaxKind.ModuloAssign: - return "%="; - case SyntaxKind.AndAssign: - return "&="; - case SyntaxKind.DoubleAnd: - return "&&"; - case SyntaxKind.MultiplyAssign: - return "*="; - case SyntaxKind.DivideAssign: - return "/="; - case SyntaxKind.DoubleColon: - return "::"; - case SyntaxKind.NullCoalesce: - return "??"; - case SyntaxKind.XorAssign: - return "^="; - case SyntaxKind.OrAssign: - return "|="; - case SyntaxKind.DoubleOr: - return "||"; - case SyntaxKind.PlusAssign: - return "+="; - case 
SyntaxKind.Increment: - return "++"; - case SyntaxKind.LessThanEqual: - return "<="; - case SyntaxKind.LeftShift: - return "<<"; - case SyntaxKind.Equals: - return "=="; - case SyntaxKind.GreaterThanEqual: - if (Buffer[0] == '=') - { - return "=>"; - } - return ">="; - case SyntaxKind.RightShift: - return ">>"; - - - } - } - else if (tokenLength == 3) - { - switch (type) - { - case SyntaxKind.LeftShiftAssign: - return "<<="; - case SyntaxKind.RightShiftAssign: - return ">>="; - } - } - - return base.GetTokenContent(type); - } - - protected override SyntaxToken CreateToken(string content, SyntaxKind kind, RazorDiagnostic[] errors) - { - return SyntaxFactory.Token(kind, content, errors); - } - - private StateResult Data() - { - if (SyntaxFacts.IsNewLine(CurrentCharacter)) - { - // CSharp Spec §2.3.1 - var checkTwoCharNewline = CurrentCharacter == '\r'; - TakeCurrent(); - if (checkTwoCharNewline && CurrentCharacter == '\n') - { - TakeCurrent(); - } - return Stay(EndToken(SyntaxKind.NewLine)); - } - else if (SyntaxFacts.IsWhitespace(CurrentCharacter)) - { - // CSharp Spec §2.3.3 - TakeUntil(c => !SyntaxFacts.IsWhitespace(c)); - return Stay(EndToken(SyntaxKind.Whitespace)); - } - else if (SyntaxFacts.IsIdentifierStartCharacter(CurrentCharacter)) - { - return Identifier(); - } - else if (char.IsDigit(CurrentCharacter)) - { - return NumericLiteral(); - } - switch (CurrentCharacter) - { - case '@': - return AtToken(); - case '\'': - TakeCurrent(); - return Transition(CSharpTokenizerState.QuotedCharacterLiteral); - case '"': - TakeCurrent(); - return Transition(CSharpTokenizerState.QuotedStringLiteral); - case '.': - if (char.IsDigit(Peek())) - { - return RealLiteral(); - } - return Stay(Single(SyntaxKind.Dot)); - case '/': - TakeCurrent(); - if (CurrentCharacter == '/') - { - TakeCurrent(); - return SingleLineComment(); - } - else if (CurrentCharacter == '*') - { - TakeCurrent(); - return Transition(CSharpTokenizerState.BlockComment); - } - else if (CurrentCharacter == 
'=') - { - TakeCurrent(); - return Stay(EndToken(SyntaxKind.DivideAssign)); - } - else - { - return Stay(EndToken(SyntaxKind.Slash)); - } - default: - return Stay(EndToken(Operator())); - } - } - - private StateResult AtToken() - { - TakeCurrent(); - if (CurrentCharacter == '"') - { - TakeCurrent(); - return Transition(CSharpTokenizerState.VerbatimStringLiteral); - } - else if (CurrentCharacter == '*') - { - return Transition( - CSharpTokenizerState.AfterRazorCommentTransition, - EndToken(SyntaxKind.RazorCommentTransition)); - } - else if (CurrentCharacter == '@') - { - // Could be escaped comment transition - return Transition( - CSharpTokenizerState.EscapedRazorCommentTransition, - EndToken(SyntaxKind.Transition)); - } - - return Stay(EndToken(SyntaxKind.Transition)); - } - - private StateResult EscapedRazorCommentTransition() - { - TakeCurrent(); - return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.Transition)); - } - - private SyntaxKind Operator() - { - var first = CurrentCharacter; - TakeCurrent(); - Func handler; - if (_operatorHandlers.TryGetValue(first, out handler)) - { - return handler(); - } - return SyntaxKind.Marker; - } - - private SyntaxKind LessThanOperator() - { - if (CurrentCharacter == '=') - { - TakeCurrent(); - return SyntaxKind.LessThanEqual; - } - return SyntaxKind.LessThan; - } - - private SyntaxKind GreaterThanOperator() - { - if (CurrentCharacter == '=') - { - TakeCurrent(); - return SyntaxKind.GreaterThanEqual; - } - return SyntaxKind.GreaterThan; - } - - private SyntaxKind MinusOperator() - { - if (CurrentCharacter == '>') - { - TakeCurrent(); - return SyntaxKind.Arrow; - } - else if (CurrentCharacter == '-') - { - TakeCurrent(); - return SyntaxKind.Decrement; - } - else if (CurrentCharacter == '=') - { - TakeCurrent(); - return SyntaxKind.MinusAssign; - } - return SyntaxKind.Minus; - } - - private Func CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char second, SyntaxKind typeIfBoth) - { - return () => - { - if 
(CurrentCharacter == second) - { - TakeCurrent(); - return typeIfBoth; - } - return typeIfOnlyFirst; - }; - } - - private Func CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char option1, SyntaxKind typeIfOption1, char option2, SyntaxKind typeIfOption2) - { - return () => - { - if (CurrentCharacter == option1) - { - TakeCurrent(); - return typeIfOption1; - } - else if (CurrentCharacter == option2) - { - TakeCurrent(); - return typeIfOption2; - } - return typeIfOnlyFirst; - }; - } - - private StateResult VerbatimStringLiteral() - { - TakeUntil(c => c == '"'); - if (CurrentCharacter == '"') - { - TakeCurrent(); - if (CurrentCharacter == '"') - { - TakeCurrent(); - // Stay in the literal, this is an escaped " - return Stay(); - } - } - else if (EndOfFile) - { - CurrentErrors.Add( - RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral( - new SourceSpan(CurrentStart, contentLength: 1 /* end of file */))); - } - return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.StringLiteral)); - } - - private StateResult QuotedCharacterLiteral() => QuotedLiteral('\'', IsEndQuotedCharacterLiteral, SyntaxKind.CharacterLiteral); - - private StateResult QuotedStringLiteral() => QuotedLiteral('\"', IsEndQuotedStringLiteral, SyntaxKind.StringLiteral); - - private static readonly Func IsEndQuotedCharacterLiteral = static (c) => c == '\\' || c == '\'' || SyntaxFacts.IsNewLine(c); - private static readonly Func IsEndQuotedStringLiteral = static (c) => c == '\\' || c == '\"' || SyntaxFacts.IsNewLine(c); - - private StateResult QuotedLiteral(char quote, Func isEndQuotedLiteral, SyntaxKind literalType) - { - TakeUntil(isEndQuotedLiteral); - if (CurrentCharacter == '\\') - { - TakeCurrent(); // Take the '\' - - // If the next char is the same quote that started this - if (CurrentCharacter == quote || CurrentCharacter == '\\') - { - TakeCurrent(); // Take it so that we don't prematurely end the literal. 
- } - return Stay(); - } - else if (EndOfFile || SyntaxFacts.IsNewLine(CurrentCharacter)) - { - CurrentErrors.Add( - RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral( - new SourceSpan(CurrentStart, contentLength: 1 /* " */))); - } - else - { - TakeCurrent(); // No-op if at EOF - } - return Transition(CSharpTokenizerState.Data, EndToken(literalType)); - } - - // CSharp Spec §2.3.2 - private StateResult BlockComment() - { - TakeUntil(c => c == '*'); - if (EndOfFile) - { - CurrentErrors.Add( - RazorDiagnosticFactory.CreateParsing_BlockCommentNotTerminated( - new SourceSpan(CurrentStart, contentLength: 1 /* end of file */))); - - return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment)); - } - if (CurrentCharacter == '*') - { - TakeCurrent(); - if (CurrentCharacter == '/') - { - TakeCurrent(); - return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment)); - } - } - return Stay(); - } - - // CSharp Spec §2.3.2 - private StateResult SingleLineComment() - { - TakeUntil(c => SyntaxFacts.IsNewLine(c)); - return Stay(EndToken(SyntaxKind.CSharpComment)); - } - - // CSharp Spec §2.4.4 - private StateResult NumericLiteral() - { - if (TakeAll("0x", caseSensitive: true)) - { - return HexLiteral(); - } - else - { - return DecimalLiteral(); - } - } - - private StateResult HexLiteral() - { - TakeUntil(c => !IsHexDigit(c)); - TakeIntegerSuffix(); - return Stay(EndToken(SyntaxKind.IntegerLiteral)); - } - - private StateResult DecimalLiteral() - { - TakeUntil(c => !Char.IsDigit(c)); - if (CurrentCharacter == '.' 
&& Char.IsDigit(Peek())) - { - return RealLiteral(); - } - else if (IsRealLiteralSuffix(CurrentCharacter) || - CurrentCharacter == 'E' || CurrentCharacter == 'e') - { - return RealLiteralExponentPart(); - } - else - { - TakeIntegerSuffix(); - return Stay(EndToken(SyntaxKind.IntegerLiteral)); - } - } - - private StateResult RealLiteralExponentPart() - { - if (CurrentCharacter == 'E' || CurrentCharacter == 'e') - { - TakeCurrent(); - if (CurrentCharacter == '+' || CurrentCharacter == '-') - { - TakeCurrent(); - } - TakeUntil(c => !Char.IsDigit(c)); - } - if (IsRealLiteralSuffix(CurrentCharacter)) - { - TakeCurrent(); - } - return Stay(EndToken(SyntaxKind.RealLiteral)); - } - - // CSharp Spec §2.4.4.3 - private StateResult RealLiteral() + protected CSharpTokenizer(SeekableTextReader source) : base(source) { - AssertCurrent('.'); - TakeCurrent(); - Debug.Assert(Char.IsDigit(CurrentCharacter)); - TakeUntil(c => !Char.IsDigit(c)); - return RealLiteralExponentPart(); } - private void TakeIntegerSuffix() - { - if (Char.ToLowerInvariant(CurrentCharacter) == 'u') - { - TakeCurrent(); - if (Char.ToLowerInvariant(CurrentCharacter) == 'l') - { - TakeCurrent(); - } - } - else if (Char.ToLowerInvariant(CurrentCharacter) == 'l') - { - TakeCurrent(); - if (Char.ToLowerInvariant(CurrentCharacter) == 'u') - { - TakeCurrent(); - } - } - } - - // CSharp Spec §2.4.2 - private StateResult Identifier() - { - Debug.Assert(SyntaxFacts.IsIdentifierStartCharacter(CurrentCharacter)); - TakeCurrent(); - TakeUntil(c => !SyntaxFacts.IsIdentifierPartCharacter(c)); - SyntaxToken token = null; - if (HaveContent) - { - var type = SyntaxKind.Identifier; - var tokenContent = Buffer.ToString(); - if (_keywords.TryGetValue(tokenContent, value: out _)) - { - type = SyntaxKind.Keyword; - } - - token = SyntaxFactory.Token(type, tokenContent); - - Buffer.Clear(); - CurrentErrors.Clear(); - } - - return Stay(token); - } - - private StateResult Transition(CSharpTokenizerState state) - { - return 
Transition((int)state, result: null); - } - - private StateResult Transition(CSharpTokenizerState state, SyntaxToken result) - { - return Transition((int)state, result); - } - - private static bool IsRealLiteralSuffix(char character) - { - return character == 'F' || - character == 'f' || - character == 'D' || - character == 'd' || - character == 'M' || - character == 'm'; - } - - private static bool IsHexDigit(char value) - { - return (value >= '0' && value <= '9') || (value >= 'A' && value <= 'F') || (value >= 'a' && value <= 'f'); - } - - internal static CSharpSyntaxKind? GetTokenKeyword(SyntaxToken token) - { - if (token != null && _keywords.TryGetValue(token.Content, out var keyword)) - { - return keyword; - } - - return null; - } - - private enum CSharpTokenizerState - { - Data, - BlockComment, - QuotedCharacterLiteral, - QuotedStringLiteral, - VerbatimStringLiteral, - - // Razor Comments - need to be the same for HTML and CSharp - AfterRazorCommentTransition = RazorCommentTokenizerState.AfterRazorCommentTransition, - EscapedRazorCommentTransition = RazorCommentTokenizerState.EscapedRazorCommentTransition, - RazorCommentBody = RazorCommentTokenizerState.RazorCommentBody, - StarAfterRazorCommentBody = RazorCommentTokenizerState.StarAfterRazorCommentBody, - AtTokenAfterRazorCommentBody = RazorCommentTokenizerState.AtTokenAfterRazorCommentBody, - } + internal abstract CSharpSyntaxKind? 
GetTokenKeyword(SyntaxToken token); } diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/DirectiveCSharpTokenizer.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/DirectiveCSharpTokenizer.cs index 19f520be306..3d2d81c63f7 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/DirectiveCSharpTokenizer.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/DirectiveCSharpTokenizer.cs @@ -8,7 +8,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -internal class DirectiveCSharpTokenizer(SeekableTextReader source) : CSharpTokenizer(source) +internal class DirectiveCSharpTokenizer(SeekableTextReader source) : NativeCSharpTokenizer(source) { private bool _visitedFirstTokenStart; private bool _visitedFirstTokenLineEnd; diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/FirstDirectiveCSharpLanguageCharacteristics.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/FirstDirectiveCSharpLanguageCharacteristics.cs index 723deba420d..29360d399f3 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/FirstDirectiveCSharpLanguageCharacteristics.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/FirstDirectiveCSharpLanguageCharacteristics.cs @@ -5,7 +5,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; -internal class FirstDirectiveCSharpLanguageCharacteristics : CSharpLanguageCharacteristics +internal class FirstDirectiveCSharpLanguageCharacteristics : NativeCSharpLanguageCharacteristics { private FirstDirectiveCSharpLanguageCharacteristics() { diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpLanguageCharacteristics.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpLanguageCharacteristics.cs similarity index 94% rename from 
src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpLanguageCharacteristics.cs rename to src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpLanguageCharacteristics.cs index 28008335175..a62f11cffc0 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/CSharpLanguageCharacteristics.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpLanguageCharacteristics.cs @@ -10,7 +10,7 @@ namespace Microsoft.AspNetCore.Razor.Language.Legacy; // Removal of this type is tracked by https://github.com/dotnet/razor/issues/8445 -internal class CSharpLanguageCharacteristics : LanguageCharacteristics +internal class NativeCSharpLanguageCharacteristics : LanguageCharacteristics { private static readonly Dictionary _tokenSamples = new Dictionary() { @@ -65,17 +65,17 @@ internal class CSharpLanguageCharacteristics : LanguageCharacteristics _instance; + public static NativeCSharpLanguageCharacteristics Instance => _instance; public override CSharpTokenizer CreateTokenizer(SeekableTextReader source) { - return new CSharpTokenizer(source); + return new NativeCSharpTokenizer(source); } protected override SyntaxToken CreateToken(string content, SyntaxKind kind, RazorDiagnostic[] errors) diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpTokenizer.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpTokenizer.cs new file mode 100644 index 00000000000..1b42a196411 --- /dev/null +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/NativeCSharpTokenizer.cs @@ -0,0 +1,793 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#nullable disable + +using System; +using System.Collections.Generic; +using System.Collections.Frozen; +using System.Diagnostics; +using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax; +using Microsoft.CodeAnalysis.CSharp; + +using SyntaxFactory = Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax.SyntaxFactory; +using CSharpSyntaxKind = Microsoft.CodeAnalysis.CSharp.SyntaxKind; + +namespace Microsoft.AspNetCore.Razor.Language.Legacy; + +/// +/// This is the old tokenizer that was used in Razor. It natively implemented tokenization of C#, rather than using Roslyn. It is maintained for +/// backwards compatibility, controlled by user using a Feature flag in their project file. +/// +internal class NativeCSharpTokenizer : CSharpTokenizer +{ + private readonly Dictionary> _operatorHandlers; + + private static readonly FrozenDictionary _keywords = (new[] { + CSharpSyntaxKind.AwaitKeyword, + CSharpSyntaxKind.AbstractKeyword, + CSharpSyntaxKind.ByteKeyword, + CSharpSyntaxKind.ClassKeyword, + CSharpSyntaxKind.DelegateKeyword, + CSharpSyntaxKind.EventKeyword, + CSharpSyntaxKind.FixedKeyword, + CSharpSyntaxKind.IfKeyword, + CSharpSyntaxKind.InternalKeyword, + CSharpSyntaxKind.NewKeyword, + CSharpSyntaxKind.OverrideKeyword, + CSharpSyntaxKind.ReadOnlyKeyword, + CSharpSyntaxKind.ShortKeyword, + CSharpSyntaxKind.StructKeyword, + CSharpSyntaxKind.TryKeyword, + CSharpSyntaxKind.UnsafeKeyword, + CSharpSyntaxKind.VolatileKeyword, + CSharpSyntaxKind.AsKeyword, + CSharpSyntaxKind.DoKeyword, + CSharpSyntaxKind.IsKeyword, + CSharpSyntaxKind.ParamsKeyword, + CSharpSyntaxKind.RefKeyword, + CSharpSyntaxKind.SwitchKeyword, + CSharpSyntaxKind.UShortKeyword, + CSharpSyntaxKind.WhileKeyword, + CSharpSyntaxKind.CaseKeyword, + CSharpSyntaxKind.ConstKeyword, + CSharpSyntaxKind.ExplicitKeyword, + CSharpSyntaxKind.FloatKeyword, + CSharpSyntaxKind.NullKeyword, + CSharpSyntaxKind.SizeOfKeyword, + CSharpSyntaxKind.TypeOfKeyword, + CSharpSyntaxKind.ImplicitKeyword, + 
CSharpSyntaxKind.PrivateKeyword, + CSharpSyntaxKind.ThisKeyword, + CSharpSyntaxKind.UsingKeyword, + CSharpSyntaxKind.ExternKeyword, + CSharpSyntaxKind.ReturnKeyword, + CSharpSyntaxKind.StackAllocKeyword, + CSharpSyntaxKind.UIntKeyword, + CSharpSyntaxKind.BaseKeyword, + CSharpSyntaxKind.CatchKeyword, + CSharpSyntaxKind.ContinueKeyword, + CSharpSyntaxKind.DoubleKeyword, + CSharpSyntaxKind.ForKeyword, + CSharpSyntaxKind.InKeyword, + CSharpSyntaxKind.LockKeyword, + CSharpSyntaxKind.ObjectKeyword, + CSharpSyntaxKind.ProtectedKeyword, + CSharpSyntaxKind.StaticKeyword, + CSharpSyntaxKind.FalseKeyword, + CSharpSyntaxKind.PublicKeyword, + CSharpSyntaxKind.SByteKeyword, + CSharpSyntaxKind.ThrowKeyword, + CSharpSyntaxKind.VirtualKeyword, + CSharpSyntaxKind.DecimalKeyword, + CSharpSyntaxKind.ElseKeyword, + CSharpSyntaxKind.OperatorKeyword, + CSharpSyntaxKind.StringKeyword, + CSharpSyntaxKind.ULongKeyword, + CSharpSyntaxKind.BoolKeyword, + CSharpSyntaxKind.CharKeyword, + CSharpSyntaxKind.DefaultKeyword, + CSharpSyntaxKind.ForEachKeyword, + CSharpSyntaxKind.LongKeyword, + CSharpSyntaxKind.VoidKeyword, + CSharpSyntaxKind.EnumKeyword, + CSharpSyntaxKind.FinallyKeyword, + CSharpSyntaxKind.IntKeyword, + CSharpSyntaxKind.OutKeyword, + CSharpSyntaxKind.SealedKeyword, + CSharpSyntaxKind.TrueKeyword, + CSharpSyntaxKind.GotoKeyword, + CSharpSyntaxKind.UncheckedKeyword, + CSharpSyntaxKind.InterfaceKeyword, + CSharpSyntaxKind.BreakKeyword, + CSharpSyntaxKind.CheckedKeyword, + CSharpSyntaxKind.NamespaceKeyword, + CSharpSyntaxKind.WhenKeyword, + CSharpSyntaxKind.WhereKeyword }).ToFrozenDictionary(keySelector: k => SyntaxFacts.GetText(k)); + + public NativeCSharpTokenizer(SeekableTextReader source) + : base(source) + { + base.CurrentState = StartState; + + _operatorHandlers = new Dictionary>() + { + { '-', MinusOperator }, + { '<', LessThanOperator }, + { '>', GreaterThanOperator }, + { '&', CreateTwoCharOperatorHandler(SyntaxKind.And, '=', SyntaxKind.AndAssign, '&', SyntaxKind.DoubleAnd) }, 
+            { '|', CreateTwoCharOperatorHandler(SyntaxKind.Or, '=', SyntaxKind.OrAssign, '|', SyntaxKind.DoubleOr) },
+            { '+', CreateTwoCharOperatorHandler(SyntaxKind.Plus, '=', SyntaxKind.PlusAssign, '+', SyntaxKind.Increment) },
+            // "=>" is reported as GreaterThanEqual; GetTokenContent tells it apart from ">=" by the first buffered character.
+            { '=', CreateTwoCharOperatorHandler(SyntaxKind.Assign, '=', SyntaxKind.Equals, '>', SyntaxKind.GreaterThanEqual) },
+            { '!', CreateTwoCharOperatorHandler(SyntaxKind.Not, '=', SyntaxKind.NotEqual) },
+            { '%', CreateTwoCharOperatorHandler(SyntaxKind.Modulo, '=', SyntaxKind.ModuloAssign) },
+            { '*', CreateTwoCharOperatorHandler(SyntaxKind.Star, '=', SyntaxKind.MultiplyAssign) },
+            { ':', CreateTwoCharOperatorHandler(SyntaxKind.Colon, ':', SyntaxKind.DoubleColon) },
+            { '?', CreateTwoCharOperatorHandler(SyntaxKind.QuestionMark, '?', SyntaxKind.NullCoalesce) },
+            { '^', CreateTwoCharOperatorHandler(SyntaxKind.Xor, '=', SyntaxKind.XorAssign) },
+            { '(', () => SyntaxKind.LeftParenthesis },
+            { ')', () => SyntaxKind.RightParenthesis },
+            { '{', () => SyntaxKind.LeftBrace },
+            { '}', () => SyntaxKind.RightBrace },
+            { '[', () => SyntaxKind.LeftBracket },
+            { ']', () => SyntaxKind.RightBracket },
+            { ',', () => SyntaxKind.Comma },
+            { ';', () => SyntaxKind.Semicolon },
+            { '~', () => SyntaxKind.Tilde },
+            { '#', () => SyntaxKind.Hash }
+        };
+    }
+
+    /// <summary>Tokenization starts in the <see cref="CSharpTokenizerState.Data"/> state.</summary>
+    protected override int StartState => (int)CSharpTokenizerState.Data;
+
+    /// <summary>Hides the base state so it can be read as a <see cref="CSharpTokenizerState"/>.</summary>
+    private new CSharpTokenizerState? CurrentState => (CSharpTokenizerState?)base.CurrentState;
+
+    public override SyntaxKind RazorCommentKind => SyntaxKind.RazorCommentLiteral;
+
+    public override SyntaxKind RazorCommentTransitionKind => SyntaxKind.RazorCommentTransition;
+
+    public override SyntaxKind RazorCommentStarKind => SyntaxKind.RazorCommentStar;
+
+    /// <summary>Runs the handler that belongs to <see cref="CurrentState"/>.</summary>
+    /// <returns>
+    /// The handler's result. An unrecognized state triggers <see cref="Debug.Fail(string)"/> and
+    /// yields <c>default(StateResult)</c>.
+    /// </returns>
+    protected override StateResult Dispatch()
+    {
+        switch (CurrentState)
+        {
+            case CSharpTokenizerState.Data:
+                return Data();
+            case CSharpTokenizerState.BlockComment:
+                return BlockComment();
+            case CSharpTokenizerState.QuotedCharacterLiteral:
+                return QuotedCharacterLiteral();
+            case CSharpTokenizerState.QuotedStringLiteral:
+                return QuotedStringLiteral();
+            case CSharpTokenizerState.VerbatimStringLiteral:
+                return VerbatimStringLiteral();
+            // The Razor comment handlers below are not defined in this part of the file.
+            case CSharpTokenizerState.AfterRazorCommentTransition:
+                return AfterRazorCommentTransition();
+            case CSharpTokenizerState.EscapedRazorCommentTransition:
+                return EscapedRazorCommentTransition();
+            case CSharpTokenizerState.RazorCommentBody:
+                return RazorCommentBody();
+            case CSharpTokenizerState.StarAfterRazorCommentBody:
+                return StarAfterRazorCommentBody();
+            case CSharpTokenizerState.AtTokenAfterRazorCommentBody:
+                return AtTokenAfterRazorCommentBody();
+            default:
+                Debug.Fail("Invalid TokenizerState");
+                return default(StateResult);
+        }
+    }
+
+    /// <summary>
+    /// Returns the text of the token currently held in <c>Buffer</c>.
+    /// Optimizes memory allocation by returning constants for the most frequent cases.
+    /// </summary>
+    /// <param name="type">Kind of the token being ended.</param>
+    /// <returns>
+    /// A constant string when the kind and buffer length (1, 2 or 3) match a known short token;
+    /// otherwise whatever the base implementation returns.
+    /// </returns>
+    protected override string GetTokenContent(SyntaxKind type)
+    {
+        var tokenLength = Buffer.Length;
+        string content = null;
+
+        if (tokenLength == 1)
+        {
+            var first = Buffer[0];
+            content = type switch
+            {
+                SyntaxKind.IntegerLiteral => first switch
+                {
+                    '0' => "0",
+                    '1' => "1",
+                    '2' => "2",
+                    '3' => "3",
+                    '4' => "4",
+                    '5' => "5",
+                    '6' => "6",
+                    '7' => "7",
+                    '8' => "8",
+                    '9' => "9",
+                    _ => null,
+                },
+                SyntaxKind.NewLine when first == '\n' => "\n",
+                SyntaxKind.Whitespace when first == ' ' => " ",
+                SyntaxKind.Whitespace when first == '\t' => "\t",
+                SyntaxKind.Minus => "-",
+                SyntaxKind.Not => "!",
+                SyntaxKind.Modulo => "%",
+                SyntaxKind.And => "&",
+                SyntaxKind.LeftParenthesis => "(",
+                SyntaxKind.RightParenthesis => ")",
+                SyntaxKind.Star => "*",
+                SyntaxKind.Comma => ",",
+                SyntaxKind.Dot => ".",
+                SyntaxKind.Slash => "/",
+                SyntaxKind.Colon => ":",
+                SyntaxKind.Semicolon => ";",
+                SyntaxKind.QuestionMark => "?",
+                SyntaxKind.RightBracket => "]",
+                SyntaxKind.LeftBracket => "[",
+                SyntaxKind.Xor => "^",
+                SyntaxKind.LeftBrace => "{",
+                SyntaxKind.Or => "|",
+                SyntaxKind.RightBrace => "}",
+                SyntaxKind.Tilde => "~",
+                SyntaxKind.Plus => "+",
+                SyntaxKind.LessThan => "<",
+                SyntaxKind.Assign => "=",
+                SyntaxKind.GreaterThan => ">",
+                SyntaxKind.Hash => "#",
+                SyntaxKind.Transition => "@",
+                _ => null,
+            };
+        }
+        else if (tokenLength == 2)
+        {
+            content = type switch
+            {
+                SyntaxKind.NewLine => "\r\n",
+                SyntaxKind.Arrow => "->",
+                SyntaxKind.Decrement => "--",
+                SyntaxKind.MinusAssign => "-=",
+                SyntaxKind.NotEqual => "!=",
+                SyntaxKind.ModuloAssign => "%=",
+                SyntaxKind.AndAssign => "&=",
+                SyntaxKind.DoubleAnd => "&&",
+                SyntaxKind.MultiplyAssign => "*=",
+                SyntaxKind.DivideAssign => "/=",
+                SyntaxKind.DoubleColon => "::",
+                SyntaxKind.NullCoalesce => "??",
+                SyntaxKind.XorAssign => "^=",
+                SyntaxKind.OrAssign => "|=",
+                SyntaxKind.DoubleOr => "||",
+                SyntaxKind.PlusAssign => "+=",
+                SyntaxKind.Increment => "++",
+                SyntaxKind.LessThanEqual => "<=",
+                SyntaxKind.LeftShift => "<<",
+                SyntaxKind.Equals => "==",
+                // The '=' operator handler also reports "=>" as GreaterThanEqual, so the first character decides.
+                SyntaxKind.GreaterThanEqual => Buffer[0] == '=' ? "=>" : ">=",
+                SyntaxKind.RightShift => ">>",
+                _ => null,
+            };
+        }
+        else if (tokenLength == 3)
+        {
+            content = type switch
+            {
+                SyntaxKind.LeftShiftAssign => "<<=",
+                SyntaxKind.RightShiftAssign => ">>=",
+                _ => null,
+            };
+        }
+
+        // No cached constant applies: let the base class produce the text.
+        return content ?? base.GetTokenContent(type);
+    }
+
+    /// <summary>Creates a token of <paramref name="kind"/> through <c>SyntaxFactory.Token</c>.</summary>
+    protected override SyntaxToken CreateToken(string content, SyntaxKind kind, RazorDiagnostic[] errors)
+    {
+        return SyntaxFactory.Token(kind, content, errors);
+    }
+
+    // Handler for the Data state: classifies the token that starts at CurrentCharacter.
+    // New lines, whitespace, identifiers and numbers are checked first; '@', quotes, '.' and '/'
+    // get dedicated handling; everything else is treated as an operator.
+    private StateResult Data()
+    {
+        if (SyntaxFacts.IsNewLine(CurrentCharacter))
+        {
+            // CSharp Spec §2.3.1
+            // "\r\n" is taken as a single NewLine token.
+            var checkTwoCharNewline = CurrentCharacter == '\r';
+            TakeCurrent();
+            if (checkTwoCharNewline && CurrentCharacter == '\n')
+            {
+                TakeCurrent();
+            }
+            return Stay(EndToken(SyntaxKind.NewLine));
+        }
+        else if (SyntaxFacts.IsWhitespace(CurrentCharacter))
+        {
+            // CSharp Spec §2.3.3
+            TakeUntil(c => !SyntaxFacts.IsWhitespace(c));
+            return Stay(EndToken(SyntaxKind.Whitespace));
+        }
+        else if (SyntaxFacts.IsIdentifierStartCharacter(CurrentCharacter))
+        {
+            return Identifier();
+        }
+        else if (char.IsDigit(CurrentCharacter))
+        {
+            return NumericLiteral();
+        }
+        switch (CurrentCharacter)
+        {
+            case '@':
+                return AtToken();
+            case '\'':
+                TakeCurrent();
+                return Transition(CSharpTokenizerState.QuotedCharacterLiteral);
+            case '"':
+                TakeCurrent();
+                return Transition(CSharpTokenizerState.QuotedStringLiteral);
+            case '.':
+                // ".5" starts a real literal; a lone '.' is the Dot token.
+                if (char.IsDigit(Peek()))
+                {
+                    return RealLiteral();
+                }
+                return Stay(Single(SyntaxKind.Dot));
+            case '/':
+                TakeCurrent();
+                if (CurrentCharacter == '/')
+                {
+                    TakeCurrent();
+                    return SingleLineComment();
+                }
+                else if (CurrentCharacter == '*')
+                {
+                    TakeCurrent();
+                    return Transition(CSharpTokenizerState.BlockComment);
+                }
+                else if (CurrentCharacter == '=')
+                {
+                    TakeCurrent();
+                    return Stay(EndToken(SyntaxKind.DivideAssign));
+                }
+                else
+                {
+                    return Stay(EndToken(SyntaxKind.Slash));
+                }
+            default:
+                return Stay(EndToken(Operator()));
+        }
+    }
+
+    // Handles '@': start of a verbatim string (@"), start of a Razor comment (@*),
+    // a doubled '@', or a plain transition.
+    private StateResult AtToken()
+    {
+        TakeCurrent();
+        if (CurrentCharacter == '"')
+        {
+            TakeCurrent();
+            return Transition(CSharpTokenizerState.VerbatimStringLiteral);
+        }
+        else if (CurrentCharacter == '*')
+        {
+            // The '*' is not taken here, so the transition token holds only the '@'.
+            return Transition(
+                CSharpTokenizerState.AfterRazorCommentTransition,
+                EndToken(SyntaxKind.RazorCommentTransition));
+        }
+        else if (CurrentCharacter == '@')
+        {
+            // Could be escaped comment transition
+            return Transition(
+                CSharpTokenizerState.EscapedRazorCommentTransition,
+                EndToken(SyntaxKind.Transition));
+        }
+
+        return Stay(EndToken(SyntaxKind.Transition));
+    }
+
+    // Takes the second '@' of "@@" as its own Transition token and returns to the Data state.
+    private StateResult EscapedRazorCommentTransition()
+    {
+        TakeCurrent();
+        return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.Transition));
+    }
+
+    // Takes the current character and lets its registered handler pick the token kind
+    // (the handler may take a following character). Characters without a handler yield Marker.
+    private SyntaxKind Operator()
+    {
+        var first = CurrentCharacter;
+        TakeCurrent();
+        if (_operatorHandlers.TryGetValue(first, out var handler))
+        {
+            return handler();
+        }
+        return SyntaxKind.Marker;
+    }
+
+    // Called after '<' was taken: a following '=' makes LessThanEqual, otherwise LessThan.
+    private SyntaxKind LessThanOperator()
+    {
+        if (CurrentCharacter == '=')
+        {
+            TakeCurrent();
+            return SyntaxKind.LessThanEqual;
+        }
+        return SyntaxKind.LessThan;
+    }
+
+    // Called after '>' was taken: a following '=' makes GreaterThanEqual, otherwise GreaterThan.
+    private SyntaxKind GreaterThanOperator()
+    {
+        if (CurrentCharacter == '=')
+        {
+            TakeCurrent();
+            return SyntaxKind.GreaterThanEqual;
+        }
+        return SyntaxKind.GreaterThan;
+    }
+
+    // Called after '-' was taken: distinguishes "->", "--", "-=" and a lone '-'.
+    private SyntaxKind MinusOperator()
+    {
+        if (CurrentCharacter == '>')
+        {
+            TakeCurrent();
+            return SyntaxKind.Arrow;
+        }
+        else if (CurrentCharacter == '-')
+        {
+            TakeCurrent();
+            return SyntaxKind.Decrement;
+        }
+        else if (CurrentCharacter == '=')
+        {
+            TakeCurrent();
+            return SyntaxKind.MinusAssign;
+        }
+        return SyntaxKind.Minus;
+    }
+
+    // Builds a handler for an operator that has one two-character form:
+    // if the next character is 'second' it is taken and typeIfBoth is returned, else typeIfOnlyFirst.
+    private Func<SyntaxKind> CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char second, SyntaxKind typeIfBoth)
+    {
+        return () =>
+        {
+            if (CurrentCharacter == second)
+            {
+                TakeCurrent();
+                return typeIfBoth;
+            }
+            return typeIfOnlyFirst;
+        };
+    }
+
+    // Same as above for an operator with two possible second characters; option1 is checked first.
+    private Func<SyntaxKind> CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char option1, SyntaxKind typeIfOption1, char option2, SyntaxKind typeIfOption2)
+    {
+        return () =>
+        {
+            if (CurrentCharacter == option1)
+            {
+                TakeCurrent();
+                return typeIfOption1;
+            }
+            else if (CurrentCharacter == option2)
+            {
+                TakeCurrent();
+                return typeIfOption2;
+            }
+            return typeIfOnlyFirst;
+        };
+    }
+
+    // Handler for the body of a verbatim string (the opening @" was taken by AtToken).
+    // A doubled quote keeps the literal open; end of file adds an unterminated-string error.
+    private StateResult VerbatimStringLiteral()
+    {
+        TakeUntil(c => c == '"');
+        if (CurrentCharacter == '"')
+        {
+            TakeCurrent();
+            if (CurrentCharacter == '"')
+            {
+                TakeCurrent();
+                // Stay in the literal, this is an escaped "
+                return Stay();
+            }
+        }
+        else if (EndOfFile)
+        {
+            CurrentErrors.Add(
+                RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral(
+                    new SourceSpan(CurrentStart, contentLength: 1 /* end of file */)));
+        }
+        return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.StringLiteral));
+    }
+
+    private StateResult QuotedCharacterLiteral() => QuotedLiteral('\'', IsEndQuotedCharacterLiteral, SyntaxKind.CharacterLiteral);
+
+    private StateResult QuotedStringLiteral() => QuotedLiteral('\"', IsEndQuotedStringLiteral, SyntaxKind.StringLiteral);
+
+    // Stop characters for the quoted-literal scan: a backslash, the closing quote, or a new line.
+    private static readonly Func<char, bool> IsEndQuotedCharacterLiteral = static (c) => c == '\\' || c == '\'' || SyntaxFacts.IsNewLine(c);
+    private static readonly Func<char, bool> IsEndQuotedStringLiteral = static (c) => c == '\\' || c == '\"' || SyntaxFacts.IsNewLine(c);
+
+    // Shared handler for character and regular string literals (the opening quote was taken by Data).
+    // Scans to the next stop character; a backslash keeps the literal open, end of file or a new line
+    // adds an unterminated-string error, and the closing quote ends the token.
+    private StateResult QuotedLiteral(char quote, Func<char, bool> isEndQuotedLiteral, SyntaxKind literalType)
+    {
+        TakeUntil(isEndQuotedLiteral);
+        if (CurrentCharacter == '\\')
+        {
+            TakeCurrent(); // Take the '\'
+
+            // If the next char is the same quote that started this
+            if (CurrentCharacter == quote || CurrentCharacter == '\\')
+            {
+                TakeCurrent(); // Take it so that we don't prematurely end the literal.
+            }
+            return Stay();
+        }
+        else if (EndOfFile || SyntaxFacts.IsNewLine(CurrentCharacter))
+        {
+            CurrentErrors.Add(
+                RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral(
+                    new SourceSpan(CurrentStart, contentLength: 1 /* " */)));
+        }
+        else
+        {
+            TakeCurrent(); // No-op if at EOF
+        }
+        return Transition(CSharpTokenizerState.Data, EndToken(literalType));
+    }
+
+    // CSharp Spec §2.3.2
+    // Handler for the body of a /* */ comment. A '*' not followed by '/' keeps the comment open;
+    // end of file adds a not-terminated error and ends the token.
+    private StateResult BlockComment()
+    {
+        TakeUntil(c => c == '*');
+        if (EndOfFile)
+        {
+            CurrentErrors.Add(
+                RazorDiagnosticFactory.CreateParsing_BlockCommentNotTerminated(
+                    new SourceSpan(CurrentStart, contentLength: 1 /* end of file */)));
+
+            return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment));
+        }
+        if (CurrentCharacter == '*')
+        {
+            TakeCurrent();
+            if (CurrentCharacter == '/')
+            {
+                TakeCurrent();
+                return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment));
+            }
+        }
+        return Stay();
+    }
+
+    // CSharp Spec §2.3.2
+    // Takes everything up to (not including) the next new line as one comment token.
+    private StateResult SingleLineComment()
+    {
+        TakeUntil(c => SyntaxFacts.IsNewLine(c));
+        return Stay(EndToken(SyntaxKind.CSharpComment));
+    }
+
+    // CSharp Spec §2.4.4
+    // The prefix match is case-sensitive, so only a lowercase "0x" selects the hex path.
+    private StateResult NumericLiteral()
+    {
+        if (TakeAll("0x", caseSensitive: true))
+        {
+            return HexLiteral();
+        }
+        else
+        {
+            return DecimalLiteral();
+        }
+    }
+
+    // Takes the hex digits and an optional integer suffix that follow the prefix.
+    private StateResult HexLiteral()
+    {
+        TakeUntil(c => !IsHexDigit(c));
+        TakeIntegerSuffix();
+        return Stay(EndToken(SyntaxKind.IntegerLiteral));
+    }
+
+    // Takes the leading digits, then decides between a real literal (fraction, exponent or
+    // real suffix follows) and an integer literal with an optional integer suffix.
+    private StateResult DecimalLiteral()
+    {
+        TakeUntil(c => !Char.IsDigit(c));
+        if (CurrentCharacter == '.' && Char.IsDigit(Peek()))
+        {
+            return RealLiteral();
+        }
+        else if (IsRealLiteralSuffix(CurrentCharacter) ||
+                 CurrentCharacter == 'E' || CurrentCharacter == 'e')
+        {
+            return RealLiteralExponentPart();
+        }
+        else
+        {
+            TakeIntegerSuffix();
+            return Stay(EndToken(SyntaxKind.IntegerLiteral));
+        }
+    }
+
+    // Takes an optional exponent ('E'/'e', optional sign, digits) and an optional real suffix,
+    // then ends the token as a RealLiteral.
+    private StateResult RealLiteralExponentPart()
+    {
+        if (CurrentCharacter == 'E' || CurrentCharacter == 'e')
+        {
+            TakeCurrent();
+            if (CurrentCharacter == '+' || CurrentCharacter == '-')
+            {
+                TakeCurrent();
+            }
+            TakeUntil(c => !Char.IsDigit(c));
+        }
+        if (IsRealLiteralSuffix(CurrentCharacter))
+        {
+            TakeCurrent();
+        }
+        return Stay(EndToken(SyntaxKind.RealLiteral));
+    }
+
+    // CSharp Spec §2.4.4.3
+    // Expects to be positioned on the '.', with a digit after it.
+    private StateResult RealLiteral()
+    {
+        AssertCurrent('.');
+        TakeCurrent();
+        Debug.Assert(Char.IsDigit(CurrentCharacter));
+        TakeUntil(c => !Char.IsDigit(c));
+        return RealLiteralExponentPart();
+    }
+
+    // Takes an optional integer suffix: 'u' or 'l' in either case, optionally followed by the other one.
+    private void TakeIntegerSuffix()
+    {
+        if (Char.ToLowerInvariant(CurrentCharacter) == 'u')
+        {
+            TakeCurrent();
+            if (Char.ToLowerInvariant(CurrentCharacter) == 'l')
+            {
+                TakeCurrent();
+            }
+        }
+        else if (Char.ToLowerInvariant(CurrentCharacter) == 'l')
+        {
+            TakeCurrent();
+            if (Char.ToLowerInvariant(CurrentCharacter) == 'u')
+            {
+                TakeCurrent();
+            }
+        }
+    }
+
+    // CSharp Spec §2.4.2
+    // Takes an identifier and emits it as Keyword when its text is in _keywords, otherwise as Identifier.
+    // The token is built directly from the buffer, which is then cleared together with the pending errors.
+    private StateResult Identifier()
+    {
+        Debug.Assert(SyntaxFacts.IsIdentifierStartCharacter(CurrentCharacter));
+        TakeCurrent();
+        TakeUntil(c => !SyntaxFacts.IsIdentifierPartCharacter(c));
+        SyntaxToken token = null;
+        if (HaveContent)
+        {
+            var type = SyntaxKind.Identifier;
+            var tokenContent = Buffer.ToString();
+            if (_keywords.TryGetValue(tokenContent, value: out _))
+            {
+                type = SyntaxKind.Keyword;
+            }
+
+            token = SyntaxFactory.Token(type, tokenContent);
+
+            Buffer.Clear();
+            CurrentErrors.Clear();
+        }
+
+        return Stay(token);
+    }
+
+    // Typed convenience overloads over the base Transition(int, SyntaxToken).
+    private StateResult Transition(CSharpTokenizerState state)
+    {
+        return Transition((int)state, result: null);
+    }
+
+    private StateResult Transition(CSharpTokenizerState state, SyntaxToken result)
+    {
+        return Transition((int)state, result);
+    }
+
+    // True for the real-literal suffixes F, D and M in either case.
+    private static bool IsRealLiteralSuffix(char character)
+    {
+        return character is 'F' or 'f' or 'D' or 'd' or 'M' or 'm';
+    }
+
+    // True for 0-9, A-F and a-f.
+    private static bool IsHexDigit(char value)
+    {
+        return (value >= '0' && value <= '9') || (value >= 'A' && value <= 'F') || (value >= 'a' && value <= 'f');
+    }
+
+    /// <summary>Looks up the keyword kind for a token by its content.</summary>
+    /// <param name="token">The token to inspect; may be <c>null</c>.</param>
+    /// <returns>The keyword kind, or <c>null</c> when the token is <c>null</c> or its content is not a known keyword.</returns>
+    internal override CSharpSyntaxKind? GetTokenKeyword(SyntaxToken token)
+    {
+        if (token != null && _keywords.TryGetValue(token.Content, out var keyword))
+        {
+            return keyword;
+        }
+
+        return null;
+    }
+
+    private enum CSharpTokenizerState
+    {
+        Data,
+        BlockComment,
+        QuotedCharacterLiteral,
+        QuotedStringLiteral,
+        VerbatimStringLiteral,
+
+        // Razor Comments - need to be the same for HTML and CSharp
+        AfterRazorCommentTransition = RazorCommentTokenizerState.AfterRazorCommentTransition,
+        EscapedRazorCommentTransition = RazorCommentTokenizerState.EscapedRazorCommentTransition,
+        RazorCommentBody = RazorCommentTokenizerState.RazorCommentBody,
+        StarAfterRazorCommentBody = RazorCommentTokenizerState.StarAfterRazorCommentBody,
+        AtTokenAfterRazorCommentBody = RazorCommentTokenizerState.AtTokenAfterRazorCommentBody,
+    }
+}
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/ParserContext.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/ParserContext.cs index b96ded3d778..3cebbf42426 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/ParserContext.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/ParserContext.cs @@ -42,6 +42,8 @@ public ParserContext(RazorSourceDocument source, RazorParserOptions options) public bool ParseLeadingDirectives { get; } + public bool UseRoslynTokenizer { get; } + public bool EnableSpanEditHandlers { get; } public bool 
WhiteSpaceIsSignificantToAncestorBlock { get; set; } diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpLanguageCharacteristics.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpLanguageCharacteristics.cs new file mode 100644 index 00000000000..9955a3a6154 --- /dev/null +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpLanguageCharacteristics.cs @@ -0,0 +1,173 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using System.Collections.Generic;
+using System.Diagnostics;
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+// Removal of this type is tracked by https://github.com/dotnet/razor/issues/8445
+// NOTE(review): the base type below may have lost a generic argument (e.g. <CSharpTokenizer>)
+// the same way other generic declarations in this diff did - confirm against the real file.
+/// <summary>
+/// Language characteristics that create a <see cref="RoslynCSharpTokenizer"/> and describe its
+/// tokens: sample text per kind, known token kinds, and bracket pairing.
+/// </summary>
+internal class RoslynCSharpLanguageCharacteristics : LanguageCharacteristics
+{
+    // Literal sample text for every kind that has exactly one spelling.
+    private static readonly Dictionary<SyntaxKind, string> _tokenSamples = new Dictionary<SyntaxKind, string>()
+    {
+        { SyntaxKind.Arrow, "->" },
+        { SyntaxKind.Minus, "-" },
+        { SyntaxKind.Decrement, "--" },
+        { SyntaxKind.MinusAssign, "-=" },
+        { SyntaxKind.NotEqual, "!=" },
+        { SyntaxKind.Not, "!" },
+        { SyntaxKind.Modulo, "%" },
+        { SyntaxKind.ModuloAssign, "%=" },
+        { SyntaxKind.AndAssign, "&=" },
+        { SyntaxKind.And, "&" },
+        { SyntaxKind.DoubleAnd, "&&" },
+        { SyntaxKind.LeftParenthesis, "(" },
+        { SyntaxKind.RightParenthesis, ")" },
+        { SyntaxKind.Star, "*" },
+        { SyntaxKind.MultiplyAssign, "*=" },
+        { SyntaxKind.Comma, "," },
+        { SyntaxKind.Dot, "." },
+        { SyntaxKind.Slash, "/" },
+        { SyntaxKind.DivideAssign, "/=" },
+        { SyntaxKind.DoubleColon, "::" },
+        { SyntaxKind.Colon, ":" },
+        { SyntaxKind.Semicolon, ";" },
+        { SyntaxKind.QuestionMark, "?" },
+        { SyntaxKind.NullCoalesce, "??" },
+        { SyntaxKind.RightBracket, "]" },
+        { SyntaxKind.LeftBracket, "[" },
+        { SyntaxKind.XorAssign, "^=" },
+        { SyntaxKind.Xor, "^" },
+        { SyntaxKind.LeftBrace, "{" },
+        { SyntaxKind.OrAssign, "|=" },
+        { SyntaxKind.DoubleOr, "||" },
+        { SyntaxKind.Or, "|" },
+        { SyntaxKind.RightBrace, "}" },
+        { SyntaxKind.Tilde, "~" },
+        { SyntaxKind.Plus, "+" },
+        { SyntaxKind.PlusAssign, "+=" },
+        { SyntaxKind.Increment, "++" },
+        { SyntaxKind.LessThan, "<" },
+        { SyntaxKind.LessThanEqual, "<=" },
+        { SyntaxKind.LeftShift, "<<" },
+        { SyntaxKind.LeftShiftAssign, "<<=" },
+        { SyntaxKind.Assign, "=" },
+        { SyntaxKind.Equals, "==" },
+        { SyntaxKind.GreaterThan, ">" },
+        { SyntaxKind.GreaterThanEqual, ">=" },
+        { SyntaxKind.RightShift, ">>" },
+        { SyntaxKind.RightShiftAssign, ">>=" },
+        { SyntaxKind.Hash, "#" },
+        { SyntaxKind.Transition, "@" },
+    };
+
+    private static readonly RoslynCSharpLanguageCharacteristics _instance = new RoslynCSharpLanguageCharacteristics();
+
+    protected RoslynCSharpLanguageCharacteristics()
+    {
+    }
+
+    /// <summary>The shared instance of this type.</summary>
+    public static RoslynCSharpLanguageCharacteristics Instance => _instance;
+
+    /// <summary>Creates a <see cref="RoslynCSharpTokenizer"/> over <paramref name="source"/>.</summary>
+    public override CSharpTokenizer CreateTokenizer(SeekableTextReader source)
+    {
+        return new RoslynCSharpTokenizer(source);
+    }
+
+    /// <summary>Creates a token of <paramref name="kind"/> through <c>SyntaxFactory.Token</c>.</summary>
+    protected override SyntaxToken CreateToken(string content, SyntaxKind kind, RazorDiagnostic[] errors)
+    {
+        return SyntaxFactory.Token(kind, content, errors);
+    }
+
+    /// <summary>Returns sample text for a token kind.</summary>
+    /// <param name="kind">The kind to describe.</param>
+    /// <returns>
+    /// The literal spelling when the kind has one; otherwise a resource string naming the token
+    /// category, or <c>Resources.Token_Unknown</c> for any other kind.
+    /// </returns>
+    public override string GetSample(SyntaxKind kind)
+    {
+        if (_tokenSamples.TryGetValue(kind, out var sample))
+        {
+            return sample;
+        }
+
+        return kind switch
+        {
+            SyntaxKind.Identifier => Resources.CSharpToken_Identifier,
+            SyntaxKind.Keyword => Resources.CSharpToken_Keyword,
+            SyntaxKind.IntegerLiteral => Resources.CSharpToken_IntegerLiteral,
+            SyntaxKind.NewLine => Resources.CSharpToken_Newline,
+            SyntaxKind.Whitespace => Resources.CSharpToken_Whitespace,
+            SyntaxKind.CSharpComment => Resources.CSharpToken_Comment,
+            SyntaxKind.RealLiteral => Resources.CSharpToken_RealLiteral,
+            SyntaxKind.CharacterLiteral => Resources.CSharpToken_CharacterLiteral,
+            SyntaxKind.StringLiteral => Resources.CSharpToken_StringLiteral,
+            _ => Resources.Token_Unknown,
+        };
+    }
+
+    /// <summary>Creates an empty token of kind <c>SyntaxKind.Marker</c>.</summary>
+    public override SyntaxToken CreateMarkerToken()
+    {
+        return SyntaxFactory.Token(SyntaxKind.Marker, string.Empty);
+    }
+
+    /// <summary>Maps a <see cref="KnownTokenType"/> to the corresponding <see cref="SyntaxKind"/>.</summary>
+    /// <returns>The matching kind, or <c>SyntaxKind.Marker</c> for a type that is not listed.</returns>
+    public override SyntaxKind GetKnownTokenType(KnownTokenType type)
+    {
+        return type switch
+        {
+            KnownTokenType.Identifier => SyntaxKind.Identifier,
+            KnownTokenType.Keyword => SyntaxKind.Keyword,
+            KnownTokenType.NewLine => SyntaxKind.NewLine,
+            KnownTokenType.Whitespace => SyntaxKind.Whitespace,
+            KnownTokenType.Transition => SyntaxKind.Transition,
+            KnownTokenType.CommentStart => SyntaxKind.RazorCommentTransition,
+            KnownTokenType.CommentStar => SyntaxKind.RazorCommentStar,
+            KnownTokenType.CommentBody => SyntaxKind.RazorCommentLiteral,
+            _ => SyntaxKind.Marker,
+        };
+    }
+
+    /// <summary>Returns the partner of a bracket kind (brace, bracket, parenthesis, or angle).</summary>
+    /// <returns>
+    /// The opposite bracket. Any other kind triggers <see cref="Debug.Fail(string)"/> and yields
+    /// <c>SyntaxKind.Marker</c>.
+    /// </returns>
+    public override SyntaxKind FlipBracket(SyntaxKind bracket)
+    {
+        switch (bracket)
+        {
+            case SyntaxKind.LeftBrace:
+                return SyntaxKind.RightBrace;
+            case SyntaxKind.LeftBracket:
+                return SyntaxKind.RightBracket;
+            case SyntaxKind.LeftParenthesis:
+                return SyntaxKind.RightParenthesis;
+            case SyntaxKind.LessThan:
+                return SyntaxKind.GreaterThan;
+            case SyntaxKind.RightBrace:
+                return SyntaxKind.LeftBrace;
+            case SyntaxKind.RightBracket:
+                return SyntaxKind.LeftBracket;
+            case SyntaxKind.RightParenthesis:
+                return SyntaxKind.LeftParenthesis;
+            case SyntaxKind.GreaterThan:
+                return SyntaxKind.LessThan;
+            default:
+                Debug.Fail("FlipBracket must be called with a bracket character");
+                return SyntaxKind.Marker;
+        }
+    }
+}
diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpTokenizer.cs 
b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpTokenizer.cs new file mode 100644 index 00000000000..8a1336a2183 --- /dev/null +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/RoslynCSharpTokenizer.cs @@ -0,0 +1,789 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#nullable disable
+
+using System;
+using System.Collections.Generic;
+using System.Collections.Frozen;
+using System.Diagnostics;
+using Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax;
+using Microsoft.CodeAnalysis.CSharp;
+
+using SyntaxFactory = Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax.SyntaxFactory;
+using CSharpSyntaxKind = Microsoft.CodeAnalysis.CSharp.SyntaxKind;
+
+namespace Microsoft.AspNetCore.Razor.Language.Legacy;
+
+/// <summary>
+/// A state-machine tokenizer for C# text: each call to <see cref="Dispatch"/> runs the handler of
+/// the current <see cref="CSharpTokenizerState"/>.
+/// </summary>
+internal class RoslynCSharpTokenizer : CSharpTokenizer
+{
+    // Maps the first character of an operator to the handler that decides its token kind.
+    // Built per instance because the handlers read this tokenizer's CurrentCharacter.
+    private readonly Dictionary<char, Func<SyntaxKind>> _operatorHandlers;
+
+    // Keyword text (from SyntaxFacts.GetText) mapped to the keyword's kind.
+    private static readonly FrozenDictionary<string, CSharpSyntaxKind> _keywords = (new[] {
+        CSharpSyntaxKind.AwaitKeyword,
+        CSharpSyntaxKind.AbstractKeyword,
+        CSharpSyntaxKind.ByteKeyword,
+        CSharpSyntaxKind.ClassKeyword,
+        CSharpSyntaxKind.DelegateKeyword,
+        CSharpSyntaxKind.EventKeyword,
+        CSharpSyntaxKind.FixedKeyword,
+        CSharpSyntaxKind.IfKeyword,
+        CSharpSyntaxKind.InternalKeyword,
+        CSharpSyntaxKind.NewKeyword,
+        CSharpSyntaxKind.OverrideKeyword,
+        CSharpSyntaxKind.ReadOnlyKeyword,
+        CSharpSyntaxKind.ShortKeyword,
+        CSharpSyntaxKind.StructKeyword,
+        CSharpSyntaxKind.TryKeyword,
+        CSharpSyntaxKind.UnsafeKeyword,
+        CSharpSyntaxKind.VolatileKeyword,
+        CSharpSyntaxKind.AsKeyword,
+        CSharpSyntaxKind.DoKeyword,
+        CSharpSyntaxKind.IsKeyword,
+        CSharpSyntaxKind.ParamsKeyword,
+        CSharpSyntaxKind.RefKeyword,
+        CSharpSyntaxKind.SwitchKeyword,
+        CSharpSyntaxKind.UShortKeyword,
+        CSharpSyntaxKind.WhileKeyword,
+        CSharpSyntaxKind.CaseKeyword,
+        CSharpSyntaxKind.ConstKeyword,
+        CSharpSyntaxKind.ExplicitKeyword,
+        CSharpSyntaxKind.FloatKeyword,
+        CSharpSyntaxKind.NullKeyword,
+        CSharpSyntaxKind.SizeOfKeyword,
+        CSharpSyntaxKind.TypeOfKeyword,
+        CSharpSyntaxKind.ImplicitKeyword,
+        CSharpSyntaxKind.PrivateKeyword,
+        CSharpSyntaxKind.ThisKeyword,
+        CSharpSyntaxKind.UsingKeyword,
+        CSharpSyntaxKind.ExternKeyword,
+        CSharpSyntaxKind.ReturnKeyword,
+        CSharpSyntaxKind.StackAllocKeyword,
+        CSharpSyntaxKind.UIntKeyword,
+        CSharpSyntaxKind.BaseKeyword,
+        CSharpSyntaxKind.CatchKeyword,
+        CSharpSyntaxKind.ContinueKeyword,
+        CSharpSyntaxKind.DoubleKeyword,
+        CSharpSyntaxKind.ForKeyword,
+        CSharpSyntaxKind.InKeyword,
+        CSharpSyntaxKind.LockKeyword,
+        CSharpSyntaxKind.ObjectKeyword,
+        CSharpSyntaxKind.ProtectedKeyword,
+        CSharpSyntaxKind.StaticKeyword,
+        CSharpSyntaxKind.FalseKeyword,
+        CSharpSyntaxKind.PublicKeyword,
+        CSharpSyntaxKind.SByteKeyword,
+        CSharpSyntaxKind.ThrowKeyword,
+        CSharpSyntaxKind.VirtualKeyword,
+        CSharpSyntaxKind.DecimalKeyword,
+        CSharpSyntaxKind.ElseKeyword,
+        CSharpSyntaxKind.OperatorKeyword,
+        CSharpSyntaxKind.StringKeyword,
+        CSharpSyntaxKind.ULongKeyword,
+        CSharpSyntaxKind.BoolKeyword,
+        CSharpSyntaxKind.CharKeyword,
+        CSharpSyntaxKind.DefaultKeyword,
+        CSharpSyntaxKind.ForEachKeyword,
+        CSharpSyntaxKind.LongKeyword,
+        CSharpSyntaxKind.VoidKeyword,
+        CSharpSyntaxKind.EnumKeyword,
+        CSharpSyntaxKind.FinallyKeyword,
+        CSharpSyntaxKind.IntKeyword,
+        CSharpSyntaxKind.OutKeyword,
+        CSharpSyntaxKind.SealedKeyword,
+        CSharpSyntaxKind.TrueKeyword,
+        CSharpSyntaxKind.GotoKeyword,
+        CSharpSyntaxKind.UncheckedKeyword,
+        CSharpSyntaxKind.InterfaceKeyword,
+        CSharpSyntaxKind.BreakKeyword,
+        CSharpSyntaxKind.CheckedKeyword,
+        CSharpSyntaxKind.NamespaceKeyword,
+        CSharpSyntaxKind.WhenKeyword,
+        CSharpSyntaxKind.WhereKeyword }).ToFrozenDictionary(keySelector: k => SyntaxFacts.GetText(k));
+
+    /// <summary>Creates a tokenizer over <paramref name="source"/>, positioned in the start state.</summary>
+    public RoslynCSharpTokenizer(SeekableTextReader source)
+        : base(source)
+    {
+        base.CurrentState = StartState;
+
+        _operatorHandlers = new Dictionary<char, Func<SyntaxKind>>()
+        {
+            { '-', MinusOperator },
+            { '<', LessThanOperator },
+            { '>', GreaterThanOperator },
+            { '&', CreateTwoCharOperatorHandler(SyntaxKind.And, '=', SyntaxKind.AndAssign, '&', SyntaxKind.DoubleAnd) },
+            { '|', CreateTwoCharOperatorHandler(SyntaxKind.Or, '=', SyntaxKind.OrAssign, '|', SyntaxKind.DoubleOr) },
+            { '+', CreateTwoCharOperatorHandler(SyntaxKind.Plus, '=', SyntaxKind.PlusAssign, '+', SyntaxKind.Increment) },
+            // "=>" is reported as GreaterThanEqual; GetTokenContent tells it apart from ">=" by the first buffered character.
+            { '=', CreateTwoCharOperatorHandler(SyntaxKind.Assign, '=', SyntaxKind.Equals, '>', SyntaxKind.GreaterThanEqual) },
+            { '!', CreateTwoCharOperatorHandler(SyntaxKind.Not, '=', SyntaxKind.NotEqual) },
+            { '%', CreateTwoCharOperatorHandler(SyntaxKind.Modulo, '=', SyntaxKind.ModuloAssign) },
+            { '*', CreateTwoCharOperatorHandler(SyntaxKind.Star, '=', SyntaxKind.MultiplyAssign) },
+            { ':', CreateTwoCharOperatorHandler(SyntaxKind.Colon, ':', SyntaxKind.DoubleColon) },
+            { '?', CreateTwoCharOperatorHandler(SyntaxKind.QuestionMark, '?', SyntaxKind.NullCoalesce) },
+            { '^', CreateTwoCharOperatorHandler(SyntaxKind.Xor, '=', SyntaxKind.XorAssign) },
+            { '(', () => SyntaxKind.LeftParenthesis },
+            { ')', () => SyntaxKind.RightParenthesis },
+            { '{', () => SyntaxKind.LeftBrace },
+            { '}', () => SyntaxKind.RightBrace },
+            { '[', () => SyntaxKind.LeftBracket },
+            { ']', () => SyntaxKind.RightBracket },
+            { ',', () => SyntaxKind.Comma },
+            { ';', () => SyntaxKind.Semicolon },
+            { '~', () => SyntaxKind.Tilde },
+            { '#', () => SyntaxKind.Hash }
+        };
+    }
+
+    /// <summary>Tokenization starts in the <see cref="CSharpTokenizerState.Data"/> state.</summary>
+    protected override int StartState => (int)CSharpTokenizerState.Data;
+
+    /// <summary>Hides the base state so it can be read as a <see cref="CSharpTokenizerState"/>.</summary>
+    private new CSharpTokenizerState? CurrentState => (CSharpTokenizerState?)base.CurrentState;
+
+    public override SyntaxKind RazorCommentKind => SyntaxKind.RazorCommentLiteral;
+
+    public override SyntaxKind RazorCommentTransitionKind => SyntaxKind.RazorCommentTransition;
+
+    public override SyntaxKind RazorCommentStarKind => SyntaxKind.RazorCommentStar;
+
+    /// <summary>Runs the handler that belongs to <see cref="CurrentState"/>.</summary>
+    /// <returns>
+    /// The handler's result. An unrecognized state triggers <see cref="Debug.Fail(string)"/> and
+    /// yields <c>default(StateResult)</c>.
+    /// </returns>
+    protected override StateResult Dispatch()
+    {
+        switch (CurrentState)
+        {
+            case CSharpTokenizerState.Data:
+                return Data();
+            case CSharpTokenizerState.BlockComment:
+                return BlockComment();
+            case CSharpTokenizerState.QuotedCharacterLiteral:
+                return QuotedCharacterLiteral();
+            case CSharpTokenizerState.QuotedStringLiteral:
+                return QuotedStringLiteral();
+            case CSharpTokenizerState.VerbatimStringLiteral:
+                return VerbatimStringLiteral();
+            // The Razor comment handlers below are not defined in this part of the file.
+            case CSharpTokenizerState.AfterRazorCommentTransition:
+                return AfterRazorCommentTransition();
+            case CSharpTokenizerState.EscapedRazorCommentTransition:
+                return EscapedRazorCommentTransition();
+            case CSharpTokenizerState.RazorCommentBody:
+                return RazorCommentBody();
+            case CSharpTokenizerState.StarAfterRazorCommentBody:
+                return StarAfterRazorCommentBody();
+            case CSharpTokenizerState.AtTokenAfterRazorCommentBody:
+                return AtTokenAfterRazorCommentBody();
+            default:
+                Debug.Fail("Invalid TokenizerState");
+                return default(StateResult);
+        }
+    }
+
+    /// <summary>
+    /// Returns the text of the token currently held in <c>Buffer</c>.
+    /// Optimizes memory allocation by returning constants for the most frequent cases.
+    /// </summary>
+    /// <param name="type">Kind of the token being ended.</param>
+    /// <returns>
+    /// A constant string when the kind and buffer length (1, 2 or 3) match a known short token;
+    /// otherwise whatever the base implementation returns.
+    /// </returns>
+    protected override string GetTokenContent(SyntaxKind type)
+    {
+        var tokenLength = Buffer.Length;
+        string content = null;
+
+        if (tokenLength == 1)
+        {
+            var first = Buffer[0];
+            content = type switch
+            {
+                SyntaxKind.IntegerLiteral => first switch
+                {
+                    '0' => "0",
+                    '1' => "1",
+                    '2' => "2",
+                    '3' => "3",
+                    '4' => "4",
+                    '5' => "5",
+                    '6' => "6",
+                    '7' => "7",
+                    '8' => "8",
+                    '9' => "9",
+                    _ => null,
+                },
+                SyntaxKind.NewLine when first == '\n' => "\n",
+                SyntaxKind.Whitespace when first == ' ' => " ",
+                SyntaxKind.Whitespace when first == '\t' => "\t",
+                SyntaxKind.Minus => "-",
+                SyntaxKind.Not => "!",
+                SyntaxKind.Modulo => "%",
+                SyntaxKind.And => "&",
+                SyntaxKind.LeftParenthesis => "(",
+                SyntaxKind.RightParenthesis => ")",
+                SyntaxKind.Star => "*",
+                SyntaxKind.Comma => ",",
+                SyntaxKind.Dot => ".",
+                SyntaxKind.Slash => "/",
+                SyntaxKind.Colon => ":",
+                SyntaxKind.Semicolon => ";",
+                SyntaxKind.QuestionMark => "?",
+                SyntaxKind.RightBracket => "]",
+                SyntaxKind.LeftBracket => "[",
+                SyntaxKind.Xor => "^",
+                SyntaxKind.LeftBrace => "{",
+                SyntaxKind.Or => "|",
+                SyntaxKind.RightBrace => "}",
+                SyntaxKind.Tilde => "~",
+                SyntaxKind.Plus => "+",
+                SyntaxKind.LessThan => "<",
+                SyntaxKind.Assign => "=",
+                SyntaxKind.GreaterThan => ">",
+                SyntaxKind.Hash => "#",
+                SyntaxKind.Transition => "@",
+                _ => null,
+            };
+        }
+        else if (tokenLength == 2)
+        {
+            content = type switch
+            {
+                SyntaxKind.NewLine => "\r\n",
+                SyntaxKind.Arrow => "->",
+                SyntaxKind.Decrement => "--",
+                SyntaxKind.MinusAssign => "-=",
+                SyntaxKind.NotEqual => "!=",
+                SyntaxKind.ModuloAssign => "%=",
+                SyntaxKind.AndAssign => "&=",
+                SyntaxKind.DoubleAnd => "&&",
+                SyntaxKind.MultiplyAssign => "*=",
+                SyntaxKind.DivideAssign => "/=",
+                SyntaxKind.DoubleColon => "::",
+                SyntaxKind.NullCoalesce => "??",
+                SyntaxKind.XorAssign => "^=",
+                SyntaxKind.OrAssign => "|=",
+                SyntaxKind.DoubleOr => "||",
+                SyntaxKind.PlusAssign => "+=",
+                SyntaxKind.Increment => "++",
+                SyntaxKind.LessThanEqual => "<=",
+                SyntaxKind.LeftShift => "<<",
+                SyntaxKind.Equals => "==",
+                // The '=' operator handler also reports "=>" as GreaterThanEqual, so the first character decides.
+                SyntaxKind.GreaterThanEqual => Buffer[0] == '=' ? "=>" : ">=",
+                SyntaxKind.RightShift => ">>",
+                _ => null,
+            };
+        }
+        else if (tokenLength == 3)
+        {
+            content = type switch
+            {
+                SyntaxKind.LeftShiftAssign => "<<=",
+                SyntaxKind.RightShiftAssign => ">>=",
+                _ => null,
+            };
+        }
+
+        // No cached constant applies: let the base class produce the text.
+        return content ?? base.GetTokenContent(type);
+    }
+
+    /// <summary>Creates a token of <paramref name="kind"/> through <c>SyntaxFactory.Token</c>.</summary>
+    protected override SyntaxToken CreateToken(string content, SyntaxKind kind, RazorDiagnostic[] errors)
+    {
+        return SyntaxFactory.Token(kind, content, errors);
+    }
+
+    // Handler for the Data state: classifies the token that starts at CurrentCharacter.
+    // New lines, whitespace, identifiers and numbers are checked first; '@', quotes, '.' and '/'
+    // get dedicated handling; everything else is treated as an operator.
+    private StateResult Data()
+    {
+        if (SyntaxFacts.IsNewLine(CurrentCharacter))
+        {
+            // CSharp Spec §2.3.1
+            // "\r\n" is taken as a single NewLine token.
+            var checkTwoCharNewline = CurrentCharacter == '\r';
+            TakeCurrent();
+            if (checkTwoCharNewline && CurrentCharacter == '\n')
+            {
+                TakeCurrent();
+            }
+            return Stay(EndToken(SyntaxKind.NewLine));
+        }
+        else if (SyntaxFacts.IsWhitespace(CurrentCharacter))
+        {
+            // CSharp Spec §2.3.3
+            TakeUntil(c => !SyntaxFacts.IsWhitespace(c));
+            return Stay(EndToken(SyntaxKind.Whitespace));
+        }
+        else if (SyntaxFacts.IsIdentifierStartCharacter(CurrentCharacter))
+        {
+            return Identifier();
+        }
+        else if (char.IsDigit(CurrentCharacter))
+        {
+            return NumericLiteral();
+        }
+        switch (CurrentCharacter)
+        {
+            case '@':
+                return AtToken();
+            case '\'':
+                TakeCurrent();
+                return Transition(CSharpTokenizerState.QuotedCharacterLiteral);
+            case '"':
+                TakeCurrent();
+                return Transition(CSharpTokenizerState.QuotedStringLiteral);
+            case '.':
+                // ".5" starts a real literal; a lone '.' is the Dot token.
+                if (char.IsDigit(Peek()))
+                {
+                    return RealLiteral();
+                }
+                return Stay(Single(SyntaxKind.Dot));
+            case '/':
+                TakeCurrent();
+                if (CurrentCharacter == '/')
+                {
+                    TakeCurrent();
+                    return SingleLineComment();
+                }
+                else if (CurrentCharacter == '*')
+                {
+                    TakeCurrent();
+                    return Transition(CSharpTokenizerState.BlockComment);
+                }
+                else if (CurrentCharacter == '=')
+                {
+                    TakeCurrent();
+                    return Stay(EndToken(SyntaxKind.DivideAssign));
+                }
+                else
+                {
+                    return Stay(EndToken(SyntaxKind.Slash));
+                }
+            default:
+                return Stay(EndToken(Operator()));
+        }
+    }
+
+    // Handles '@': start of a verbatim string (@"), start of a Razor comment (@*),
+    // a doubled '@', or a plain transition.
+    private StateResult AtToken()
+    {
+        TakeCurrent();
+        if (CurrentCharacter == '"')
+        {
+            TakeCurrent();
+            return Transition(CSharpTokenizerState.VerbatimStringLiteral);
+        }
+        else if (CurrentCharacter == '*')
+        {
+            // The '*' is not taken here, so the transition token holds only the '@'.
+            return Transition(
+                CSharpTokenizerState.AfterRazorCommentTransition,
+                EndToken(SyntaxKind.RazorCommentTransition));
+        }
+        else if (CurrentCharacter == '@')
+        {
+            // Could be escaped comment transition
+            return Transition(
+                CSharpTokenizerState.EscapedRazorCommentTransition,
+                EndToken(SyntaxKind.Transition));
+        }
+
+        return Stay(EndToken(SyntaxKind.Transition));
+    }
+
+    // Takes the second '@' of "@@" as its own Transition token and returns to the Data state.
+    private StateResult EscapedRazorCommentTransition()
+    {
+        TakeCurrent();
+        return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.Transition));
+    }
+
+    // Takes the current character and lets its registered handler pick the token kind
+    // (the handler may take a following character). Characters without a handler yield Marker.
+    private SyntaxKind Operator()
+    {
+        var first = CurrentCharacter;
+        TakeCurrent();
+        if (_operatorHandlers.TryGetValue(first, out var handler))
+        {
+            return handler();
+        }
+        return SyntaxKind.Marker;
+    }
+
+    // Called after '<' was taken: a following '=' makes LessThanEqual, otherwise LessThan.
+    private SyntaxKind LessThanOperator()
+    {
+        if (CurrentCharacter == '=')
+        {
+            TakeCurrent();
+            return SyntaxKind.LessThanEqual;
+        }
+        return SyntaxKind.LessThan;
+    }
+
+    // Called after '>' was taken: a following '=' makes GreaterThanEqual, otherwise GreaterThan.
+    private SyntaxKind GreaterThanOperator()
+    {
+        if (CurrentCharacter == '=')
+        {
+            TakeCurrent();
+            return SyntaxKind.GreaterThanEqual;
+        }
+        return SyntaxKind.GreaterThan;
+    }
+
+    // Called after '-' was taken: distinguishes "->", "--", "-=" and a lone '-'.
+    private SyntaxKind MinusOperator()
+    {
+        if (CurrentCharacter == '>')
+        {
+            TakeCurrent();
+            return SyntaxKind.Arrow;
+        }
+        else if (CurrentCharacter == '-')
+        {
+            TakeCurrent();
+            return SyntaxKind.Decrement;
+        }
+        else if (CurrentCharacter == '=')
+        {
+            TakeCurrent();
+            return SyntaxKind.MinusAssign;
+        }
+        return SyntaxKind.Minus;
+    }
+
+    // Builds a handler for an operator that has one two-character form:
+    // if the next character is 'second' it is taken and typeIfBoth is returned, else typeIfOnlyFirst.
+    private Func<SyntaxKind> CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char second, SyntaxKind typeIfBoth)
+    {
+        return () =>
+        {
+            if (CurrentCharacter == second)
+            {
+                TakeCurrent();
+                return typeIfBoth;
+            }
+            return typeIfOnlyFirst;
+        };
+    }
+
+    // Same as above for an operator with two possible second characters; option1 is checked first.
+    private Func<SyntaxKind> CreateTwoCharOperatorHandler(SyntaxKind typeIfOnlyFirst, char option1, SyntaxKind typeIfOption1, char option2, SyntaxKind typeIfOption2)
+    {
+        return () =>
+        {
+            if (CurrentCharacter == option1)
+            {
+                TakeCurrent();
+                return typeIfOption1;
+            }
+            else if (CurrentCharacter == option2)
+            {
+                TakeCurrent();
+                return typeIfOption2;
+            }
+            return typeIfOnlyFirst;
+        };
+    }
+
+    // Handler for the body of a verbatim string (the opening @" was taken by AtToken).
+    // A doubled quote keeps the literal open; end of file adds an unterminated-string error.
+    private StateResult VerbatimStringLiteral()
+    {
+        TakeUntil(c => c == '"');
+        if (CurrentCharacter == '"')
+        {
+            TakeCurrent();
+            if (CurrentCharacter == '"')
+            {
+                TakeCurrent();
+                // Stay in the literal, this is an escaped "
+                return Stay();
+            }
+        }
+        else if (EndOfFile)
+        {
+            CurrentErrors.Add(
+                RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral(
+                    new SourceSpan(CurrentStart, contentLength: 1 /* end of file */)));
+        }
+        return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.StringLiteral));
+    }
+
+    private StateResult QuotedCharacterLiteral() => QuotedLiteral('\'', IsEndQuotedCharacterLiteral, SyntaxKind.CharacterLiteral);
+
+    private StateResult QuotedStringLiteral() => QuotedLiteral('\"', IsEndQuotedStringLiteral, SyntaxKind.StringLiteral);
+
+    // Stop characters for the quoted-literal scan: a backslash, the closing quote, or a new line.
+    private static readonly Func<char, bool> IsEndQuotedCharacterLiteral = static (c) => c == '\\' || c == '\'' || SyntaxFacts.IsNewLine(c);
+    private static readonly Func<char, bool> IsEndQuotedStringLiteral = static (c) => c == '\\' || c == '\"' || SyntaxFacts.IsNewLine(c);
+
+    // Shared handler for character and regular string literals (the opening quote was taken by Data).
+    // Scans to the next stop character; a backslash keeps the literal open, end of file or a new line
+    // adds an unterminated-string error, and the closing quote ends the token.
+    private StateResult QuotedLiteral(char quote, Func<char, bool> isEndQuotedLiteral, SyntaxKind literalType)
+    {
+        TakeUntil(isEndQuotedLiteral);
+        if (CurrentCharacter == '\\')
+        {
+            TakeCurrent(); // Take the '\'
+
+            // If the next char is the same quote that started this
+            if (CurrentCharacter == quote || CurrentCharacter == '\\')
+            {
+                TakeCurrent(); // Take it so that we don't prematurely end the literal.
+            }
+            return Stay();
+        }
+        else if (EndOfFile || SyntaxFacts.IsNewLine(CurrentCharacter))
+        {
+            CurrentErrors.Add(
+                RazorDiagnosticFactory.CreateParsing_UnterminatedStringLiteral(
+                    new SourceSpan(CurrentStart, contentLength: 1 /* " */)));
+        }
+        else
+        {
+            TakeCurrent(); // No-op if at EOF
+        }
+        return Transition(CSharpTokenizerState.Data, EndToken(literalType));
+    }
+
+    // CSharp Spec §2.3.2
+    // Handler for the body of a /* */ comment. A '*' not followed by '/' keeps the comment open;
+    // end of file adds a not-terminated error and ends the token.
+    private StateResult BlockComment()
+    {
+        TakeUntil(c => c == '*');
+        if (EndOfFile)
+        {
+            CurrentErrors.Add(
+                RazorDiagnosticFactory.CreateParsing_BlockCommentNotTerminated(
+                    new SourceSpan(CurrentStart, contentLength: 1 /* end of file */)));
+
+            return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment));
+        }
+        if (CurrentCharacter == '*')
+        {
+            TakeCurrent();
+            if (CurrentCharacter == '/')
+            {
+                TakeCurrent();
+                return Transition(CSharpTokenizerState.Data, EndToken(SyntaxKind.CSharpComment));
+            }
+        }
+        return Stay();
+    }
+
+    // CSharp Spec §2.3.2
+    // Takes everything up to (not including) the next new line as one comment token.
+    private StateResult SingleLineComment()
+    {
+        TakeUntil(c => SyntaxFacts.IsNewLine(c));
+        return Stay(EndToken(SyntaxKind.CSharpComment));
+    }
+
+    // CSharp Spec §2.4.4
+    // The prefix match is case-sensitive, so only a lowercase "0x" selects the hex path.
+    private StateResult NumericLiteral()
+    {
+        if (TakeAll("0x", caseSensitive: true))
+        {
+            return HexLiteral();
+        }
+        else
+        {
+            return DecimalLiteral();
+        }
+    }
+
+    // Takes the hex digits and an optional integer suffix that follow the prefix.
+    private StateResult HexLiteral()
+    {
+        TakeUntil(c => !IsHexDigit(c));
+        TakeIntegerSuffix();
+        return Stay(EndToken(SyntaxKind.IntegerLiteral));
+    }
+
+    private StateResult DecimalLiteral()
+    {
+        TakeUntil(c => !Char.IsDigit(c));
+        if (CurrentCharacter == '.' 
&& Char.IsDigit(Peek())) + { + return RealLiteral(); + } + else if (IsRealLiteralSuffix(CurrentCharacter) || + CurrentCharacter == 'E' || CurrentCharacter == 'e') + { + return RealLiteralExponentPart(); + } + else + { + TakeIntegerSuffix(); + return Stay(EndToken(SyntaxKind.IntegerLiteral)); + } + } + + private StateResult RealLiteralExponentPart() + { + if (CurrentCharacter == 'E' || CurrentCharacter == 'e') + { + TakeCurrent(); + if (CurrentCharacter == '+' || CurrentCharacter == '-') + { + TakeCurrent(); + } + TakeUntil(c => !Char.IsDigit(c)); + } + if (IsRealLiteralSuffix(CurrentCharacter)) + { + TakeCurrent(); + } + return Stay(EndToken(SyntaxKind.RealLiteral)); + } + + // CSharp Spec §2.4.4.3 + private StateResult RealLiteral() + { + AssertCurrent('.'); + TakeCurrent(); + Debug.Assert(Char.IsDigit(CurrentCharacter)); + TakeUntil(c => !Char.IsDigit(c)); + return RealLiteralExponentPart(); + } + + private void TakeIntegerSuffix() + { + if (Char.ToLowerInvariant(CurrentCharacter) == 'u') + { + TakeCurrent(); + if (Char.ToLowerInvariant(CurrentCharacter) == 'l') + { + TakeCurrent(); + } + } + else if (Char.ToLowerInvariant(CurrentCharacter) == 'l') + { + TakeCurrent(); + if (Char.ToLowerInvariant(CurrentCharacter) == 'u') + { + TakeCurrent(); + } + } + } + + // CSharp Spec §2.4.2 + private StateResult Identifier() + { + Debug.Assert(SyntaxFacts.IsIdentifierStartCharacter(CurrentCharacter)); + TakeCurrent(); + TakeUntil(c => !SyntaxFacts.IsIdentifierPartCharacter(c)); + SyntaxToken token = null; + if (HaveContent) + { + var type = SyntaxKind.Identifier; + var tokenContent = Buffer.ToString(); + if (_keywords.TryGetValue(tokenContent, value: out _)) + { + type = SyntaxKind.Keyword; + } + + token = SyntaxFactory.Token(type, tokenContent); + + Buffer.Clear(); + CurrentErrors.Clear(); + } + + return Stay(token); + } + + private StateResult Transition(CSharpTokenizerState state) + { + return Transition((int)state, result: null); + } + + private StateResult 
Transition(CSharpTokenizerState state, SyntaxToken result) + { + return Transition((int)state, result); + } + + private static bool IsRealLiteralSuffix(char character) + { + return character == 'F' || + character == 'f' || + character == 'D' || + character == 'd' || + character == 'M' || + character == 'm'; + } + + private static bool IsHexDigit(char value) + { + return (value >= '0' && value <= '9') || (value >= 'A' && value <= 'F') || (value >= 'a' && value <= 'f'); + } + + internal override CSharpSyntaxKind? GetTokenKeyword(SyntaxToken token) + { + if (token != null && _keywords.TryGetValue(token.Content, out var keyword)) + { + return keyword; + } + + return null; + } + + private enum CSharpTokenizerState + { + Data, + BlockComment, + QuotedCharacterLiteral, + QuotedStringLiteral, + VerbatimStringLiteral, + + // Razor Comments - need to be the same for HTML and CSharp + AfterRazorCommentTransition = RazorCommentTokenizerState.AfterRazorCommentTransition, + EscapedRazorCommentTransition = RazorCommentTokenizerState.EscapedRazorCommentTransition, + RazorCommentBody = RazorCommentTokenizerState.RazorCommentBody, + StarAfterRazorCommentBody = RazorCommentTokenizerState.StarAfterRazorCommentBody, + AtTokenAfterRazorCommentBody = RazorCommentTokenizerState.AtTokenAfterRazorCommentBody, + } +} diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/TokenizerBackedParser.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/TokenizerBackedParser.cs index f8aaf38ec1e..45e06cf5341 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/TokenizerBackedParser.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/Legacy/TokenizerBackedParser.cs @@ -17,7 +17,7 @@ internal abstract class TokenizerBackedParser : ParserBase protected delegate void SpanContextConfigActionWithPreviousConfig(SpanEditHandlerBuilder? editHandlerBuilder, ref ISpanChunkGenerator? 
chunkGenerator, SpanContextConfigAction? previousConfig); private readonly SyntaxListPool _pool = new SyntaxListPool(); - private readonly TokenizerView _tokenizer; + protected readonly TokenizerView _tokenizer; private SyntaxListBuilder? _tokenBuilder; protected SpanEditHandlerBuilder? editHandlerBuilder; diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptions.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptions.cs index 972647ce900..a949bd6bd94 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptions.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptions.cs @@ -16,6 +16,7 @@ public static RazorParserOptions CreateDefault() Array.Empty(), designTime: false, parseLeadingDirectives: false, + useRoslynTokenizer: false, version: RazorLanguageVersion.Latest, fileKind: FileKinds.Legacy, enableSpanEditHandlers: false); @@ -59,16 +60,22 @@ public static RazorParserOptions CreateDesignTime(Action public bool ParseLeadingDirectives { get; } + public bool UseRoslynTokenizer { get; } + public RazorLanguageVersion Version { get; } = RazorLanguageVersion.Latest; internal string FileKind { get; } diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptionsBuilder.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptionsBuilder.cs index 0fac8bc30af..13a2524420f 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptionsBuilder.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/Language/RazorParserOptionsBuilder.cs @@ -42,13 +42,15 @@ internal RazorParserOptionsBuilder(bool designTime, RazorLanguageVersion version public bool ParseLeadingDirectives { get; set; } + public bool UseRoslynTokenizer { get; set; } + public RazorLanguageVersion LanguageVersion { get; } internal bool EnableSpanEditHandlers { get; set; } public 
RazorParserOptions Build() { - return new RazorParserOptions(Directives.ToArray(), DesignTime, ParseLeadingDirectives, LanguageVersion, FileKind ?? FileKinds.Legacy, EnableSpanEditHandlers); + return new RazorParserOptions(Directives.ToArray(), DesignTime, ParseLeadingDirectives, UseRoslynTokenizer, LanguageVersion, FileKind ?? FileKinds.Legacy, EnableSpanEditHandlers); } public void SetDesignTime(bool designTime) diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/ConfigureRazorParserOptions.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/ConfigureRazorParserOptions.cs new file mode 100644 index 00000000000..0f9fdeed63b --- /dev/null +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/ConfigureRazorParserOptions.cs @@ -0,0 +1,16 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using Microsoft.AspNetCore.Razor.Language; + +namespace Microsoft.NET.Sdk.Razor.SourceGenerators; + +internal class ConfigureRazorParserOptions(bool useRoslynTokenizer) : RazorEngineFeatureBase, IConfigureRazorParserOptionsFeature +{ + public int Order { get; set; } + + public void Configure(RazorParserOptionsBuilder options) + { + options.UseRoslynTokenizer = useRoslynTokenizer; + } +} diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerationOptions.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerationOptions.cs index 412dba45047..ec490b85ddd 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerationOptions.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerationOptions.cs @@ -33,6 +33,8 @@ internal sealed record RazorSourceGenerationOptions /// internal string? 
TestSuppressUniqueIds { get; set; } + internal bool UseRoslynTokenizer { get; set; } = true; + public override int GetHashCode() => Configuration.GetHashCode(); } } diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.Helpers.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.Helpers.cs index bc0e302982d..a67af975b48 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.Helpers.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.Helpers.cs @@ -2,7 +2,6 @@ // Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. using System.Collections.Generic; -using System.Collections.Immutable; using System.Text; using Microsoft.AspNetCore.Mvc.Razor.Extensions; using Microsoft.AspNetCore.Razor.Language; @@ -55,6 +54,7 @@ private static RazorProjectEngine GetDeclarationProjectEngine( options.SuppressChecksum = true; options.SupportLocalizedComponentNames = razorSourceGeneratorOptions.SupportLocalizedComponentNames; })); + b.Features.Add(new ConfigureRazorParserOptions(razorSourceGeneratorOptions.UseRoslynTokenizer)); b.SetRootNamespace(razorSourceGeneratorOptions.RootNamespace); @@ -109,6 +109,7 @@ private static SourceGeneratorProjectEngine GetGenerationProjectEngine( options.SuppressUniqueIds = razorSourceGeneratorOptions.TestSuppressUniqueIds; options.SuppressAddComponentParameter = !isAddComponentParameterAvailable; })); + b.Features.Add(new ConfigureRazorParserOptions(razorSourceGeneratorOptions.UseRoslynTokenizer)); CompilerFeatures.Register(b); RazorExtensions.Register(b); diff --git a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.RazorProviders.cs b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.RazorProviders.cs index 
7daf0ba4abf..aecd149ff3c 100644 --- a/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.RazorProviders.cs +++ b/src/Compiler/Microsoft.CodeAnalysis.Razor.Compiler/src/SourceGenerators/RazorSourceGenerator.RazorProviders.cs @@ -18,7 +18,7 @@ public partial class RazorSourceGenerator { var ((options, parseOptions), isSuppressed) = pair; var globalOptions = options.GlobalOptions; - + if (isSuppressed) { return default; @@ -44,6 +44,10 @@ public partial class RazorSourceGenerator var razorConfiguration = new RazorConfiguration(razorLanguageVersion, configurationName ?? "default", Extensions: [], UseConsolidatedMvcViews: true); + // We use the new tokenizer by default + var useRazorTokenizer = !parseOptions.Features.TryGetValue("use-razor-tokenizer", out var useRazorTokenizerValue) + || !string.Equals(useRazorTokenizerValue, "false", StringComparison.OrdinalIgnoreCase); + var razorSourceGenerationOptions = new RazorSourceGenerationOptions() { Configuration = razorConfiguration, @@ -52,6 +56,7 @@ public partial class RazorSourceGenerator SupportLocalizedComponentNames = supportLocalizedComponentNames == "true", CSharpLanguageVersion = ((CSharpParseOptions)parseOptions).LanguageVersion, TestSuppressUniqueIds = _testSuppressUniqueIds, + UseRoslynTokenizer = useRazorTokenizer, }; return (razorSourceGenerationOptions, diagnostic); diff --git a/src/Razor/src/Microsoft.VisualStudio.LegacyEditor.Razor/Parsing/VisualStudioRazorParser.cs b/src/Razor/src/Microsoft.VisualStudio.LegacyEditor.Razor/Parsing/VisualStudioRazorParser.cs index 57964527885..ebc69bd57fa 100644 --- a/src/Razor/src/Microsoft.VisualStudio.LegacyEditor.Razor/Parsing/VisualStudioRazorParser.cs +++ b/src/Razor/src/Microsoft.VisualStudio.LegacyEditor.Razor/Parsing/VisualStudioRazorParser.cs @@ -614,6 +614,7 @@ internal class VisualStudioEnableTagHelpersFeature : RazorEngineFeatureBase, ICo public void Configure(RazorParserOptionsBuilder options) { 
options.EnableSpanEditHandlers = true; + options.UseRoslynTokenizer = false; } } diff --git a/src/Razor/test/Microsoft.AspNetCore.Razor.Test.Common.Tooling/Language/Legacy/ToolingParserTestBase.cs b/src/Razor/test/Microsoft.AspNetCore.Razor.Test.Common.Tooling/Language/Legacy/ToolingParserTestBase.cs index 64c89e2de7a..5cbba5150c3 100644 --- a/src/Razor/test/Microsoft.AspNetCore.Razor.Test.Common.Tooling/Language/Legacy/ToolingParserTestBase.cs +++ b/src/Razor/test/Microsoft.AspNetCore.Razor.Test.Common.Tooling/Language/Legacy/ToolingParserTestBase.cs @@ -253,6 +253,7 @@ internal static RazorParserOptions CreateParserOptions( directives.ToArray(), designTime, parseLeadingDirectives: false, + useRoslynTokenizer: false, version: version, fileKind: fileKind, enableSpanEditHandlers) diff --git a/src/Shared/Microsoft.AspNetCore.Razor.Test.Common/Language/Legacy/ParserTestBase.cs b/src/Shared/Microsoft.AspNetCore.Razor.Test.Common/Language/Legacy/ParserTestBase.cs index 3d38abc1518..f4982d0d51e 100644 --- a/src/Shared/Microsoft.AspNetCore.Razor.Test.Common/Language/Legacy/ParserTestBase.cs +++ b/src/Shared/Microsoft.AspNetCore.Razor.Test.Common/Language/Legacy/ParserTestBase.cs @@ -26,11 +26,13 @@ public abstract class ParserTestBase : IParserTest // UTF-8 with BOM private static readonly Encoding _baselineEncoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: true); private readonly bool _validateSpanEditHandlers; + private readonly bool _useLegacyTokenizer; - internal ParserTestBase(TestProject.Layer layer, bool validateSpanEditHandlers = false) + internal ParserTestBase(TestProject.Layer layer, bool validateSpanEditHandlers = false, bool useLegacyTokenizer = false) { TestProjectRoot = TestProject.GetProjectDirectory(GetType(), layer); _validateSpanEditHandlers = validateSpanEditHandlers; + _useLegacyTokenizer = useLegacyTokenizer; } /// @@ -196,7 +198,7 @@ internal virtual RazorSyntaxTree ParseDocument(RazorLanguageVersion version, str var source = 
TestRazorSourceDocument.Create(document, filePath: null, relativePath: null, normalizeNewLines: true); - var options = CreateParserOptions(version, directives, designTime, _validateSpanEditHandlers, featureFlags, fileKind); + var options = CreateParserOptions(version, directives, designTime, _validateSpanEditHandlers, _useLegacyTokenizer, featureFlags, fileKind); var context = new ParserContext(source, options); var codeParser = new CSharpCodeParser(directives, context); @@ -257,6 +259,7 @@ internal static RazorParserOptions CreateParserOptions( IEnumerable directives, bool designTime, bool enableSpanEditHandlers, + bool useLegacyTokenizer, RazorParserFeatureFlags featureFlags = null, string fileKind = null) { @@ -265,6 +268,7 @@ internal static RazorParserOptions CreateParserOptions( directives.ToArray(), designTime, parseLeadingDirectives: false, + useRoslynTokenizer: !useLegacyTokenizer, version, fileKind, enableSpanEditHandlers)