From c54cb1043eac4c8f9252a6f4c75b27663955a1fe Mon Sep 17 00:00:00 2001
From: jrfnl
Date: Wed, 15 May 2024 19:59:11 +0200
Subject: [PATCH] Tokenizers/PHP: bug fix - empty block comment

This commit fixes an edge case tokenizer bug, where a - completely empty, not even whitespace - _block comment_, would be tokenized as a docblock.

Without this commit, the `/**/` code snippet was tokenized as:
```
8 | L07 | C 1 | CC 0 | ( 0) | T_DOC_COMMENT_OPEN_TAG | [ 4]: /**/
9 | L07 | C 5 | CC 0 | ( 0) | T_DOC_COMMENT_CLOSE_TAG | [ 0]:
```

With the fix applied, it will be tokenized as:
```
8 | L07 | C 1 | CC 0 | ( 0) | T_COMMENT | [ 4]: /**/
```
---
 src/Tokenizers/PHP.php | 2 +-
 .../Comment/SingleLineDocBlockTest.inc | 3 +++
 .../Comment/SingleLineDocBlockTest.php | 20 +++++++++++++++++++
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php
index 55baf41689..6e500e6cd1 100644
--- a/src/Tokenizers/PHP.php
+++ b/src/Tokenizers/PHP.php
@@ -786,7 +786,7 @@ protected function tokenize($string)
 
             if ($tokenIsArray === true
                 && ($token[0] === T_DOC_COMMENT
-                || ($token[0] === T_COMMENT && strpos($token[1], '/**') === 0))
+                || ($token[0] === T_COMMENT && strpos($token[1], '/**') === 0 && $token[1] !== '/**/'))
             ) {
                 $commentTokens = $commentTokenizer->tokenizeString($token[1], $this->eolChar, $newStackPtr);
                 foreach ($commentTokens as $commentToken) {
diff --git a/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.inc b/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.inc
index 5775ea563e..923e0fc1b2 100644
--- a/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.inc
+++ b/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.inc
@@ -1,5 +1,8 @@ '/**/'],
+        ];
+
+        $target = $this->getTargetToken('/* '.__FUNCTION__.' */', [T_COMMENT, T_DOC_COMMENT_OPEN_TAG]);
+
+        $this->checkTokenSequence($target, $expectedSequence);
+
+    }//end testEmptyBlockCommentNoWhiteSpace()
+
+
     /**
      * Verify tokenization of an empty, single line DocBlock.
      *