diff --git a/composer.json b/composer.json index cc3bb7958..ee56360a9 100644 --- a/composer.json +++ b/composer.json @@ -20,7 +20,7 @@ "jms/metadata": "^2.0", "doctrine/annotations": "^1.0", "doctrine/instantiator": "^1.0.3", - "hoa/compiler": "^3.17.08.08" + "doctrine/lexer": "^1.1" }, "suggest": { "symfony/yaml": "Required if you'd like to use the YAML metadata format.", diff --git a/src/Type/InnerParser.php b/src/Type/InnerParser.php deleted file mode 100644 index 736f52e42..000000000 --- a/src/Type/InnerParser.php +++ /dev/null @@ -1,94 +0,0 @@ - [ - 'skip' => '\s+', - 'array_' => '\[', - '_array' => '\]', - 'parenthesis_' => '<', - '_parenthesis' => '>', - 'empty_string' => '""|\'\'', - 'number' => '(\+|\-)?(0|[1-9]\d*)(\.\d+)?', - 'null' => 'null', - 'comma' => ',', - 'name' => '(?:[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*\\\)*[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*', - 'quote_:quoted_string' => '"', - 'apostrophe_:apostrophed_string' => '\'', - ], - 'quoted_string' => [ - 'quoted_string' => '[^"]+', - '_quote:default' => '"', - ], - 'apostrophed_string' => [ - 'apostrophed_string' => '[^\']+', - '_apostrophe:default' => '\'', - ], - ], - [ - 'type' => new Choice('type', ['simple_type', 'compound_type'], null), - 1 => new Token(1, 'name', null, -1, true), - 2 => new Concatenation(2, [1], '#simple_type'), - 3 => new Token(3, 'number', null, -1, true), - 4 => new Concatenation(4, [3], '#simple_type'), - 5 => new Token(5, 'null', null, -1, true), - 6 => new Concatenation(6, [5], '#simple_type'), - 7 => new Token(7, 'empty_string', null, -1, true), - 8 => new Concatenation(8, [7], '#simple_type'), - 9 => new Token(9, 'quote_', null, -1, false), - 10 => new Token(10, 'quoted_string', null, -1, true), - 11 => new Token(11, '_quote', null, -1, false), - 12 => new Concatenation(12, [9, 10, 11], '#simple_type'), - 13 => new Token(13, 'apostrophe_', null, -1, false), - 14 => new Token(14, 'apostrophed_string', null, -1, true), - 15 => new Token(15, '_apostrophe', null, -1, false), - 16 => new Concatenation(16, [13, 14, 15], '#simple_type'), - 17 => new Concatenation(17, ['array'], '#simple_type'), - 'simple_type' => new Choice('simple_type', [2, 4, 6, 8, 12, 16, 17], null), - 19 => new Token(19, 'name', null, -1, true), - 20 => new Token(20, 'parenthesis_', null, -1, false), - 21 => new Token(21, 'comma', null, -1, false), - 22 => new Concatenation(22, [21, 'type'], '#compound_type'), - 23 => new Repetition(23, 0, -1, 22, null), - 24 => new Token(24, '_parenthesis', null, -1, false), - 'compound_type' => new Concatenation('compound_type', [19, 20, 'type', 23, 24], null), - 26 => new Token(26, 'array_', null, -1, false), - 27 => new Token(27, 'comma', null, -1, false), - 28 => new Concatenation(28, [27, 'simple_type'], '#array'), - 29 => new Repetition(29, 0, -1, 28, null), - 30 => new Concatenation(30, ['simple_type', 29], null), - 31 => new Repetition(31, 0, 1, 30, null), - 32 => new Token(32, '_array', null, -1, false), - 'array' => new Concatenation('array', [26, 31, 32], null), - ], - [] - ); - - $this->getRule('type')->setPPRepresentation(' simple_type() | compound_type()'); - $this->getRule('simple_type')->setDefaultId('#simple_type'); - $this->getRule('simple_type')->setPPRepresentation(' | | | | ::quote_:: ::_quote:: | ::apostrophe_:: ::_apostrophe:: | array()'); - $this->getRule('compound_type')->setDefaultId('#compound_type'); - $this->getRule('compound_type')->setPPRepresentation(' ::parenthesis_:: type() ( ::comma:: type() )* ::_parenthesis::'); - $this->getRule('array')->setDefaultId('#array'); - $this->getRule('array')->setPPRepresentation(' ::array_:: ( simple_type() ( ::comma:: simple_type() )* )? ::_array::'); - } -} diff --git a/src/Type/Lexer.php b/src/Type/Lexer.php new file mode 100644 index 000000000..ffedcbd78 --- /dev/null +++ b/src/Type/Lexer.php @@ -0,0 +1,103 @@ +getType($type); + } catch (\Throwable $e) { + throw new SyntaxError($e->getMessage(), 0, $e); + } + } + + protected function getCatchablePatterns(): array + { + return [ + '[a-z][a-z_\\\\0-9]*', // identifier or qualified name + "'(?:[^']|'')*'", // single quoted strings + '(?:[0-9]+(?:[\.][0-9]+)*)(?:e[+-]?[0-9]+)?', // numbers + '"(?:[^"]|"")*"', // double quoted strings + '<', + '>', + '\\[', + '\\]', + ]; + } + + protected function getNonCatchablePatterns(): array + { + return ['\s+']; + } + + /** + * {{@inheritDoc}} + */ + protected function getType(&$value) + { + $type = self::T_UNKNOWN; + + switch (true) { + // Recognize numeric values + case is_numeric($value): + if (false !== strpos($value, '.') || false !== stripos($value, 'e')) { + return self::T_FLOAT; + } + + return self::T_INTEGER; + + // Recognize quoted strings + case "'" === $value[0]: + $value = str_replace("''", "'", substr($value, 1, strlen($value) - 2)); + + return self::T_STRING; + case '"' === $value[0]: + $value = str_replace('""', '"', substr($value, 1, strlen($value) - 2)); + + return self::T_STRING; + case 'null' === $value: + return self::T_NULL; + // Recognize identifiers, aliased or qualified names + case ctype_alpha($value[0]) || '\\' === $value[0]: + return self::T_IDENTIFIER; + case ',' === $value: + return self::T_COMMA; + case '>' === $value: + return self::T_TYPE_END; + case '<' === $value: + return self::T_TYPE_START; + case ']' === $value: + return self::T_ARRAY_END; + case '[' === $value: + return self::T_ARRAY_START; + + // Default + default: + // Do nothing + } + + return $type; + } +} diff --git a/src/Type/Parser.php b/src/Type/Parser.php index de5a7c3a7..06b5ed135 100644 --- a/src/Type/Parser.php +++ b/src/Type/Parser.php @@ -4,32 +4,149 @@ namespace JMS\Serializer\Type; -use Hoa\Exception\Exception; -use Hoa\Visitor\Visit; use JMS\Serializer\Type\Exception\SyntaxError; +/** + * @internal + */ final class Parser implements ParserInterface { - /** @var InnerParser */ - private $parser; + /** + * @var Lexer + */ + private $lexer; - /** @var Visit */ - private $visitor; + /** + * @var bool + */ + private $root = true; - public function __construct() + public function parse(string $string): array { - $this->parser = new InnerParser(); - $this->visitor = new TypeVisitor(); + $this->lexer = new Lexer(); + $this->lexer->setInput($string); + $this->lexer->moveNext(); + return $this->visit(); } - public function parse(string $type): array + /** + * @return mixed + */ + private function visit() { - try { - $ast = $this->parser->parse($type, 'type'); + $this->lexer->moveNext(); - return $this->visitor->visit($ast); - } catch (Exception $e) { - throw new SyntaxError($e->getMessage(), 0, $e); + if (!$this->lexer->token) { + throw new SyntaxError( + 'Syntax error, unexpected end of stream' + ); } + + if (Lexer::T_FLOAT === $this->lexer->token['type']) { + return floatval($this->lexer->token['value']); + } elseif (Lexer::T_INTEGER === $this->lexer->token['type']) { + return intval($this->lexer->token['value']); + } elseif (Lexer::T_NULL === $this->lexer->token['type']) { + return null; + } elseif (Lexer::T_STRING === $this->lexer->token['type']) { + return $this->lexer->token['value']; + } elseif (Lexer::T_IDENTIFIER === $this->lexer->token['type']) { + if ($this->lexer->isNextToken(Lexer::T_TYPE_START)) { + return $this->visitCompoundType(); + } elseif ($this->lexer->isNextToken(Lexer::T_ARRAY_START)) { + return $this->visitArrayType(); + } + return $this->visitSimpleType(); + } elseif (!$this->root && Lexer::T_ARRAY_START === $this->lexer->token['type']) { + return $this->visitArrayType(); + } + + throw new SyntaxError(sprintf( + 'Syntax error, unexpected "%s" (%s)', + $this->lexer->token['value'], + $this->getConstant($this->lexer->token['type']) + )); + } + + /** + * @return string|mixed[] + */ + private function visitSimpleType() + { + $value = $this->lexer->token['value']; + return ['name' => $value, 'params' => []]; + } + + private function visitCompoundType(): array + { + $this->root = false; + $name = $this->lexer->token['value']; + $this->match(Lexer::T_TYPE_START); + + $params = []; + if (!$this->lexer->isNextToken(Lexer::T_TYPE_END)) { + while (true) { + $params[] = $this->visit(); + + if ($this->lexer->isNextToken(Lexer::T_TYPE_END)) { + break; + } + $this->match(Lexer::T_COMMA); + } + } + $this->match(Lexer::T_TYPE_END); + return [ + 'name' => $name, + 'params' => $params, + ]; + } + + private function visitArrayType(): array + { + /* + * Here we should call $this->match(Lexer::T_ARRAY_START); to make it clean + * but the token has already been consumed by moveNext() in visit() + */ + + $params = []; + if (!$this->lexer->isNextToken(Lexer::T_ARRAY_END)) { + while (true) { + $params[] = $this->visit(); + if ($this->lexer->isNextToken(Lexer::T_ARRAY_END)) { + break; + } + $this->match(Lexer::T_COMMA); + } + } + $this->match(Lexer::T_ARRAY_END); + return $params; + } + + private function match(int $token): void + { + if (!$this->lexer->lookahead) { + throw new SyntaxError( + sprintf('Syntax error, unexpected end of stream, expected %s', $this->getConstant($token)) + ); + } + + if ($this->lexer->lookahead['type'] === $token) { + $this->lexer->moveNext(); + + return; + } + + throw new SyntaxError(sprintf( + 'Syntax error, unexpected "%s" (%s), expected was %s', + $this->lexer->lookahead['value'], + $this->getConstant($this->lexer->lookahead['type']), + $this->getConstant($token) + )); + } + + private function getConstant(int $value): string + { + $oClass = new \ReflectionClass(Lexer::class); + return array_search($value, $oClass->getConstants()); } } diff --git a/src/Type/TypeVisitor.php b/src/Type/TypeVisitor.php deleted file mode 100644 index ec140b100..000000000 --- a/src/Type/TypeVisitor.php +++ /dev/null @@ -1,96 +0,0 @@ -getId()) { - case '#simple_type': - return $this->visitSimpleType($element); - case '#compound_type': - return $this->visitCompoundType($element, $handle, $eldnah); - case '#array': - return $this->visitArrayType($element, $handle, $eldnah); - } - - throw new InvalidNode(); - } - - /** - * @return string|mixed[] - */ - private function visitSimpleType(TreeNode $element) - { - $tokenNode = $element->getChild(0); - - if (!$tokenNode->isToken()) { - return $tokenNode->accept($this); - } - - $token = $tokenNode->getValueToken(); - $value = $tokenNode->getValueValue(); - - if ('name' === $token) { - return ['name' => $value, 'params' => []]; - } - - if ('empty_string' === $token) { - return ''; - } - - if ('null' === $token) { - return null; - } - - if ('number' === $token) { - return false === strpos($value, '.') ? intval($value) : floatval($value); - } - - $escapeChar = 'quoted_string' === $token ? '"' : "'"; - - if (false === strpos($value, $escapeChar)) { - return $value; - } - - return str_replace($escapeChar . $escapeChar, $escapeChar, $value); - } - - private function visitCompoundType(TreeNode $element, ?int &$handle, ?int $eldnah): array - { - $nameToken = $element->getChild(0); - $parameters = array_slice($element->getChildren(), 1); - - return [ - 'name' => $nameToken->getValueValue(), - 'params' => array_map( - function (TreeNode $node) use ($handle, $eldnah) { - return $node->accept($this, $handle, $eldnah); - }, - $parameters - ), - ]; - } - - private function visitArrayType(TreeNode $node, ?int &$handle, ?int $eldnah): array - { - return array_map( - function (TreeNode $child) { - return $child->accept($this); - }, - $node->getChildren() - ); - } -} diff --git a/src/Type/grammar.pp b/src/Type/grammar.pp deleted file mode 100644 index 2192d6c86..000000000 --- a/src/Type/grammar.pp +++ /dev/null @@ -1,41 +0,0 @@ -%skip whitespace \s+ - -%token array_ \[ -%token _array \] -%token parenthesis_ < -%token _parenthesis > -%token empty_string ""|'' -%token number (\+|\-)?(0|[1-9]\d*)(\.\d+)? -%token null null -%token comma , -%token name (?:[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*\\)*[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]* - -%token quote_ " -> quoted_string -%token quoted_string:quoted_string [^"]+ -%token quoted_string:_quote " -> default - -%token apostrophe_ ' -> apostrophed_string -%token apostrophed_string:apostrophed_string [^']+ -%token apostrophed_string:_apostrophe ' -> default - -type: - simple_type() | compound_type() - -#simple_type: - - | - | - | - | ::quote_:: ::_quote:: - | ::apostrophe_:: ::_apostrophe:: - | array() - -#compound_type: - - ::parenthesis_:: - type() - ( ::comma:: type() )* - ::_parenthesis:: - -#array: - ::array_:: ( simple_type() ( ::comma:: simple_type() )* )? ::_array:: diff --git a/src/Type/regenerate-parser.php b/src/Type/regenerate-parser.php deleted file mode 100755 index c93c84aea..000000000 --- a/src/Type/regenerate-parser.php +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env php -', $type('Foo', ['a']), ]; + yield [ + 'Foo<>', + $type('Foo', []), + ]; yield [ 'Foo<5>', $type('Foo', [5]), @@ -122,75 +126,25 @@ public function validTypesProvider(): iterable ]; } - public function testEmptyString(): void - { - $this->expectException(SyntaxError::class); - $this->expectExceptionMessage( - "Unexpected token \"EOF\" (EOF) at line 1 and column 1:\n" - . "\n" - . '↑' - ); - - $this->parser->parse(''); - } - - public function testParamTypeMustEndWithBracket(): void - { - $this->expectException(SyntaxError::class); - $this->expectExceptionMessage( - "Unexpected token \"EOF\" (EOF) at line 1 and column 8:\n" - . "Fooparser->parse('FooexpectException(SyntaxError::class); - $this->expectExceptionMessage( - "Unexpected token \",\" (comma) at line 1 and column 1:\n" - . ",\n" - . '↑' - ); - - $this->parser->parse(','); - } - - public function testEmptyParams(): void - { - $this->expectException(SyntaxError::class); - $this->expectExceptionMessage( - "Unexpected token \">\" (_parenthesis) at line 1 and column 5:\n" - . "Foo<>\n" - . ' ↑' - ); - - $this->parser->parse('Foo<>'); - } - - public function testNoTrailingComma(): void + /** + * @dataProvider wrongSyntax + */ + public function testSyntaxError($value): void { $this->expectException(SyntaxError::class); - $this->expectExceptionMessage( - "Unexpected token \",\" (comma) at line 1 and column 7:\n" - . "Foo\n" - . ' ↑' - ); - - $this->parser->parse('Foo'); + $this->parser->parse($value); } - public function testLeadingBackslash(): void + public function wrongSyntax() { - $this->expectException(SyntaxError::class); - $this->expectExceptionMessage( - "Unrecognized token \"\\\" at line 1 and column 5:\n" - . "Foo<\Bar>\n" - . ' ↑' - ); - - $this->parser->parse('Foo<\Bar>'); + return [ + ['Foo<\Bar>]'], + ['Foo'], + ['Foo'], + ]; } }