From c71314073b2ad3d70a429f133604aa2cae3b3f6b Mon Sep 17 00:00:00 2001 From: Gleb Mazovetskiy Date: Mon, 17 Jun 2019 21:11:24 +0100 Subject: [PATCH] Use `Util::ascii_*` in prelexer Also removes "unicode" and replaces "nonascii". They were identical in behaviour. --- src/lexer.cpp | 97 +++++++++--------------------------------------- src/lexer.hpp | 21 +++-------- src/parser.cpp | 6 +-- src/prelexer.cpp | 8 ++-- 4 files changed, 29 insertions(+), 103 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index fd9101192..bb02f9b85 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -6,6 +6,7 @@ #include #include "lexer.hpp" #include "constants.hpp" +#include "util_string.hpp" namespace Sass { @@ -27,77 +28,14 @@ namespace Sass { const char* kwd_minus(const char* src) { return exactly<'-'>(src); }; const char* kwd_slash(const char* src) { return exactly<'/'>(src); }; - //#################################### - // implement some function that do exist in the standard - // but those are locale aware which brought some trouble - // this even seems to improve performance by quite a bit - //#################################### - - bool is_alpha(const char& chr) - { - return unsigned(chr - 'A') <= 'Z' - 'A' || - unsigned(chr - 'a') <= 'z' - 'a'; - } - - bool is_space(const char& chr) - { - // adapted the technique from is_alpha - return chr == ' ' || unsigned(chr - '\t') <= '\r' - '\t'; - } - - bool is_digit(const char& chr) - { - // adapted the technique from is_alpha - return unsigned(chr - '0') <= '9' - '0'; - } - - bool is_number(const char& chr) - { - // adapted the technique from is_alpha - return is_digit(chr) || chr == '-' || chr == '+'; - } - - bool is_xdigit(const char& chr) - { - // adapted the technique from is_alpha - return unsigned(chr - '0') <= '9' - '0' || - unsigned(chr - 'a') <= 'f' - 'a' || - unsigned(chr - 'A') <= 'F' - 'A'; - } - - bool is_punct(const char& chr) - { - // locale independent - return chr == '.'; - } - - bool is_alnum(const char& chr) - { - return is_alpha(chr) || is_digit(chr); - } - - // check if char is outside ascii range - bool is_unicode(const char& chr) - { - // check for unicode range - return unsigned(chr) > 127; - } - - // check if char is outside ascii range - // but with specific ranges (copied from Ruby Sass) - bool is_nonascii(const char& chr) - { - unsigned int cmp = unsigned(chr); - return ( - (cmp >= 128 && cmp <= 15572911) || - (cmp >= 15630464 && cmp <= 15712189) || - (cmp >= 4036001920) - ); + bool is_number(char chr) { + return Util::ascii_isdigit(static_cast(chr)) || + chr == '-' || chr == '+'; } // check if char is within a reduced ascii range // valid in a uri (copied from Ruby Sass) - bool is_uri_character(const char& chr) + bool is_uri_character(char chr) { unsigned int cmp = unsigned(chr); return (cmp > 41 && cmp < 127) || @@ -106,17 +44,19 @@ namespace Sass { // check if char is within a reduced ascii range // valid for escaping (copied from Ruby Sass) - bool is_escapable_character(const char& chr) + bool is_escapable_character(char chr) { unsigned int cmp = unsigned(chr); return cmp > 31 && cmp < 127; } // Match word character (look ahead) - bool is_character(const char& chr) + bool is_character(char chr) { // valid alpha, numeric or unicode char (plus hyphen) - return is_alnum(chr) || is_unicode(chr) || chr == '-'; + return Util::ascii_isalnum(static_cast(chr)) || + !Util::ascii_isascii(static_cast(chr)) || + chr == '-'; } //#################################### @@ -124,16 +64,13 @@ namespace Sass { //#################################### // create matchers that advance the position - const char* space(const char* src) { return is_space(*src) ? src + 1 : 0; } - const char* alpha(const char* src) { return is_alpha(*src) ? src + 1 : 0; } - const char* unicode(const char* src) { return is_unicode(*src) ? src + 1 : 0; } - const char* nonascii(const char* src) { return is_nonascii(*src) ? src + 1 : 0; } - const char* digit(const char* src) { return is_digit(*src) ? src + 1 : 0; } - const char* xdigit(const char* src) { return is_xdigit(*src) ? src + 1 : 0; } - const char* alnum(const char* src) { return is_alnum(*src) ? src + 1 : 0; } - const char* punct(const char* src) { return is_punct(*src) ? src + 1 : 0; } - const char* hyphen(const char* src) { return *src && *src == '-' ? src + 1 : 0; } - const char* character(const char* src) { return is_character(*src) ? src + 1 : 0; } + const char* space(const char* src) { return Util::ascii_isspace(static_cast(*src)) ? src + 1 : nullptr; } + const char* alpha(const char* src) { return Util::ascii_isalpha(static_cast(*src)) ? src + 1 : nullptr; } + const char* nonascii(const char* src) { return Util::ascii_isascii(static_cast(*src)) ? nullptr : src + 1; } + const char* digit(const char* src) { return Util::ascii_isdigit(static_cast(*src)) ? src + 1 : nullptr; } + const char* xdigit(const char* src) { return Util::ascii_isxdigit(static_cast(*src)) ? src + 1 : nullptr; } + const char* alnum(const char* src) { return Util::ascii_isalnum(static_cast(*src)) ? src + 1 : nullptr; } + const char* hyphen(const char* src) { return *src == '-' ? src + 1 : 0; } const char* uri_character(const char* src) { return is_uri_character(*src) ? src + 1 : 0; } const char* escapable_character(const char* src) { return is_escapable_character(*src) ? src + 1 : 0; } diff --git a/src/lexer.hpp b/src/lexer.hpp index 5838c291c..360ed2269 100644 --- a/src/lexer.hpp +++ b/src/lexer.hpp @@ -24,19 +24,11 @@ namespace Sass { // BASIC CLASS MATCHERS //#################################### - // These are locale independant - bool is_space(const char& src); - bool is_alpha(const char& src); - bool is_punct(const char& src); - bool is_digit(const char& src); - bool is_number(const char& src); - bool is_alnum(const char& src); - bool is_xdigit(const char& src); - bool is_unicode(const char& src); - bool is_nonascii(const char& src); - bool is_character(const char& src); - bool is_uri_character(const char& src); - bool escapable_character(const char& src); + // Matches ASCII digits, +, and -. + bool is_number(char src); + + bool is_uri_character(char src); + bool escapable_character(char src); // Match a single ctype predicate. const char* space(const char* src); @@ -44,11 +36,8 @@ namespace Sass { const char* digit(const char* src); const char* xdigit(const char* src); const char* alnum(const char* src); - const char* punct(const char* src); const char* hyphen(const char* src); - const char* unicode(const char* src); const char* nonascii(const char* src); - const char* character(const char* src); const char* uri_character(const char* src); const char* escapable_character(const char* src); diff --git a/src/parser.cpp b/src/parser.cpp index 12cecb6c8..7370a8f46 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -672,7 +672,7 @@ namespace Sass { } // EO parse_include_directive - + SimpleSelectorObj Parser::parse_simple_selector() { lex < css_comments >(false); @@ -2171,7 +2171,7 @@ namespace Sass { } } - + } std::vector queries; @@ -2907,7 +2907,7 @@ namespace Sass { } // backup position to last significant char while (trim && last_pos > source && last_pos < end) { - if (!Prelexer::is_space(*last_pos)) break; + if (!Util::ascii_isspace(static_cast(*last_pos))) break; utf8::prior(last_pos, source); } diff --git a/src/prelexer.cpp b/src/prelexer.cpp index d2e8e06f0..acd3061e3 100644 --- a/src/prelexer.cpp +++ b/src/prelexer.cpp @@ -336,7 +336,7 @@ namespace Sass { return alternatives< unicode_seq, alpha, - unicode, + nonascii, exactly<'-'>, exactly<'_'>, NONASCII, @@ -351,7 +351,7 @@ namespace Sass { return alternatives< unicode_seq, alnum, - unicode, + nonascii, exactly<'-'>, exactly<'_'>, NONASCII, @@ -385,7 +385,7 @@ namespace Sass { { return alternatives < alpha, - unicode, + nonascii, escape_seq, exactly<'_'> >(src); @@ -395,7 +395,7 @@ namespace Sass { { return alternatives < alnum, - unicode, + nonascii, escape_seq, exactly<'_'> >(src);