From 6df8cd9c037728564cae569d10f8ba646a91793d Mon Sep 17 00:00:00 2001 From: Gleb Mazovetskiy Date: Mon, 25 Mar 2019 07:56:55 +0000 Subject: [PATCH] Newlines: Parse \f and normalize in comments (#2849) Also: 1. Moves the `normalize*` functions and `rtrim` to a separate object file, so that they can be unit tested quickly and easily. 2. Changes `string_to_output` to also replace `\r\n`, fixing Windows tests. 3. Improves other `normalize_*` functions and adds tests. Refs #2843 sass-spec PRs: 1. Bug fixes to make it run: sass/sass-spec#1365 2. Enabling the newly passing tests: sass/sass-spec#1366 --- Makefile.conf | 1 + src/emitter.cpp | 13 ++-- src/eval.cpp | 1 + src/fn_miscs.cpp | 1 + src/fn_utils.cpp | 1 + src/lexer.cpp | 6 +- src/output.cpp | 1 - src/parser.cpp | 1 + src/prelexer.cpp | 2 +- src/util.cpp | 98 +++++++++++++--------------- src/util.hpp | 7 +- src/util_string.cpp | 57 +++++++++++++++++ src/util_string.hpp | 17 +++++ test/Makefile | 12 +++- test/test_util_string.cpp | 123 ++++++++++++++++++++++++++++++++++++ win/libsass.targets | 2 + win/libsass.vcxproj.filters | 6 ++ 17 files changed, 276 insertions(+), 73 deletions(-) create mode 100644 src/util_string.cpp create mode 100644 src/util_string.hpp create mode 100644 test/test_util_string.cpp diff --git a/Makefile.conf b/Makefile.conf index 049971d9f..f93497437 100644 --- a/Makefile.conf +++ b/Makefile.conf @@ -29,6 +29,7 @@ SOURCES = \ bind.cpp \ file.cpp \ util.cpp \ + util_string.cpp \ json.cpp \ units.cpp \ values.cpp \ diff --git a/src/emitter.cpp b/src/emitter.cpp index c1add350d..c0334b250 100644 --- a/src/emitter.cpp +++ b/src/emitter.cpp @@ -3,6 +3,7 @@ #include "context.hpp" #include "output.hpp" #include "emitter.hpp" +#include "util_string.hpp" #include "utf8_string.hpp" namespace Sass { @@ -134,13 +135,13 @@ namespace Sass { // write space/lf flush_schedules(); - if (in_comment && output_style() == COMPACT) { - // unescape comment nodes - std::string out = comment_to_string(text); - // add to buffer - wbuf.buffer += out; - // account for data in source-maps + if (in_comment) { + std::string out = Util::normalize_newlines(text); + if (output_style() == COMPACT) { + out = comment_to_compact_string(out); + } wbuf.smap.append(Offset(out)); + wbuf.buffer += std::move(out); } else { // add to buffer wbuf.buffer += text; diff --git a/src/eval.cpp b/src/eval.cpp index dd26e5db6..412db232e 100644 --- a/src/eval.cpp +++ b/src/eval.cpp @@ -30,6 +30,7 @@ #include "expand.hpp" #include "color_maps.hpp" #include "sass_functions.hpp" +#include "util_string.hpp" namespace Sass { diff --git a/src/fn_miscs.cpp b/src/fn_miscs.cpp index dbfd9c4d5..6d01c7fae 100644 --- a/src/fn_miscs.cpp +++ b/src/fn_miscs.cpp @@ -2,6 +2,7 @@ #include "expand.hpp" #include "fn_utils.hpp" #include "fn_miscs.hpp" +#include "util_string.hpp" namespace Sass { diff --git a/src/fn_utils.cpp b/src/fn_utils.cpp index 5c8dc3a6e..defee09f6 100644 --- a/src/fn_utils.cpp +++ b/src/fn_utils.cpp @@ -4,6 +4,7 @@ #include "parser.hpp" #include "fn_utils.hpp" +#include "util_string.hpp" namespace Sass { diff --git a/src/lexer.cpp b/src/lexer.cpp index 73cfdad4c..593fc9c9e 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -153,11 +153,11 @@ namespace Sass { // Match word boundary (zero-width lookahead). const char* word_boundary(const char* src) { return is_character(*src) || *src == '#' ? 0 : src; } - // Match linefeed /(?:\n|\r\n?)/ + // Match linefeed /(?:\n|\r\n?|\f)/ const char* re_linebreak(const char* src) { // end of file or unix linefeed return here - if (*src == 0 || *src == '\n') return src + 1; + if (*src == 0 || *src == '\n' || *src == '\f') return src + 1; // a carriage return may optionally be followed by a linefeed if (*src == '\r') return *(src + 1) == '\n' ? src + 2 : src + 1; // no linefeed @@ -169,7 +169,7 @@ namespace Sass { const char* end_of_line(const char* src) { // end of file or unix linefeed return here - return *src == 0 || *src == '\n' || *src == '\r' ? src : 0; + return *src == 0 || *src == '\n' || *src == '\r' || *src == '\f' ? src : 0; } // Assert end_of_file boundary (/\z/) diff --git a/src/output.cpp b/src/output.cpp index ebfe8c5a2..6ff17af37 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -92,7 +92,6 @@ namespace Sass { void Output::operator()(Comment* c) { - std::string txt = c->text()->to_string(opt); // if (indentation && txt == "/**/") return; bool important = c->is_important(); if (output_style() != COMPRESSED || important) { diff --git a/src/parser.cpp b/src/parser.cpp index e624bda4a..2736a9737 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -11,6 +11,7 @@ #include "color_maps.hpp" #include "sass/functions.h" #include "error_handling.hpp" +#include "util_string.hpp" // Notes about delayed: some ast nodes can have delayed evaluation so // they can preserve their original semantics if needed. This is most diff --git a/src/prelexer.cpp b/src/prelexer.cpp index 020831465..2f05a896e 100644 --- a/src/prelexer.cpp +++ b/src/prelexer.cpp @@ -263,7 +263,7 @@ namespace Sass { >(src); } - // Match a line comment (/.*?(?=\n|\r\n?|\Z)/. + // Match a line comment (/.*?(?=\n|\r\n?|\f|\Z)/. const char* line_comment(const char* src) { return sequence< diff --git a/src/util.cpp b/src/util.cpp index e1f394e81..62f5d899a 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -163,42 +163,64 @@ namespace Sass { std::replace(str.begin(), str.end(), '\n', ' '); } - // bell characters are replaced with spaces - // also eats spaces after line-feeds (ltrim) + // 1. Removes whitespace after newlines. + // 2. Replaces newlines with spaces. + // + // This method only considers LF and CRLF as newlines. std::string string_to_output(const std::string& str) { - std::string out(""); - bool lf = false; - for (auto i : str) { - if (i == '\n') { - out += ' '; - lf = true; - } else if (!(lf && isspace(i))) { - out += i; - lf = false; + std::string result; + result.reserve(str.size()); + std::size_t pos = 0; + while (true) { + const std::size_t newline = str.find_first_of("\n\r", pos); + if (newline == std::string::npos) break; + result.append(str, pos, newline - pos); + if (str[newline] == '\r') { + if (str[newline + 1] == '\n') { + pos = newline + 2; + } else { + // CR without LF: append as-is and continue. + result += '\r'; + pos = newline + 1; + continue; + } + } else { + pos = newline + 1; + } + result += ' '; + const std::size_t non_space = str.find_first_not_of(" \f\n\r\t\v", pos); + if (non_space != std::string::npos) { + pos = non_space; } } - return out; + result.append(str, pos, std::string::npos); + return result; } std::string escape_string(const std::string& str) { - std::string out(""); - for (auto i : str) { - if (i == '\n') { - out += "\\n"; - } else if (i == '\r') { - out += "\\r"; - } else if (i == '\t') { - out += "\\t"; - } else { - out += i; + std::string out; + out.reserve(str.size()); + for (char c : str) { + switch (c) { + case '\n': + out.append("\\n"); + break; + case '\r': + out.append("\\r"); + break; + case '\f': + out.append("\\f"); + break; + default: + out += c; } } return out; } - std::string comment_to_string(const std::string& text) + std::string comment_to_compact_string(const std::string& text) { std::string str = ""; size_t has = 0; @@ -207,7 +229,6 @@ namespace Sass { for (auto i : text) { if (clean) { if (i == '\n') { has = 0; } - else if (i == '\r') { has = 0; } else if (i == '\t') { ++ has; } else if (i == ' ') { ++ has; } else if (i == '*') {} @@ -219,8 +240,6 @@ namespace Sass { } } else if (i == '\n') { clean = true; - } else if (i == '\r') { - clean = true; } else { str += i; } @@ -508,33 +527,6 @@ namespace Sass { } namespace Util { - using std::string; - - std::string rtrim(const std::string &str) { - std::string trimmed = str; - size_t pos_ws = trimmed.find_last_not_of(" \t\n\v\f\r"); - if (pos_ws != std::string::npos) - { trimmed.erase(pos_ws + 1); } - else { trimmed.clear(); } - return trimmed; - } - - std::string normalize_underscores(const std::string& str) { - std::string normalized = str; - for(size_t i = 0, L = normalized.length(); i < L; ++i) { - if(normalized[i] == '_') { - normalized[i] = '-'; - } - } - return normalized; - } - - std::string normalize_decimals(const std::string& str) { - std::string prefix = "0"; - std::string normalized = str; - - return normalized[0] == '.' ? normalized.insert(0, prefix) : normalized; - } bool isPrintable(Ruleset* r, Sass_Output_Style style) { if (r == NULL) { diff --git a/src/util.hpp b/src/util.hpp index d1866a51b..3ed563565 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -38,7 +38,7 @@ namespace Sass { std::string read_css_string(const std::string& str, bool css = true); std::string evacuate_escapes(const std::string& str); std::string string_to_output(const std::string& str); - std::string comment_to_string(const std::string& text); + std::string comment_to_compact_string(const std::string& text); std::string read_hex_escapes(const std::string& str); std::string escape_string(const std::string& str); void newline_to_space(std::string& str); @@ -91,11 +91,6 @@ namespace Sass { namespace Util { - std::string rtrim(const std::string& str); - - std::string normalize_underscores(const std::string& str); - std::string normalize_decimals(const std::string& str); - bool isPrintable(Ruleset* r, Sass_Output_Style style = NESTED); bool isPrintable(Supports_Block* r, Sass_Output_Style style = NESTED); bool isPrintable(Media_Block* r, Sass_Output_Style style = NESTED); diff --git a/src/util_string.cpp b/src/util_string.cpp new file mode 100644 index 000000000..ea56402ca --- /dev/null +++ b/src/util_string.cpp @@ -0,0 +1,57 @@ +#include "util_string.hpp" + +#include + +namespace Sass { +namespace Util { + +std::string rtrim(const std::string &str) { + std::string trimmed = str; + size_t pos_ws = trimmed.find_last_not_of(" \t\n\v\f\r"); + if (pos_ws != std::string::npos) { + trimmed.erase(pos_ws + 1); + } else { + trimmed.clear(); + } + return trimmed; +} + +std::string normalize_newlines(const std::string& str) { + std::string result; + result.reserve(str.size()); + std::size_t pos = 0; + while (true) { + const std::size_t newline = str.find_first_of("\n\f\r", pos); + if (newline == std::string::npos) break; + result.append(str, pos, newline - pos); + result += '\n'; + if (str[newline] == '\r' && str[newline + 1] == '\n') { + pos = newline + 2; + } else { + pos = newline + 1; + } + } + result.append(str, pos, std::string::npos); + return result; +} + +std::string normalize_underscores(const std::string& str) { + std::string normalized = str; + std::replace(normalized.begin(), normalized.end(), '_', '-'); + return normalized; +} + +std::string normalize_decimals(const std::string& str) { + std::string normalized; + if (!str.empty() && str[0] == '.') { + normalized.reserve(str.size() + 1); + normalized += '0'; + normalized += str; + } else { + normalized = str; + } + return normalized; +} + +} // namespace Sass +} // namespace Util diff --git a/src/util_string.hpp b/src/util_string.hpp new file mode 100644 index 000000000..66109d30f --- /dev/null +++ b/src/util_string.hpp @@ -0,0 +1,17 @@ +#ifndef SASS_UTIL_STRING_H +#define SASS_UTIL_STRING_H + +#include + +namespace Sass { +namespace Util { + +std::string rtrim(const std::string& str); + +std::string normalize_newlines(const std::string& str); +std::string normalize_underscores(const std::string& str); +std::string normalize_decimals(const std::string& str); + +} // namespace Sass +} // namespace Util +#endif // SASS_UTIL_STRING_H diff --git a/test/Makefile b/test/Makefile index 8fecbb746..f30131119 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,16 +1,22 @@ CXX ?= c++ CXXFLAGS := -I ../include/ -std=c++11 -fsanitize=address -g -O1 -fno-omit-frame-pointer -test: test_shared_ptr +test: test_shared_ptr test_util_string test_shared_ptr: build/test_shared_ptr @ASAN_OPTIONS="symbolize=1" build/test_shared_ptr +test_util_string: build/test_util_string + @ASAN_OPTIONS="symbolize=1" build/test_util_string + build: @mkdir build -build/test_shared_ptr: | build - @$(CXX) $(CXXFLAGS) -o build/test_shared_ptr test_shared_ptr.cpp ../src/memory/SharedPtr.cpp +build/test_shared_ptr: test_shared_ptr.cpp ../src/memory/SharedPtr.cpp | build + $(CXX) $(CXXFLAGS) -o build/test_shared_ptr test_shared_ptr.cpp ../src/memory/SharedPtr.cpp + +build/test_util_string: test_util_string.cpp ../src/util_string.cpp | build + $(CXX) $(CXXFLAGS) -o build/test_util_string test_util_string.cpp ../src/util_string.cpp clean: | build rm -rf build diff --git a/test/test_util_string.cpp b/test/test_util_string.cpp new file mode 100644 index 000000000..5c73430f0 --- /dev/null +++ b/test/test_util_string.cpp @@ -0,0 +1,123 @@ +#include "../src/util_string.hpp" + +#include +#include +#include +#include + +namespace { + +std::string escape_string(const std::string& str) { + std::string out; + out.reserve(str.size()); + for (char c : str) { + switch (c) { + case '\n': + out.append("\\n"); + break; + case '\r': + out.append("\\r"); + break; + case '\f': + out.append("\\f"); + break; + default: + out += c; + } + } + return out; +} + +#define ASSERT_STR_EQ(a, b) \ + if (a != b) { \ + std::cerr << \ + "Expected LHS == RHS at " << __FILE__ << ":" << __LINE__ << \ + "\n LHS: [" << escape_string(a) << "]" \ + "\n RHS: [" << escape_string(b) << "]" << \ + std::endl; \ + return false; \ + } \ + +bool TestNormalizeNewlinesNoNewline() { + std::string input = "a"; + std::string normalized = Sass::Util::normalize_newlines(input); + ASSERT_STR_EQ(input, normalized); + return true; +} + +bool TestNormalizeNewlinesLF() { + std::string input = "a\nb"; + std::string normalized = Sass::Util::normalize_newlines(input); + ASSERT_STR_EQ(input, normalized); + return true; +} + +bool TestNormalizeNewlinesCR() { + std::string normalized = Sass::Util::normalize_newlines("a\rb"); + ASSERT_STR_EQ("a\nb", normalized); + return true; +} + +bool TestNormalizeNewlinesCRLF() { + std::string normalized = Sass::Util::normalize_newlines("a\r\nb\r\n"); + ASSERT_STR_EQ("a\nb\n", normalized); + return true; +} + +bool TestNormalizeNewlinesFF() { + std::string normalized = Sass::Util::normalize_newlines("a\fb\f"); + ASSERT_STR_EQ("a\nb\n", normalized); + return true; +} + +bool TestNormalizeNewlinesMixed() { + std::string normalized = Sass::Util::normalize_newlines("a\fb\nc\rd\r\ne\ff"); + ASSERT_STR_EQ("a\nb\nc\nd\ne\nf", normalized); + return true; +} + +bool TestNormalizeUnderscores() { + std::string normalized = Sass::Util::normalize_underscores("a_b_c"); + ASSERT_STR_EQ("a-b-c", normalized); + return true; +} + +bool TestNormalizeDecimalsLeadingZero() { + std::string normalized = Sass::Util::normalize_decimals("0.5"); + ASSERT_STR_EQ("0.5", normalized); + return true; +} + +bool TestNormalizeDecimalsNoLeadingZero() { + std::string normalized = Sass::Util::normalize_decimals(".5"); + ASSERT_STR_EQ("0.5", normalized); + return true; +} + +} // namespace + +#define TEST(fn) \ + if (fn()) { \ + passed.push_back(#fn); \ + } else { \ + failed.push_back(#fn); \ + std::cerr << "Failed: " #fn << std::endl; \ + } \ + +int main(int argc, char **argv) { + std::vector passed; + std::vector failed; + TEST(TestNormalizeNewlinesNoNewline); + TEST(TestNormalizeNewlinesLF); + TEST(TestNormalizeNewlinesCR); + TEST(TestNormalizeNewlinesCRLF); + TEST(TestNormalizeNewlinesFF); + TEST(TestNormalizeNewlinesMixed); + TEST(TestNormalizeUnderscores); + TEST(TestNormalizeDecimalsLeadingZero); + TEST(TestNormalizeDecimalsNoLeadingZero); + std::cerr << argv[0] << ": Passed: " << passed.size() + << ", failed: " << failed.size() + << "." << std::endl; + return failed.size(); +} diff --git a/win/libsass.targets b/win/libsass.targets index 5226ef6d7..0449c78eb 100644 --- a/win/libsass.targets +++ b/win/libsass.targets @@ -75,6 +75,7 @@ + @@ -137,6 +138,7 @@ + diff --git a/win/libsass.vcxproj.filters b/win/libsass.vcxproj.filters index 2e1e778f3..c92f48a79 100644 --- a/win/libsass.vcxproj.filters +++ b/win/libsass.vcxproj.filters @@ -234,6 +234,9 @@ Headers + + Headers + Headers @@ -422,6 +425,9 @@ Sources + + Sources + Sources