From 7f68ae0fce797850ffffb39ea23dd1a714c74e47 Mon Sep 17 00:00:00 2001
From: Antti Kaihola <13725+akaihola@users.noreply.github.com>
Date: Mon, 30 Dec 2024 21:02:12 +0200
Subject: [PATCH 1/3] test: add failing test which splits lines on x85 character

---
 src/darkgraylib/tests/test_utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/darkgraylib/tests/test_utils.py b/src/darkgraylib/tests/test_utils.py
index e926f38..be1b181 100644
--- a/src/darkgraylib/tests/test_utils.py
+++ b/src/darkgraylib/tests/test_utils.py
@@ -138,6 +138,12 @@ def test_textdocument_encoded_string(encoding, newline, expect):
     dict(
         doc=TextDocument(string="zéro\r\nun\r\n", newline="\r\n"), expect=("zéro", "un")
     ),
+    dict(
+        doc=TextDocument(
+            string="# coding: iso-8859-5\n# б\x85б\x86\n", encoding="iso-8859-5"
+        ),
+        expect=("# coding: iso-8859-5", "# б\x85б\x86"),
+    ),
 )
 def test_textdocument_lines(doc, expect):
     """TextDocument.lines is correct after parsing a string with different newlines"""

From e2cff510f677a8a13e9da265bc6af5c16bb30fad Mon Sep 17 00:00:00 2001
From: Antti Kaihola <13725+akaihola@users.noreply.github.com>
Date: Mon, 30 Dec 2024 21:02:36 +0200
Subject: [PATCH 2/3] fix: only split lines at Python's universal newlines

---
 src/darkgraylib/utils.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/darkgraylib/utils.py b/src/darkgraylib/utils.py
index fe933f4..65088e8 100644
--- a/src/darkgraylib/utils.py
+++ b/src/darkgraylib/utils.py
@@ -22,6 +22,30 @@ def detect_newline(string: str) -> str:
     return "\n"
 
 
+def normalize_newlines(string: str) -> str:
+    """Normalize universal newlines (LF, CRLF and CR) in a string to LF."""
+    # ``final=True`` flushes the decoder. Without it, a CR at the very end of the
+    # string is held back as a potential first half of a CRLF and silently dropped.
+    return io.IncrementalNewlineDecoder(None, True).decode(string, final=True)
+
+
+def splitlines(string: str) -> list[str]:
+    """Split a string into lines at universal newlines (LF, CRLF and CR) only.
+
+    Unlike ``str.splitlines()``, characters like NEL (U+0085), VT or FF are not
+    line boundaries. Like ``str.splitlines()``, a single newline at the end of the
+    string doesn't produce an extra empty line, but trailing blank lines are kept.
+
+    """
+    if not string:
+        return []
+    normalized = normalize_newlines(string)
+    if normalized.endswith("\n"):
+        # Drop only the final line terminator so trailing blank lines survive
+        normalized = normalized[:-1]
+    return normalized.split("\n")
+
+
 class TextDocument:
     """Store & handle a multi-line text document, either as a string or list of lines"""
 
@@ -65,7 +89,7 @@ def encoded_string(self) -> bytes:
     def lines(self) -> TextLines:
         """Return the document as a list of lines converting and caching if necessary"""
         if self._lines is None:
-            self._lines = tuple((self._string or "").splitlines())
+            self._lines = tuple(splitlines(self._string or ""))
         return self._lines
 
     @property

From 3035dccb93430a1c0d0a0367e522d316b170f85d Mon Sep 17 00:00:00 2001
From: Antti Kaihola <13725+akaihola@users.noreply.github.com>
Date: Fri, 3 Jan 2025 18:35:33 +0200
Subject: [PATCH 3/3] docs: update the change log

---
 CHANGES.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index d4a7274..b1a8041 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -13,6 +13,8 @@ Removed
 
 Fixed
 -----
+- Only split input files at Python's universal newlines (LF, CRLF, CR), not on more
+  exotic newline sequences. This fixes some edge cases in Darker.
 
 
 2.1.0_ - 2024-11-19