From 0cf4c17ab66aa24904da9b461d384c602c229a1d Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Tue, 7 Jun 2022 08:47:49 +1000 Subject: [PATCH] migrate: Handle single newlines in WordPress comments WordPress renders a single newline in a comment as a <br>
tag, but Isso renders a single newline in the comment as a single newline in the HTML. This is rendered the same as if it was a space, all text on one line. To fix, detect single newlines when importing WordPress comments and convert to a line break in Markdown. Add a test for this also. Example, this WordPress comment (as shown in CDATA of XML export): > First line of comment. > Second line of comment. Renders in WordPress as: > First line of comment. > <br>
> Second line of comment. But renders in Isso after import as if it was: > First line of comment. Second line of comment. After this commit is applied and comments re-imported, it renders as: > First line of comment. > Second line of comment. --- CHANGES.rst | 4 +++- isso/migrate.py | 11 ++++++++++- isso/tests/test_migration.py | 8 ++++++-- isso/tests/wordpress.xml | 20 ++++++++++++++++++-- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 6f206cdbf..f036a6379 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -17,8 +17,10 @@ Breaking Changes Bugfixes & Improvements ^^^^^^^^^^^^^^^^^^^^^^^ -- TBD +- When importing from WordPress single newlines are now converted to line breaks + (`#903`_, projectgus) +.. _#903: https://github.com/posativ/isso/pull/903 0.13.0.beta1 (2022-06-05) ------------------------- diff --git a/isso/migrate.py b/isso/migrate.py index 7fedf16f8..b8e0c2484 100644 --- a/isso/migrate.py +++ b/isso/migrate.py @@ -225,9 +225,18 @@ def migrate(self): progress.finish("{0} threads, {1} comments".format( len(items) - skip, self.count)) + def _process_comment_content(self, text): + # WordPress comment text renders a single newline between two blocks of + # text as a <br>
tag, so add an explicit Markdown line break on import + # (Otherwise multiple blocks of text separated by single newlines are + # all shown as one long line.) + text = re.sub(r'(?!^\n)\n(?!^\n)', ' \n', text, 0) + + return strip(text) + def Comment(self, el): return { - "text": strip(el.find(self.ns + "comment_content").text), + "text": self._process_comment_content(el.find(self.ns + "comment_content").text), "author": strip(el.find(self.ns + "comment_author").text), "email": strip(el.find(self.ns + "comment_author_email").text), "website": strip(el.find(self.ns + "comment_author_url").text), diff --git a/isso/tests/test_migration.py b/isso/tests/test_migration.py index af896eaa5..ff5f55f05 100644 --- a/isso/tests/test_migration.py +++ b/isso/tests/test_migration.py @@ -87,7 +87,7 @@ def test_wordpress(self): self.assertEqual( len(db.execute("SELECT id FROM threads").fetchall()), 2) self.assertEqual( - len(db.execute("SELECT id FROM comments").fetchall()), 7) + len(db.execute("SELECT id FROM comments").fetchall()), 8) first = db.comments.get(1) self.assertEqual(first["author"], "Ohai") @@ -101,7 +101,11 @@ def test_wordpress(self): for i in (3, 4, 5): self.assertEqual(db.comments.get(i)["parent"], second["id"]) - last = db.comments.get(6) + # Ensure newlines in wordpress translate to two newlines in isso, to render the same + multiline = db.comments.get(6) + self.assertIn("multiple lines: \nWordPress", multiline["text"]) + + last = db.comments.get(7) self.assertEqual(last["author"], "Letzter :/") self.assertEqual(last["parent"], None) diff --git a/isso/tests/wordpress.xml b/isso/tests/wordpress.xml index 7560aefe3..53beb6c20 100644 --- a/isso/tests/wordpress.xml +++ b/isso/tests/wordpress.xml @@ -101,13 +101,29 @@ 10 + + multiline@example.org + + + ::ffff:86.52.1.0 + 2022-06-06 12:13:14 + 2022-06-06 02:13:14 + + 1 + + 0 + 1 + + + 11 info@posativ.org ::ffff:86.56.63.0 - 2014-04-29 15:21:56 - 2014-04-29 15:21:56 + 2022-06-07 15:21:56 + 2022-06-07 15:21:56 1