From 0d286213568cda7bf273a7e28cefe27a74b6055b Mon Sep 17 00:00:00 2001 From: Andrzej Nowikowski Date: Wed, 22 Mar 2023 21:39:33 +0100 Subject: [PATCH 1/2] Added handling singlepart emails --- redbox/tests/examples/INBOX/3.eml | 10 ++++++++++ redbox/tests/test_fetch.py | 25 ++++++++++++++++++++----- redbox/utils/inspector.py | 18 +++++++++++++----- 3 files changed, 43 insertions(+), 10 deletions(-) create mode 100644 redbox/tests/examples/INBOX/3.eml diff --git a/redbox/tests/examples/INBOX/3.eml b/redbox/tests/examples/INBOX/3.eml new file mode 100644 index 0000000..b77deb2 --- /dev/null +++ b/redbox/tests/examples/INBOX/3.eml @@ -0,0 +1,10 @@ +From: redacated_from@gmail.com +MIME-Version: 1.0 +Date: Wed, 22 Mar 2023 06:44:05 -0700 +Message-ID: +Subject: redacted subject +To: redacated_to@gmail.com +Content-Type: text/plain; charset="UTF-8" + +line 1 +line 2 diff --git a/redbox/tests/test_fetch.py b/redbox/tests/test_fetch.py index 0628cff..45c2f4b 100644 --- a/redbox/tests/test_fetch.py +++ b/redbox/tests/test_fetch.py @@ -13,7 +13,7 @@ def test_fetch(IMAP4): box = EmailBox(host="localhost", port=0, cls_imap=DummyGmailImap) - + msg = EmailMessage(uid=1, session=box.inbox.session, mailbox="MYBOX") assert msg.content == (ROOT / "MYBOX/1.eml").read_text() assert isinstance(msg.email, Message) @@ -49,7 +49,7 @@ def test_fetch(IMAP4): def test_set_flags(IMAP4): box = EmailBox(host="localhost", port=0, cls_imap=DummyGmailImap) - + msg = EmailMessage(uid=1, session=box.inbox.session, mailbox="MYBOX") assert msg.seen assert msg.flagged @@ -62,12 +62,12 @@ def test_set_flags(IMAP4): def test_set_flags_methods(IMAP4): box = EmailBox(host="localhost", port=0, cls_imap=DummyGmailImap) - + msg = EmailMessage(uid=2, session=box.inbox.session, mailbox="MYBOX") assert not msg.seen assert not msg.flagged assert not msg.deleted - + msg.read() msg.delete() msg.flag() @@ -80,4 +80,19 @@ def test_set_flags_methods(IMAP4): msg.unflag() assert not msg.seen assert not msg.flagged - assert not msg.deleted \ No newline at end of file + assert not msg.deleted + + +def test_plain_text(IMAP4): + box = EmailBox(host="localhost", port=0, cls_imap=DummyGmailImap) + + msg = EmailMessage(uid=3, session=box.inbox.session, mailbox="INBOX") + + assert msg.subject == "redacted subject" + assert msg.text_body == dedent(""" + line 1 + line 2 + """ + )[1:] + assert msg.html_body is None + diff --git a/redbox/utils/inspector.py b/redbox/utils/inspector.py index 5e7d4b2..2924074 100644 --- a/redbox/utils/inspector.py +++ b/redbox/utils/inspector.py @@ -2,17 +2,21 @@ from email.message import EmailMessage from redbox.models.attachment import Attachment -class Inspector: - def __init__(self, msg:EmailMessage): +class Inspector: + def __init__(self, msg: EmailMessage): self.message = msg def get_headers(self) -> Dict[str, str]: return dict(self.message.items()) def get_html_body(self) -> str: + if not self.message.is_multipart(): + if "text/html" == self.message.get_content_type(): + return self.message.get_payload() + return None for pl in self.message.get_payload(): - content_type = pl['Content-Type'].split(";") + content_type = pl["Content-Type"].split(";") if "text/html" in content_type: return pl.get_payload() elif "multipart/related" in content_type: @@ -21,8 +25,12 @@ def get_html_body(self) -> str: return Inspector(pl).get_html_body() def get_text_body(self) -> str: + if not self.message.is_multipart(): + if "text/plain" == self.message.get_content_type(): + return self.message.get_payload() + return None for pl in self.message.get_payload(): - content_type = pl['Content-Type'].split(";") + content_type = pl["Content-Type"].split(";") if "text/plain" in content_type: return pl.get_payload() elif "multipart/related" in content_type: @@ -39,6 +47,6 @@ def get_attachments(self) -> List: # text/plain: For textual files # application/octet-stream: For all others for pl in self.message.get_payload(): - content_type = pl['Content-Type'].split(";")[0] + content_type = pl["Content-Type"].split(";")[0] if content_type.startswith("application/"): yield Attachment.from_message(pl) From 2191304b75b66e8d9958e06637dc6e2e33e8531b Mon Sep 17 00:00:00 2001 From: Andrzej Nowikowski Date: Wed, 22 Mar 2023 22:11:24 +0100 Subject: [PATCH 2/2] added decoding of single text/html content --- redbox/tests/test_fetch.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/redbox/tests/test_fetch.py b/redbox/tests/test_fetch.py index 45c2f4b..56aadad 100644 --- a/redbox/tests/test_fetch.py +++ b/redbox/tests/test_fetch.py @@ -83,7 +83,8 @@ def test_set_flags_methods(IMAP4): assert not msg.deleted -def test_plain_text(IMAP4): + +def test_single_part_text_plain(IMAP4): box = EmailBox(host="localhost", port=0, cls_imap=DummyGmailImap) msg = EmailMessage(uid=3, session=box.inbox.session, mailbox="INBOX") @@ -94,5 +95,17 @@ def test_plain_text(IMAP4): line 2 """ )[1:] + assert msg.html_body is None + +def test_single_part_text_html(IMAP4): + box = EmailBox(host="localhost", port=0, cls_imap=DummyGmailImap) + + msg = EmailMessage(uid=2, session=box.inbox.session, mailbox="INBOX") + + assert msg.text_body is None + assert msg.html_body.strip() == dedent(""" + +

+ """).strip()