Skip to content

Commit

Permalink
fix: Render "message" parts in multipart messages' HTML (#4462)
Browse files Browse the repository at this point in the history
This fixes the HTML display of messages containing forwarded messages. Before, forwarded messages
weren't rendered in HTML, and if a forwarded message was long and therefore truncated in the chat, it
could only be seen in the "Message Info". In #4462 it was suggested to display "Show Full
Message..." for each truncated message part and to save only the corresponding part to
`msgs.mime_headers`, but this is quite a large change and refactoring, and it may also be good that we
currently save the full message structure to `msgs.mime_headers`, so I'd suggest not changing this for now.
  • Loading branch information
iequidoo committed Dec 12, 2024
1 parent 2533628 commit cb21578
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 40 deletions.
78 changes: 53 additions & 25 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
//! `MsgId.get_html()` will return HTML -
//! this allows nice quoting, handling linebreaks properly etc.
use std::mem;

use anyhow::{Context as _, Result};
use base64::Engine as _;
use lettre_email::mime::Mime;
Expand Down Expand Up @@ -77,21 +79,26 @@ fn get_mime_multipart_type(ctype: &ParsedContentType) -> MimeMultipartType {
struct HtmlMsgParser {
pub html: String,
pub plain: Option<PlainText>,
pub(crate) msg_html: String,
}

impl HtmlMsgParser {
/// Function takes a raw mime-message string,
/// searches for the main-text part
/// and returns that as parser.html
pub async fn from_bytes(context: &Context, rawmime: &[u8]) -> Result<Self> {
pub async fn from_bytes<'a>(
context: &Context,
rawmime: &'a [u8],
) -> Result<(Self, mailparse::ParsedMail<'a>)> {
let mut parser = HtmlMsgParser {
html: "".to_string(),
plain: None,
msg_html: "".to_string(),
};

let parsedmail = mailparse::parse_mail(rawmime)?;
let parsedmail = mailparse::parse_mail(rawmime).context("Failed to parse mail")?;

parser.collect_texts_recursive(&parsedmail).await?;
parser.collect_texts_recursive(context, &parsedmail).await?;

if parser.html.is_empty() {
if let Some(plain) = &parser.plain {
Expand All @@ -100,8 +107,8 @@ impl HtmlMsgParser {
} else {
parser.cid_to_data_recursive(context, &parsedmail).await?;
}

Ok(parser)
parser.html += &mem::take(&mut parser.msg_html);
Ok((parser, parsedmail))
}

/// Function iterates over all mime-parts
Expand All @@ -114,12 +121,13 @@ impl HtmlMsgParser {
/// therefore we use the first one.
async fn collect_texts_recursive<'a>(
&'a mut self,
context: &'a Context,
mail: &'a mailparse::ParsedMail<'a>,
) -> Result<()> {
match get_mime_multipart_type(&mail.ctype) {
MimeMultipartType::Multiple => {
for cur_data in &mail.subparts {
Box::pin(self.collect_texts_recursive(cur_data)).await?
Box::pin(self.collect_texts_recursive(context, cur_data)).await?
}
Ok(())
}
Expand All @@ -128,8 +136,35 @@ impl HtmlMsgParser {
if raw.is_empty() {
return Ok(());
}
let mail = mailparse::parse_mail(&raw).context("failed to parse mail")?;
Box::pin(self.collect_texts_recursive(&mail)).await
let (parser, mail) = Box::pin(HtmlMsgParser::from_bytes(context, &raw)).await?;
if !parser.html.is_empty() {
let mut text = "\r\n\r\n".to_string();
for h in mail.headers {
let key = h.get_key();
if matches!(
key.to_lowercase().as_str(),
"date"
| "from"
| "sender"
| "reply-to"
| "to"
| "cc"
| "bcc"
| "subject"
) {
text += &format!("{key}: {}\r\n", h.get_value());
}
}
text += "\r\n";
self.msg_html += &PlainText {
text,
flowed: false,
delsp: false,
}
.to_html();
self.msg_html += &parser.html;
}
Ok(())
}
MimeMultipartType::Single => {
let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
Expand Down Expand Up @@ -175,14 +210,7 @@ impl HtmlMsgParser {
}
Ok(())
}
MimeMultipartType::Message => {
let raw = mail.get_body_raw()?;
if raw.is_empty() {
return Ok(());
}
let mail = mailparse::parse_mail(&raw).context("failed to parse mail")?;
Box::pin(self.cid_to_data_recursive(context, &mail)).await
}
MimeMultipartType::Message => Ok(()),
MimeMultipartType::Single => {
let mimetype = mail.ctype.mimetype.parse::<Mime>()?;
if mimetype.type_() == mime::IMAGE {
Expand Down Expand Up @@ -240,7 +268,7 @@ impl MsgId {
warn!(context, "get_html: parser error: {:#}", err);
Ok(None)
}
Ok(parser) => Ok(Some(parser.html)),
Ok((parser, _)) => Ok(Some(parser.html)),
}
} else {
warn!(context, "get_html: no mime for {}", self);
Expand Down Expand Up @@ -274,7 +302,7 @@ mod tests {
async fn test_htmlparse_plain_unspecified() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml");
let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
assert_eq!(
parser.html,
r#"<!DOCTYPE html>
Expand All @@ -292,7 +320,7 @@ This message does not have Content-Type nor Subject.<br/>
async fn test_htmlparse_plain_iso88591() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_plain_iso88591.eml");
let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
assert_eq!(
parser.html,
r#"<!DOCTYPE html>
Expand All @@ -310,7 +338,7 @@ message with a non-UTF-8 encoding: äöüßÄÖÜ<br/>
async fn test_htmlparse_plain_flowed() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_plain_flowed.eml");
let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
assert!(parser.plain.unwrap().flowed);
assert_eq!(
parser.html,
Expand All @@ -332,7 +360,7 @@ and will be wrapped as usual.<br/>
async fn test_htmlparse_alt_plain() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_alt_plain.eml");
let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
assert_eq!(
parser.html,
r#"<!DOCTYPE html>
Expand All @@ -353,7 +381,7 @@ test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x
async fn test_htmlparse_html() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_html.eml");
let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();

// on windows, `\r\n` linends are returned from mimeparser,
// however, rust multiline-strings use just `\n`;
Expand All @@ -371,7 +399,7 @@ test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x
async fn test_htmlparse_alt_html() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_alt_html.eml");
let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
assert_eq!(
parser.html.replace('\r', ""), // see comment in test_htmlparse_html()
r##"<html>
Expand All @@ -386,7 +414,7 @@ test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x
async fn test_htmlparse_alt_plain_html() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
assert_eq!(
parser.html.replace('\r', ""), // see comment in test_htmlparse_html()
r##"<html>
Expand All @@ -411,7 +439,7 @@ test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x
assert!(test.find("data:").is_none());

// parsing converts cid: to data:
let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap();
assert!(parser.html.contains("<html>"));
assert!(!parser.html.contains("Content-Id:"));
assert!(parser.html.contains(""));
Expand Down
51 changes: 36 additions & 15 deletions src/receive_imf/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3834,30 +3834,51 @@ async fn test_big_forwarded_with_big_attachment() -> Result<()> {
let raw = include_bytes!("../../test-data/message/big_forwarded_with_big_attachment.eml");
let rcvd = receive_imf(t, raw, false).await?.unwrap();
assert_eq!(rcvd.msg_ids.len(), 3);

let msg = Message::load_from_db(t, rcvd.msg_ids[0]).await?;
assert_eq!(msg.get_viewtype(), Viewtype::Text);
assert_eq!(msg.get_text(), "Hello!");
// Wrong: the second bubble's text is truncated, but "Show Full Message..." is going to be shown
// in the first message bubble in the UIs.
assert_eq!(
msg.id
.get_html(t)
.await?
.unwrap()
.matches("Hello!")
.count(),
1
);
assert!(!msg.has_html());

let msg = Message::load_from_db(t, rcvd.msg_ids[1]).await?;
assert_eq!(msg.get_viewtype(), Viewtype::Text);
assert!(msg.get_text().starts_with("this text with 42 chars is just repeated."));
assert!(msg
.get_text()
.starts_with("this text with 42 chars is just repeated."));
assert!(msg.get_text().ends_with("[...]"));
// Wrong: the text is truncated, but it's not possible to see the full text in HTML.
assert!(!msg.has_html());

let msg = Message::load_from_db(t, rcvd.msg_ids[2]).await?;
assert_eq!(msg.get_viewtype(), Viewtype::File);
assert!(!msg.has_html());

assert!(msg.has_html());
let html = msg.id.get_html(t).await?.unwrap();
let tail = html
.split_once("Hello!")
.unwrap()
.1
.split_once("From: AAA")
.unwrap()
.1
.split_once("[email protected]")
.unwrap()
.1
.split_once("To: Alice")
.unwrap()
.1
.split_once("[email protected]")
.unwrap()
.1
.split_once("Subject: Some subject")
.unwrap()
.1
.split_once("Date: Fri, 2 Jun 2023 12:29:17 +0000")
.unwrap()
.1;
assert_eq!(
tail.matches("this text with 42 chars is just repeated.")
.count(),
128
);
Ok(())
}

Expand Down

0 comments on commit cb21578

Please sign in to comment.