From 228724623fec49a0c3dbf10fdd20b520f0096807 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Sun, 22 Aug 2021 22:38:27 -0400 Subject: [PATCH] Added a new MimeReader The MimeReader is an alternative to the MimeParser. Unlike the MimeParser, the MimeReader does not construct a MimeMessage or MimeEntity "DOM". Instead, it works more like a SAX parser in that it emits events as logical components of a MIME structure are parsed, allowing the developer to process MIME data as it is parsed rather than waiting for the entire message or entity to be completely constructed. This also allows developers a way to reduce their memory overhead. Implements the API discussed in issue #695 --- MimeKit/AsyncMimeReader.cs | 776 ++++++++++++++ MimeKit/MimeKit.csproj | 2 + MimeKit/MimeKitLite.csproj | 2 + MimeKit/MimeReader.cs | 1852 ++++++++++++++++++++++++++++++++++ MimeKit/ParserOptions.cs | 21 +- UnitTests/MimeReaderTests.cs | 457 +++++++++ UnitTests/UnitTests.csproj | 1 + 7 files changed, 3103 insertions(+), 8 deletions(-) create mode 100644 MimeKit/AsyncMimeReader.cs create mode 100644 MimeKit/MimeReader.cs create mode 100644 UnitTests/MimeReaderTests.cs diff --git a/MimeKit/AsyncMimeReader.cs b/MimeKit/AsyncMimeReader.cs new file mode 100644 index 0000000000..b3c8b266d9 --- /dev/null +++ b/MimeKit/AsyncMimeReader.cs @@ -0,0 +1,776 @@ +// +// AsyncMimeReader.cs +// +// Author: Jeffrey Stedfast +// +// Copyright (c) 2013-2021 .NET Foundation and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice 
shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +using System; +using System.Threading; +using System.Diagnostics; +using System.Threading.Tasks; + +namespace MimeKit { + public partial class MimeReader + { + async Task ReadAheadAsync (int atleast, int save, CancellationToken cancellationToken) + { + int left, start, end; + + if (!AlignReadAheadBuffer (atleast, save, out left, out start, out end)) + return left; + + int nread = await stream.ReadAsync (input, start, end - start, cancellationToken).ConfigureAwait (false); + + if (nread > 0) { + inputEnd += nread; + position += nread; + } else { + eos = true; + } + + return inputEnd - inputIndex; + } + + async Task StepByteOrderMarkAsync (CancellationToken cancellationToken) + { + int bomIndex = 0; + + do { + var available = await ReadAheadAsync (ReadAheadSize, 0, cancellationToken).ConfigureAwait (false); + + if (available <= 0) { + // failed to read any data... 
EOF + inputIndex = inputEnd; + return false; + } + + unsafe { + fixed (byte* inbuf = input) { + StepByteOrderMark (inbuf, ref bomIndex); + } + } + } while (inputIndex == inputEnd); + + return bomIndex == 0 || bomIndex == UTF8ByteOrderMark.Length; + } + + async Task StepMboxMarkerAsync (CancellationToken cancellationToken) + { + bool complete; + int left = 0; + + mboxMarkerLength = 0; + + do { + var available = await ReadAheadAsync (Math.Max (ReadAheadSize, left), 0, cancellationToken).ConfigureAwait (false); + + if (available <= left) { + // failed to find a From line; EOF reached + state = MimeParserState.Error; + inputIndex = inputEnd; + return; + } + + unsafe { + fixed (byte* inbuf = input) { + complete = StepMboxMarker (inbuf, ref left); + } + } + } while (!complete); + + await OnMboxMarkerReadAsync (mboxMarkerBuffer, 0, mboxMarkerLength, mboxMarkerOffset, lineNumber - 1, cancellationToken).ConfigureAwait (false); + + state = MimeParserState.MessageHeaders; + } + + async Task StepHeadersAsync (ParserOptions options, CancellationToken cancellationToken) + { + bool scanningFieldName = true; + bool checkFolded = false; + bool midline = false; + bool blank = false; + bool valid = true; + int left = 0; + + headerBlockBegin = GetOffset (inputIndex); + boundary = BoundaryType.None; + ResetRawHeaderData (); + headerCount = 0; + + currentContentLength = null; + currentContentType = null; + currentEncoding = null; + + await ReadAheadAsync (Math.Max (ReadAheadSize, left), 0, cancellationToken).ConfigureAwait (false); + + do { + unsafe { + fixed (byte* inbuf = input) { + if (!StepHeaders (options, inbuf, ref scanningFieldName, ref checkFolded, ref midline, ref blank, ref valid, ref left, cancellationToken)) + break; + } + } + + var available = await ReadAheadAsync (left + 1, 0, cancellationToken).ConfigureAwait (false); + + if (available == left) { + // EOF reached before we reached the end of the headers... 
+ if (scanningFieldName && left > 0) { + // EOF reached right in the middle of a header field name. Throw an error. + // + // See private email from Feb 8, 2018 which contained a sample message w/o + // any breaks between the header and message body. The file also did not + // end with a newline sequence. + state = MimeParserState.Error; + } else { + // EOF reached somewhere in the middle of the value. + // + // Append whatever data we've got left and pretend we found the end + // of the header value (and the header block). + // + // For more details, see https://github.com/jstedfast/MimeKit/pull/51 + // and https://github.com/jstedfast/MimeKit/issues/348 + if (left > 0) { + AppendRawHeaderData (inputIndex, left); + inputIndex = inputEnd; + } + + ParseAndAppendHeader (options, cancellationToken); + + state = MimeParserState.Content; + } + break; + } + } while (true); + + headerBlockEnd = GetOffset (inputIndex); + } + + async Task SkipLineAsync (bool consumeNewLine, CancellationToken cancellationToken) + { + do { + unsafe { + fixed (byte* inbuf = input) { + if (SkipLine (inbuf, consumeNewLine)) + return true; + } + } + + if (await ReadAheadAsync (ReadAheadSize, 1, cancellationToken).ConfigureAwait (false) <= 0) + return false; + } while (true); + } + + async Task StepAsync (ParserOptions options, CancellationToken cancellationToken) + { + switch (state) { + case MimeParserState.Initialized: + if (!await StepByteOrderMarkAsync (cancellationToken).ConfigureAwait (false)) { + state = MimeParserState.Eos; + break; + } + + state = format == MimeFormat.Mbox ? 
MimeParserState.MboxMarker : MimeParserState.MessageHeaders; + break; + case MimeParserState.MboxMarker: + await StepMboxMarkerAsync (cancellationToken).ConfigureAwait (false); + break; + case MimeParserState.MessageHeaders: + case MimeParserState.Headers: + await StepHeadersAsync (options, cancellationToken).ConfigureAwait (false); + toplevel = false; + break; + } + + return state; + } + + async Task ScanContentAsync (ScanContentType type, long beginOffset, int beginLineNumber, bool trimNewLine, CancellationToken cancellationToken) + { + int atleast = Math.Max (ReadAheadSize, GetMaxBoundaryLength ()); + int contentIndex = inputIndex; + var formats = new bool[2]; + int contentLength = 0; + bool midline = false; + int nleft; + + do { + if (contentIndex < inputIndex) { + switch (type) { + case ScanContentType.MultipartPreamble: + await OnMultipartPreambleReadAsync (input, contentIndex, inputIndex - contentIndex, cancellationToken).ConfigureAwait (false); + break; + case ScanContentType.MultipartEpilogue: + await OnMultipartEpilogueReadAsync (input, contentIndex, inputIndex - contentIndex, cancellationToken).ConfigureAwait (false); + break; + default: + await OnMimePartContentReadAsync (input, contentIndex, inputIndex - contentIndex, cancellationToken).ConfigureAwait (false); + break; + } + + contentLength += inputIndex - contentIndex; + } + + nleft = inputEnd - inputIndex; + if (await ReadAheadAsync (atleast, 2, cancellationToken).ConfigureAwait (false) <= 0) { + boundary = BoundaryType.Eos; + contentIndex = inputIndex; + break; + } + + unsafe { + fixed (byte* inbuf = input) { + ScanContent (inbuf, ref contentIndex, ref nleft, ref midline, ref formats); + } + } + } while (boundary == BoundaryType.None); + + if (contentIndex < inputIndex) { + switch (type) { + case ScanContentType.MultipartPreamble: + await OnMultipartPreambleReadAsync (input, contentIndex, inputIndex - contentIndex, cancellationToken).ConfigureAwait (false); + break; + case 
ScanContentType.MultipartEpilogue: + await OnMultipartEpilogueReadAsync (input, contentIndex, inputIndex - contentIndex, cancellationToken).ConfigureAwait (false); + break; + default: + await OnMimePartContentReadAsync (input, contentIndex, inputIndex - contentIndex, cancellationToken).ConfigureAwait (false); + break; + } + + contentLength += inputIndex - contentIndex; + } + + // FIXME: need to redesign the above loop so that we don't consume the last CRLF that belongs to the boundary marker. + var isEmpty = contentLength == 0; + + if (boundary != BoundaryType.Eos && trimNewLine) { + // the last \r\n belongs to the boundary + if (contentLength > 0) { + if (input[inputIndex - 2] == (byte) '\r') + contentLength -= 2; + else + contentLength--; + } + } + + var endOffset = beginOffset + contentLength; + var lines = GetLineCount (beginLineNumber, beginOffset, endOffset); + + return new ScanContentResult (contentLength, lines, formats, isEmpty); + } + + async Task ConstructMimePartAsync (CancellationToken cancellationToken) + { + var beginOffset = GetOffset (inputIndex); + var beginLineNumber = lineNumber; + + await OnMimePartContentBeginAsync (beginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + var result = await ScanContentAsync (ScanContentType.MimeContent, beginOffset, beginLineNumber, true, cancellationToken).ConfigureAwait (false); + await OnMimePartContentEndAsync (beginOffset, beginLineNumber, beginOffset + result.ContentLength, result.Lines, cancellationToken).ConfigureAwait (false); + + return result.Lines; + } + + async Task ConstructMessagePartAsync (ParserOptions options, int depth, CancellationToken cancellationToken) + { + var beginOffset = GetOffset (inputIndex); + var beginLineNumber = lineNumber; + + if (bounds.Count > 0) { + int atleast = Math.Max (ReadAheadSize, GetMaxBoundaryLength ()); + + if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) <= 0) { + boundary = BoundaryType.Eos; + return 0; + } + + unsafe 
{ + fixed (byte* inbuf = input) { + byte* start = inbuf + inputIndex; + byte* inend = inbuf + inputEnd; + byte* inptr = start; + + *inend = (byte) '\n'; + + while (*inptr != (byte) '\n') + inptr++; + + boundary = CheckBoundary (inputIndex, start, (int) (inptr - start)); + + switch (boundary) { + case BoundaryType.ImmediateEndBoundary: + case BoundaryType.ImmediateBoundary: + case BoundaryType.ParentBoundary: + return 0; + case BoundaryType.ParentEndBoundary: + // ignore "From " boundaries, broken mailers tend to include these... + if (!IsMboxMarker (start)) { + return 0; + } + break; + } + } + } + } + + // parse the headers... + state = MimeParserState.MessageHeaders; + if (await StepAsync (options, cancellationToken).ConfigureAwait (false) == MimeParserState.Error) { + // Note: this either means that StepHeaders() found the end of the stream + // or an invalid header field name at the start of the message headers, + // which likely means that this is not a valid MIME stream? + boundary = BoundaryType.Eos; + return GetLineCount (beginLineNumber, beginOffset, GetEndOffset (inputIndex)); + } + + var currentHeadersEndOffset = headerBlockEnd; + var currentBeginOffset = headerBlockBegin; + + await OnMimeMessageBeginAsync (currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + + if (preHeaderBuffer.Length > 0) { + // FIXME: how to solve this? 
+ //message.MboxMarker = new byte[preHeaderLength]; + //Buffer.BlockCopy (preHeaderBuffer, 0, message.MboxMarker, 0, preHeaderLength); + } + + var type = GetContentType (null); + MimeEntityType entityType; + int lines; + + if (depth < options.MaxMimeDepth && IsMultipart (type)) { + await OnMultipartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMultipartAsync (options, type, depth + 1, cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.Multipart; + } else if (depth < options.MaxMimeDepth && IsMessagePart (type, currentEncoding)) { + await OnMessagePartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMessagePartAsync (options, depth + 1, cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.MessagePart; + } else { + await OnMimePartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMimePartAsync (cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.MimePart; + } + + var endOffset = GetEndOffset (inputIndex); + currentHeadersEndOffset = Math.Min (currentHeadersEndOffset, endOffset); + + switch (entityType) { + case MimeEntityType.Multipart: + await OnMultipartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + break; + case MimeEntityType.MessagePart: + await OnMessagePartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + break; + default: + await OnMimePartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + break; + } + + await OnMimeMessageEndAsync (currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, 
lines, cancellationToken).ConfigureAwait (false); + + return GetLineCount (beginLineNumber, beginOffset, endOffset); + } + + async Task MultipartScanPreambleAsync (CancellationToken cancellationToken) + { + var beginOffset = GetOffset (inputIndex); + var beginLineNumber = lineNumber; + + await OnMultipartPreambleBeginAsync (beginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + var result = await ScanContentAsync (ScanContentType.MultipartPreamble, beginOffset, beginLineNumber, false, cancellationToken).ConfigureAwait (false); + await OnMultipartPreambleEndAsync (beginOffset, beginLineNumber, beginOffset + result.ContentLength, result.Lines, cancellationToken).ConfigureAwait (false); + } + + async Task MultipartScanEpilogueAsync (CancellationToken cancellationToken) + { + var beginOffset = GetOffset (inputIndex); + var beginLineNumber = lineNumber; + + await OnMultipartEpilogueBeginAsync (beginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + var result = await ScanContentAsync (ScanContentType.MultipartEpilogue, beginOffset, beginLineNumber, true, cancellationToken).ConfigureAwait (false); + await OnMultipartEpilogueEndAsync (beginOffset, beginLineNumber, beginOffset + result.ContentLength, result.Lines, cancellationToken).ConfigureAwait (false); + } + + async Task MultipartScanSubpartsAsync (ParserOptions options, ContentType multipartContentType, int depth, CancellationToken cancellationToken) + { + var boundaryOffset = GetOffset (inputIndex); + + do { + // skip over the boundary marker + if (!await SkipLineAsync (true, cancellationToken).ConfigureAwait (false)) { + await OnMultipartBoundaryAsync (multipartContentType.Boundary, boundaryOffset, GetOffset (inputIndex), lineNumber, cancellationToken).ConfigureAwait (false); + boundary = BoundaryType.Eos; + return; + } + + await OnMultipartBoundaryAsync (multipartContentType.Boundary, boundaryOffset, GetOffset (inputIndex), lineNumber - 1, cancellationToken).ConfigureAwait 
(false); + + var beginLineNumber = lineNumber; + + // parse the headers + state = MimeParserState.Headers; + if (await StepAsync (options, cancellationToken).ConfigureAwait (false) == MimeParserState.Error) { + boundary = BoundaryType.Eos; + return; + } + + if (state == MimeParserState.Boundary) { + if (headerCount == 0) { + if (boundary == BoundaryType.ImmediateBoundary) { + //beginOffset = GetOffset (inputIndex); + continue; + } + return; + } + + // This part has no content, but that will be handled in ConstructMultipartAsync() + // or ConstructMimePartAsync(). + } + + //if (state == ParserState.Complete && headers.Count == 0) + // return BoundaryType.EndBoundary; + + var type = GetContentType (multipartContentType); + var currentHeadersEndOffset = headerBlockEnd; + var currentBeginOffset = headerBlockBegin; + MimeEntityType entityType; + int lines; + + if (depth < options.MaxMimeDepth && IsMultipart (type)) { + await OnMultipartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMultipartAsync (options, type, depth + 1, cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.Multipart; + } else if (depth < options.MaxMimeDepth && IsMessagePart (type, currentEncoding)) { + await OnMessagePartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMessagePartAsync (options, depth + 1, cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.MessagePart; + } else { + await OnMimePartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMimePartAsync (cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.MimePart; + } + + var endOffset = GetEndOffset (inputIndex); + currentHeadersEndOffset = Math.Min (currentHeadersEndOffset, endOffset); + + switch (entityType) { + case MimeEntityType.Multipart: + await 
OnMultipartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + break; + case MimeEntityType.MessagePart: + await OnMessagePartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + break; + default: + await OnMimePartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + break; + } + + boundaryOffset = endOffset; + } while (boundary == BoundaryType.ImmediateBoundary); + } + + async Task ConstructMultipartAsync (ParserOptions options, ContentType contentType, int depth, CancellationToken cancellationToken) + { + var beginOffset = GetOffset (inputIndex); + var marker = contentType.Boundary; + var beginLineNumber = lineNumber; + long endOffset; + + if (marker == null) { +#if DEBUG + Debug.WriteLine ("Multipart without a boundary encountered!"); +#endif + + // Note: this will scan all content into the preamble... 
+ await MultipartScanPreambleAsync (cancellationToken).ConfigureAwait (false); + + endOffset = GetEndOffset (inputIndex); + + return GetLineCount (beginLineNumber, beginOffset, endOffset); + } + + PushBoundary (marker); + + await MultipartScanPreambleAsync (cancellationToken).ConfigureAwait (false); + if (boundary == BoundaryType.ImmediateBoundary) + await MultipartScanSubpartsAsync (options, contentType, depth, cancellationToken).ConfigureAwait (false); + + if (boundary == BoundaryType.ImmediateEndBoundary) { + // consume the end boundary and read the epilogue (if there is one) + // FIXME: multipart.WriteEndBoundary = true; + + var boundaryOffset = GetOffset (inputIndex); + var boundaryLineNumber = lineNumber; + + await SkipLineAsync (false, cancellationToken).ConfigureAwait (false); + + await OnMultipartEndBoundaryAsync (marker, boundaryOffset, GetOffset (inputIndex), boundaryLineNumber, cancellationToken).ConfigureAwait (false); + + PopBoundary (); + + await MultipartScanEpilogueAsync (cancellationToken).ConfigureAwait (false); + + endOffset = GetEndOffset (inputIndex); + + return GetLineCount (beginLineNumber, beginOffset, endOffset); + } + + // FIXME: multipart.WriteEndBoundary = false; + + // We either found the end of the stream or we found a parent's boundary + PopBoundary (); + + unsafe { + fixed (byte* inbuf = input) { + if (boundary == BoundaryType.ParentEndBoundary && FoundImmediateBoundary (inbuf, true)) + boundary = BoundaryType.ImmediateEndBoundary; + else if (boundary == BoundaryType.ParentBoundary && FoundImmediateBoundary (inbuf, false)) + boundary = BoundaryType.ImmediateBoundary; + } + } + + endOffset = GetEndOffset (inputIndex); + + return GetLineCount (beginLineNumber, beginOffset, endOffset); + } + + /// + /// Asynchronously parses an entity from the stream. + /// + /// + /// Parses an entity from the stream. + /// + /// An asynchronous task context. + /// The parser options. + /// The cancellation token. + /// + /// options is null. 
+ /// + /// + /// The operation was canceled via the cancellation token. + /// + /// + /// There was an error parsing the entity. + /// + /// + /// An I/O error occurred. + /// + public async Task ReadEntityAsync (ParserOptions options, CancellationToken cancellationToken = default (CancellationToken)) + { + if (options == null) + throw new ArgumentNullException (nameof (options)); + + var beginLineNumber = lineNumber; + + state = MimeParserState.Headers; + toplevel = true; + + if (await StepAsync (options, cancellationToken).ConfigureAwait (false) == MimeParserState.Error) + throw new FormatException ("Failed to parse entity headers."); + + var type = GetContentType (null); + var currentHeadersEndOffset = headerBlockEnd; + var currentBeginOffset = headerBlockBegin; + MimeEntityType entityType; + int lines; + + if (IsMultipart (type)) { + await OnMultipartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMultipartAsync (options, type, 0, cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.Multipart; + } else if (IsMessagePart (type, currentEncoding)) { + await OnMessagePartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMessagePartAsync (options, 0, cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.MessagePart; + } else { + await OnMimePartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMimePartAsync (cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.MimePart; + } + + var endOffset = GetEndOffset (inputIndex); + currentHeadersEndOffset = Math.Min (currentHeadersEndOffset, endOffset); + + switch (entityType) { + case MimeEntityType.Multipart: + await OnMultipartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, 
cancellationToken).ConfigureAwait (false); + break; + case MimeEntityType.MessagePart: + await OnMessagePartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + break; + default: + await OnMimePartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + break; + } + + if (boundary != BoundaryType.Eos) + state = MimeParserState.Complete; + else + state = MimeParserState.Eos; + } + + /// + /// Asynchronously parses an entity from the stream. + /// + /// + /// Parses an entity from the stream. + /// + /// An asynchronous task context. + /// The cancellation token. + /// + /// The operation was canceled via the cancellation token. + /// + /// + /// There was an error parsing the entity. + /// + /// + /// An I/O error occurred. + /// + public Task ReadEntityAsync (CancellationToken cancellationToken = default (CancellationToken)) + { + return ReadEntityAsync (ParserOptions.Default, cancellationToken); + } + + /// + /// Asynchronously parses a message from the stream. + /// + /// + /// Parses a message from the stream. + /// + /// An asynchronous task context. + /// The parser options. + /// The cancellation token. + /// + /// options is null. + /// + /// + /// The operation was canceled via the cancellation token. + /// + /// + /// There was an error parsing the message. + /// + /// + /// An I/O error occurred. 
+ /// + public async Task ReadMessageAsync (ParserOptions options, CancellationToken cancellationToken = default (CancellationToken)) + { + if (options == null) + throw new ArgumentNullException (nameof (options)); + + // scan the from-line if we are parsing an mbox + while (state != MimeParserState.MessageHeaders) { + switch (await StepAsync (options, cancellationToken).ConfigureAwait (false)) { + case MimeParserState.Error: + throw new FormatException ("Failed to find mbox From marker."); + case MimeParserState.Eos: + throw new FormatException ("End of stream."); + } + } + + toplevel = true; + + // parse the headers + var beginLineNumber = lineNumber; + if (state < MimeParserState.Content && await StepAsync (options, cancellationToken).ConfigureAwait (false) == MimeParserState.Error) + throw new FormatException ("Failed to parse message headers."); + + var currentHeadersEndOffset = headerBlockEnd; + var currentBeginOffset = headerBlockBegin; + + await OnMimeMessageBeginAsync (currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + + if (format == MimeFormat.Mbox && options.RespectContentLength && currentContentLength.HasValue && currentContentLength.Value != -1) + contentEnd = GetOffset (inputIndex) + currentContentLength.Value; + else + contentEnd = 0; + + var type = GetContentType (null); + MimeEntityType entityType; + int lines; + + if (IsMultipart (type)) { + await OnMultipartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMultipartAsync (options, type, 0, cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.Multipart; + } else if (IsMessagePart (type, currentEncoding)) { + await OnMessagePartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMessagePartAsync (options, 0, cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.MessagePart; + } else { + 
await OnMimePartBeginAsync (type, currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); + lines = await ConstructMimePartAsync (cancellationToken).ConfigureAwait (false); + entityType = MimeEntityType.MimePart; + } + + var endOffset = GetEndOffset (inputIndex); + currentHeadersEndOffset = Math.Min (currentHeadersEndOffset, endOffset); + + switch (entityType) { + case MimeEntityType.Multipart: + await OnMultipartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + break; + case MimeEntityType.MessagePart: + await OnMessagePartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + break; + default: + await OnMimePartEndAsync (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + break; + } + + await OnMimeMessageEndAsync (currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken).ConfigureAwait (false); + + if (boundary != BoundaryType.Eos) { + if (format == MimeFormat.Mbox) + state = MimeParserState.MboxMarker; + else + state = MimeParserState.Complete; + } else { + state = MimeParserState.Eos; + } + } + + /// + /// Asynchronously parses a message from the stream. + /// + /// + /// Parses a message from the stream. + /// + /// An asynchronous task context. + /// The cancellation token. + /// + /// The operation was canceled via the cancellation token. + /// + /// + /// There was an error parsing the message. + /// + /// + /// An I/O error occurred. 
+ /// + public Task ReadMessageAsync (CancellationToken cancellationToken = default (CancellationToken)) + { + return ReadMessageAsync (ParserOptions.Default, cancellationToken); + } + } +} diff --git a/MimeKit/MimeKit.csproj b/MimeKit/MimeKit.csproj index 58c4d19ffa..084c2fd97e 100644 --- a/MimeKit/MimeKit.csproj +++ b/MimeKit/MimeKit.csproj @@ -258,6 +258,7 @@ + @@ -295,6 +296,7 @@ + diff --git a/MimeKit/MimeKitLite.csproj b/MimeKit/MimeKitLite.csproj index 527fbc3e5d..71c906032b 100644 --- a/MimeKit/MimeKitLite.csproj +++ b/MimeKit/MimeKitLite.csproj @@ -161,6 +161,7 @@ + @@ -198,6 +199,7 @@ + diff --git a/MimeKit/MimeReader.cs b/MimeKit/MimeReader.cs new file mode 100644 index 0000000000..7d4977f9b6 --- /dev/null +++ b/MimeKit/MimeReader.cs @@ -0,0 +1,1852 @@ +// +// MimeReader.cs +// +// Author: Jeffrey Stedfast +// +// Copyright (c) 2013-2021 .NET Foundation and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// + +using System; +using System.IO; +using System.Threading; +using System.Diagnostics; +using System.Threading.Tasks; +using System.Collections.Generic; + +using MimeKit.IO; +using MimeKit.Utils; + +namespace MimeKit { + /// + /// A MIME message and entity reader. + /// + /// + /// MimeReader provides forward-only, read-only access to MIME data in a stream. + /// MimeReader methods let you move through MIME data and read the contents of a node. + /// MimeReader uses a pull model to retrieve data. + /// + public partial class MimeReader + { + enum MimeEntityType + { + MimePart, + MessagePart, + Multipart + } + + static readonly byte[] UTF8ByteOrderMark = { 0xEF, 0xBB, 0xBF }; + static readonly Task CompletedTask; + const int ReadAheadSize = 128; + const int BlockSize = 4096; + const int PadSize = 4; + + // I/O buffering + readonly byte[] input = new byte[ReadAheadSize + BlockSize + PadSize]; + const int inputStart = ReadAheadSize; + int inputIndex = ReadAheadSize; + int inputEnd = ReadAheadSize; + + // mbox From-line state + byte[] mboxMarkerBuffer; + long mboxMarkerOffset; + int mboxMarkerLength; + + // message/rfc822 mbox markers (shouldn't exist, but sometimes do) + byte[] preHeaderBuffer = new byte[128]; + int preHeaderLength; + + // header buffer + byte[] headerBuffer = new byte[512]; + long headerOffset; + int headerIndex; + int headerCount; + + readonly List bounds = new List (); + + ContentEncoding? currentEncoding; + ContentType currentContentType; + long? 
currentContentLength; + + MimeParserState state; + BoundaryType boundary; + MimeFormat format; + bool toplevel; + bool eos; + + long headerBlockBegin; + long headerBlockEnd; + long contentEnd; + + long prevLineBeginOffset; + long lineBeginOffset; + int lineNumber; + + Stream stream; + long position; + + static MimeReader () + { +#if NET45 + CompletedTask = Task.FromResult (true); +#else + CompletedTask = Task.CompletedTask; +#endif + } + + public MimeReader (Stream stream, MimeFormat format = MimeFormat.Default) + { + if (stream == null) + throw new ArgumentNullException (nameof (stream)); + + this.format = format; + this.stream = stream; + + inputIndex = inputStart; + inputEnd = inputStart; + + mboxMarkerOffset = 0; + mboxMarkerLength = 0; + headerBlockBegin = 0; + headerBlockEnd = 0; + lineNumber = 1; + contentEnd = 0; + + position = stream.CanSeek ? stream.Position : 0; + prevLineBeginOffset = position; + lineBeginOffset = position; + preHeaderLength = 0; + headerOffset = 0; + headerIndex = 0; + toplevel = false; + eos = false; + + if (format == MimeFormat.Mbox) { + bounds.Add (Boundary.CreateMboxBoundary ()); + + if (mboxMarkerBuffer == null) + mboxMarkerBuffer = new byte[ReadAheadSize]; + } + + state = MimeParserState.Initialized; + boundary = BoundaryType.None; + } + + /// + /// Gets a value indicating whether the parser has reached the end of the input stream. + /// + /// + /// Gets a value indicating whether the parser has reached the end of the input stream. + /// + /// true if this parser has reached the end of the input stream; + /// otherwise, false. + public bool IsEndOfStream { + get { return state == MimeParserState.Eos; } + } + + /// + /// Gets the current position of the parser within the stream. + /// + /// + /// Gets the current position of the parser within the stream. + /// + /// The stream offset. 
+		public long Position {
+			get { return GetOffset (inputIndex); }
+		}
+
+		/// <summary>
+		/// Called when an mbox From-line has been read.
+		/// </summary>
+		/// <remarks>
+		/// The marker bytes do not include the line terminator. The reader passes its own
+		/// marker buffer, which is reused for the next marker.
+		/// </remarks>
+		protected virtual void OnMboxMarkerRead (byte[] marker, int startIndex, int count, long beginOffset, int lineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMboxMarkerRead</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMboxMarkerReadAsync (byte[] marker, int startIndex, int count, long beginOffset, int lineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>
+		/// Called each time a header has been successfully parsed.
+		/// </summary>
+		/// <remarks>
+		/// The synchronous reader currently always passes -1 for <paramref name="beginLineNumber"/>.
+		/// </remarks>
+		protected virtual void OnHeaderRead (Header header, int beginLineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		// FIXME: make use of this
+		protected virtual Task OnHeaderReadAsync (Header header, int beginLineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Hook for the end of a header block; the default does nothing.</summary>
+		protected virtual void OnHeadersEnd (long offset, int lineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnHeadersEnd</c>; the default returns a completed task.</summary>
+		protected virtual Task OnHeadersEndAsync (long offset, int lineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		#region MimeMessage Events
+
+		/// <summary>
+		/// Called for a message once its headers have been parsed and before its body is processed.
+		/// </summary>
+		protected virtual void OnMimeMessageBegin (long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMimeMessageBegin</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMimeMessageBeginAsync (long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>
+		/// Called for a message after the End callback of its body entity has been invoked.
+		/// </summary>
+		protected virtual void OnMimeMessageEnd (long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMimeMessageEnd</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMimeMessageEndAsync (long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+#endregion MimeMessage Events
+
+#region MimePart Events
+
+		/// <summary>
+		/// Called when an entity that is neither handled as a multipart nor as a message part begins
+		/// (after its headers have been parsed and before its content is scanned).
+		/// </summary>
+		protected virtual void OnMimePartBegin (ContentType contentType, long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMimePartBegin</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMimePartBeginAsync (ContentType contentType, long
beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Called immediately before the content of a MIME part is scanned.</summary>
+		protected virtual void OnMimePartContentBegin (long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMimePartContentBegin</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMimePartContentBeginAsync (long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>
+		/// Called with each chunk of MIME part content as it is scanned.
+		/// </summary>
+		/// <remarks>
+		/// The reader passes its internal input buffer as <paramref name="content"/>; that
+		/// buffer is shifted and refilled by later reads, so data that must be kept needs to
+		/// be copied before returning.
+		/// </remarks>
+		protected virtual void OnMimePartContentRead (byte[] content, int startIndex, int count, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMimePartContentRead</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMimePartContentReadAsync (byte[] content, int startIndex, int count, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Called after the content of a MIME part has been scanned.</summary>
+		protected virtual void OnMimePartContentEnd (long beginOffset, int beginLineNumber, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMimePartContentEnd</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMimePartContentEndAsync (long beginOffset, int beginLineNumber, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Called when an entity that was reported via <c>OnMimePartBegin</c> has been fully processed.</summary>
+		protected virtual void OnMimePartEnd (ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMimePartEnd</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMimePartEndAsync (ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+#endregion MimePart Events
+
+#region MessagePart Events
+
+		/// <summary>
+		/// Called when an entity that is handled as an embedded message begins
+		/// (after its headers have been parsed and before the embedded message is processed).
+		/// </summary>
+		protected virtual void OnMessagePartBegin (ContentType contentType, long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMessagePartBegin</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMessagePartBeginAsync (ContentType contentType, long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Called when an entity that was reported via <c>OnMessagePartBegin</c> has been fully processed.</summary>
+		protected virtual void OnMessagePartEnd (ContentType contentType, long beginOffset, int beginLineNumber,
long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMessagePartEnd</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMessagePartEndAsync (ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+#endregion MessagePart Events
+
+#region Multipart Events
+
+		/// <summary>
+		/// Called when an entity that is handled as a multipart begins
+		/// (after its headers have been parsed and before its children are processed).
+		/// </summary>
+		protected virtual void OnMultipartBegin (ContentType contentType, long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMultipartBegin</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMultipartBeginAsync (ContentType contentType, long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Multipart boundary hook; the default does nothing.</summary>
+		protected virtual void OnMultipartBoundary (string boundary, long beginOffset, long endOffset, int lineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMultipartBoundary</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMultipartBoundaryAsync (string boundary, long beginOffset, long endOffset, int lineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Multipart end-boundary hook; the default does nothing.</summary>
+		protected virtual void OnMultipartEndBoundary (string boundary, long beginOffset, long endOffset, int lineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMultipartEndBoundary</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMultipartEndBoundaryAsync (string boundary, long beginOffset, long endOffset, int lineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Called before a multipart preamble is scanned; the default does nothing.</summary>
+		protected virtual void OnMultipartPreambleBegin (long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMultipartPreambleBegin</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMultipartPreambleBeginAsync (long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>
+		/// Called with each chunk of multipart preamble data as it is scanned.
+		/// </summary>
+		/// <remarks>
+		/// The reader passes its internal input buffer as <paramref name="content"/>; copy
+		/// any data that must outlive the call.
+		/// </remarks>
+		protected virtual void OnMultipartPreambleRead (byte[] content, int startIndex, int count, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMultipartPreambleRead</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMultipartPreambleReadAsync (byte[] content, int startIndex, int count, CancellationToken
cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Multipart preamble end hook; the default does nothing.</summary>
+		protected virtual void OnMultipartPreambleEnd (long beginOffset, int beginLineNumber, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMultipartPreambleEnd</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMultipartPreambleEndAsync (long beginOffset, int beginLineNumber, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Multipart epilogue begin hook; the default does nothing.</summary>
+		protected virtual void OnMultipartEpilogueBegin (long beginOffset, int lineNumber, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMultipartEpilogueBegin</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMultipartEpilogueBeginAsync (long beginOffset, int lineNumber, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>
+		/// Called with each chunk of multipart epilogue data as it is scanned.
+		/// </summary>
+		/// <remarks>
+		/// The reader passes its internal input buffer as <paramref name="content"/>; copy
+		/// any data that must outlive the call.
+		/// </remarks>
+		protected virtual void OnMultipartEpilogueRead (byte[] content, int startIndex, int count, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMultipartEpilogueRead</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMultipartEpilogueReadAsync (byte[] content, int startIndex, int count, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Multipart epilogue end hook; the default does nothing.</summary>
+		protected virtual void OnMultipartEpilogueEnd (long beginOffset, int beginLineNumber, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMultipartEpilogueEnd</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMultipartEpilogueEndAsync (long beginOffset, int beginLineNumber, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+		/// <summary>Called when an entity that was reported via <c>OnMultipartBegin</c> has been fully processed.</summary>
+		protected virtual void OnMultipartEnd (ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+		}
+
+		/// <summary>Asynchronous variant of <c>OnMultipartEnd</c>; the default returns a completed task.</summary>
+		protected virtual Task OnMultipartEndAsync (ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+		{
+			return CompletedTask;
+		}
+
+#endregion Multipart Events
+
+		// Rounds 'need' up to the next multiple of 64.
+		static int NextAllocSize (int need)
+		{
+			return (need + 63) & ~63;
+		}
+
+		// Prepares the input buffer for another read.
+		//
+		// Returns false when no read is needed (at least 'atleast' unread bytes are already
+		// buffered, or the end of the stream has been seen); 'left' is then the number of
+		// unread bytes. Otherwise the unread bytes, plus the 'save' bytes in front of
+		// inputIndex, are moved toward the front of the buffer where possible,
+		// inputIndex/inputEnd are updated, and [start, end) is the region to read into.
+		bool AlignReadAheadBuffer (int atleast, int save, out int left, out int start, out int end)
+		{
+			left = inputEnd - inputIndex;
+			start = inputStart;
+			end = inputEnd;
+
+			// nothing to do if enough data is buffered or there is nothing more to read
+			if (left >= atleast || eos)
+				return false;
+
+			// from here on, 'left' also counts the bytes the caller asked us to preserve
+			left += save;
+
+			if (left > 0) {
+				int index = inputIndex - save;
+
+				// attempt to align the end of the remaining input with ReadAheadSize
+				if (index >= start) {
+					start -= Math.Min (ReadAheadSize, left);
+					Buffer.BlockCopy (input, index, input, start, left);
+					index = start;
+					start += left;
+				} else if (index > 0) {
+					int shift = Math.Min (index, end - start);
+					Buffer.BlockCopy (input, index, input, index - shift, left);
+					index -= shift;
+					start = index + left;
+				} else {
+					// we can't shift...
+					start = end;
+				}
+
+				inputIndex = index + save;
+				inputEnd = start;
+			} else {
+				inputIndex = start;
+				inputEnd = start;
+			}
+
+			// never read into the trailing pad bytes
+			end = input.Length - PadSize;
+
+			return true;
+		}
+
+		// Makes sure that at least 'atleast' bytes are buffered (reading from the stream if
+		// needed) while keeping the 'save' bytes in front of inputIndex. Returns the number
+		// of unread bytes now available; sets 'eos' when the stream returns no data.
+		int ReadAhead (int atleast, int save, CancellationToken cancellationToken)
+		{
+			int nread, left, start, end;
+
+			if (!AlignReadAheadBuffer (atleast, save, out left, out start, out end))
+				return left;
+
+			// use the cancellable stream interface if available...
+			var cancellable = stream as ICancellableStream;
+			if (cancellable != null) {
+				nread = cancellable.Read (input, start, end - start, cancellationToken);
+			} else {
+				cancellationToken.ThrowIfCancellationRequested ();
+				nread = stream.Read (input, start, end - start);
+			}
+
+			if (nread > 0) {
+				inputEnd += nread;
+				position += nread;
+			} else {
+				eos = true;
+			}
+
+			return inputEnd - inputIndex;
+		}
+
+		// Converts an index into the input buffer into a stream offset
+		// ('position' is the stream offset that corresponds to inputEnd).
+		long GetOffset (int index)
+		{
+			if (position == -1)
+				return -1;
+
+			return position - (inputEnd - index);
+		}
+
+		// Like GetOffset(), but unless the end of the stream was reached, a line terminator
+		// ("\n" or "\r\n") directly in front of 'index' is excluded from the offset.
+		long GetEndOffset (int index)
+		{
+			if (boundary != BoundaryType.Eos && index > 1 && input[index - 1] == (byte) '\n') {
+				index--;
+
+				if (index > 1 && input[index - 1] == (byte) '\r')
+					index--;
+			}
+
+			return GetOffset (index);
+		}
+
+		// Computes the number of lines spanned by [beginOffset, endOffset) from the current
+		// line-tracking state (lineNumber, lineBeginOffset, prevLineBeginOffset).
+		int GetLineCount (int beginLineNumber, long beginOffset, long endOffset)
+		{
+			var lines = lineNumber - beginLineNumber;
+
+			// count a trailing partial line that lies within the range
+			if (lineBeginOffset >= beginOffset && endOffset > lineBeginOffset)
+				lines++;
+
+			// don't count the last line if the range ends exactly where that line began
+			if (boundary != BoundaryType.Eos && endOffset == prevLineBeginOffset)
+				lines--;
+
+			return lines;
+		}
+
+		// Returns true if the first 'length' bytes of str1 and str2 are identical.
+		static unsafe bool CStringsEqual (byte* str1, byte* str2, int length)
+		{
+			byte* se = str1 + length;
+			byte* s1 = str1;
+			byte* s2 = str2;
+
+			while (s1 < se) {
+				if (*s1++ != *s2++)
+					return false;
+			}
+
+			return true;
+		}
+
+		// Advances inputIndex over as many UTF-8 byte-order-mark bytes as match, continuing
+		// from 'bomIndex' (the number of BOM bytes matched so far).
+		unsafe void StepByteOrderMark (byte* inbuf, ref int bomIndex)
+		{
+			byte* inptr = inbuf + inputIndex;
+			byte* inend = inbuf + inputEnd;
+
+			while (inptr < inend && bomIndex < UTF8ByteOrderMark.Length && *inptr == UTF8ByteOrderMark[bomIndex]) {
+				bomIndex++;
+				inptr++;
+			}
+
+			inputIndex = (int) (inptr - inbuf);
+		}
+
+		// Skips an optional UTF-8 byte-order-mark at the current position. Returns false if
+		// no data could be read, or if only part of a byte-order-mark was found.
+		unsafe bool StepByteOrderMark (byte* inbuf, CancellationToken cancellationToken)
+		{
+			int bomIndex = 0;
+
+			do {
+				var available = ReadAhead (ReadAheadSize, 0, cancellationToken);
+
+				if (available <= 0) {
+					// failed to read any data... EOF
+					inputIndex = inputEnd;
+					return false;
+				}
+
+				StepByteOrderMark (inbuf, ref bomIndex);
+			} while (inputIndex == inputEnd);
+
+			return bomIndex == 0 || bomIndex == UTF8ByteOrderMark.Length;
+		}
+
+		// Returns true if 'text' starts with "From " (optionally preceded by '>' when
+		// allowMunged is true). The caller must guarantee that enough bytes are readable.
+		// NOTE(review): the COMPARE_QWORD variant does not look at allowMunged - confirm
+		// whether that is intended.
+		static unsafe bool IsMboxMarker (byte* text, bool allowMunged = false)
+		{
+#if COMPARE_QWORD
+			const ulong FromMask = 0x000000FFFFFFFFFF;
+			const ulong From = 0x000000206D6F7246;
+			ulong* qword = (ulong*) text;
+
+			return (*qword & FromMask) == From;
+#else
+			byte* inptr = text;
+
+			if (allowMunged && *inptr == (byte) '>')
+				inptr++;
+
+			return *inptr++ == (byte) 'F' && *inptr++ == (byte) 'r' && *inptr++ == (byte) 'o' && *inptr++ == (byte) 'm' && *inptr == (byte) ' ';
+#endif
+		}
+
+		// Scans the buffered input line by line for an mbox From-line.
+		//
+		// Returns true when one was found (it is copied into mboxMarkerBuffer and consumed).
+		// Returns false when more input is needed; 'left' is then the length of the trailing
+		// line that could not be processed yet (0 if everything was consumed).
+		unsafe bool StepMboxMarker (byte* inbuf, ref int left)
+		{
+			byte* inptr = inbuf + inputIndex;
+			byte* inend = inbuf + inputEnd;
+
+			// sentinel so that the line scan below always terminates
+			*inend = (byte) '\n';
+
+			while (inptr < inend) {
+				int startIndex = inputIndex;
+				byte* start = inptr;
+
+				// scan for the end of the line
+				while (*inptr != (byte) '\n')
+					inptr++;
+
+				var markerLength = (int) (inptr - start);
+
+				if (inptr > start && *(inptr - 1) == (byte) '\r')
+					markerLength--;
+
+				// consume the '\n'
+				inptr++;
+
+				var lineLength = (int) (inptr - start);
+
+				if (inptr >= inend) {
+					// we don't have enough input data
+					left = lineLength;
+					return false;
+				}
+
+				inputIndex += lineLength;
+				prevLineBeginOffset = lineBeginOffset;
+				lineBeginOffset = GetOffset (inputIndex);
+				lineNumber++;
+
+				if (markerLength >= 5 && IsMboxMarker (start)) {
+					mboxMarkerOffset = GetOffset (startIndex);
+					mboxMarkerLength = markerLength;
+
+					if (mboxMarkerBuffer.Length < mboxMarkerLength)
+						Array.Resize (ref mboxMarkerBuffer, mboxMarkerLength);
+
+					Buffer.BlockCopy (input, startIndex, mboxMarkerBuffer, 0, markerLength);
+
+					return true;
+				}
+			}
+
+			left = 0;
+
+			return false;
+		}
+
+		// Reads until an mbox From-line has been found, reports it via OnMboxMarkerRead() and
+		// moves on to the MessageHeaders state; sets the Error state if the end of the
+		// stream is reached first.
+		unsafe void StepMboxMarker (byte* inbuf, CancellationToken cancellationToken)
+		{
+			bool complete;
+			int left = 0;
+
+			mboxMarkerLength = 0;
+
+			do {
+				var available =
ReadAhead (Math.Max (ReadAheadSize, left), 0, cancellationToken);
+
+				if (available <= left) {
+					// failed to find a From line; EOF reached
+					state = MimeParserState.Error;
+					inputIndex = inputEnd;
+					return;
+				}
+
+				complete = StepMboxMarker (inbuf, ref left);
+			} while (!complete);
+
+			// the scanner has already advanced past the marker line, hence lineNumber - 1
+			OnMboxMarkerRead (mboxMarkerBuffer, 0, mboxMarkerLength, mboxMarkerOffset, lineNumber - 1, cancellationToken);
+
+			state = MimeParserState.MessageHeaders;
+		}
+
+		// Appends 'length' bytes of the input buffer, starting at 'startIndex', to the raw
+		// header buffer (growing it if necessary).
+		void AppendRawHeaderData (int startIndex, int length)
+		{
+			int left = headerBuffer.Length - headerIndex;
+
+			if (left < length)
+				Array.Resize (ref headerBuffer, NextAllocSize (headerIndex + length));
+
+			Buffer.BlockCopy (input, startIndex, headerBuffer, headerIndex, length);
+			headerIndex += length;
+		}
+
+		// Discards any buffered raw header and pre-header data.
+		void ResetRawHeaderData ()
+		{
+			preHeaderLength = 0;
+			headerIndex = 0;
+		}
+
+		// Parses the raw header accumulated in headerBuffer (if any). On success, records the
+		// first Content-Transfer-Encoding, Content-Length and Content-Type values seen for
+		// the current entity, reports the header via OnHeaderRead() and resets the buffer.
+		// NOTE(review): when Header.TryParse() fails, headerIndex is not reset here, so the
+		// unparsed bytes stay in headerBuffer - confirm that this is intended.
+		unsafe void ParseAndAppendHeader (ParserOptions options, CancellationToken cancellationToken)
+		{
+			if (headerIndex == 0)
+				return;
+
+			fixed (byte* buf = headerBuffer) {
+				if (Header.TryParse (options, buf, headerIndex, false, out var header)) {
+					var rawValue = header.RawValue;
+					int index = 0;
+
+					header.Offset = headerOffset;
+
+					switch (header.Id) {
+					case HeaderId.ContentTransferEncoding:
+						if (!currentEncoding.HasValue) {
+							MimeUtils.TryParse (header.Value, out ContentEncoding encoding);
+							currentEncoding = encoding;
+						}
+						break;
+					case HeaderId.ContentLength:
+						if (!currentContentLength.HasValue) {
+							// -1 marks a Content-Length header whose value could not be parsed
+							if (ParseUtils.SkipWhiteSpace (rawValue, ref index, rawValue.Length) && ParseUtils.TryParseInt32 (rawValue, ref index, rawValue.Length, out int length))
+								currentContentLength = length;
+							else
+								currentContentLength = -1;
+						}
+						break;
+					case HeaderId.ContentType:
+						if (currentContentType == null) {
+							// FIXME: do we really need all this fallback stuff for parameters? I doubt it.
+							if (!ContentType.TryParse (options, rawValue, ref index, rawValue.Length, false, out var type) && type == null) {
+								// if 'type' is null, then it means that even the mime-type was unintelligible
+								type = new ContentType ("application", "octet-stream");
+
+								// attempt to recover any parameters...
+								while (index < rawValue.Length && rawValue[index] != ';')
+									index++;
+
+								if (++index < rawValue.Length) {
+									if (ParameterList.TryParse (options, rawValue, ref index, rawValue.Length, false, out var parameters))
+										type.Parameters = parameters;
+								}
+							}
+
+							currentContentType = type;
+						}
+						break;
+					}
+
+					OnHeaderRead (header, -1, cancellationToken); // FIXME: track the line number that the header starts on?
+					headerIndex = 0;
+					headerCount++;
+				}
+			}
+		}
+
+		static bool IsControl (byte c)
+		{
+			return c.IsCtrl ();
+		}
+
+		static bool IsBlank (byte c)
+		{
+			return c.IsBlank ();
+		}
+
+		// Returns true if 'text' points at a line terminator ("\n" or "\r\n").
+		static unsafe bool IsEoln (byte* text)
+		{
+			if (*text == (byte) '\r')
+				text++;
+
+			return *text == (byte) '\n';
+		}
+
+		// Scans the buffered input one header line at a time.
+		//
+		// Returns false when scanning must stop (the 'state' field says why: end of the
+		// header block, a boundary, the next mbox message, or an error). Returns true when
+		// the buffered input is exhausted and more is needed; 'left' is then the number of
+		// unconsumed bytes. The ref flags carry the line-scanning state across calls.
+		unsafe bool StepHeaders (ParserOptions options, byte* inbuf, ref bool scanningFieldName, ref bool checkFolded, ref bool midline, ref bool blank, ref bool valid, ref int left, CancellationToken cancellationToken)
+		{
+			byte* inptr = inbuf + inputIndex;
+			byte* inend = inbuf + inputEnd;
+			bool needInput = false;
+			long length;
+			bool eoln;
+
+			// sentinel so that the line scans below always terminate
+			*inend = (byte) '\n';
+
+			while (inptr < inend) {
+				byte* start = inptr;
+
+				// if we are scanning a new line, check for a folded header
+				if (!midline && checkFolded && !IsBlank (*inptr)) {
+					// the line does not start with a blank, so the previous header is complete
+					ParseAndAppendHeader (options, cancellationToken);
+
+					headerOffset = GetOffset ((int) (inptr - inbuf));
+					scanningFieldName = true;
+					checkFolded = false;
+					blank = false;
+					valid = true;
+				}
+
+				eoln = IsEoln (inptr);
+				if (scanningFieldName && !eoln) {
+					// scan and validate the field name
+					if (*inptr != (byte) ':') {
+						// temporarily use ':' as the sentinel while looking for the end of the field name
+						*inend = (byte) ':';
+
+						while (*inptr != (byte) ':') {
+							// Blank spaces are allowed between the field name and
+							// the ':', but field names themselves are not allowed
+							// to contain spaces.
+							if (IsBlank (*inptr)) {
+								blank = true;
+							} else if (blank || IsControl (*inptr)) {
+								valid = false;
+								break;
+							}
+
+							inptr++;
+						}
+
+						if (inptr == inend) {
+							// we don't have enough input data; restore state back to the beginning of the line
+							left = (int) (inend - start);
+							inputIndex = (int) (start - inbuf);
+							needInput = true;
+							break;
+						}
+
+						*inend = (byte) '\n';
+					} else {
+						valid = false;
+					}
+
+					if (!valid) {
+						length = inptr - start;
+
+						// NOTE(review): inputIndex is an index into the input buffer while contentEnd
+						// is compared against stream offsets in CheckBoundary() - confirm this comparison.
+						if (format == MimeFormat.Mbox && inputIndex >= contentEnd && length >= 5 && IsMboxMarker (start)) {
+							// we've found the start of the next message...
+							inputIndex = (int) (start - inbuf);
+							state = MimeParserState.Complete;
+							headerIndex = 0;
+							return false;
+						}
+
+						if (headerCount == 0) {
+							if (state == MimeParserState.MessageHeaders) {
+								// ignore From-lines that might appear at the start of a message
+								if (toplevel && (length < 5 || !IsMboxMarker (start, true))) {
+									// not a From-line...
+									inputIndex = (int) (start - inbuf);
+									state = MimeParserState.Error;
+									headerIndex = 0;
+									return false;
+								}
+							} else if (toplevel && state == MimeParserState.Headers) {
+								inputIndex = (int) (start - inbuf);
+								state = MimeParserState.Error;
+								headerIndex = 0;
+								return false;
+							}
+						}
+					}
+				}
+
+				scanningFieldName = false;
+
+				// find the end of the line (the sentinel at *inend guarantees termination)
+				while (*inptr != (byte) '\n')
+					inptr++;
+
+				if (inptr == inend) {
+					// we didn't manage to slurp up a full line, save what we have and refill our input buffer
+					length = inptr - start;
+
+					if (inptr > start) {
+						// Note: if the last byte we got was a '\r', rewind a byte
+						inptr--;
+						if (*inptr == (byte) '\r')
+							length--;
+						else
+							inptr++;
+					}
+
+					if (length > 0) {
+						AppendRawHeaderData ((int) (start - inbuf), (int) length);
+						midline = true;
+					}
+
+					inputIndex = (int) (inptr - inbuf);
+					left = (int) (inend - inptr);
+					needInput = true;
+					break;
+				}
+
+				prevLineBeginOffset = lineBeginOffset;
+				lineBeginOffset = GetOffset ((int) (inptr - inbuf) + 1);
+				lineNumber++;
+
+				// check to see if we've reached the end of the headers
+				if (!midline && IsEoln (start)) {
+					inputIndex = (int) (inptr - inbuf) + 1;
+					state = MimeParserState.Content;
+					ParseAndAppendHeader (options, cancellationToken);
+					headerIndex = 0;
+					return false;
+				}
+
+				// length of the line including its '\n'
+				length = (inptr + 1) - start;
+
+				if ((boundary = CheckBoundary ((int) (start - inbuf), start, (int) length)) != BoundaryType.None) {
+					inputIndex = (int) (start - inbuf);
+					state = MimeParserState.Boundary;
+					headerIndex = 0;
+					return false;
+				}
+
+				if (!valid && headerCount == 0) {
+					// an invalid line before the first header: remember the first such line
+					// (without its line terminator) in preHeaderBuffer and keep scanning
+					if (length > 0 && preHeaderLength == 0) {
+						if (inptr[-1] == (byte) '\r')
+							length--;
+						length--;
+
+						preHeaderLength = (int) length;
+
+						if (preHeaderLength > preHeaderBuffer.Length)
+							Array.Resize (ref preHeaderBuffer, NextAllocSize (preHeaderLength));
+
+						Buffer.BlockCopy (input, (int) (start - inbuf), preHeaderBuffer, 0, preHeaderLength);
+					}
+					scanningFieldName = true;
+					checkFolded = false;
+					blank = false;
+					valid = true;
+				} else
{
+					AppendRawHeaderData ((int) (start - inbuf), (int) length);
+					checkFolded = true;
+				}
+
+				midline = false;
+				inptr++;
+			}
+
+			if (!needInput) {
+				inputIndex = (int) (inptr - inbuf);
+				left = (int) (inend - inptr);
+			}
+
+			return true;
+		}
+
+		// Parses a complete header block starting at the current position. Resets the
+		// per-entity header state, records headerBlockBegin/headerBlockEnd and leaves the
+		// outcome in 'state'.
+		unsafe void StepHeaders (ParserOptions options, byte* inbuf, CancellationToken cancellationToken)
+		{
+			bool scanningFieldName = true;
+			bool checkFolded = false;
+			bool midline = false;
+			bool blank = false;
+			bool valid = true;
+			int left = 0;
+
+			headerBlockBegin = GetOffset (inputIndex);
+			boundary = BoundaryType.None;
+			ResetRawHeaderData ();
+			headerCount = 0;
+
+			currentContentLength = null;
+			currentContentType = null;
+			currentEncoding = null;
+
+			ReadAhead (ReadAheadSize, 0, cancellationToken);
+
+			do {
+				if (!StepHeaders (options, inbuf, ref scanningFieldName, ref checkFolded, ref midline, ref blank, ref valid, ref left, cancellationToken))
+					break;
+
+				// ask for at least one more byte than is still unconsumed
+				var available = ReadAhead (left + 1, 0, cancellationToken);
+
+				if (available == left) {
+					// EOF reached before we reached the end of the headers...
+					if (scanningFieldName && left > 0) {
+						// EOF reached right in the middle of a header field name. Throw an error.
+						//
+						// See private email from Feb 8, 2018 which contained a sample message w/o
+						// any breaks between the header and message body. The file also did not
+						// end with a newline sequence.
+						state = MimeParserState.Error;
+					} else {
+						// EOF reached somewhere in the middle of the value.
+						//
+						// Append whatever data we've got left and pretend we found the end
+						// of the header value (and the header block).
+						//
+						// For more details, see https://github.com/jstedfast/MimeKit/pull/51
+						// and https://github.com/jstedfast/MimeKit/issues/348
+						if (left > 0) {
+							AppendRawHeaderData (inputIndex, left);
+							inputIndex = inputEnd;
+						}
+
+						ParseAndAppendHeader (options, cancellationToken);
+
+						state = MimeParserState.Content;
+					}
+					break;
+				}
+			} while (true);
+
+			headerBlockEnd = GetOffset (inputIndex);
+		}
+
+		// Advances inputIndex to the end of the current line within the buffered input.
+		//
+		// Returns true if a '\n' was found: with consumeNewLine the '\n' is consumed and the
+		// line counters are updated, otherwise inputIndex is left on the line terminator
+		// (on the '\r' of a "\r\n" pair). Returns false, with all buffered input consumed,
+		// if the line does not end within the buffer.
+		unsafe bool SkipLine (byte* inbuf, bool consumeNewLine)
+		{
+			byte* inptr = inbuf + inputIndex;
+			byte* inend = inbuf + inputEnd;
+
+			// sentinel so that the scan below always terminates
+			*inend = (byte) '\n';
+
+			while (*inptr != (byte) '\n')
+				inptr++;
+
+			if (inptr < inend) {
+				inputIndex = (int) (inptr - inbuf);
+
+				if (consumeNewLine) {
+					inputIndex++;
+					lineNumber++;
+					prevLineBeginOffset = lineBeginOffset;
+					lineBeginOffset = GetOffset (inputIndex);
+				} else if (*(inptr - 1) == (byte) '\r') {
+					inputIndex--;
+				}
+
+				return true;
+			}
+
+			inputIndex = inputEnd;
+
+			return false;
+		}
+
+		// Skips to the end of the current line, reading more data as needed. Returns false
+		// if the stream ends before a '\n' is found.
+		unsafe bool SkipLine (byte* inbuf, bool consumeNewLine, CancellationToken cancellationToken)
+		{
+			do {
+				if (SkipLine (inbuf, consumeNewLine))
+					return true;
+
+				// keep 1 byte in front of inputIndex so that a trailing '\r' remains inspectable
+				if (ReadAhead (ReadAheadSize, 1, cancellationToken) <= 0)
+					return false;
+			} while (true);
+		}
+
+		// Advances the parser by one step based on the current state and returns the new
+		// state. States not listed in the switch are returned unchanged.
+		unsafe MimeParserState Step (ParserOptions options, byte* inbuf, CancellationToken cancellationToken)
+		{
+			switch (state) {
+			case MimeParserState.Initialized:
+				if (!StepByteOrderMark (inbuf, cancellationToken)) {
+					state = MimeParserState.Eos;
+					break;
+				}
+
+				state = format == MimeFormat.Mbox ?
MimeParserState.MboxMarker : MimeParserState.MessageHeaders;
+				break;
+			case MimeParserState.MboxMarker:
+				StepMboxMarker (inbuf, cancellationToken);
+				break;
+			case MimeParserState.MessageHeaders:
+			case MimeParserState.Headers:
+				StepHeaders (options, inbuf, cancellationToken);
+				toplevel = false;
+				break;
+			}
+
+			return state;
+		}
+
+		// Returns the Content-Type parsed from the current headers, or a default when there
+		// was none: message/rfc822 for children of a multipart/digest, text/plain otherwise.
+		ContentType GetContentType (ContentType parent)
+		{
+			if (currentContentType != null)
+				return currentContentType;
+
+			if (parent == null || !parent.IsMimeType ("multipart", "digest"))
+				return new ContentType ("text", "plain");
+
+			return new ContentType ("message", "rfc822");
+		}
+
+		// Cheap pre-check: a line can only be a boundary if it starts with "--" or, in mbox
+		// format, with "From ".
+		unsafe bool IsPossibleBoundary (byte* text, int length)
+		{
+			if (length < 2)
+				return false;
+
+			if (*text == (byte) '-' && *(text + 1) == (byte) '-')
+				return true;
+
+			if (format == MimeFormat.Mbox && length >= 5 && IsMboxMarker (text))
+				return true;
+
+			return false;
+		}
+
+		// Returns true if 'text' starts with the first 'boundaryLength' bytes of 'boundary'
+		// and is either an mbox marker or followed only by whitespace.
+		static unsafe bool IsBoundary (byte* text, int length, byte[] boundary, int boundaryLength)
+		{
+			if (boundaryLength > length)
+				return false;
+
+			fixed (byte* boundaryptr = boundary) {
+				// make sure that the text matches the boundary
+				if (!CStringsEqual (text, boundaryptr, boundaryLength))
+					return false;
+
+				// if this is an mbox marker, we're done
+				if (IsMboxMarker (text))
+					return true;
+
+				// the boundary may optionally be followed by lwsp
+				byte* inptr = text + boundaryLength;
+				byte* inend = text + length;
+
+				while (inptr < inend) {
+					if (!(*inptr).IsWhitespace ())
+						return false;
+
+					inptr++;
+				}
+			}
+
+			return true;
+		}
+
+		// Classifies the line at 'start' against the active boundaries. bounds[0] is the
+		// immediate boundary; any other match is reported as a parent boundary.
+		unsafe BoundaryType CheckBoundary (int startIndex, byte* start, int length)
+		{
+			int count = bounds.Count;
+
+			if (!IsPossibleBoundary (start, length))
+				return BoundaryType.None;
+
+			if (contentEnd > 0) {
+				// We'll need to special-case checking for the mbox From-marker when respecting Content-Length
+				count--;
+			}
+
+			for (int i = 0; i < count; i++) {
+				var boundary = bounds[i];
+
+				// check the end-boundary form first
+				if (IsBoundary (start, length, boundary.Marker,
boundary.FinalLength))
+					return i == 0 ? BoundaryType.ImmediateEndBoundary : BoundaryType.ParentEndBoundary;
+
+				if (IsBoundary (start, length, boundary.Marker, boundary.Length))
+					return i == 0 ? BoundaryType.ImmediateBoundary : BoundaryType.ParentBoundary;
+			}
+
+			if (contentEnd > 0) {
+				// now it is time to check the mbox From-marker for the Content-Length case
+				long curOffset = GetOffset (startIndex);
+				var boundary = bounds[count];
+
+				if (curOffset >= contentEnd && IsBoundary (start, length, boundary.Marker, boundary.Length))
+					return BoundaryType.ImmediateEndBoundary;
+			}
+
+			return BoundaryType.None;
+		}
+
+		// Returns true if the line at the current position matches the immediate boundary
+		// (its end-boundary form when 'final' is true).
+		unsafe bool FoundImmediateBoundary (byte* inbuf, bool final)
+		{
+			int boundaryLength = final ? bounds[0].FinalLength : bounds[0].Length;
+			byte* start = inbuf + inputIndex;
+			byte* inend = inbuf + inputEnd;
+			byte* inptr = start;
+
+			// sentinel so that the scan below always terminates
+			*inend = (byte) '\n';
+
+			while (*inptr != (byte) '\n')
+				inptr++;
+
+			return IsBoundary (start, (int) (inptr - start), bounds[0].Marker, boundaryLength);
+		}
+
+		// Returns bounds[0].MaxLength + 2, or 0 when there are no active boundaries.
+		int GetMaxBoundaryLength ()
+		{
+			return bounds.Count > 0 ? bounds[0].MaxLength + 2 : 0;
+		}
+
+		static bool IsMultipart (ContentType contentType)
+		{
+			return contentType.MediaType.Equals ("multipart", StringComparison.OrdinalIgnoreCase);
+		}
+
+		// message/* subtypes whose content is handled as an embedded message
+		static readonly string[] MessageMediaSubtypes = { "rfc822", "news", "global", "global-headers", "external-body", "rfc2822" };
+
+		// Returns true if an entity with the given type should be handled as an embedded
+		// message: a listed message/* subtype or text/rfc822-headers, unless
+		// ParserOptions.IsEncoded() reports the content transfer encoding as encoded.
+		static bool IsMessagePart (ContentType contentType, ContentEncoding?
encoding)
+		{
+			if (encoding.HasValue && ParserOptions.IsEncoded (encoding.Value))
+				return false;
+
+			if (contentType.MediaType.Equals ("message", StringComparison.OrdinalIgnoreCase)) {
+				for (int i = 0; i < MessageMediaSubtypes.Length; i++) {
+					if (contentType.MediaSubtype.Equals (MessageMediaSubtypes[i], StringComparison.OrdinalIgnoreCase))
+						return true;
+				}
+			}
+
+			if (contentType.IsMimeType ("text", "rfc822-headers"))
+				return true;
+
+			return false;
+		}
+
+		// Scans the buffered input line by line until a boundary is found or the input runs
+		// out. On return, [contentIndex, inputIndex) is the range of bytes scanned as
+		// content, 'boundary' holds the kind of boundary found (if any) and 'formats'
+		// records which newline styles were seen.
+		unsafe void ScanContent (byte* inbuf, ref int contentIndex, ref int nleft, ref bool midline, ref bool[] formats)
+		{
+			int length = inputEnd - inputIndex;
+			byte* inptr = inbuf + inputIndex;
+			byte* inend = inbuf + inputEnd;
+			int startIndex = inputIndex;
+
+			contentIndex = inputIndex;
+
+			// the previous pass ended mid-line and the read-ahead produced no new data
+			if (midline && length == nleft)
+				boundary = BoundaryType.Eos;
+
+			// sentinel so that the line scans below always terminate
+			*inend = (byte) '\n';
+
+			while (inptr < inend) {
+				// Note: we can always depend on byte[] arrays being 4-byte aligned on 32bit and 64bit architectures
+				int alignment = (startIndex + 3) & ~3;
+				byte* aligned = inbuf + alignment;
+				byte* start = inptr;
+				byte c = *aligned;
+				uint mask;
+
+				// scan byte-wise up to the next 4-byte aligned position, using a temporary '\n' there as a stop
+				*aligned = (byte) '\n';
+				while (*inptr != (byte) '\n')
+					inptr++;
+				*aligned = c;
+
+				if (inptr == aligned && c != (byte) '\n') {
+					// -funroll-loops, yippee ki-yay.
+					uint* dword = (uint*) inptr;
+
+					// examine 4 bytes at a time: after the XOR, a '\n' byte becomes 0x00 and
+					// the second expression is non-zero when the word contains a zero byte
+					do {
+						mask = *dword++ ^ 0x0A0A0A0A;
+						mask = ((mask - 0x01010101) & (~mask & 0x80808080));
+					} while (mask == 0);
+
+					// locate the exact '\n' within the word that matched
+					inptr = (byte*) (dword - 1);
+					while (*inptr != (byte) '\n')
+						inptr++;
+				}
+
+				length = (int) (inptr - start);
+
+				if (inptr < inend) {
+					// found a complete line
+					if ((boundary = CheckBoundary (startIndex, start, length)) != BoundaryType.None)
+						break;
+
+					if (length > 0 && *(inptr - 1) == (byte) '\r')
+						formats[(int) NewLineFormat.Dos] = true;
+					else
+						formats[(int) NewLineFormat.Unix] = true;
+
+					lineNumber++;
+					length++;
+					inptr++;
+
+					prevLineBeginOffset = lineBeginOffset;
+					lineBeginOffset = GetOffset ((int) (inptr - inbuf));
+				} else {
+					// didn't find the end of the line...
+					midline = true;
+
+					if (boundary == BoundaryType.None) {
+						// not enough to tell if we found a boundary
+						break;
+					}
+
+					if ((boundary = CheckBoundary (startIndex, start, length)) != BoundaryType.None)
+						break;
+				}
+
+				startIndex += length;
+			}
+
+			inputIndex = startIndex;
+		}
+
+		// Result of scanning a run of content: its length, line count and newline style.
+		class ScanContentResult
+		{
+			public readonly NewLineFormat?
Format;
+			public readonly bool IsEmpty;
+			public readonly int ContentLength;
+			public readonly int Lines;
+
+			// 'formats' is indexed by (int) NewLineFormat.Unix and (int) NewLineFormat.Dos;
+			// Format is Mixed when both were seen and null when neither was.
+			public ScanContentResult (int contentLength, int lines, bool[] formats, bool isEmpty)
+			{
+				ContentLength = contentLength;
+				if (formats[(int) NewLineFormat.Unix] && formats[(int) NewLineFormat.Dos])
+					Format = NewLineFormat.Mixed;
+				else if (formats[(int) NewLineFormat.Unix])
+					Format = NewLineFormat.Unix;
+				else if (formats[(int) NewLineFormat.Dos])
+					Format = NewLineFormat.Dos;
+				else
+					Format = null;
+				IsEmpty = isEmpty;
+				Lines = lines;
+			}
+		}
+
+		// Selects which On*Read callback receives the scanned data.
+		enum ScanContentType
+		{
+			MimeContent,
+			MultipartPreamble,
+			MultipartEpilogue
+		}
+
+		// Scans content until a boundary or the end of the stream is reached, passing each
+		// scanned chunk of the input buffer to the callback selected by 'type'.
+		// NOTE(review): contentLength is an int, so content larger than int.MaxValue bytes
+		// would overflow the running total - confirm whether that needs handling.
+		unsafe ScanContentResult ScanContent (ScanContentType type, byte* inbuf, long beginOffset, int beginLineNumber, bool trimNewLine, CancellationToken cancellationToken)
+		{
+			int atleast = Math.Max (ReadAheadSize, GetMaxBoundaryLength ());
+			int contentIndex = inputIndex;
+			var formats = new bool[2];
+			int contentLength = 0;
+			bool midline = false;
+			int nleft;
+
+			do {
+				// report what the previous pass scanned before the buffer is shifted/refilled
+				if (contentIndex < inputIndex) {
+					switch (type) {
+					case ScanContentType.MultipartPreamble:
+						OnMultipartPreambleRead (input, contentIndex, inputIndex - contentIndex, cancellationToken);
+						break;
+					case ScanContentType.MultipartEpilogue:
+						OnMultipartEpilogueRead (input, contentIndex, inputIndex - contentIndex, cancellationToken);
+						break;
+					default:
+						OnMimePartContentRead (input, contentIndex, inputIndex - contentIndex, cancellationToken);
+						break;
+					}
+
+					contentLength += inputIndex - contentIndex;
+				}
+
+				nleft = inputEnd - inputIndex;
+				// keep the 2 bytes in front of inputIndex (see the trailing "\r\n" check after the loop)
+				if (ReadAhead (atleast, 2, cancellationToken) <= 0) {
+					boundary = BoundaryType.Eos;
+					contentIndex = inputIndex;
+					break;
+				}
+
+				ScanContent (inbuf, ref contentIndex, ref nleft, ref midline, ref formats);
+			} while (boundary == BoundaryType.None);
+
+			if (contentIndex < inputIndex) {
+				switch (type) {
+				case ScanContentType.MultipartPreamble:
+					OnMultipartPreambleRead (input, contentIndex, inputIndex -
contentIndex, cancellationToken);
+					break;
+				case ScanContentType.MultipartEpilogue:
+					OnMultipartEpilogueRead (input, contentIndex, inputIndex - contentIndex, cancellationToken);
+					break;
+				default:
+					OnMimePartContentRead (input, contentIndex, inputIndex - contentIndex, cancellationToken);
+					break;
+				}
+
+				contentLength += inputIndex - contentIndex;
+			}
+
+			// FIXME: need to redesign the above loop so that we don't consume the last <CR><LF> that belongs to the boundary marker.
+			var isEmpty = contentLength == 0;
+
+			// Note: the trailing line terminator has already been passed to the On*Read
+			// callback above; only the reported length is adjusted here.
+			if (boundary != BoundaryType.Eos && trimNewLine) {
+				// the last \r\n belongs to the boundary
+				if (contentLength > 0) {
+					if (input[inputIndex - 2] == (byte) '\r')
+						contentLength -= 2;
+					else
+						contentLength--;
+				}
+			}
+
+			var endOffset = beginOffset + contentLength;
+			var lines = GetLineCount (beginLineNumber, beginOffset, endOffset);
+
+			return new ScanContentResult (contentLength, lines, formats, isEmpty);
+		}
+
+		// Scans the content of a leaf MIME part, raising the OnMimePartContentBegin/Read/End
+		// callbacks. Returns the number of content lines.
+		unsafe int ConstructMimePart (byte* inbuf, CancellationToken cancellationToken)
+		{
+			var beginOffset = GetOffset (inputIndex);
+			var beginLineNumber = lineNumber;
+
+			OnMimePartContentBegin (beginOffset, beginLineNumber, cancellationToken);
+			var result = ScanContent (ScanContentType.MimeContent, inbuf, beginOffset, beginLineNumber, true, cancellationToken);
+			OnMimePartContentEnd (beginOffset, beginLineNumber, beginOffset + result.ContentLength, result.Lines, cancellationToken);
+
+			return result.Lines;
+		}
+
+		// Parses an embedded message (headers followed by a body entity) at the current
+		// position, raising the corresponding callbacks. Returns the number of lines
+		// consumed, or 0 if the input is exhausted or a boundary is found right away.
+		unsafe int ConstructMessagePart (ParserOptions options, byte* inbuf, int depth, CancellationToken cancellationToken)
+		{
+			var beginOffset = GetOffset (inputIndex);
+			var beginLineNumber = lineNumber;
+
+			if (bounds.Count > 0) {
+				// peek at the first line to see whether it is a boundary
+				int atleast = Math.Max (ReadAheadSize, GetMaxBoundaryLength ());
+
+				if (ReadAhead (atleast, 0, cancellationToken) <= 0) {
+					boundary = BoundaryType.Eos;
+					return 0;
+				}
+
+				byte* start = inbuf + inputIndex;
+				byte* inend = inbuf + inputEnd;
+				byte* inptr = start;
+
+				*inend = (byte) '\n';
+
+				while (*inptr !=
(byte) '\n') + inptr++; + + boundary = CheckBoundary (inputIndex, start, (int) (inptr - start)); + + switch (boundary) { + case BoundaryType.ImmediateEndBoundary: + case BoundaryType.ImmediateBoundary: + case BoundaryType.ParentBoundary: + return 0; + case BoundaryType.ParentEndBoundary: + // ignore "From " boundaries, broken mailers tend to include these... + if (!IsMboxMarker (start)) { + return 0; + } + break; + } + } + + // parse the headers... + state = MimeParserState.MessageHeaders; + if (Step (options, inbuf, cancellationToken) == MimeParserState.Error) { + // Note: this either means that StepHeaders() found the end of the stream + // or an invalid header field name at the start of the message headers, + // which likely means that this is not a valid MIME stream? + boundary = BoundaryType.Eos; + return GetLineCount (beginLineNumber, beginOffset, GetEndOffset (inputIndex)); + } + + var currentHeadersEndOffset = headerBlockEnd; + var currentBeginOffset = headerBlockBegin; + + OnMimeMessageBegin (currentBeginOffset, beginLineNumber, cancellationToken); + + if (preHeaderBuffer.Length > 0) { + // FIXME: how to solve this? 
+ //message.MboxMarker = new byte[preHeaderLength]; + //Buffer.BlockCopy (preHeaderBuffer, 0, message.MboxMarker, 0, preHeaderLength); + } + + var type = GetContentType (null); + MimeEntityType entityType; + int lines; + + if (depth < options.MaxMimeDepth && IsMultipart (type)) { + OnMultipartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken); + lines = ConstructMultipart (options, type, inbuf, depth + 1, cancellationToken); + entityType = MimeEntityType.Multipart; + } else if (depth < options.MaxMimeDepth && IsMessagePart (type, currentEncoding)) { + OnMessagePartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken); + lines = ConstructMessagePart (options, inbuf, depth + 1, cancellationToken); + entityType = MimeEntityType.MessagePart; + } else { + OnMimePartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken); + lines = ConstructMimePart (inbuf, cancellationToken); + entityType = MimeEntityType.MimePart; + } + + var endOffset = GetEndOffset (inputIndex); + currentHeadersEndOffset = Math.Min (currentHeadersEndOffset, endOffset); + + switch (entityType) { + case MimeEntityType.Multipart: + OnMultipartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken); + break; + case MimeEntityType.MessagePart: + OnMessagePartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken); + break; + default: + OnMimePartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken); + break; + } + + OnMimeMessageEnd (currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken); + + return GetLineCount (beginLineNumber, beginOffset, endOffset); + } + + unsafe void MultipartScanPreamble (byte* inbuf, CancellationToken cancellationToken) + { + var beginOffset = GetOffset (inputIndex); + var beginLineNumber = lineNumber; + + OnMultipartPreambleBegin 
(beginOffset, beginLineNumber, cancellationToken); + var result = ScanContent (ScanContentType.MultipartPreamble, inbuf, beginOffset, beginLineNumber, false, cancellationToken); + OnMultipartPreambleEnd (beginOffset, beginLineNumber, beginOffset + result.ContentLength, result.Lines, cancellationToken); + } + + unsafe void MultipartScanEpilogue (byte* inbuf, CancellationToken cancellationToken) + { + var beginOffset = GetOffset (inputIndex); + var beginLineNumber = lineNumber; + + OnMultipartEpilogueBegin (beginOffset, beginLineNumber, cancellationToken); + var result = ScanContent (ScanContentType.MultipartEpilogue, inbuf, beginOffset, beginLineNumber, true, cancellationToken); + OnMultipartEpilogueEnd (beginOffset, beginLineNumber, beginOffset + result.ContentLength, result.Lines, cancellationToken); + } + + unsafe void MultipartScanSubparts (ParserOptions options, ContentType multipartContentType, byte* inbuf, int depth, CancellationToken cancellationToken) + { + var boundaryOffset = GetOffset (inputIndex); + + do { + // skip over the boundary marker + if (!SkipLine (inbuf, true, cancellationToken)) { + OnMultipartBoundary (multipartContentType.Boundary, boundaryOffset, GetOffset (inputIndex), lineNumber, cancellationToken); + boundary = BoundaryType.Eos; + return; + } + + OnMultipartBoundary (multipartContentType.Boundary, boundaryOffset, GetOffset (inputIndex), lineNumber - 1, cancellationToken); + + var beginLineNumber = lineNumber; + + // parse the headers + state = MimeParserState.Headers; + if (Step (options, inbuf, cancellationToken) == MimeParserState.Error) { + boundary = BoundaryType.Eos; + return; + } + + if (state == MimeParserState.Boundary) { + if (headerCount == 0) { + if (boundary == BoundaryType.ImmediateBoundary) { + boundaryOffset = GetOffset (inputIndex); + continue; + } + return; + } + + // This part has no content, but that will be handled in ConstructMultipart() + // or ConstructMimePart(). 
+ } + + //if (state == ParserState.Complete && headers.Count == 0) + // return BoundaryType.EndBoundary; + + var type = GetContentType (multipartContentType); + var currentHeadersEndOffset = headerBlockEnd; + var currentBeginOffset = headerBlockBegin; + MimeEntityType entityType; + int lines; + + if (depth < options.MaxMimeDepth && IsMultipart (type)) { + OnMultipartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken); + lines = ConstructMultipart (options, type, inbuf, depth + 1, cancellationToken); + entityType = MimeEntityType.Multipart; + } else if (depth < options.MaxMimeDepth && IsMessagePart (type, currentEncoding)) { + OnMessagePartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken); + lines = ConstructMessagePart (options, inbuf, depth + 1, cancellationToken); + entityType = MimeEntityType.MessagePart; + } else { + OnMimePartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken); + lines = ConstructMimePart (inbuf, cancellationToken); + entityType = MimeEntityType.MimePart; + } + + var endOffset = GetEndOffset (inputIndex); + currentHeadersEndOffset = Math.Min (currentHeadersEndOffset, endOffset); + + switch (entityType) { + case MimeEntityType.Multipart: + OnMultipartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken); + break; + case MimeEntityType.MessagePart: + OnMessagePartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken); + break; + default: + OnMimePartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken); + break; + } + + boundaryOffset = endOffset; + } while (boundary == BoundaryType.ImmediateBoundary); + } + + void PushBoundary (string boundary) + { + if (bounds.Count > 0) + bounds.Insert (0, new Boundary (boundary, bounds[0].MaxLength)); + else + bounds.Add (new Boundary (boundary, 0)); + } + + void PopBoundary () + { + 
bounds.RemoveAt (0); + } + + unsafe int ConstructMultipart (ParserOptions options, ContentType contentType, byte* inbuf, int depth, CancellationToken cancellationToken) + { + var beginOffset = GetOffset (inputIndex); + var marker = contentType.Boundary; + var beginLineNumber = lineNumber; + long endOffset; + + if (marker == null) { +#if DEBUG + Debug.WriteLine ("Multipart without a boundary encountered!"); +#endif + + // Note: this will scan all content into the preamble... + MultipartScanPreamble (inbuf, cancellationToken); + + endOffset = GetEndOffset (inputIndex); + + return GetLineCount (beginLineNumber, beginOffset, endOffset); + } + + PushBoundary (marker); + + MultipartScanPreamble (inbuf, cancellationToken); + if (boundary == BoundaryType.ImmediateBoundary) + MultipartScanSubparts (options, contentType, inbuf, depth, cancellationToken); + + if (boundary == BoundaryType.ImmediateEndBoundary) { + // consume the end boundary and read the epilogue (if there is one) + // FIXME: multipart.WriteEndBoundary = true; + + var boundaryOffset = GetOffset (inputIndex); + var boundaryLineNumber = lineNumber; + + SkipLine (inbuf, false, cancellationToken); + + OnMultipartEndBoundary (marker, boundaryOffset, GetOffset (inputIndex), boundaryLineNumber, cancellationToken); + + PopBoundary (); + + MultipartScanEpilogue (inbuf, cancellationToken); + + endOffset = GetEndOffset (inputIndex); + + return GetLineCount (beginLineNumber, beginOffset, endOffset); + } + + // FIXME: multipart.WriteEndBoundary = false; + + // We either found the end of the stream or we found a parent's boundary + PopBoundary (); + + if (boundary == BoundaryType.ParentEndBoundary && FoundImmediateBoundary (inbuf, true)) + boundary = BoundaryType.ImmediateEndBoundary; + else if (boundary == BoundaryType.ParentBoundary && FoundImmediateBoundary (inbuf, false)) + boundary = BoundaryType.ImmediateBoundary; + + endOffset = GetEndOffset (inputIndex); + + return GetLineCount (beginLineNumber, beginOffset, 
endOffset); + } + + unsafe void ReadEntity (ParserOptions options, byte* inbuf, CancellationToken cancellationToken) + { + var beginLineNumber = lineNumber; + + state = MimeParserState.Headers; + toplevel = true; + + if (Step (options, inbuf, cancellationToken) == MimeParserState.Error) + throw new FormatException ("Failed to parse entity headers."); + + var type = GetContentType (null); + var currentHeadersEndOffset = headerBlockEnd; + var currentBeginOffset = headerBlockBegin; + MimeEntityType entityType; + int lines; + + if (IsMultipart (type)) { + OnMultipartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken); + lines = ConstructMultipart (options, type, inbuf, 0, cancellationToken); + entityType = MimeEntityType.Multipart; + } else if (IsMessagePart (type, currentEncoding)) { + OnMessagePartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken); + lines = ConstructMessagePart (options, inbuf, 0, cancellationToken); + entityType = MimeEntityType.MessagePart; + } else { + OnMimePartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken); + lines = ConstructMimePart (inbuf, cancellationToken); + entityType = MimeEntityType.MimePart; + } + + var endOffset = GetEndOffset (inputIndex); + currentHeadersEndOffset = Math.Min (currentHeadersEndOffset, endOffset); + + switch (entityType) { + case MimeEntityType.Multipart: + OnMultipartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken); + break; + case MimeEntityType.MessagePart: + OnMessagePartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken); + break; + default: + OnMimePartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken); + break; + } + + if (boundary != BoundaryType.Eos) + state = MimeParserState.Complete; + else + state = MimeParserState.Eos; + } + + /// + /// Parses an entity from the stream. 
+		/// </summary>
+		/// <remarks>
+		/// Parses an entity from the stream.
+		/// </remarks>
+		/// <param name="options">The parser options.</param>
+		/// <param name="cancellationToken">The cancellation token.</param>
+		/// <exception cref="System.ArgumentNullException">
+		/// <paramref name="options"/> is <c>null</c>.
+		/// </exception>
+		/// <exception cref="System.OperationCanceledException">
+		/// The operation was canceled via the cancellation token.
+		/// </exception>
+		/// <exception cref="System.FormatException">
+		/// There was an error parsing the entity.
+		/// </exception>
+		/// <exception cref="System.IO.IOException">
+		/// An I/O error occurred.
+		/// </exception>
+		public void ReadEntity (ParserOptions options, CancellationToken cancellationToken = default (CancellationToken))
+		{
+			if (options == null)
+				throw new ArgumentNullException (nameof (options));
+
+			unsafe {
+				// pin the input buffer so that the unsafe implementation can work with a raw pointer
+				fixed (byte* inbuf = input) {
+					ReadEntity (options, inbuf, cancellationToken);
+				}
+			}
+		}
+
+		/// <summary>
+		/// Parses an entity from the stream.
+		/// </summary>
+		/// <remarks>
+		/// Parses an entity from the stream using <see cref="ParserOptions.Default"/>.
+		/// </remarks>
+		/// <param name="cancellationToken">The cancellation token.</param>
+		/// <exception cref="System.OperationCanceledException">
+		/// The operation was canceled via the cancellation token.
+		/// </exception>
+		/// <exception cref="System.FormatException">
+		/// There was an error parsing the entity.
+		/// </exception>
+		/// <exception cref="System.IO.IOException">
+		/// An I/O error occurred.
+		/// </exception>
+		public void ReadEntity (CancellationToken cancellationToken = default (CancellationToken))
+		{
+			ReadEntity (ParserOptions.Default, cancellationToken);
+		}
+
+		// Parses a single top-level message.
+		//
+		// Steps the parser until it reaches the MessageHeaders state (throwing a
+		// FormatException if Step() reports an error or the end of the stream first),
+		// parses the message headers, raises OnMimeMessageBegin, dispatches the body by
+		// Content-Type starting at depth 0, raises the matching On*End event followed by
+		// OnMimeMessageEnd and finally updates 'state' for the next read.
+		unsafe void ReadMessage (ParserOptions options, byte* inbuf, CancellationToken cancellationToken)
+		{
+			// scan the from-line if we are parsing an mbox
+			while (state != MimeParserState.MessageHeaders) {
+				switch (Step (options, inbuf, cancellationToken)) {
+				case MimeParserState.Error:
+					throw new FormatException ("Failed to find mbox From marker.");
+				case MimeParserState.Eos:
+					throw new FormatException ("End of stream.");
+				}
+			}
+
+			toplevel = true;
+
+			// parse the headers
+			var beginLineNumber = lineNumber;
+			if (state < MimeParserState.Content && Step (options, inbuf, cancellationToken) == MimeParserState.Error)
+				throw new FormatException ("Failed to parse message headers.");
+
+			var currentHeadersEndOffset = headerBlockEnd;
+			var currentBeginOffset = headerBlockBegin;
+
+			OnMimeMessageBegin (currentBeginOffset, beginLineNumber, cancellationToken);
+
+			// Only honour the parsed content length for mbox streams when the caller opted in via
+			// RespectContentLength; a contentEnd of 0 is used in every other case.
+			// NOTE(review): 0 presumably means "no known content end" - confirm against ScanContent().
+			if (format == MimeFormat.Mbox && options.RespectContentLength && currentContentLength.HasValue && currentContentLength.Value != -1)
+				contentEnd = GetOffset (inputIndex) + currentContentLength.Value;
+			else
+				contentEnd = 0;
+
+			var type = GetContentType (null);
+			MimeEntityType entityType;
+			int lines;
+
+			if (IsMultipart (type)) {
+				OnMultipartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken);
+				lines = ConstructMultipart (options, type, inbuf, 0, cancellationToken);
+				entityType = MimeEntityType.Multipart;
+			} else if (IsMessagePart (type, currentEncoding)) {
+				OnMessagePartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken);
+				lines = ConstructMessagePart (options, inbuf, 0, cancellationToken);
+				entityType = MimeEntityType.MessagePart;
+			} else {
+				OnMimePartBegin (type, currentBeginOffset, beginLineNumber, cancellationToken);
+				lines = ConstructMimePart (inbuf, cancellationToken);
+				entityType = MimeEntityType.MimePart;
+			}
+
+			var endOffset = GetEndOffset (inputIndex);
+			// clamp so that the reported headers-end offset never exceeds the entity's end offset
+			currentHeadersEndOffset = Math.Min (currentHeadersEndOffset, endOffset);
+
+			switch (entityType) {
+			case MimeEntityType.Multipart:
+				OnMultipartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken);
+				break;
+			case MimeEntityType.MessagePart:
+				OnMessagePartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken);
+				break;
+			default:
+				OnMimePartEnd (type, currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken);
+				break;
+			}
+
+			OnMimeMessageEnd (currentBeginOffset, beginLineNumber, currentHeadersEndOffset, endOffset, lines, cancellationToken);
+
+			// More data remains: an mbox continues with the next From-marker, anything else is complete.
+			if (boundary != BoundaryType.Eos) {
+				if (format == MimeFormat.Mbox)
+					state = MimeParserState.MboxMarker;
+				else
+					state = MimeParserState.Complete;
+			} else {
+				state = MimeParserState.Eos;
+			}
+		}
+
+		/// <summary>
+		/// Parses a message from the stream.
+		/// </summary>
+		/// <remarks>
+		/// Parses a message from the stream. The message is not returned; it is reported
+		/// through the reader's event callbacks as it is parsed.
+		/// </remarks>
+		/// <param name="options">The parser options.</param>
+		/// <param name="cancellationToken">The cancellation token.</param>
+		/// <exception cref="System.ArgumentNullException">
+		/// <paramref name="options"/> is <c>null</c>.
+		/// </exception>
+		/// <exception cref="System.OperationCanceledException">
+		/// The operation was canceled via the cancellation token.
+		/// </exception>
+		/// <exception cref="System.FormatException">
+		/// There was an error parsing the message.
+		/// </exception>
+		/// <exception cref="System.IO.IOException">
+		/// An I/O error occurred.
+		/// </exception>
+		public void ReadMessage (ParserOptions options, CancellationToken cancellationToken = default (CancellationToken))
+		{
+			if (options == null)
+				throw new ArgumentNullException (nameof (options));
+
+			unsafe {
+				// pin the input buffer so that the unsafe implementation can work with a raw pointer
+				fixed (byte* inbuf = input) {
+					ReadMessage (options, inbuf, cancellationToken);
+				}
+			}
+		}
+
+		/// <summary>
+		/// Parses a message from the stream.
+		/// </summary>
+		/// <remarks>
+		/// Parses a message from the stream using <see cref="ParserOptions.Default"/>.
+		/// The message is not returned; it is reported through the reader's event
+		/// callbacks as it is parsed.
+		/// </remarks>
+		/// <param name="cancellationToken">The cancellation token.</param>
+		/// <exception cref="System.OperationCanceledException">
+		/// The operation was canceled via the cancellation token.
+		/// </exception>
+		/// <exception cref="System.FormatException">
+		/// There was an error parsing the message.
+		/// </exception>
+		/// <exception cref="System.IO.IOException">
+		/// An I/O error occurred.
+		/// </exception>
+		public void ReadMessage (CancellationToken cancellationToken = default (CancellationToken))
+		{
+			ReadMessage (ParserOptions.Default, cancellationToken);
+		}
+	}
+}
diff --git a/MimeKit/ParserOptions.cs b/MimeKit/ParserOptions.cs
index 64537f4efc..070a3a05ed 100644
--- a/MimeKit/ParserOptions.cs
+++ b/MimeKit/ParserOptions.cs
@@ -272,6 +272,20 @@ public void RegisterMimeType (string mimeType, Type type)
 			mimeTypes[mimeType] = ctor;
 		}
 
+		// Returns false for the SevenBit, EightBit and Binary content encodings
+		// and true for every other ContentEncoding value.
+		internal static bool IsEncoded (ContentEncoding encoding)
+		{
+			switch (encoding) {
+			case ContentEncoding.SevenBit:
+			case ContentEncoding.EightBit:
+			case ContentEncoding.Binary:
+				return false;
+			default:
+				return true;
+			}
+		}
+
 		static bool IsEncoded (IList<Header> headers)
 		{
 			ContentEncoding encoding;
@@ -282,14 +296,7 @@ static bool IsEncoded (IList<Header> headers)
 
 				MimeUtils.TryParse (headers[i].Value, out encoding);
 
-				switch (encoding) {
-				case ContentEncoding.SevenBit:
-				case ContentEncoding.EightBit:
-				case ContentEncoding.Binary:
-					return false;
-				default:
-					return true;
-				}
+				return IsEncoded (encoding);
 			}
 
 			return false;
diff --git a/UnitTests/MimeReaderTests.cs b/UnitTests/MimeReaderTests.cs
new file mode 100644
index 0000000000..5e92130eda
--- /dev/null
+++ b/UnitTests/MimeReaderTests.cs
@@ -0,0 +1,457 @@
+//
+// MimeReaderTests.cs
+//
+// Author: Jeffrey Stedfast
+//
+// Copyright (c) 2013-2021 .NET Foundation and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+
+using System;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading;
+using System.Globalization;
+using System.Threading.Tasks;
+using System.Collections.Generic;
+
+using NUnit.Framework;
+
+using Newtonsoft.Json;
+
+using MimeKit;
+using MimeKit.IO;
+using MimeKit.Utils;
+using MimeKit.IO.Filters;
+
+namespace UnitTests {
+	// Tests for MimeReader: argument validation plus mbox parsing checks that compare the
+	// offsets reported through the reader's callbacks against JSON baseline files.
+	[TestFixture]
+	public class MimeReaderTests
+	{
+		static readonly string MessagesDataDir = Path.Combine (TestHelper.ProjectDir, "TestData", "messages");
+		static readonly string MboxDataDir = Path.Combine (TestHelper.ProjectDir, "TestData", "mbox");
+		static readonly FormatOptions UnixFormatOptions;
+
+		// Static state is initialized exactly once here rather than being re-assigned
+		// every time the fixture is instantiated.
+		static MimeReaderTests ()
+		{
+			UnixFormatOptions = FormatOptions.Default.Clone ();
+			UnixFormatOptions.NewLineFormat = NewLineFormat.Unix;
+		}
+
+		[Test]
+		public void TestArgumentExceptions ()
+		{
+			Assert.Throws<ArgumentNullException> (() => new MimeReader (null));
+			Assert.Throws<ArgumentNullException> (() => new MimeReader (null, MimeFormat.Default));
+
+			using (var stream = new MemoryStream ()) {
+				var reader = new MimeReader (stream);
+
+				Assert.Throws<ArgumentNullException> (() => reader.ReadEntity (null));
+				Assert.ThrowsAsync<ArgumentNullException> (() => reader.ReadEntityAsync (null));
+
+				Assert.Throws<ArgumentNullException> (() => reader.ReadMessage (null));
+				Assert.ThrowsAsync<ArgumentNullException> (() => reader.ReadMessageAsync (null));
+			}
+		}
+
+		// Detects the newline convention of a file from the first '\n' found within its first
+		// 1024 bytes. Falls back to NewLineFormat.Dos when no '\n' is found in that range.
+		static NewLineFormat DetectNewLineFormat (string fileName)
+		{
+			using (var stream = File.OpenRead (fileName)) {
+				var buffer = new byte[1024];
+
+				var nread = stream.Read (buffer, 0, buffer.Length);
+
+				for (int i = 0; i < nread; i++) {
+					if (buffer[i] == (byte) '\n') {
+						if (i > 0 && buffer[i - 1] == (byte) '\r')
+							return NewLineFormat.Dos;
+
+						return NewLineFormat.Unix;
+					}
+				}
+			}
+
+			return NewLineFormat.Dos;
+		}
+
+		// JSON-serializable record of the offsets reported for a message or MIME entity.
+		class MimeOffsets
+		{
+			[JsonProperty ("mimeType", NullValueHandling = NullValueHandling.Ignore)]
+			public string MimeType { get; set; }
+
+			[JsonProperty ("mboxMarkerOffset", NullValueHandling = NullValueHandling.Ignore)]
+			public long? MboxMarkerOffset { get; set; }
+
+			[JsonProperty ("lineNumber")]
+			public int LineNumber { get; set; }
+
+			[JsonProperty ("beginOffset")]
+			public long BeginOffset { get; set; }
+
+			[JsonProperty ("headersEndOffset")]
+			public long HeadersEndOffset { get; set; }
+
+			[JsonProperty ("endOffset")]
+			public long EndOffset { get; set; }
+
+			[JsonProperty ("message", NullValueHandling = NullValueHandling.Ignore)]
+			public MimeOffsets Message { get; set; }
+
+			[JsonProperty ("body", NullValueHandling = NullValueHandling.Ignore)]
+			public MimeOffsets Body { get; set; }
+
+			[JsonProperty ("children", NullValueHandling = NullValueHandling.Ignore)]
+			public List<MimeOffsets> Children { get; set; }
+
+			[JsonProperty ("octets")]
+			public long Octets { get; set; }
+
+			[JsonProperty ("lines", NullValueHandling = NullValueHandling.Ignore)]
+			public int? Lines { get; set; }
+		}
+
+		// The kind of node currently being tracked on the CustomMimeReader stack.
+		enum MimeType
+		{
+			Message,
+			MessagePart,
+			Multipart,
+			MimePart
+		}
+
+		// A stack entry pairing a node's kind with the offsets being collected for it.
+		class MimeItem
+		{
+			public readonly MimeOffsets Offsets;
+			public readonly MimeType Type;
+
+			public MimeItem (MimeType type, MimeOffsets offsets)
+			{
+				Offsets = offsets;
+				Type = type;
+			}
+		}
+
+		// Recursively asserts that the actual offsets match the expected ones. Only the first
+		// non-null of expected.Message, expected.Body and expected.Children is descended into.
+		static void AssertMimeOffsets (MimeOffsets expected, MimeOffsets actual, int message, string partSpecifier)
+		{
+			Assert.AreEqual (expected.MimeType, actual.MimeType, $"mime-type differs for message #{message}{partSpecifier}");
+			Assert.AreEqual (expected.MboxMarkerOffset, actual.MboxMarkerOffset, $"mbox marker begin offset differs for message #{message}{partSpecifier}");
+			Assert.AreEqual (expected.BeginOffset, actual.BeginOffset, $"begin offset differs for message #{message}{partSpecifier}");
+			Assert.AreEqual (expected.LineNumber, actual.LineNumber, $"begin line differs for message #{message}{partSpecifier}");
+			Assert.AreEqual (expected.HeadersEndOffset, actual.HeadersEndOffset, $"headers end offset differs for message #{message}{partSpecifier}");
+			Assert.AreEqual (expected.EndOffset, actual.EndOffset, $"end offset differs for message #{message}{partSpecifier}");
+			Assert.AreEqual (expected.Octets, actual.Octets, $"octets differs for message #{message}{partSpecifier}");
+			Assert.AreEqual (expected.Lines, actual.Lines, $"lines differs for message #{message}{partSpecifier}");
+
+			if (expected.Message != null) {
+				Assert.NotNull (actual.Message, $"message content is null for message #{message}{partSpecifier}");
+				AssertMimeOffsets (expected.Message, actual.Message, message, partSpecifier + "/message");
+			} else if (expected.Body != null) {
+				Assert.NotNull (actual.Body, $"body content is null for message #{message}{partSpecifier}");
+				AssertMimeOffsets (expected.Body, actual.Body, message, partSpecifier + "/0");
+			} else if (expected.Children != null) {
+				// fail with a readable message instead of a NullReferenceException when no children were recorded
+				Assert.NotNull (actual.Children, $"children are null for message #{message}{partSpecifier}");
+				Assert.AreEqual (expected.Children.Count, actual.Children.Count, $"children count differs for message #{message}{partSpecifier}");
+				for (int i = 0; i < expected.Children.Count; i++)
+					AssertMimeOffsets (expected.Children[i], actual.Children[i], message, partSpecifier + $".{i}");
+			}
+		}
+
+		// A MimeReader that records the offsets reported by the begin/end callbacks into a tree
+		// of MimeOffsets. Each asynchronous override runs the synchronous handler and then
+		// defers to the base implementation.
+		class CustomMimeReader : MimeReader
+		{
+			public readonly List<MimeOffsets> Offsets = new List<MimeOffsets> ();
+			public readonly List<MimeItem> stack = new List<MimeItem> ();
+			long mboxMarkerBeginOffset = -1;
+			int mboxMarkerLineNumber = -1;
+
+			public CustomMimeReader (Stream stream, MimeFormat format) : base (stream, format)
+			{
+			}
+
+			protected override void OnMboxMarkerRead (byte[] marker, int startIndex, int count, long beginOffset, int lineNumber, CancellationToken cancellationToken)
+			{
+				mboxMarkerBeginOffset = beginOffset;
+				mboxMarkerLineNumber = lineNumber;
+			}
+
+			protected override Task OnMboxMarkerReadAsync (byte[] marker, int startIndex, int count, long beginOffset, int lineNumber, CancellationToken cancellationToken)
+			{
+				OnMboxMarkerRead (marker, startIndex, count, beginOffset, lineNumber, cancellationToken);
+				return base.OnMboxMarkerReadAsync (marker, startIndex, count, beginOffset, lineNumber, cancellationToken);
+			}
+
+			protected override void OnMimeMessageBegin (long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+			{
+				var offsets = new MimeOffsets {
+					BeginOffset = beginOffset,
+					LineNumber = beginLineNumber
+				};
+
+				if (stack.Count > 0) {
+					// a nested message must be the content of a message part
+					var parent = stack[stack.Count - 1];
+					Assert.AreEqual (MimeType.MessagePart, parent.Type);
+					parent.Offsets.Message = offsets;
+				} else {
+					// a top-level message also records the most recent mbox marker offset
+					offsets.MboxMarkerOffset = mboxMarkerBeginOffset;
+					Offsets.Add (offsets);
+				}
+
+				stack.Add (new MimeItem (MimeType.Message, offsets));
+			}
+
+			protected override Task OnMimeMessageBeginAsync (long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+			{
+				OnMimeMessageBegin (beginOffset, beginLineNumber, cancellationToken);
+				return base.OnMimeMessageBeginAsync (beginOffset, beginLineNumber, cancellationToken);
+			}
+
+			protected override void OnMimeMessageEnd (long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+			{
+				var current = stack[stack.Count - 1];
+
+				Assert.AreEqual (MimeType.Message, current.Type);
+
+				current.Offsets.Octets = endOffset - headersEndOffset;
+				current.Offsets.HeadersEndOffset = headersEndOffset;
+				current.Offsets.EndOffset = endOffset;
+
+				stack.RemoveAt (stack.Count - 1);
+			}
+
+			protected override Task OnMimeMessageEndAsync (long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+			{
+				OnMimeMessageEnd (beginOffset, beginLineNumber, headersEndOffset, endOffset, lines, cancellationToken);
+				return base.OnMimeMessageEndAsync (beginOffset, beginLineNumber, headersEndOffset, endOffset, lines, cancellationToken);
+			}
+
+			// Starts tracking a new entity: attaches its offsets to the parent on top of the stack
+			// (as the body of a message or as a child of a multipart) and pushes it onto the stack.
+			void Push (MimeType type, ContentType contentType, long beginOffset, int beginLineNumber)
+			{
+				var offsets = new MimeOffsets {
+					MimeType = contentType.MimeType,
+					BeginOffset = beginOffset,
+					LineNumber = beginLineNumber
+				};
+
+				if (stack.Count > 0) {
+					var parent = stack[stack.Count - 1];
+
+					switch (parent.Type) {
+					case MimeType.Message:
+						parent.Offsets.Body = offsets;
+						break;
+					case MimeType.Multipart:
+						if (parent.Offsets.Children == null)
+							parent.Offsets.Children = new List<MimeOffsets> ();
+						parent.Offsets.Children.Add (offsets);
+						break;
+					default:
+						Assert.Fail ();
+						break;
+					}
+				} else {
+					Offsets.Add (offsets);
+				}
+
+				stack.Add (new MimeItem (type, offsets));
+			}
+
+			// Finishes the entity on top of the stack: verifies its kind, records its end offsets,
+			// octet count and line count, and pops it.
+			void Pop (MimeType type, ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines)
+			{
+				var current = stack[stack.Count - 1];
+
+				Assert.AreEqual (type, current.Type);
+
+				current.Offsets.Octets = endOffset - headersEndOffset;
+				current.Offsets.HeadersEndOffset = headersEndOffset;
+				current.Offsets.EndOffset = endOffset;
+				current.Offsets.Lines = lines;
+
+				stack.RemoveAt (stack.Count - 1);
+			}
+
+			protected override void OnMessagePartBegin (ContentType contentType, long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+			{
+				Push (MimeType.MessagePart, contentType, beginOffset, beginLineNumber);
+			}
+
+			protected override Task OnMessagePartBeginAsync (ContentType contentType, long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+			{
+				OnMessagePartBegin (contentType, beginOffset, beginLineNumber, cancellationToken);
+				return base.OnMessagePartBeginAsync (contentType, beginOffset, beginLineNumber, cancellationToken);
+			}
+
+			protected override void OnMessagePartEnd (ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+			{
+				Pop (MimeType.MessagePart, contentType, beginOffset, beginLineNumber, headersEndOffset, endOffset, lines);
+			}
+
+			protected override Task OnMessagePartEndAsync (ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+			{
+				OnMessagePartEnd (contentType, beginOffset, beginLineNumber, headersEndOffset, endOffset, lines, cancellationToken);
+				return base.OnMessagePartEndAsync (contentType, beginOffset, beginLineNumber, headersEndOffset, endOffset, lines, cancellationToken);
+			}
+
+			protected override void OnMimePartBegin (ContentType contentType, long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+			{
+				Push (MimeType.MimePart, contentType, beginOffset, beginLineNumber);
+			}
+
+			protected override Task OnMimePartBeginAsync (ContentType contentType, long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+			{
+				OnMimePartBegin (contentType, beginOffset, beginLineNumber, cancellationToken);
+				return base.OnMimePartBeginAsync (contentType, beginOffset, beginLineNumber, cancellationToken);
+			}
+
+			protected override void OnMimePartEnd (ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+			{
+				Pop (MimeType.MimePart, contentType, beginOffset, beginLineNumber, headersEndOffset, endOffset, lines);
+			}
+
+			protected override Task OnMimePartEndAsync (ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+			{
+				OnMimePartEnd (contentType, beginOffset, beginLineNumber, headersEndOffset, endOffset, lines, cancellationToken);
+				return base.OnMimePartEndAsync (contentType, beginOffset, beginLineNumber, headersEndOffset, endOffset, lines, cancellationToken);
+			}
+
+			protected override void OnMultipartBegin (ContentType contentType, long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+			{
+				Push (MimeType.Multipart, contentType, beginOffset, beginLineNumber);
+			}
+
+			protected override Task OnMultipartBeginAsync (ContentType contentType, long beginOffset, int beginLineNumber, CancellationToken cancellationToken)
+			{
+				OnMultipartBegin (contentType, beginOffset, beginLineNumber, cancellationToken);
+				return base.OnMultipartBeginAsync (contentType, beginOffset, beginLineNumber, cancellationToken);
+			}
+
+			protected override void OnMultipartEnd (ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+			{
+				Pop (MimeType.Multipart, contentType, beginOffset, beginLineNumber, headersEndOffset, endOffset, lines);
+			}
+
+			protected override Task OnMultipartEndAsync (ContentType contentType, long beginOffset, int beginLineNumber, long headersEndOffset, long endOffset, int lines, CancellationToken cancellationToken)
+			{
+				OnMultipartEnd (contentType, beginOffset, beginLineNumber, headersEndOffset, endOffset, lines, cancellationToken);
+				return base.OnMultipartEndAsync (contentType, beginOffset, beginLineNumber, headersEndOffset, endOffset, lines, cancellationToken);
+			}
+		}
+
+		// Compares the collected offsets against the "<baseName>.<newline-format>-offsets.json"
+		// baseline in the mbox test data directory. When the baseline file does not exist yet,
+		// it is first generated from the collected offsets.
+		static void AssertMboxResults (string baseName, List<MimeOffsets> offsets, NewLineFormat newLineFormat)
+		{
+			var path = Path.Combine (MboxDataDir, baseName + "." + newLineFormat.ToString ().ToLowerInvariant () + "-offsets.json");
+			var jsonSerializer = JsonSerializer.CreateDefault ();
+
+			if (!File.Exists (path)) {
+				jsonSerializer.Formatting = Formatting.Indented;
+
+				using (var writer = new StreamWriter (path))
+					jsonSerializer.Serialize (writer, offsets);
+			}
+
+			using (var reader = new StreamReader (path)) {
+				var expectedOffsets = (List<MimeOffsets>) jsonSerializer.Deserialize (reader, typeof (List<MimeOffsets>));
+
+				Assert.AreEqual (expectedOffsets.Count, offsets.Count, "message count");
+
+				for (int i = 0; i < expectedOffsets.Count; i++)
+					AssertMimeOffsets (expectedOffsets[i], offsets[i], i, string.Empty);
+			}
+		}
+
+		// Reads every message of "<baseName>.mbox.txt" synchronously and verifies the reported
+		// offsets. A null 'options' exercises the ReadMessage() overload without parser options.
+		void TestMbox (ParserOptions options, string baseName)
+		{
+			var mbox = Path.Combine (MboxDataDir, baseName + ".mbox.txt");
+			var newLineFormat = DetectNewLineFormat (mbox);
+			List<MimeOffsets> offsets;
+
+			using (var stream = File.OpenRead (mbox)) {
+				var reader = new CustomMimeReader (stream, MimeFormat.Mbox);
+
+				while (!reader.IsEndOfStream) {
+					if (options != null)
+						reader.ReadMessage (options);
+					else
+						reader.ReadMessage ();
+				}
+
+				offsets = reader.Offsets;
+			}
+
+			AssertMboxResults (baseName, offsets, newLineFormat);
+		}
+
+		// Asynchronous counterpart of TestMbox().
+		async Task TestMboxAsync (ParserOptions options, string baseName)
+		{
+			var mbox = Path.Combine (MboxDataDir, baseName + ".mbox.txt");
+			var newLineFormat = DetectNewLineFormat (mbox);
+			List<MimeOffsets> offsets;
+
+			using (var stream = File.OpenRead (mbox)) {
+				var reader = new CustomMimeReader (stream, MimeFormat.Mbox);
+
+				while (!reader.IsEndOfStream) {
+					if (options != null)
+						await reader.ReadMessageAsync (options);
+					else
+						await reader.ReadMessageAsync ();
+				}
+
+				offsets = reader.Offsets;
+			}
+
+			AssertMboxResults (baseName, offsets, newLineFormat);
+		}
+
+		[Test]
+		public void TestContentLengthMbox ()
+		{
+			var options = ParserOptions.Default.Clone ();
+			options.RespectContentLength = true;
+
+			TestMbox (options, "content-length");
+		}
+
+		[Test]
+		public async Task TestContentLengthMboxAsync ()
+		{
+			var options = ParserOptions.Default.Clone ();
+			options.RespectContentLength = true;
+
+			await TestMboxAsync (options, "content-length");
+		}
+
+		[Test]
+		public void TestJwzMbox ()
+		{
+			TestMbox (null, "jwz");
+		}
+
+		[Test]
+		public async Task TestJwzMboxAsync ()
+		{
+			await TestMboxAsync (null, "jwz");
+		}
+	}
+}
diff --git a/UnitTests/UnitTests.csproj b/UnitTests/UnitTests.csproj
index a2c580ae41..6d89d35477 100644
--- a/UnitTests/UnitTests.csproj
+++ b/UnitTests/UnitTests.csproj
@@ -79,6 +79,7 @@
+