diff --git a/QuoteParser.Tests/EmailTestBase.cs b/QuoteParser.Tests/EmailTestBase.cs index 840adf4..5d828f8 100644 --- a/QuoteParser.Tests/EmailTestBase.cs +++ b/QuoteParser.Tests/EmailTestBase.cs @@ -1,4 +1,6 @@ -using System; +using MimeKit; +using MimeKit.Text; +using System; using System.IO; using Xunit; @@ -18,17 +20,18 @@ protected EmailTestBase(string folder) protected void Check(int emailNum, QuoteHeader expectedQuoteHeader) { - using (var stream = GetResourceStream(emailNum)) - { - var content = _parser.Value.Parse(stream); - Assert.Equal(expectedQuoteHeader, content.Header); - } + var content = _parser.Value.Parse(GetResourceTextBody(emailNum)); + Assert.Equal(expectedQuoteHeader, content.Header); } - protected Stream GetResourceStream(int emailNum) + protected string GetResourceTextBody(int emailNum) { var asm = typeof(ABTests).Assembly; - return asm.GetManifestResourceStream($"{asm.GetName().Name}.Resources.testEmls.{_folder}.{emailNum}.eml"); + using (var stream = asm.GetManifestResourceStream($"{asm.GetName().Name}.Resources.testEmls.{_folder}.{emailNum}.eml")) { + return MimeMessage + .Load(stream) + .GetTextBody(TextFormat.Plain); + } } protected virtual QuoteParser CreateQuoteParser() diff --git a/QuoteParser.Tests/QuoteParser.Tests.csproj b/QuoteParser.Tests/QuoteParser.Tests.csproj index 533dd83..3217fa8 100644 --- a/QuoteParser.Tests/QuoteParser.Tests.csproj +++ b/QuoteParser.Tests/QuoteParser.Tests.csproj @@ -164,6 +164,7 @@ + all diff --git a/QuoteParser.Tests/RecTests.cs b/QuoteParser.Tests/RecTests.cs index 2414f32..2814595 100644 --- a/QuoteParser.Tests/RecTests.cs +++ b/QuoteParser.Tests/RecTests.cs @@ -1,4 +1,5 @@ using System.Collections.Generic; +using System.IO; using Xunit; namespace QuoteParser.Tests @@ -41,13 +42,10 @@ public void TestEmail270() "In reply to:" } ); - - using (var stream = GetResourceStream(emailNum)) - { - var content = Parser.Parse(stream); - Assert.Equal(expectedQuoteHeader, content.Header); - 
Assert.Equal(expectedInnerQuoteHeader, content.Quote?.Header); - } + + var content = Parser.Parse(GetResourceTextBody(emailNum)); + Assert.Equal(expectedQuoteHeader, content.Header); + Assert.Equal(expectedInnerQuoteHeader, content.Quote?.Header); } [Fact] @@ -84,14 +82,11 @@ public void TestEmail6510() "##- Please type your reply above this line -## " } ); - - using (var stream = GetResourceStream(emailNum)) - { - var content = Parser.Parse(stream); - Assert.Equal(expectedQuoteHeader1, content.Header); - Assert.Equal(expectedQuoteHeader2, content.Quote?.Header); - Assert.Equal(expectedQuoteHeader3, content.Quote?.Quote?.Header); - } + + var content = Parser.Parse(GetResourceTextBody(emailNum)); + Assert.Equal(expectedQuoteHeader1, content.Header); + Assert.Equal(expectedQuoteHeader2, content.Quote?.Header); + Assert.Equal(expectedQuoteHeader3, content.Quote?.Quote?.Header); } } } diff --git a/QuoteParser/Parse.cs b/QuoteParser/Parse.cs deleted file mode 100644 index df90904..0000000 --- a/QuoteParser/Parse.cs +++ /dev/null @@ -1,24 +0,0 @@ -using System.IO; -using MimeKit; -using MimeKit.Text; - -namespace QuoteParser -{ - public static class Parse - { - public static bool ContainInReplyToHeader(MimeMessage msg) - { - return msg.Headers.Contains("In-Reply-To") || msg.Headers.Contains("References"); - } - - public static MimeMessage GetMimeMessage(Stream emlFile) - { - return MimeMessage.Load(emlFile); - } - - public static string GetEmailText(MimeMessage msg) - { - return msg.GetTextBody(TextFormat.Plain); - } - } -} diff --git a/QuoteParser/QuoteParser.cs b/QuoteParser/QuoteParser.cs index 0576806..0316da8 100644 --- a/QuoteParser/QuoteParser.cs +++ b/QuoteParser/QuoteParser.cs @@ -3,7 +3,6 @@ using System.IO; using System.Linq; using QuoteParser.Features; -using static QuoteParser.Parse; using static QuoteParser.Features.QuoteMarkFeature; using KeyPhrasesClass = QuoteParser.Features.KeyPhrases; @@ -137,11 +136,9 @@ private QuoteParser(Builder builder) ); } - 
public Content Parse(Stream emlFile) + public Content Parse(string text, bool hasInReplyToEmlHeader = true) { - var msg = GetMimeMessage(emlFile); - string emailText = GetEmailText(msg); - return Parse(emailText.Lines(), ContainInReplyToHeader(msg)); + return Parse(text.Split(new[] { "\n", "\r\n" }, StringSplitOptions.None), hasInReplyToEmlHeader); } public Content Parse(IEnumerable lines, bool hasInReplyToEmlHeader = true) diff --git a/QuoteParser/QuoteParser.csproj b/QuoteParser/QuoteParser.csproj index 23e953e..afdb3ba 100644 --- a/QuoteParser/QuoteParser.csproj +++ b/QuoteParser/QuoteParser.csproj @@ -10,11 +10,7 @@ https://raw.githubusercontent.com/feature23/QuoteParser.NET/master/logo.png quoteparser email-parser email parse parsing quote reply + 2.0.0 - - - - - diff --git a/README.md b/README.md index 30b3771..71112ad 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,97 @@ # QuoteParser.NET [![Build Status](https://travis-ci.org/feature23/QuoteParser.NET.svg?branch=master)](https://travis-ci.org/feature23/QuoteParser.NET) A .NET Standard port of JetBrains' [email-parser](https://github.com/JetBrains/email-parser) library. 
+ +## Usage Example +The `QuoteParser` class processes the plain text body content of an email message, separating the content of the latest reply from any previous quoted exchanges: + +```csharp +// create an instance of the parser builder +var builder = new QuoteParser.QuoteParser.Builder(); + +// configure the builder using the fluent interface to override any defaults +builder = builder.MinimumQuoteBlockSize(10); + +// build the parser +var parser = builder.Build(); + +// example reply email content that includes a quote of the original message +var emailContent = +@"this is the latest reply body text + +On Tue, Oct 30, 2019, at 12:00 AM, Person A wrote: + +> this is the original message that was replied to"; + +// parse the email content +var content = parser.Parse(emailContent); + +// write the content of the latest reply to the console +Console.Write(String.Join(Environment.NewLine, content.Body)); // this is the latest reply body text +``` + +### Processing raw MIME messages +If you are processing the full raw MIME message, you need to first extract the plain text body content using a MIME parser such as the one included with [MimeKit](http://www.mimekit.net/). The example below uses the `MimeMessage` class which is included with [MimeKitLite](https://www.nuget.org/packages/MimeKitLite/), [MimeKit](https://www.nuget.org/packages/MimeKit/) and [MailKit](https://www.nuget.org/packages/MailKit/). + +```csharp +// same example as above only in raw MIME format +var rawEmailContent = +@"MIME-Version: 1.0 +From: Person B +To: Person A +In-Reply-To: +Content-Type: multipart/alternative; boundary=""random-boundary-id text"" + +--random-boundary-id text +Content-Type: text/plain + +this is the latest reply body text + +On Tue, Oct 30, 2019, at 12:00 AM, Person A wrote: + +> this is the original message that was replied to + +--random-boundary-id text +Content-Type: text/html; +Content-Transfer-Encoding: quoted-printable + +

this is the latest reply body text

+ +

On Tue, Oct 30, 2019, at 12:00 AM, Person A wrote:

+ +

this is the original message that was replied to

+ +--random-boundary-id text-- +"; + +// parse the message using MimeKit +var message = MimeMessage.Load(new MemoryStream(Encoding.UTF8.GetBytes(rawEmailContent))); + +// get the plain text body +var emailContent = message.GetTextBody(TextFormat.Plain); + +// check for reply headers +var hasInReplyToHeader = message.Headers.Contains("In-Reply-To") || message.Headers.Contains("References"); + +// parse the text body using QuoteParser +var content = new QuoteParser.QuoteParser + .Builder() + .Build() + .Parse(emailContent, hasInReplyToHeader); + +// write the content of the latest reply to the console +Console.Write(String.Join(Environment.NewLine, content.Body)); // this is the latest reply body text +``` + +### QuoteParser.Builder Configuration + +|Builder Method|Default Value| +|---|---| +|`HeaderLinesCount`|`3`| +|`MultiLineHeaderLinesCount`|`6`| +|`MinimumQuoteBlockSize`|`7`| +|`DeleteQuoteMarks`|`true`| +|`Recursive`|`false`| +|`KeyPhrases`|`InReplyToRegex, ReplyAboveRegex, OriginalMsgRegex`| + +## Version 2.0.0 Breaking Changes +Removed MimeKitLite dependency in order to avoid duplicate type/namespace conflicts for projects that already included (directly or indirectly) different versions of MimeKitLite, MimeKit or MailKit. See example above for processing raw MIME messages. \ No newline at end of file