From c3b7246ee9a57f4828cf73093569a005156f5091 Mon Sep 17 00:00:00 2001 From: Abin Simon Date: Thu, 16 Nov 2023 12:33:36 +0530 Subject: [PATCH] Improvements and tests for eml conversion (#4644) --- #### Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [x] :clock1: Yes, but in a later PR - [ ] :no_entry: No #### Type of change - [ ] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [x] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * https://github.com/alcionai/corso/issues/3893 #### Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- CHANGELOG.md | 3 + src/cmd/converter/converter.go | 8 +-- src/internal/converters/eml/eml.go | 25 ++++---- src/internal/converters/eml/eml_test.go | 59 +++++++++++++++++-- .../m365/collection/exchange/export.go | 13 +--- 5 files changed, 74 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b68e7c43a..d9c57ba59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] (beta) +### Added +- Added export support for emails in exchange backups as `.eml` files + ### Changed - Change file extension of messages export to json to match the content diff --git a/src/cmd/converter/converter.go b/src/cmd/converter/converter.go index 32c028d63..0b1d58959 100644 --- a/src/cmd/converter/converter.go +++ b/src/cmd/converter/converter.go @@ -7,7 +7,6 @@ import ( "os" "github.com/alcionai/corso/src/internal/converters/eml" - "github.com/alcionai/corso/src/pkg/services/m365/api" ) func main() { @@ -31,12 +30,7 @@ func main() { case "msg": switch to { case "eml": - msg, err := api.BytesToMessageable(body) - if err != nil { - log.Fatal(err) - } - - out, err = eml.ToEml(context.Background(), msg) + out, err = eml.FromJSON(context.Background(), body) if err != nil { log.Fatal(err) } diff --git a/src/internal/converters/eml/eml.go b/src/internal/converters/eml/eml.go index bda1dddd9..d17ba9011 100644 --- a/src/internal/converters/eml/eml.go +++ b/src/internal/converters/eml/eml.go @@ -13,26 +13,31 @@ import ( "fmt" "github.com/alcionai/clues" - "github.com/microsoftgraph/msgraph-sdk-go/models" mail "github.com/xhit/go-simple-mail/v2" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/services/m365/api" ) const ( - fromFormat = "%s <%s>" - dateFormat = "2006-01-02 15:04:05 MST" // from xhit/go-simple-mail + addressFormat = "%s <%s>" + dateFormat = "2006-01-02 15:04:05 MST" // from xhit/go-simple-mail ) -// ToEml converts a Messageable to .eml format -func ToEml(ctx context.Context, data models.Messageable) (string, error) { +// FromJSON converts a Messageable (as json) to .eml format +func FromJSON(ctx context.Context, body []byte) (string, error) { + data, err := api.BytesToMessageable(body) + if err != nil { + return "", clues.Wrap(err, "converting to messageble") + } + email := mail.NewMSG() if data.GetFrom() != nil { email.SetFrom( fmt.Sprintf( - fromFormat, + addressFormat, ptr.Val(data.GetFrom().GetEmailAddress().GetName()), ptr.Val(data.GetFrom().GetEmailAddress().GetAddress()))) } @@ -41,7 +46,7 @@ func ToEml(ctx context.Context, data models.Messageable) (string, error) { for _, recipient := range data.GetToRecipients() { email.AddTo( fmt.Sprintf( - fromFormat, + addressFormat, ptr.Val(recipient.GetEmailAddress().GetName()), ptr.Val(recipient.GetEmailAddress().GetAddress()))) } @@ -51,7 +56,7 @@ func ToEml(ctx context.Context, data models.Messageable) (string, error) { for _, recipient := range data.GetCcRecipients() { email.AddCc( fmt.Sprintf( - fromFormat, + addressFormat, ptr.Val(recipient.GetEmailAddress().GetName()), ptr.Val(recipient.GetEmailAddress().GetAddress()))) } @@ -61,7 +66,7 @@ func ToEml(ctx context.Context, data models.Messageable) (string, error) { for _, recipient := range data.GetBccRecipients() { email.AddBcc( fmt.Sprintf( - fromFormat, + addressFormat, ptr.Val(recipient.GetEmailAddress().GetName()), ptr.Val(recipient.GetEmailAddress().GetAddress()))) } @@ -77,7 +82,7 @@ func ToEml(ctx context.Context, data models.Messageable) (string, error) { } else if len(rts) != 0 { email.SetReplyTo( fmt.Sprintf( - fromFormat, + addressFormat, ptr.Val(rts[0].GetEmailAddress().GetName()), ptr.Val(rts[0].GetEmailAddress().GetAddress()))) } diff --git a/src/internal/converters/eml/eml_test.go b/src/internal/converters/eml/eml_test.go index 3615e1383..8a26a7e81 100644 --- a/src/internal/converters/eml/eml_test.go +++ b/src/internal/converters/eml/eml_test.go @@ -1,12 +1,15 @@ package eml import ( + "fmt" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/converters/eml/testdata" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/services/m365/api" @@ -26,11 +29,57 @@ func (suite *EMLUnitSuite) TestConvert_messageble_to_eml() { ctx, flush := tester.NewContext(t) defer flush() - msg, err := api.BytesToMessageable([]byte(testdata.EmailWithAttachments)) + body := []byte(testdata.EmailWithAttachments) + + out, err := FromJSON(ctx, body) + assert.NoError(t, err, "converting to eml") + + msg, err := api.BytesToMessageable(body) require.NoError(t, err, "creating message") - _, err = ToEml(ctx, msg) - // TODO(meain): add more tests on the generated content - // Cannot test output directly as it contains a random boundary - assert.NoError(t, err, "converting to eml") + assert.Contains(t, out, fmt.Sprintf("Subject: %s", ptr.Val(msg.GetSubject()))) + assert.Contains(t, out, fmt.Sprintf("Date: %s", msg.GetSentDateTime().Format(time.RFC1123Z))) + assert.Contains( + t, + out, + fmt.Sprintf( + `From: "%s" <%s>`, + ptr.Val(msg.GetFrom().GetEmailAddress().GetName()), + ptr.Val(msg.GetFrom().GetEmailAddress().GetAddress()))) + + for _, addr := range msg.GetToRecipients() { + assert.Contains( + t, + out, + fmt.Sprintf( + `To: "%s" <%s>`, + ptr.Val(addr.GetEmailAddress().GetName()), + ptr.Val(addr.GetEmailAddress().GetAddress()))) + } + + for _, addr := range msg.GetCcRecipients() { + assert.Contains( + t, + out, + fmt.Sprintf( + `Cc: "%s" <%s>`, + ptr.Val(addr.GetEmailAddress().GetName()), + ptr.Val(addr.GetEmailAddress().GetAddress()))) + } + + for _, addr := range msg.GetBccRecipients() { + assert.Contains( + t, + out, + fmt.Sprintf( + `Bcc: "%s" <%s>`, + ptr.Val(addr.GetEmailAddress().GetName()), + ptr.Val(addr.GetEmailAddress().GetAddress()))) + } + + // Only fist 30 chars as the .eml generator can introduce a + // newline in between the text to limit the column width of the + // output. It does not affect the data, but can break our tests and + // so using 30 as a safe limit to test. + assert.Contains(t, out, ptr.Val(msg.GetBody().GetContent())[:30], "body") } diff --git a/src/internal/m365/collection/exchange/export.go b/src/internal/m365/collection/exchange/export.go index 2a3b2b838..919d510fd 100644 --- a/src/internal/m365/collection/exchange/export.go +++ b/src/internal/m365/collection/exchange/export.go @@ -13,7 +13,6 @@ import ( "github.com/alcionai/corso/src/pkg/export" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" - "github.com/alcionai/corso/src/pkg/services/m365/api" ) func NewExportCollection( @@ -65,17 +64,7 @@ func streamItems( continue } - msg, err := api.BytesToMessageable(content) - if err != nil { - ch <- export.Item{ - ID: id, - Error: clues.Wrap(err, "parsing email"), - } - - continue - } - - email, err := eml.ToEml(ctx, msg) + email, err := eml.FromJSON(ctx, content) if err != nil { ch <- export.Item{ ID: id,