diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c0c93c38..83bef4dc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] (beta) - ### Fixed - Handle the case where an email or event cannot be retrieved from Exchange due to an `ErrorCorruptData` error. Corso will skip over the item but report it in the backup summary. +- Emails attached within other emails are now correctly exported. ## [v0.19.0] (beta) - 2024-02-06 diff --git a/src/internal/converters/eml/eml.go b/src/internal/converters/eml/eml.go index fcab7b5e1..35810e5eb 100644 --- a/src/internal/converters/eml/eml.go +++ b/src/internal/converters/eml/eml.go @@ -143,6 +143,110 @@ func getICalData(ctx context.Context, data models.Messageable) (string, error) { return ics.FromEventable(ctx, event) } +// getFileAttachment converts a file attachment into a mail.File. +// It returns (nil, nil) when the attachment has no contentBytes so that +// the caller can skip it. A non-empty contentId replaces the attachment +// name as the file name. +func getFileAttachment(ctx context.Context, attachment models.Attachmentable) (*mail.File, error) { + kind := ptr.Val(attachment.GetContentType()) + + bytes, err := attachment.GetBackingStore().Get("contentBytes") + if err != nil { + return nil, clues.WrapWC(ctx, err, "failed to get attachment bytes"). + With("kind", kind) + } + + if bytes == nil { + // TODO(meain): Handle non file attachments + // https://github.com/alcionai/corso/issues/4772 + logger.Ctx(ctx). + With("attachment_id", ptr.Val(attachment.GetId()), + "attachment_type", ptr.Val(attachment.GetOdataType())). + Info("no contentBytes for attachment") + + return nil, nil + } + + bts, ok := bytes.([]byte) + if !ok { + // err is nil on this path, so build a new error rather than wrapping it + return nil, clues.NewWC(ctx, "invalid content bytes"). + With("kind", kind). + With("interface_type", fmt.Sprintf("%T", bytes)) + } + + name := ptr.Val(attachment.GetName()) + + contentID, err := attachment.GetBackingStore().Get("contentId") + if err != nil { + return nil, clues.WrapWC(ctx, err, "getting content id for attachment"). 
+ With("kind", kind) + } + + if contentID != nil { + cids, _ := str.AnyToString(contentID) + if len(cids) > 0 { + name = cids + } + } + + return &mail.File{ + // name is the content ID when one is set: the filename cannot be used, as inline attachments will not get mapped properly + Name: name, + MimeType: kind, + Data: bts, + Inline: ptr.Val(attachment.GetIsInline()), + }, nil +} + +func getItemAttachment(ctx context.Context, attachment models.Attachmentable) (*mail.File, error) { + it, err := attachment.GetBackingStore().Get("item") + if err != nil { + return nil, clues.WrapWC(ctx, err, "getting item for attachment"). + With("attachment_id", ptr.Val(attachment.GetId())) + } + + switch it := it.(type) { + case *models.Message: + cb, err := FromMessageable(ctx, it) + if err != nil { + return nil, clues.WrapWC(ctx, err, "converting item attachment to eml"). + With("attachment_id", ptr.Val(attachment.GetId())) + } + + return &mail.File{ + Name: ptr.Val(attachment.GetName()), + MimeType: "message/rfc822", + Data: []byte(cb), + }, nil + default: + logger.Ctx(ctx). + With("attachment_id", ptr.Val(attachment.GetId()), + "attachment_type", ptr.Val(attachment.GetOdataType())). + Info("unknown item attachment type") + } + + return nil, nil +} + +func getMailAttachment(ctx context.Context, att models.Attachmentable) (*mail.File, error) { + otyp := ptr.Val(att.GetOdataType()) + + switch otyp { + case "#microsoft.graph.fileAttachment": + return getFileAttachment(ctx, att) + case "#microsoft.graph.itemAttachment": + return getItemAttachment(ctx, att) + default: + logger.Ctx(ctx). + With("attachment_id", ptr.Val(att.GetId()), + "attachment_type", otyp). 
+ Info("unknown attachment type") + + return nil, nil + } +} + // FromJSON converts a Messageable (as json) to .eml format func FromJSON(ctx context.Context, body []byte) (string, error) { ctx = clues.Add(ctx, "body_len", len(body)) @@ -152,6 +251,11 @@ func FromJSON(ctx context.Context, body []byte) (string, error) { return "", clues.WrapWC(ctx, err, "converting to messageble") } + return FromMessageable(ctx, data) +} + +// FromMessageable converts a Messageable to .eml format +func FromMessageable(ctx context.Context, data models.Messageable) (string, error) { ctx = clues.Add(ctx, "item_id", ptr.Val(data.GetId())) email := mail.NewMSG() @@ -229,54 +333,16 @@ func FromJSON(ctx context.Context, body []byte) (string, error) { if data.GetAttachments() != nil { for _, attachment := range data.GetAttachments() { - kind := ptr.Val(attachment.GetContentType()) - - bytes, err := attachment.GetBackingStore().Get("contentBytes") + att, err := getMailAttachment(ctx, attachment) if err != nil { - return "", clues.WrapWC(ctx, err, "failed to get attachment bytes"). - With("kind", kind) + return "", clues.WrapWC(ctx, err, "getting mail attachment") } - if bytes == nil { - // TODO(meain): Handle non file attachments - // https://github.com/alcionai/corso/issues/4772 - logger.Ctx(ctx). - With("attachment_id", ptr.Val(attachment.GetId()), - "attachment_type", ptr.Val(attachment.GetOdataType())). - Info("no contentBytes for attachment") - - continue + // getMailAttachment returns nil for attachments it only logs + // and skips (e.g. unknown types); there is nothing to attach then + if att != nil { + email.Attach(att) } - - bts, ok := bytes.([]byte) - if !ok { - return "", clues.WrapWC(ctx, err, "invalid content bytes"). - With("kind", kind). - With("interface_type", fmt.Sprintf("%T", bytes)) - } - - name := ptr.Val(attachment.GetName()) - - contentID, err := attachment.GetBackingStore().Get("contentId") - if err != nil { - return "", clues.WrapWC(ctx, err, "getting content id for attachment"). 
- With("kind", kind) - } - - if contentID != nil { - cids, _ := str.AnyToString(contentID) - if len(cids) > 0 { - name = cids - } - } - - email.Attach(&mail.File{ - // cannot use filename as inline attachment will not get mapped properly - Name: name, - MimeType: kind, - Data: bts, - Inline: ptr.Val(attachment.GetIsInline()), - }) } } @@ -298,7 +364,7 @@ func FromJSON(ctx context.Context, body []byte) (string, error) { } } - if err = email.GetError(); err != nil { + if err := email.GetError(); err != nil { return "", clues.WrapWC(ctx, err, "converting to eml") } diff --git a/src/internal/converters/eml/eml_test.go b/src/internal/converters/eml/eml_test.go index 9a20bb747..fb95b8e55 100644 --- a/src/internal/converters/eml/eml_test.go +++ b/src/internal/converters/eml/eml_test.go @@ -398,3 +398,48 @@ func (suite *EMLUnitSuite) TestConvert_postable_to_eml() { assert.Equal(t, source, target) } + +// Tests an ics within an eml within another eml +func (suite *EMLUnitSuite) TestConvert_message_in_messageble_to_eml() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + body := []byte(testdata.EmailWithinEmail) + + out, err := FromJSON(ctx, body) + require.NoError(t, err, "converting to eml") + + msg, err := api.BytesToMessageable(body) + require.NoError(t, err, "creating message") + + eml, err := enmime.ReadEnvelope(strings.NewReader(out)) + require.NoError(t, err, "reading created eml") + + assert.Equal(t, ptr.Val(msg.GetSubject()), eml.GetHeader("Subject")) + assert.Equal(t, msg.GetSentDateTime().Format(time.RFC1123Z), eml.GetHeader("Date")) + + assert.Equal(t, formatAddress(msg.GetFrom().GetEmailAddress()), eml.GetHeader("From")) + + attachments := eml.Attachments + require.Len(t, attachments, 1, "attachment count in parent email") + + ieml, err := enmime.ReadEnvelope(strings.NewReader(string(attachments[0].Content))) + require.NoError(t, err, "reading created eml") + + itm, err := msg.GetAttachments()[0].GetBackingStore().Get("item") + 
require.NoError(t, err, "getting item from message") + + imsg := itm.(*models.Message) + assert.Equal(t, ptr.Val(imsg.GetSubject()), ieml.GetHeader("Subject")) + assert.Equal(t, imsg.GetSentDateTime().Format(time.RFC1123Z), ieml.GetHeader("Date")) + + assert.Equal(t, formatAddress(imsg.GetFrom().GetEmailAddress()), ieml.GetHeader("From")) + + iattachments := ieml.Attachments + require.Len(t, iattachments, 1, "attachment count in child email") + + // Known from testdata + assert.Contains(t, string(iattachments[0].Content), "X-LIC-LOCATION:Africa/Abidjan") +} diff --git a/src/internal/converters/eml/testdata/email-within-email.json b/src/internal/converters/eml/testdata/email-within-email.json new file mode 100644 index 000000000..58263ae8d --- /dev/null +++ b/src/internal/converters/eml/testdata/email-within-email.json @@ -0,0 +1,128 @@ +{ + "id": "AAMkAGJiZmE2NGU4LTQ4YjktNDI1Mi1iMWQzLTQ1MmMxODJkZmQyNABGAAAAAABFdiK7oifWRb4ADuqgSRcnBwBBFDg0JJk7TY1fmsJrh7tNAAAAAAEJAABBFDg0JJk7TY1fmsJrh7tNAAFnbV-qAAA=", + "@odata.type": "#microsoft.graph.message", + "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#users('7ceb8e03-bdc5-4509-a136-457526165ec0')/messages/$entity", + "@odata.etag": "W/\"CQAAABYAAABBFDg0JJk7TY1fmsJrh7tNAAFnDeBl\"", + "categories": [], + "changeKey": "CQAAABYAAABBFDg0JJk7TY1fmsJrh7tNAAFnDeBl", + "createdDateTime": "2024-02-05T09:33:23Z", + "lastModifiedDateTime": "2024-02-05T09:33:48Z", + "attachments": [ + { + "id": "AAMkAGJiZmE2NGU4LTQ4YjktNDI1Mi1iMWQzLTQ1MmMxODJkZmQyNABGAAAAAABFdiK7oifWRb4ADuqgSRcnBwBBFDg0JJk7TY1fmsJrh7tNAAAAAAEJAABBFDg0JJk7TY1fmsJrh7tNAAFnbV-qAAABEgAQAEUyH0VS3HJBgHDlZdWZl0k=", + "@odata.type": "#microsoft.graph.itemAttachment", + "item@odata.navigationLink": "https://graph.microsoft.com/v1.0/users('7ceb8e03-bdc5-4509-a136-457526165ec0')/messages('')", + "item@odata.associationLink": "https://graph.microsoft.com/v1.0/users('7ceb8e03-bdc5-4509-a136-457526165ec0')/messages('')/$ref", + "isInline": false, + 
"lastModifiedDateTime": "2024-02-05T09:33:46Z", + "name": "Purpose of life", + "size": 11840, + "item": { + "id": "", + "@odata.type": "#microsoft.graph.message", + "createdDateTime": "2024-02-05T09:33:24Z", + "lastModifiedDateTime": "2024-02-05T09:33:46Z", + "attachments": [ + { + "id": "AAMkAGJiZmE2NGU4LTQ4YjktNDI1Mi1iMWQzLTQ1MmMxODJkZmQyNABGAAAAAABFdiK7oifWRb4ADuqgSRcnBwBBFDg0JJk7TY1fmsJrh7tNAAAAAAEJAABBFDg0JJk7TY1fmsJrh7tNAAFnbV-qAAACEgAQAEUyH0VS3HJBgHDlZdWZl0kSABAAjBhd4-oQaUS969pTkS-gzA==", + "@odata.type": "#microsoft.graph.fileAttachment", + "@odata.mediaContentType": "text/calendar", + "contentType": "text/calendar", + "isInline": false, + "lastModifiedDateTime": "2024-02-05T09:33:46Z", + "name": "Abidjan.ics", + "size": 573, + "contentBytes": "QkVHSU46VkNBTEVOREFSDQpQUk9ESUQ6LS8vdHp1cmwub3JnLy9OT05TR01MIE9sc29uIDIwMjNkLy9FTg0KVkVSU0lPTjoyLjANCkJFR0lOOlZUSU1FWk9ORQ0KVFpJRDpBZnJpY2EvQWJpZGphbg0KTEFTVC1NT0RJRklFRDoyMDIzMTIyMlQyMzMzNThaDQpUWlVSTDpodHRwczovL3d3dy50enVybC5vcmcvem9uZWluZm8vQWZyaWNhL0FiaWRqYW4NClgtTElDLUxPQ0FUSU9OOkFmcmljYS9BYmlkamFuDQpYLVBST0xFUFRJQy1UWk5BTUU6TE1UDQpCRUdJTjpTVEFOREFSRA0KVFpOQU1FOkdNVA0KVFpPRkZTRVRGUk9NOi0wMDE2MDgNClRaT0ZGU0VUVE86KzAwMDANCkRUU1RBUlQ6MTkxMjAxMDFUMDAwMDAwDQpFTkQ6U1RBTkRBUkQNCkVORDpWVElNRVpPTkUNCkVORDpWQ0FMRU5EQVINCg==" + } + ], + "body": { + "content": "\r\n
I just realized the purpose of my life is to be a test case. Good to know.
", + "contentType": "html" + }, + "bodyPreview": "I just realized the purpose of my life is to be a test case. Good to know.", + "conversationId": "AAQkAGJiZmE2NGU4LTQ4YjktNDI1Mi1iMWQzLTQ1MmMxODJkZmQyNAAQAFEnxDqYmbJEm8d2l3qfS6A=", + "conversationIndex": "AQHaWBYiUSfEOpiZskSbx3aXep9LoA==", + "flag": { + "flagStatus": "notFlagged" + }, + "from": { + "emailAddress": { + "address": "JohannaL@10rqc2.onmicrosoft.com", + "name": "Johanna Lorenz" + } + }, + "hasAttachments": true, + "importance": "normal", + "internetMessageId": "", + "isDeliveryReceiptRequested": false, + "isDraft": false, + "isRead": true, + "isReadReceiptRequested": false, + "receivedDateTime": "2024-02-05T09:33:12Z", + "sender": { + "emailAddress": { + "address": "JohannaL@10rqc2.onmicrosoft.com", + "name": "Johanna Lorenz" + } + }, + "sentDateTime": "2024-02-05T09:33:11Z", + "subject": "Purpose of life", + "toRecipients": [ + { + "emailAddress": { + "address": "PradeepG@10rqc2.onmicrosoft.com", + "name": "Pradeep Gupta" + } + } + ], + "webLink": "https://outlook.office365.com/owa/?AttachmentItemID=AAMkAGJiZmE2NGU4LTQ4YjktNDI1Mi1iMWQzLTQ1MmMxODJkZmQyNABGAAAAAABFdiK7oifWRb4ADuqgSRcnBwBBFDg0JJk7TY1fmsJrh7tNAAAAAAEJAABBFDg0JJk7TY1fmsJrh7tNAAFnbV%2FqAAABEgAQAEUyH0VS3HJBgHDlZdWZl0k%3D&exvsurl=1&viewmodel=ItemAttachment" + } + } + ], + "bccRecipients": [], + "body": { + "content": "\r\n
Now, this is what we call nesting in this business.
", + "contentType": "html" + }, + "bodyPreview": "Now, this is what we call nesting in this business.", + "ccRecipients": [], + "conversationId": "AAQkAGJiZmE2NGU4LTQ4YjktNDI1Mi1iMWQzLTQ1MmMxODJkZmQyNAAQAIv2-4RHwDhJhlqBV5PTE3Y=", + "conversationIndex": "AQHaWBZdi/b/hEfAOEmGWoFXk9MTdg==", + "flag": { + "flagStatus": "notFlagged" + }, + "from": { + "emailAddress": { + "address": "JohannaL@10rqc2.onmicrosoft.com", + "name": "Johanna Lorenz" + } + }, + "hasAttachments": true, + "importance": "normal", + "inferenceClassification": "focused", + "internetMessageId": "", + "isDeliveryReceiptRequested": false, + "isDraft": false, + "isRead": true, + "isReadReceiptRequested": false, + "parentFolderId": "AQMkAGJiAGZhNjRlOC00OGI5LTQyNTItYjFkMy00NTJjMTgyZGZkMjQALgAAA0V2IruiJ9ZFvgAO6qBJFycBAEEUODQkmTtNjV_awmuHu00AAAIBCQAAAA==", + "receivedDateTime": "2024-02-05T09:33:46Z", + "replyTo": [], + "sender": { + "emailAddress": { + "address": "JohannaL@10rqc2.onmicrosoft.com", + "name": "Johanna Lorenz" + } + }, + "sentDateTime": "2024-02-05T09:33:45Z", + "subject": "Fw: Purpose of life", + "toRecipients": [ + { + "emailAddress": { + "address": "PradeepG@10rqc2.onmicrosoft.com", + "name": "Pradeep Gupta" + } + } + ], + "webLink": "https://outlook.office365.com/owa/?ItemID=AAMkAGJiZmE2NGU4LTQ4YjktNDI1Mi1iMWQzLTQ1MmMxODJkZmQyNABGAAAAAABFdiK7oifWRb4ADuqgSRcnBwBBFDg0JJk7TY1fmsJrh7tNAAAAAAEJAABBFDg0JJk7TY1fmsJrh7tNAAFnbV%2FqAAA%3D&exvsurl=1&viewmodel=ReadMessageItem" +} diff --git a/src/internal/converters/eml/testdata/testdata.go b/src/internal/converters/eml/testdata/testdata.go index 05d963107..6332de30b 100644 --- a/src/internal/converters/eml/testdata/testdata.go +++ b/src/internal/converters/eml/testdata/testdata.go @@ -10,3 +10,6 @@ var EmailWithEventInfo string //go:embed email-with-event-object.json var EmailWithEventObject string + +//go:embed email-within-email.json +var EmailWithinEmail string