diff --git a/src/internal/converters/eml/eml.go b/src/internal/converters/eml/eml.go index 9b0607a4c..fcab7b5e1 100644 --- a/src/internal/converters/eml/eml.go +++ b/src/internal/converters/eml/eml.go @@ -23,6 +23,7 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/common/str" "github.com/alcionai/corso/src/internal/converters/ics" + "github.com/alcionai/corso/src/internal/m365/collection/groups/metadata" "github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/services/m365/api" ) @@ -303,3 +304,137 @@ func FromJSON(ctx context.Context, body []byte) (string, error) { return email.GetMessage(), nil } + +//------------------------------------------------------------- +// Postable -> EML +//------------------------------------------------------------- + +// FromJSONPostToEML converts a postable (as json) to .eml format. +// TODO(pandeyabs): This is a stripped down copy of messageable to +// eml conversion, it can be folded into one function by having a post +// to messageable converter. 
+func FromJSONPostToEML(
+	ctx context.Context,
+	body []byte,
+	postMetadata metadata.ConversationPostMetadata,
+) (string, error) {
+	ctx = clues.Add(ctx, "body_len", len(body))
+
+	data, err := api.BytesToPostable(body)
+	if err != nil {
+		return "", clues.WrapWC(ctx, err, "converting to postable")
+	}
+
+	ctx = clues.Add(ctx, "item_id", ptr.Val(data.GetId()))
+
+	email := mail.NewMSG()
+	email.Encoding = mail.EncodingBase64 // Doing it to be safe for when we have eventMessage (newline issues)
+	email.AllowDuplicateAddress = true   // More "correct" conversion
+	email.AddBccToHeader = true          // Don't ignore Bcc
+	email.AllowEmptyAttachments = true   // Don't error on empty attachments
+	email.UseProvidedAddress = true      // Don't try to parse the email address
+
+	if data.GetFrom() != nil {
+		email.SetFrom(formatAddress(data.GetFrom().GetEmailAddress()))
+	}
+
+	// We don't have the To, Cc, Bcc recipient information for posts due to a graph
+	// limitation. All posts carry the group email address as the only recipient
+	// for now.
+	email.AddTo(postMetadata.Recipients...)
+	email.SetSubject(postMetadata.Topic)
+
+	// Reply-To email address is not available for posts. Note that this is different
+	// from inReplyTo field.
+
+	if data.GetCreatedDateTime() != nil {
+		email.SetDate(ptr.Val(data.GetCreatedDateTime()).Format(dateFormat))
+	}
+
+	if data.GetBody() != nil {
+		if data.GetBody().GetContentType() != nil {
+			var contentType mail.ContentType
+
+			switch data.GetBody().GetContentType().String() {
+			case "html":
+				contentType = mail.TextHTML
+			case "text":
+				contentType = mail.TextPlain
+			default:
+				// https://learn.microsoft.com/en-us/graph/api/resources/itembody?view=graph-rest-1.0#properties
+				// This should not be possible according to the documentation
+				logger.Ctx(ctx).
+					With("body_type", data.GetBody().GetContentType().String()).
+					Info("unknown body content type")
+
+				contentType = mail.TextPlain
+			}
+
+			email.SetBody(contentType, ptr.Val(data.GetBody().GetContent()))
+		}
+	}
+
+	if data.GetAttachments() != nil {
+		for _, attachment := range data.GetAttachments() {
+			kind := ptr.Val(attachment.GetContentType())
+
+			bytes, err := attachment.GetBackingStore().Get("contentBytes")
+			if err != nil {
+				return "", clues.WrapWC(ctx, err, "failed to get attachment bytes").
+					With("kind", kind)
+			}
+
+			if bytes == nil {
+				// TODO(meain): Handle non file attachments
+				// https://github.com/alcionai/corso/issues/4772
+				//
+				// TODO(pandeyabs): Above issue is for messages.
+				// This is not a problem for posts but leaving it here for safety.
+				logger.Ctx(ctx).
+					With("attachment_id", ptr.Val(attachment.GetId()),
+						"attachment_type", ptr.Val(attachment.GetOdataType())).
+					Info("no contentBytes for attachment")
+
+				continue
+			}
+
+			bts, ok := bytes.([]byte)
+			if !ok { // err is nil at this point, so create a new error rather than wrapping it
+				return "", clues.NewWC(ctx, "invalid content bytes").
+					With("kind", kind).
+					With("interface_type", fmt.Sprintf("%T", bytes))
+			}
+
+			name := ptr.Val(attachment.GetName())
+
+			contentID, err := attachment.GetBackingStore().Get("contentId")
+			if err != nil {
+				return "", clues.WrapWC(ctx, err, "getting content id for attachment").
+					With("kind", kind)
+			}
+
+			if contentID != nil {
+				cids, _ := str.AnyToString(contentID)
+				if len(cids) > 0 {
+					name = cids
+				}
+			}
+
+			email.Attach(&mail.File{
+				// cannot use filename as inline attachment will not get mapped properly
+				Name:     name,
+				MimeType: kind,
+				Data:     bts,
+				Inline:   ptr.Val(attachment.GetIsInline()),
+			})
+		}
+	}
+
+	// Note: Posts cannot be of type EventMessageResponse, EventMessage or
+	// CalendarSharingMessage. So we don't need to handle those cases here.
+	if err = email.GetError(); err != nil {
+		return "", clues.WrapWC(ctx, err, "converting to eml")
+	}
+
+	return email.GetMessage(), nil
+}
diff --git a/src/internal/converters/eml/eml_test.go b/src/internal/converters/eml/eml_test.go
index 1dbed62f0..9a20bb747 100644
--- a/src/internal/converters/eml/eml_test.go
+++ b/src/internal/converters/eml/eml_test.go
@@ -18,6 +18,8 @@ import (
 	"github.com/alcionai/corso/src/internal/common/ptr"
 	"github.com/alcionai/corso/src/internal/converters/eml/testdata"
 	"github.com/alcionai/corso/src/internal/converters/ics"
+	"github.com/alcionai/corso/src/internal/m365/collection/groups/metadata"
+	stub "github.com/alcionai/corso/src/internal/m365/service/groups/mock"
 	"github.com/alcionai/corso/src/internal/tester"
 	"github.com/alcionai/corso/src/pkg/services/m365/api"
 )
@@ -325,3 +327,74 @@ func (suite *EMLUnitSuite) TestConvert_eml_ics_from_event_obj() {
 	assert.NotEqual(t, ptr.Val(msg.GetSubject()), event.GetProperty(ical.ComponentPropertySummary).Value)
 	assert.Equal(t, ptr.Val(evt.GetSubject()), event.GetProperty(ical.ComponentPropertySummary).Value)
 }
+
+//-------------------------------------------------------------
+// Postable -> EML tests
+//-------------------------------------------------------------
+
+func (suite *EMLUnitSuite) TestConvert_postable_to_eml() {
+	t := suite.T()
+
+	ctx, flush := tester.NewContext(t)
+	defer flush()
+
+	body := []byte(stub.PostWithAttachments)
+
+	postMetadata := metadata.ConversationPostMetadata{
+		Recipients: []string{"group@example.com"},
+		Topic:      "test subject",
+	}
+
+	out, err := FromJSONPostToEML(ctx, body, postMetadata)
+	assert.NoError(t, err, "converting to eml")
+
+	post, err := api.BytesToPostable(body)
+	require.NoError(t, err, "creating post")
+
+	eml, err := enmime.ReadEnvelope(strings.NewReader(out))
+	require.NoError(t, err, "reading created eml")
+
+	assert.Equal(t, postMetadata.Topic, eml.GetHeader("Subject"))
+	assert.Equal(t,
post.GetCreatedDateTime().Format(time.RFC1123Z), eml.GetHeader("Date")) + + assert.Equal(t, formatAddress(post.GetFrom().GetEmailAddress()), eml.GetHeader("From")) + + // Test recipients. The post metadata should contain the group email address. + + tos := strings.Split(eml.GetHeader("To"), ", ") + for _, sourceTo := range postMetadata.Recipients { + assert.Contains(t, tos, sourceTo) + } + + // Assert cc, bcc to be empty since they are not supported for posts right now. + assert.Equal(t, "", eml.GetHeader("Cc")) + assert.Equal(t, "", eml.GetHeader("Bcc")) + + // Test attachments using PostWithAttachments data as a reference. + // This data has 1 direct attachment and 1 inline attachment. + assert.Equal(t, 1, len(eml.Attachments), "direct attachment count") + assert.Equal(t, 1, len(eml.Inlines), "inline attachment count") + + for _, sourceAttachment := range post.GetAttachments() { + targetContent := eml.Attachments[0].Content + if ptr.Val(sourceAttachment.GetIsInline()) { + targetContent = eml.Inlines[0].Content + } + + sourceContent, err := sourceAttachment.GetBackingStore().Get("contentBytes") + assert.NoError(t, err, "getting source attachment content") + + assert.Equal(t, sourceContent, targetContent) + } + + // Test body + source := strings.ReplaceAll(eml.HTML, "\n", "") + target := strings.ReplaceAll(ptr.Val(post.GetBody().GetContent()), "\n", "") + + // replace the cid with a constant value to make the comparison + re := regexp.MustCompile(`(?:src|originalSrc)="cid:[^"]*"`) + source = re.ReplaceAllString(source, `src="cid:replaced"`) + target = re.ReplaceAllString(target, `src="cid:replaced"`) + + assert.Equal(t, source, target) +} diff --git a/src/internal/m365/service/groups/mock/post-with-attachments.json b/src/internal/m365/service/groups/mock/post-with-attachments.json new file mode 100644 index 000000000..64bee328e --- /dev/null +++ b/src/internal/m365/service/groups/mock/post-with-attachments.json @@ -0,0 +1,85 @@ +{ + "@odata.context": 
"https://graph.microsoft.com/v1.0/$metadata#groups('1623c35a-b67a-473a-9b21-5e4891f22e70')/conversations('AAQkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQAQAHUiDz4vCHZNqyz90GJoN54%3D')/threads('AAQkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQMkABAAdSIPPi8Idk2rLP3QYmg3nhAAdSIPPi8Idk2rLP3QYmg3ng%3D%3D')/posts(*,attachments())/$entity", + "@odata.etag": "W/\"CQAAABYAAADxkJD2bSaUS7TYwOHY6vKrAAAiegI9\"", + "id": "AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidRn9AAA=", + "createdDateTime": "2024-01-29T02:22:18Z", + "lastModifiedDateTime": "2024-01-29T02:22:19Z", + "changeKey": "CQAAABYAAADxkJD2bSaUS7TYwOHY6vKrAAAiegI9", + "categories": [], + "receivedDateTime": "2024-01-29T02:22:19Z", + "hasAttachments": true, + "conversationThreadId": "AAQkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQMkABAAdSIPPi8Idk2rLP3QYmg3nhAAdSIPPi8Idk2rLP3QYmg3ng==", + "conversationId": null, + "body": { + "contentType": "html", + "content": "
\r\n
\r\n\r\n
\r\n
\r\n

\r\n
\r\n
Embedded + direct attachments.
\r\n" + }, + "from": { + "emailAddress": { + "name": "Dustin Corners", + "address": "Dustin.Corners@10rqc2.onmicrosoft.com" + } + }, + "sender": { + "emailAddress": { + "name": "Dustin Corners", + "address": "Dustin.Corners@10rqc2.onmicrosoft.com" + } + }, + "newParticipants": [], + "attachments": [ + { + "@odata.type": "#microsoft.graph.fileAttachment", + "@odata.mediaContentType": "image/png", + "id": "AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidRn9AAABEgAQAJALn6ReFnlAuFpgf3BBdwM=", + "lastModifiedDateTime": "2024-01-29T02:22:18Z", + "name": "image.png", + "contentType": "image/png", + "size": 690, + "isInline": true, + "contentId": "7fa9ea6b-8e03-473c-8b34-cae13eaa33aa", + "contentLocation": null, + "contentBytes": "iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAAAXNSR0IArs4c6QAAAXlJREFUeF7tmcFtwkAQRWcjhIKogQLSQ5pIIbnkEkqAC23QQ670EArIKeckQuICWsAGrayFsdhRtPO4YuOdN//NYjuI809wXr8AgAQ4J4ACzgPAEEQBFHBOAAWcB4BdAAVQwDkBFHAeAHYBFEAB5wRQwHkA2AVQQKbr3b/WYPZUtEkBAE0CCpNWp8xoXecEAKCsayRASwAFTrtTYTWZAe02WJi01gCrdZEAK9IkQEuAXYBd4HiTVng4MwQZgkbDRjsDrRqDAlakSYCWgJGaKIACRlHTGmDVGBRQk158jeRtslF3VHuCUTLz7wXS/+Gx+O+fDwnhWVtP7+PN7gW6Vnh58UPxv3MJ8tq7mD4nFgfQtag0fmnn46KsVOgDTXFO93u3FMD756qNfUXFR055AH+DRxlvlyIPL+29eSWdb0KSB7CRrYxkaPFgQpHaux6aB9BcqvAgumtFyh+7DqAy51M+14dgZc7fDqDyzueHYPy28s7nATgpPkLYA1p04EEgvdAkAAAAAElFTkSuQmCC" + }, + { + "@odata.type": "#microsoft.graph.fileAttachment", + "@odata.mediaContentType": "application/octet-stream", + "id": "AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidRn9AAABEgAQAO6vI6h5OXZDlVIaM2DTB_I=", + "lastModifiedDateTime": "2024-01-29T02:22:18Z", + "name": "file_100bytes", + "contentType": "application/octet-stream", + "size": 250, + "isInline": false, + "contentId": null, + "contentLocation": null, + "contentBytes": 
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==" + } + ], + "inReplyTo@odata.associationLink": "https://graph.microsoft.com/v1.0/groups('1623c35a-b67a-473a-9b21-5e4891f22e70')/threads('AAQkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQMkABAAdSIPPi8Idk2rLP3QYmg3nhAAdSIPPi8Idk2rLP3QYmg3ng==')/posts('AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidMn9AAA=')/$ref", + "inReplyTo@odata.navigationLink": "https://graph.microsoft.com/v1.0/groups('1623c35a-b67a-473a-9b21-5e4891f22e70')/threads('AAQkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQMkABAAdSIPPi8Idk2rLP3QYmg3nhAAdSIPPi8Idk2rLP3QYmg3ng==')/posts('AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidMn9AAA=')", + "inReplyTo": { + "@odata.etag": "W/\"CQAAABYAAADxkJD2bSaUS7TYwOHY6vKrAAAiegI9\"", + "id": "AAMkAGNhMGQwY2ZmLTEzZDctNDNhZC05Y2I4LWIyOTgzNjk4YWExZQBGAAAAAADdwE6qobHzQo5d_R1eoqPKBwDxkJD2bSaUS7TYwOHY6vKrAAAAAAEMAADxkJD2bSaUS7TYwOHY6vKrAAAidMn9AAA=", + "createdDateTime": "2024-01-29T02:21:18Z", + "lastModifiedDateTime": "2024-01-29T02:21:19Z", + "changeKey": "CQAAABYAAADxkJD2bSaUS7TYwOHY6vKrAAAiegI9", + "categories": [], + "receivedDateTime": "2024-01-29T02:21:19Z", + "hasAttachments": true, + "body": { + "contentType": "html", + "content": "
\r\n
\r\n
Test Reply
\r\n" },
+        "from": {
+            "emailAddress": {
+                "name": "Dustin Corners",
+                "address": "Dustin.Corners@10rqc2.onmicrosoft.com"
+            }
+        },
+        "sender": {
+            "emailAddress": {
+                "name": "Dustin Corners",
+                "address": "Dustin.Corners@10rqc2.onmicrosoft.com"
+            }
+        }
+    }
+}
diff --git a/src/internal/m365/service/groups/mock/testdata.go b/src/internal/m365/service/groups/mock/testdata.go
new file mode 100644
index 000000000..d246d1579
--- /dev/null
+++ b/src/internal/m365/service/groups/mock/testdata.go
@@ -0,0 +1,8 @@
+package stub
+
+import _ "embed"
+
+// PostWithAttachments is the embedded content of post-with-attachments.json.
+//
+//go:embed post-with-attachments.json
+var PostWithAttachments string
diff --git a/src/pkg/services/m365/api/conversations.go b/src/pkg/services/m365/api/conversations.go
index 00d510947..f925df23b 100644
--- a/src/pkg/services/m365/api/conversations.go
+++ b/src/pkg/services/m365/api/conversations.go
@@ -2,13 +2,16 @@ package api
 
 import (
 	"context"
+	"strings"
 
 	"github.com/alcionai/clues"
 	"github.com/jaytaylor/html2text"
+	"github.com/microsoft/kiota-abstractions-go/serialization"
 	"github.com/microsoftgraph/msgraph-sdk-go/groups"
 	"github.com/microsoftgraph/msgraph-sdk-go/models"
 
 	"github.com/alcionai/corso/src/internal/common/ptr"
+	"github.com/alcionai/corso/src/internal/common/sanitize"
 	"github.com/alcionai/corso/src/internal/common/str"
 	"github.com/alcionai/corso/src/pkg/backup/details"
 	"github.com/alcionai/corso/src/pkg/logger"
@@ -190,3 +193,43 @@ func (c Conversations) getAttachments(
 
 	return result, totalSize, nil
 }
+
+// bytesToPostable deserializes body into a kiota parsable using the post
+// factory. If the first attempt fails with an error whose message contains
+// invalidJSON, the bytes are sanitized and deserialization is retried once;
+// any other error is returned immediately.
+func bytesToPostable(body []byte) (serialization.Parsable, error) {
+	v, err := CreateFromBytes(body, models.CreatePostFromDiscriminatorValue)
+	if err != nil {
+		if !strings.Contains(err.Error(), invalidJSON) {
+			return nil, clues.Wrap(err, "deserializing bytes to post")
+		}
+
+		// If the JSON was invalid try sanitizing and deserializing again.
+		// Sanitizing should transform characters < 0x20 according to the spec where
+		// possible. The resulting JSON may still be invalid though.
+		body = sanitize.JSONBytes(body)
+		v, err = CreateFromBytes(body, models.CreatePostFromDiscriminatorValue)
+	}
+
+	return v, clues.Stack(err).OrNil()
+}
+
+// BytesToPostable deserializes body into a models.Postable. It returns an
+// error if deserialization fails or if the deserialized value is not a
+// models.Postable.
+func BytesToPostable(body []byte) (models.Postable, error) {
+	v, err := bytesToPostable(body)
+	if err != nil {
+		return nil, clues.Stack(err)
+	}
+
+	// Checked assertion: a nil or unexpected parsable yields an error instead
+	// of a panic.
+	post, ok := v.(models.Postable)
+	if !ok {
+		return nil, clues.New("deserialized value is not a postable")
+	}
+
+	return post, nil
+}
diff --git a/src/pkg/services/m365/api/conversations_test.go b/src/pkg/services/m365/api/conversations_test.go
index 97abf3347..19c093729 100644
--- a/src/pkg/services/m365/api/conversations_test.go
+++ b/src/pkg/services/m365/api/conversations_test.go
@@ -12,6 +12,8 @@ import (
 	"github.com/stretchr/testify/suite"
 
 	"github.com/alcionai/corso/src/internal/common/ptr"
+	exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock"
+	stub "github.com/alcionai/corso/src/internal/m365/service/groups/mock"
 	"github.com/alcionai/corso/src/internal/tester"
 	"github.com/alcionai/corso/src/internal/tester/tconfig"
 	"github.com/alcionai/corso/src/pkg/backup/details"
@@ -115,6 +117,68 @@ func (suite *ConversationsAPIUnitSuite) TestConversationPostInfo() {
 	}
 }
 
+// TestBytesToPostable_InvalidError tests that the error message kiota returns
+// for invalid JSON matches what we check for. This helps keep things in sync
+// when kiota is updated.
+func (suite *ConversationsAPIUnitSuite) TestBytesToPostable_InvalidError() {
+	t := suite.T()
+	input := exchMock.MessageWithSpecialCharacters("m365 mail support test")
+
+	_, err := CreateFromBytes(input, models.CreatePostFromDiscriminatorValue)
+	require.Error(t, err, clues.ToCore(err))
+
+	assert.Contains(t, err.Error(), invalidJSON)
+}
+
+func (suite *ConversationsAPIUnitSuite) TestBytesToPostable() {
+	table := []struct {
+		name        string
+		byteArray   []byte
+		checkError  assert.ErrorAssertionFunc
+		checkObject assert.ValueAssertionFunc
+	}{
+		{
+			name:        "Empty Bytes",
+			byteArray:   make([]byte, 0),
+			checkError:  assert.Error,
+			checkObject: assert.Nil,
+		},
+		{
+			name: "post bytes",
+			// Note: inReplyTo is not serialized or deserialized by kiota so we can't
+			// test that aspect. The payload does contain inReplyTo data for future use.
+			byteArray:   []byte(stub.PostWithAttachments),
+			checkError:  assert.NoError,
+			checkObject: assert.NotNil,
+		},
+		// Using test data from exchMock package for these tests because posts are
+		// essentially email messages.
+		{
+			name:        "malformed JSON bytes passes sanitization",
+			byteArray:   exchMock.MessageWithSpecialCharacters("m365 mail support test"),
+			checkError:  assert.NoError,
+			checkObject: assert.NotNil,
+		},
+		{
+			name: "invalid JSON bytes",
+			byteArray: append(
+				exchMock.MessageWithSpecialCharacters("m365 mail support test"),
+				[]byte("}")...),
+			checkError:  assert.Error,
+			checkObject: assert.Nil,
+		},
+	}
+	for _, test := range table {
+		suite.Run(test.name, func() {
+			t := suite.T()
+
+			result, err := BytesToPostable(test.byteArray)
+			test.checkError(t, err, clues.ToCore(err))
+			test.checkObject(t, result)
+		})
+	}
+}
+
 type ConversationAPIIntgSuite struct {
 	tester.Suite
 	its intgTesterSetup