Keepers 45886e2ad9
allow eml export when attachments have no name (#5199)
#### Does this PR need a docs update or release note?

- [x]  Yes, it's included

#### Type of change

- [x] 🐛 Bugfix

#### Test Plan

- [x]  Unit test
- [x] 💚 E2E
2024-02-09 18:41:10 +00:00

517 lines
16 KiB
Go

package eml
// This package helps convert from the json response
// received from Graph API to .eml format (rfc0822).
// RFC
// Original: https://www.ietf.org/rfc/rfc0822.txt
// New: https://datatracker.ietf.org/doc/html/rfc5322
// Extension for MIME: https://www.ietf.org/rfc/rfc1521.txt
// Data missing from backup:
// SetReturnPath SetPriority SetListUnsubscribe SetDkim
// AddAlternative SetDSN (and any other X-MS specific headers)
import (
"context"
"fmt"
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
mail "github.com/xhit/go-simple-mail/v2"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/common/str"
"github.com/alcionai/corso/src/internal/converters/ics"
"github.com/alcionai/corso/src/internal/m365/collection/groups/metadata"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/services/m365/api"
)
const (
	// dateFormat is the time layout used when passing dates to the
	// mail library. The layout is taken from xhit/go-simple-mail.
	dateFormat = "2006-01-02 15:04:05 MST"
	// addressFormat renders a display name and an email address
	// as `"name" <address>`.
	addressFormat = `"%s" <%s>`
)
// formatAddress renders a graph email address entry as a single
// string usable in an address header.
//
// The result is:
//   - "" when entry is nil, or when both name and address are empty
//   - `"name"` when only the name is set
//   - the bare address when the name is empty or equal to the address
//   - `"name" <address>` otherwise
func formatAddress(entry models.EmailAddressable) string {
	// Calling a method on a nil interface panics, so treat a missing
	// entry the same way as an entry with no name and no address.
	if entry == nil {
		return ""
	}

	name := ptr.Val(entry.GetName())
	email := ptr.Val(entry.GetAddress())

	switch {
	case len(name) == 0 && len(email) == 0:
		return ""
	case len(email) == 0:
		return fmt.Sprintf(`"%s"`, name)
	case len(name) == 0 || name == email:
		return email
	}

	return fmt.Sprintf(addressFormat, name, email)
}
// getICalData turns an event message request into an event and hands it
// to the ics converter to produce the calendar data.
//
// An error is returned when data is not an *models.EventMessageRequest.
// An empty string with a nil error is returned when the message is not
// a meeting request and carries no event.
func getICalData(ctx context.Context, data models.Messageable) (string, error) {
	req, isRequest := data.(*models.EventMessageRequest)
	if !isRequest {
		return "", clues.NewWC(ctx, "unexpected message type").
			With("interface_type", fmt.Sprintf("%T", data))
	}

	// GetEvent returns nil unless the data was pulled with the expand property
	// .../messages/<message_id>/?expand=Microsoft.Graph.EventMessage/Event
	// The expand also works for emails that result from someone
	// accepting an invite. Adding the expand query parameter when
	// directly fetching a cancellation mail makes the request fail.
	// It does look to be OK when listing emails, although it yields
	// an empty ({}) event value for cancellations.
	// TODO(meain): cancelled event details are available when pulling .eml
	if expanded := req.GetEvent(); expanded != nil {
		return ics.FromEventable(ctx, expanded)
	}

	// Exceptions (modifications) are covered by this as well. Graph only
	// sends the exception event and not the parent, which is also what
	// the eml obtained from graph contains.
	if ptr.Val(req.GetMeetingMessageType()) != models.MEETINGREQUEST_MEETINGMESSAGETYPE {
		// There is no event data unless this is of the "REQUEST" type.
		// Neither cancellation nor acceptance returns enough
		// information to recreate an event.
		return "", nil
	}

	// The data was not fetched with the expand property, so the event
	// details are approximated from the message itself.
	approx := models.NewEvent()

	approx.SetId(req.GetId())
	approx.SetCreatedDateTime(req.GetCreatedDateTime())
	approx.SetLastModifiedDateTime(req.GetLastModifiedDateTime())

	approx.SetIsAllDay(req.GetIsAllDay())
	approx.SetStart(req.GetStartDateTime())
	approx.SetEnd(req.GetEndDateTime())
	approx.SetRecurrence(req.GetRecurrence())
	// approx.SetIsCancelled()

	approx.SetSubject(req.GetSubject())
	approx.SetBodyPreview(req.GetBodyPreview())
	approx.SetBody(req.GetBody())

	// https://learn.microsoft.com/en-us/graph/api/resources/eventmessage?view=graph-rest-1.0
	// In addition, Outlook automatically creates an event instance in
	// the invitee's calendar, with the showAs property as tentative.
	approx.SetShowAs(ptr.To(models.TENTATIVE_FREEBUSYSTATUS))

	approx.SetCategories(req.GetCategories())
	approx.SetWebLink(req.GetWebLink())
	approx.SetOrganizer(req.GetFrom())

	// NOTE: If an event was previously created and people were added
	// to it later, the original list of attendees is not available.
	attendees := []models.Attendeeable{}

	// collect appends one attendee of the given kind per recipient.
	collect := func(recipients []models.Recipientable, kind models.AttendeeType) {
		for _, recipient := range recipients {
			attendee := models.NewAttendee()
			attendee.SetEmailAddress(recipient.GetEmailAddress())
			attendee.SetTypeEscaped(ptr.To(kind))

			attendees = append(attendees, attendee)
		}
	}

	collect(req.GetToRecipients(), models.REQUIRED_ATTENDEETYPE)
	collect(req.GetCcRecipients(), models.OPTIONAL_ATTENDEETYPE)
	// bcc did not show up in tests, but is added for completeness
	collect(req.GetBccRecipients(), models.OPTIONAL_ATTENDEETYPE)

	approx.SetAttendees(attendees)

	approx.SetLocation(req.GetLocation())
	// approx.SetSensitivity() // unavailable in msg
	approx.SetImportance(req.GetImportance())
	// approx.SetOnlineMeeting() // not available in eml either
	approx.SetAttachments(req.GetAttachments())

	return ics.FromEventable(ctx, approx)
}
// getFileAttachment converts a graph file attachment into a mail.File.
//
// It returns (nil, nil) when the attachment has no contentBytes; the
// condition is logged and the caller is expected to skip the attachment.
// An error is returned when the backing store cannot be read or when
// contentBytes is not a []byte.
func getFileAttachment(ctx context.Context, attachment models.Attachmentable) (*mail.File, error) {
	kind := ptr.Val(attachment.GetContentType())

	content, err := attachment.GetBackingStore().Get("contentBytes")
	if err != nil {
		return nil, clues.WrapWC(ctx, err, "failed to get attachment bytes").
			With("kind", kind)
	}

	if content == nil {
		// TODO(meain): Handle non file attachments
		// https://github.com/alcionai/corso/issues/4772
		logger.Ctx(ctx).
			With("attachment_id", ptr.Val(attachment.GetId()),
				"attachment_type", ptr.Val(attachment.GetOdataType())).
			Info("no contentBytes for attachment")

		return nil, nil
	}

	bts, ok := content.([]byte)
	if !ok {
		// err is always nil at this point (it was checked above), so
		// there is nothing to wrap; a new error has to be created.
		return nil, clues.NewWC(ctx, "invalid content bytes").
			With("kind", kind).
			With("interface_type", fmt.Sprintf("%T", content))
	}

	name := ptr.Val(attachment.GetName())
	if len(name) == 0 {
		// Graph as of now does not let us create any attachments
		// without a name, but we have run into instances where we
		// have seen attachments without a name, possibly from old
		// data. This is for those cases.
		name = "Unnamed"
	}

	contentID, err := attachment.GetBackingStore().Get("contentId")
	if err != nil {
		return nil, clues.WrapWC(ctx, err, "getting content id for attachment").
			With("kind", kind)
	}

	if contentID != nil {
		// The conversion error is deliberately ignored: when no
		// non-empty string comes back, the name chosen above is kept.
		cids, _ := str.AnyToString(contentID)
		if len(cids) > 0 {
			name = cids
		}
	}

	return &mail.File{
		// cannot use filename as inline attachment will not get mapped properly
		Name:     name,
		MimeType: kind,
		Data:     bts,
		Inline:   ptr.Val(attachment.GetIsInline()),
	}, nil
}
// getItemAttachment converts a graph item attachment into a mail.File.
//
// Only attached messages are supported: they are converted to eml and
// attached with the "message/rfc822" mime type. For any other item type
// the condition is logged and (nil, nil) is returned, so the caller is
// expected to skip the attachment.
func getItemAttachment(ctx context.Context, attachment models.Attachmentable) (*mail.File, error) {
	it, err := attachment.GetBackingStore().Get("item")
	if err != nil {
		return nil, clues.WrapWC(ctx, err, "getting item for attachment").
			With("attachment_id", ptr.Val(attachment.GetId()))
	}

	name := ptr.Val(attachment.GetName())
	if len(name) == 0 {
		// Same fallback as for file attachments: attachments without
		// a name have been seen, and the export should still go through.
		name = "Unnamed"
	}

	switch it := it.(type) {
	case *models.Message:
		cb, err := FromMessageable(ctx, it)
		if err != nil {
			return nil, clues.WrapWC(ctx, err, "converting item attachment to eml").
				With("attachment_id", ptr.Val(attachment.GetId()))
		}

		return &mail.File{
			Name:     name,
			MimeType: "message/rfc822",
			Data:     []byte(cb),
		}, nil
	default:
		logger.Ctx(ctx).
			With("attachment_id", ptr.Val(attachment.GetId()),
				"attachment_type", ptr.Val(attachment.GetOdataType())).
			Info("unknown item attachment type")
	}

	return nil, nil
}
// getMailAttachment dispatches on the odata type of the attachment and
// converts it into a mail.File. Attachment types other than file and
// item attachments are logged and reported as (nil, nil).
func getMailAttachment(ctx context.Context, att models.Attachmentable) (*mail.File, error) {
	odataType := ptr.Val(att.GetOdataType())

	if odataType == "#microsoft.graph.fileAttachment" {
		return getFileAttachment(ctx, att)
	}

	if odataType == "#microsoft.graph.itemAttachment" {
		return getItemAttachment(ctx, att)
	}

	logger.Ctx(ctx).
		With("attachment_id", ptr.Val(att.GetId()),
			"attachment_type", odataType).
		Info("unknown attachment type")

	return nil, nil
}
// FromJSON takes the json serialization of a Messageable and returns
// the message in .eml format. The length of the body is added to the
// context before any conversion happens.
func FromJSON(ctx context.Context, body []byte) (string, error) {
	ctx = clues.Add(ctx, "body_len", len(body))

	msg, convErr := api.BytesToMessageable(body)
	if convErr != nil {
		return "", clues.WrapWC(ctx, convErr, "converting to messageble")
	}

	return FromMessageable(ctx, msg)
}
// FromMessageable converts a Messageable to .eml format.
//
// Sender, recipients, the first Reply-To, subject, sent date, body and
// attachments are copied over. For event message requests the ical
// data is added as a text/calendar alternative when it is available.
// An error is returned if an attachment or the ical data cannot be
// produced, or if the mail library reports an error.
func FromMessageable(ctx context.Context, data models.Messageable) (string, error) {
	ctx = clues.Add(ctx, "item_id", ptr.Val(data.GetId()))

	email := mail.NewMSG()
	email.Encoding = mail.EncodingBase64 // Doing it to be safe for when we have eventMessage (newline issues)
	email.AllowDuplicateAddress = true   // More "correct" conversion
	email.AddBccToHeader = true          // Don't ignore Bcc
	email.AllowEmptyAttachments = true   // Don't error on empty attachments
	email.UseProvidedAddress = true      // Don't try to parse the email address

	if from := data.GetFrom(); from != nil {
		email.SetFrom(formatAddress(from.GetEmailAddress()))
	}

	// Ranging over a nil slice is a no-op, so the recipient lists need
	// no explicit nil checks.
	for _, recipient := range data.GetToRecipients() {
		email.AddTo(formatAddress(recipient.GetEmailAddress()))
	}

	for _, recipient := range data.GetCcRecipients() {
		email.AddCc(formatAddress(recipient.GetEmailAddress()))
	}

	for _, recipient := range data.GetBccRecipients() {
		email.AddBcc(formatAddress(recipient.GetEmailAddress()))
	}

	rts := data.GetReplyTo()
	if len(rts) > 1 {
		logger.Ctx(ctx).
			With("reply_to_count", len(rts)).
			Warn("more than 1 Reply-To, adding only the first one")
	}

	if len(rts) > 0 {
		email.SetReplyTo(formatAddress(rts[0].GetEmailAddress()))
	}

	if subject := data.GetSubject(); subject != nil {
		email.SetSubject(ptr.Val(subject))
	}

	if sent := data.GetSentDateTime(); sent != nil {
		email.SetDate(ptr.Val(sent).Format(dateFormat))
	}

	if body := data.GetBody(); body != nil {
		// A body without a content type is not added to the mail.
		if bodyType := body.GetContentType(); bodyType != nil {
			var contentType mail.ContentType

			switch bodyType.String() {
			case "html":
				contentType = mail.TextHTML
			case "text":
				contentType = mail.TextPlain
			default:
				// https://learn.microsoft.com/en-us/graph/api/resources/itembody?view=graph-rest-1.0#properties
				// This should not be possible according to the documentation
				logger.Ctx(ctx).
					With("body_type", bodyType.String()).
					Info("unknown body content type")

				contentType = mail.TextPlain
			}

			email.SetBody(contentType, ptr.Val(body.GetContent()))
		}
	}

	for _, attachment := range data.GetAttachments() {
		att, err := getMailAttachment(ctx, attachment)
		if err != nil {
			return "", clues.WrapWC(ctx, err, "getting mail attachment")
		}

		// There are known cases where we just wanna log and
		// ignore instead of erroring out
		if att != nil {
			email.Attach(att)
		}
	}

	switch data.(type) {
	case *models.EventMessageResponse, *models.EventMessage:
		// We can't handle this as of now, not enough information
		// TODO: Fetch event object from graph when fetching email
	case *models.CalendarSharingMessage:
		// TODO: Parse out calendar sharing message
		// https://github.com/alcionai/corso/issues/5041
	case *models.EventMessageRequest:
		cal, err := getICalData(ctx, data)
		if err != nil {
			// Wrapped with ctx like every other error in this function.
			return "", clues.WrapWC(ctx, err, "getting ical attachment")
		}

		if len(cal) > 0 {
			email.AddAlternative(mail.TextCalendar, cal)
		}
	}

	if err := email.GetError(); err != nil {
		return "", clues.WrapWC(ctx, err, "converting to eml")
	}

	return email.GetMessage(), nil
}
//-------------------------------------------------------------
// Postable -> EML
//-------------------------------------------------------------
// FromJSONPostToEML converts a postable (as json) to .eml format.
//
// The sender, creation date, body and attachments come from the post;
// the recipients and the subject come from postMetadata. Attachments
// without contentBytes are logged and skipped. An error is returned
// when the json cannot be converted to a postable, when attachment
// data cannot be read or is not a []byte, or when the mail library
// reports an error.
//
// TODO(pandeyabs): This is a stripped down copy of messageable to
// eml conversion, it can be folded into one function by having a post
// to messageable converter.
func FromJSONPostToEML(
	ctx context.Context,
	body []byte,
	postMetadata metadata.ConversationPostMetadata,
) (string, error) {
	ctx = clues.Add(ctx, "body_len", len(body))

	data, err := api.BytesToPostable(body)
	if err != nil {
		return "", clues.WrapWC(ctx, err, "converting to postable")
	}

	ctx = clues.Add(ctx, "item_id", ptr.Val(data.GetId()))

	email := mail.NewMSG()
	email.Encoding = mail.EncodingBase64 // Doing it to be safe for when we have eventMessage (newline issues)
	email.AllowDuplicateAddress = true   // More "correct" conversion
	email.AddBccToHeader = true          // Don't ignore Bcc
	email.AllowEmptyAttachments = true   // Don't error on empty attachments
	email.UseProvidedAddress = true      // Don't try to parse the email address

	if from := data.GetFrom(); from != nil {
		email.SetFrom(formatAddress(from.GetEmailAddress()))
	}

	// We don't have the To, Cc, Bcc recipient information for posts due to a graph
	// limitation. All posts carry the group email address as the only recipient
	// for now.
	email.AddTo(postMetadata.Recipients...)
	email.SetSubject(postMetadata.Topic)

	// Reply-To email address is not available for posts. Note that this is different
	// from inReplyTo field.

	if created := data.GetCreatedDateTime(); created != nil {
		email.SetDate(ptr.Val(created).Format(dateFormat))
	}

	if postBody := data.GetBody(); postBody != nil {
		// A body without a content type is not added to the mail.
		if bodyType := postBody.GetContentType(); bodyType != nil {
			var contentType mail.ContentType

			switch bodyType.String() {
			case "html":
				contentType = mail.TextHTML
			case "text":
				contentType = mail.TextPlain
			default:
				// https://learn.microsoft.com/en-us/graph/api/resources/itembody?view=graph-rest-1.0#properties
				// This should not be possible according to the documentation
				logger.Ctx(ctx).
					With("body_type", bodyType.String()).
					Info("unknown body content type")

				contentType = mail.TextPlain
			}

			email.SetBody(contentType, ptr.Val(postBody.GetContent()))
		}
	}

	// Ranging over a nil slice is a no-op, so no nil check is needed.
	for _, attachment := range data.GetAttachments() {
		kind := ptr.Val(attachment.GetContentType())

		content, err := attachment.GetBackingStore().Get("contentBytes")
		if err != nil {
			return "", clues.WrapWC(ctx, err, "failed to get attachment bytes").
				With("kind", kind)
		}

		if content == nil {
			// TODO(meain): Handle non file attachments
			// https://github.com/alcionai/corso/issues/4772
			//
			// TODO(pandeyabs): Above issue is for messages.
			// This is not a problem for posts but leaving it here for safety.
			logger.Ctx(ctx).
				With("attachment_id", ptr.Val(attachment.GetId()),
					"attachment_type", ptr.Val(attachment.GetOdataType())).
				Info("no contentBytes for attachment")

			continue
		}

		bts, ok := content.([]byte)
		if !ok {
			// err is always nil at this point (it was checked above),
			// so there is nothing to wrap; a new error has to be created.
			return "", clues.NewWC(ctx, "invalid content bytes").
				With("kind", kind).
				With("interface_type", fmt.Sprintf("%T", content))
		}

		name := ptr.Val(attachment.GetName())
		if len(name) == 0 {
			// Attachments without a name still need one to be exported.
			name = "Unnamed"
		}

		contentID, err := attachment.GetBackingStore().Get("contentId")
		if err != nil {
			return "", clues.WrapWC(ctx, err, "getting content id for attachment").
				With("kind", kind)
		}

		if contentID != nil {
			// The conversion error is deliberately ignored: when no
			// non-empty string comes back, the name chosen above is kept.
			cids, _ := str.AnyToString(contentID)
			if len(cids) > 0 {
				name = cids
			}
		}

		email.Attach(&mail.File{
			// cannot use filename as inline attachment will not get mapped properly
			Name:     name,
			MimeType: kind,
			Data:     bts,
			Inline:   ptr.Val(attachment.GetIsInline()),
		})
	}

	// Note: Posts cannot be of type EventMessageResponse, EventMessage or
	// CalendarSharingMessage. So we don't need to handle those cases here.

	if err = email.GetError(); err != nil {
		return "", clues.WrapWC(ctx, err, "converting to eml")
	}

	return email.GetMessage(), nil
}