diff --git a/src/cli/export/groups.go b/src/cli/export/groups.go index 8d5a9d51c..438cc54ef 100644 --- a/src/cli/export/groups.go +++ b/src/cli/export/groups.go @@ -7,7 +7,6 @@ import ( "github.com/alcionai/corso/src/cli/flags" "github.com/alcionai/corso/src/cli/utils" "github.com/alcionai/corso/src/pkg/control" - "github.com/alcionai/corso/src/pkg/selectors" ) // called by export.go to map subcommands to provider-specific handling. @@ -95,7 +94,7 @@ func exportGroupsCmd(cmd *cobra.Command, args []string) error { // TODO(pandeyabs): Exclude conversations from export since they are not // supported yet. https://github.com/alcionai/corso/issues/4822 - sel.Exclude(sel.Conversation(selectors.Any())) + // sel.Exclude(sel.Conversation(selectors.Any())) acceptedGroupsFormatTypes := []string{ string(control.DefaultFormat), diff --git a/src/internal/converters/eml/eml.go b/src/internal/converters/eml/eml.go index 9b0607a4c..842c57828 100644 --- a/src/internal/converters/eml/eml.go +++ b/src/internal/converters/eml/eml.go @@ -303,3 +303,138 @@ func FromJSON(ctx context.Context, body []byte) (string, error) { return email.GetMessage(), nil } + +// FromJSON converts a Messageable (as json) to .eml format +func FromJSONPost(ctx context.Context, body []byte, topic string) (string, error) { + data, err := api.BytesToPostable(body) + if err != nil { + return "", clues.WrapWC(ctx, err, "converting to postable") + } + + ctx = clues.Add(ctx, "item_id", ptr.Val(data.GetId())) + + email := mail.NewMSG() + email.AllowDuplicateAddress = true // More "correct" conversion + email.AddBccToHeader = true // Don't ignore Bcc + email.AllowEmptyAttachments = true // Don't error on empty attachments + email.UseProvidedAddress = true // Don't try to parse the email address + + if data.GetFrom() != nil { + email.SetFrom(formatAddress(data.GetFrom().GetEmailAddress())) + } + + if data.GetReceivedDateTime() != nil { + email.SetDate(ptr.Val(data.GetReceivedDateTime()).Format(dateFormat)) + } + + // if data.GetSender() != nil { + // email.SetReplyTo(formatAddress(data.GetSender().GetEmailAddress())) + // } + + // if data.GetCcRecipients() != nil { + // for _, recipient := range data.GetCcRecipients() { + // email.AddCc(formatAddress(recipient.GetEmailAddress())) + // } + // } + + // if data.GetBccRecipients() != nil { + // for _, recipient := range data.GetBccRecipients() { + // email.AddBcc(formatAddress(recipient.GetEmailAddress())) + // } + // } + + // if data.GetInReplyTo() != nil { + // rts := data.GetInReplyTo() + + // email.SetReplyTo(formatAddress(rts.GetEmailAddress())) + + // } + + email.SetSubject(topic) + email.AddTo("dc_test@10rqc2.onmicrosoft.com") + + // if data.GetSentDateTime() != nil { + // email.SetDate(ptr.Val(data.GetSentDateTime()).Format(dateFormat)) + // } + + if data.GetBody() != nil { + if data.GetBody().GetContentType() != nil { + var contentType mail.ContentType + + switch data.GetBody().GetContentType().String() { + case "html": + contentType = mail.TextHTML + case "text": + contentType = mail.TextPlain + default: + // https://learn.microsoft.com/en-us/graph/api/resources/itembody?view=graph-rest-1.0#properties + // This should not be possible according to the documentation + logger.Ctx(ctx). + With("body_type", data.GetBody().GetContentType().String()). + Info("unknown body content type") + + contentType = mail.TextPlain + } + + email.SetBody(contentType, ptr.Val(data.GetBody().GetContent())) + } + } + + if data.GetAttachments() != nil { + for _, attachment := range data.GetAttachments() { + kind := ptr.Val(attachment.GetContentType()) + + bytes, err := attachment.GetBackingStore().Get("contentBytes") + if err != nil { + return "", clues.WrapWC(ctx, err, "failed to get attachment bytes") + } + + if bytes == nil { + // Some attachments have an "item" field instead of + // "contentBytes". There are items like contacts, emails + // or calendar events which will not be a normal format + // and will have to be converted to a text format. + // TODO(meain): Handle custom attachments + // https://github.com/alcionai/corso/issues/4772 + logger.Ctx(ctx). + With("attachment_id", ptr.Val(attachment.GetId())). + Info("unhandled attachment type") + + continue + } + + bts, ok := bytes.([]byte) + if !ok { + return "", clues.WrapWC(ctx, err, "invalid content bytes") + } + + name := ptr.Val(attachment.GetName()) + + contentID, err := attachment.GetBackingStore().Get("contentId") + if err != nil { + return "", clues.WrapWC(ctx, err, "getting content id for attachment") + } + + if contentID != nil { + cids, _ := str.AnyToString(contentID) + if len(cids) > 0 { + name = cids + } + } + + email.Attach(&mail.File{ + // cannot use filename as inline attachment will not get mapped properly + Name: name, + MimeType: kind, + Data: bts, + Inline: ptr.Val(attachment.GetIsInline()), + }) + } + } + + if err = email.GetError(); err != nil { + return "", clues.WrapWC(ctx, err, "converting to eml") + } + + return email.GetMessage(), nil +} diff --git a/src/internal/m365/collection/groups/export.go b/src/internal/m365/collection/groups/export.go index c7577296b..050e0f7f8 100644 --- a/src/internal/m365/collection/groups/export.go +++ b/src/internal/m365/collection/groups/export.go @@ -11,10 +11,12 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/converters/eml" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/export" "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/metrics" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/services/m365/api" @@ -26,19 +28,25 @@ func NewExportCollection( backupVersion int, cec control.ExportConfig, stats *metrics.ExportStats, + cat path.CategoryType, ) export.Collectioner { + s := streamChannelItems + if cat == path.ConversationPostsCategory { + s = streamConversationPosts + } + return export.BaseCollection{ BaseDir: baseDir, BackingCollection: backingCollections, BackupVersion: backupVersion, Cfg: cec, - Stream: streamItems, + Stream: s, Stats: stats, } } // streamItems streams the items in the backingCollection into the export stream chan -func streamItems( +func streamChannelItems( ctx context.Context, drc []data.RestoreCollection, backupVersion int, @@ -198,3 +206,126 @@ func makeMinimumChannelMesasge(item models.ChatMessageable) minimumChannelMessag Subject: ptr.Val(item.GetSubject()), } } + +// streamItems streams the items in the backingCollection into the export stream chan +func streamConversationPosts( + ctx context.Context, + drc []data.RestoreCollection, + backupVersion int, + cec control.ExportConfig, + ch chan<- export.Item, + stats *metrics.ExportStats, +) { + defer close(ch) + + errs := fault.New(false) + + for _, rc := range drc { + ictx := clues.Add(ctx, "path_short_ref", rc.FullPath().ShortRef()) + + for item := range rc.Items(ctx, errs) { + name := item.ID() + ".eml" + + itemCtx := clues.Add(ictx, "stream_item_id", item.ID()) + + reader := item.ToReader() + content, err := io.ReadAll(reader) + + reader.Close() + + if err != nil { + ch <- export.Item{ + ID: item.ID(), + Error: err, + } + + continue + } + + topic := rc.FullPath().Folders()[0] + email, err := eml.FromJSONPost(itemCtx, content, topic) + if err != nil { + err = clues.Wrap(err, "converting JSON to eml") + + logger.CtxErr(ctx, err).Info("processing collection item") + + ch <- export.Item{ + ID: item.ID(), + Error: err, + } + + continue + } + + emlReader := io.NopCloser(bytes.NewReader([]byte(email))) + body := metrics.ReaderWithStats(emlReader, path.EmailCategory, stats) + + ch <- export.Item{ + ID: item.ID(), + Name: name, + Body: body, + } + } + + items, recovered := errs.ItemsAndRecovered() + + // Return all the items that we failed to source from the persistence layer + for _, item := range items { + ch <- export.Item{ + ID: item.ID, + Error: &item, + } + } + + for _, err := range recovered { + ch <- export.Item{ + Error: err, + } + } + } +} + +// func formatConversationPost( +// cec control.ExportConfig, +// rc io.ReadCloser, +// ) (io.ReadCloser, error) { +// if cec.Format == control.JSONFormat { +// return rc, nil +// } + +// bs, err := io.ReadAll(rc) +// if err != nil { +// return nil, clues.Wrap(err, "reading item bytes") +// } + +// defer rc.Close() + +// cfb, err := api.CreateFromBytes(bs, models.CreateChatMessageFromDiscriminatorValue) +// if err != nil { +// return nil, clues.Wrap(err, "deserializing bytes to message") +// } + +// msg, ok := cfb.(models.ChatMessageable) +// if !ok { +// return nil, clues.New("expected deserialized item to implement models.ChatMessageable") +// } + +// mItem := makeMinimumChannelMesasge(msg) +// replies := msg.GetReplies() + +// mcmar := minimumChannelMessageAndReplies{ +// minimumChannelMessage: mItem, +// Replies: make([]minimumChannelMessage, 0, len(replies)), +// } + +// for _, r := range replies { +// mcmar.Replies = append(mcmar.Replies, makeMinimumChannelMesasge(r)) +// } + +// bs, err = marshalJSONContainingHTML(mcmar) +// if err != nil { +// return nil, clues.Wrap(err, "serializing minimized channel message") +// } + +// return io.NopCloser(bytes.NewReader(bs)), nil +// } diff --git a/src/internal/m365/service/groups/export.go b/src/internal/m365/service/groups/export.go index c7610dc20..15457baab 100644 --- a/src/internal/m365/service/groups/export.go +++ b/src/internal/m365/service/groups/export.go @@ -90,7 +90,8 @@ func (h *baseGroupsHandler) ProduceExportCollections( []data.RestoreCollection{restoreColl}, backupVersion, exportCfg, - stats) + stats, + cat) case path.LibrariesCategory: drivePath, err := path.ToDrivePath(restoreColl.FullPath()) diff --git a/src/pkg/services/m365/api/mail.go b/src/pkg/services/m365/api/mail.go index 193cddd32..cfa8abe29 100644 --- a/src/pkg/services/m365/api/mail.go +++ b/src/pkg/services/m365/api/mail.go @@ -629,6 +629,32 @@ func BytesToMessageable(body []byte) (models.Messageable, error) { return v.(models.Messageable), nil } +func bytesToPostable(body []byte) (models.Postable, error) { + v, err := CreateFromBytes(body, models.CreatePostFromDiscriminatorValue) + if err != nil { + if !strings.Contains(err.Error(), invalidJSON) { + return nil, clues.Wrap(err, "deserializing bytes to message") + } + + // If the JSON was invalid try sanitizing and deserializing again. + // Sanitizing should transform characters < 0x20 according to the spec where + // possible. The resulting JSON may still be invalid though. + body = sanitize.JSONBytes(body) + v, err = CreateFromBytes(body, models.CreateMessageFromDiscriminatorValue) + } + + return v.(models.Postable), clues.Stack(err).OrNil() +} + +func BytesToPostable(body []byte) (models.Postable, error) { + v, err := bytesToPostable(body) + if err != nil { + return nil, clues.Stack(err) + } + + return v.(models.Postable), nil +} + func (c Mail) Serialize( ctx context.Context, item serialization.Parsable,