json to eml

This commit is contained in:
Abhishek Pandey 2024-01-05 14:43:28 -08:00
parent 5ec4e1d21d
commit 0b4ea2f213
5 changed files with 297 additions and 5 deletions

View File

@ -7,7 +7,6 @@ import (
"github.com/alcionai/corso/src/cli/flags"
"github.com/alcionai/corso/src/cli/utils"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/selectors"
)
// called by export.go to map subcommands to provider-specific handling.
@ -95,7 +94,7 @@ func exportGroupsCmd(cmd *cobra.Command, args []string) error {
// TODO(pandeyabs): Exclude conversations from export since they are not
// supported yet. https://github.com/alcionai/corso/issues/4822
sel.Exclude(sel.Conversation(selectors.Any()))
// sel.Exclude(sel.Conversation(selectors.Any()))
acceptedGroupsFormatTypes := []string{
string(control.DefaultFormat),

View File

@ -303,3 +303,138 @@ func FromJSON(ctx context.Context, body []byte) (string, error) {
return email.GetMessage(), nil
}
// FromJSONPost converts a Postable (as json) to .eml format.
//
// body is the serialized post and topic is used as the subject of the
// generated email. Only the sender (From), received date, subject, body and
// attachments are mapped; reply-to, Cc, Bcc and sent date are not populated.
//
// It returns the serialized .eml message, or an error when the body cannot be
// deserialized into a postable, when attachment data cannot be read from the
// backing store, or when the mail builder reports a failure.
func FromJSONPost(ctx context.Context, body []byte, topic string) (string, error) {
	data, err := api.BytesToPostable(body)
	if err != nil {
		return "", clues.WrapWC(ctx, err, "converting to postable")
	}

	ctx = clues.Add(ctx, "item_id", ptr.Val(data.GetId()))

	email := mail.NewMSG()
	email.AllowDuplicateAddress = true // More "correct" conversion
	email.AddBccToHeader = true        // Don't ignore Bcc
	email.AllowEmptyAttachments = true // Don't error on empty attachments
	email.UseProvidedAddress = true    // Don't try to parse the email address

	if data.GetFrom() != nil {
		email.SetFrom(formatAddress(data.GetFrom().GetEmailAddress()))
	}

	if data.GetReceivedDateTime() != nil {
		email.SetDate(ptr.Val(data.GetReceivedDateTime()).Format(dateFormat))
	}

	email.SetSubject(topic)

	// NOTE(review): this recipient is hard-coded and looks like a test
	// address. It presumably needs to be replaced with the real recipient(s)
	// of the post once the caller can supply them -- confirm before release.
	email.AddTo("dc_test@10rqc2.onmicrosoft.com")

	if postBody := data.GetBody(); postBody != nil && postBody.GetContentType() != nil {
		var contentType mail.ContentType

		switch postBody.GetContentType().String() {
		case "html":
			contentType = mail.TextHTML
		case "text":
			contentType = mail.TextPlain
		default:
			// https://learn.microsoft.com/en-us/graph/api/resources/itembody?view=graph-rest-1.0#properties
			// This should not be possible according to the documentation
			logger.Ctx(ctx).
				With("body_type", postBody.GetContentType().String()).
				Info("unknown body content type")

			contentType = mail.TextPlain
		}

		email.SetBody(contentType, ptr.Val(postBody.GetContent()))
	}

	// Ranging over a nil slice is a no-op, so no nil check is needed.
	for _, attachment := range data.GetAttachments() {
		kind := ptr.Val(attachment.GetContentType())

		rawContent, err := attachment.GetBackingStore().Get("contentBytes")
		if err != nil {
			return "", clues.WrapWC(ctx, err, "failed to get attachment bytes")
		}

		if rawContent == nil {
			// Some attachments have an "item" field instead of
			// "contentBytes". There are items like contacts, emails
			// or calendar events which will not be a normal format
			// and will have to be converted to a text format.
			// TODO(meain): Handle custom attachments
			// https://github.com/alcionai/corso/issues/4772
			logger.Ctx(ctx).
				With("attachment_id", ptr.Val(attachment.GetId())).
				Info("unhandled attachment type")

			continue
		}

		bts, ok := rawContent.([]byte)
		if !ok {
			// err is nil here, so there is nothing to wrap; create a new
			// error instead of wrapping a nil one.
			return "", clues.NewWC(ctx, "invalid content bytes")
		}

		name := ptr.Val(attachment.GetName())

		contentID, err := attachment.GetBackingStore().Get("contentId")
		if err != nil {
			return "", clues.WrapWC(ctx, err, "getting content id for attachment")
		}

		if contentID != nil {
			// A failed conversion yields an empty string, in which case the
			// attachment keeps its original name.
			cids, _ := str.AnyToString(contentID)
			if len(cids) > 0 {
				name = cids
			}
		}

		email.Attach(&mail.File{
			// cannot use filename as inline attachment will not get mapped properly
			Name:     name,
			MimeType: kind,
			Data:     bts,
			Inline:   ptr.Val(attachment.GetIsInline()),
		})
	}

	if err = email.GetError(); err != nil {
		return "", clues.WrapWC(ctx, err, "converting to eml")
	}

	return email.GetMessage(), nil
}

View File

@ -11,10 +11,12 @@ import (
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/converters/eml"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/export"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/metrics"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/services/m365/api"
@ -26,19 +28,25 @@ func NewExportCollection(
backupVersion int,
cec control.ExportConfig,
stats *metrics.ExportStats,
cat path.CategoryType,
) export.Collectioner {
s := streamChannelItems
if cat == path.ConversationPostsCategory {
s = streamConversationPosts
}
return export.BaseCollection{
BaseDir: baseDir,
BackingCollection: backingCollections,
BackupVersion: backupVersion,
Cfg: cec,
Stream: streamItems,
Stream: s,
Stats: stats,
}
}
// streamItems streams the items in the backingCollection into the export stream chan
func streamItems(
func streamChannelItems(
ctx context.Context,
drc []data.RestoreCollection,
backupVersion int,
@ -198,3 +206,126 @@ func makeMinimumChannelMesasge(item models.ChatMessageable) minimumChannelMessag
Subject: ptr.Val(item.GetSubject()),
}
}
// streamConversationPosts streams the items in the backing collections into
// the export stream chan, converting every conversation post from its JSON
// form to .eml.
//
// The first folder of a collection's path is passed to the converter as the
// post topic; when the path has no folders the topic is left empty. Items that
// cannot be read or converted are reported on the channel as error items and
// do not stop the stream. Failures recorded on the fault bus while iterating
// the collections are reported once, after all collections are processed. The
// channel is closed when the function returns.
//
// backupVersion and cec are unused here; they are part of the stream function
// signature.
func streamConversationPosts(
	ctx context.Context,
	drc []data.RestoreCollection,
	backupVersion int,
	cec control.ExportConfig,
	ch chan<- export.Item,
	stats *metrics.ExportStats,
) {
	defer close(ch)

	errs := fault.New(false)

	for _, rc := range drc {
		ictx := clues.Add(ctx, "path_short_ref", rc.FullPath().ShortRef())

		// The topic is the same for every item in the collection. Guard the
		// lookup so a path without folders cannot panic the stream.
		var topic string
		if folders := rc.FullPath().Folders(); len(folders) > 0 {
			topic = folders[0]
		}

		for item := range rc.Items(ctx, errs) {
			name := item.ID() + ".eml"
			itemCtx := clues.Add(ictx, "stream_item_id", item.ID())

			reader := item.ToReader()
			content, err := io.ReadAll(reader)

			reader.Close()

			if err != nil {
				ch <- export.Item{
					ID:    item.ID(),
					Error: err,
				}

				continue
			}

			email, err := eml.FromJSONPost(itemCtx, content, topic)
			if err != nil {
				err = clues.Wrap(err, "converting JSON to eml")

				// Log with the item context so the entry carries the item id.
				logger.CtxErr(itemCtx, err).Info("processing collection item")

				ch <- export.Item{
					ID:    item.ID(),
					Error: err,
				}

				continue
			}

			emlReader := io.NopCloser(bytes.NewReader([]byte(email)))
			body := metrics.ReaderWithStats(emlReader, path.ConversationPostsCategory, stats)

			ch <- export.Item{
				ID:   item.ID(),
				Name: name,
				Body: body,
			}
		}
	}

	// The fault bus accumulates across collections, so drain it once at the
	// end; draining per collection would re-emit earlier failures.
	items, recovered := errs.ItemsAndRecovered()

	// Return all the items that we failed to source from the persistence layer
	for _, item := range items {
		// Copy so every emitted error points at its own item rather than at
		// the shared range variable.
		item := item

		ch <- export.Item{
			ID:    item.ID,
			Error: &item,
		}
	}

	for _, err := range recovered {
		ch <- export.Item{
			Error: err,
		}
	}
}
// func formatConversationPost(
// cec control.ExportConfig,
// rc io.ReadCloser,
// ) (io.ReadCloser, error) {
// if cec.Format == control.JSONFormat {
// return rc, nil
// }
// bs, err := io.ReadAll(rc)
// if err != nil {
// return nil, clues.Wrap(err, "reading item bytes")
// }
// defer rc.Close()
// cfb, err := api.CreateFromBytes(bs, models.CreateChatMessageFromDiscriminatorValue)
// if err != nil {
// return nil, clues.Wrap(err, "deserializing bytes to message")
// }
// msg, ok := cfb.(models.ChatMessageable)
// if !ok {
// return nil, clues.New("expected deserialized item to implement models.ChatMessageable")
// }
// mItem := makeMinimumChannelMesasge(msg)
// replies := msg.GetReplies()
// mcmar := minimumChannelMessageAndReplies{
// minimumChannelMessage: mItem,
// Replies: make([]minimumChannelMessage, 0, len(replies)),
// }
// for _, r := range replies {
// mcmar.Replies = append(mcmar.Replies, makeMinimumChannelMesasge(r))
// }
// bs, err = marshalJSONContainingHTML(mcmar)
// if err != nil {
// return nil, clues.Wrap(err, "serializing minimized channel message")
// }
// return io.NopCloser(bytes.NewReader(bs)), nil
// }

View File

@ -90,7 +90,8 @@ func (h *baseGroupsHandler) ProduceExportCollections(
[]data.RestoreCollection{restoreColl},
backupVersion,
exportCfg,
stats)
stats,
cat)
case path.LibrariesCategory:
drivePath, err := path.ToDrivePath(restoreColl.FullPath())

View File

@ -629,6 +629,32 @@ func BytesToMessageable(body []byte) (models.Messageable, error) {
return v.(models.Messageable), nil
}
// bytesToPostable deserializes body into a models.Postable.
//
// When the first attempt fails because the JSON is invalid, the body is
// sanitized and deserialization is attempted once more. Any other failure, a
// failed retry, or a deserialized value that is not a models.Postable is
// returned as an error.
func bytesToPostable(body []byte) (models.Postable, error) {
	v, err := CreateFromBytes(body, models.CreatePostFromDiscriminatorValue)
	if err != nil {
		if !strings.Contains(err.Error(), invalidJSON) {
			return nil, clues.Wrap(err, "deserializing bytes to post")
		}

		// If the JSON was invalid try sanitizing and deserializing again.
		// Sanitizing should transform characters < 0x20 according to the spec where
		// possible. The resulting JSON may still be invalid though.
		body = sanitize.JSONBytes(body)

		v, err = CreateFromBytes(body, models.CreatePostFromDiscriminatorValue)
		if err != nil {
			return nil, clues.Stack(err)
		}
	}

	// Use the checked form of the assertion so an unexpected (or nil) value
	// becomes an error instead of a panic.
	post, ok := v.(models.Postable)
	if !ok {
		return nil, clues.New("deserialized item does not implement models.Postable")
	}

	return post, nil
}
// BytesToPostable deserializes body into a models.Postable. It returns an
// error if the bytes cannot be deserialized.
func BytesToPostable(body []byte) (models.Postable, error) {
	post, err := bytesToPostable(body)
	if err != nil {
		return nil, clues.Stack(err)
	}

	// post is already a models.Postable; re-asserting the type would only add
	// a panic path for a nil value.
	return post, nil
}
func (c Mail) Serialize(
ctx context.Context,
item serialization.Parsable,