json to eml
This commit is contained in:
parent
5ec4e1d21d
commit
0b4ea2f213
@ -7,7 +7,6 @@ import (
|
||||
"github.com/alcionai/corso/src/cli/flags"
|
||||
"github.com/alcionai/corso/src/cli/utils"
|
||||
"github.com/alcionai/corso/src/pkg/control"
|
||||
"github.com/alcionai/corso/src/pkg/selectors"
|
||||
)
|
||||
|
||||
// called by export.go to map subcommands to provider-specific handling.
|
||||
@ -95,7 +94,7 @@ func exportGroupsCmd(cmd *cobra.Command, args []string) error {
|
||||
|
||||
// TODO(pandeyabs): Exclude conversations from export since they are not
|
||||
// supported yet. https://github.com/alcionai/corso/issues/4822
|
||||
sel.Exclude(sel.Conversation(selectors.Any()))
|
||||
// sel.Exclude(sel.Conversation(selectors.Any()))
|
||||
|
||||
acceptedGroupsFormatTypes := []string{
|
||||
string(control.DefaultFormat),
|
||||
|
||||
@ -303,3 +303,138 @@ func FromJSON(ctx context.Context, body []byte) (string, error) {
|
||||
|
||||
return email.GetMessage(), nil
|
||||
}
|
||||
|
||||
// FromJSON converts a Messageable (as json) to .eml format
|
||||
func FromJSONPost(ctx context.Context, body []byte, topic string) (string, error) {
|
||||
data, err := api.BytesToPostable(body)
|
||||
if err != nil {
|
||||
return "", clues.WrapWC(ctx, err, "converting to postable")
|
||||
}
|
||||
|
||||
ctx = clues.Add(ctx, "item_id", ptr.Val(data.GetId()))
|
||||
|
||||
email := mail.NewMSG()
|
||||
email.AllowDuplicateAddress = true // More "correct" conversion
|
||||
email.AddBccToHeader = true // Don't ignore Bcc
|
||||
email.AllowEmptyAttachments = true // Don't error on empty attachments
|
||||
email.UseProvidedAddress = true // Don't try to parse the email address
|
||||
|
||||
if data.GetFrom() != nil {
|
||||
email.SetFrom(formatAddress(data.GetFrom().GetEmailAddress()))
|
||||
}
|
||||
|
||||
if data.GetReceivedDateTime() != nil {
|
||||
email.SetDate(ptr.Val(data.GetReceivedDateTime()).Format(dateFormat))
|
||||
}
|
||||
|
||||
// if data.GetSender() != nil {
|
||||
// email.SetReplyTo(formatAddress(data.GetSender().GetEmailAddress()))
|
||||
// }
|
||||
|
||||
// if data.GetCcRecipients() != nil {
|
||||
// for _, recipient := range data.GetCcRecipients() {
|
||||
// email.AddCc(formatAddress(recipient.GetEmailAddress()))
|
||||
// }
|
||||
// }
|
||||
|
||||
// if data.GetBccRecipients() != nil {
|
||||
// for _, recipient := range data.GetBccRecipients() {
|
||||
// email.AddBcc(formatAddress(recipient.GetEmailAddress()))
|
||||
// }
|
||||
// }
|
||||
|
||||
// if data.GetInReplyTo() != nil {
|
||||
// rts := data.GetInReplyTo()
|
||||
|
||||
// email.SetReplyTo(formatAddress(rts.GetEmailAddress()))
|
||||
|
||||
// }
|
||||
|
||||
email.SetSubject(topic)
|
||||
email.AddTo("dc_test@10rqc2.onmicrosoft.com")
|
||||
|
||||
// if data.GetSentDateTime() != nil {
|
||||
// email.SetDate(ptr.Val(data.GetSentDateTime()).Format(dateFormat))
|
||||
// }
|
||||
|
||||
if data.GetBody() != nil {
|
||||
if data.GetBody().GetContentType() != nil {
|
||||
var contentType mail.ContentType
|
||||
|
||||
switch data.GetBody().GetContentType().String() {
|
||||
case "html":
|
||||
contentType = mail.TextHTML
|
||||
case "text":
|
||||
contentType = mail.TextPlain
|
||||
default:
|
||||
// https://learn.microsoft.com/en-us/graph/api/resources/itembody?view=graph-rest-1.0#properties
|
||||
// This should not be possible according to the documentation
|
||||
logger.Ctx(ctx).
|
||||
With("body_type", data.GetBody().GetContentType().String()).
|
||||
Info("unknown body content type")
|
||||
|
||||
contentType = mail.TextPlain
|
||||
}
|
||||
|
||||
email.SetBody(contentType, ptr.Val(data.GetBody().GetContent()))
|
||||
}
|
||||
}
|
||||
|
||||
if data.GetAttachments() != nil {
|
||||
for _, attachment := range data.GetAttachments() {
|
||||
kind := ptr.Val(attachment.GetContentType())
|
||||
|
||||
bytes, err := attachment.GetBackingStore().Get("contentBytes")
|
||||
if err != nil {
|
||||
return "", clues.WrapWC(ctx, err, "failed to get attachment bytes")
|
||||
}
|
||||
|
||||
if bytes == nil {
|
||||
// Some attachments have an "item" field instead of
|
||||
// "contentBytes". There are items like contacts, emails
|
||||
// or calendar events which will not be a normal format
|
||||
// and will have to be converted to a text format.
|
||||
// TODO(meain): Handle custom attachments
|
||||
// https://github.com/alcionai/corso/issues/4772
|
||||
logger.Ctx(ctx).
|
||||
With("attachment_id", ptr.Val(attachment.GetId())).
|
||||
Info("unhandled attachment type")
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
bts, ok := bytes.([]byte)
|
||||
if !ok {
|
||||
return "", clues.WrapWC(ctx, err, "invalid content bytes")
|
||||
}
|
||||
|
||||
name := ptr.Val(attachment.GetName())
|
||||
|
||||
contentID, err := attachment.GetBackingStore().Get("contentId")
|
||||
if err != nil {
|
||||
return "", clues.WrapWC(ctx, err, "getting content id for attachment")
|
||||
}
|
||||
|
||||
if contentID != nil {
|
||||
cids, _ := str.AnyToString(contentID)
|
||||
if len(cids) > 0 {
|
||||
name = cids
|
||||
}
|
||||
}
|
||||
|
||||
email.Attach(&mail.File{
|
||||
// cannot use filename as inline attachment will not get mapped properly
|
||||
Name: name,
|
||||
MimeType: kind,
|
||||
Data: bts,
|
||||
Inline: ptr.Val(attachment.GetIsInline()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if err = email.GetError(); err != nil {
|
||||
return "", clues.WrapWC(ctx, err, "converting to eml")
|
||||
}
|
||||
|
||||
return email.GetMessage(), nil
|
||||
}
|
||||
|
||||
@ -11,10 +11,12 @@ import (
|
||||
"github.com/microsoftgraph/msgraph-sdk-go/models"
|
||||
|
||||
"github.com/alcionai/corso/src/internal/common/ptr"
|
||||
"github.com/alcionai/corso/src/internal/converters/eml"
|
||||
"github.com/alcionai/corso/src/internal/data"
|
||||
"github.com/alcionai/corso/src/pkg/control"
|
||||
"github.com/alcionai/corso/src/pkg/export"
|
||||
"github.com/alcionai/corso/src/pkg/fault"
|
||||
"github.com/alcionai/corso/src/pkg/logger"
|
||||
"github.com/alcionai/corso/src/pkg/metrics"
|
||||
"github.com/alcionai/corso/src/pkg/path"
|
||||
"github.com/alcionai/corso/src/pkg/services/m365/api"
|
||||
@ -26,19 +28,25 @@ func NewExportCollection(
|
||||
backupVersion int,
|
||||
cec control.ExportConfig,
|
||||
stats *metrics.ExportStats,
|
||||
cat path.CategoryType,
|
||||
) export.Collectioner {
|
||||
s := streamChannelItems
|
||||
if cat == path.ConversationPostsCategory {
|
||||
s = streamConversationPosts
|
||||
}
|
||||
|
||||
return export.BaseCollection{
|
||||
BaseDir: baseDir,
|
||||
BackingCollection: backingCollections,
|
||||
BackupVersion: backupVersion,
|
||||
Cfg: cec,
|
||||
Stream: streamItems,
|
||||
Stream: s,
|
||||
Stats: stats,
|
||||
}
|
||||
}
|
||||
|
||||
// streamItems streams the items in the backingCollection into the export stream chan
|
||||
func streamItems(
|
||||
func streamChannelItems(
|
||||
ctx context.Context,
|
||||
drc []data.RestoreCollection,
|
||||
backupVersion int,
|
||||
@ -198,3 +206,126 @@ func makeMinimumChannelMesasge(item models.ChatMessageable) minimumChannelMessag
|
||||
Subject: ptr.Val(item.GetSubject()),
|
||||
}
|
||||
}
|
||||
|
||||
// streamItems streams the items in the backingCollection into the export stream chan
|
||||
func streamConversationPosts(
|
||||
ctx context.Context,
|
||||
drc []data.RestoreCollection,
|
||||
backupVersion int,
|
||||
cec control.ExportConfig,
|
||||
ch chan<- export.Item,
|
||||
stats *metrics.ExportStats,
|
||||
) {
|
||||
defer close(ch)
|
||||
|
||||
errs := fault.New(false)
|
||||
|
||||
for _, rc := range drc {
|
||||
ictx := clues.Add(ctx, "path_short_ref", rc.FullPath().ShortRef())
|
||||
|
||||
for item := range rc.Items(ctx, errs) {
|
||||
name := item.ID() + ".eml"
|
||||
|
||||
itemCtx := clues.Add(ictx, "stream_item_id", item.ID())
|
||||
|
||||
reader := item.ToReader()
|
||||
content, err := io.ReadAll(reader)
|
||||
|
||||
reader.Close()
|
||||
|
||||
if err != nil {
|
||||
ch <- export.Item{
|
||||
ID: item.ID(),
|
||||
Error: err,
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
topic := rc.FullPath().Folders()[0]
|
||||
email, err := eml.FromJSONPost(itemCtx, content, topic)
|
||||
if err != nil {
|
||||
err = clues.Wrap(err, "converting JSON to eml")
|
||||
|
||||
logger.CtxErr(ctx, err).Info("processing collection item")
|
||||
|
||||
ch <- export.Item{
|
||||
ID: item.ID(),
|
||||
Error: err,
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
emlReader := io.NopCloser(bytes.NewReader([]byte(email)))
|
||||
body := metrics.ReaderWithStats(emlReader, path.EmailCategory, stats)
|
||||
|
||||
ch <- export.Item{
|
||||
ID: item.ID(),
|
||||
Name: name,
|
||||
Body: body,
|
||||
}
|
||||
}
|
||||
|
||||
items, recovered := errs.ItemsAndRecovered()
|
||||
|
||||
// Return all the items that we failed to source from the persistence layer
|
||||
for _, item := range items {
|
||||
ch <- export.Item{
|
||||
ID: item.ID,
|
||||
Error: &item,
|
||||
}
|
||||
}
|
||||
|
||||
for _, err := range recovered {
|
||||
ch <- export.Item{
|
||||
Error: err,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// func formatConversationPost(
|
||||
// cec control.ExportConfig,
|
||||
// rc io.ReadCloser,
|
||||
// ) (io.ReadCloser, error) {
|
||||
// if cec.Format == control.JSONFormat {
|
||||
// return rc, nil
|
||||
// }
|
||||
|
||||
// bs, err := io.ReadAll(rc)
|
||||
// if err != nil {
|
||||
// return nil, clues.Wrap(err, "reading item bytes")
|
||||
// }
|
||||
|
||||
// defer rc.Close()
|
||||
|
||||
// cfb, err := api.CreateFromBytes(bs, models.CreateChatMessageFromDiscriminatorValue)
|
||||
// if err != nil {
|
||||
// return nil, clues.Wrap(err, "deserializing bytes to message")
|
||||
// }
|
||||
|
||||
// msg, ok := cfb.(models.ChatMessageable)
|
||||
// if !ok {
|
||||
// return nil, clues.New("expected deserialized item to implement models.ChatMessageable")
|
||||
// }
|
||||
|
||||
// mItem := makeMinimumChannelMesasge(msg)
|
||||
// replies := msg.GetReplies()
|
||||
|
||||
// mcmar := minimumChannelMessageAndReplies{
|
||||
// minimumChannelMessage: mItem,
|
||||
// Replies: make([]minimumChannelMessage, 0, len(replies)),
|
||||
// }
|
||||
|
||||
// for _, r := range replies {
|
||||
// mcmar.Replies = append(mcmar.Replies, makeMinimumChannelMesasge(r))
|
||||
// }
|
||||
|
||||
// bs, err = marshalJSONContainingHTML(mcmar)
|
||||
// if err != nil {
|
||||
// return nil, clues.Wrap(err, "serializing minimized channel message")
|
||||
// }
|
||||
|
||||
// return io.NopCloser(bytes.NewReader(bs)), nil
|
||||
// }
|
||||
|
||||
@ -90,7 +90,8 @@ func (h *baseGroupsHandler) ProduceExportCollections(
|
||||
[]data.RestoreCollection{restoreColl},
|
||||
backupVersion,
|
||||
exportCfg,
|
||||
stats)
|
||||
stats,
|
||||
cat)
|
||||
|
||||
case path.LibrariesCategory:
|
||||
drivePath, err := path.ToDrivePath(restoreColl.FullPath())
|
||||
|
||||
@ -629,6 +629,32 @@ func BytesToMessageable(body []byte) (models.Messageable, error) {
|
||||
return v.(models.Messageable), nil
|
||||
}
|
||||
|
||||
func bytesToPostable(body []byte) (models.Postable, error) {
|
||||
v, err := CreateFromBytes(body, models.CreatePostFromDiscriminatorValue)
|
||||
if err != nil {
|
||||
if !strings.Contains(err.Error(), invalidJSON) {
|
||||
return nil, clues.Wrap(err, "deserializing bytes to message")
|
||||
}
|
||||
|
||||
// If the JSON was invalid try sanitizing and deserializing again.
|
||||
// Sanitizing should transform characters < 0x20 according to the spec where
|
||||
// possible. The resulting JSON may still be invalid though.
|
||||
body = sanitize.JSONBytes(body)
|
||||
v, err = CreateFromBytes(body, models.CreateMessageFromDiscriminatorValue)
|
||||
}
|
||||
|
||||
return v.(models.Postable), clues.Stack(err).OrNil()
|
||||
}
|
||||
|
||||
func BytesToPostable(body []byte) (models.Postable, error) {
|
||||
v, err := bytesToPostable(body)
|
||||
if err != nil {
|
||||
return nil, clues.Stack(err)
|
||||
}
|
||||
|
||||
return v.(models.Postable), nil
|
||||
}
|
||||
|
||||
func (c Mail) Serialize(
|
||||
ctx context.Context,
|
||||
item serialization.Parsable,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user