From d9b5cda8f1f642b7055607580f8c25378bd59d7e Mon Sep 17 00:00:00 2001 From: Abin Simon Date: Wed, 21 Jun 2023 13:10:41 +0530 Subject: [PATCH] Fix uploading large attachments for exchange (#3634) Upload of large attachments were broken previously. Previously we were relying on the size reported by Graph API, but that is not reliable and we were not completing uploads because of that. --- #### Does this PR need a docs update or release note? - [x] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [ ] :no_entry: No #### Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * # #### Test Plan - [ ] :muscle: Manual - [ ] :zap: Unit test - [x] :green_heart: E2E --- CHANGELOG.md | 1 + src/internal/m365/exchange/attachment.go | 18 ++++-- src/internal/m365/graph/uploadsession.go | 38 +++++++++++-- src/pkg/services/m365/api/events.go | 19 +++---- src/pkg/services/m365/api/events_test.go | 70 ++++++++++++++++++++++++ src/pkg/services/m365/api/mail.go | 36 ++++++++---- src/pkg/services/m365/api/mail_test.go | 32 +++++++++++ 7 files changed, 179 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49b6eae30..8b73ade67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix Exchange backup issue caused by incorrect json serialization - Fix issues with details model containing duplicate entry for api consumers - Handle OLE conversion errors when trying to fetch attachments +- Fix uploading large attachments for emails and calendar ### Changed - Do not display all the items that we restored at the end if there are more than 15. You can override this with `--verbose`. diff --git a/src/internal/m365/exchange/attachment.go b/src/internal/m365/exchange/attachment.go index d09124523..3f97342ea 100644 --- a/src/internal/m365/exchange/attachment.go +++ b/src/internal/m365/exchange/attachment.go @@ -9,6 +9,7 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/services/m365/api" ) type attachmentPoster interface { @@ -20,15 +21,14 @@ type attachmentPoster interface { PostLargeAttachment( ctx context.Context, userID, containerID, itemID, name string, - size int64, - body models.Attachmentable, - ) (models.UploadSessionable, error) + content []byte, + ) (string, error) } const ( // Use large attachment logic for attachments > 3MB // https://learn.microsoft.com/en-us/graph/outlook-large-attachments - largeAttachmentSize = int32(3 * 1024 * 1024) + largeAttachmentSize = 3 * 1024 * 1024 fileAttachmentOdataValue = "#microsoft.graph.fileAttachment" itemAttachmentOdataValue = "#microsoft.graph.itemAttachment" referenceAttachmentOdataValue = "#microsoft.graph.referenceAttachment" @@ -95,7 +95,15 @@ func uploadAttachment( // for file attachments sized >= 3MB if attachmentType == models.FILE_ATTACHMENTTYPE && size >= largeAttachmentSize { - _, err := cli.PostLargeAttachment(ctx, userID, containerID, parentItemID, name, int64(size), attachment) + // We expect the entire attachment to fit in memory. + // Max attachment size is 150MB. + content, err := api.GetAttachmentContent(attachment) + if err != nil { + return clues.Wrap(err, "serializing attachment content").WithClues(ctx) + } + + _, err = cli.PostLargeAttachment(ctx, userID, containerID, parentItemID, name, content) + return err } diff --git a/src/internal/m365/graph/uploadsession.go b/src/internal/m365/graph/uploadsession.go index 77fefd5c8..74a696373 100644 --- a/src/internal/m365/graph/uploadsession.go +++ b/src/internal/m365/graph/uploadsession.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "net/http" + "strings" "github.com/alcionai/clues" @@ -21,8 +22,11 @@ const ( // Writer implements an io.Writer for a M365 // UploadSession URL type largeItemWriter struct { + // ID is the id of the item created. + // Will be available after the upload is complete + ID string // Identifier - id string + parentID string // Upload URL for this item url string // Tracks how much data will be written @@ -32,8 +36,13 @@ type largeItemWriter struct { client httpWrapper } -func NewLargeItemWriter(id, url string, size int64) *largeItemWriter { - return &largeItemWriter{id: id, url: url, contentLength: size, client: *NewNoTimeoutHTTPWrapper()} +func NewLargeItemWriter(parentID, url string, size int64) *largeItemWriter { + return &largeItemWriter{ + parentID: parentID, + url: url, + contentLength: size, + client: *NewNoTimeoutHTTPWrapper(), + } } // Write will upload the provided data to M365. It sets the `Content-Length` and `Content-Range` headers based on @@ -44,7 +53,7 @@ func (iw *largeItemWriter) Write(p []byte) (int, error) { logger.Ctx(ctx). Debugf("WRITE for %s. Size:%d, Offset: %d, TotalSize: %d", - iw.id, rangeLength, iw.lastWrittenOffset, iw.contentLength) + iw.parentID, rangeLength, iw.lastWrittenOffset, iw.contentLength) endOffset := iw.lastWrittenOffset + int64(rangeLength) @@ -58,7 +67,7 @@ func (iw *largeItemWriter) Write(p []byte) (int, error) { iw.contentLength) headers[contentLengthHeaderKey] = fmt.Sprintf("%d", rangeLength) - _, err := iw.client.Request( + resp, err := iw.client.Request( ctx, http.MethodPut, iw.url, @@ -66,7 +75,7 @@ func (iw *largeItemWriter) Write(p []byte) (int, error) { headers) if err != nil { return 0, clues.Wrap(err, "uploading item").With( - "upload_id", iw.id, + "upload_id", iw.parentID, "upload_chunk_size", rangeLength, "upload_offset", iw.lastWrittenOffset, "upload_size", iw.contentLength) @@ -75,5 +84,22 @@ func (iw *largeItemWriter) Write(p []byte) (int, error) { // Update last offset iw.lastWrittenOffset = endOffset + // Once the upload is complete, we get a Location header in the + // below format from which we can get the id of the uploaded + // item. This will only be available after we have uploaded the + // entire content(based on the size in the req header). + // https://outlook.office.com/api/v2.0/Users('')/Messages('')/Attachments('') + // Ref: https://learn.microsoft.com/en-us/graph/outlook-large-attachments?tabs=http + loc := resp.Header.Get("Location") + if loc != "" { + splits := strings.Split(loc, "'") + if len(splits) != 7 || splits[4] != ")/Attachments(" || len(splits[5]) == 0 { + return 0, clues.New("invalid format for upload completion url"). + With("location", loc) + } + + iw.ID = splits[5] + } + return rangeLength, nil } diff --git a/src/pkg/services/m365/api/events.go b/src/pkg/services/m365/api/events.go index d24b69259..fff4a35cc 100644 --- a/src/pkg/services/m365/api/events.go +++ b/src/pkg/services/m365/api/events.go @@ -513,14 +513,9 @@ func (c Events) PostSmallAttachment( func (c Events) PostLargeAttachment( ctx context.Context, userID, containerID, parentItemID, itemName string, - size int64, - body models.Attachmentable, -) (models.UploadSessionable, error) { - bs, err := GetAttachmentContent(body) - if err != nil { - return nil, clues.Wrap(err, "serializing attachment content").WithClues(ctx) - } - + content []byte, +) (string, error) { + size := int64(len(content)) session := users.NewItemCalendarEventsItemAttachmentsCreateUploadSessionPostRequestBody() session.SetAttachmentItem(makeSessionAttachment(itemName, size)) @@ -536,19 +531,19 @@ func (c Events) PostLargeAttachment( CreateUploadSession(). Post(ctx, session, nil) if err != nil { - return nil, graph.Wrap(ctx, err, "uploading large event attachment") + return "", graph.Wrap(ctx, err, "uploading large event attachment") } url := ptr.Val(us.GetUploadUrl()) w := graph.NewLargeItemWriter(parentItemID, url, size) copyBuffer := make([]byte, graph.AttachmentChunkSize) - _, err = io.CopyBuffer(w, bytes.NewReader(bs), copyBuffer) + _, err = io.CopyBuffer(w, bytes.NewReader(content), copyBuffer) if err != nil { - return nil, clues.Wrap(err, "buffering large attachment content").WithClues(ctx) + return "", clues.Wrap(err, "buffering large attachment content").WithClues(ctx) } - return us, nil + return w.ID, nil } // --------------------------------------------------------------------------- diff --git a/src/pkg/services/m365/api/events_test.go b/src/pkg/services/m365/api/events_test.go index 2daa66454..1d4c39cc9 100644 --- a/src/pkg/services/m365/api/events_test.go +++ b/src/pkg/services/m365/api/events_test.go @@ -11,9 +11,12 @@ import ( "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common/dttm" + "github.com/alcionai/corso/src/internal/common/ptr" exchMock "github.com/alcionai/corso/src/internal/m365/exchange/mock" "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/control/testdata" ) type EventsAPIUnitSuite struct { @@ -212,3 +215,70 @@ func (suite *EventsAPIUnitSuite) TestBytesToEventable() { }) } } + +type EventsAPIIntgSuite struct { + tester.Suite + credentials account.M365Config + ac Client +} + +func TestEventsAPIntgSuite(t *testing.T) { + suite.Run(t, &EventsAPIIntgSuite{ + Suite: tester.NewIntegrationSuite( + t, + [][]string{tester.M365AcctCredEnvs}), + }) +} + +func (suite *EventsAPIIntgSuite) SetupSuite() { + t := suite.T() + + a := tester.NewM365Account(t) + m365, err := a.M365Config() + require.NoError(t, err, clues.ToCore(err)) + + suite.credentials = m365 + suite.ac, err = NewClient(m365) + require.NoError(t, err, clues.ToCore(err)) +} + +func (suite *EventsAPIIntgSuite) TestRestoreLargeAttachment() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + userID := tester.M365UserID(suite.T()) + + folderName := testdata.DefaultRestoreConfig("eventlargeattachmenttest").Location + evts := suite.ac.Events() + calendar, err := evts.CreateContainer(ctx, userID, folderName, "") + require.NoError(t, err, clues.ToCore(err)) + + tomorrow := time.Now().Add(24 * time.Hour) + evt := models.NewEvent() + sdtz := models.NewDateTimeTimeZone() + edtz := models.NewDateTimeTimeZone() + + evt.SetSubject(ptr.To("Event with attachment")) + sdtz.SetDateTime(ptr.To(dttm.Format(tomorrow))) + sdtz.SetTimeZone(ptr.To("UTC")) + edtz.SetDateTime(ptr.To(dttm.Format(tomorrow.Add(30 * time.Minute)))) + edtz.SetTimeZone(ptr.To("UTC")) + evt.SetStart(sdtz) + evt.SetEnd(edtz) + + item, err := evts.PostItem(ctx, userID, ptr.Val(calendar.GetId()), evt) + require.NoError(t, err, clues.ToCore(err)) + + id, err := evts.PostLargeAttachment( + ctx, + userID, + ptr.Val(calendar.GetId()), + ptr.Val(item.GetId()), + "raboganm", + []byte("mangobar"), + ) + require.NoError(t, err, clues.ToCore(err)) + require.NotEmpty(t, id, "empty id for large attachment") +} diff --git a/src/pkg/services/m365/api/mail.go b/src/pkg/services/m365/api/mail.go index f08cbb7c5..8073a6659 100644 --- a/src/pkg/services/m365/api/mail.go +++ b/src/pkg/services/m365/api/mail.go @@ -63,6 +63,23 @@ func (c Mail) CreateMailFolder( return mdl, nil } +func (c Mail) DeleteMailFolder( + ctx context.Context, + userID, id string, +) error { + err := c.Stable.Client(). + Users(). + ByUserId(userID). + MailFolders(). + ByMailFolderId(id). + Delete(ctx, nil) + if err != nil { + return graph.Wrap(ctx, err, "deleting mail folder") + } + + return nil +} + func (c Mail) CreateContainer( ctx context.Context, userID, containerName, parentContainerID string, @@ -407,14 +424,9 @@ func (c Mail) PostSmallAttachment( func (c Mail) PostLargeAttachment( ctx context.Context, userID, containerID, parentItemID, itemName string, - size int64, - body models.Attachmentable, -) (models.UploadSessionable, error) { - bs, err := GetAttachmentContent(body) - if err != nil { - return nil, clues.Wrap(err, "serializing attachment content").WithClues(ctx) - } - + content []byte, +) (string, error) { + size := int64(len(content)) session := users.NewItemMailFoldersItemMessagesItemAttachmentsCreateUploadSessionPostRequestBody() session.SetAttachmentItem(makeSessionAttachment(itemName, size)) @@ -430,19 +442,19 @@ func (c Mail) PostLargeAttachment( CreateUploadSession(). Post(ctx, session, nil) if err != nil { - return nil, graph.Wrap(ctx, err, "uploading large mail attachment") + return "", graph.Wrap(ctx, err, "uploading large mail attachment") } url := ptr.Val(us.GetUploadUrl()) w := graph.NewLargeItemWriter(parentItemID, url, size) copyBuffer := make([]byte, graph.AttachmentChunkSize) - _, err = io.CopyBuffer(w, bytes.NewReader(bs), copyBuffer) + _, err = io.CopyBuffer(w, bytes.NewReader(content), copyBuffer) if err != nil { - return nil, clues.Wrap(err, "buffering large attachment content").WithClues(ctx) + return "", clues.Wrap(err, "buffering large attachment content").WithClues(ctx) } - return us, nil + return w.ID, nil } // --------------------------------------------------------------------------- diff --git a/src/pkg/services/m365/api/mail_test.go b/src/pkg/services/m365/api/mail_test.go index 236bc9b4c..dcc9df9a7 100644 --- a/src/pkg/services/m365/api/mail_test.go +++ b/src/pkg/services/m365/api/mail_test.go @@ -19,6 +19,7 @@ import ( "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/control/testdata" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/services/m365/api" "github.com/alcionai/corso/src/pkg/services/m365/api/mock" @@ -410,3 +411,34 @@ func (suite *MailAPIIntgSuite) TestHugeAttachmentListDownload() { }) } } + +func (suite *MailAPIIntgSuite) TestRestoreLargeAttachment() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + userID := tester.M365UserID(suite.T()) + + folderName := testdata.DefaultRestoreConfig("maillargeattachmenttest").Location + msgs := suite.ac.Mail() + mailfolder, err := msgs.CreateMailFolder(ctx, userID, folderName) + require.NoError(t, err, clues.ToCore(err)) + + msg := models.NewMessage() + msg.SetSubject(ptr.To("Mail with attachment")) + + item, err := msgs.PostItem(ctx, userID, ptr.Val(mailfolder.GetId()), msg) + require.NoError(t, err, clues.ToCore(err)) + + id, err := msgs.PostLargeAttachment( + ctx, + userID, + ptr.Val(mailfolder.GetId()), + ptr.Val(item.GetId()), + "raboganm", + []byte("mangobar"), + ) + require.NoError(t, err, clues.ToCore(err)) + require.NotEmpty(t, id, "empty id for large attachment") +}