diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d8cdfee0..8f1eb0fd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Inline attachments (e.g. copy/paste ) are discovered and backed up correctly ([#2163](https://github.com/alcionai/corso/issues/2163)) - Guest and External users (for cloud accounts) and non-on-premise users (for systems that use on-prem AD syncs) are now excluded from backup and restore operations. - Remove the M365 license guid check in OneDrive backup which wasn't reliable. - +- Reduced extra socket consumption while downloading multiple drive files. +- Extended timeout boundaries for exchange attachment downloads, reducing risk of cancellation on large files. ## [v0.1.0] (alpha) - 2023-01-13 diff --git a/src/internal/connector/exchange/api/api.go b/src/internal/connector/exchange/api/api.go index 6edd68f57..c4858c5c1 100644 --- a/src/internal/connector/exchange/api/api.go +++ b/src/internal/connector/exchange/api/api.go @@ -2,9 +2,11 @@ package api import ( "context" + "strings" "time" "github.com/microsoft/kiota-abstractions-go/serialization" + "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/pkg/errors" "github.com/alcionai/corso/src/internal/connector/graph" @@ -57,6 +59,11 @@ type Client struct { // The stable service is re-usable for any non-paged request. // This allows us to maintain performance across async requests. stable graph.Servicer + + // The largeItem graph servicer is configured specifically for + // downloading large items. Specifically for use when handling + // attachments, and for no other use. + largeItem graph.Servicer } // NewClient produces a new exchange api client. Must be used in @@ -67,27 +74,45 @@ func NewClient(creds account.M365Config) (Client, error) { return Client{}, err } - return Client{creds, s}, nil + li, err := newLargeItemService(creds) + if err != nil { + return Client{}, err + } + + return Client{creds, s, li}, nil } // service generates a new service. Used for paged and other long-running // requests instead of the client's stable service, so that in-flight state // within the adapter doesn't get clobbered func (c Client) service() (*graph.Service, error) { - return newService(c.Credentials) + s, err := newService(c.Credentials) + return s, err } func newService(creds account.M365Config) (*graph.Service, error) { - adapter, err := graph.CreateAdapter( + a, err := graph.CreateAdapter( + creds.AzureTenantID, + creds.AzureClientID, + creds.AzureClientSecret) + if err != nil { + return nil, errors.Wrap(err, "generating no-timeout graph adapter") + } + + return graph.NewService(a), nil +} + +func newLargeItemService(creds account.M365Config) (*graph.Service, error) { + a, err := graph.CreateAdapter( creds.AzureTenantID, creds.AzureClientID, creds.AzureClientSecret, - ) + graph.NoTimeout()) if err != nil { - return nil, errors.Wrap(err, "generating graph api service client") + return nil, errors.Wrap(err, "generating no-timeout graph adapter") } - return graph.NewService(adapter), nil + return graph.NewService(a), nil } // --------------------------------------------------------------------------- @@ -117,3 +142,14 @@ func orNow(t *time.Time) time.Time { return *t } + +func HasAttachments(body models.ItemBodyable) bool { + if body.GetContent() == nil || body.GetContentType() == nil || + *body.GetContentType() == models.TEXT_BODYTYPE || len(*body.GetContent()) == 0 { + return false + } + + content := *body.GetContent() + + return strings.Contains(content, "src=\"cid:") +} diff --git a/src/internal/connector/exchange/api/api_test.go b/src/internal/connector/exchange/api/api_test.go index d3fe51350..4fe842452 100644 --- a/src/internal/connector/exchange/api/api_test.go +++ b/src/internal/connector/exchange/api/api_test.go @@ -3,11 +3,14 @@ package api import ( "testing" + "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/internal/connector/mockconnector" + "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" ) @@ -190,3 +193,57 @@ func (suite *ExchangeServiceSuite) TestGraphQueryFunctions() { }) } } + +//nolint:lll +var stubHTMLContent = "\r\n
Happy New Year,

In accordance with TPS report guidelines, there have been questions about how to address our activities SharePoint Cover page. Do you believe this is the best picture? 



Let me know if this meets our culture requirements.

Warm Regards,

Dustin
" + +func (suite *ExchangeServiceSuite) TestHasAttachments() { + tests := []struct { + name string + hasAttachment assert.BoolAssertionFunc + getBodyable func(t *testing.T) models.ItemBodyable + }{ + { + name: "Mock w/out attachment", + hasAttachment: assert.False, + getBodyable: func(t *testing.T) models.ItemBodyable { + byteArray := mockconnector.GetMockMessageWithBodyBytes( + "Test", + "This is testing", + "This is testing", + ) + message, err := support.CreateMessageFromBytes(byteArray) + require.NoError(t, err) + return message.GetBody() + }, + }, + { + name: "Mock w/ inline attachment", + hasAttachment: assert.True, + getBodyable: func(t *testing.T) models.ItemBodyable { + byteArray := mockconnector.GetMessageWithOneDriveAttachment("Test legacy") + message, err := support.CreateMessageFromBytes(byteArray) + require.NoError(t, err) + return message.GetBody() + }, + }, + { + name: "Edge Case", + hasAttachment: assert.True, + getBodyable: func(t *testing.T) models.ItemBodyable { + body := models.NewItemBody() + body.SetContent(&stubHTMLContent) + cat := models.HTML_BODYTYPE + body.SetContentType(&cat) + return body + }, + }, + } + + for _, test := range tests { + suite.T().Run(test.name, func(t *testing.T) { + found := HasAttachments(test.getBodyable(t)) + test.hasAttachment(t, found) + }) + } +} diff --git a/src/internal/connector/exchange/api/events.go b/src/internal/connector/exchange/api/events.go index cdb74ad19..cd968056d 100644 --- a/src/internal/connector/exchange/api/events.go +++ b/src/internal/connector/exchange/api/events.go @@ -85,12 +85,37 @@ func (c Events) GetItem( ctx context.Context, user, itemID string, ) (serialization.Parsable, *details.ExchangeInfo, error) { - evt, err := c.stable.Client().UsersById(user).EventsById(itemID).Get(ctx, nil) + event, err := c.stable.Client().UsersById(user).EventsById(itemID).Get(ctx, nil) if err != nil { return nil, nil, err } - return evt, EventInfo(evt), nil + var errs *multierror.Error + + if *event.GetHasAttachments() || HasAttachments(event.GetBody()) { + for count := 0; count < numberOfRetries; count++ { + attached, err := c.largeItem. + Client(). + UsersById(user). + EventsById(itemID). + Attachments(). + Get(ctx, nil) + if err == nil { + event.SetAttachments(attached.GetValue()) + break + } + + logger.Ctx(ctx).Debugw("retrying event attachment download", "err", err) + errs = multierror.Append(errs, err) + } + + if err != nil { + logger.Ctx(ctx).Errorw("event attachment download exceeded maximum retries", "err", errs) + return nil, nil, support.WrapAndAppend(itemID, errors.Wrap(err, "download event attachment"), nil) + } + } + + return event, EventInfo(event), nil } func (c Client) GetAllCalendarNamesForUser( @@ -249,8 +274,7 @@ func (c Events) GetAddedAndRemovedItemIDs( // Serialization // --------------------------------------------------------------------------- -// Serialize retrieves attachment data identified by the event item, and then -// serializes it into a byte slice. +// Serialize transforms the event into a byte slice. func (c Events) Serialize( ctx context.Context, item serialization.Parsable, @@ -268,31 +292,6 @@ func (c Events) Serialize( defer writer.Close() - if *event.GetHasAttachments() || support.HasAttachments(event.GetBody()) { - // getting all the attachments might take a couple attempts due to filesize - var retriesErr error - - for count := 0; count < numberOfRetries; count++ { - attached, err := c.stable. - Client(). - UsersById(user). - EventsById(itemID). - Attachments(). - Get(ctx, nil) - retriesErr = err - - if err == nil { - event.SetAttachments(attached.GetValue()) - break - } - } - - if retriesErr != nil { - logger.Ctx(ctx).Debug("exceeded maximum retries") - return nil, support.WrapAndAppend(itemID, errors.Wrap(retriesErr, "attachment failed"), nil) - } - } - if err = writer.WriteObjectValue("", event); err != nil { return nil, support.SetNonRecoverableError(errors.Wrap(err, itemID)) } diff --git a/src/internal/connector/exchange/api/mail.go b/src/internal/connector/exchange/api/mail.go index f29d64bd8..f4d2d2e33 100644 --- a/src/internal/connector/exchange/api/mail.go +++ b/src/internal/connector/exchange/api/mail.go @@ -97,7 +97,8 @@ func (c Mail) GetContainerByID( return service.Client().UsersById(userID).MailFoldersById(dirID).Get(ctx, ofmf) } -// GetItem retrieves a Messageable item. +// GetItem retrieves a Messageable item. If the item contains an attachment, that +// attachment is also downloaded. func (c Mail) GetItem( ctx context.Context, user, itemID string, @@ -107,6 +108,31 @@ func (c Mail) GetItem( return nil, nil, err } + var errs *multierror.Error + + if *mail.GetHasAttachments() || HasAttachments(mail.GetBody()) { + for count := 0; count < numberOfRetries; count++ { + attached, err := c.largeItem. + Client(). + UsersById(user). + MessagesById(itemID). + Attachments(). + Get(ctx, nil) + if err == nil { + mail.SetAttachments(attached.GetValue()) + break + } + + logger.Ctx(ctx).Debugw("retrying mail attachment download", "err", err) + errs = multierror.Append(errs, err) + } + + if err != nil { + logger.Ctx(ctx).Errorw("mail attachment download exceeded maximum retries", "err", errs) + return nil, nil, support.WrapAndAppend(itemID, errors.Wrap(err, "downloading mail attachment"), nil) + } + } + return mail, MailInfo(mail), nil } @@ -238,8 +264,7 @@ func (c Mail) GetAddedAndRemovedItemIDs( // Serialization // --------------------------------------------------------------------------- -// Serialize retrieves attachment data identified by the mail item, and then -// serializes it into a byte slice. +// Serialize transforms the mail item into a byte slice. func (c Mail) Serialize( ctx context.Context, item serialization.Parsable, @@ -257,31 +282,6 @@ func (c Mail) Serialize( defer writer.Close() - if *msg.GetHasAttachments() || support.HasAttachments(msg.GetBody()) { - // getting all the attachments might take a couple attempts due to filesize - var retriesErr error - - for count := 0; count < numberOfRetries; count++ { - attached, err := c.stable. - Client(). - UsersById(user). - MessagesById(itemID). - Attachments(). - Get(ctx, nil) - retriesErr = err - - if err == nil { - msg.SetAttachments(attached.GetValue()) - break - } - } - - if retriesErr != nil { - logger.Ctx(ctx).Debug("exceeded maximum retries") - return nil, support.WrapAndAppend(itemID, errors.Wrap(retriesErr, "attachment failed"), nil) - } - } - if err = writer.WriteObjectValue("", msg); err != nil { return nil, support.SetNonRecoverableError(errors.Wrap(err, itemID)) } diff --git a/src/internal/connector/graph/service.go b/src/internal/connector/graph/service.go index fa4662014..6c0e6dbc1 100644 --- a/src/internal/connector/graph/service.go +++ b/src/internal/connector/graph/service.go @@ -243,6 +243,8 @@ func (handler *LoggingMiddleware) Intercept( return resp, err } + // Return immediately if the response is good (2xx). + // If api logging is toggled, log a body-less dump of the request/resp. if (resp.StatusCode / 100) == 2 { if logger.DebugAPI || os.Getenv(logGraphRequestsEnvKey) != "" { respDump, _ := httputil.DumpResponse(resp, false) @@ -263,6 +265,10 @@ func (handler *LoggingMiddleware) Intercept( return resp, err } + // Log errors according to api debugging configurations. + // When debugging is toggled, every non-2xx is recorded with a respose dump. + // Otherwise, throttling cases and other non-2xx responses are logged + // with a slimmer reference for telemetry/supportability purposes. if logger.DebugAPI || os.Getenv(logGraphRequestsEnvKey) != "" { respDump, _ := httputil.DumpResponse(resp, true) diff --git a/src/internal/connector/support/m365Support.go b/src/internal/connector/support/m365Support.go index 99cb95577..d7e51e513 100644 --- a/src/internal/connector/support/m365Support.go +++ b/src/internal/connector/support/m365Support.go @@ -1,8 +1,6 @@ package support import ( - "strings" - absser "github.com/microsoft/kiota-abstractions-go/serialization" js "github.com/microsoft/kiota-serialization-json-go" "github.com/microsoftgraph/msgraph-sdk-go/models" @@ -73,14 +71,3 @@ func CreateListFromBytes(bytes []byte) (models.Listable, error) { return list, nil } - -func HasAttachments(body models.ItemBodyable) bool { - if body.GetContent() == nil || body.GetContentType() == nil || - *body.GetContentType() == models.TEXT_BODYTYPE || len(*body.GetContent()) == 0 { - return false - } - - content := *body.GetContent() - - return strings.Contains(content, "src=\"cid:") -} diff --git a/src/internal/connector/support/m365Support_test.go b/src/internal/connector/support/m365Support_test.go index dedde3536..c04c74604 100644 --- a/src/internal/connector/support/m365Support_test.go +++ b/src/internal/connector/support/m365Support_test.go @@ -3,7 +3,6 @@ package support import ( "testing" - "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" @@ -160,56 +159,3 @@ func (suite *DataSupportSuite) TestCreateListFromBytes() { }) } } - -func (suite *DataSupportSuite) TestHasAttachments() { - tests := []struct { - name string - hasAttachment assert.BoolAssertionFunc - getBodyable func(t *testing.T) models.ItemBodyable - }{ - { - name: "Mock w/out attachment", - hasAttachment: assert.False, - getBodyable: func(t *testing.T) models.ItemBodyable { - byteArray := mockconnector.GetMockMessageWithBodyBytes( - "Test", - "This is testing", - "This is testing", - ) - message, err := CreateMessageFromBytes(byteArray) - require.NoError(t, err) - return message.GetBody() - }, - }, - { - name: "Mock w/ inline attachment", - hasAttachment: assert.True, - getBodyable: func(t *testing.T) models.ItemBodyable { - byteArray := mockconnector.GetMessageWithOneDriveAttachment("Test legacy") - message, err := CreateMessageFromBytes(byteArray) - require.NoError(t, err) - return message.GetBody() - }, - }, - { - name: "Edge Case", - hasAttachment: assert.True, - getBodyable: func(t *testing.T) models.ItemBodyable { - //nolint:lll - content := "\r\n
Happy New Year,

In accordance with TPS report guidelines, there have been questions about how to address our activities SharePoint Cover page. Do you believe this is the best picture? 



Let me know if this meets our culture requirements.

Warm Regards,

Dustin
" - body := models.NewItemBody() - body.SetContent(&content) - cat := models.HTML_BODYTYPE - body.SetContentType(&cat) - return body - }, - }, - } - - for _, test := range tests { - suite.T().Run(test.name, func(t *testing.T) { - found := HasAttachments(test.getBodyable(t)) - test.hasAttachment(t, found) - }) - } -}