refactor exchange attachment downloads (#2302)

## Description

Three changes: adds a LargeItem servicer to
the exchange api client.  Migrates attachment
downloads in mail and events out of serialize
and into the GetItem func.  Finally, utilizes the
largeItem servicer to download attachments,
instead of the standard servicer.

A follow-up PR will add mocked test cases for
these changes.

## Does this PR need a docs update or release note?

- [x]  Yes, it's included

## Type of change

- [x] 🌻 Feature

## Issue(s)

* #2299

## Test Plan

- [x] 💚 E2E
This commit is contained in:
Keepers 2023-01-27 15:25:39 -07:00 committed by GitHub
parent 3e106dbac8
commit 0d8eb8f4fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 163 additions and 131 deletions

View File

@ -14,7 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Inline attachments (e.g. copy/paste ) are discovered and backed up correctly ([#2163](https://github.com/alcionai/corso/issues/2163))
- Guest and External users (for cloud accounts) and non-on-premise users (for systems that use on-prem AD syncs) are now excluded from backup and restore operations.
- Remove the M365 license guid check in OneDrive backup which wasn't reliable.
- Reduced extra socket consumption while downloading multiple drive files.
- Extended timeout boundaries for exchange attachment downloads, reducing risk of cancellation on large files.
## [v0.1.0] (alpha) - 2023-01-13

View File

@ -2,9 +2,11 @@ package api
import (
"context"
"strings"
"time"
"github.com/microsoft/kiota-abstractions-go/serialization"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors"
"github.com/alcionai/corso/src/internal/connector/graph"
@ -57,6 +59,11 @@ type Client struct {
// The stable service is re-usable for any non-paged request.
// This allows us to maintain performance across async requests.
stable graph.Servicer
// The largeItem graph servicer is configured specifically for
// downloading large items. Specifically for use when handling
// attachments, and for no other use.
largeItem graph.Servicer
}
// NewClient produces a new exchange api client. Must be used in
@ -67,27 +74,45 @@ func NewClient(creds account.M365Config) (Client, error) {
return Client{}, err
}
return Client{creds, s}, nil
li, err := newLargeItemService(creds)
if err != nil {
return Client{}, err
}
return Client{creds, s, li}, nil
}
// service generates a new service. Used for paged and other long-running
// requests instead of the client's stable service, so that in-flight state
// within the adapter doesn't get clobbered
func (c Client) service() (*graph.Service, error) {
return newService(c.Credentials)
s, err := newService(c.Credentials)
return s, err
}
func newService(creds account.M365Config) (*graph.Service, error) {
adapter, err := graph.CreateAdapter(
a, err := graph.CreateAdapter(
creds.AzureTenantID,
creds.AzureClientID,
creds.AzureClientSecret)
if err != nil {
return nil, errors.Wrap(err, "generating no-timeout graph adapter")
}
return graph.NewService(a), nil
}
func newLargeItemService(creds account.M365Config) (*graph.Service, error) {
a, err := graph.CreateAdapter(
creds.AzureTenantID,
creds.AzureClientID,
creds.AzureClientSecret,
)
graph.NoTimeout())
if err != nil {
return nil, errors.Wrap(err, "generating graph api service client")
return nil, errors.Wrap(err, "generating no-timeout graph adapter")
}
return graph.NewService(adapter), nil
return graph.NewService(a), nil
}
// ---------------------------------------------------------------------------
@ -117,3 +142,14 @@ func orNow(t *time.Time) time.Time {
return *t
}
func HasAttachments(body models.ItemBodyable) bool {
if body.GetContent() == nil || body.GetContentType() == nil ||
*body.GetContentType() == models.TEXT_BODYTYPE || len(*body.GetContent()) == 0 {
return false
}
content := *body.GetContent()
return strings.Contains(content, "src=\"cid:")
}

View File

@ -3,11 +3,14 @@ package api
import (
"testing"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/mockconnector"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/account"
)
@ -190,3 +193,57 @@ func (suite *ExchangeServiceSuite) TestGraphQueryFunctions() {
})
}
}
//nolint:lll
var stubHTMLContent = "<html><head>\r\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style type=\"text/css\" style=\"display:none\">\r\n<!--\r\np\r\n\t{margin-top:0;\r\n\tmargin-bottom:0}\r\n-->\r\n</style></head><body dir=\"ltr\"><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\">Happy New Year,</div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\">In accordance with TPS report guidelines, there have been questions about how to address our activities SharePoint Cover page. Do you believe this is the best picture?&nbsp;</div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><img class=\"FluidPluginCopy ContentPasted0 w-2070 h-1380\" size=\"5854817\" data-outlook-trace=\"F:1|T:1\" src=\"cid:85f4faa3-9851-40c7-ba0a-e63dce1185f9\" style=\"max-width:100%\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\">Let me know if this meets our culture requirements.</div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\">Warm Regards,</div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\">Dustin</div></body></html>"
func (suite *ExchangeServiceSuite) TestHasAttachments() {
tests := []struct {
name string
hasAttachment assert.BoolAssertionFunc
getBodyable func(t *testing.T) models.ItemBodyable
}{
{
name: "Mock w/out attachment",
hasAttachment: assert.False,
getBodyable: func(t *testing.T) models.ItemBodyable {
byteArray := mockconnector.GetMockMessageWithBodyBytes(
"Test",
"This is testing",
"This is testing",
)
message, err := support.CreateMessageFromBytes(byteArray)
require.NoError(t, err)
return message.GetBody()
},
},
{
name: "Mock w/ inline attachment",
hasAttachment: assert.True,
getBodyable: func(t *testing.T) models.ItemBodyable {
byteArray := mockconnector.GetMessageWithOneDriveAttachment("Test legacy")
message, err := support.CreateMessageFromBytes(byteArray)
require.NoError(t, err)
return message.GetBody()
},
},
{
name: "Edge Case",
hasAttachment: assert.True,
getBodyable: func(t *testing.T) models.ItemBodyable {
body := models.NewItemBody()
body.SetContent(&stubHTMLContent)
cat := models.HTML_BODYTYPE
body.SetContentType(&cat)
return body
},
},
}
for _, test := range tests {
suite.T().Run(test.name, func(t *testing.T) {
found := HasAttachments(test.getBodyable(t))
test.hasAttachment(t, found)
})
}
}

View File

@ -85,12 +85,37 @@ func (c Events) GetItem(
ctx context.Context,
user, itemID string,
) (serialization.Parsable, *details.ExchangeInfo, error) {
evt, err := c.stable.Client().UsersById(user).EventsById(itemID).Get(ctx, nil)
event, err := c.stable.Client().UsersById(user).EventsById(itemID).Get(ctx, nil)
if err != nil {
return nil, nil, err
}
return evt, EventInfo(evt), nil
var errs *multierror.Error
if *event.GetHasAttachments() || HasAttachments(event.GetBody()) {
for count := 0; count < numberOfRetries; count++ {
attached, err := c.largeItem.
Client().
UsersById(user).
EventsById(itemID).
Attachments().
Get(ctx, nil)
if err == nil {
event.SetAttachments(attached.GetValue())
break
}
logger.Ctx(ctx).Debugw("retrying event attachment download", "err", err)
errs = multierror.Append(errs, err)
}
if err != nil {
logger.Ctx(ctx).Errorw("event attachment download exceeded maximum retries", "err", errs)
return nil, nil, support.WrapAndAppend(itemID, errors.Wrap(err, "download event attachment"), nil)
}
}
return event, EventInfo(event), nil
}
func (c Client) GetAllCalendarNamesForUser(
@ -249,8 +274,7 @@ func (c Events) GetAddedAndRemovedItemIDs(
// Serialization
// ---------------------------------------------------------------------------
// Serialize retrieves attachment data identified by the event item, and then
// serializes it into a byte slice.
// Serialize transforms the event into a byte slice.
func (c Events) Serialize(
ctx context.Context,
item serialization.Parsable,
@ -268,31 +292,6 @@ func (c Events) Serialize(
defer writer.Close()
if *event.GetHasAttachments() || support.HasAttachments(event.GetBody()) {
// getting all the attachments might take a couple attempts due to filesize
var retriesErr error
for count := 0; count < numberOfRetries; count++ {
attached, err := c.stable.
Client().
UsersById(user).
EventsById(itemID).
Attachments().
Get(ctx, nil)
retriesErr = err
if err == nil {
event.SetAttachments(attached.GetValue())
break
}
}
if retriesErr != nil {
logger.Ctx(ctx).Debug("exceeded maximum retries")
return nil, support.WrapAndAppend(itemID, errors.Wrap(retriesErr, "attachment failed"), nil)
}
}
if err = writer.WriteObjectValue("", event); err != nil {
return nil, support.SetNonRecoverableError(errors.Wrap(err, itemID))
}

View File

@ -97,7 +97,8 @@ func (c Mail) GetContainerByID(
return service.Client().UsersById(userID).MailFoldersById(dirID).Get(ctx, ofmf)
}
// GetItem retrieves a Messageable item.
// GetItem retrieves a Messageable item. If the item contains an attachment, that
// attachment is also downloaded.
func (c Mail) GetItem(
ctx context.Context,
user, itemID string,
@ -107,6 +108,31 @@ func (c Mail) GetItem(
return nil, nil, err
}
var errs *multierror.Error
if *mail.GetHasAttachments() || HasAttachments(mail.GetBody()) {
for count := 0; count < numberOfRetries; count++ {
attached, err := c.largeItem.
Client().
UsersById(user).
MessagesById(itemID).
Attachments().
Get(ctx, nil)
if err == nil {
mail.SetAttachments(attached.GetValue())
break
}
logger.Ctx(ctx).Debugw("retrying mail attachment download", "err", err)
errs = multierror.Append(errs, err)
}
if err != nil {
logger.Ctx(ctx).Errorw("mail attachment download exceeded maximum retries", "err", errs)
return nil, nil, support.WrapAndAppend(itemID, errors.Wrap(err, "downloading mail attachment"), nil)
}
}
return mail, MailInfo(mail), nil
}
@ -238,8 +264,7 @@ func (c Mail) GetAddedAndRemovedItemIDs(
// Serialization
// ---------------------------------------------------------------------------
// Serialize retrieves attachment data identified by the mail item, and then
// serializes it into a byte slice.
// Serialize transforms the mail item into a byte slice.
func (c Mail) Serialize(
ctx context.Context,
item serialization.Parsable,
@ -257,31 +282,6 @@ func (c Mail) Serialize(
defer writer.Close()
if *msg.GetHasAttachments() || support.HasAttachments(msg.GetBody()) {
// getting all the attachments might take a couple attempts due to filesize
var retriesErr error
for count := 0; count < numberOfRetries; count++ {
attached, err := c.stable.
Client().
UsersById(user).
MessagesById(itemID).
Attachments().
Get(ctx, nil)
retriesErr = err
if err == nil {
msg.SetAttachments(attached.GetValue())
break
}
}
if retriesErr != nil {
logger.Ctx(ctx).Debug("exceeded maximum retries")
return nil, support.WrapAndAppend(itemID, errors.Wrap(retriesErr, "attachment failed"), nil)
}
}
if err = writer.WriteObjectValue("", msg); err != nil {
return nil, support.SetNonRecoverableError(errors.Wrap(err, itemID))
}

View File

@ -243,6 +243,8 @@ func (handler *LoggingMiddleware) Intercept(
return resp, err
}
// Return immediately if the response is good (2xx).
// If api logging is toggled, log a body-less dump of the request/resp.
if (resp.StatusCode / 100) == 2 {
if logger.DebugAPI || os.Getenv(logGraphRequestsEnvKey) != "" {
respDump, _ := httputil.DumpResponse(resp, false)
@ -263,6 +265,10 @@ func (handler *LoggingMiddleware) Intercept(
return resp, err
}
// Log errors according to api debugging configurations.
// When debugging is toggled, every non-2xx is recorded with a respose dump.
// Otherwise, throttling cases and other non-2xx responses are logged
// with a slimmer reference for telemetry/supportability purposes.
if logger.DebugAPI || os.Getenv(logGraphRequestsEnvKey) != "" {
respDump, _ := httputil.DumpResponse(resp, true)

View File

@ -1,8 +1,6 @@
package support
import (
"strings"
absser "github.com/microsoft/kiota-abstractions-go/serialization"
js "github.com/microsoft/kiota-serialization-json-go"
"github.com/microsoftgraph/msgraph-sdk-go/models"
@ -73,14 +71,3 @@ func CreateListFromBytes(bytes []byte) (models.Listable, error) {
return list, nil
}
func HasAttachments(body models.ItemBodyable) bool {
if body.GetContent() == nil || body.GetContentType() == nil ||
*body.GetContentType() == models.TEXT_BODYTYPE || len(*body.GetContent()) == 0 {
return false
}
content := *body.GetContent()
return strings.Contains(content, "src=\"cid:")
}

View File

@ -3,7 +3,6 @@ package support
import (
"testing"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
@ -160,56 +159,3 @@ func (suite *DataSupportSuite) TestCreateListFromBytes() {
})
}
}
func (suite *DataSupportSuite) TestHasAttachments() {
tests := []struct {
name string
hasAttachment assert.BoolAssertionFunc
getBodyable func(t *testing.T) models.ItemBodyable
}{
{
name: "Mock w/out attachment",
hasAttachment: assert.False,
getBodyable: func(t *testing.T) models.ItemBodyable {
byteArray := mockconnector.GetMockMessageWithBodyBytes(
"Test",
"This is testing",
"This is testing",
)
message, err := CreateMessageFromBytes(byteArray)
require.NoError(t, err)
return message.GetBody()
},
},
{
name: "Mock w/ inline attachment",
hasAttachment: assert.True,
getBodyable: func(t *testing.T) models.ItemBodyable {
byteArray := mockconnector.GetMessageWithOneDriveAttachment("Test legacy")
message, err := CreateMessageFromBytes(byteArray)
require.NoError(t, err)
return message.GetBody()
},
},
{
name: "Edge Case",
hasAttachment: assert.True,
getBodyable: func(t *testing.T) models.ItemBodyable {
//nolint:lll
content := "<html><head>\r\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"><style type=\"text/css\" style=\"display:none\">\r\n<!--\r\np\r\n\t{margin-top:0;\r\n\tmargin-bottom:0}\r\n-->\r\n</style></head><body dir=\"ltr\"><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\">Happy New Year,</div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\">In accordance with TPS report guidelines, there have been questions about how to address our activities SharePoint Cover page. Do you believe this is the best picture?&nbsp;</div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><img class=\"FluidPluginCopy ContentPasted0 w-2070 h-1380\" size=\"5854817\" data-outlook-trace=\"F:1|T:1\" src=\"cid:85f4faa3-9851-40c7-ba0a-e63dce1185f9\" style=\"max-width:100%\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\">Let me know if this meets our culture requirements.</div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\">Warm Regards,</div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\"><br></div><div class=\"elementToProof\" style=\"font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0); background-color:rgb(255,255,255)\">Dustin</div></body></html>"
body := models.NewItemBody()
body.SetContent(&content)
cat := models.HTML_BODYTYPE
body.SetContentType(&cat)
return body
},
},
}
for _, test := range tests {
suite.T().Run(test.name, func(t *testing.T) {
found := HasAttachments(test.getBodyable(t))
test.hasAttachment(t, found)
})
}
}