Retry handling for delta queries in Exchange (#2328)

## Description

Added retry handling for delta queries in OneDrive. Also, bumping time timeout for graph api calls from 90s to 3m as we were seeing client timeouts for graph api calls. ~Haven't added retry for every request in exchange as I'm hoping https://github.com/alcionai/corso/issues/2287 will be a better way to handle this.~

## Does this PR need a docs update or release note?

- [x]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [ ]  No 

## Type of change

<!--- Please check the type of change your PR introduces: --->
- [ ] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

## Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* #<issue>

## Test Plan

<!-- How will this be tested prior to merging.-->
- [x] 💪 Manual
- [ ]  Unit test
- [ ] 💚 E2E
This commit is contained in:
Abin Simon 2023-02-01 03:27:38 +05:30 committed by GitHub
parent 498fc325a9
commit f3b2e9a632
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 424 additions and 37 deletions

View File

@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ### Added
- Document Corso's fault-tolerance and restartability features - Document Corso's fault-tolerance and restartability features
- Add retries on timeouts and status code 500 for Exchange
## [v0.2.0] (alpha) - 2023-1-29 ## [v0.2.0] (alpha) - 2023-1-29

View File

@ -10,6 +10,7 @@ import (
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/account"
) )
@ -153,3 +154,26 @@ func HasAttachments(body models.ItemBodyable) bool {
return strings.Contains(content, "src=\"cid:") return strings.Contains(content, "src=\"cid:")
} }
// Run a function with retries
func runWithRetry(run func() error) error {
var err error
for i := 0; i < numberOfRetries; i++ {
err = run()
if err == nil {
return nil
}
// only retry on timeouts and 500-internal-errors.
if !(graph.IsErrTimeout(err) || graph.IsInternalServerError(err)) {
break
}
if i < numberOfRetries {
time.Sleep(time.Duration(3*(i+2)) * time.Second)
}
}
return support.ConnectorStackErrorTraceWrap(err, "maximum retries or unretryable")
}

View File

@ -61,7 +61,16 @@ func (c Contacts) GetItem(
ctx context.Context, ctx context.Context,
user, itemID string, user, itemID string,
) (serialization.Parsable, *details.ExchangeInfo, error) { ) (serialization.Parsable, *details.ExchangeInfo, error) {
cont, err := c.stable.Client().UsersById(user).ContactsById(itemID).Get(ctx, nil) var (
cont models.Contactable
err error
)
err = runWithRetry(func() error {
cont, err = c.stable.Client().UsersById(user).ContactsById(itemID).Get(ctx, nil)
return err
})
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -81,7 +90,14 @@ func (c Contacts) GetAllContactFolderNamesForUser(
return nil, err return nil, err
} }
return c.stable.Client().UsersById(user).ContactFolders().Get(ctx, options) var resp models.ContactFolderCollectionResponseable
err = runWithRetry(func() error {
resp, err = c.stable.Client().UsersById(user).ContactFolders().Get(ctx, options)
return err
})
return resp, err
} }
func (c Contacts) GetContainerByID( func (c Contacts) GetContainerByID(
@ -93,10 +109,14 @@ func (c Contacts) GetContainerByID(
return nil, errors.Wrap(err, "options for contact folder") return nil, errors.Wrap(err, "options for contact folder")
} }
return c.stable.Client(). var resp models.ContactFolderable
UsersById(userID).
ContactFoldersById(dirID). err = runWithRetry(func() error {
Get(ctx, ofcf) resp, err = c.stable.Client().UsersById(userID).ContactFoldersById(dirID).Get(ctx, ofcf)
return err
})
return resp, err
} }
// EnumerateContainers iterates through all of the users current // EnumerateContainers iterates through all of the users current
@ -117,6 +137,7 @@ func (c Contacts) EnumerateContainers(
var ( var (
errs *multierror.Error errs *multierror.Error
resp models.ContactFolderCollectionResponseable
fields = []string{"displayName", "parentFolderId"} fields = []string{"displayName", "parentFolderId"}
) )
@ -131,7 +152,11 @@ func (c Contacts) EnumerateContainers(
ChildFolders() ChildFolders()
for { for {
resp, err := builder.Get(ctx, ofcf) err = runWithRetry(func() error {
resp, err = builder.Get(ctx, ofcf)
return err
})
if err != nil { if err != nil {
return errors.Wrap(err, support.ConnectorStackErrorTrace(err)) return errors.Wrap(err, support.ConnectorStackErrorTrace(err))
} }
@ -174,7 +199,17 @@ type contactPager struct {
} }
func (p *contactPager) getPage(ctx context.Context) (api.DeltaPageLinker, error) { func (p *contactPager) getPage(ctx context.Context) (api.DeltaPageLinker, error) {
return p.builder.Get(ctx, p.options) var (
resp api.DeltaPageLinker
err error
)
err = runWithRetry(func() error {
resp, err = p.builder.Get(ctx, p.options)
return err
})
return resp, err
} }
func (p *contactPager) setNext(nextLink string) { func (p *contactPager) setNext(nextLink string) {

View File

@ -73,7 +73,13 @@ func (c Events) GetContainerByID(
return nil, errors.Wrap(err, "options for event calendar") return nil, errors.Wrap(err, "options for event calendar")
} }
cal, err := service.Client().UsersById(userID).CalendarsById(containerID).Get(ctx, ofc) var cal models.Calendarable
err = runWithRetry(func() error {
cal, err = service.Client().UsersById(userID).CalendarsById(containerID).Get(ctx, ofc)
return err
})
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -86,7 +92,16 @@ func (c Events) GetItem(
ctx context.Context, ctx context.Context,
user, itemID string, user, itemID string,
) (serialization.Parsable, *details.ExchangeInfo, error) { ) (serialization.Parsable, *details.ExchangeInfo, error) {
event, err := c.stable.Client().UsersById(user).EventsById(itemID).Get(ctx, nil) var (
event models.Eventable
err error
)
err = runWithRetry(func() error {
event, err = c.stable.Client().UsersById(user).EventsById(itemID).Get(ctx, nil)
return err
})
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -128,7 +143,14 @@ func (c Client) GetAllCalendarNamesForUser(
return nil, err return nil, err
} }
return c.stable.Client().UsersById(user).Calendars().Get(ctx, options) var resp models.CalendarCollectionResponseable
err = runWithRetry(func() error {
resp, err = c.stable.Client().UsersById(user).Calendars().Get(ctx, options)
return err
})
return resp, err
} }
// EnumerateContainers iterates through all of the users current // EnumerateContainers iterates through all of the users current
@ -147,7 +169,10 @@ func (c Events) EnumerateContainers(
return err return err
} }
var errs *multierror.Error var (
resp models.CalendarCollectionResponseable
errs *multierror.Error
)
ofc, err := optionsForCalendars([]string{"name"}) ofc, err := optionsForCalendars([]string{"name"})
if err != nil { if err != nil {
@ -157,7 +182,13 @@ func (c Events) EnumerateContainers(
builder := service.Client().UsersById(userID).Calendars() builder := service.Client().UsersById(userID).Calendars()
for { for {
resp, err := builder.Get(ctx, ofc) var err error
err = runWithRetry(func() error {
resp, err = builder.Get(ctx, ofc)
return err
})
if err != nil { if err != nil {
return errors.Wrap(err, support.ConnectorStackErrorTrace(err)) return errors.Wrap(err, support.ConnectorStackErrorTrace(err))
} }
@ -205,7 +236,16 @@ type eventPager struct {
} }
func (p *eventPager) getPage(ctx context.Context) (api.DeltaPageLinker, error) { func (p *eventPager) getPage(ctx context.Context) (api.DeltaPageLinker, error) {
resp, err := p.builder.Get(ctx, p.options) var (
resp api.DeltaPageLinker
err error
)
err = runWithRetry(func() error {
resp, err = p.builder.Get(ctx, p.options)
return err
})
return resp, err return resp, err
} }

View File

@ -95,7 +95,14 @@ func (c Mail) GetContainerByID(
return nil, errors.Wrap(err, "options for mail folder") return nil, errors.Wrap(err, "options for mail folder")
} }
return service.Client().UsersById(userID).MailFoldersById(dirID).Get(ctx, ofmf) var resp graph.Container
err = runWithRetry(func() error {
resp, err = service.Client().UsersById(userID).MailFoldersById(dirID).Get(ctx, ofmf)
return err
})
return resp, err
} }
// GetItem retrieves a Messageable item. If the item contains an attachment, that // GetItem retrieves a Messageable item. If the item contains an attachment, that
@ -104,7 +111,16 @@ func (c Mail) GetItem(
ctx context.Context, ctx context.Context,
user, itemID string, user, itemID string,
) (serialization.Parsable, *details.ExchangeInfo, error) { ) (serialization.Parsable, *details.ExchangeInfo, error) {
mail, err := c.stable.Client().UsersById(user).MessagesById(itemID).Get(ctx, nil) var (
mail models.Messageable
err error
)
err = runWithRetry(func() error {
mail, err = c.stable.Client().UsersById(user).MessagesById(itemID).Get(ctx, nil)
return err
})
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -154,6 +170,7 @@ func (c Mail) EnumerateContainers(
} }
var ( var (
resp users.ItemMailFoldersDeltaResponseable
errs *multierror.Error errs *multierror.Error
builder = service.Client(). builder = service.Client().
UsersById(userID). UsersById(userID).
@ -162,7 +179,13 @@ func (c Mail) EnumerateContainers(
) )
for { for {
resp, err := builder.Get(ctx, nil) var err error
err = runWithRetry(func() error {
resp, err = builder.Get(ctx, nil)
return err
})
if err != nil { if err != nil {
return errors.Wrap(err, support.ConnectorStackErrorTrace(err)) return errors.Wrap(err, support.ConnectorStackErrorTrace(err))
} }
@ -200,7 +223,17 @@ type mailPager struct {
} }
func (p *mailPager) getPage(ctx context.Context) (api.DeltaPageLinker, error) { func (p *mailPager) getPage(ctx context.Context) (api.DeltaPageLinker, error) {
return p.builder.Get(ctx, p.options) var (
page api.DeltaPageLinker
err error
)
err = runWithRetry(func() error {
page, err = p.builder.Get(ctx, p.options)
return err
})
return page, err
} }
func (p *mailPager) setNext(nextLink string) { func (p *mailPager) setNext(nextLink string) {

View File

@ -17,6 +17,7 @@ import (
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
const ( const (
errCodeActivityLimitReached = "activityLimitReached"
errCodeItemNotFound = "ErrorItemNotFound" errCodeItemNotFound = "ErrorItemNotFound"
errCodeEmailFolderNotFound = "ErrorSyncFolderNotFound" errCodeEmailFolderNotFound = "ErrorSyncFolderNotFound"
errCodeResyncRequired = "ResyncRequired" errCodeResyncRequired = "ResyncRequired"
@ -31,8 +32,10 @@ var (
// normally the graph client will catch this for us, but in case we // normally the graph client will catch this for us, but in case we
// run our own client Do(), we need to translate it to a timeout type // run our own client Do(), we need to translate it to a timeout type
// failure locally. // failure locally.
Err429TooManyRequests = errors.New("429 too many requests") Err429TooManyRequests = errors.New("429 too many requests")
Err503ServiceUnavailable = errors.New("503 Service Unavailable") Err503ServiceUnavailable = errors.New("503 Service Unavailable")
Err504GatewayTimeout = errors.New("504 Gateway Timeout")
Err500InternalServerError = errors.New("500 Internal Server Error")
) )
// The folder or item was deleted between the time we identified // The folder or item was deleted between the time we identified
@ -113,6 +116,10 @@ func IsErrThrottled(err error) bool {
return true return true
} }
if hasErrorCode(err, errCodeActivityLimitReached) {
return true
}
e := ErrThrottled{} e := ErrThrottled{}
return errors.As(err, &e) return errors.As(err, &e)
@ -135,21 +142,18 @@ func IsErrUnauthorized(err error) bool {
return errors.As(err, &e) return errors.As(err, &e)
} }
type ErrServiceUnavailable struct { type ErrInternalServerError struct {
common.Err common.Err
} }
func IsSericeUnavailable(err error) bool { func IsInternalServerError(err error) bool {
if errors.Is(err, Err503ServiceUnavailable) { if errors.Is(err, Err500InternalServerError) {
return true return true
} }
e := ErrUnauthorized{} e := ErrInternalServerError{}
if errors.As(err, &e) {
return true
}
return true return errors.As(err, &e)
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------

View File

@ -0,0 +1,248 @@
package graph
import (
"context"
"testing"
"github.com/microsoftgraph/msgraph-sdk-go/models/odataerrors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/common"
)
type GraphErrorsUnitSuite struct {
suite.Suite
}
func TestGraphErrorsUnitSuite(t *testing.T) {
suite.Run(t, new(GraphErrorsUnitSuite))
}
func odErr(code string) *odataerrors.ODataError {
odErr := &odataerrors.ODataError{}
merr := odataerrors.MainError{}
merr.SetCode(&code)
odErr.SetError(&merr)
return odErr
}
func (suite *GraphErrorsUnitSuite) TestIsErrDeletedInFlight() {
table := []struct {
name string
err error
expect assert.BoolAssertionFunc
}{
{
name: "nil",
err: nil,
expect: assert.False,
},
{
name: "non-matching",
err: assert.AnError,
expect: assert.False,
},
{
name: "as",
err: ErrDeletedInFlight{Err: *common.EncapsulateError(assert.AnError)},
expect: assert.True,
},
{
name: "non-matching oDataErr",
err: odErr("fnords"),
expect: assert.False,
},
{
name: "not-found oDataErr",
err: odErr(errCodeItemNotFound),
expect: assert.True,
},
{
name: "sync-not-found oDataErr",
err: odErr(errCodeSyncFolderNotFound),
expect: assert.True,
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
test.expect(t, IsErrDeletedInFlight(test.err))
})
}
}
func (suite *GraphErrorsUnitSuite) TestIsErrInvalidDelta() {
table := []struct {
name string
err error
expect assert.BoolAssertionFunc
}{
{
name: "nil",
err: nil,
expect: assert.False,
},
{
name: "non-matching",
err: assert.AnError,
expect: assert.False,
},
{
name: "as",
err: ErrInvalidDelta{Err: *common.EncapsulateError(assert.AnError)},
expect: assert.True,
},
{
name: "non-matching oDataErr",
err: odErr("fnords"),
expect: assert.False,
},
{
name: "resync-required oDataErr",
err: odErr(errCodeResyncRequired),
expect: assert.True,
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
test.expect(t, IsErrInvalidDelta(test.err))
})
}
}
func (suite *GraphErrorsUnitSuite) TestIsErrTimeout() {
table := []struct {
name string
err error
expect assert.BoolAssertionFunc
}{
{
name: "nil",
err: nil,
expect: assert.False,
},
{
name: "non-matching",
err: assert.AnError,
expect: assert.False,
},
{
name: "as",
err: ErrTimeout{Err: *common.EncapsulateError(assert.AnError)},
expect: assert.True,
},
{
name: "context deadline",
err: context.DeadlineExceeded,
expect: assert.True,
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
test.expect(t, IsErrTimeout(test.err))
})
}
}
func (suite *GraphErrorsUnitSuite) TestIsErrThrottled() {
table := []struct {
name string
err error
expect assert.BoolAssertionFunc
}{
{
name: "nil",
err: nil,
expect: assert.False,
},
{
name: "non-matching",
err: assert.AnError,
expect: assert.False,
},
{
name: "as",
err: ErrThrottled{Err: *common.EncapsulateError(assert.AnError)},
expect: assert.True,
},
{
name: "is429",
err: Err429TooManyRequests,
expect: assert.True,
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
test.expect(t, IsErrThrottled(test.err))
})
}
}
func (suite *GraphErrorsUnitSuite) TestIsErrUnauthorized() {
table := []struct {
name string
err error
expect assert.BoolAssertionFunc
}{
{
name: "nil",
err: nil,
expect: assert.False,
},
{
name: "non-matching",
err: assert.AnError,
expect: assert.False,
},
{
name: "as",
err: ErrUnauthorized{Err: *common.EncapsulateError(assert.AnError)},
expect: assert.True,
},
{
name: "is429",
err: Err401Unauthorized,
expect: assert.True,
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
test.expect(t, IsErrUnauthorized(test.err))
})
}
}
func (suite *GraphErrorsUnitSuite) TestIsInternalServerError() {
table := []struct {
name string
err error
expect assert.BoolAssertionFunc
}{
{
name: "nil",
err: nil,
expect: assert.False,
},
{
name: "non-matching",
err: assert.AnError,
expect: assert.False,
},
{
name: "as",
err: ErrInternalServerError{Err: *common.EncapsulateError(assert.AnError)},
expect: assert.True,
},
{
name: "is429",
err: Err500InternalServerError,
expect: assert.True,
},
}
for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
test.expect(t, IsInternalServerError(test.err))
})
}
}

View File

@ -149,7 +149,7 @@ func HTTPClient(opts ...option) *http.Client {
middlewares := msgraphgocore.GetDefaultMiddlewaresWithOptions(&clientOptions) middlewares := msgraphgocore.GetDefaultMiddlewaresWithOptions(&clientOptions)
middlewares = append(middlewares, &LoggingMiddleware{}) middlewares = append(middlewares, &LoggingMiddleware{})
httpClient := msgraphgocore.GetDefaultClient(&clientOptions, middlewares...) httpClient := msgraphgocore.GetDefaultClient(&clientOptions, middlewares...)
httpClient.Timeout = time.Second * 90 httpClient.Timeout = time.Minute * 3
(&clientConfig{}). (&clientConfig{}).
populate(opts...). populate(opts...).
@ -250,7 +250,6 @@ func (handler *LoggingMiddleware) Intercept(
respDump, _ := httputil.DumpResponse(resp, false) respDump, _ := httputil.DumpResponse(resp, false)
metadata := []any{ metadata := []any{
"idx", middlewareIndex,
"method", req.Method, "method", req.Method,
"status", resp.Status, "status", resp.Status,
"statusCode", resp.StatusCode, "statusCode", resp.StatusCode,
@ -273,7 +272,6 @@ func (handler *LoggingMiddleware) Intercept(
respDump, _ := httputil.DumpResponse(resp, true) respDump, _ := httputil.DumpResponse(resp, true)
metadata := []any{ metadata := []any{
"idx", middlewareIndex,
"method", req.Method, "method", req.Method,
"status", resp.Status, "status", resp.Status,
"statusCode", resp.StatusCode, "statusCode", resp.StatusCode,

View File

@ -53,7 +53,7 @@ func (suite *GraphUnitSuite) TestHTTPClient() {
name: "no options", name: "no options",
opts: []option{}, opts: []option{},
check: func(t *testing.T, c *http.Client) { check: func(t *testing.T, c *http.Client) {
assert.Equal(t, 90*time.Second, c.Timeout, "default timeout") assert.Equal(t, 3*time.Minute, c.Timeout, "default timeout")
}, },
}, },
{ {

View File

@ -271,11 +271,11 @@ func (oc *Collection) populateItems(ctx context.Context) {
continue continue
} else if !graph.IsErrTimeout(err) && !graph.IsErrThrottled(err) && !graph.IsSericeUnavailable(err) { } else if !graph.IsErrTimeout(err) &&
// TODO: graphAPI will provides headers that state the duration to wait !graph.IsInternalServerError(err) {
// in order to succeed again. The one second sleep won't cut it here. // Don't retry for non-timeout, on-unauth, as
// // we are already retrying it in the default
// for all non-timeout, non-unauth, non-throttling errors, do not retry // retry middleware
break break
} }

View File

@ -105,6 +105,10 @@ func downloadItem(hc *http.Client, item models.DriveItemable) (*http.Response, e
return resp, graph.Err401Unauthorized return resp, graph.Err401Unauthorized
} }
if resp.StatusCode == http.StatusInternalServerError {
return resp, graph.Err500InternalServerError
}
if resp.StatusCode == http.StatusServiceUnavailable { if resp.StatusCode == http.StatusServiceUnavailable {
return resp, graph.Err503ServiceUnavailable return resp, graph.Err503ServiceUnavailable
} }