Add retries for transient graph 400 "invalidRequest" errors (#4993)
<!-- PR description--> * Adding a retry for 400 "invalidRequest" errors. Restricting it to GET requests to limit the scope of the fix. * Retrying at adapter wrapper `Send` level instead of doing it in retry middleware. If we do it in middleware, we'll have to resort to a string search of "invalidRequest" in the http response, which is not the best way. kiota-http nicely parses the http response into an odata error after the request has finished. So doing retry after that in adapter wrapper `Send` is better as we have better guarantees on it being an odata error type. * Also added a small test optimization to reduce test run time. --- #### Does this PR need a docs update or release note? - [x] ✅ Yes, it's included - [ ] 🕐 Yes, but in a later PR - [ ] ⛔ No #### Type of change <!--- Please check the type of change your PR introduces: ---> - [ ] 🌻 Feature - [x] 🐛 Bugfix - [ ] 🗺️ Documentation - [ ] 🤖 Supportability/Tests - [ ] 💻 CI/Deployment - [ ] 🧹 Tech Debt/Cleanup #### Issue(s) <!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. --> * #<issue> #### Test Plan <!-- How will this be tested prior to merging.--> - [x] 💪 Manual - [x] ⚡ Unit test - [ ] 💚 E2E
This commit is contained in:
parent
a40807aa18
commit
dbac8a9fbf
@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
|
|
||||||
## [Unreleased] (beta)
|
## [Unreleased] (beta)
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- Retry transient 400 "invalidRequest" errors during onedrive & sharepoint backup.
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
- When running `backup details` on an empty backup returns a more helpful error message.
|
- When running `backup details` on an empty backup returns a more helpful error message.
|
||||||
|
|
||||||
|
|||||||
@ -53,6 +53,7 @@ const (
|
|||||||
// the same name as another folder in the same parent. Such duplicate folder
|
// the same name as another folder in the same parent. Such duplicate folder
|
||||||
// names are not allowed by graph.
|
// names are not allowed by graph.
|
||||||
folderExists errorCode = "ErrorFolderExists"
|
folderExists errorCode = "ErrorFolderExists"
|
||||||
|
invalidRequest errorCode = "invalidRequest"
|
||||||
// Some datacenters are returning this when we try to get the inbox of a user
|
// Some datacenters are returning this when we try to get the inbox of a user
|
||||||
// that doesn't exist.
|
// that doesn't exist.
|
||||||
invalidUser errorCode = "ErrorInvalidUser"
|
invalidUser errorCode = "ErrorInvalidUser"
|
||||||
@ -169,6 +170,11 @@ func IsErrInvalidDelta(err error) bool {
|
|||||||
return parseODataErr(err).hasErrorCode(err, syncStateNotFound, resyncRequired, syncStateInvalid)
|
return parseODataErr(err).hasErrorCode(err, syncStateNotFound, resyncRequired, syncStateInvalid)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func IsErrInvalidRequest(err error) bool {
|
||||||
|
return parseODataErr(err).hasResponseCode(err, http.StatusBadRequest) &&
|
||||||
|
parseODataErr(err).hasErrorCode(err, invalidRequest)
|
||||||
|
}
|
||||||
|
|
||||||
func IsErrDeltaNotSupported(err error) bool {
|
func IsErrDeltaNotSupported(err error) bool {
|
||||||
return parseODataErr(err).hasErrorMessage(err, ParameterDeltaTokenNotSupported)
|
return parseODataErr(err).hasErrorMessage(err, ParameterDeltaTokenNotSupported)
|
||||||
}
|
}
|
||||||
@ -557,6 +563,12 @@ func parseODataErr(err error) oDataErr {
|
|||||||
return ode
|
return ode
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hasResponseCode checks if the error is an ODataError and carries the provided
|
||||||
|
// http response code.
|
||||||
|
func (ode oDataErr) hasResponseCode(err error, httpResponseCode int) bool {
|
||||||
|
return ode.isODataErr && ode.Resp.StatusCode == httpResponseCode
|
||||||
|
}
|
||||||
|
|
||||||
func (ode oDataErr) hasErrorCode(err error, codes ...errorCode) bool {
|
func (ode oDataErr) hasErrorCode(err error, codes ...errorCode) bool {
|
||||||
if !ode.isODataErr {
|
if !ode.isODataErr {
|
||||||
return false
|
return false
|
||||||
|
|||||||
@ -335,6 +335,50 @@ func (suite *GraphErrorsUnitSuite) TestIsErrDeltaNotSupported() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (suite *GraphErrorsUnitSuite) TestIsErrInvalidRequest() {
|
||||||
|
table := []struct {
|
||||||
|
name string
|
||||||
|
err error
|
||||||
|
expect assert.BoolAssertionFunc
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "nil",
|
||||||
|
err: nil,
|
||||||
|
expect: assert.False,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-matching",
|
||||||
|
err: assert.AnError,
|
||||||
|
expect: assert.False,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-matching oDataErr",
|
||||||
|
err: graphTD.ODataErr("fnords"),
|
||||||
|
expect: assert.False,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-matching oDataErrMsg",
|
||||||
|
err: graphTD.ODataErrWithMsg("fnords", "not supported"),
|
||||||
|
expect: assert.False,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalidRequest oDataErr, missing 400 status code",
|
||||||
|
err: graphTD.ODataErr(string(invalidRequest)),
|
||||||
|
expect: assert.False,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalidRequest oDataErr, with 400 status code",
|
||||||
|
err: graphTD.ODataErrWithAPIResponse(string(invalidRequest), http.StatusBadRequest),
|
||||||
|
expect: assert.True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, test := range table {
|
||||||
|
suite.Run(test.name, func() {
|
||||||
|
test.expect(suite.T(), IsErrInvalidRequest(test.err))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (suite *GraphErrorsUnitSuite) TestIsErrQuotaExceeded() {
|
func (suite *GraphErrorsUnitSuite) TestIsErrQuotaExceeded() {
|
||||||
table := []struct {
|
table := []struct {
|
||||||
name string
|
name string
|
||||||
|
|||||||
@ -345,6 +345,11 @@ func kiotaMiddlewares(
|
|||||||
|
|
||||||
var _ abstractions.RequestAdapter = &adapterWrap{}
|
var _ abstractions.RequestAdapter = &adapterWrap{}
|
||||||
|
|
||||||
|
const (
|
||||||
|
// Delay between retry attempts
|
||||||
|
adapterRetryDelay = 3 * time.Second
|
||||||
|
)
|
||||||
|
|
||||||
// adapterWrap takes a GraphRequestAdapter and replaces the Send() function to
|
// adapterWrap takes a GraphRequestAdapter and replaces the Send() function to
|
||||||
// act as a middleware for all http calls. Certain error conditions never reach
|
// act as a middleware for all http calls. Certain error conditions never reach
|
||||||
// the the client middleware layer, and therefore miss out on logging and retries.
|
// the the client middleware layer, and therefore miss out on logging and retries.
|
||||||
@ -355,10 +360,15 @@ var _ abstractions.RequestAdapter = &adapterWrap{}
|
|||||||
type adapterWrap struct {
|
type adapterWrap struct {
|
||||||
abstractions.RequestAdapter
|
abstractions.RequestAdapter
|
||||||
config *clientConfig
|
config *clientConfig
|
||||||
|
retryDelay time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
func wrapAdapter(gra *msgraphsdkgo.GraphRequestAdapter, cc *clientConfig) *adapterWrap {
|
func wrapAdapter(gra *msgraphsdkgo.GraphRequestAdapter, cc *clientConfig) *adapterWrap {
|
||||||
return &adapterWrap{gra, cc}
|
return &adapterWrap{
|
||||||
|
RequestAdapter: gra,
|
||||||
|
config: cc,
|
||||||
|
retryDelay: adapterRetryDelay,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var connectionEnded = filters.Contains([]string{
|
var connectionEnded = filters.Contains([]string{
|
||||||
@ -407,11 +417,18 @@ func (aw *adapterWrap) Send(
|
|||||||
case IsErrBadJWTToken(err):
|
case IsErrBadJWTToken(err):
|
||||||
logger.Ctx(ictx).Debug("bad jwt token")
|
logger.Ctx(ictx).Debug("bad jwt token")
|
||||||
events.Inc(events.APICall, "badjwttoken")
|
events.Inc(events.APICall, "badjwttoken")
|
||||||
|
case requestInfo.Method.String() == http.MethodGet && IsErrInvalidRequest(err):
|
||||||
|
// Graph may sometimes return a transient 400 response during onedrive
|
||||||
|
// and sharepoint backup. This is pending investigation on msft end, retry
|
||||||
|
// for now as it's a transient issue. Restrict retries to GET requests only
|
||||||
|
// to limit the scope of this fix.
|
||||||
|
logger.Ctx(ictx).Debug("invalid request")
|
||||||
|
events.Inc(events.APICall, "invalidrequest")
|
||||||
default:
|
default:
|
||||||
return nil, clues.StackWC(ictx, err).WithTrace(1)
|
return nil, clues.StackWC(ictx, err).WithTrace(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(3 * time.Second)
|
time.Sleep(aw.retryDelay)
|
||||||
}
|
}
|
||||||
|
|
||||||
return sp, clues.Stack(err).OrNil()
|
return sp, clues.Stack(err).OrNil()
|
||||||
|
|||||||
@ -1,6 +1,10 @@
|
|||||||
package graph
|
package graph
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
"syscall"
|
"syscall"
|
||||||
@ -67,6 +71,9 @@ func (suite *GraphIntgSuite) TestCreateAdapter() {
|
|||||||
|
|
||||||
assert.NoError(t, err, clues.ToCore(err))
|
assert.NoError(t, err, clues.ToCore(err))
|
||||||
assert.NotNil(t, adpt)
|
assert.NotNil(t, adpt)
|
||||||
|
|
||||||
|
aw := adpt.(*adapterWrap)
|
||||||
|
assert.Equal(t, adapterRetryDelay, aw.retryDelay, "default retry delay")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (suite *GraphIntgSuite) TestHTTPClient() {
|
func (suite *GraphIntgSuite) TestHTTPClient() {
|
||||||
@ -198,6 +205,10 @@ func (suite *GraphIntgSuite) TestAdapterWrap_catchesPanic() {
|
|||||||
appendMiddleware(&alwaysPanicMiddleware))
|
appendMiddleware(&alwaysPanicMiddleware))
|
||||||
require.NoError(t, err, clues.ToCore(err))
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
// Set retry delay to a low value to reduce test runtime.
|
||||||
|
aw := adpt.(*adapterWrap)
|
||||||
|
aw.retryDelay = 10 * time.Millisecond
|
||||||
|
|
||||||
// the query doesn't matter
|
// the query doesn't matter
|
||||||
_, err = users.NewItemCalendarsItemEventsDeltaRequestBuilder(url, adpt).Get(ctx, nil)
|
_, err = users.NewItemCalendarsItemEventsDeltaRequestBuilder(url, adpt).Get(ctx, nil)
|
||||||
require.Error(t, err, clues.ToCore(err))
|
require.Error(t, err, clues.ToCore(err))
|
||||||
@ -235,6 +246,10 @@ func (suite *GraphIntgSuite) TestAdapterWrap_retriesConnectionClose() {
|
|||||||
appendMiddleware(&alwaysECONNRESET))
|
appendMiddleware(&alwaysECONNRESET))
|
||||||
require.NoError(t, err, clues.ToCore(err))
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
// Set retry delay to a low value to reduce test runtime.
|
||||||
|
aw := adpt.(*adapterWrap)
|
||||||
|
aw.retryDelay = 10 * time.Millisecond
|
||||||
|
|
||||||
// the query doesn't matter
|
// the query doesn't matter
|
||||||
_, err = users.NewItemCalendarsItemEventsDeltaRequestBuilder(url, adpt).Get(ctx, nil)
|
_, err = users.NewItemCalendarsItemEventsDeltaRequestBuilder(url, adpt).Get(ctx, nil)
|
||||||
require.ErrorIs(t, err, syscall.ECONNRESET, clues.ToCore(err))
|
require.ErrorIs(t, err, syscall.ECONNRESET, clues.ToCore(err))
|
||||||
@ -292,6 +307,10 @@ func (suite *GraphIntgSuite) TestAdapterWrap_retriesBadJWTToken() {
|
|||||||
appendMiddleware(&alwaysBadJWT))
|
appendMiddleware(&alwaysBadJWT))
|
||||||
require.NoError(t, err, clues.ToCore(err))
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
// Set retry delay to a low value to reduce test runtime.
|
||||||
|
aw := adpt.(*adapterWrap)
|
||||||
|
aw.retryDelay = 10 * time.Millisecond
|
||||||
|
|
||||||
// When run locally this may fail. Not sure why it works in github but not locally.
|
// When run locally this may fail. Not sure why it works in github but not locally.
|
||||||
// Pester keepers if it bothers you.
|
// Pester keepers if it bothers you.
|
||||||
_, err = users.
|
_, err = users.
|
||||||
@ -307,3 +326,106 @@ func (suite *GraphIntgSuite) TestAdapterWrap_retriesBadJWTToken() {
|
|||||||
assert.True(t, IsErrBadJWTToken(err), clues.ToCore(err))
|
assert.True(t, IsErrBadJWTToken(err), clues.ToCore(err))
|
||||||
assert.Equal(t, 4, retryInc, "number of retries")
|
assert.Equal(t, 4, retryInc, "number of retries")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestAdapterWrap_retriesInvalidRequest tests adapter retries for graph 400
|
||||||
|
// invalidRequest errors. It also tests that retries are only done for GET
|
||||||
|
// requests.
|
||||||
|
func (suite *GraphIntgSuite) TestAdapterWrap_retriesInvalidRequest() {
|
||||||
|
var (
|
||||||
|
t = suite.T()
|
||||||
|
retryInc = 0
|
||||||
|
// Formulate a graph error response which is parseable to odata error.
|
||||||
|
graphResp = map[string]any{
|
||||||
|
"error": map[string]any{
|
||||||
|
"code": invalidRequest,
|
||||||
|
"message": "Invalid request",
|
||||||
|
"innerError": map[string]any{
|
||||||
|
"date": "2024-01-01T18:00:00",
|
||||||
|
"request-id": "rid",
|
||||||
|
"client-request-id": "cid",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
serialized, err := json.Marshal(graphResp)
|
||||||
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
l := int64(len(serialized))
|
||||||
|
|
||||||
|
// Set up a test middleware to always return a graph 400 invalidRequest
|
||||||
|
// response.
|
||||||
|
returnsGraphResp := mwForceResp{
|
||||||
|
alternate: func(req *http.Request) (bool, *http.Response, error) {
|
||||||
|
retryInc++
|
||||||
|
|
||||||
|
header := http.Header{}
|
||||||
|
header.Set("Content-Length", strconv.Itoa(int(l)))
|
||||||
|
header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
resp := &http.Response{
|
||||||
|
Body: io.NopCloser(bytes.NewReader(serialized)),
|
||||||
|
ContentLength: l,
|
||||||
|
Header: header,
|
||||||
|
Proto: req.Proto,
|
||||||
|
Request: req,
|
||||||
|
StatusCode: http.StatusBadRequest, // Required retry condition
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, resp, nil
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
adpt, err := CreateAdapter(
|
||||||
|
suite.credentials.AzureTenantID,
|
||||||
|
suite.credentials.AzureClientID,
|
||||||
|
suite.credentials.AzureClientSecret,
|
||||||
|
count.New(),
|
||||||
|
appendMiddleware(&returnsGraphResp))
|
||||||
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
// Set retry delay to a low value to reduce test runtime.
|
||||||
|
aw := adpt.(*adapterWrap)
|
||||||
|
aw.retryDelay = 10 * time.Millisecond
|
||||||
|
|
||||||
|
table := []struct {
|
||||||
|
name string
|
||||||
|
apiRequest func(t *testing.T, ctx context.Context) error
|
||||||
|
expectedRetries int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "GET request, retried",
|
||||||
|
apiRequest: func(t *testing.T, ctx context.Context) error {
|
||||||
|
_, err = NewService(adpt).Client().Users().Get(ctx, nil)
|
||||||
|
return err
|
||||||
|
},
|
||||||
|
expectedRetries: 4,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "POST request, no retry",
|
||||||
|
apiRequest: func(t *testing.T, ctx context.Context) error {
|
||||||
|
u := models.NewUser()
|
||||||
|
|
||||||
|
cfg := users.UsersRequestBuilderPostRequestConfiguration{}
|
||||||
|
|
||||||
|
_, err = NewService(adpt).Client().Users().Post(ctx, u, &cfg)
|
||||||
|
return err
|
||||||
|
},
|
||||||
|
expectedRetries: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range table {
|
||||||
|
suite.Run(test.name, func() {
|
||||||
|
tt := suite.T()
|
||||||
|
retryInc = 0
|
||||||
|
|
||||||
|
ctx, flush := tester.NewContext(tt)
|
||||||
|
defer flush()
|
||||||
|
|
||||||
|
err := test.apiRequest(tt, ctx)
|
||||||
|
assert.True(tt, IsErrInvalidRequest(err), clues.ToCore(err))
|
||||||
|
assert.Equal(tt, test.expectedRetries, retryInc, "number of retries")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -57,6 +57,18 @@ func ODataInner(innerCode string) *odataerrors.ODataError {
|
|||||||
return odErr
|
return odErr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ODataErrWithAPIResponse(
|
||||||
|
code string,
|
||||||
|
respCode int,
|
||||||
|
) *odataerrors.ODataError {
|
||||||
|
odErr := ODataErr(code)
|
||||||
|
|
||||||
|
// TODO(pandeyabs): Expand this function to set response headers as well.
|
||||||
|
odErr.SetStatusCode(respCode)
|
||||||
|
|
||||||
|
return odErr
|
||||||
|
}
|
||||||
|
|
||||||
func ParseableToMap(t *testing.T, thing serialization.Parsable) map[string]any {
|
func ParseableToMap(t *testing.T, thing serialization.Parsable) map[string]any {
|
||||||
sw := kjson.NewJsonSerializationWriter()
|
sw := kjson.NewJsonSerializationWriter()
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user