Add retries for transient graph 400 "invalidRequest" errors (#4993)
<!-- PR description--> * Adding a retry for 400 "invalidRequest" errors. Restricting it to GET requests to limit the scope of the fix. * Retrying at adapter wrapper `Send` level instead of doing it in retry middleware. If we do it in middleware, we'll have to resort to a string search of "invalidRequest" in the http response, which is not the best way. kiota-http nicely parses the http response into an odata error after the request has finished. So doing retry after that in adapter wrapper `Send` is better as we have better guarantees on it being an odata error type. * Also added a small test optimization to reduce test run time. --- #### Does this PR need a docs update or release note? - [x] ✅ Yes, it's included - [ ] 🕐 Yes, but in a later PR - [ ] ⛔ No #### Type of change <!--- Please check the type of change your PR introduces: ---> - [ ] 🌻 Feature - [x] 🐛 Bugfix - [ ] 🗺️ Documentation - [ ] 🤖 Supportability/Tests - [ ] 💻 CI/Deployment - [ ] 🧹 Tech Debt/Cleanup #### Issue(s) <!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. --> * #<issue> #### Test Plan <!-- How will this be tested prior to merging.--> - [x] 💪 Manual - [x] ⚡ Unit test - [ ] 💚 E2E
This commit is contained in:
parent
a40807aa18
commit
dbac8a9fbf
@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased] (beta)
|
||||
|
||||
### Fixed
|
||||
- Retry transient 400 "invalidRequest" errors during onedrive & sharepoint backup.
|
||||
|
||||
### Changed
|
||||
- When running `backup details` on an empty backup returns a more helpful error message.
|
||||
|
||||
|
||||
@ -53,6 +53,7 @@ const (
|
||||
// the same name as another folder in the same parent. Such duplicate folder
|
||||
// names are not allowed by graph.
|
||||
folderExists errorCode = "ErrorFolderExists"
|
||||
invalidRequest errorCode = "invalidRequest"
|
||||
// Some datacenters are returning this when we try to get the inbox of a user
|
||||
// that doesn't exist.
|
||||
invalidUser errorCode = "ErrorInvalidUser"
|
||||
@ -169,6 +170,11 @@ func IsErrInvalidDelta(err error) bool {
|
||||
return parseODataErr(err).hasErrorCode(err, syncStateNotFound, resyncRequired, syncStateInvalid)
|
||||
}
|
||||
|
||||
func IsErrInvalidRequest(err error) bool {
|
||||
return parseODataErr(err).hasResponseCode(err, http.StatusBadRequest) &&
|
||||
parseODataErr(err).hasErrorCode(err, invalidRequest)
|
||||
}
|
||||
|
||||
func IsErrDeltaNotSupported(err error) bool {
|
||||
return parseODataErr(err).hasErrorMessage(err, ParameterDeltaTokenNotSupported)
|
||||
}
|
||||
@ -557,6 +563,12 @@ func parseODataErr(err error) oDataErr {
|
||||
return ode
|
||||
}
|
||||
|
||||
// hasResponseCode checks if the error is an ODataError and carries the provided
|
||||
// http response code.
|
||||
func (ode oDataErr) hasResponseCode(err error, httpResponseCode int) bool {
|
||||
return ode.isODataErr && ode.Resp.StatusCode == httpResponseCode
|
||||
}
|
||||
|
||||
func (ode oDataErr) hasErrorCode(err error, codes ...errorCode) bool {
|
||||
if !ode.isODataErr {
|
||||
return false
|
||||
|
||||
@ -335,6 +335,50 @@ func (suite *GraphErrorsUnitSuite) TestIsErrDeltaNotSupported() {
|
||||
}
|
||||
}
|
||||
|
||||
func (suite *GraphErrorsUnitSuite) TestIsErrInvalidRequest() {
|
||||
table := []struct {
|
||||
name string
|
||||
err error
|
||||
expect assert.BoolAssertionFunc
|
||||
}{
|
||||
{
|
||||
name: "nil",
|
||||
err: nil,
|
||||
expect: assert.False,
|
||||
},
|
||||
{
|
||||
name: "non-matching",
|
||||
err: assert.AnError,
|
||||
expect: assert.False,
|
||||
},
|
||||
{
|
||||
name: "non-matching oDataErr",
|
||||
err: graphTD.ODataErr("fnords"),
|
||||
expect: assert.False,
|
||||
},
|
||||
{
|
||||
name: "non-matching oDataErrMsg",
|
||||
err: graphTD.ODataErrWithMsg("fnords", "not supported"),
|
||||
expect: assert.False,
|
||||
},
|
||||
{
|
||||
name: "invalidRequest oDataErr, missing 400 status code",
|
||||
err: graphTD.ODataErr(string(invalidRequest)),
|
||||
expect: assert.False,
|
||||
},
|
||||
{
|
||||
name: "invalidRequest oDataErr, with 400 status code",
|
||||
err: graphTD.ODataErrWithAPIResponse(string(invalidRequest), http.StatusBadRequest),
|
||||
expect: assert.True,
|
||||
},
|
||||
}
|
||||
for _, test := range table {
|
||||
suite.Run(test.name, func() {
|
||||
test.expect(suite.T(), IsErrInvalidRequest(test.err))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (suite *GraphErrorsUnitSuite) TestIsErrQuotaExceeded() {
|
||||
table := []struct {
|
||||
name string
|
||||
|
||||
@ -345,6 +345,11 @@ func kiotaMiddlewares(
|
||||
|
||||
var _ abstractions.RequestAdapter = &adapterWrap{}
|
||||
|
||||
const (
|
||||
// Delay between retry attempts
|
||||
adapterRetryDelay = 3 * time.Second
|
||||
)
|
||||
|
||||
// adapterWrap takes a GraphRequestAdapter and replaces the Send() function to
|
||||
// act as a middleware for all http calls. Certain error conditions never reach
|
||||
// the the client middleware layer, and therefore miss out on logging and retries.
|
||||
@ -355,10 +360,15 @@ var _ abstractions.RequestAdapter = &adapterWrap{}
|
||||
type adapterWrap struct {
|
||||
abstractions.RequestAdapter
|
||||
config *clientConfig
|
||||
retryDelay time.Duration
|
||||
}
|
||||
|
||||
func wrapAdapter(gra *msgraphsdkgo.GraphRequestAdapter, cc *clientConfig) *adapterWrap {
|
||||
return &adapterWrap{gra, cc}
|
||||
return &adapterWrap{
|
||||
RequestAdapter: gra,
|
||||
config: cc,
|
||||
retryDelay: adapterRetryDelay,
|
||||
}
|
||||
}
|
||||
|
||||
var connectionEnded = filters.Contains([]string{
|
||||
@ -407,11 +417,18 @@ func (aw *adapterWrap) Send(
|
||||
case IsErrBadJWTToken(err):
|
||||
logger.Ctx(ictx).Debug("bad jwt token")
|
||||
events.Inc(events.APICall, "badjwttoken")
|
||||
case requestInfo.Method.String() == http.MethodGet && IsErrInvalidRequest(err):
|
||||
// Graph may sometimes return a transient 400 response during onedrive
|
||||
// and sharepoint backup. This is pending investigation on msft end, retry
|
||||
// for now as it's a transient issue. Restrict retries to GET requests only
|
||||
// to limit the scope of this fix.
|
||||
logger.Ctx(ictx).Debug("invalid request")
|
||||
events.Inc(events.APICall, "invalidrequest")
|
||||
default:
|
||||
return nil, clues.StackWC(ictx, err).WithTrace(1)
|
||||
}
|
||||
|
||||
time.Sleep(3 * time.Second)
|
||||
time.Sleep(aw.retryDelay)
|
||||
}
|
||||
|
||||
return sp, clues.Stack(err).OrNil()
|
||||
|
||||
@ -1,6 +1,10 @@
|
||||
package graph
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"syscall"
|
||||
@ -67,6 +71,9 @@ func (suite *GraphIntgSuite) TestCreateAdapter() {
|
||||
|
||||
assert.NoError(t, err, clues.ToCore(err))
|
||||
assert.NotNil(t, adpt)
|
||||
|
||||
aw := adpt.(*adapterWrap)
|
||||
assert.Equal(t, adapterRetryDelay, aw.retryDelay, "default retry delay")
|
||||
}
|
||||
|
||||
func (suite *GraphIntgSuite) TestHTTPClient() {
|
||||
@ -198,6 +205,10 @@ func (suite *GraphIntgSuite) TestAdapterWrap_catchesPanic() {
|
||||
appendMiddleware(&alwaysPanicMiddleware))
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
// Set retry delay to a low value to reduce test runtime.
|
||||
aw := adpt.(*adapterWrap)
|
||||
aw.retryDelay = 10 * time.Millisecond
|
||||
|
||||
// the query doesn't matter
|
||||
_, err = users.NewItemCalendarsItemEventsDeltaRequestBuilder(url, adpt).Get(ctx, nil)
|
||||
require.Error(t, err, clues.ToCore(err))
|
||||
@ -235,6 +246,10 @@ func (suite *GraphIntgSuite) TestAdapterWrap_retriesConnectionClose() {
|
||||
appendMiddleware(&alwaysECONNRESET))
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
// Set retry delay to a low value to reduce test runtime.
|
||||
aw := adpt.(*adapterWrap)
|
||||
aw.retryDelay = 10 * time.Millisecond
|
||||
|
||||
// the query doesn't matter
|
||||
_, err = users.NewItemCalendarsItemEventsDeltaRequestBuilder(url, adpt).Get(ctx, nil)
|
||||
require.ErrorIs(t, err, syscall.ECONNRESET, clues.ToCore(err))
|
||||
@ -292,6 +307,10 @@ func (suite *GraphIntgSuite) TestAdapterWrap_retriesBadJWTToken() {
|
||||
appendMiddleware(&alwaysBadJWT))
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
// Set retry delay to a low value to reduce test runtime.
|
||||
aw := adpt.(*adapterWrap)
|
||||
aw.retryDelay = 10 * time.Millisecond
|
||||
|
||||
// When run locally this may fail. Not sure why it works in github but not locally.
|
||||
// Pester keepers if it bothers you.
|
||||
_, err = users.
|
||||
@ -307,3 +326,106 @@ func (suite *GraphIntgSuite) TestAdapterWrap_retriesBadJWTToken() {
|
||||
assert.True(t, IsErrBadJWTToken(err), clues.ToCore(err))
|
||||
assert.Equal(t, 4, retryInc, "number of retries")
|
||||
}
|
||||
|
||||
// TestAdapterWrap_retriesInvalidRequest tests adapter retries for graph 400
|
||||
// invalidRequest errors. It also tests that retries are only done for GET
|
||||
// requests.
|
||||
func (suite *GraphIntgSuite) TestAdapterWrap_retriesInvalidRequest() {
|
||||
var (
|
||||
t = suite.T()
|
||||
retryInc = 0
|
||||
// Formulate a graph error response which is parseable to odata error.
|
||||
graphResp = map[string]any{
|
||||
"error": map[string]any{
|
||||
"code": invalidRequest,
|
||||
"message": "Invalid request",
|
||||
"innerError": map[string]any{
|
||||
"date": "2024-01-01T18:00:00",
|
||||
"request-id": "rid",
|
||||
"client-request-id": "cid",
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
serialized, err := json.Marshal(graphResp)
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
l := int64(len(serialized))
|
||||
|
||||
// Set up a test middleware to always return a graph 400 invalidRequest
|
||||
// response.
|
||||
returnsGraphResp := mwForceResp{
|
||||
alternate: func(req *http.Request) (bool, *http.Response, error) {
|
||||
retryInc++
|
||||
|
||||
header := http.Header{}
|
||||
header.Set("Content-Length", strconv.Itoa(int(l)))
|
||||
header.Set("Content-Type", "application/json")
|
||||
|
||||
resp := &http.Response{
|
||||
Body: io.NopCloser(bytes.NewReader(serialized)),
|
||||
ContentLength: l,
|
||||
Header: header,
|
||||
Proto: req.Proto,
|
||||
Request: req,
|
||||
StatusCode: http.StatusBadRequest, // Required retry condition
|
||||
}
|
||||
|
||||
return true, resp, nil
|
||||
},
|
||||
}
|
||||
|
||||
adpt, err := CreateAdapter(
|
||||
suite.credentials.AzureTenantID,
|
||||
suite.credentials.AzureClientID,
|
||||
suite.credentials.AzureClientSecret,
|
||||
count.New(),
|
||||
appendMiddleware(&returnsGraphResp))
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
// Set retry delay to a low value to reduce test runtime.
|
||||
aw := adpt.(*adapterWrap)
|
||||
aw.retryDelay = 10 * time.Millisecond
|
||||
|
||||
table := []struct {
|
||||
name string
|
||||
apiRequest func(t *testing.T, ctx context.Context) error
|
||||
expectedRetries int
|
||||
}{
|
||||
{
|
||||
name: "GET request, retried",
|
||||
apiRequest: func(t *testing.T, ctx context.Context) error {
|
||||
_, err = NewService(adpt).Client().Users().Get(ctx, nil)
|
||||
return err
|
||||
},
|
||||
expectedRetries: 4,
|
||||
},
|
||||
{
|
||||
name: "POST request, no retry",
|
||||
apiRequest: func(t *testing.T, ctx context.Context) error {
|
||||
u := models.NewUser()
|
||||
|
||||
cfg := users.UsersRequestBuilderPostRequestConfiguration{}
|
||||
|
||||
_, err = NewService(adpt).Client().Users().Post(ctx, u, &cfg)
|
||||
return err
|
||||
},
|
||||
expectedRetries: 1,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range table {
|
||||
suite.Run(test.name, func() {
|
||||
tt := suite.T()
|
||||
retryInc = 0
|
||||
|
||||
ctx, flush := tester.NewContext(tt)
|
||||
defer flush()
|
||||
|
||||
err := test.apiRequest(tt, ctx)
|
||||
assert.True(tt, IsErrInvalidRequest(err), clues.ToCore(err))
|
||||
assert.Equal(tt, test.expectedRetries, retryInc, "number of retries")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -57,6 +57,18 @@ func ODataInner(innerCode string) *odataerrors.ODataError {
|
||||
return odErr
|
||||
}
|
||||
|
||||
func ODataErrWithAPIResponse(
|
||||
code string,
|
||||
respCode int,
|
||||
) *odataerrors.ODataError {
|
||||
odErr := ODataErr(code)
|
||||
|
||||
// TODO(pandeyabs): Expand this function to set response headers as well.
|
||||
odErr.SetStatusCode(respCode)
|
||||
|
||||
return odErr
|
||||
}
|
||||
|
||||
func ParseableToMap(t *testing.T, thing serialization.Parsable) map[string]any {
|
||||
sw := kjson.NewJsonSerializationWriter()
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user