From e1f3ffd0b3f10fd9fcaac9d579d05c0fc64d85e2 Mon Sep 17 00:00:00 2001 From: Abhishek Pandey Date: Wed, 29 Nov 2023 18:25:59 -0800 Subject: [PATCH] Add configurable retries for 503 and 504 errors (#4750) We use kiota retry handlers for 503 and 504 retries. We have a request from SDK users to configure number of retry attempts for 5xx errors. We already have configurability around retry attempts, plumbing it through to kiota retry handler. --- #### Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No #### Type of change - [ ] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [x] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * # #### Test Plan - [x] :muscle: Manual - [ ] :zap: Unit test - [ ] :green_heart: E2E --- .../services/m365/api/graph/http_wrapper.go | 16 +++++++- .../m365/api/graph/middleware_test.go | 39 +++++++++++++++---- src/pkg/services/m365/api/graph/service.go | 16 +++++++- 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/src/pkg/services/m365/api/graph/http_wrapper.go b/src/pkg/services/m365/api/graph/http_wrapper.go index 8a03f1301..8093738a0 100644 --- a/src/pkg/services/m365/api/graph/http_wrapper.go +++ b/src/pkg/services/m365/api/graph/http_wrapper.go @@ -193,12 +193,26 @@ func internalMiddleware( counter: counter, } + retryOptions := khttp.RetryHandlerOptions{ + ShouldRetry: func( + delay time.Duration, + executionCount int, + request *http.Request, + response *http.Response, + ) bool { + return true + }, + MaxRetries: cc.maxRetries, + DelaySeconds: int(cc.minDelay.Seconds()), + } + mw := []khttp.Middleware{ &RetryMiddleware{ MaxRetries: cc.maxRetries, Delay: cc.minDelay, }, - khttp.NewRetryHandler(), + // We use default kiota retry handler for 503 and 504 errors + khttp.NewRetryHandlerWithOptions(retryOptions), khttp.NewRedirectHandler(), &LoggingMiddleware{}, throttler, diff --git a/src/pkg/services/m365/api/graph/middleware_test.go b/src/pkg/services/m365/api/graph/middleware_test.go index b943dc9f7..8c7bd0dc8 100644 --- a/src/pkg/services/m365/api/graph/middleware_test.go +++ b/src/pkg/services/m365/api/graph/middleware_test.go @@ -182,13 +182,30 @@ func (suite *RetryMWIntgSuite) TestRetryMiddleware_Intercept_byStatusCode() { expectErr: assert.Error, }, { - // don't test 504: gets intercepted by graph client for long waits. name: "502", status: http.StatusBadGateway, providedErr: nil, expectRetryCount: defaultMaxRetries, expectErr: assert.Error, }, + // 503 and 504 retries are handled by kiota retry handler. Adding + // tests here to ensure we don't regress on retrying these errors. + // Configure retry count to 1 so that the test case doesn't run for too + // long due to exponential backoffs. + { + name: "503", + status: http.StatusServiceUnavailable, + providedErr: nil, + expectRetryCount: 1, + expectErr: assert.Error, + }, + { + name: "504", + status: http.StatusGatewayTimeout, + providedErr: nil, + expectRetryCount: 1, + expectErr: assert.Error, + }, { name: "conn reset with 5xx", status: http.StatusBadGateway, @@ -241,9 +258,14 @@ func (suite *RetryMWIntgSuite) TestRetryMiddleware_Intercept_byStatusCode() { newMWReturns(test.status, nil, test.providedErr)) mw.repeatReturn0 = true + // Add a large timeout of 100 seconds to ensure that the ctx deadline + // doesn't exceed. Otherwise, we'll end up retrying due to ctx deadline + // exceeded, instead of the actual test case. This is also important + // for 503 and 504 test cases which are handled by kiota retry handler. + // We don't want corso retry handler to kick in for these cases. cc := populateConfig( MinimumBackoff(10*time.Millisecond), - Timeout(25*time.Second), + Timeout(100*time.Second), MaxRetries(test.expectRetryCount)) adpt, err := mockAdapter(suite.creds, mw, cc) @@ -292,7 +314,7 @@ func (suite *RetryMWIntgSuite) TestRetryMiddleware_RetryRequest_resetBodyAfter50 cc := populateConfig( MinimumBackoff(10*time.Millisecond), - Timeout(15*time.Second)) + Timeout(100*time.Second)) adpt, err := mockAdapter(suite.creds, mw, cc) require.NoError(t, err, clues.ToCore(err)) @@ -324,13 +346,16 @@ func (suite *RetryMWIntgSuite) TestRetryMiddleware_RetryResponse_maintainBodyAft // intentional no-op, just need to conrol the response code func(*http.Request) {}, newMWReturns(http.StatusServiceUnavailable, body, nil), - newMWReturns(http.StatusServiceUnavailable, body, nil), - newMWReturns(http.StatusServiceUnavailable, body, nil), newMWReturns(http.StatusServiceUnavailable, body, nil)) + // Configure max retries to 1 so that the test case doesn't run for too + // long due to exponential backoffs. Also, add a large timeout of 100 seconds + // to ensure that the ctx deadline doesn't exceed. Otherwise, we'll end up + // retrying due to timeout exceeded, instead of 503s. cc := populateConfig( - MinimumBackoff(10*time.Millisecond), - Timeout(55*time.Second)) + MaxRetries(1), + MinimumBackoff(1*time.Second), + Timeout(100*time.Second)) adpt, err := mockAdapter(suite.creds, mw, cc) require.NoError(t, err, clues.ToCore(err)) diff --git a/src/pkg/services/m365/api/graph/service.go b/src/pkg/services/m365/api/graph/service.go index 2049ef4e1..e005030bd 100644 --- a/src/pkg/services/m365/api/graph/service.go +++ b/src/pkg/services/m365/api/graph/service.go @@ -285,13 +285,27 @@ func kiotaMiddlewares( cc *clientConfig, counter *count.Bus, ) []khttp.Middleware { + retryOptions := khttp.RetryHandlerOptions{ + ShouldRetry: func( + delay time.Duration, + executionCount int, + request *http.Request, + response *http.Response, + ) bool { + return true + }, + MaxRetries: cc.maxRetries, + DelaySeconds: int(cc.minDelay.Seconds()), + } + mw := []khttp.Middleware{ msgraphgocore.NewGraphTelemetryHandler(options), &RetryMiddleware{ MaxRetries: cc.maxRetries, Delay: cc.minDelay, }, - khttp.NewRetryHandler(), + // We use default kiota retry handler for 503 and 504 errors + khttp.NewRetryHandlerWithOptions(retryOptions), khttp.NewRedirectHandler(), khttp.NewCompressionHandler(), khttp.NewParametersNameDecodingHandler(),