Handle 503 retries where the Retry-After header exceeds what the Graph SDK will wait for (#2888)

This logic will only trigger if the Graph retry handler does not handle the 503 (it is invoked first). We remove the check in Corso for `absoluteMaxDelay` because our retry config will not trigger that limit (180s). This allows the retry to kick in if the 503 response contains a `Retry-After` value > 180s This could help with issues like https://github.com/alcionai/corso/issues/2876 #### Does this PR need a docs update or release note? - [ ] ✅ Yes, it's included - [ ] 🕐 Yes, but in a later PR - [x] ⛔ No #### Type of change  - [ ] 🌻 Feature - [x] 🐛 Bugfix - [ ] 🗺️ Documentation - [ ] 🤖 Supportability/Tests - [ ] 💻 CI/Deployment - [ ] 🧹 Tech Debt/Cleanup #### Issue(s)  * #2876 #### Test Plan  - [x] 💪 Manual - [ ] ⚡ Unit test - [ ] 💚 E2E
2023-03-20 22:37:24 -07:00 · 2023-03-20 22:37:24 -07:00 · c49e4bbef4
commit c49e4bbef4
parent ef5178668f
2 changed files with 3 additions and 6 deletions
--- a/src/internal/connector/graph/retry_middleware.go
+++ b/src/internal/connector/graph/retry_middleware.go
@ -35,8 +35,7 @@ func (middleware RetryHandler) retryRequest(
 ) (*http.Response, error) {
 	if (respErr != nil || middleware.isRetriableErrorCode(req, resp.StatusCode)) &&
 		middleware.isRetriableRequest(req) &&
-		executionCount < middleware.MaxRetries &&
-		cumulativeDelay < time.Duration(absoluteMaxDelaySeconds)*time.Second {
+		executionCount < middleware.MaxRetries {
 		executionCount++

 		delay := middleware.getRetryDelay(req, resp, exponentialBackoff)
@ -71,7 +70,7 @@ func (middleware RetryHandler) retryRequest(
 }

 func (middleware RetryHandler) isRetriableErrorCode(req *http.Request, code int) bool {
-	return code == http.StatusInternalServerError
+	return code == http.StatusInternalServerError || code == http.StatusServiceUnavailable
 }

 func (middleware RetryHandler) isRetriableRequest(req *http.Request) bool {
--- a/src/internal/connector/graph/service.go
+++ b/src/internal/connector/graph/service.go
@ -27,12 +27,10 @@ import (
 const (
 	logGraphRequestsEnvKey    = "LOG_GRAPH_REQUESTS"
 	log2xxGraphRequestsEnvKey = "LOG_2XX_GRAPH_REQUESTS"
-	numberOfRetries           = 3
 	retryAttemptHeader        = "Retry-Attempt"
 	retryAfterHeader          = "Retry-After"
 	defaultMaxRetries         = 3
 	defaultDelay              = 3 * time.Second
-	absoluteMaxDelaySeconds   = 180
 	rateLimitHeader           = "RateLimit-Limit"
 	rateRemainingHeader       = "RateLimit-Remaining"
 	rateResetHeader           = "RateLimit-Reset"
@ -309,7 +307,7 @@ func (handler *LoggingMiddleware) Intercept(
 	msg := fmt.Sprintf("graph api error: %s", resp.Status)

 	// special case for supportability: log all throttling cases.
-	if resp.StatusCode == http.StatusTooManyRequests {
+	if resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusServiceUnavailable {
 		log.With(
 			"limit", resp.Header.Get(rateLimitHeader),
 			"remaining", resp.Header.Get(rateRemainingHeader),