Retry on econnreset regardless of http response (#3560)

Retry on `ECONNRESET` even if HTTP response is set to 2xx. This is unlikely but it can happen. Currently we fail the backup for this scenario with `connection reset by peer` error.


---

#### Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No

#### Type of change

<!--- Please check the type of change your PR introduces: --->
- [ ] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* #<issue>

#### Test Plan

<!-- How will this be tested prior to merging.-->
- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
Abhishek Pandey 2023-06-12 12:05:03 -07:00 committed by GitHub
parent 083f1b18e2
commit 3f79d790aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 12 deletions

View File

@ -209,12 +209,16 @@ func (mw RetryMiddleware) Intercept(
ctx := req.Context() ctx := req.Context()
resp, err := pipeline.Next(req, middlewareIndex) resp, err := pipeline.Next(req, middlewareIndex)
if err != nil && !IsErrTimeout(err) && !IsErrConnectionReset(err) {
retriable := IsErrTimeout(err) || IsErrConnectionReset(err) ||
(resp != nil && (resp.StatusCode/100 == 4 || resp.StatusCode/100 == 5))
if !retriable {
if err != nil {
return resp, stackReq(ctx, req, resp, err) return resp, stackReq(ctx, req, resp, err)
} }
if resp != nil && resp.StatusCode/100 != 4 && resp.StatusCode/100 != 5 { return resp, nil
return resp, err
} }
exponentialBackOff := backoff.NewExponentialBackOff() exponentialBackOff := backoff.NewExponentialBackOff()
@ -304,7 +308,8 @@ func (mw RetryMiddleware) retryRequest(
return nextResp, stackReq(ctx, req, nextResp, err) return nextResp, stackReq(ctx, req, nextResp, err)
} }
return mw.retryRequest(ctx, return mw.retryRequest(
ctx,
pipeline, pipeline,
middlewareIndex, middlewareIndex,
req, req,

View File

@ -4,6 +4,7 @@ import (
"bytes" "bytes"
"io" "io"
"net/http" "net/http"
"syscall"
"testing" "testing"
"time" "time"
@ -37,12 +38,18 @@ func newMWReturns(code int, body []byte, err error) mwReturns {
brc = io.NopCloser(bytes.NewBuffer(body)) brc = io.NopCloser(bytes.NewBuffer(body))
} }
return mwReturns{ resp := &http.Response{
err: err,
resp: &http.Response{
StatusCode: code, StatusCode: code,
Body: brc, Body: brc,
}, }
if code == 0 {
resp = nil
}
return mwReturns{
err: err,
resp: resp,
} }
} }
@ -142,6 +149,7 @@ func (suite *RetryMWIntgSuite) TestRetryMiddleware_Intercept_byStatusCode() {
tests := []struct { tests := []struct {
name string name string
status int status int
providedErr error
expectRetryCount int expectRetryCount int
mw testMW mw testMW
expectErr assert.ErrorAssertionFunc expectErr assert.ErrorAssertionFunc
@ -149,12 +157,14 @@ func (suite *RetryMWIntgSuite) TestRetryMiddleware_Intercept_byStatusCode() {
{ {
name: "200, no retries", name: "200, no retries",
status: http.StatusOK, status: http.StatusOK,
providedErr: nil,
expectRetryCount: 0, expectRetryCount: 0,
expectErr: assert.NoError, expectErr: assert.NoError,
}, },
{ {
name: "400, no retries", name: "400, no retries",
status: http.StatusBadRequest, status: http.StatusBadRequest,
providedErr: nil,
expectRetryCount: 0, expectRetryCount: 0,
expectErr: assert.Error, expectErr: assert.Error,
}, },
@ -162,9 +172,47 @@ func (suite *RetryMWIntgSuite) TestRetryMiddleware_Intercept_byStatusCode() {
// don't test 504: gets intercepted by graph client for long waits. // don't test 504: gets intercepted by graph client for long waits.
name: "502", name: "502",
status: http.StatusBadGateway, status: http.StatusBadGateway,
providedErr: nil,
expectRetryCount: defaultMaxRetries, expectRetryCount: defaultMaxRetries,
expectErr: assert.Error, expectErr: assert.Error,
}, },
{
name: "conn reset with 5xx",
status: http.StatusBadGateway,
providedErr: syscall.ECONNRESET,
expectRetryCount: defaultMaxRetries,
expectErr: assert.Error,
},
{
name: "conn reset with 2xx",
status: http.StatusOK,
providedErr: syscall.ECONNRESET,
expectRetryCount: defaultMaxRetries,
expectErr: assert.Error,
},
{
name: "conn reset with nil resp",
providedErr: syscall.ECONNRESET,
// Use 0 to denote nil http response
status: 0,
expectRetryCount: 3,
expectErr: assert.Error,
},
{
// Unlikely but check if connection reset error takes precedence
name: "conn reset with 400 resp",
providedErr: syscall.ECONNRESET,
status: http.StatusBadRequest,
expectRetryCount: 3,
expectErr: assert.Error,
},
{
name: "http timeout",
providedErr: http.ErrHandlerTimeout,
status: 0,
expectRetryCount: 3,
expectErr: assert.Error,
},
} }
for _, test := range tests { for _, test := range tests {
@ -177,7 +225,7 @@ func (suite *RetryMWIntgSuite) TestRetryMiddleware_Intercept_byStatusCode() {
called := 0 called := 0
mw := newTestMW( mw := newTestMW(
func(*http.Request) { called++ }, func(*http.Request) { called++ },
newMWReturns(test.status, nil, nil)) newMWReturns(test.status, nil, test.providedErr))
mw.repeatReturn0 = true mw.repeatReturn0 = true
adpt, err := mockAdapter(suite.creds, mw) adpt, err := mockAdapter(suite.creds, mw)