From a680f13f843d2dec6a052e9f2eb379815e45d582 Mon Sep 17 00:00:00 2001 From: Keepers Date: Tue, 6 Feb 2024 11:02:34 -0700 Subject: [PATCH] add by-resp-code catcher to err transformer (#5184) Start catching graph responses by broad response code, in addition to the response body details. --- #### Does this PR need a docs update or release note? - [x] :no_entry: No #### Type of change - [x] :sunflower: Feature #### Test Plan - [x] :zap: Unit test - [x] :green_heart: E2E --- src/pkg/errs/core/core.go | 30 +++++ src/pkg/services/m365/api/graph/errors.go | 57 +++++++-- .../services/m365/api/graph/errors_test.go | 109 +++++++++++++++++- 3 files changed, 183 insertions(+), 13 deletions(-) diff --git a/src/pkg/errs/core/core.go b/src/pkg/errs/core/core.go index fd8b86587..259d8a2a4 100644 --- a/src/pkg/errs/core/core.go +++ b/src/pkg/errs/core/core.go @@ -66,6 +66,12 @@ var ( // about what it sounds like: we tried to look for a backup by ID, but the // storage layer couldn't find anything for that ID. ErrBackupNotFound = &Err{msg: "backup not found"} + // basically "internal server error". But not internal issues. We only return this + // when a downstream service (ex: graph api) responds with a 5xx style error. + // Note: producers may not funnel all 5xx errors in this umbrella, because + // different cases (ex: `StatusHTTPVersionNotSupported`) may need more specific + // attention and handling than standard gateway outages or service issues. + ErrDownstreamServerError = &Err{msg: "server error in downstream service"} // a catch-all for downstream api auth issues. doesn't matter which api. ErrInsufficientAuthorization = &Err{msg: "insufficient authorization"} // happens when we look up something using an identifier other than a canonical ID @@ -116,3 +122,27 @@ func As(err error) (*Err, bool) { return ce, ok } + +// --------------------------------------------------------------------------- +// Labels +// +// In some cases we like to attach well known labels to errors for additional +// metadata or metrics or other associations. Labeling differs from error +// typing or identification because a label won't explain the cause or clearly +// contextualize an error. Labels are tags for their own sake. +// +// Therefore, labels are expressly not for error identification. IE: if you +// see the check `if clues.HasLabel(err, labelFoo)` in place of +// `if errors.Is(err, errFoo)`, that's a red flag. +// --------------------------------------------------------------------------- + +const ( + // add this label when we might need to further investigate the cause of the + // error. For example, in the graph api layer we try to categorize errors + // by their specific identity, such as "the resource was locked out". If + // we're unsuccessful, we can still fall back to the more generic error code, + // "403 forbidden". But it tradeoff, we may end up catching (and gracefully + // handling) 403s, but not identifying an underlying root issue. This label + // is here to say, "maybe you should look for the reason why this happened". + LabelRootCauseUnknown = "root-cause-unknown" +) diff --git a/src/pkg/services/m365/api/graph/errors.go b/src/pkg/services/m365/api/graph/errors.go index 9a3aa3432..ab46c0ad9 100644 --- a/src/pkg/services/m365/api/graph/errors.go +++ b/src/pkg/services/m365/api/graph/errors.go @@ -117,6 +117,7 @@ func stackWithCoreErr(ctx context.Context, err error, traceDepth int) error { } ode := parseODataErr(err) + labels := []string{} switch { case isErrBadJWTToken(ode, err): @@ -133,16 +134,43 @@ func stackWithCoreErr(ctx context.Context, err error, traceDepth int) error { err = clues.Stack(core.ErrAlreadyExists, err) case isErrNotFound(ode, err): err = clues.Stack(core.ErrNotFound, err) + default: + err = toErrByRespCode(ode, err) + + labels = append(labels, core.LabelRootCauseUnknown) } - return stackWithDepth(ctx, err, 1+traceDepth) + stacked := stackWithDepth(ctx, err, 1+traceDepth) + + // labeling here because we want the context from stackWithDepth first + for _, label := range labels { + stacked = stacked.Label(label) + } + + return stacked } // unexported categorizers, for use with stackWithCoreErr +// for categorizing errors by their response code and no other response propery +func toErrByRespCode(ode oDataErr, err error) error { + switch { + case ode.hasResponseCode(http.StatusNotFound): + return clues.Stack(core.ErrNotFound, err) + case ode.hasResponseCode(http.StatusUnauthorized, http.StatusForbidden): + return clues.Stack(core.ErrInsufficientAuthorization, err) + case ode.hasResponseCode(http.StatusTooManyRequests): + return clues.Stack(core.ErrApplicationThrottled, err) + // catch any 5xx error + case ode.hasResponseCode(5): + return clues.Stack(core.ErrDownstreamServerError, err) + default: + return err + } +} + func isErrApplicationThrottled(ode oDataErr, err error) bool { - return ode.hasErrorCode(err, ApplicationThrottled) || - ode.hasResponseCode(err, http.StatusTooManyRequests) + return ode.hasErrorCode(err, ApplicationThrottled) } func isErrInsufficientAuthorization(ode oDataErr, err error) bool { @@ -151,7 +179,6 @@ func isErrInsufficientAuthorization(ode oDataErr, err error) bool { func isErrNotFound(ode oDataErr, err error) bool { return clues.HasLabel(err, LabelStatus(http.StatusNotFound)) || - ode.hasResponseCode(err, http.StatusNotFound) || ode.hasErrorCode( err, ErrorItemNotFound, @@ -200,7 +227,7 @@ func IsErrInvalidDelta(err error) bool { } func IsErrInvalidRequest(err error) bool { - return parseODataErr(err).hasResponseCode(err, http.StatusBadRequest) && + return parseODataErr(err).hasResponseCode(http.StatusBadRequest) && parseODataErr(err).hasErrorCode(err, invalidRequest) } @@ -572,8 +599,24 @@ func parseODataErr(err error) oDataErr { // hasResponseCode checks if the error is an ODataError and carries the provided // http response code. -func (ode oDataErr) hasResponseCode(err error, httpResponseCode int) bool { - return ode.isODataErr && ode.Resp.StatusCode == httpResponseCode +func (ode oDataErr) hasResponseCode(httpResponseCode ...int) bool { + if !ode.isODataErr { + return false + } + + orsc := ode.Resp.StatusCode + + for _, hrc := range httpResponseCode { + if orsc == hrc { + return true + } + + if hrc < 10 && orsc/100 == hrc { + return true + } + } + + return false } func (ode oDataErr) hasErrorCode(err error, codes ...errorCode) bool { diff --git a/src/pkg/services/m365/api/graph/errors_test.go b/src/pkg/services/m365/api/graph/errors_test.go index 19025b3f5..c312a47f1 100644 --- a/src/pkg/services/m365/api/graph/errors_test.go +++ b/src/pkg/services/m365/api/graph/errors_test.go @@ -84,9 +84,10 @@ func (suite *GraphErrorsUnitSuite) TestIsErrApplicationThrottled() { expect: assert.True, }, { - name: "too many requests resp status", - err: graphTD.ODataErrWithStatus(http.StatusTooManyRequests, "err"), - expect: assert.True, + name: "too many requests resp status", + err: graphTD.ODataErrWithStatus(http.StatusTooManyRequests, "err"), + // caught in the by-code handler + expect: assert.False, }, } for _, test := range table { @@ -214,9 +215,10 @@ func (suite *GraphErrorsUnitSuite) TestIsErrNotFound() { expect: assert.True, }, { - name: "not found resp status", - err: graphTD.ODataErrWithStatus(http.StatusNotFound, "err"), - expect: assert.True, + name: "not found resp status", + err: graphTD.ODataErrWithStatus(http.StatusNotFound, "err"), + // caught in the by-code handler + expect: assert.False, }, } for _, test := range table { @@ -991,3 +993,98 @@ func (suite *GraphErrorsUnitSuite) TestIsErrSharingDisabled() { }) } } + +func (suite *GraphErrorsUnitSuite) TestToErrByRespCode() { + table := []struct { + name string + err error + expectNoStack bool + expectIs error + }{ + { + name: "nil", + err: nil, + expectNoStack: true, + expectIs: nil, + }, + { + name: "non-matching", + err: assert.AnError, + expectNoStack: true, + expectIs: nil, + }, + { + name: "unidentified resp code", + err: graphTD.ODataErrWithStatus(http.StatusConflict, "err"), + expectNoStack: true, + expectIs: nil, + }, + { + name: "404", + err: graphTD.ODataErrWithStatus(http.StatusNotFound, "err"), + expectNoStack: false, + expectIs: core.ErrNotFound, + }, + { + name: "401", + err: graphTD.ODataErrWithStatus(http.StatusUnauthorized, "err"), + expectNoStack: false, + expectIs: core.ErrInsufficientAuthorization, + }, + { + name: "403", + err: graphTD.ODataErrWithStatus(http.StatusForbidden, "err"), + expectNoStack: false, + expectIs: core.ErrInsufficientAuthorization, + }, + { + name: "429", + err: graphTD.ODataErrWithStatus(http.StatusTooManyRequests, "err"), + expectNoStack: false, + expectIs: core.ErrApplicationThrottled, + }, + { + name: "500", + err: graphTD.ODataErrWithStatus(http.StatusInternalServerError, "err"), + expectNoStack: false, + expectIs: core.ErrDownstreamServerError, + }, + { + name: "501", + err: graphTD.ODataErrWithStatus(http.StatusNotImplemented, "err"), + expectNoStack: false, + expectIs: core.ErrDownstreamServerError, + }, + { + name: "502", + err: graphTD.ODataErrWithStatus(http.StatusBadGateway, "err"), + expectNoStack: false, + expectIs: core.ErrDownstreamServerError, + }, + { + name: "503", + err: graphTD.ODataErrWithStatus(http.StatusServiceUnavailable, "err"), + expectNoStack: false, + expectIs: core.ErrDownstreamServerError, + }, + { + name: "504", + err: graphTD.ODataErrWithStatus(http.StatusGatewayTimeout, "err"), + expectNoStack: false, + expectIs: core.ErrDownstreamServerError, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + err := toErrByRespCode(parseODataErr(test.err), test.err) + + if test.expectNoStack { + assert.Equal(t, test.err, err) + } else { + assert.ErrorIs(t, err, test.expectIs) + } + }) + } +}