From f2bf0ee685a311bf3baa753807824416dfc98e5f Mon Sep 17 00:00:00 2001 From: Keepers Date: Fri, 31 Mar 2023 18:43:52 -0600 Subject: [PATCH] introduce fail-after-recovery option (#3005) While fail-fast and best-effort make for good categories at the extreme ends of error handling, we keep finding outselves wanting to operate in a middle ground. This change introduces a new error handling category: FailAfterRecovery. This option tells corso to complete as much of its process as it can, even if it recovers from errors. But at the end of processing, if it recovered from any errors, an error is returned for the operation. This behavior is the new failure handling default, instead of failFast. --- #### Does this PR need a docs update or release note? - [x] :no_entry: No #### Type of change - [x] :sunflower: Feature #### Test Plan - [x] :muscle: Manual - [x] :zap: Unit test --- src/cli/options/options.go | 5 +- .../exchange/service_iterators_test.go | 12 +-- src/internal/operations/backup.go | 1 + .../operations/{logging.go => helpers.go} | 34 ++++++ src/internal/operations/helpers_test.go | 102 ++++++++++++++++++ src/internal/operations/operation.go | 2 +- src/internal/operations/restore.go | 1 + src/internal/operations/restore_test.go | 2 +- src/pkg/control/options.go | 19 +++- .../loadtest/repository_load_test.go | 4 +- 10 files changed, 167 insertions(+), 15 deletions(-) rename src/internal/operations/{logging.go => helpers.go} (57%) create mode 100644 src/internal/operations/helpers_test.go diff --git a/src/cli/options/options.go b/src/cli/options/options.go index d9bdd08c1..30cbdbf8a 100644 --- a/src/cli/options/options.go +++ b/src/cli/options/options.go @@ -11,7 +11,10 @@ import ( func Control() control.Options { opt := control.Defaults() - opt.FailFast = fastFail + if fastFail { + opt.FailureHandling = control.FailFast + } + opt.DisableMetrics = noStats opt.RestorePermissions = restorePermissions opt.SkipReduce = skipReduce diff --git a/src/internal/connector/exchange/service_iterators_test.go b/src/internal/connector/exchange/service_iterators_test.go index f14601b8c..9d72dc181 100644 --- a/src/internal/connector/exchange/service_iterators_test.go +++ b/src/internal/connector/exchange/service_iterators_test.go @@ -162,7 +162,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { getter mockGetter resolver graph.ContainerResolver scope selectors.ExchangeScope - failFast bool + failFast control.FailureBehavior expectErr assert.ErrorAssertionFunc expectNewColls int expectMetadataColls int @@ -271,7 +271,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { }, resolver: newMockResolver(container1, container2), scope: allScope, - failFast: true, + failFast: control.FailFast, expectErr: assert.NoError, expectNewColls: 2, expectMetadataColls: 1, @@ -285,7 +285,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { }, resolver: newMockResolver(container1, container2), scope: allScope, - failFast: true, + failFast: control.FailFast, expectErr: assert.Error, expectNewColls: 0, expectMetadataColls: 0, @@ -309,8 +309,8 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { test.resolver, test.scope, dps, - control.Options{FailFast: test.failFast}, - fault.New(test.failFast)) + control.Options{FailureHandling: test.failFast}, + fault.New(test.failFast == control.FailFast)) test.expectErr(t, err, clues.ToCore(err)) // collection assertions @@ -465,7 +465,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_repea resolver, allScope, dps, - control.Options{FailFast: true}, + control.Options{FailureHandling: control.FailFast}, fault.New(true)) require.NoError(t, err, clues.ToCore(err)) diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 8139d04ad..5f6259355 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -180,6 +180,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { op.Errors.Fail(clues.Wrap(err, "running backup")) } + finalizeErrorHandling(ctx, op.Options, op.Errors, "running backup") LogFaultErrors(ctx, op.Errors.Errors(), "running backup") // ----- diff --git a/src/internal/operations/logging.go b/src/internal/operations/helpers.go similarity index 57% rename from src/internal/operations/logging.go rename to src/internal/operations/helpers.go index e0e0cd51d..8cfbe66a0 100644 --- a/src/internal/operations/logging.go +++ b/src/internal/operations/helpers.go @@ -2,11 +2,45 @@ package operations import ( "context" + "fmt" + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/logger" ) +// finalizeErrorHandling ensures the operation follow the options +// failure behavior requirements. +func finalizeErrorHandling( + ctx context.Context, + opts control.Options, + errs *fault.Bus, + prefix string, +) { + rcvd := errs.Recovered() + + // under certain conditions, there's nothing else left to do + if opts.FailureHandling == control.BestEffort || + errs.Failure() != nil || + len(rcvd) == 0 { + return + } + + if opts.FailureHandling == control.FailAfterRecovery { + msg := fmt.Sprintf("%s: partial success: %d errors occurred", prefix, len(rcvd)) + logger.Ctx(ctx).Error(msg) + + if len(rcvd) == 1 { + errs.Fail(rcvd[0]) + return + } + + errs.Fail(clues.New(msg)) + } +} + // LogFaultErrors is a helper function that logs all entries in the Errors struct. func LogFaultErrors(ctx context.Context, fe *fault.Errors, prefix string) { if fe == nil { diff --git a/src/internal/operations/helpers_test.go b/src/internal/operations/helpers_test.go new file mode 100644 index 000000000..c02f2131c --- /dev/null +++ b/src/internal/operations/helpers_test.go @@ -0,0 +1,102 @@ +package operations + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/fault" +) + +type HelpersUnitSuite struct { + tester.Suite +} + +func TestHelpersUnitSuite(t *testing.T) { + suite.Run(t, &HelpersUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *HelpersUnitSuite) TestFinalizeErrorHandling() { + table := []struct { + name string + errs func() *fault.Bus + opts control.Options + expectErr assert.ErrorAssertionFunc + }{ + { + name: "no errors", + errs: func() *fault.Bus { + return fault.New(false) + }, + opts: control.Options{ + FailureHandling: control.FailAfterRecovery, + }, + expectErr: assert.NoError, + }, + { + name: "already failed", + errs: func() *fault.Bus { + fn := fault.New(false) + fn.Fail(assert.AnError) + return fn + }, + opts: control.Options{ + FailureHandling: control.FailAfterRecovery, + }, + expectErr: assert.Error, + }, + { + name: "best effort", + errs: func() *fault.Bus { + fn := fault.New(false) + fn.AddRecoverable(assert.AnError) + return fn + }, + opts: control.Options{ + FailureHandling: control.BestEffort, + }, + expectErr: assert.NoError, + }, + { + name: "recoverable errors produce hard fail", + errs: func() *fault.Bus { + fn := fault.New(false) + fn.AddRecoverable(assert.AnError) + return fn + }, + opts: control.Options{ + FailureHandling: control.FailAfterRecovery, + }, + expectErr: assert.Error, + }, + { + name: "multiple recoverable errors produce hard fail", + errs: func() *fault.Bus { + fn := fault.New(false) + fn.AddRecoverable(assert.AnError) + fn.AddRecoverable(assert.AnError) + fn.AddRecoverable(assert.AnError) + return fn + }, + opts: control.Options{ + FailureHandling: control.FailAfterRecovery, + }, + expectErr: assert.Error, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + ctx, flush := tester.NewContext() + defer flush() + + t := suite.T() + errs := test.errs() + + finalizeErrorHandling(ctx, test.opts, errs, "test") + test.expectErr(t, errs.Failure()) + }) + } +} diff --git a/src/internal/operations/operation.go b/src/internal/operations/operation.go index e4cd201db..5144c4abb 100644 --- a/src/internal/operations/operation.go +++ b/src/internal/operations/operation.go @@ -66,7 +66,7 @@ func newOperation( ) operation { return operation{ CreatedAt: time.Now(), - Errors: fault.New(opts.FailFast), + Errors: fault.New(opts.FailureHandling == control.FailFast), Options: opts, bus: bus, diff --git a/src/internal/operations/restore.go b/src/internal/operations/restore.go index 4e45aad4c..1220cfe66 100644 --- a/src/internal/operations/restore.go +++ b/src/internal/operations/restore.go @@ -152,6 +152,7 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De op.Errors.Fail(clues.Wrap(err, "running restore")) } + finalizeErrorHandling(ctx, op.Options, op.Errors, "running restore") LogFaultErrors(ctx, op.Errors.Errors(), "running restore") // ----- diff --git a/src/internal/operations/restore_test.go b/src/internal/operations/restore_test.go index 4344e08ee..33ac4a881 100644 --- a/src/internal/operations/restore_test.go +++ b/src/internal/operations/restore_test.go @@ -437,7 +437,7 @@ func (suite *RestoreOpIntegrationSuite) TestRestore_Run() { ro, err := NewRestoreOperation( ctx, - control.Options{FailFast: true}, + control.Options{FailureHandling: control.FailFast}, suite.kw, suite.sw, bup.gc, diff --git a/src/pkg/control/options.go b/src/pkg/control/options.go index f57ae492d..0bc119f52 100644 --- a/src/pkg/control/options.go +++ b/src/pkg/control/options.go @@ -8,18 +8,29 @@ import ( type Options struct { Collision CollisionPolicy `json:"-"` DisableMetrics bool `json:"disableMetrics"` - FailFast bool `json:"failFast"` + FailureHandling FailureBehavior `json:"failureHandling"` + ItemFetchParallelism int `json:"itemFetchParallelism"` RestorePermissions bool `json:"restorePermissions"` SkipReduce bool `json:"skipReduce"` - ItemFetchParallelism int `json:"itemFetchParallelism"` ToggleFeatures Toggles `json:"ToggleFeatures"` } +type FailureBehavior string + +const ( + // fails and exits the run immediately + FailFast FailureBehavior = "fail-fast" + // recovers whenever possible, reports non-zero recoveries as a failure + FailAfterRecovery FailureBehavior = "fail-after-recovery" + // recovers whenever possible, does not report recovery as failure + BestEffort FailureBehavior = "best-effort" +) + // Defaults provides an Options with the default values set. func Defaults() Options { return Options{ - FailFast: true, - ToggleFeatures: Toggles{}, + FailureHandling: FailAfterRecovery, + ToggleFeatures: Toggles{}, } } diff --git a/src/pkg/repository/loadtest/repository_load_test.go b/src/pkg/repository/loadtest/repository_load_test.go index 72093a27f..19d67fca8 100644 --- a/src/pkg/repository/loadtest/repository_load_test.go +++ b/src/pkg/repository/loadtest/repository_load_test.go @@ -94,8 +94,8 @@ func initM365Repo(t *testing.T) ( st := tester.NewPrefixedS3Storage(t) ac := tester.NewM365Account(t) opts := control.Options{ - DisableMetrics: true, - FailFast: true, + DisableMetrics: true, + FailureHandling: control.FailFast, } repo, err := repository.Initialize(ctx, ac, st, opts)