introduce fail-after-recovery option (#3005)

While fail-fast and best-effort make for good categories at the extreme ends of error handling, we keep finding outselves wanting to operate in a middle ground. This change introduces a new error handling category: FailAfterRecovery. This option tells corso to complete as much of its process as it can, even if it recovers from errors. But at the end of processing, if it recovered from any errors, an error is returned for the operation. This behavior is the new failure handling default, instead of failFast. --- #### Does this PR need a docs update or release note? - [x] ⛔ No #### Type of change - [x] 🌻 Feature #### Test Plan - [x] 💪 Manual - [x] ⚡ Unit test
2023-03-31 18:43:52 -06:00 · 2023-03-31 18:43:52 -06:00 · f2bf0ee685
commit f2bf0ee685
parent 983aaabdb2
10 changed files with 167 additions and 15 deletions
--- a/src/cli/options/options.go
+++ b/src/cli/options/options.go
@ -11,7 +11,10 @@ import (
 func Control() control.Options {
 	opt := control.Defaults()

-	opt.FailFast = fastFail
+	if fastFail {
+		opt.FailureHandling = control.FailFast
+	}
+
 	opt.DisableMetrics = noStats
 	opt.RestorePermissions = restorePermissions
 	opt.SkipReduce = skipReduce
--- a/src/internal/connector/exchange/service_iterators_test.go
+++ b/src/internal/connector/exchange/service_iterators_test.go
@ -162,7 +162,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
 		getter                mockGetter
 		resolver              graph.ContainerResolver
 		scope                 selectors.ExchangeScope
-		failFast              bool
+		failFast              control.FailureBehavior
 		expectErr             assert.ErrorAssertionFunc
 		expectNewColls        int
 		expectMetadataColls   int
@ -271,7 +271,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
 			},
 			resolver:              newMockResolver(container1, container2),
 			scope:                 allScope,
-			failFast:              true,
+			failFast:              control.FailFast,
 			expectErr:             assert.NoError,
 			expectNewColls:        2,
 			expectMetadataColls:   1,
@ -285,7 +285,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
 			},
 			resolver:            newMockResolver(container1, container2),
 			scope:               allScope,
-			failFast:            true,
+			failFast:            control.FailFast,
 			expectErr:           assert.Error,
 			expectNewColls:      0,
 			expectMetadataColls: 0,
@ -309,8 +309,8 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
 				test.resolver,
 				test.scope,
 				dps,
-				control.Options{FailFast: test.failFast},
-				fault.New(test.failFast))
+				control.Options{FailureHandling: test.failFast},
+				fault.New(test.failFast == control.FailFast))
 			test.expectErr(t, err, clues.ToCore(err))

 			// collection assertions
@ -465,7 +465,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_repea
 				resolver,
 				allScope,
 				dps,
-				control.Options{FailFast: true},
+				control.Options{FailureHandling: control.FailFast},
 				fault.New(true))
 			require.NoError(t, err, clues.ToCore(err))

--- a/src/internal/operations/backup.go
+++ b/src/internal/operations/backup.go
@ -180,6 +180,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
 		op.Errors.Fail(clues.Wrap(err, "running backup"))
 	}

+	finalizeErrorHandling(ctx, op.Options, op.Errors, "running backup")
 	LogFaultErrors(ctx, op.Errors.Errors(), "running backup")

 	// -----
--- a/src/internal/operations/helpers.go
+++ b/src/internal/operations/helpers.go
@ -2,11 +2,45 @@ package operations

 import (
 	"context"
+	"fmt"

+	"github.com/alcionai/clues"
+
+	"github.com/alcionai/corso/src/pkg/control"
 	"github.com/alcionai/corso/src/pkg/fault"
 	"github.com/alcionai/corso/src/pkg/logger"
 )

+// finalizeErrorHandling ensures the operation follow the options
+// failure behavior requirements.
+func finalizeErrorHandling(
+	ctx context.Context,
+	opts control.Options,
+	errs *fault.Bus,
+	prefix string,
+) {
+	rcvd := errs.Recovered()
+
+	// under certain conditions, there's nothing else left to do
+	if opts.FailureHandling == control.BestEffort ||
+		errs.Failure() != nil ||
+		len(rcvd) == 0 {
+		return
+	}
+
+	if opts.FailureHandling == control.FailAfterRecovery {
+		msg := fmt.Sprintf("%s: partial success: %d errors occurred", prefix, len(rcvd))
+		logger.Ctx(ctx).Error(msg)
+
+		if len(rcvd) == 1 {
+			errs.Fail(rcvd[0])
+			return
+		}
+
+		errs.Fail(clues.New(msg))
+	}
+}
+
 // LogFaultErrors is a helper function that logs all entries in the Errors struct.
 func LogFaultErrors(ctx context.Context, fe *fault.Errors, prefix string) {
 	if fe == nil {
--- a/src/internal/operations/helpers_test.go
+++ b/src/internal/operations/helpers_test.go
@ -0,0 +1,102 @@
+package operations
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/suite"
+
+	"github.com/alcionai/corso/src/internal/tester"
+	"github.com/alcionai/corso/src/pkg/control"
+	"github.com/alcionai/corso/src/pkg/fault"
+)
+
+type HelpersUnitSuite struct {
+	tester.Suite
+}
+
+func TestHelpersUnitSuite(t *testing.T) {
+	suite.Run(t, &HelpersUnitSuite{Suite: tester.NewUnitSuite(t)})
+}
+
+func (suite *HelpersUnitSuite) TestFinalizeErrorHandling() {
+	table := []struct {
+		name      string
+		errs      func() *fault.Bus
+		opts      control.Options
+		expectErr assert.ErrorAssertionFunc
+	}{
+		{
+			name: "no errors",
+			errs: func() *fault.Bus {
+				return fault.New(false)
+			},
+			opts: control.Options{
+				FailureHandling: control.FailAfterRecovery,
+			},
+			expectErr: assert.NoError,
+		},
+		{
+			name: "already failed",
+			errs: func() *fault.Bus {
+				fn := fault.New(false)
+				fn.Fail(assert.AnError)
+				return fn
+			},
+			opts: control.Options{
+				FailureHandling: control.FailAfterRecovery,
+			},
+			expectErr: assert.Error,
+		},
+		{
+			name: "best effort",
+			errs: func() *fault.Bus {
+				fn := fault.New(false)
+				fn.AddRecoverable(assert.AnError)
+				return fn
+			},
+			opts: control.Options{
+				FailureHandling: control.BestEffort,
+			},
+			expectErr: assert.NoError,
+		},
+		{
+			name: "recoverable errors produce hard fail",
+			errs: func() *fault.Bus {
+				fn := fault.New(false)
+				fn.AddRecoverable(assert.AnError)
+				return fn
+			},
+			opts: control.Options{
+				FailureHandling: control.FailAfterRecovery,
+			},
+			expectErr: assert.Error,
+		},
+		{
+			name: "multiple recoverable errors produce hard fail",
+			errs: func() *fault.Bus {
+				fn := fault.New(false)
+				fn.AddRecoverable(assert.AnError)
+				fn.AddRecoverable(assert.AnError)
+				fn.AddRecoverable(assert.AnError)
+				return fn
+			},
+			opts: control.Options{
+				FailureHandling: control.FailAfterRecovery,
+			},
+			expectErr: assert.Error,
+		},
+	}
+	for _, test := range table {
+		suite.Run(test.name, func() {
+			ctx, flush := tester.NewContext()
+			defer flush()
+
+			t := suite.T()
+			errs := test.errs()
+
+			finalizeErrorHandling(ctx, test.opts, errs, "test")
+			test.expectErr(t, errs.Failure())
+		})
+	}
+}
--- a/src/internal/operations/operation.go
+++ b/src/internal/operations/operation.go
@ -66,7 +66,7 @@ func newOperation(
 ) operation {
 	return operation{
 		CreatedAt: time.Now(),
-		Errors:    fault.New(opts.FailFast),
+		Errors:    fault.New(opts.FailureHandling == control.FailFast),
 		Options:   opts,

 		bus:   bus,
--- a/src/internal/operations/restore.go
+++ b/src/internal/operations/restore.go
@ -152,6 +152,7 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De
 		op.Errors.Fail(clues.Wrap(err, "running restore"))
 	}

+	finalizeErrorHandling(ctx, op.Options, op.Errors, "running restore")
 	LogFaultErrors(ctx, op.Errors.Errors(), "running restore")

 	// -----
--- a/src/internal/operations/restore_test.go
+++ b/src/internal/operations/restore_test.go
@ -437,7 +437,7 @@ func (suite *RestoreOpIntegrationSuite) TestRestore_Run() {

 			ro, err := NewRestoreOperation(
 				ctx,
-				control.Options{FailFast: true},
+				control.Options{FailureHandling: control.FailFast},
 				suite.kw,
 				suite.sw,
 				bup.gc,
--- a/src/pkg/control/options.go
+++ b/src/pkg/control/options.go
@ -8,17 +8,28 @@ import (
 type Options struct {
 	Collision            CollisionPolicy `json:"-"`
 	DisableMetrics       bool            `json:"disableMetrics"`
-	FailFast             bool            `json:"failFast"`
+	FailureHandling      FailureBehavior `json:"failureHandling"`
+	ItemFetchParallelism int             `json:"itemFetchParallelism"`
 	RestorePermissions   bool            `json:"restorePermissions"`
 	SkipReduce           bool            `json:"skipReduce"`
-	ItemFetchParallelism int             `json:"itemFetchParallelism"`
 	ToggleFeatures       Toggles         `json:"ToggleFeatures"`
 }

+type FailureBehavior string
+
+const (
+	// fails and exits the run immediately
+	FailFast FailureBehavior = "fail-fast"
+	// recovers whenever possible, reports non-zero recoveries as a failure
+	FailAfterRecovery FailureBehavior = "fail-after-recovery"
+	// recovers whenever possible, does not report recovery as failure
+	BestEffort FailureBehavior = "best-effort"
+)
+
 // Defaults provides an Options with the default values set.
 func Defaults() Options {
 	return Options{
-		FailFast:       true,
+		FailureHandling: FailAfterRecovery,
 		ToggleFeatures:  Toggles{},
 	}
 }
--- a/src/pkg/repository/loadtest/repository_load_test.go
+++ b/src/pkg/repository/loadtest/repository_load_test.go
@ -95,7 +95,7 @@ func initM365Repo(t *testing.T) (
 	ac := tester.NewM365Account(t)
 	opts := control.Options{
 		DisableMetrics:  true,
-		FailFast:       true,
+		FailureHandling: control.FailFast,
 	}

 	repo, err := repository.Initialize(ctx, ac, st, opts)