introduce fail-after-recovery option (#3005)

While fail-fast and best-effort make for good
categories at the extreme ends of error handling, 
we keep finding ourselves wanting to operate
in a middle ground.  This change introduces a new
error handling category: FailAfterRecovery.  This
option tells corso to complete as much of its
process as it can, even if it recovers from errors.
But at the end of processing, if it recovered from
any errors, an error is returned for the operation.

This behavior is the new failure handling default,
instead of failFast.

---

#### Does this PR need a docs update or release note?

- [x]  No

#### Type of change

- [x] 🌻 Feature

#### Test Plan

- [x] 💪 Manual
- [x]  Unit test
This commit is contained in:
Keepers 2023-03-31 18:43:52 -06:00 committed by GitHub
parent 983aaabdb2
commit f2bf0ee685
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 167 additions and 15 deletions

View File

@ -11,7 +11,10 @@ import (
func Control() control.Options {
opt := control.Defaults()
opt.FailFast = fastFail
if fastFail {
opt.FailureHandling = control.FailFast
}
opt.DisableMetrics = noStats
opt.RestorePermissions = restorePermissions
opt.SkipReduce = skipReduce

View File

@ -162,7 +162,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
getter mockGetter
resolver graph.ContainerResolver
scope selectors.ExchangeScope
failFast bool
failFast control.FailureBehavior
expectErr assert.ErrorAssertionFunc
expectNewColls int
expectMetadataColls int
@ -271,7 +271,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
},
resolver: newMockResolver(container1, container2),
scope: allScope,
failFast: true,
failFast: control.FailFast,
expectErr: assert.NoError,
expectNewColls: 2,
expectMetadataColls: 1,
@ -285,7 +285,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
},
resolver: newMockResolver(container1, container2),
scope: allScope,
failFast: true,
failFast: control.FailFast,
expectErr: assert.Error,
expectNewColls: 0,
expectMetadataColls: 0,
@ -309,8 +309,8 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
test.resolver,
test.scope,
dps,
control.Options{FailFast: test.failFast},
fault.New(test.failFast))
control.Options{FailureHandling: test.failFast},
fault.New(test.failFast == control.FailFast))
test.expectErr(t, err, clues.ToCore(err))
// collection assertions
@ -465,7 +465,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_repea
resolver,
allScope,
dps,
control.Options{FailFast: true},
control.Options{FailureHandling: control.FailFast},
fault.New(true))
require.NoError(t, err, clues.ToCore(err))

View File

@ -180,6 +180,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
op.Errors.Fail(clues.Wrap(err, "running backup"))
}
finalizeErrorHandling(ctx, op.Options, op.Errors, "running backup")
LogFaultErrors(ctx, op.Errors.Errors(), "running backup")
// -----

View File

@ -2,11 +2,45 @@ package operations
import (
"context"
"fmt"
"github.com/alcionai/clues"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/logger"
)
// finalizeErrorHandling applies the configured failure behavior to the
// fault bus once an operation has finished its work.
//
// Nothing is changed when the behavior is BestEffort, when the bus already
// holds a failure, or when no errors were recovered. Under FailAfterRecovery
// with at least one recovered error, the outcome is logged and the bus is
// marked as failed: a lone recovered error is promoted as-is, while several
// are summarized in a new error built from prefix and the error count.
func finalizeErrorHandling(
	ctx context.Context,
	opts control.Options,
	errs *fault.Bus,
	prefix string,
) {
	recovered := errs.Recovered()
	count := len(recovered)

	// guard clauses: each of these means there is nothing left to do.
	switch {
	case opts.FailureHandling == control.BestEffort:
		return
	case errs.Failure() != nil:
		return
	case count == 0:
		return
	case opts.FailureHandling != control.FailAfterRecovery:
		return
	}

	summary := fmt.Sprintf("%s: partial success: %d errors occurred", prefix, count)
	logger.Ctx(ctx).Error(summary)

	// several recoveries collapse into the summary error; a single
	// recovery is surfaced directly so its original detail is kept.
	if count > 1 {
		errs.Fail(clues.New(summary))
		return
	}

	errs.Fail(recovered[0])
}
// LogFaultErrors is a helper function that logs all entries in the Errors struct.
func LogFaultErrors(ctx context.Context, fe *fault.Errors, prefix string) {
if fe == nil {

View File

@ -0,0 +1,102 @@
package operations
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/fault"
)
// HelpersUnitSuite is the unit-test suite for this file; it embeds
// tester.Suite and carries the TestFinalizeErrorHandling test.
type HelpersUnitSuite struct {
tester.Suite
}
func TestHelpersUnitSuite(t *testing.T) {
suite.Run(t, &HelpersUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// TestFinalizeErrorHandling is a table test for finalizeErrorHandling. Each
// case builds a fresh fault bus, runs the finalizer with the case's options,
// and then asserts on whether the bus reports a failure.
func (suite *HelpersUnitSuite) TestFinalizeErrorHandling() {
	// withRecoverable returns a constructor for a non-fail-fast bus that
	// has had n recoverable errors added to it.
	withRecoverable := func(n int) func() *fault.Bus {
		return func() *fault.Bus {
			bus := fault.New(false)

			for i := 0; i < n; i++ {
				bus.AddRecoverable(assert.AnError)
			}

			return bus
		}
	}

	// withFailure returns a non-fail-fast bus that already holds a failure.
	withFailure := func() *fault.Bus {
		bus := fault.New(false)
		bus.Fail(assert.AnError)

		return bus
	}

	failAfterRecovery := control.Options{FailureHandling: control.FailAfterRecovery}
	bestEffort := control.Options{FailureHandling: control.BestEffort}

	cases := []struct {
		name      string
		errs      func() *fault.Bus
		opts      control.Options
		expectErr assert.ErrorAssertionFunc
	}{
		{
			name:      "no errors",
			errs:      withRecoverable(0),
			opts:      failAfterRecovery,
			expectErr: assert.NoError,
		},
		{
			name:      "already failed",
			errs:      withFailure,
			opts:      failAfterRecovery,
			expectErr: assert.Error,
		},
		{
			name:      "best effort",
			errs:      withRecoverable(1),
			opts:      bestEffort,
			expectErr: assert.NoError,
		},
		{
			name:      "recoverable errors produce hard fail",
			errs:      withRecoverable(1),
			opts:      failAfterRecovery,
			expectErr: assert.Error,
		},
		{
			name:      "multiple recoverable errors produce hard fail",
			errs:      withRecoverable(3),
			opts:      failAfterRecovery,
			expectErr: assert.Error,
		},
	}

	for _, tc := range cases {
		suite.Run(tc.name, func() {
			ctx, flush := tester.NewContext()
			defer flush()

			bus := tc.errs()
			finalizeErrorHandling(ctx, tc.opts, bus, "test")

			tc.expectErr(suite.T(), bus.Failure())
		})
	}
}

View File

@ -66,7 +66,7 @@ func newOperation(
) operation {
return operation{
CreatedAt: time.Now(),
Errors: fault.New(opts.FailFast),
Errors: fault.New(opts.FailureHandling == control.FailFast),
Options: opts,
bus: bus,

View File

@ -152,6 +152,7 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De
op.Errors.Fail(clues.Wrap(err, "running restore"))
}
finalizeErrorHandling(ctx, op.Options, op.Errors, "running restore")
LogFaultErrors(ctx, op.Errors.Errors(), "running restore")
// -----

View File

@ -437,7 +437,7 @@ func (suite *RestoreOpIntegrationSuite) TestRestore_Run() {
ro, err := NewRestoreOperation(
ctx,
control.Options{FailFast: true},
control.Options{FailureHandling: control.FailFast},
suite.kw,
suite.sw,
bup.gc,

View File

@ -8,17 +8,28 @@ import (
type Options struct {
Collision CollisionPolicy `json:"-"`
DisableMetrics bool `json:"disableMetrics"`
FailFast bool `json:"failFast"`
FailureHandling FailureBehavior `json:"failureHandling"`
ItemFetchParallelism int `json:"itemFetchParallelism"`
RestorePermissions bool `json:"restorePermissions"`
SkipReduce bool `json:"skipReduce"`
ItemFetchParallelism int `json:"itemFetchParallelism"`
ToggleFeatures Toggles `json:"ToggleFeatures"`
}
// FailureBehavior names the strategy an operation follows when it runs
// into errors. It is the type of the Options.FailureHandling field.
type FailureBehavior string
const (
// FailFast fails and exits the run immediately.
FailFast FailureBehavior = "fail-fast"
// FailAfterRecovery recovers whenever possible, and reports a non-zero
// number of recoveries as a failure.
FailAfterRecovery FailureBehavior = "fail-after-recovery"
// BestEffort recovers whenever possible, and does not report recovery
// as a failure.
BestEffort FailureBehavior = "best-effort"
)
// Defaults provides an Options with the default values set.
func Defaults() Options {
return Options{
FailFast: true,
FailureHandling: FailAfterRecovery,
ToggleFeatures: Toggles{},
}
}

View File

@ -95,7 +95,7 @@ func initM365Repo(t *testing.T) (
ac := tester.NewM365Account(t)
opts := control.Options{
DisableMetrics: true,
FailFast: true,
FailureHandling: control.FailFast,
}
repo, err := repository.Initialize(ctx, ac, st, opts)