introduce fail-after-recovery option (#3005)
While fail-fast and best-effort make for good categories at the extreme ends of error handling, we keep finding outselves wanting to operate in a middle ground. This change introduces a new error handling category: FailAfterRecovery. This option tells corso to complete as much of its process as it can, even if it recovers from errors. But at the end of processing, if it recovered from any errors, an error is returned for the operation. This behavior is the new failure handling default, instead of failFast. --- #### Does this PR need a docs update or release note? - [x] ⛔ No #### Type of change - [x] 🌻 Feature #### Test Plan - [x] 💪 Manual - [x] ⚡ Unit test
This commit is contained in:
parent
983aaabdb2
commit
f2bf0ee685
@ -11,7 +11,10 @@ import (
|
||||
func Control() control.Options {
|
||||
opt := control.Defaults()
|
||||
|
||||
opt.FailFast = fastFail
|
||||
if fastFail {
|
||||
opt.FailureHandling = control.FailFast
|
||||
}
|
||||
|
||||
opt.DisableMetrics = noStats
|
||||
opt.RestorePermissions = restorePermissions
|
||||
opt.SkipReduce = skipReduce
|
||||
|
||||
@ -162,7 +162,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
|
||||
getter mockGetter
|
||||
resolver graph.ContainerResolver
|
||||
scope selectors.ExchangeScope
|
||||
failFast bool
|
||||
failFast control.FailureBehavior
|
||||
expectErr assert.ErrorAssertionFunc
|
||||
expectNewColls int
|
||||
expectMetadataColls int
|
||||
@ -271,7 +271,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
|
||||
},
|
||||
resolver: newMockResolver(container1, container2),
|
||||
scope: allScope,
|
||||
failFast: true,
|
||||
failFast: control.FailFast,
|
||||
expectErr: assert.NoError,
|
||||
expectNewColls: 2,
|
||||
expectMetadataColls: 1,
|
||||
@ -285,7 +285,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
|
||||
},
|
||||
resolver: newMockResolver(container1, container2),
|
||||
scope: allScope,
|
||||
failFast: true,
|
||||
failFast: control.FailFast,
|
||||
expectErr: assert.Error,
|
||||
expectNewColls: 0,
|
||||
expectMetadataColls: 0,
|
||||
@ -309,8 +309,8 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() {
|
||||
test.resolver,
|
||||
test.scope,
|
||||
dps,
|
||||
control.Options{FailFast: test.failFast},
|
||||
fault.New(test.failFast))
|
||||
control.Options{FailureHandling: test.failFast},
|
||||
fault.New(test.failFast == control.FailFast))
|
||||
test.expectErr(t, err, clues.ToCore(err))
|
||||
|
||||
// collection assertions
|
||||
@ -465,7 +465,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_repea
|
||||
resolver,
|
||||
allScope,
|
||||
dps,
|
||||
control.Options{FailFast: true},
|
||||
control.Options{FailureHandling: control.FailFast},
|
||||
fault.New(true))
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
|
||||
@ -180,6 +180,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
|
||||
op.Errors.Fail(clues.Wrap(err, "running backup"))
|
||||
}
|
||||
|
||||
finalizeErrorHandling(ctx, op.Options, op.Errors, "running backup")
|
||||
LogFaultErrors(ctx, op.Errors.Errors(), "running backup")
|
||||
|
||||
// -----
|
||||
|
||||
@ -2,11 +2,45 @@ package operations
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/alcionai/clues"
|
||||
|
||||
"github.com/alcionai/corso/src/pkg/control"
|
||||
"github.com/alcionai/corso/src/pkg/fault"
|
||||
"github.com/alcionai/corso/src/pkg/logger"
|
||||
)
|
||||
|
||||
// finalizeErrorHandling ensures the operation follow the options
|
||||
// failure behavior requirements.
|
||||
func finalizeErrorHandling(
|
||||
ctx context.Context,
|
||||
opts control.Options,
|
||||
errs *fault.Bus,
|
||||
prefix string,
|
||||
) {
|
||||
rcvd := errs.Recovered()
|
||||
|
||||
// under certain conditions, there's nothing else left to do
|
||||
if opts.FailureHandling == control.BestEffort ||
|
||||
errs.Failure() != nil ||
|
||||
len(rcvd) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if opts.FailureHandling == control.FailAfterRecovery {
|
||||
msg := fmt.Sprintf("%s: partial success: %d errors occurred", prefix, len(rcvd))
|
||||
logger.Ctx(ctx).Error(msg)
|
||||
|
||||
if len(rcvd) == 1 {
|
||||
errs.Fail(rcvd[0])
|
||||
return
|
||||
}
|
||||
|
||||
errs.Fail(clues.New(msg))
|
||||
}
|
||||
}
|
||||
|
||||
// LogFaultErrors is a helper function that logs all entries in the Errors struct.
|
||||
func LogFaultErrors(ctx context.Context, fe *fault.Errors, prefix string) {
|
||||
if fe == nil {
|
||||
102
src/internal/operations/helpers_test.go
Normal file
102
src/internal/operations/helpers_test.go
Normal file
@ -0,0 +1,102 @@
|
||||
package operations
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/suite"
|
||||
|
||||
"github.com/alcionai/corso/src/internal/tester"
|
||||
"github.com/alcionai/corso/src/pkg/control"
|
||||
"github.com/alcionai/corso/src/pkg/fault"
|
||||
)
|
||||
|
||||
type HelpersUnitSuite struct {
|
||||
tester.Suite
|
||||
}
|
||||
|
||||
func TestHelpersUnitSuite(t *testing.T) {
|
||||
suite.Run(t, &HelpersUnitSuite{Suite: tester.NewUnitSuite(t)})
|
||||
}
|
||||
|
||||
func (suite *HelpersUnitSuite) TestFinalizeErrorHandling() {
|
||||
table := []struct {
|
||||
name string
|
||||
errs func() *fault.Bus
|
||||
opts control.Options
|
||||
expectErr assert.ErrorAssertionFunc
|
||||
}{
|
||||
{
|
||||
name: "no errors",
|
||||
errs: func() *fault.Bus {
|
||||
return fault.New(false)
|
||||
},
|
||||
opts: control.Options{
|
||||
FailureHandling: control.FailAfterRecovery,
|
||||
},
|
||||
expectErr: assert.NoError,
|
||||
},
|
||||
{
|
||||
name: "already failed",
|
||||
errs: func() *fault.Bus {
|
||||
fn := fault.New(false)
|
||||
fn.Fail(assert.AnError)
|
||||
return fn
|
||||
},
|
||||
opts: control.Options{
|
||||
FailureHandling: control.FailAfterRecovery,
|
||||
},
|
||||
expectErr: assert.Error,
|
||||
},
|
||||
{
|
||||
name: "best effort",
|
||||
errs: func() *fault.Bus {
|
||||
fn := fault.New(false)
|
||||
fn.AddRecoverable(assert.AnError)
|
||||
return fn
|
||||
},
|
||||
opts: control.Options{
|
||||
FailureHandling: control.BestEffort,
|
||||
},
|
||||
expectErr: assert.NoError,
|
||||
},
|
||||
{
|
||||
name: "recoverable errors produce hard fail",
|
||||
errs: func() *fault.Bus {
|
||||
fn := fault.New(false)
|
||||
fn.AddRecoverable(assert.AnError)
|
||||
return fn
|
||||
},
|
||||
opts: control.Options{
|
||||
FailureHandling: control.FailAfterRecovery,
|
||||
},
|
||||
expectErr: assert.Error,
|
||||
},
|
||||
{
|
||||
name: "multiple recoverable errors produce hard fail",
|
||||
errs: func() *fault.Bus {
|
||||
fn := fault.New(false)
|
||||
fn.AddRecoverable(assert.AnError)
|
||||
fn.AddRecoverable(assert.AnError)
|
||||
fn.AddRecoverable(assert.AnError)
|
||||
return fn
|
||||
},
|
||||
opts: control.Options{
|
||||
FailureHandling: control.FailAfterRecovery,
|
||||
},
|
||||
expectErr: assert.Error,
|
||||
},
|
||||
}
|
||||
for _, test := range table {
|
||||
suite.Run(test.name, func() {
|
||||
ctx, flush := tester.NewContext()
|
||||
defer flush()
|
||||
|
||||
t := suite.T()
|
||||
errs := test.errs()
|
||||
|
||||
finalizeErrorHandling(ctx, test.opts, errs, "test")
|
||||
test.expectErr(t, errs.Failure())
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -66,7 +66,7 @@ func newOperation(
|
||||
) operation {
|
||||
return operation{
|
||||
CreatedAt: time.Now(),
|
||||
Errors: fault.New(opts.FailFast),
|
||||
Errors: fault.New(opts.FailureHandling == control.FailFast),
|
||||
Options: opts,
|
||||
|
||||
bus: bus,
|
||||
|
||||
@ -152,6 +152,7 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De
|
||||
op.Errors.Fail(clues.Wrap(err, "running restore"))
|
||||
}
|
||||
|
||||
finalizeErrorHandling(ctx, op.Options, op.Errors, "running restore")
|
||||
LogFaultErrors(ctx, op.Errors.Errors(), "running restore")
|
||||
|
||||
// -----
|
||||
|
||||
@ -437,7 +437,7 @@ func (suite *RestoreOpIntegrationSuite) TestRestore_Run() {
|
||||
|
||||
ro, err := NewRestoreOperation(
|
||||
ctx,
|
||||
control.Options{FailFast: true},
|
||||
control.Options{FailureHandling: control.FailFast},
|
||||
suite.kw,
|
||||
suite.sw,
|
||||
bup.gc,
|
||||
|
||||
@ -8,17 +8,28 @@ import (
|
||||
type Options struct {
|
||||
Collision CollisionPolicy `json:"-"`
|
||||
DisableMetrics bool `json:"disableMetrics"`
|
||||
FailFast bool `json:"failFast"`
|
||||
FailureHandling FailureBehavior `json:"failureHandling"`
|
||||
ItemFetchParallelism int `json:"itemFetchParallelism"`
|
||||
RestorePermissions bool `json:"restorePermissions"`
|
||||
SkipReduce bool `json:"skipReduce"`
|
||||
ItemFetchParallelism int `json:"itemFetchParallelism"`
|
||||
ToggleFeatures Toggles `json:"ToggleFeatures"`
|
||||
}
|
||||
|
||||
type FailureBehavior string
|
||||
|
||||
const (
|
||||
// fails and exits the run immediately
|
||||
FailFast FailureBehavior = "fail-fast"
|
||||
// recovers whenever possible, reports non-zero recoveries as a failure
|
||||
FailAfterRecovery FailureBehavior = "fail-after-recovery"
|
||||
// recovers whenever possible, does not report recovery as failure
|
||||
BestEffort FailureBehavior = "best-effort"
|
||||
)
|
||||
|
||||
// Defaults provides an Options with the default values set.
|
||||
func Defaults() Options {
|
||||
return Options{
|
||||
FailFast: true,
|
||||
FailureHandling: FailAfterRecovery,
|
||||
ToggleFeatures: Toggles{},
|
||||
}
|
||||
}
|
||||
|
||||
@ -95,7 +95,7 @@ func initM365Repo(t *testing.T) (
|
||||
ac := tester.NewM365Account(t)
|
||||
opts := control.Options{
|
||||
DisableMetrics: true,
|
||||
FailFast: true,
|
||||
FailureHandling: control.FailFast,
|
||||
}
|
||||
|
||||
repo, err := repository.Initialize(ctx, ac, st, opts)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user