From 4852667468fdbff4425ed4bb1d3c40ce53a0a8b7 Mon Sep 17 00:00:00 2001
From: Keepers <ryanfkeepers@gmail.com>
Date: Wed, 25 Jan 2023 15:00:23 -0700
Subject: [PATCH] Add fault pkg and errors struct (#2236)

## Does this PR need a docs update or release note?

- [x] :no_entry: No

## Type of change

- [x] :broom: Tech Debt/Cleanup

## Issue(s)

* #1970

## Test Plan

- [x] :zap: Unit test
---
 src/pkg/fault/example_fault_test.go | 291 ++++++++++++++++++++++++++++
 src/pkg/fault/fault.go              | 127 ++++++++++++
 src/pkg/fault/fault_test.go         | 202 +++++++++++++++++++
 3 files changed, 620 insertions(+)
 create mode 100644 src/pkg/fault/example_fault_test.go
 create mode 100644 src/pkg/fault/fault.go
 create mode 100644 src/pkg/fault/fault_test.go

diff --git a/src/pkg/fault/example_fault_test.go b/src/pkg/fault/example_fault_test.go
new file mode 100644
index 000000000..4ad5945d2
--- /dev/null
+++ b/src/pkg/fault/example_fault_test.go
@@ -0,0 +1,291 @@
+package fault_test
+
+import (
+	"fmt"
+
+	"github.com/pkg/errors"
+
+	"github.com/alcionai/corso/src/pkg/fault"
+)
+
+// ---------------------------------------------------------------------------
+// mock helpers
+// ---------------------------------------------------------------------------
+
+var (
+	ctrl  any
+	items = []string{}
+)
+
+type mockController struct {
+	errors any
+}
+
+func connectClient() error              { return nil }
+func dependencyCall() error             { return nil }
+func getIthItem(i string) error         { return nil }
+func getData() ([]string, error)        { return nil, nil }
+func storeData([]string, *fault.Errors) {}
+
+type mockOper struct {
+	Errors *fault.Errors
+}
+
+func newOperation() mockOper          { return mockOper{fault.New(true)} }
+func (m mockOper) Run() *fault.Errors { return m.Errors }
+
+// ---------------------------------------------------------------------------
+// examples
+// ---------------------------------------------------------------------------
+
+// ExampleNewErrors highlights assumptions and best practices
+// for generating Errors structs.
+func Example_new() {
+	// Errors should only be generated during the construction of
+	// another controller, such as a new Backup or Restore Operations.
+	// Configurations like failFast are set during construction.
+	//
+	// Generating new fault.Errors structs outside of an operation
+	// controller is a smell, and should be avoided.  If you need
+	// to aggregate errors, you should accept an interface and pass
+	// an Errors instance into it.
+	ctrl = mockController{
+		errors: fault.New(false),
+	}
+}
+
+// ExampleErrorsFail describes the assumptions and best practices
+// for setting the Failure error.
+func Example_errors_Fail() {
+	errs := fault.New(false)
+
+	// Fail() should be used to record any error that highlights a
+	// non-recoverable failure in a process.
+	//
+	// Fail() should only get called in the last step before returning
+	// a fault.Errors from a controller.  In all other cases, you
+	// should simply return an error and expect the upstream controller
+	// to call Fail() for you.
+	if err := connectClient(); err != nil {
+		// normally, you'd want to
+		// return errs.Fail(err)
+		errs.Fail(err)
+	}
+
+	// Only the topmost handler of the error should set the Fail() err.
+	// This will normally be the operation controller itself.
+	// IE: Fail() is not Wrap().  In lower levels, errors should get
+	// wrapped and returned like normal, and only handled by errors
+	// at the end.
+	lowLevelCall := func() error {
+		if err := dependencyCall(); err != nil {
+			// wrap here, deeper into the stack
+			return errors.Wrap(err, "dependency")
+		}
+
+		return nil
+	}
+
+	if err := lowLevelCall(); err != nil {
+		// fail here, at the top of the stack
+		errs.Fail(err)
+	}
+}
+
+// ExampleErrorsAdd describes the assumptions and best practices
+// for aggregating iterable or recoverable errors.
+func Example_errors_Add() {
+	errs := fault.New(false)
+
+	// Add() should be used to record any error in a recoverable
+	// part of processing.
+	//
+	// Add() should only get called in the last step in handling an
+	// error within a loop or stream that does not otherwise return
+	// an error.  In all other cases, you should simply return an error
+	// and expect the upstream point of iteration to call Add() for you.
+	for _, i := range items {
+		if err := getIthItem(i); err != nil {
+			errs.Add(err)
+		}
+	}
+
+	// In case of failFast behavior, iteration should exit as soon
+	// as an error occurs.  Errors does not expose the failFast flag
+	// directly.  Instead, iterators should check the value of Err().
+	// If it is non-nil, then the loop shold break.
+	for _, i := range items {
+		if errs.Err() != nil {
+			break
+		}
+
+		errs.Add(getIthItem(i))
+	}
+
+	// Only the topmost handler of the error should Add() the err.
+	// This will normally be the iteration loop itself.
+	// IE: Add() is not Wrap().  In lower levels, errors should get
+	// wrapped and returned like normally, and only added to the
+	// errors at the end.
+	clientBasedGetter := func(s string) error {
+		if err := dependencyCall(); err != nil {
+			// wrap here, deeper into the stack
+			return errors.Wrap(err, "dependency")
+		}
+
+		return nil
+	}
+
+	for _, i := range items {
+		if err := clientBasedGetter(i); err != nil {
+			// add here, within the iteraton loop
+			errs.Add(err)
+		}
+	}
+}
+
+// ExampleErrorsErr describes retrieving the non-recoverable error.
+func Example_errors_Err() {
+	errs := fault.New(false)
+	errs.Fail(errors.New("catastrophe"))
+
+	// Err() gets the primary failure error.
+	err := errs.Err()
+	fmt.Println(err)
+
+	// if multiple Failures occur, each one after the first gets
+	// added to the Errs slice.
+	errs.Fail(errors.New("another catastrophe"))
+	errSl := errs.Errs()
+
+	for _, e := range errSl {
+		fmt.Println(e)
+	}
+
+	// If Err() is nil, then you can assume the operation completed.
+	// A complete operation is not necessarily an error-free operation.
+	//
+	// Even if Err() is nil, Errs() can be non-empty.
+	// Make sure you check both.
+
+	errs = fault.New(true)
+
+	// If failFast is set to true, then the first error Add()ed gets
+	// promoted to the Err() position.
+
+	errs.Add(errors.New("not catastrophic, but still becomes the Err()"))
+	err = errs.Err()
+	fmt.Println(err)
+
+	// Output: catastrophe
+	// another catastrophe
+	// not catastrophic, but still becomes the Err()
+}
+
+// ExampleErrorsErrs describes retrieving individual errors.
+func Example_errors_Errs() {
+	errs := fault.New(false)
+	errs.Add(errors.New("not catastrophic"))
+	errs.Add(errors.New("something unwanted"))
+
+	// Errs() gets the slice errors that were recorded, but were
+	// considered recoverable.
+	errSl := errs.Errs()
+	for _, err := range errSl {
+		fmt.Println(err)
+	}
+
+	// Errs() only needs to be investigated by the end user at the
+	// conclusion of an operation.  Checking Errs() within lower-
+	// layer code is a smell.  Funcs should return an error if they
+	// need upstream handlers to recognize failure states.
+	//
+	// If Errs() is nil, then you can assume that no recoverable or
+	// iteration-based errors occurred.  But that does not necessarily
+	// mean the operation was able to complete.
+	//
+	// Even if Errs() contains zero items, Err() can be non-nil.
+	// Make sure you check both.
+
+	// Output: not catastrophic
+	// something unwanted
+}
+
+// ExampleErrorsE2e showcases a more complex integration.
+func Example_errors_e2e() {
+	oper := newOperation()
+
+	// imagine that we're a user, calling into corso SDK.
+	// (fake funcs used here to minimize example bloat)
+	//
+	// The operation is our controller, we expect it to
+	// generate a new fault.Errors when constructed, and
+	// to return that struct when we call Run()
+	errs := oper.Run()
+
+	// Let's investigate what went on inside.  Since we're at
+	// the top of our controller, and returning a fault.Errors,
+	// all the error handlers set the Fail() case.
+	/* Run() */
+	func() *fault.Errors {
+		if err := connectClient(); err != nil {
+			// Fail() here; we're top level in the controller
+			// and this is a non-recoverable issue
+			return oper.Errors.Fail(err)
+		}
+
+		data, err := getData()
+		if err != nil {
+			return oper.Errors.Fail(err)
+		}
+
+		// storeData will aggregate iterated errors into
+		// oper.Errors.
+		storeData(data, oper.Errors)
+
+		// return oper.Errors here, in part to ensure it's
+		// non-nil, and because we don't know if we've
+		// aggregated any iterated errors yet.
+		return oper.Errors
+	}()
+
+	// What about the lower level handling?  storeData didn't
+	// return an error, so what's happening there?
+	/* storeData */
+	func(data []any, errs *fault.Errors) {
+		// this is downstream in our code somewhere
+		storer := func(a any) error {
+			if err := dependencyCall(); err != nil {
+				// we're not passing in or calling fault.Errors here,
+				// because this isn't the iteration handler, it's just
+				// a regular error.
+				return errors.Wrap(err, "dependency")
+			}
+
+			return nil
+		}
+
+		for _, d := range data {
+			if errs.Err() != nil {
+				break
+			}
+
+			if err := storer(d); err != nil {
+				// Since we're at the top of the iteration, we need
+				// to add each error to the fault.Errors struct.
+				errs.Add(err)
+			}
+		}
+	}(nil, nil)
+
+	// then at the end of the oper.Run, we investigate the results.
+	if errs.Err() != nil {
+		// handle the primary error
+		fmt.Println("err occurred", errs.Err())
+	}
+
+	for _, err := range errs.Errs() {
+		// handle each recoverable error
+		fmt.Println("recoverable err occurred", err)
+	}
+}
diff --git a/src/pkg/fault/fault.go b/src/pkg/fault/fault.go
new file mode 100644
index 000000000..f4171ce77
--- /dev/null
+++ b/src/pkg/fault/fault.go
@@ -0,0 +1,127 @@
+package fault
+
+import (
+	"sync"
+
+	"golang.org/x/exp/slices"
+)
+
+type Errors struct {
+	mu *sync.Mutex
+
+	// err identifies non-recoverable errors.  This includes
+	// non-start cases (ex: cannot connect to client), hard-
+	// stop issues (ex: credentials expired) or conscious exit
+	// cases (ex: iteration error + failFast config).
+	err error
+
+	// errs is the accumulation of recoverable or iterated
+	// errors.  Eg: if a process is retrieving N items, and
+	// 1 of the items fails to be retrieved, but the rest of
+	// them succeed, we'd expect to see 1 error added to this
+	// slice.
+	errs []error
+
+	// if failFast is true, the first errs addition will
+	// get promoted to the err value.  This signifies a
+	// non-recoverable processing state, causing any running
+	// processes to exit.
+	failFast bool
+}
+
+// ErrorsData provides the errors data alone, without sync
+// controls, allowing the data to be persisted.
+type ErrorsData struct {
+	Err      error   `json:"err"`
+	Errs     []error `json:"errs"`
+	FailFast bool    `json:"failFast"`
+}
+
+// New constructs a new error with default values in place.
+func New(failFast bool) *Errors {
+	return &Errors{
+		mu:       &sync.Mutex{},
+		errs:     []error{},
+		failFast: failFast,
+	}
+}
+
+// Err returns the primary error.  If not nil, this
+// indicates the operation exited prior to completion.
+func (e *Errors) Err() error {
+	return e.err
+}
+
+// Errs returns the slice of recoverable and
+// iterated errors.
+func (e *Errors) Errs() []error {
+	return e.errs
+}
+
+// Data returns the plain set of error data
+// without any sync properties.
+func (e *Errors) Data() ErrorsData {
+	return ErrorsData{
+		Err:      e.err,
+		Errs:     slices.Clone(e.errs),
+		FailFast: e.failFast,
+	}
+}
+
+// TODO: introduce Failer interface
+
+// Fail sets the non-recoverable error (ie: errors.err)
+// in the errors struct.  If a non-recoverable error is
+// already present, the error gets added to the errs slice.
+func (e *Errors) Fail(err error) *Errors {
+	if err == nil {
+		return e
+	}
+
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	return e.setErr(err)
+}
+
+// setErr handles setting errors.err.  Sync locking gets
+// handled upstream of this call.
+func (e *Errors) setErr(err error) *Errors {
+	if e.err != nil {
+		return e.addErr(err)
+	}
+
+	e.err = err
+
+	return e
+}
+
+// TODO: introduce Adder interface
+
+// Add appends the error to the slice of recoverable and
+// iterated errors (ie: errors.errs).  If failFast is true,
+// the first Added error will get copied to errors.err,
+// causing the errors struct to identify as non-recoverably
+// failed.
+func (e *Errors) Add(err error) *Errors {
+	if err == nil {
+		return e
+	}
+
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	return e.addErr(err)
+}
+
+// addErr handles adding errors to errors.errs.  Sync locking
+// gets handled upstream of this call.
+func (e *Errors) addErr(err error) *Errors {
+	if e.err == nil && e.failFast {
+		e.setErr(err)
+	}
+
+	e.errs = append(e.errs, err)
+
+	return e
+}
diff --git a/src/pkg/fault/fault_test.go b/src/pkg/fault/fault_test.go
new file mode 100644
index 000000000..3f5ad127c
--- /dev/null
+++ b/src/pkg/fault/fault_test.go
@@ -0,0 +1,202 @@
+package fault_test
+
+import (
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/stretchr/testify/suite"
+
+	"github.com/alcionai/corso/src/pkg/fault"
+)
+
+type FaultErrorsUnitSuite struct {
+	suite.Suite
+}
+
+func TestFaultErrorsUnitSuite(t *testing.T) {
+	suite.Run(t, new(FaultErrorsUnitSuite))
+}
+
+func (suite *FaultErrorsUnitSuite) TestNew() {
+	t := suite.T()
+
+	n := fault.New(false)
+	assert.NotNil(t, n)
+
+	n = fault.New(true)
+	assert.NotNil(t, n)
+}
+
+func (suite *FaultErrorsUnitSuite) TestErr() {
+	table := []struct {
+		name     string
+		failFast bool
+		fail     error
+		add      error
+		expect   assert.ErrorAssertionFunc
+	}{
+		{
+			name:   "nil",
+			expect: assert.NoError,
+		},
+		{
+			name:     "nil, failFast",
+			failFast: true,
+			expect:   assert.NoError,
+		},
+		{
+			name:   "failed",
+			fail:   assert.AnError,
+			expect: assert.Error,
+		},
+		{
+			name:     "failed, failFast",
+			fail:     assert.AnError,
+			failFast: true,
+			expect:   assert.Error,
+		},
+		{
+			name:   "added",
+			add:    assert.AnError,
+			expect: assert.NoError,
+		},
+		{
+			name:     "added, failFast",
+			add:      assert.AnError,
+			failFast: true,
+			expect:   assert.Error,
+		},
+	}
+	for _, test := range table {
+		suite.T().Run(test.name, func(t *testing.T) {
+			n := fault.New(test.failFast)
+			require.NotNil(t, n)
+
+			e := n.Fail(test.fail)
+			require.NotNil(t, e)
+
+			e = n.Add(test.add)
+			require.NotNil(t, e)
+
+			test.expect(t, n.Err())
+		})
+	}
+}
+
+func (suite *FaultErrorsUnitSuite) TestFail() {
+	t := suite.T()
+
+	n := fault.New(false)
+	require.NotNil(t, n)
+
+	n.Fail(assert.AnError)
+	assert.Error(t, n.Err())
+	assert.Empty(t, n.Errs())
+
+	n.Fail(assert.AnError)
+	assert.Error(t, n.Err())
+	assert.NotEmpty(t, n.Errs())
+}
+
+func (suite *FaultErrorsUnitSuite) TestErrs() {
+	table := []struct {
+		name     string
+		failFast bool
+		fail     error
+		add      error
+		expect   assert.ValueAssertionFunc
+	}{
+		{
+			name:   "nil",
+			expect: assert.Empty,
+		},
+		{
+			name:     "nil, failFast",
+			failFast: true,
+			expect:   assert.Empty,
+		},
+		{
+			name:   "failed",
+			fail:   assert.AnError,
+			expect: assert.Empty,
+		},
+		{
+			name:     "failed, failFast",
+			fail:     assert.AnError,
+			failFast: true,
+			expect:   assert.Empty,
+		},
+		{
+			name:   "added",
+			add:    assert.AnError,
+			expect: assert.NotEmpty,
+		},
+		{
+			name:     "added, failFast",
+			add:      assert.AnError,
+			failFast: true,
+			expect:   assert.NotEmpty,
+		},
+	}
+	for _, test := range table {
+		suite.T().Run(test.name, func(t *testing.T) {
+			n := fault.New(test.failFast)
+			require.NotNil(t, n)
+
+			e := n.Fail(test.fail)
+			require.NotNil(t, e)
+
+			e = n.Add(test.add)
+			require.NotNil(t, e)
+
+			test.expect(t, n.Errs())
+		})
+	}
+}
+
+func (suite *FaultErrorsUnitSuite) TestAdd() {
+	t := suite.T()
+
+	n := fault.New(true)
+	require.NotNil(t, n)
+
+	n.Add(assert.AnError)
+	assert.Error(t, n.Err())
+	assert.Len(t, n.Errs(), 1)
+
+	n.Add(assert.AnError)
+	assert.Error(t, n.Err())
+	assert.Len(t, n.Errs(), 2)
+}
+
+func (suite *FaultErrorsUnitSuite) TestData() {
+	t := suite.T()
+
+	// not fail-fast
+	n := fault.New(false)
+	require.NotNil(t, n)
+
+	n.Fail(errors.New("fail"))
+	n.Add(errors.New("1"))
+	n.Add(errors.New("2"))
+
+	d := n.Data()
+	assert.Equal(t, n.Err(), d.Err)
+	assert.ElementsMatch(t, n.Errs(), d.Errs)
+	assert.False(t, d.FailFast)
+
+	// fail-fast
+	n = fault.New(true)
+	require.NotNil(t, n)
+
+	n.Fail(errors.New("fail"))
+	n.Add(errors.New("1"))
+	n.Add(errors.New("2"))
+
+	d = n.Data()
+	assert.Equal(t, n.Err(), d.Err)
+	assert.ElementsMatch(t, n.Errs(), d.Errs)
+	assert.True(t, d.FailFast)
+}