diff --git a/src/internal/m365/collection/exchange/backup.go b/src/internal/m365/collection/exchange/backup.go index d191428dc..445577a0a 100644 --- a/src/internal/m365/collection/exchange/backup.go +++ b/src/internal/m365/collection/exchange/backup.go @@ -296,6 +296,7 @@ func populateCollections( cl), qp.ProtectedResource.ID(), bh.itemHandler(), + bh, addAndRem.Added, addAndRem.Removed, // TODO: produce a feature flag that allows selective diff --git a/src/internal/m365/collection/exchange/backup_test.go b/src/internal/m365/collection/exchange/backup_test.go index 9cc953c24..b02abdf05 100644 --- a/src/internal/m365/collection/exchange/backup_test.go +++ b/src/internal/m365/collection/exchange/backup_test.go @@ -88,6 +88,14 @@ func (bh mockBackupHandler) folderGetter() containerGetter { return func (bh mockBackupHandler) previewIncludeContainers() []string { return bh.previewIncludes } func (bh mockBackupHandler) previewExcludeContainers() []string { return bh.previewExcludes } +func (bh mockBackupHandler) CanSkipItemFailure( + err error, + resourceID string, + opts control.Options, +) (fault.SkipCause, bool) { + return "", false +} + func (bh mockBackupHandler) NewContainerCache( userID string, ) (string, graph.ContainerResolver) { diff --git a/src/internal/m365/collection/exchange/collection.go b/src/internal/m365/collection/exchange/collection.go index efb6f7e94..fcb733b6e 100644 --- a/src/internal/m365/collection/exchange/collection.go +++ b/src/internal/m365/collection/exchange/collection.go @@ -19,6 +19,7 @@ import ( "github.com/alcionai/corso/src/internal/m365/support" "github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/count" "github.com/alcionai/corso/src/pkg/errs/core" "github.com/alcionai/corso/src/pkg/fault" @@ -68,21 +69,21 @@ func getItemAndInfo( ctx context.Context, getter itemGetterSerializer, userID string, - id string, + itemID string, useImmutableIDs bool, parentPath string, ) ([]byte, *details.ExchangeInfo, error) { item, info, err := getter.GetItem( ctx, userID, - id, + itemID, fault.New(true)) // temporary way to force a failFast error if err != nil { return nil, nil, clues.WrapWC(ctx, err, "fetching item"). Label(fault.LabelForceNoBackupCreation) } - itemData, err := getter.Serialize(ctx, item, userID, id) + itemData, err := getter.Serialize(ctx, item, userID, itemID) if err != nil { return nil, nil, clues.WrapWC(ctx, err, "serializing item") } @@ -108,6 +109,7 @@ func NewCollection( bc data.BaseCollection, user string, items itemGetterSerializer, + canSkipFailChecker canSkipItemFailurer, origAdded map[string]time.Time, origRemoved []string, validModTimes bool, @@ -140,6 +142,7 @@ func NewCollection( added: added, removed: removed, getter: items, + skipChecker: canSkipFailChecker, statusUpdater: statusUpdater, } } @@ -150,6 +153,7 @@ func NewCollection( added: added, removed: removed, getter: items, + skipChecker: canSkipFailChecker, statusUpdater: statusUpdater, counter: counter, } @@ -167,7 +171,8 @@ type prefetchCollection struct { // removed is a list of item IDs that were deleted from, or moved out, of a container removed map[string]struct{} - getter itemGetterSerializer + getter itemGetterSerializer + skipChecker canSkipItemFailurer statusUpdater support.StatusUpdater } @@ -194,11 +199,12 @@ func (col *prefetchCollection) streamItems( wg sync.WaitGroup progressMessage chan<- struct{} user = col.user + dataCategory = col.Category().String() ) ctx = clues.Add( ctx, - "category", col.Category().String()) + "category", dataCategory) defer func() { close(stream) @@ -227,7 +233,7 @@ func (col *prefetchCollection) streamItems( defer close(semaphoreCh) // delete all removed items - for id := range col.removed { + for itemID := range col.removed { semaphoreCh <- struct{}{} wg.Add(1) @@ -247,7 +253,7 @@ func (col *prefetchCollection) streamItems( if progressMessage != nil { progressMessage <- struct{}{} } - }(id) + }(itemID) } var ( @@ -256,7 +262,7 @@ func (col *prefetchCollection) streamItems( ) // add any new items - for id := range col.added { + for itemID := range col.added { if el.Failure() != nil { break } @@ -277,8 +283,23 @@ func (col *prefetchCollection) streamItems( col.Opts().ToggleFeatures.ExchangeImmutableIDs, parentPath) if err != nil { + // pulled outside the switch due to multiple return values. + cause, canSkip := col.skipChecker.CanSkipItemFailure( + err, + user, + col.Opts()) + // Handle known error cases switch { + case canSkip: + // this is a special case handler that allows the item to be skipped + // instead of producing an error. + errs.AddSkip(ctx, fault.FileSkip( + cause, + dataCategory, + id, + id, + nil)) case errors.Is(err, core.ErrNotFound): // Don't report errors for deleted items as there's no way for us to // back up data that is gone. Record it as a "success", since there's @@ -349,7 +370,7 @@ func (col *prefetchCollection) streamItems( if progressMessage != nil { progressMessage <- struct{}{} } - }(id) + }(itemID) } wg.Wait() @@ -377,7 +398,8 @@ type lazyFetchCollection struct { // removed is a list of item IDs that were deleted from, or moved out, of a container removed map[string]struct{} - getter itemGetterSerializer + getter itemGetterSerializer + skipChecker canSkipItemFailurer statusUpdater support.StatusUpdater @@ -404,8 +426,8 @@ func (col *lazyFetchCollection) streamItems( var ( success int64 progressMessage chan<- struct{} - - user = col.user + user = col.user + el = errs.Local() ) defer func() { @@ -417,7 +439,7 @@ func (col *lazyFetchCollection) streamItems( int(success), 0, col.FullPath().Folder(false), - errs.Failure()) + el.Failure()) }() if len(col.added)+len(col.removed) > 0 { @@ -443,7 +465,7 @@ func (col *lazyFetchCollection) streamItems( // add any new items for id, modTime := range col.added { - if errs.Failure() != nil { + if el.Failure() != nil { break } @@ -459,15 +481,18 @@ func (col *lazyFetchCollection) streamItems( &lazyItemGetter{ userID: user, itemID: id, + category: col.Category(), getter: col.getter, modTime: modTime, immutableIDs: col.Opts().ToggleFeatures.ExchangeImmutableIDs, parentPath: parentPath, + skipChecker: col.skipChecker, + opts: col.Opts(), }, id, modTime, col.counter, - errs) + el) atomic.AddInt64(&success, 1) @@ -481,9 +506,12 @@ type lazyItemGetter struct { getter itemGetterSerializer userID string itemID string + category path.CategoryType parentPath string modTime time.Time immutableIDs bool + skipChecker canSkipItemFailurer + opts control.Options } func (lig *lazyItemGetter) GetData( @@ -498,6 +526,25 @@ func (lig *lazyItemGetter) GetData( lig.immutableIDs, lig.parentPath) if err != nil { + if lig.skipChecker != nil { + cause, canSkip := lig.skipChecker.CanSkipItemFailure( + err, + lig.userID, + lig.opts) + if canSkip { + errs.AddSkip(ctx, fault.FileSkip( + cause, + lig.category.String(), + lig.itemID, + lig.itemID, + nil)) + + return nil, nil, false, clues. + NewWC(ctx, "error marked as skippable by handler"). + Label(graph.LabelsSkippable) + } + } + // If an item was deleted then return an empty file so we don't fail // the backup and return a sentinel error when asked for ItemInfo so // we don't display the item in the backup. @@ -512,7 +559,7 @@ func (lig *lazyItemGetter) GetData( err = clues.Stack(err) errs.AddRecoverable(ctx, err) - return nil, nil, false, err + return nil, nil, false, clues.Stack(err) } // Update the mod time to what we already told kopia about. This is required diff --git a/src/internal/m365/collection/exchange/collection_test.go b/src/internal/m365/collection/exchange/collection_test.go index 1ec9e81f3..24cd14da3 100644 --- a/src/internal/m365/collection/exchange/collection_test.go +++ b/src/internal/m365/collection/exchange/collection_test.go @@ -28,6 +28,7 @@ import ( "github.com/alcionai/corso/src/pkg/errs/core" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" "github.com/alcionai/corso/src/pkg/services/m365/api/graph" graphTD "github.com/alcionai/corso/src/pkg/services/m365/api/graph/testdata" ) @@ -153,6 +154,7 @@ func (suite *CollectionUnitSuite) TestNewCollection_state() { count.New()), "u", mock.DefaultItemGetSerialize(), + mock.NeverCanSkipFailChecker(), nil, nil, colType.validModTimes, @@ -298,6 +300,7 @@ func (suite *CollectionUnitSuite) TestPrefetchCollection_Items() { count.New()), "", &mock.ItemGetSerialize{}, + mock.NeverCanSkipFailChecker(), test.added, maps.Keys(test.removed), false, @@ -333,6 +336,232 @@ func (suite *CollectionUnitSuite) TestPrefetchCollection_Items() { } } +func (suite *CollectionUnitSuite) TestPrefetchCollection_Items_skipFailure() { + var ( + start = time.Now().Add(-time.Second) + statusUpdater = func(*support.ControllerOperationStatus) {} + ) + + table := []struct { + name string + category path.CategoryType + handler backupHandler + added map[string]time.Time + removed map[string]struct{} + expectItemCount int + expectSkippedCount int + expectErr assert.ErrorAssertionFunc + }{ + { + name: "no items", + category: path.EventsCategory, + handler: newEventBackupHandler(api.Client{}), + expectErr: assert.NoError, + }, + { + name: "events only added items", + category: path.EventsCategory, + handler: newEventBackupHandler(api.Client{}), + added: map[string]time.Time{ + "fisher": {}, + "flannigan": {}, + "fitzbog": {}, + }, + expectItemCount: 0, + expectSkippedCount: 3, + expectErr: assert.NoError, + }, + { + name: "events only removed items", + category: path.EventsCategory, + handler: newEventBackupHandler(api.Client{}), + removed: map[string]struct{}{ + "princess": {}, + "poppy": {}, + "petunia": {}, + }, + expectItemCount: 3, + expectSkippedCount: 0, + expectErr: assert.NoError, + }, + { + name: "events added and removed items", + category: path.EventsCategory, + handler: newEventBackupHandler(api.Client{}), + added: map[string]time.Time{ + "general": {}, + }, + removed: map[string]struct{}{ + "general": {}, + "goose": {}, + "grumbles": {}, + }, + expectItemCount: 3, + // not 1, because general is removed from the added + // map due to being in the removed map + expectSkippedCount: 0, + expectErr: assert.NoError, + }, + { + name: "contacts only added items", + category: path.ContactsCategory, + handler: newContactBackupHandler(api.Client{}), + added: map[string]time.Time{ + "fisher": {}, + "flannigan": {}, + "fitzbog": {}, + }, + expectItemCount: 0, + expectSkippedCount: 0, + expectErr: assert.Error, + }, + { + name: "contacts only removed items", + category: path.ContactsCategory, + handler: newContactBackupHandler(api.Client{}), + removed: map[string]struct{}{ + "princess": {}, + "poppy": {}, + "petunia": {}, + }, + expectItemCount: 3, + expectSkippedCount: 0, + expectErr: assert.NoError, + }, + { + name: "contacts added and removed items", + category: path.ContactsCategory, + handler: newContactBackupHandler(api.Client{}), + added: map[string]time.Time{ + "general": {}, + }, + removed: map[string]struct{}{ + "general": {}, + "goose": {}, + "grumbles": {}, + }, + expectItemCount: 3, + // not 1, because general is removed from the added + // map due to being in the removed map + expectSkippedCount: 0, + expectErr: assert.NoError, + }, + { + name: "mail only added items", + category: path.EmailCategory, + handler: newMailBackupHandler(api.Client{}), + added: map[string]time.Time{ + "fisher": {}, + "flannigan": {}, + "fitzbog": {}, + }, + expectItemCount: 0, + expectSkippedCount: 0, + expectErr: assert.Error, + }, + { + name: "mail only removed items", + category: path.EmailCategory, + handler: newMailBackupHandler(api.Client{}), + removed: map[string]struct{}{ + "princess": {}, + "poppy": {}, + "petunia": {}, + }, + expectItemCount: 3, + expectSkippedCount: 0, + expectErr: assert.NoError, + }, + { + name: "mail added and removed items", + category: path.EmailCategory, + handler: newMailBackupHandler(api.Client{}), + added: map[string]time.Time{ + "general": {}, + }, + removed: map[string]struct{}{ + "general": {}, + "goose": {}, + "grumbles": {}, + }, + expectItemCount: 3, + // not 1, because general is removed from the added + // map due to being in the removed map + expectSkippedCount: 0, + expectErr: assert.NoError, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + var ( + t = suite.T() + errs = fault.New(true) + itemCount int + ) + + ctx, flush := tester.NewContext(t) + defer flush() + + fullPath, err := path.Build("t", "pr", path.ExchangeService, test.category, false, "fnords", "smarf") + require.NoError(t, err, clues.ToCore(err)) + + locPath, err := path.Build("t", "pr", path.ExchangeService, test.category, false, "fnords", "smarf") + require.NoError(t, err, clues.ToCore(err)) + + opts := control.DefaultOptions() + opts.SkipEventsOnInstance503ForResources = map[string]struct{}{} + opts.SkipEventsOnInstance503ForResources["pr"] = struct{}{} + + col := NewCollection( + data.NewBaseCollection( + fullPath, + nil, + locPath.ToBuilder(), + opts, + false, + count.New()), + "pr", + &mock.ItemGetSerialize{ + SerializeErr: graph.ErrServiceUnavailableEmptyResp, + }, + test.handler, + test.added, + maps.Keys(test.removed), + false, + statusUpdater, + count.New()) + + for item := range col.Items(ctx, errs) { + itemCount++ + + _, rok := test.removed[item.ID()] + if rok { + dimt, ok := item.(data.ItemModTime) + require.True(t, ok, "item implements data.ItemModTime") + assert.True(t, dimt.ModTime().After(start), "deleted items should set mod time to now()") + assert.True(t, item.Deleted(), "removals should be marked as deleted") + } + + _, aok := test.added[item.ID()] + if !rok && aok { + assert.False(t, item.Deleted(), "additions should not be marked as deleted") + } + + assert.True(t, aok || rok, "item must be either added or removed: %q", item.ID()) + } + + test.expectErr(t, errs.Failure()) + assert.Equal( + t, + test.expectItemCount, + itemCount, + "should see all expected items") + assert.Len(t, errs.Skipped(), test.expectSkippedCount) + }) + } +} + // This test verifies skipped error cases are handled correctly by collection enumeration func (suite *CollectionUnitSuite) TestCollection_SkippedErrors() { var ( @@ -398,6 +627,7 @@ func (suite *CollectionUnitSuite) TestCollection_SkippedErrors() { count.New()), "", test.itemGetter, + mock.NeverCanSkipFailChecker(), test.added, nil, false, @@ -478,6 +708,7 @@ func (suite *CollectionUnitSuite) TestLazyFetchCollection_Items_LazyFetch() { expectItemCount: 3, expectReads: []string{ "fisher", + "flannigan", "fitzbog", }, }, @@ -530,6 +761,7 @@ func (suite *CollectionUnitSuite) TestLazyFetchCollection_Items_LazyFetch() { count.New()), "", mlg, + mock.NeverCanSkipFailChecker(), test.added, maps.Keys(test.removed), true, @@ -541,10 +773,10 @@ func (suite *CollectionUnitSuite) TestLazyFetchCollection_Items_LazyFetch() { _, rok := test.removed[item.ID()] if rok { - assert.True(t, item.Deleted(), "removals should be marked as deleted") dimt, ok := item.(data.ItemModTime) require.True(t, ok, "item implements data.ItemModTime") assert.True(t, dimt.ModTime().After(start), "deleted items should set mod time to now()") + assert.True(t, item.Deleted(), "removals should be marked as deleted") } modTime, aok := test.added[item.ID()] @@ -553,7 +785,6 @@ func (suite *CollectionUnitSuite) TestLazyFetchCollection_Items_LazyFetch() { // initializer. assert.Implements(t, (*data.ItemModTime)(nil), item) assert.Equal(t, modTime, item.(data.ItemModTime).ModTime(), "item mod time") - assert.False(t, item.Deleted(), "additions should not be marked as deleted") // Check if the test want's us to read the item's data so the lazy @@ -573,6 +804,8 @@ func (suite *CollectionUnitSuite) TestLazyFetchCollection_Items_LazyFetch() { // collection initializer. assert.NoError(t, err, clues.ToCore(err)) assert.Equal(t, modTime, info.Modified(), "ItemInfo mod time") + } else { + assert.Fail(t, "unexpected read on item %s", item.ID()) } } @@ -589,6 +822,294 @@ func (suite *CollectionUnitSuite) TestLazyFetchCollection_Items_LazyFetch() { } } +func (suite *CollectionUnitSuite) TestLazyFetchCollection_Items_skipFailure() { + var ( + start = time.Now().Add(-time.Second) + statusUpdater = func(*support.ControllerOperationStatus) {} + expectSkip = func(t *testing.T, err error) { + assert.Error(t, err, clues.ToCore(err)) + assert.ErrorContains(t, err, "skip") + assert.True(t, clues.HasLabel(err, graph.LabelsSkippable), clues.ToCore(err)) + } + expectNotSkipped = func(t *testing.T, err error) { + assert.Error(t, err, clues.ToCore(err)) + assert.NotContains(t, err.Error(), "skip") + } + ) + + table := []struct { + name string + added map[string]time.Time + removed map[string]struct{} + category path.CategoryType + handler backupHandler + expectItemCount int + expectSkippedCount int + expectReads []string + expectErr func(t *testing.T, err error) + expectFailure assert.ErrorAssertionFunc + }{ + { + name: "no items", + category: path.EventsCategory, + handler: newEventBackupHandler(api.Client{}), + expectFailure: assert.NoError, + }, + { + name: "events only added items", + category: path.EventsCategory, + handler: newEventBackupHandler(api.Client{}), + added: map[string]time.Time{ + "fisher": start.Add(time.Minute), + "flannigan": start.Add(2 * time.Minute), + "fitzbog": start.Add(3 * time.Minute), + }, + expectItemCount: 3, + expectSkippedCount: 3, + expectReads: []string{ + "fisher", + "flannigan", + "fitzbog", + }, + expectErr: expectSkip, + expectFailure: assert.NoError, + }, + { + name: "events only removed items", + category: path.EventsCategory, + handler: newEventBackupHandler(api.Client{}), + removed: map[string]struct{}{ + "princess": {}, + "poppy": {}, + "petunia": {}, + }, + expectItemCount: 3, + expectSkippedCount: 0, + expectErr: expectSkip, + expectFailure: assert.NoError, + }, + { + name: "events added and removed items", + category: path.EventsCategory, + handler: newEventBackupHandler(api.Client{}), + added: map[string]time.Time{ + "general": {}, + }, + removed: map[string]struct{}{ + "general": {}, + "goose": {}, + "grumbles": {}, + }, + expectItemCount: 3, + // not 1, because general is removed from the added + // map due to being in the removed map + expectSkippedCount: 0, + expectErr: expectSkip, + expectFailure: assert.NoError, + }, + { + name: "contacts only added items", + category: path.ContactsCategory, + handler: newContactBackupHandler(api.Client{}), + added: map[string]time.Time{ + "fisher": start.Add(time.Minute), + "flannigan": start.Add(2 * time.Minute), + "fitzbog": start.Add(3 * time.Minute), + }, + expectItemCount: 3, + expectSkippedCount: 0, + expectReads: []string{ + "fisher", + "flannigan", + "fitzbog", + }, + expectErr: expectNotSkipped, + expectFailure: assert.Error, + }, + { + name: "contacts only removed items", + category: path.ContactsCategory, + handler: newContactBackupHandler(api.Client{}), + removed: map[string]struct{}{ + "princess": {}, + "poppy": {}, + "petunia": {}, + }, + expectItemCount: 3, + expectSkippedCount: 0, + expectErr: expectNotSkipped, + expectFailure: assert.NoError, + }, + { + name: "contacts added and removed items", + category: path.ContactsCategory, + handler: newContactBackupHandler(api.Client{}), + added: map[string]time.Time{ + "general": {}, + }, + removed: map[string]struct{}{ + "general": {}, + "goose": {}, + "grumbles": {}, + }, + expectItemCount: 3, + // not 1, because general is removed from the added + // map due to being in the removed map + expectSkippedCount: 0, + expectErr: expectNotSkipped, + expectFailure: assert.NoError, + }, + { + name: "mail only added items", + category: path.EmailCategory, + handler: newMailBackupHandler(api.Client{}), + added: map[string]time.Time{ + "fisher": start.Add(time.Minute), + "flannigan": start.Add(2 * time.Minute), + "fitzbog": start.Add(3 * time.Minute), + }, + expectItemCount: 3, + expectSkippedCount: 0, + expectReads: []string{ + "fisher", + "flannigan", + "fitzbog", + }, + expectErr: expectNotSkipped, + expectFailure: assert.Error, + }, + { + name: "mail only removed items", + category: path.EmailCategory, + handler: newMailBackupHandler(api.Client{}), + removed: map[string]struct{}{ + "princess": {}, + "poppy": {}, + "petunia": {}, + }, + expectItemCount: 3, + expectSkippedCount: 0, + expectErr: expectNotSkipped, + expectFailure: assert.NoError, + }, + { + name: "mail added and removed items", + category: path.EmailCategory, + handler: newMailBackupHandler(api.Client{}), + added: map[string]time.Time{ + "general": {}, + }, + removed: map[string]struct{}{ + "general": {}, + "goose": {}, + "grumbles": {}, + }, + expectItemCount: 3, + // not 1, because general is removed from the added + // map due to being in the removed map + expectSkippedCount: 0, + expectErr: expectNotSkipped, + expectFailure: assert.NoError, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + var ( + t = suite.T() + errs = fault.New(false) + itemCount int + ) + + ctx, flush := tester.NewContext(t) + defer flush() + + fullPath, err := path.Build("t", "pr", path.ExchangeService, test.category, false, "fnords", "smarf") + require.NoError(t, err, clues.ToCore(err)) + + locPath, err := path.Build("t", "pr", path.ExchangeService, test.category, false, "fnords", "smarf") + require.NoError(t, err, clues.ToCore(err)) + + mlg := &mockLazyItemGetterSerializer{ + ItemGetSerialize: &mock.ItemGetSerialize{ + SerializeErr: graph.ErrServiceUnavailableEmptyResp, + }, + } + defer mlg.check(t, test.expectReads) + + opts := control.DefaultOptions() + opts.SkipEventsOnInstance503ForResources = map[string]struct{}{} + opts.SkipEventsOnInstance503ForResources["pr"] = struct{}{} + + col := NewCollection( + data.NewBaseCollection( + fullPath, + nil, + locPath.ToBuilder(), + opts, + false, + count.New()), + "pr", + mlg, + test.handler, + test.added, + maps.Keys(test.removed), + true, + statusUpdater, + count.New()) + + for item := range col.Items(ctx, errs) { + itemCount++ + + _, rok := test.removed[item.ID()] + if rok { + dimt, ok := item.(data.ItemModTime) + require.True(t, ok, "item implements data.ItemModTime") + assert.True(t, dimt.ModTime().After(start), "deleted items should set mod time to now()") + assert.True(t, item.Deleted(), "removals should be marked as deleted") + } + + modTime, aok := test.added[item.ID()] + if !rok && aok { + // Item's mod time should be what's passed into the collection + // initializer. + assert.Implements(t, (*data.ItemModTime)(nil), item) + assert.Equal(t, modTime, item.(data.ItemModTime).ModTime(), "item mod time") + assert.False(t, item.Deleted(), "additions should not be marked as deleted") + + // Check if the test want's us to read the item's data so the lazy + // data fetch is executed. + if slices.Contains(test.expectReads, item.ID()) { + r := item.ToReader() + + _, err := io.ReadAll(r) + test.expectErr(t, err) + + r.Close() + } else { + assert.Fail(t, "unexpected read on item %s", item.ID()) + } + } + + assert.True(t, aok || rok, "item must be either added or removed: %q", item.ID()) + } + + failure := errs.Failure() + if failure == nil && len(errs.Recovered()) > 0 { + failure = errs.Recovered()[0] + } + + test.expectFailure(t, failure, clues.ToCore(failure)) + assert.Equal( + t, + test.expectItemCount, + itemCount, + "should see all expected items") + assert.Len(t, errs.Skipped(), test.expectSkippedCount) + }) + } +} + func (suite *CollectionUnitSuite) TestLazyItem_NoRead_GetInfo_Errors() { t := suite.T() diff --git a/src/internal/m365/collection/exchange/contacts_backup.go b/src/internal/m365/collection/exchange/contacts_backup.go index 64a26f367..48c88b861 100644 --- a/src/internal/m365/collection/exchange/contacts_backup.go +++ b/src/internal/m365/collection/exchange/contacts_backup.go @@ -1,6 +1,8 @@ package exchange import ( + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/services/m365/api" "github.com/alcionai/corso/src/pkg/services/m365/api/graph" ) @@ -52,3 +54,11 @@ func (h contactBackupHandler) NewContainerCache( getter: h.ac, } } + +func (h contactBackupHandler) CanSkipItemFailure( + err error, + resourceID string, + opts control.Options, +) (fault.SkipCause, bool) { + return "", false +} diff --git a/src/internal/m365/collection/exchange/contacts_backup_test.go b/src/internal/m365/collection/exchange/contacts_backup_test.go new file mode 100644 index 000000000..ad1508115 --- /dev/null +++ b/src/internal/m365/collection/exchange/contacts_backup_test.go @@ -0,0 +1,83 @@ +package exchange + +import ( + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +type ContactsBackupHandlerUnitSuite struct { + tester.Suite +} + +func TestContactsBackupHandlerUnitSuite(t *testing.T) { + suite.Run(t, &ContactsBackupHandlerUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *ContactsBackupHandlerUnitSuite) TestHandler_CanSkipItemFailure() { + resourceID := uuid.NewString() + + table := []struct { + name string + err error + opts control.Options + expect assert.BoolAssertionFunc + expectCause fault.SkipCause + }{ + { + name: "no config", + err: assert.AnError, + opts: control.Options{}, + expect: assert.False, + }, + { + name: "false when map is empty", + err: assert.AnError, + opts: control.Options{ + SkipEventsOnInstance503ForResources: map[string]struct{}{}, + }, + expect: assert.False, + }, + { + name: "false on nil error", + err: nil, + opts: control.Options{ + SkipEventsOnInstance503ForResources: map[string]struct{}{ + resourceID: {}, + }, + }, + expect: assert.False, + }, + { + name: "false even if resource matches", + err: assert.AnError, + opts: control.Options{ + SkipEventsOnInstance503ForResources: map[string]struct{}{ + resourceID: {}, + }, + }, + expect: assert.False, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + h := newContactBackupHandler(api.Client{}) + cause, result := h.CanSkipItemFailure( + test.err, + resourceID, + test.opts) + + test.expect(t, result) + assert.Equal(t, test.expectCause, cause) + }) + } +} diff --git a/src/internal/m365/collection/exchange/events_backup.go b/src/internal/m365/collection/exchange/events_backup.go index 86773f7b7..9e33b39aa 100644 --- a/src/internal/m365/collection/exchange/events_backup.go +++ b/src/internal/m365/collection/exchange/events_backup.go @@ -1,6 +1,13 @@ package exchange import ( + "errors" + "net/http" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/services/m365/api" "github.com/alcionai/corso/src/pkg/services/m365/api/graph" ) @@ -52,3 +59,32 @@ func (h eventBackupHandler) NewContainerCache( getter: h.ac, } } + +// todo: this could be further improved buy specifying the call source and matching that +// with the expected error. Might be necessary if we use this for more than one error. +// But since we only call this in a single place at this time, that additional guard isn't +// built into the func. +func (h eventBackupHandler) CanSkipItemFailure( + err error, + resourceID string, + opts control.Options, +) (fault.SkipCause, bool) { + if err == nil { + return "", false + } + + // this is a bit overly cautious. we do know that we get 503s with empty response bodies + // due to fauilures when getting too many instances. We don't know for sure if we get + // generic, well formed 503s. But since we're working with specific resources and item + // IDs in the first place, that extra caution will help make sure an unexpected error dosn't + // slip through the cracks on us. + if !errors.Is(err, graph.ErrServiceUnavailableEmptyResp) && + !clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) { + return "", false + } + + _, ok := opts.SkipEventsOnInstance503ForResources[resourceID] + + // strict equals required here. ids are case sensitive. + return fault.SkipKnownEventInstance503s, ok +} diff --git a/src/internal/m365/collection/exchange/events_backup_test.go b/src/internal/m365/collection/exchange/events_backup_test.go new file mode 100644 index 000000000..dffac3b9c --- /dev/null +++ b/src/internal/m365/collection/exchange/events_backup_test.go @@ -0,0 +1,112 @@ +package exchange + +import ( + "net/http" + "testing" + + "github.com/alcionai/clues" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/services/m365/api" + "github.com/alcionai/corso/src/pkg/services/m365/api/graph" +) + +type EventsBackupHandlerUnitSuite struct { + tester.Suite +} + +func TestEventsBackupHandlerUnitSuite(t *testing.T) { + suite.Run(t, &EventsBackupHandlerUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *EventsBackupHandlerUnitSuite) TestHandler_CanSkipItemFailure() { + resourceID := uuid.NewString() + + table := []struct { + name string + err error + opts control.Options + expect assert.BoolAssertionFunc + expectCause fault.SkipCause + }{ + { + name: "no config", + err: graph.ErrServiceUnavailableEmptyResp, + opts: control.Options{}, + expect: assert.False, + expectCause: fault.SkipKnownEventInstance503s, + }, + { + name: "empty skip on 503", + err: graph.ErrServiceUnavailableEmptyResp, + opts: control.Options{ + SkipEventsOnInstance503ForResources: map[string]struct{}{}, + }, + expect: assert.False, + expectCause: fault.SkipKnownEventInstance503s, + }, + { + name: "nil error", + err: nil, + opts: control.Options{ + SkipEventsOnInstance503ForResources: map[string]struct{}{ + resourceID: {}, + }, + }, + expect: assert.False, + }, + { + name: "non-matching resource", + err: graph.ErrServiceUnavailableEmptyResp, + opts: control.Options{ + SkipEventsOnInstance503ForResources: map[string]struct{}{ + "foo": {}, + }, + }, + expect: assert.False, + expectCause: fault.SkipKnownEventInstance503s, + }, + { + name: "match on instance 503 empty resp", + err: graph.ErrServiceUnavailableEmptyResp, + opts: control.Options{ + SkipEventsOnInstance503ForResources: map[string]struct{}{ + resourceID: {}, + }, + }, + expect: assert.True, + expectCause: fault.SkipKnownEventInstance503s, + }, + { + name: "match on instance 503", + err: clues.New("arbitrary error"). + Label(graph.LabelStatus(http.StatusServiceUnavailable)), + opts: control.Options{ + SkipEventsOnInstance503ForResources: map[string]struct{}{ + resourceID: {}, + }, + }, + expect: assert.True, + expectCause: fault.SkipKnownEventInstance503s, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + h := newEventBackupHandler(api.Client{}) + cause, result := h.CanSkipItemFailure( + test.err, + resourceID, + test.opts) + + test.expect(t, result) + assert.Equal(t, test.expectCause, cause) + }) + } +} diff --git a/src/internal/m365/collection/exchange/handlers.go b/src/internal/m365/collection/exchange/handlers.go index 2da773a3d..2d1015a6f 100644 --- a/src/internal/m365/collection/exchange/handlers.go +++ b/src/internal/m365/collection/exchange/handlers.go @@ -26,6 +26,8 @@ type backupHandler interface { previewIncludeContainers() []string previewExcludeContainers() []string NewContainerCache(userID string) (string, graph.ContainerResolver) + + canSkipItemFailurer } type addedAndRemovedItemGetter interface { @@ -57,6 +59,14 @@ func BackupHandlers(ac api.Client) map[path.CategoryType]backupHandler { } } +type canSkipItemFailurer interface { + CanSkipItemFailure( + err error, + resourceID string, + opts control.Options, + ) (fault.SkipCause, bool) +} + // --------------------------------------------------------------------------- // restore // --------------------------------------------------------------------------- diff --git a/src/internal/m365/collection/exchange/mail_backup.go b/src/internal/m365/collection/exchange/mail_backup.go index 1bcc70ce5..551d1b890 100644 --- a/src/internal/m365/collection/exchange/mail_backup.go +++ b/src/internal/m365/collection/exchange/mail_backup.go @@ -1,6 +1,8 @@ package exchange import ( + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/services/m365/api" "github.com/alcionai/corso/src/pkg/services/m365/api/graph" ) @@ -57,3 +59,11 @@ func (h mailBackupHandler) NewContainerCache( getter: h.ac, } } + +func (h mailBackupHandler) CanSkipItemFailure( + err error, + resourceID string, + opts control.Options, +) (fault.SkipCause, bool) { + return "", false +} diff --git a/src/internal/m365/collection/exchange/mail_backup_test.go b/src/internal/m365/collection/exchange/mail_backup_test.go new file mode 100644 index 000000000..2cd3aecd2 --- /dev/null +++ b/src/internal/m365/collection/exchange/mail_backup_test.go @@ -0,0 +1,83 @@ +package exchange + +import ( + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +type MailBackupHandlerUnitSuite struct { + tester.Suite +} + +func TestMailBackupHandlerUnitSuite(t *testing.T) { + suite.Run(t, &MailBackupHandlerUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *MailBackupHandlerUnitSuite) TestHandler_CanSkipItemFailure() { + resourceID := uuid.NewString() + + table := []struct { + name string + err error + opts control.Options + expect assert.BoolAssertionFunc + expectCause fault.SkipCause + }{ + { + name: "no config", + err: assert.AnError, + opts: control.Options{}, + expect: assert.False, + }, + { + name: "false when map is empty", + err: assert.AnError, + opts: control.Options{ + SkipEventsOnInstance503ForResources: map[string]struct{}{}, + }, + expect: assert.False, + }, + { + name: "false on nil error", + err: nil, + opts: control.Options{ + SkipEventsOnInstance503ForResources: map[string]struct{}{ + resourceID: {}, + }, + }, + expect: assert.False, + }, + { + name: "false even if resource matches", + err: assert.AnError, + opts: control.Options{ + SkipEventsOnInstance503ForResources: map[string]struct{}{ + resourceID: {}, + }, + }, + expect: assert.False, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + h := newMailBackupHandler(api.Client{}) + cause, result := h.CanSkipItemFailure( + test.err, + resourceID, + test.opts) + + test.expect(t, result) + assert.Equal(t, test.expectCause, cause) + }) + } +} diff --git a/src/internal/m365/collection/exchange/mock/item.go b/src/internal/m365/collection/exchange/mock/item.go index 712388af3..df1198a2d 100644 --- a/src/internal/m365/collection/exchange/mock/item.go +++ b/src/internal/m365/collection/exchange/mock/item.go @@ -6,10 +6,15 @@ import ( "github.com/microsoft/kiota-abstractions-go/serialization" "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/services/m365/api" ) +// --------------------------------------------------------------------------- +// get and serialize item mock +// --------------------------------------------------------------------------- + type ItemGetSerialize struct { GetData serialization.Parsable GetCount int @@ -44,3 +49,23 @@ func (m *ItemGetSerialize) Serialize( func DefaultItemGetSerialize() *ItemGetSerialize { return &ItemGetSerialize{} } + +// --------------------------------------------------------------------------- +// can skip item failure mock +// --------------------------------------------------------------------------- + +type canSkipFailChecker struct { + canSkip bool +} + +func (m canSkipFailChecker) CanSkipItemFailure( + err error, + resourceID string, + opts control.Options, +) (fault.SkipCause, bool) { + return fault.SkipCause("testing"), m.canSkip +} + +func NeverCanSkipFailChecker() *canSkipFailChecker { + return &canSkipFailChecker{} +} diff --git a/src/internal/operations/backup_test.go b/src/internal/operations/backup_test.go index e75255c54..1a7aefa7e 100644 --- a/src/internal/operations/backup_test.go +++ b/src/internal/operations/backup_test.go @@ -420,6 +420,9 @@ func (suite *BackupOpUnitSuite) TestNewBackupOperation_configuredOptionsMatchInp MaxPages: 46, Enabled: true, }, + SkipEventsOnInstance503ForResources: map[string]struct{}{ + "resource": {}, + }, } t := suite.T() diff --git a/src/pkg/control/options.go b/src/pkg/control/options.go index 9d985e5d3..715068f8a 100644 --- a/src/pkg/control/options.go +++ b/src/pkg/control/options.go @@ -27,6 +27,11 @@ type Options struct { // backup data until the set limits without paying attention to what the other // had already backed up. PreviewLimits PreviewItemLimits `json:"previewItemLimits"` + + // specifying a resource tuple in this map allows that resource to produce + // a Skip instead of a recoverable error in case of a failure due to 503 when + // retrieving calendar event item data. + SkipEventsOnInstance503ForResources map[string]struct{} } // RateLimiter is the set of options applied to any external service facing rate diff --git a/src/pkg/fault/skipped.go b/src/pkg/fault/skipped.go index 1754f3a06..32f4763e1 100644 --- a/src/pkg/fault/skipped.go +++ b/src/pkg/fault/skipped.go @@ -12,34 +12,39 @@ type AddSkipper interface { AddSkip(ctx context.Context, s *Skipped) } -// skipCause identifies the well-known conditions to Skip an item. It is +// SkipCause identifies the well-known conditions to Skip an item. It is // important that skip cause enumerations do not overlap with general error // handling. Skips must be well known, well documented, and consistent. // Transient failures, undocumented or unknown conditions, and arbitrary // handling should never produce a skipped item. Those cases should get // handled as normal errors. -type skipCause string +type SkipCause string const ( // SkipMalware identifies a malware detection case. Files that graph // api identifies as malware cannot be downloaded or uploaded, and will // permanently fail any attempts to backup or restore. - SkipMalware skipCause = "malware_detected" + SkipMalware SkipCause = "malware_detected" // SkipOneNote identifies that a file was skipped because it // was a OneNote file that remains inaccessible (503 server response) // regardless of the number of retries. //nolint:lll // https://support.microsoft.com/en-us/office/restrictions-and-limitations-in-onedrive-and-sharepoint-64883a5d-228e-48f5-b3d2-eb39e07630fa#onenotenotebooks - SkipOneNote skipCause = "inaccessible_one_note_file" + SkipOneNote SkipCause = "inaccessible_one_note_file" // SkipInvalidRecipients identifies that an email was skipped because Exchange // believes it is not valid and fails any attempt to read it. - SkipInvalidRecipients skipCause = "invalid_recipients_email" + SkipInvalidRecipients SkipCause = "invalid_recipients_email" // SkipCorruptData identifies that an email was skipped because graph reported // that the email data was corrupt and failed all attempts to read it. - SkipCorruptData skipCause = "corrupt_data" + SkipCorruptData SkipCause = "corrupt_data" + + // SkipKnownEventInstance503s identifies cases where we have a pre-configured list + // of event IDs where the events are known to fail with a 503 due to there being + // too many instances to retrieve from graph api. + SkipKnownEventInstance503s SkipCause = "known_event_instance_503" ) var _ print.Printable = &Skipped{} @@ -70,7 +75,7 @@ func (s *Skipped) String() string { } // HasCause compares the underlying cause against the parameter. -func (s *Skipped) HasCause(c skipCause) bool { +func (s *Skipped) HasCause(c SkipCause) bool { if s == nil { return false } @@ -105,27 +110,27 @@ func (s Skipped) Values(bool) []string { } // ContainerSkip produces a Container-kind Item for tracking skipped items. -func ContainerSkip(cause skipCause, namespace, id, name string, addtl map[string]any) *Skipped { +func ContainerSkip(cause SkipCause, namespace, id, name string, addtl map[string]any) *Skipped { return itemSkip(ContainerType, cause, namespace, id, name, addtl) } // EmailSkip produces a Email-kind Item for tracking skipped items. -func EmailSkip(cause skipCause, user, id string, addtl map[string]any) *Skipped { +func EmailSkip(cause SkipCause, user, id string, addtl map[string]any) *Skipped { return itemSkip(EmailType, cause, user, id, "", addtl) } // FileSkip produces a File-kind Item for tracking skipped items. -func FileSkip(cause skipCause, namespace, id, name string, addtl map[string]any) *Skipped { +func FileSkip(cause SkipCause, namespace, id, name string, addtl map[string]any) *Skipped { return itemSkip(FileType, cause, namespace, id, name, addtl) } // OnwerSkip produces a ResourceOwner-kind Item for tracking skipped items. -func OwnerSkip(cause skipCause, namespace, id, name string, addtl map[string]any) *Skipped { +func OwnerSkip(cause SkipCause, namespace, id, name string, addtl map[string]any) *Skipped { return itemSkip(ResourceOwnerType, cause, namespace, id, name, addtl) } // itemSkip produces a Item of the provided type for tracking skipped items. -func itemSkip(t ItemType, cause skipCause, namespace, id, name string, addtl map[string]any) *Skipped { +func itemSkip(t ItemType, cause SkipCause, namespace, id, name string, addtl map[string]any) *Skipped { return &Skipped{ Item: Item{ Namespace: namespace, diff --git a/src/pkg/services/m365/api/graph/errors.go b/src/pkg/services/m365/api/graph/errors.go index 0b7793adb..b3bd4b740 100644 --- a/src/pkg/services/m365/api/graph/errors.go +++ b/src/pkg/services/m365/api/graph/errors.go @@ -156,7 +156,8 @@ func stackWithCoreErr(ctx context.Context, err error, traceDepth int) error { labels = append(labels, core.LabelRootCauseUnknown) } - stacked := stackWithDepth(ctx, err, 1+traceDepth) + stacked := stackWithDepth(ctx, err, 1+traceDepth). + Label(LabelStatus(ode.Resp.StatusCode)) // labeling here because we want the context from stackWithDepth first for _, label := range labels {