From d16528be507fb799e598aa77193b1c5f2fd61846 Mon Sep 17 00:00:00 2001 From: Vaibhav Kamra Date: Tue, 12 Dec 2023 19:21:45 -0800 Subject: [PATCH] Skip emails that cannot be retrieved (#4834) Adds a skip condition for emails that can be enumerated but are not returned from the server (Exchange) because it believes they are corrupt/invalid. This handles the `ErrorInvalidRecipients` condition we believe is hit when exchange finds an email that pre-dates M365 mailbox creation (either a server corruption or triggered by on-prem->M365 migration) --- #### Does this PR need a docs update or release note? - [x] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [ ] :no_entry: No #### Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * # #### Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- CHANGELOG.md | 4 + .../m365/collection/exchange/collection.go | 27 ++++-- .../collection/exchange/collection_test.go | 83 +++++++++++++++++++ src/pkg/fault/item.go | 3 + src/pkg/fault/skipped.go | 9 ++ src/pkg/services/m365/api/graph/errors.go | 9 ++ .../services/m365/api/graph/errors_test.go | 34 ++++++++ 7 files changed, 163 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b75ed2eb..7209851ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] (beta) +### Fixed +- Handle the case where an email cannot be retrieved from Exchange due to an `ErrorInvalidRecipients` error. In +this case, Corso will skip over the item but report this in the backup summary. + ## [v0.17.0] (beta) - 2023-12-11 ### Changed diff --git a/src/internal/m365/collection/exchange/collection.go b/src/internal/m365/collection/exchange/collection.go index 9550de760..631119e5b 100644 --- a/src/internal/m365/collection/exchange/collection.go +++ b/src/internal/m365/collection/exchange/collection.go @@ -277,15 +277,30 @@ func (col *prefetchCollection) streamItems( col.Opts().ToggleFeatures.ExchangeImmutableIDs, parentPath) if err != nil { - // Don't report errors for deleted items as there's no way for us to - // back up data that is gone. Record it as a "success", since there's - // nothing else we can do, and not reporting it will make the status - // investigation upset. - if graph.IsErrDeletedInFlight(err) { + // Handle known error cases + switch { + case graph.IsErrDeletedInFlight(err): + // Don't report errors for deleted items as there's no way for us to + // back up data that is gone. Record it as a "success", since there's + // nothing else we can do, and not reporting it will make the status + // investigation upset. col.Counter.Inc(count.StreamItemsDeletedInFlight) atomic.AddInt64(&success, 1) logger.CtxErr(ctx, err).Info("item not found") - } else { + case graph.IsErrInvalidRecipients(err): + // These items cannot be downloaded (Exchange believes the recipient is invalid) + // Add this to the skipped list so this information is visible in backup info + logger. + CtxErr(ctx, err). + With("skipped_reason", fault.SkipInvalidRecipients). + Info("inaccessible email") + errs.AddSkip(ctx, fault.EmailSkip( + fault.SkipInvalidRecipients, + user, + id, + map[string]any{"parentPath": parentPath})) + atomic.AddInt64(&success, 1) + default: col.Counter.Inc(count.StreamItemsErred) el.AddRecoverable(ctx, clues.Wrap(err, "fetching item").Label(fault.LabelForceNoBackupCreation)) } diff --git a/src/internal/m365/collection/exchange/collection_test.go b/src/internal/m365/collection/exchange/collection_test.go index baec20c3d..90aaf28a5 100644 --- a/src/internal/m365/collection/exchange/collection_test.go +++ b/src/internal/m365/collection/exchange/collection_test.go @@ -28,6 +28,7 @@ import ( "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/services/m365/api/graph" + graphTD "github.com/alcionai/corso/src/pkg/services/m365/api/graph/testdata" ) type CollectionUnitSuite struct { @@ -331,6 +332,88 @@ func (suite *CollectionUnitSuite) TestPrefetchCollection_Items() { } } +// This test verifies skipped error cases are handled correctly by collection enumeration +func (suite *CollectionUnitSuite) TestCollection_SkippedErrors() { + var ( + t = suite.T() + statusUpdater = func(*support.ControllerOperationStatus) {} + ) + + fullPath, err := path.Build("t", "pr", path.ExchangeService, path.EmailCategory, false, "fnords", "smarf") + require.NoError(t, err, clues.ToCore(err)) + + locPath, err := path.Build("t", "pr", path.ExchangeService, path.EmailCategory, false, "fnords", "smarf") + require.NoError(t, err, clues.ToCore(err)) + + table := []struct { + name string + added map[string]time.Time + expectItemCount int + itemGetter itemGetterSerializer + expectedSkipError *fault.Skipped + }{ + { + name: "ErrorInvalidRecipients", + added: map[string]time.Time{ + "fisher": {}, + }, + expectItemCount: 0, + itemGetter: &mock.ItemGetSerialize{ + GetErr: graphTD.ODataErr(string(graph.ErrorInvalidRecipients)), + }, + expectedSkipError: fault.EmailSkip(fault.SkipInvalidRecipients, "", "fisher", nil), + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + var ( + t = suite.T() + errs = fault.New(true) + itemCount int + ) + + ctx, flush := tester.NewContext(t) + defer flush() + + col := NewCollection( + data.NewBaseCollection( + fullPath, + nil, + locPath.ToBuilder(), + control.DefaultOptions(), + false, + count.New()), + "", + test.itemGetter, + test.added, + nil, + false, + statusUpdater, + count.New()) + + for range col.Items(ctx, errs) { + itemCount++ + } + + assert.NoError(t, errs.Failure()) + if test.expectedSkipError != nil { + assert.Len(t, errs.Skipped(), 1) + skippedItem := errs.Skipped()[0].Item + + assert.Equal(t, skippedItem.Cause, test.expectedSkipError.Item.Cause) + assert.Equal(t, skippedItem.ID, test.expectedSkipError.Item.ID) + } + + assert.Equal( + t, + test.expectItemCount, + itemCount, + "should see all expected items") + }) + } +} + type mockLazyItemGetterSerializer struct { *mock.ItemGetSerialize callIDs []string diff --git a/src/pkg/fault/item.go b/src/pkg/fault/item.go index d8f1b4c3e..ab68fcc99 100644 --- a/src/pkg/fault/item.go +++ b/src/pkg/fault/item.go @@ -14,6 +14,7 @@ const ( type ItemType string const ( + EmailType ItemType = "email" FileType ItemType = "file" ContainerType ItemType = "container" ResourceOwnerType ItemType = "resource_owner" @@ -21,6 +22,8 @@ const ( func (it ItemType) Printable() string { switch it { + case EmailType: + return "Email" case FileType: return "File" case ContainerType: diff --git a/src/pkg/fault/skipped.go b/src/pkg/fault/skipped.go index 836f079c7..3bb08af50 100644 --- a/src/pkg/fault/skipped.go +++ b/src/pkg/fault/skipped.go @@ -32,6 +32,10 @@ const ( //nolint:lll // https://support.microsoft.com/en-us/office/restrictions-and-limitations-in-onedrive-and-sharepoint-64883a5d-228e-48f5-b3d2-eb39e07630fa#onenotenotebooks SkipOneNote skipCause = "inaccessible_one_note_file" + + // SkipInvalidRecipients identifies that an email was skipped because Exchange + // believes it is not valid and fails any attempt to read it. + SkipInvalidRecipients skipCause = "invalid_recipients_email" ) var _ print.Printable = &Skipped{} @@ -101,6 +105,11 @@ func ContainerSkip(cause skipCause, namespace, id, name string, addtl map[string return itemSkip(ContainerType, cause, namespace, id, name, addtl) } +// EmailSkip produces a Email-kind Item for tracking skipped items. +func EmailSkip(cause skipCause, user, id string, addtl map[string]any) *Skipped { + return itemSkip(EmailType, cause, user, id, "", addtl) +} + // FileSkip produces a File-kind Item for tracking skipped items. func FileSkip(cause skipCause, namespace, id, name string, addtl map[string]any) *Skipped { return itemSkip(FileType, cause, namespace, id, name, addtl) diff --git a/src/pkg/services/m365/api/graph/errors.go b/src/pkg/services/m365/api/graph/errors.go index 8cfcfb157..b36c544e6 100644 --- a/src/pkg/services/m365/api/graph/errors.go +++ b/src/pkg/services/m365/api/graph/errors.go @@ -43,6 +43,11 @@ const ( emailFolderNotFound errorCode = "ErrorSyncFolderNotFound" ErrorAccessDenied errorCode = "ErrorAccessDenied" errorItemNotFound errorCode = "ErrorItemNotFound" + // This error occurs when an email is enumerated but retrieving it fails + // - we believe - due to it pre-dating mailbox creation. Possible explanations + // are mailbox creation racing with email receipt or a similar issue triggered + // due to on-prem->M365 mailbox migration. + ErrorInvalidRecipients errorCode = "ErrorInvalidRecipients" // This error occurs when an attempt is made to create a folder that has // the same name as another folder in the same parent. Such duplicate folder // names are not allowed by graph. @@ -201,6 +206,10 @@ func IsErrUserNotFound(err error) bool { return false } +func IsErrInvalidRecipients(err error) bool { + return hasErrorCode(err, ErrorInvalidRecipients) +} + func IsErrCannotOpenFileAttachment(err error) bool { return hasErrorCode(err, cannotOpenFileAttachment) } diff --git a/src/pkg/services/m365/api/graph/errors_test.go b/src/pkg/services/m365/api/graph/errors_test.go index f1801cbc6..8d3c5c5eb 100644 --- a/src/pkg/services/m365/api/graph/errors_test.go +++ b/src/pkg/services/m365/api/graph/errors_test.go @@ -203,6 +203,40 @@ func (suite *GraphErrorsUnitSuite) TestIsErrDeletedInFlight() { } } +func (suite *GraphErrorsUnitSuite) Test() { + table := []struct { + name string + err error + expect assert.BoolAssertionFunc + }{ + { + name: "nil", + err: nil, + expect: assert.False, + }, + { + name: "non-matching", + err: assert.AnError, + expect: assert.False, + }, + { + name: "non-matching oDataErr", + err: graphTD.ODataErr("fnords"), + expect: assert.False, + }, + { + name: "invalid receipient oDataErr", + err: graphTD.ODataErr(string(ErrorInvalidRecipients)), + expect: assert.True, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + test.expect(suite.T(), IsErrInvalidRecipients(test.err)) + }) + } +} + func (suite *GraphErrorsUnitSuite) TestIsErrInvalidDelta() { table := []struct { name string