Skip emails that cannot be retrieved (#4834)

Adds a skip condition for emails that can be enumerated but are not returned from the server (Exchange) because
it believes they are corrupt/invalid.

This handles the `ErrorInvalidRecipients` condition we believe is hit when exchange finds an email that pre-dates
M365 mailbox creation (either a server corruption or triggered by on-prem->M365 migration)

---

#### Does this PR need a docs update or release note?

- [x]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [ ]  No

#### Type of change

<!--- Please check the type of change your PR introduces: --->
- [ ] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* #<issue>

#### Test Plan

<!-- How will this be tested prior to merging.-->
- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
Vaibhav Kamra 2023-12-12 19:21:45 -08:00 committed by GitHub
parent b6dd06b458
commit d16528be50
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 163 additions and 6 deletions

View File

@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] (beta)
### Fixed
- Handle the case where an email cannot be retrieved from Exchange due to an `ErrorInvalidRecipients` error. In
this case, Corso will skip over the item but report this in the backup summary.
## [v0.17.0] (beta) - 2023-12-11
### Changed

View File

@ -277,15 +277,30 @@ func (col *prefetchCollection) streamItems(
col.Opts().ToggleFeatures.ExchangeImmutableIDs,
parentPath)
if err != nil {
// Don't report errors for deleted items as there's no way for us to
// back up data that is gone. Record it as a "success", since there's
// nothing else we can do, and not reporting it will make the status
// investigation upset.
if graph.IsErrDeletedInFlight(err) {
// Handle known error cases
switch {
case graph.IsErrDeletedInFlight(err):
// Don't report errors for deleted items as there's no way for us to
// back up data that is gone. Record it as a "success", since there's
// nothing else we can do, and not reporting it will make the status
// investigation upset.
col.Counter.Inc(count.StreamItemsDeletedInFlight)
atomic.AddInt64(&success, 1)
logger.CtxErr(ctx, err).Info("item not found")
} else {
case graph.IsErrInvalidRecipients(err):
// These items cannot be downloaded (Exchange believes the recipient is invalid)
// Add this to the skipped list so this information is visible in backup info
logger.
CtxErr(ctx, err).
With("skipped_reason", fault.SkipInvalidRecipients).
Info("inaccessible email")
errs.AddSkip(ctx, fault.EmailSkip(
fault.SkipInvalidRecipients,
user,
id,
map[string]any{"parentPath": parentPath}))
atomic.AddInt64(&success, 1)
default:
col.Counter.Inc(count.StreamItemsErred)
el.AddRecoverable(ctx, clues.Wrap(err, "fetching item").Label(fault.LabelForceNoBackupCreation))
}

View File

@ -28,6 +28,7 @@ import (
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/services/m365/api/graph"
graphTD "github.com/alcionai/corso/src/pkg/services/m365/api/graph/testdata"
)
type CollectionUnitSuite struct {
@ -331,6 +332,88 @@ func (suite *CollectionUnitSuite) TestPrefetchCollection_Items() {
}
}
// This test verifies skipped error cases are handled correctly by collection enumeration
func (suite *CollectionUnitSuite) TestCollection_SkippedErrors() {
var (
t = suite.T()
statusUpdater = func(*support.ControllerOperationStatus) {}
)
fullPath, err := path.Build("t", "pr", path.ExchangeService, path.EmailCategory, false, "fnords", "smarf")
require.NoError(t, err, clues.ToCore(err))
locPath, err := path.Build("t", "pr", path.ExchangeService, path.EmailCategory, false, "fnords", "smarf")
require.NoError(t, err, clues.ToCore(err))
table := []struct {
name string
added map[string]time.Time
expectItemCount int
itemGetter itemGetterSerializer
expectedSkipError *fault.Skipped
}{
{
name: "ErrorInvalidRecipients",
added: map[string]time.Time{
"fisher": {},
},
expectItemCount: 0,
itemGetter: &mock.ItemGetSerialize{
GetErr: graphTD.ODataErr(string(graph.ErrorInvalidRecipients)),
},
expectedSkipError: fault.EmailSkip(fault.SkipInvalidRecipients, "", "fisher", nil),
},
}
for _, test := range table {
suite.Run(test.name, func() {
var (
t = suite.T()
errs = fault.New(true)
itemCount int
)
ctx, flush := tester.NewContext(t)
defer flush()
col := NewCollection(
data.NewBaseCollection(
fullPath,
nil,
locPath.ToBuilder(),
control.DefaultOptions(),
false,
count.New()),
"",
test.itemGetter,
test.added,
nil,
false,
statusUpdater,
count.New())
for range col.Items(ctx, errs) {
itemCount++
}
assert.NoError(t, errs.Failure())
if test.expectedSkipError != nil {
assert.Len(t, errs.Skipped(), 1)
skippedItem := errs.Skipped()[0].Item
assert.Equal(t, skippedItem.Cause, test.expectedSkipError.Item.Cause)
assert.Equal(t, skippedItem.ID, test.expectedSkipError.Item.ID)
}
assert.Equal(
t,
test.expectItemCount,
itemCount,
"should see all expected items")
})
}
}
type mockLazyItemGetterSerializer struct {
*mock.ItemGetSerialize
callIDs []string

View File

@ -14,6 +14,7 @@ const (
type ItemType string
const (
EmailType ItemType = "email"
FileType ItemType = "file"
ContainerType ItemType = "container"
ResourceOwnerType ItemType = "resource_owner"
@ -21,6 +22,8 @@ const (
func (it ItemType) Printable() string {
switch it {
case EmailType:
return "Email"
case FileType:
return "File"
case ContainerType:

View File

@ -32,6 +32,10 @@ const (
//nolint:lll
// https://support.microsoft.com/en-us/office/restrictions-and-limitations-in-onedrive-and-sharepoint-64883a5d-228e-48f5-b3d2-eb39e07630fa#onenotenotebooks
SkipOneNote skipCause = "inaccessible_one_note_file"
// SkipInvalidRecipients identifies that an email was skipped because Exchange
// believes it is not valid and fails any attempt to read it.
SkipInvalidRecipients skipCause = "invalid_recipients_email"
)
var _ print.Printable = &Skipped{}
@ -101,6 +105,11 @@ func ContainerSkip(cause skipCause, namespace, id, name string, addtl map[string
return itemSkip(ContainerType, cause, namespace, id, name, addtl)
}
// EmailSkip produces a Email-kind Item for tracking skipped items.
func EmailSkip(cause skipCause, user, id string, addtl map[string]any) *Skipped {
return itemSkip(EmailType, cause, user, id, "", addtl)
}
// FileSkip produces a File-kind Item for tracking skipped items.
func FileSkip(cause skipCause, namespace, id, name string, addtl map[string]any) *Skipped {
return itemSkip(FileType, cause, namespace, id, name, addtl)

View File

@ -43,6 +43,11 @@ const (
emailFolderNotFound errorCode = "ErrorSyncFolderNotFound"
ErrorAccessDenied errorCode = "ErrorAccessDenied"
errorItemNotFound errorCode = "ErrorItemNotFound"
// This error occurs when an email is enumerated but retrieving it fails
// - we believe - due to it pre-dating mailbox creation. Possible explanations
// are mailbox creation racing with email receipt or a similar issue triggered
// due to on-prem->M365 mailbox migration.
ErrorInvalidRecipients errorCode = "ErrorInvalidRecipients"
// This error occurs when an attempt is made to create a folder that has
// the same name as another folder in the same parent. Such duplicate folder
// names are not allowed by graph.
@ -201,6 +206,10 @@ func IsErrUserNotFound(err error) bool {
return false
}
func IsErrInvalidRecipients(err error) bool {
return hasErrorCode(err, ErrorInvalidRecipients)
}
func IsErrCannotOpenFileAttachment(err error) bool {
return hasErrorCode(err, cannotOpenFileAttachment)
}

View File

@ -203,6 +203,40 @@ func (suite *GraphErrorsUnitSuite) TestIsErrDeletedInFlight() {
}
}
func (suite *GraphErrorsUnitSuite) Test() {
table := []struct {
name string
err error
expect assert.BoolAssertionFunc
}{
{
name: "nil",
err: nil,
expect: assert.False,
},
{
name: "non-matching",
err: assert.AnError,
expect: assert.False,
},
{
name: "non-matching oDataErr",
err: graphTD.ODataErr("fnords"),
expect: assert.False,
},
{
name: "invalid receipient oDataErr",
err: graphTD.ODataErr(string(ErrorInvalidRecipients)),
expect: assert.True,
},
}
for _, test := range table {
suite.Run(test.name, func() {
test.expect(suite.T(), IsErrInvalidRecipients(test.err))
})
}
}
func (suite *GraphErrorsUnitSuite) TestIsErrInvalidDelta() {
table := []struct {
name string