diff --git a/CHANGELOG.md b/CHANGELOG.md index 409d988fa..0583016a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 this case, Corso will skip over the item but report this in the backup summary. - Guarantee Exchange email restoration when restoring multiple attachments. Some previous restores were failing with `ErrorItemNotFound`. - Avoid Graph SDK `Requests must contain extension changes exclusively.` errors by removing server-populated field from restored event items. +- Handle cases where Exchange backup stored invalid JSON blobs if there were special characters in the user content. These would result in errors during restore or export. ### Known issues - Restoring OneDrive, SharePoint, or Teams & Groups items shared with external users while the tenant or site is configured to not allow sharing with external users will not restore permissions. diff --git a/src/internal/m365/collection/exchange/restore.go b/src/internal/m365/collection/exchange/restore.go index dc8395f19..038c98ae7 100644 --- a/src/internal/m365/collection/exchange/restore.go +++ b/src/internal/m365/collection/exchange/restore.go @@ -88,7 +88,7 @@ func RestoreCollection( ctr) if err != nil { if !graph.IsErrItemAlreadyExistsConflict(err) { - el.AddRecoverable(ictx, err) + el.AddRecoverable(ictx, clues.Wrap(err, "restoring item")) } continue diff --git a/src/internal/m365/controller_test.go b/src/internal/m365/controller_test.go index 3711ce287..25cc08c9a 100644 --- a/src/internal/m365/controller_test.go +++ b/src/internal/m365/controller_test.go @@ -777,6 +777,24 @@ func (suite *ControllerIntegrationSuite) TestRestoreAndBackup_core() { subjectText := "Test message for restore" table := []restoreBackupInfo{ + { + name: "EmailWithSpecialCharacters", + service: path.ExchangeService, + collections: []stub.ColInfo{ + { + PathElements: []string{api.MailInbox}, + Category: path.EmailCategory, + 
Items: []stub.ItemInfo{ + { + Name: "someencodeditemID", + Data: exchMock.MessageWithSpecialCharacters( + subjectText + "-1"), + LookupKey: subjectText + "-1", + }, + }, + }, + }, + }, { name: "EmailsWithAttachments", service: path.ExchangeService, diff --git a/src/internal/m365/service/exchange/export_test.go b/src/internal/m365/service/exchange/export_test.go index d8af878b6..baf4e4d37 100644 --- a/src/internal/m365/service/exchange/export_test.go +++ b/src/internal/m365/service/exchange/export_test.go @@ -13,6 +13,7 @@ import ( "github.com/alcionai/corso/src/internal/data" dataMock "github.com/alcionai/corso/src/internal/data/mock" "github.com/alcionai/corso/src/internal/m365/collection/exchange" + exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/pkg/control" @@ -66,6 +67,29 @@ func (suite *ExportUnitSuite) TestGetItems() { }, }, }, + { + name: "single item with special characters", + version: 1, + backingCollection: data.NoFetchRestoreCollection{ + Collection: dataMock.Collection{ + Path: p, + ItemData: []data.Item{ + &dataMock.Item{ + ItemID: "id1", + Reader: io.NopCloser(bytes.NewReader( + exchMock.MessageWithSpecialCharacters("special characters"))), + }, + }, + }, + }, + expectedItems: []export.Item{ + { + ID: "id1", + Name: "id1.eml", + Body: io.NopCloser(bytes.NewReader(emailBodyBytes)), + }, + }, + }, { name: "multiple items", version: 1, diff --git a/src/internal/m365/service/exchange/mock/mail.go b/src/internal/m365/service/exchange/mock/mail.go index 5de3064b6..59a0361a9 100644 --- a/src/internal/m365/service/exchange/mock/mail.go +++ b/src/internal/m365/service/exchange/mock/mail.go @@ -101,8 +101,24 @@ const ( ], 
"webLink":"https://outlook.office365.com/owa/?ItemID=AAMkAGZmNjNlYjI3LWJlZWYtNGI4Mi04YjMyLTIxYThkNGQ4NmY1MwBGAAAAAADCNgjhM9QmQYWNcI7hCpPrBwDSEBNbUIB9RL6ePDeF3FIYAAAAAAEMAADSEBNbUIB9RL6ePDeF3FIYAAB3XwIkAAA%%3D&exvsurl=1&viewmodel=ReadMessageItem" }` + + emailWithSpecialCharacters = `{ + "importance": "normal", + "internetMessageId": "", + "sentDateTime": "2022-09-26T23:15:46Z", + "receivedDateTime": "2022-09-26T23:20:46Z", + "body":{ + "content":"abcd` + string(rune(8)) + string(rune(8)) + `\"", + "contentType":"text" + }, + "subject":"%s" + }` ) +func MessageWithSpecialCharacters(subject string) []byte { + return []byte(fmt.Sprintf(emailWithSpecialCharacters, subject)) +} + // MessageBytes returns bytes for a Messageable item. // Contents verified as working with sample data from kiota-serialization-json-go v0.5.5 func MessageBytes(subject string) []byte { diff --git a/src/pkg/services/m365/api/consts.go b/src/pkg/services/m365/api/consts.go index 13d88de14..2e9f48f6a 100644 --- a/src/pkg/services/m365/api/consts.go +++ b/src/pkg/services/m365/api/consts.go @@ -7,4 +7,7 @@ const ( DefaultContacts = "Contacts" MailInbox = "Inbox" MsgFolderRoot = "msgfolderroot" + + // Kiota JSON invalid JSON error message. 
+ invalidJSON = "invalid json type" ) diff --git a/src/pkg/services/m365/api/contacts.go b/src/pkg/services/m365/api/contacts.go index 1fb2886cf..16041447e 100644 --- a/src/pkg/services/m365/api/contacts.go +++ b/src/pkg/services/m365/api/contacts.go @@ -3,6 +3,7 @@ package api import ( "context" "fmt" + "strings" "github.com/alcionai/clues" "github.com/microsoft/kiota-abstractions-go/serialization" @@ -11,6 +12,7 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/users" "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/common/sanitize" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/services/m365/api/graph" @@ -252,10 +254,27 @@ func (c Contacts) DeleteItem( // Serialization // --------------------------------------------------------------------------- -func BytesToContactable(bytes []byte) (models.Contactable, error) { +func bytesToContactable(bytes []byte) (serialization.Parsable, error) { v, err := CreateFromBytes(bytes, models.CreateContactFromDiscriminatorValue) if err != nil { - return nil, clues.Wrap(err, "deserializing bytes to contact") + if !strings.Contains(err.Error(), invalidJSON) { + return nil, clues.Wrap(err, "deserializing bytes to contact") + } + + // If the JSON was invalid try sanitizing and deserializing again. + // Sanitizing should transform characters < 0x20 according to the spec where + // possible. The resulting JSON may still be invalid though. 
+ bytes = sanitize.JSONBytes(bytes) + v, err = CreateFromBytes(bytes, models.CreateContactFromDiscriminatorValue) + } + + return v, clues.Stack(err).OrNil() +} + +func BytesToContactable(bytes []byte) (models.Contactable, error) { + v, err := bytesToContactable(bytes) + if err != nil { + return nil, clues.Stack(err) } return v.(models.Contactable), nil diff --git a/src/pkg/services/m365/api/events.go b/src/pkg/services/m365/api/events.go index 3a2dd996c..7feb2837b 100644 --- a/src/pkg/services/m365/api/events.go +++ b/src/pkg/services/m365/api/events.go @@ -17,6 +17,7 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/users" "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/common/sanitize" "github.com/alcionai/corso/src/internal/common/str" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/dttm" @@ -556,10 +557,27 @@ func (c Events) PostLargeAttachment( // Serialization // --------------------------------------------------------------------------- -func BytesToEventable(body []byte) (models.Eventable, error) { +func bytesToEventable(body []byte) (serialization.Parsable, error) { v, err := CreateFromBytes(body, models.CreateEventFromDiscriminatorValue) if err != nil { - return nil, clues.Wrap(err, "deserializing bytes to event") + if !strings.Contains(err.Error(), invalidJSON) { + return nil, clues.Wrap(err, "deserializing bytes to event") + } + + // If the JSON was invalid try sanitizing and deserializing again. + // Sanitizing should transform characters < 0x20 according to the spec where + // possible. The resulting JSON may still be invalid though. 
+ body = sanitize.JSONBytes(body) + v, err = CreateFromBytes(body, models.CreateEventFromDiscriminatorValue) + } + + return v, clues.Stack(err).OrNil() +} + +func BytesToEventable(body []byte) (models.Eventable, error) { + v, err := bytesToEventable(body) + if err != nil { + return nil, clues.Stack(err) } return v.(models.Eventable), nil diff --git a/src/pkg/services/m365/api/mail.go b/src/pkg/services/m365/api/mail.go index d8797b12d..fe98200ee 100644 --- a/src/pkg/services/m365/api/mail.go +++ b/src/pkg/services/m365/api/mail.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "io" + "strings" "github.com/alcionai/clues" "github.com/microsoft/kiota-abstractions-go/serialization" @@ -13,6 +14,7 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/users" "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/common/sanitize" "github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/dttm" "github.com/alcionai/corso/src/pkg/fault" @@ -596,10 +598,27 @@ func (c Mail) PostLargeAttachment( // Serialization // --------------------------------------------------------------------------- -func BytesToMessageable(body []byte) (models.Messageable, error) { +func bytesToMessageable(body []byte) (serialization.Parsable, error) { v, err := CreateFromBytes(body, models.CreateMessageFromDiscriminatorValue) if err != nil { - return nil, clues.Wrap(err, "deserializing bytes to message") + if !strings.Contains(err.Error(), invalidJSON) { + return nil, clues.Wrap(err, "deserializing bytes to message") + } + + // If the JSON was invalid try sanitizing and deserializing again. + // Sanitizing should transform characters < 0x20 according to the spec where + // possible. The resulting JSON may still be invalid though. 
+ body = sanitize.JSONBytes(body) + v, err = CreateFromBytes(body, models.CreateMessageFromDiscriminatorValue) + } + + return v, clues.Stack(err).OrNil() +} + +func BytesToMessageable(body []byte) (models.Messageable, error) { + v, err := bytesToMessageable(body) + if err != nil { + return nil, clues.Stack(err) } return v.(models.Messageable), nil diff --git a/src/pkg/services/m365/api/mail_test.go b/src/pkg/services/m365/api/mail_test.go index d7f7dd226..64abf5779 100644 --- a/src/pkg/services/m365/api/mail_test.go +++ b/src/pkg/services/m365/api/mail_test.go @@ -159,6 +159,19 @@ func (suite *MailAPIUnitSuite) TestMailInfo() { } } +// TestBytesToMessagable_InvalidError tests that the error message kiota returns +// for invalid JSON matches what we check for. This helps keep things in sync +// when kiota is updated. +func (suite *MailAPIUnitSuite) TestBytesToMessagable_InvalidError() { + t := suite.T() + input := exchMock.MessageWithSpecialCharacters("m365 mail support test") + + _, err := CreateFromBytes(input, models.CreateMessageFromDiscriminatorValue) + require.Error(t, err, clues.ToCore(err)) + + assert.Contains(t, err.Error(), invalidJSON) +} + func (suite *MailAPIUnitSuite) TestBytesToMessagable() { table := []struct { name string @@ -178,6 +191,20 @@ func (suite *MailAPIUnitSuite) TestBytesToMessagable() { checkError: assert.NoError, checkObject: assert.NotNil, }, + { + name: "malformed JSON bytes passes sanitization", + byteArray: exchMock.MessageWithSpecialCharacters("m365 mail support test"), + checkError: assert.NoError, + checkObject: assert.NotNil, + }, + { + name: "invalid JSON bytes", + byteArray: append( + exchMock.MessageWithSpecialCharacters("m365 mail support test"), + []byte("}")...), + checkError: assert.Error, + checkObject: assert.Nil, + }, } for _, test := range table { suite.Run(test.name, func() {