Sanitize JSON input if it's invalid on the first attempt (#4928)

Add code and tests for sanitizing emails

Also adds code for sanitizing events and contacts but we can remove
that if we'd like to address it if/when needed. We haven't yet seen
or tried to create instances of those

---

#### Does this PR need a docs update or release note?

- [x]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [ ]  No

#### Type of change

- [ ] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Test Plan

- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2023-12-26 12:26:07 -08:00 committed by GitHub
parent f63a6e9b4f
commit aa876cabb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 152 additions and 7 deletions

View File

@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
this case, Corso will skip over the item but report this in the backup summary. this case, Corso will skip over the item but report this in the backup summary.
- Guarantee Exchange email restoration when restoring multiple attachments. Some previous restores were failing with `ErrorItemNotFound`. - Guarantee Exchange email restoration when restoring multiple attachments. Some previous restores were failing with `ErrorItemNotFound`.
- Avoid Graph SDK `Requests must contain extension changes exclusively.` errors by removing server-populated field from restored event items. - Avoid Graph SDK `Requests must contain extension changes exclusively.` errors by removing server-populated field from restored event items.
- Handle cases where Exchange backup stored invalid JSON blobs if there were special characters in the user content. These would result in errors during restore or restore errors.
### Known issues ### Known issues
- Restoring OneDrive, SharePoint, or Teams & Groups items shared with external users while the tenant or site is configured to not allow sharing with external users will not restore permissions. - Restoring OneDrive, SharePoint, or Teams & Groups items shared with external users while the tenant or site is configured to not allow sharing with external users will not restore permissions.

View File

@ -88,7 +88,7 @@ func RestoreCollection(
ctr) ctr)
if err != nil { if err != nil {
if !graph.IsErrItemAlreadyExistsConflict(err) { if !graph.IsErrItemAlreadyExistsConflict(err) {
el.AddRecoverable(ictx, err) el.AddRecoverable(ictx, clues.Wrap(err, "restoring item"))
} }
continue continue

View File

@ -777,6 +777,24 @@ func (suite *ControllerIntegrationSuite) TestRestoreAndBackup_core() {
subjectText := "Test message for restore" subjectText := "Test message for restore"
table := []restoreBackupInfo{ table := []restoreBackupInfo{
{
name: "EmailWithSpecialCharacters",
service: path.ExchangeService,
collections: []stub.ColInfo{
{
PathElements: []string{api.MailInbox},
Category: path.EmailCategory,
Items: []stub.ItemInfo{
{
Name: "someencodeditemID",
Data: exchMock.MessageWithSpecialCharacters(
subjectText + "-1"),
LookupKey: subjectText + "-1",
},
},
},
},
},
{ {
name: "EmailsWithAttachments", name: "EmailsWithAttachments",
service: path.ExchangeService, service: path.ExchangeService,

View File

@ -13,6 +13,7 @@ import (
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
dataMock "github.com/alcionai/corso/src/internal/data/mock" dataMock "github.com/alcionai/corso/src/internal/data/mock"
"github.com/alcionai/corso/src/internal/m365/collection/exchange" "github.com/alcionai/corso/src/internal/m365/collection/exchange"
exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock"
"github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/internal/version" "github.com/alcionai/corso/src/internal/version"
"github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/control"
@ -66,6 +67,29 @@ func (suite *ExportUnitSuite) TestGetItems() {
}, },
}, },
}, },
{
name: "single item with special characters",
version: 1,
backingCollection: data.NoFetchRestoreCollection{
Collection: dataMock.Collection{
Path: p,
ItemData: []data.Item{
&dataMock.Item{
ItemID: "id1",
Reader: io.NopCloser(bytes.NewReader(
exchMock.MessageWithSpecialCharacters("special characters"))),
},
},
},
},
expectedItems: []export.Item{
{
ID: "id1",
Name: "id1.eml",
Body: io.NopCloser(bytes.NewReader(emailBodyBytes)),
},
},
},
{ {
name: "multiple items", name: "multiple items",
version: 1, version: 1,

View File

@ -101,8 +101,24 @@ const (
], ],
"webLink":"https://outlook.office365.com/owa/?ItemID=AAMkAGZmNjNlYjI3LWJlZWYtNGI4Mi04YjMyLTIxYThkNGQ4NmY1MwBGAAAAAADCNgjhM9QmQYWNcI7hCpPrBwDSEBNbUIB9RL6ePDeF3FIYAAAAAAEMAADSEBNbUIB9RL6ePDeF3FIYAAB3XwIkAAA%%3D&exvsurl=1&viewmodel=ReadMessageItem" "webLink":"https://outlook.office365.com/owa/?ItemID=AAMkAGZmNjNlYjI3LWJlZWYtNGI4Mi04YjMyLTIxYThkNGQ4NmY1MwBGAAAAAADCNgjhM9QmQYWNcI7hCpPrBwDSEBNbUIB9RL6ePDeF3FIYAAAAAAEMAADSEBNbUIB9RL6ePDeF3FIYAAB3XwIkAAA%%3D&exvsurl=1&viewmodel=ReadMessageItem"
}` }`
emailWithSpecialCharacters = `{
"importance": "normal",
"internetMessageId": "<SJ0PR17MB562266A1E61A8EA12F5FB17BC3529@SJ0PR17MB5622.namprd17.prod.outlook.com>",
"sentDateTime": "2022-09-26T23:15:46Z",
"receivedDateTime": "2022-09-26T23:20:46Z",
"body":{
"content":"abcd` + string(rune(8)) + string(rune(8)) + `\"",
"contentType":"text"
},
"subject":"%s"
}`
) )
func MessageWithSpecialCharacters(subject string) []byte {
return []byte(fmt.Sprintf(emailWithSpecialCharacters, subject))
}
// MessageBytes returns bytes for a Messageable item. // MessageBytes returns bytes for a Messageable item.
// Contents verified as working with sample data from kiota-serialization-json-go v0.5.5 // Contents verified as working with sample data from kiota-serialization-json-go v0.5.5
func MessageBytes(subject string) []byte { func MessageBytes(subject string) []byte {

View File

@ -7,4 +7,7 @@ const (
DefaultContacts = "Contacts" DefaultContacts = "Contacts"
MailInbox = "Inbox" MailInbox = "Inbox"
MsgFolderRoot = "msgfolderroot" MsgFolderRoot = "msgfolderroot"
// Kiota JSON invalid JSON error message.
invalidJSON = "invalid json type"
) )

View File

@ -3,6 +3,7 @@ package api
import ( import (
"context" "context"
"fmt" "fmt"
"strings"
"github.com/alcionai/clues" "github.com/alcionai/clues"
"github.com/microsoft/kiota-abstractions-go/serialization" "github.com/microsoft/kiota-abstractions-go/serialization"
@ -11,6 +12,7 @@ import (
"github.com/microsoftgraph/msgraph-sdk-go/users" "github.com/microsoftgraph/msgraph-sdk-go/users"
"github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/common/sanitize"
"github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/services/m365/api/graph" "github.com/alcionai/corso/src/pkg/services/m365/api/graph"
@ -252,10 +254,27 @@ func (c Contacts) DeleteItem(
// Serialization // Serialization
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
func BytesToContactable(bytes []byte) (models.Contactable, error) { func bytesToContactable(bytes []byte) (serialization.Parsable, error) {
v, err := CreateFromBytes(bytes, models.CreateContactFromDiscriminatorValue) v, err := CreateFromBytes(bytes, models.CreateContactFromDiscriminatorValue)
if err != nil { if err != nil {
return nil, clues.Wrap(err, "deserializing bytes to contact") if !strings.Contains(err.Error(), invalidJSON) {
return nil, clues.Wrap(err, "deserializing bytes to message")
}
// If the JSON was invalid try sanitizing and deserializing again.
// Sanitizing should transform characters < 0x20 according to the spec where
// possible. The resulting JSON may still be invalid though.
bytes = sanitize.JSONBytes(bytes)
v, err = CreateFromBytes(bytes, models.CreateContactFromDiscriminatorValue)
}
return v, clues.Stack(err).OrNil()
}
func BytesToContactable(bytes []byte) (models.Contactable, error) {
v, err := bytesToContactable(bytes)
if err != nil {
return nil, clues.Stack(err)
} }
return v.(models.Contactable), nil return v.(models.Contactable), nil

View File

@ -17,6 +17,7 @@ import (
"github.com/microsoftgraph/msgraph-sdk-go/users" "github.com/microsoftgraph/msgraph-sdk-go/users"
"github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/common/sanitize"
"github.com/alcionai/corso/src/internal/common/str" "github.com/alcionai/corso/src/internal/common/str"
"github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/dttm" "github.com/alcionai/corso/src/pkg/dttm"
@ -556,10 +557,27 @@ func (c Events) PostLargeAttachment(
// Serialization // Serialization
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
func BytesToEventable(body []byte) (models.Eventable, error) { func bytesToEventable(body []byte) (serialization.Parsable, error) {
v, err := CreateFromBytes(body, models.CreateEventFromDiscriminatorValue) v, err := CreateFromBytes(body, models.CreateEventFromDiscriminatorValue)
if err != nil { if err != nil {
return nil, clues.Wrap(err, "deserializing bytes to event") if !strings.Contains(err.Error(), invalidJSON) {
return nil, clues.Wrap(err, "deserializing bytes to message")
}
// If the JSON was invalid try sanitizing and deserializing again.
// Sanitizing should transform characters < 0x20 according to the spec where
// possible. The resulting JSON may still be invalid though.
body = sanitize.JSONBytes(body)
v, err = CreateFromBytes(body, models.CreateEventFromDiscriminatorValue)
}
return v, clues.Stack(err).OrNil()
}
func BytesToEventable(body []byte) (models.Eventable, error) {
v, err := bytesToEventable(body)
if err != nil {
return nil, clues.Stack(err)
} }
return v.(models.Eventable), nil return v.(models.Eventable), nil

View File

@ -5,6 +5,7 @@ import (
"context" "context"
"fmt" "fmt"
"io" "io"
"strings"
"github.com/alcionai/clues" "github.com/alcionai/clues"
"github.com/microsoft/kiota-abstractions-go/serialization" "github.com/microsoft/kiota-abstractions-go/serialization"
@ -13,6 +14,7 @@ import (
"github.com/microsoftgraph/msgraph-sdk-go/users" "github.com/microsoftgraph/msgraph-sdk-go/users"
"github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/common/sanitize"
"github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/dttm" "github.com/alcionai/corso/src/pkg/dttm"
"github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/fault"
@ -596,10 +598,27 @@ func (c Mail) PostLargeAttachment(
// Serialization // Serialization
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
func BytesToMessageable(body []byte) (models.Messageable, error) { func bytesToMessageable(body []byte) (serialization.Parsable, error) {
v, err := CreateFromBytes(body, models.CreateMessageFromDiscriminatorValue) v, err := CreateFromBytes(body, models.CreateMessageFromDiscriminatorValue)
if err != nil { if err != nil {
return nil, clues.Wrap(err, "deserializing bytes to message") if !strings.Contains(err.Error(), invalidJSON) {
return nil, clues.Wrap(err, "deserializing bytes to message")
}
// If the JSON was invalid try sanitizing and deserializing again.
// Sanitizing should transform characters < 0x20 according to the spec where
// possible. The resulting JSON may still be invalid though.
body = sanitize.JSONBytes(body)
v, err = CreateFromBytes(body, models.CreateMessageFromDiscriminatorValue)
}
return v, clues.Stack(err).OrNil()
}
func BytesToMessageable(body []byte) (models.Messageable, error) {
v, err := bytesToMessageable(body)
if err != nil {
return nil, clues.Stack(err)
} }
return v.(models.Messageable), nil return v.(models.Messageable), nil

View File

@ -159,6 +159,19 @@ func (suite *MailAPIUnitSuite) TestMailInfo() {
} }
} }
// TestBytesToMessagable_InvalidError tests that the error message kiota returns
// for invalid JSON matches what we check for. This helps keep things in sync
// when kiota is updated.
func (suite *MailAPIUnitSuite) TestBytesToMessagable_InvalidError() {
t := suite.T()
input := exchMock.MessageWithSpecialCharacters("m365 mail support test")
_, err := CreateFromBytes(input, models.CreateMessageFromDiscriminatorValue)
require.Error(t, err, clues.ToCore(err))
assert.Contains(t, err.Error(), invalidJSON)
}
func (suite *MailAPIUnitSuite) TestBytesToMessagable() { func (suite *MailAPIUnitSuite) TestBytesToMessagable() {
table := []struct { table := []struct {
name string name string
@ -178,6 +191,20 @@ func (suite *MailAPIUnitSuite) TestBytesToMessagable() {
checkError: assert.NoError, checkError: assert.NoError,
checkObject: assert.NotNil, checkObject: assert.NotNil,
}, },
{
name: "malformed JSON bytes passes sanitization",
byteArray: exchMock.MessageWithSpecialCharacters("m365 mail support test"),
checkError: assert.NoError,
checkObject: assert.NotNil,
},
{
name: "invalid JSON bytes",
byteArray: append(
exchMock.MessageWithSpecialCharacters("m365 mail support test"),
[]byte("}")...),
checkError: assert.Error,
checkObject: assert.Nil,
},
} }
for _, test := range table { for _, test := range table {
suite.Run(test.name, func() { suite.Run(test.name, func() {