Compare commits

...

3 Commits

Author SHA1 Message Date
Ashlie Martinez
2a771dda90 Update test to check sanitized input
Test deserializing and sending the sanitized version of the output from
the original test. Also ensure that the returned body from the sanitized
input is the same as the body from the original input.

We compare only the body instead of the entire serialized content
because the item's ID, mod time, etc. will change between the versions.
2023-12-21 16:54:46 -07:00
Ashlie Martinez
67dd227fb5 Simple sanitizer function for JSON
Convert JSON input that has unescaped characters < 0x20 to their escaped
version so they're considered valid again.
2023-12-21 16:54:35 -07:00
Ashlie Martinez
f94efecb23 Test sending and getting emails with special chars
Test how graph and the graph SDK behave when special characters in
various forms are sent and received.
2023-12-21 13:50:47 -07:00
3 changed files with 383 additions and 0 deletions

View File

@ -0,0 +1,48 @@
package sanitize
import (
"bytes"
"fmt"
"golang.org/x/exp/slices"
)
// JSONBytes takes a []byte containing JSON as input and returns a []byte
// containing the same content but with any unescaped character codes < 0x20
// found inside string values rewritten as \u00XX escape sequences.
//
// Tab, line feed, and carriage return that appear outside of string values are
// insignificant JSON whitespace (RFC 8259 section 2) and are copied through
// untouched; escaping them there would turn valid, pretty-printed JSON into
// invalid JSON. Any other control character is escaped wherever it appears.
//
// Empty (or nil) input is returned as-is. For non-empty input the result is a
// freshly allocated slice that shares no memory with input.
func JSONBytes(input []byte) []byte {
	if len(input) == 0 {
		return input
	}

	// Avoid most reallocations by just getting a buffer of the right size to
	// start with.
	// TODO(ashmrtn): We may actually want to overshoot this a little so we won't
	// cause a reallocation and possible doubling in size if we only need to
	// escape a few characters.
	buf := bytes.Buffer{}
	buf.Grow(len(input))

	var (
		// inString is true while scanning between an opening and closing double
		// quote.
		inString bool
		// escaped is true when the previous byte was a backslash that starts an
		// escape sequence inside a string, so the current byte must not be
		// interpreted as a string delimiter or as another escape start.
		escaped bool
	)

	for _, c := range input {
		switch {
		case c < 0x20:
			// Escape character ranges taken from RFC 8259. Double quotes (0x22) and
			// backslashes (0x5c) are never rewritten; we assume they were properly
			// escaped by whatever handed us the JSON and only use them to track
			// whether we're inside a string.
			if !inString && (c == '\t' || c == '\n' || c == '\r') {
				// Structural whitespace between tokens is already valid JSON.
				buf.WriteByte(c)
				continue
			}

			// We could use short transforms (e.g. linefeed -> \n), but it's
			// actually easier, if a little less space efficient, to just emit the
			// multi-character sequence denoting a unicode character.
			fmt.Fprintf(&buf, `\u%04X`, c)

			escaped = false
		case escaped:
			// Second byte of an escape sequence such as \" or \\.
			escaped = false

			buf.WriteByte(c)
		case inString && c == '\\':
			escaped = true

			buf.WriteByte(c)
		case c == '"':
			inString = !inString

			buf.WriteByte(c)
		default:
			buf.WriteByte(c)
		}
	}

	// Return a copy just so we don't hold a reference to internal bytes.Buffer
	// data.
	return slices.Clone(buf.Bytes())
}

View File

@ -0,0 +1,88 @@
package sanitize_test
import (
"encoding/json"
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/common/sanitize"
"github.com/alcionai/corso/src/internal/tester"
)
// SanitizeJSONUnitSuite holds the unit tests for the sanitize package's JSON
// helpers. It embeds tester.Suite and is run via TestSanitizeJSONUnitSuite.
type SanitizeJSONUnitSuite struct {
tester.Suite
}
func TestSanitizeJSONUnitSuite(t *testing.T) {
suite.Run(t, &SanitizeJSONUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// jsonTest describes a single table-driven case for sanitize.JSONBytes.
type jsonTest struct {
// name is the subtest name passed to suite.Run.
name string
// input is the raw bytes handed to sanitize.JSONBytes.
input []byte
// expect is the exact output sanitize.JSONBytes should produce for input.
expect []byte
// expectValid asserts on the result of json.Valid for the sanitized output.
expectValid assert.BoolAssertionFunc
}
// generateCharacterTests returns two cases for every character code in
// [0x00, 0x20): one where the raw character sits directly inside a JSON string
// value, and one where it is preceded by an escaped backslash (`\\`). In both
// cases the expected output replaces the raw character with its \u00XX escape
// and the sanitized result is expected to be valid JSON.
func generateCharacterTests() []jsonTest {
	const (
		nameFmt   = "Escape0x%02X"
		inputFmt  = `{"foo":"ba%sr"}`
		expectFmt = `{"foo":"ba%s\u00%02Xr"}`
	)

	var tests []jsonTest

	for code := 0; code < 0x20; code++ {
		// Every code below 0x20 encodes to a single byte, so applying %02X to
		// this one-byte string renders that byte's two-digit upper-case hex
		// value.
		raw := string(rune(code))
		name := fmt.Sprintf(nameFmt, code)

		plain := jsonTest{
			name:        name,
			input:       []byte(fmt.Sprintf(inputFmt, raw)),
			expect:      []byte(fmt.Sprintf(expectFmt, "", raw)),
			expectValid: assert.True,
		}

		afterEscapedEscape := jsonTest{
			name:        name + " WithEscapedEscape",
			input:       []byte(fmt.Sprintf(inputFmt, `\\`+raw)),
			expect:      []byte(fmt.Sprintf(expectFmt, `\\`, raw)),
			expectValid: assert.True,
		}

		tests = append(tests, plain, afterEscapedEscape)
	}

	return tests
}
// TestJSONString runs sanitize.JSONBytes over the generated control-character
// cases followed by a couple of already-valid inputs, checking both the exact
// output bytes and whether encoding/json considers the output valid.
func (suite *SanitizeJSONUnitSuite) TestJSONString() {
	// Generated cases run first, then the hand-written ones.
	tests := generateCharacterTests()
	tests = append(
		tests,
		jsonTest{
			name:        "AlreadyValid NoSpecialCharacters",
			input:       []byte(`{"foo":"bar"}`),
			expect:      []byte(`{"foo":"bar"}`),
			expectValid: assert.True,
		},
		jsonTest{
			name:        "AlreadyValid SpecialCharacters",
			input:       []byte(`{"foo":"ba\\r\""}`),
			expect:      []byte(`{"foo":"ba\\r\""}`),
			expectValid: assert.True,
		},
	)

	for _, test := range tests {
		suite.Run(test.name, func() {
			t := suite.T()

			got := sanitize.JSONBytes(test.input)

			assert.Equal(t, test.expect, got)
			test.expectValid(t, json.Valid(got))
		})
	}
}

View File

@ -1,6 +1,9 @@
package api package api
import ( import (
"context"
"fmt"
"regexp"
"testing" "testing"
"time" "time"
@ -12,6 +15,7 @@ import (
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/common/sanitize"
exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock" exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock"
"github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/internal/tester/tconfig" "github.com/alcionai/corso/src/internal/tester/tconfig"
@ -523,3 +527,246 @@ func (suite *MailAPIIntgSuite) TestMail_GetContainerByName_mocked() {
}) })
} }
} }
// sendItemWithBodyAndGetSerialized builds a new message with the given subject,
// body content, and body content type, posts it to mailFolderID for userID,
// fetches the created item back, and returns the serialized form of the
// fetched item.
//
// Any error from posting, fetching, or serializing fails the test immediately
// via require.
func sendItemWithBodyAndGetSerialized(
	t *testing.T,
	ctx context.Context, //revive:disable-line:context-as-argument
	msgs Mail,
	userID string,
	mailFolderID string,
	subject string,
	bodyContent string,
	contentType models.BodyType,
) []byte {
	// Attribute require failures to the calling test case, not this helper.
	t.Helper()

	msg := models.NewMessage()
	msg.SetSubject(ptr.To(subject))

	body := models.NewItemBody()
	body.SetContent(ptr.To(bodyContent))
	body.SetContentType(ptr.To(contentType))
	msg.SetBody(body)

	item, err := msgs.PostItem(ctx, userID, mailFolderID, msg)
	require.NoError(t, err, clues.ToCore(err))

	// NOTE(review): the meaning of the `false` flag and of fault.New(true) is
	// defined by Mail.GetItem and the fault package, outside this view -- confirm
	// against those definitions.
	fetched, _, err := msgs.GetItem(
		ctx,
		userID,
		ptr.Val(item.GetId()),
		false,
		fault.New(true))
	require.NoError(t, err, clues.ToCore(err))

	serialized, err := msgs.Serialize(
		ctx,
		fetched,
		userID,
		ptr.Val(item.GetId()))
	require.NoError(t, err, clues.ToCore(err))

	return serialized
}
// sendSerializedItemAndGetSerialized deserializes serializedInput into a
// message with BytesToMessageable, posts that message to mailFolderID for
// userID, fetches the created item back, and returns the serialized form of
// the fetched item.
//
// Any error from deserializing, posting, fetching, or serializing fails the
// test immediately via require.
func sendSerializedItemAndGetSerialized(
	t *testing.T,
	ctx context.Context, //revive:disable-line:context-as-argument
	msgs Mail,
	userID string,
	mailFolderID string,
	serializedInput []byte,
) []byte {
	// Attribute require failures to the calling test case, not this helper.
	t.Helper()

	msg, err := BytesToMessageable(serializedInput)
	require.NoError(t, err, clues.ToCore(err))

	item, err := msgs.PostItem(ctx, userID, mailFolderID, msg)
	require.NoError(t, err, clues.ToCore(err))

	// NOTE(review): the meaning of the `false` flag and of fault.New(true) is
	// defined by Mail.GetItem and the fault package, outside this view -- confirm
	// against those definitions.
	fetched, _, err := msgs.GetItem(
		ctx,
		userID,
		ptr.Val(item.GetId()),
		false,
		fault.New(true))
	require.NoError(t, err, clues.ToCore(err))

	serialized, err := msgs.Serialize(
		ctx,
		fetched,
		userID,
		ptr.Val(item.GetId()))
	require.NoError(t, err, clues.ToCore(err))

	return serialized
}
// TestMail_WithSpecialCharacters sends plain-text messages whose bodies contain
// special characters or escape-like sequences, then checks that sanitizing the
// serialized item with sanitize.JSONBytes and sending it again yields the same
// body content as the original round trip.
//
// For each case the test:
//  1. posts a message with the body, fetches it, and serializes it;
//  2. extracts the body from the serialized bytes with a regex and logs
//     whether it was missing or transformed relative to what was sent;
//  3. sanitizes the serialized bytes, posts them as a new message, fetches and
//     serializes that, and extracts the body the same way;
//  4. asserts both extracted bodies are equal.
//
// All cases run directly on the parent test and share its context; the
// per-case suite.Run wrapper is intentionally not used here.
func (suite *MailAPIIntgSuite) TestMail_WithSpecialCharacters() {
	t := suite.T()

	ctx, flush := tester.NewContext(t)
	defer flush()

	contentRegex := regexp.MustCompile(`"content": ?"(.*?"?)",?`)

	userID := tconfig.M365UserID(suite.T())
	folderName := testdata.DefaultRestoreConfig("EscapeCharacters").Location
	msgs := suite.its.ac.Mail()

	mailfolder, err := msgs.CreateContainer(ctx, userID, MsgFolderRoot, folderName)
	require.NoError(t, err, clues.ToCore(err))

	folderID := ptr.Val(mailfolder.GetId())

	// extractBody returns the first captured body content in serialized. It
	// logs, using label to identify the case, when the body can't be found or
	// when it differs from sent. The bool result reports whether a body was
	// found, so callers never index into an empty match set.
	extractBody := func(label, sent string, serialized []byte) ([]byte, bool) {
		matches := contentRegex.FindAllSubmatch(serialized, -1)

		switch {
		case len(matches) == 0:
			t.Logf("%s wasn't found", label)
			return nil, false
		case len(matches[0]) < 2:
			t.Logf("%s was removed", label)
			return nil, false
		case sent != string(matches[0][1]):
			t.Logf("%s has been transformed to %s", label, matches[0][1])
		}

		return matches[0][1], true
	}

	// checkRoundTrip runs one case: send body, sanitize the serialized result,
	// resend it, and compare the body extracted from both serialized versions.
	checkRoundTrip := func(subject, label, body string) {
		serialized := sendItemWithBodyAndGetSerialized(
			t,
			ctx,
			msgs,
			userID,
			folderID,
			subject,
			body,
			models.TEXT_BODYTYPE)

		origBody, origFound := extractBody(label, body, serialized)

		sanitized := sanitize.JSONBytes(serialized)

		newSerialized := sendSerializedItemAndGetSerialized(
			t,
			ctx,
			msgs,
			userID,
			folderID,
			sanitized)

		newBody, newFound := extractBody("sanitized "+label, body, newSerialized)

		// Without a body on both sides there's nothing to index into. Report a
		// mismatch in presence as a failure instead of panicking.
		if !origFound || !newFound {
			assert.Equal(
				t,
				origFound,
				newFound,
				"%s: body presence differs between original and sanitized item",
				label)

			return
		}

		assert.Equal(t, origBody, newBody)
	}

	// Half-open [start, end) ranges of single characters to test: every code
	// below 0x20, the double quote (0x22), and the backslash (0x5c).
	escapeCharRanges := [][]int{
		{0x0, 0x20},
		{0x22, 0x23},
		{0x5c, 0x5d},
	}

	for _, charRange := range escapeCharRanges {
		for i := charRange[0]; i < charRange[1]; i++ {
			checkRoundTrip(
				fmt.Sprintf("plain text character %x", i),
				fmt.Sprintf("text of 0x%x", i),
				string(rune(i)))
		}
	}

	testSequences := []string{
		// Character code for backspace
		"\u0008",
		"\\u0008",
		"u0008",
		// Character code for \
		"\u005c",
		"\\u005c",
		"u005c",
		// Character code for "
		"\u0022",
		"\\u0022",
		"u0022",
		// Character code for B
		"\u0042",
		"\\u0042",
		"u0042",
		"\\n",
		"\\\n",
		"n" + string(rune(0)),
		"n" + string(rune(0)) + "n",
	}

	for i, sequence := range testSequences {
		checkRoundTrip(
			fmt.Sprintf("plain text sequence %d", i),
			fmt.Sprintf("sequence %d", i),
			sequence)
	}
}