Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2a771dda90 | ||
|
|
67dd227fb5 | ||
|
|
f94efecb23 |
48
src/internal/common/sanitize/json.go
Normal file
48
src/internal/common/sanitize/json.go
Normal file
@ -0,0 +1,48 @@
|
||||
package sanitize
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
// JSONBytes takes a []byte containing JSON as input and returns a []byte
// containing the same content but with any raw character codes < 0x20 found
// inside string values replaced by their \u00XX escape sequence.
//
// Tab, line feed, and carriage return bytes that appear outside of string
// values are insignificant JSON whitespace (RFC 8259) and are copied through
// unchanged; escaping them would turn valid, formatted JSON into invalid JSON.
// Any other control character is escaped no matter where it appears.
//
// Empty input is returned as-is. Non-empty input always yields a newly
// allocated slice that shares no memory with input.
func JSONBytes(input []byte) []byte {
	if len(input) == 0 {
		return input
	}

	// Avoid most reallocations by just getting a buffer of the right size to
	// start with.
	// TODO(ashmrtn): We may actually want to overshoot this a little so we won't
	// cause a reallocation and possible doubling in size if we only need to
	// escape a few characters.
	buf := bytes.Buffer{}
	buf.Grow(len(input))

	var (
		// inString is true while the cursor sits between the opening and closing
		// double quote of a JSON string.
		inString bool
		// escaped is true when the previous byte was an unconsumed backslash
		// inside a string, meaning the current byte can't close the string.
		escaped bool
	)

	for _, c := range input {
		switch {
		case !inString && (c == '\t' || c == '\n' || c == '\r'):
			// Structural whitespace between tokens. It's already valid JSON so it
			// must not be rewritten.
			buf.WriteByte(c)

		case c < 0x20:
			// Escape character ranges taken from RFC 8259. This case doesn't handle
			// escape characters (0x5c) or double quotes (0x22). We're assuming escape
			// characters don't require additional processing and that double quotes
			// are properly escaped by whatever handed us the JSON.
			//
			// We could use transforms like linefeed to \n, but it's actually easier,
			// if a little less space efficient, to just turn them into
			// multi-character sequences denoting a unicode character.
			fmt.Fprintf(&buf, `\u%04X`, c)

			// A backslash followed by a raw control character ends up as an escaped
			// backslash followed by literal text once the line above has run, so the
			// pending escape is used up.
			escaped = false

		default:
			buf.WriteByte(c)

			// Keep track of whether we're inside a string so structural whitespace
			// can be told apart from string content.
			switch {
			case escaped:
				escaped = false
			case c == '\\':
				escaped = inString
			case c == '"':
				inString = !inString
			}
		}
	}

	// Return a copy just so we don't hold a reference to internal bytes.Buffer
	// data.
	return slices.Clone(buf.Bytes())
}
|
||||
88
src/internal/common/sanitize/json_test.go
Normal file
88
src/internal/common/sanitize/json_test.go
Normal file
@ -0,0 +1,88 @@
|
||||
package sanitize_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/suite"
|
||||
|
||||
"github.com/alcionai/corso/src/internal/common/sanitize"
|
||||
"github.com/alcionai/corso/src/internal/tester"
|
||||
)
|
||||
|
||||
// SanitizeJSONUnitSuite holds the tests for the sanitize package's JSON
// helpers. It embeds tester.Suite and is started by TestSanitizeJSONUnitSuite.
type SanitizeJSONUnitSuite struct {
|
||||
tester.Suite
|
||||
}
|
||||
|
||||
func TestSanitizeJSONUnitSuite(t *testing.T) {
|
||||
suite.Run(t, &SanitizeJSONUnitSuite{Suite: tester.NewUnitSuite(t)})
|
||||
}
|
||||
|
||||
// jsonTest describes one table-driven case for sanitize.JSONBytes.
type jsonTest struct {
|
||||
// name is used as the subtest name.
name string
|
||||
// input is the raw data handed to sanitize.JSONBytes.
input []byte
|
||||
// expect is the exact output sanitize.JSONBytes should produce for input.
expect []byte
|
||||
// expectValid is applied to the result of json.Valid on the sanitized output.
expectValid assert.BoolAssertionFunc
|
||||
}
|
||||
|
||||
func generateCharacterTests() []jsonTest {
|
||||
var (
|
||||
res []jsonTest
|
||||
|
||||
baseTestName = "Escape0x%02X"
|
||||
baseTestData = `{"foo":"ba%sr"}`
|
||||
expect = `{"foo":"ba%s\u00%02Xr"}`
|
||||
)
|
||||
|
||||
for i := 0; i < 0x20; i++ {
|
||||
res = append(
|
||||
res,
|
||||
jsonTest{
|
||||
name: fmt.Sprintf(baseTestName, i),
|
||||
input: []byte(fmt.Sprintf(baseTestData, string(rune(i)))),
|
||||
expect: []byte(fmt.Sprintf(expect, "", string(rune(i)))),
|
||||
expectValid: assert.True,
|
||||
},
|
||||
jsonTest{
|
||||
name: fmt.Sprintf(baseTestName, i) + " WithEscapedEscape",
|
||||
input: []byte(fmt.Sprintf(baseTestData, `\\`+string(rune(i)))),
|
||||
expect: []byte(fmt.Sprintf(expect, `\\`, string(rune(i)))),
|
||||
expectValid: assert.True,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func (suite *SanitizeJSONUnitSuite) TestJSONString() {
|
||||
table := []jsonTest{
|
||||
{
|
||||
name: "AlreadyValid NoSpecialCharacters",
|
||||
input: []byte(`{"foo":"bar"}`),
|
||||
expect: []byte(`{"foo":"bar"}`),
|
||||
expectValid: assert.True,
|
||||
},
|
||||
{
|
||||
name: "AlreadyValid SpecialCharacters",
|
||||
input: []byte(`{"foo":"ba\\r\""}`),
|
||||
expect: []byte(`{"foo":"ba\\r\""}`),
|
||||
expectValid: assert.True,
|
||||
},
|
||||
}
|
||||
|
||||
allTests := append(generateCharacterTests(), table...)
|
||||
|
||||
for _, test := range allTests {
|
||||
suite.Run(test.name, func() {
|
||||
t := suite.T()
|
||||
|
||||
got := sanitize.JSONBytes(test.input)
|
||||
|
||||
assert.Equal(t, test.expect, got)
|
||||
test.expectValid(t, json.Valid(got))
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -1,6 +1,9 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@ -12,6 +15,7 @@ import (
|
||||
"github.com/stretchr/testify/suite"
|
||||
|
||||
"github.com/alcionai/corso/src/internal/common/ptr"
|
||||
"github.com/alcionai/corso/src/internal/common/sanitize"
|
||||
exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock"
|
||||
"github.com/alcionai/corso/src/internal/tester"
|
||||
"github.com/alcionai/corso/src/internal/tester/tconfig"
|
||||
@ -523,3 +527,246 @@ func (suite *MailAPIIntgSuite) TestMail_GetContainerByName_mocked() {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func sendItemWithBodyAndGetSerialized(
|
||||
t *testing.T,
|
||||
ctx context.Context, //revive:disable-line:context-as-argument
|
||||
msgs Mail,
|
||||
userID string,
|
||||
mailFolderID string,
|
||||
subject string,
|
||||
bodyContent string,
|
||||
contentType models.BodyType,
|
||||
) []byte {
|
||||
msg := models.NewMessage()
|
||||
msg.SetSubject(ptr.To(subject))
|
||||
|
||||
body := models.NewItemBody()
|
||||
body.SetContent(ptr.To(bodyContent))
|
||||
body.SetContentType(ptr.To(contentType))
|
||||
|
||||
msg.SetBody(body)
|
||||
|
||||
item, err := msgs.PostItem(ctx, userID, mailFolderID, msg)
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
fetched, _, err := msgs.GetItem(
|
||||
ctx,
|
||||
userID,
|
||||
ptr.Val(item.GetId()),
|
||||
false,
|
||||
fault.New(true))
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
serialized, err := msgs.Serialize(
|
||||
ctx,
|
||||
fetched,
|
||||
userID,
|
||||
ptr.Val(item.GetId()))
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
return serialized
|
||||
}
|
||||
|
||||
func sendSerializedItemAndGetSerialized(
|
||||
t *testing.T,
|
||||
ctx context.Context, //revive:disable-line:context-as-argument
|
||||
msgs Mail,
|
||||
userID string,
|
||||
mailFolderID string,
|
||||
serializedInput []byte,
|
||||
) []byte {
|
||||
msg, err := BytesToMessageable(serializedInput)
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
item, err := msgs.PostItem(ctx, userID, mailFolderID, msg)
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
fetched, _, err := msgs.GetItem(
|
||||
ctx,
|
||||
userID,
|
||||
ptr.Val(item.GetId()),
|
||||
false,
|
||||
fault.New(true))
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
serialized, err := msgs.Serialize(
|
||||
ctx,
|
||||
fetched,
|
||||
userID,
|
||||
ptr.Val(item.GetId()))
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
return serialized
|
||||
}
|
||||
|
||||
func (suite *MailAPIIntgSuite) TestMail_WithSpecialCharacters() {
|
||||
t := suite.T()
|
||||
|
||||
ctx, flush := tester.NewContext(t)
|
||||
defer flush()
|
||||
|
||||
contentRegex := regexp.MustCompile(`"content": ?"(.*?"?)",?`)
|
||||
|
||||
userID := tconfig.M365UserID(suite.T())
|
||||
|
||||
folderName := testdata.DefaultRestoreConfig("EscapeCharacters").Location
|
||||
msgs := suite.its.ac.Mail()
|
||||
mailfolder, err := msgs.CreateContainer(ctx, userID, MsgFolderRoot, folderName)
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
escapeCharRanges := [][]int{
|
||||
{0x0, 0x20},
|
||||
{0x22, 0x23},
|
||||
{0x5c, 0x5d},
|
||||
}
|
||||
|
||||
for _, charRange := range escapeCharRanges {
|
||||
for i := charRange[0]; i < charRange[1]; i++ {
|
||||
subject := fmt.Sprintf("plain text character %x", i)
|
||||
|
||||
//suite.Run(subject, func() {
|
||||
// t := suite.T()
|
||||
|
||||
// ctx, flush := tester.NewContext(t)
|
||||
// defer flush()
|
||||
|
||||
bodyContent := string(rune(i))
|
||||
|
||||
serialized := sendItemWithBodyAndGetSerialized(
|
||||
t,
|
||||
ctx,
|
||||
msgs,
|
||||
userID,
|
||||
ptr.Val(mailfolder.GetId()),
|
||||
subject,
|
||||
bodyContent,
|
||||
models.TEXT_BODYTYPE)
|
||||
|
||||
matches := contentRegex.FindAllSubmatch(serialized, -1)
|
||||
|
||||
switch {
|
||||
case len(matches) == 0:
|
||||
t.Logf("text of 0x%x wasn't found", i)
|
||||
|
||||
case len(matches[0]) < 2:
|
||||
t.Logf("text of 0x%x was removed", i)
|
||||
|
||||
case bodyContent != string(matches[0][1]):
|
||||
t.Logf("text of 0x%x has been transformed to %s", i, matches[0][1])
|
||||
}
|
||||
|
||||
sanitized := sanitize.JSONBytes(serialized)
|
||||
newSerialized := sendSerializedItemAndGetSerialized(
|
||||
t,
|
||||
ctx,
|
||||
msgs,
|
||||
userID,
|
||||
ptr.Val(mailfolder.GetId()),
|
||||
sanitized)
|
||||
|
||||
newMatches := contentRegex.FindAllSubmatch(newSerialized, -1)
|
||||
|
||||
switch {
|
||||
case len(newMatches) == 0:
|
||||
t.Logf("sanitized text of 0x%x wasn't found", i)
|
||||
|
||||
case len(newMatches[0]) < 2:
|
||||
t.Logf("sanitized text of 0x%x was removed", i)
|
||||
|
||||
case bodyContent != string(newMatches[0][1]):
|
||||
t.Logf(
|
||||
"sanitized text of 0x%x has been transformed to %s",
|
||||
i,
|
||||
newMatches[0][1])
|
||||
}
|
||||
|
||||
assert.Equal(t, matches[0][1], newMatches[0][1])
|
||||
//})
|
||||
}
|
||||
}
|
||||
|
||||
testSequences := []string{
|
||||
// Character code for backspace
|
||||
"\u0008",
|
||||
"\\u0008",
|
||||
"u0008",
|
||||
// Character code for \
|
||||
"\u005c",
|
||||
"\\u005c",
|
||||
"u005c",
|
||||
// Character code for "
|
||||
"\u0022",
|
||||
"\\u0022",
|
||||
"u0022",
|
||||
// Character code for B
|
||||
"\u0042",
|
||||
"\\u0042",
|
||||
"u0042",
|
||||
"\\n",
|
||||
"\\\n",
|
||||
"n" + string(rune(0)),
|
||||
"n" + string(rune(0)) + "n",
|
||||
}
|
||||
|
||||
for i, sequence := range testSequences {
|
||||
subject := fmt.Sprintf("plain text sequence %d", i)
|
||||
|
||||
//suite.Run(subject, func() {
|
||||
// t := suite.T()
|
||||
|
||||
// ctx, flush := tester.NewContext(t)
|
||||
// defer flush()
|
||||
|
||||
serialized := sendItemWithBodyAndGetSerialized(
|
||||
t,
|
||||
ctx,
|
||||
msgs,
|
||||
userID,
|
||||
ptr.Val(mailfolder.GetId()),
|
||||
subject,
|
||||
sequence,
|
||||
models.TEXT_BODYTYPE)
|
||||
|
||||
matches := contentRegex.FindAllSubmatch(serialized, -1)
|
||||
|
||||
switch {
|
||||
case len(matches) == 0:
|
||||
t.Logf("sequence %d wasn't found", i)
|
||||
|
||||
case len(matches[0]) < 2:
|
||||
t.Logf("sequence %d was removed", i)
|
||||
|
||||
case sequence != string(matches[0][1]):
|
||||
t.Logf("sequence %d has been transformed to %s", i, matches[0][1])
|
||||
}
|
||||
|
||||
sanitized := sanitize.JSONBytes(serialized)
|
||||
newSerialized := sendSerializedItemAndGetSerialized(
|
||||
t,
|
||||
ctx,
|
||||
msgs,
|
||||
userID,
|
||||
ptr.Val(mailfolder.GetId()),
|
||||
sanitized)
|
||||
|
||||
newMatches := contentRegex.FindAllSubmatch(newSerialized, -1)
|
||||
|
||||
switch {
|
||||
case len(newMatches) == 0:
|
||||
t.Logf("sanitized sequence %d wasn't found", i)
|
||||
|
||||
case len(newMatches[0]) < 2:
|
||||
t.Logf("sanitized sequence %d was removed", i)
|
||||
|
||||
case sequence != string(newMatches[0][1]):
|
||||
t.Logf(
|
||||
"sanitized sequence %d has been transformed to %s",
|
||||
i,
|
||||
newMatches[0][1])
|
||||
}
|
||||
|
||||
assert.Equal(t, matches[0][1], newMatches[0][1])
|
||||
//})
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user