Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2a771dda90 | ||
|
|
67dd227fb5 | ||
|
|
f94efecb23 |
48
src/internal/common/sanitize/json.go
Normal file
48
src/internal/common/sanitize/json.go
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
package sanitize
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"golang.org/x/exp/slices"
|
||||||
|
)
|
||||||
|
|
||||||
|
// JSONBytes takes a []byte containing JSON as input and returns a []byte
// containing the same content but with any character codes < 0x20 that appear
// unescaped inside a JSON string replaced by a \u00XX escape sequence.
//
// Tab, line feed, and carriage return found outside of string literals are
// insignificant whitespace per RFC 8259 and are copied through unchanged;
// escaping them would turn formatted (e.g. indented) JSON into invalid JSON.
// Every other control character is escaped wherever it appears.
//
// Backslashes (0x5c) and double quotes (0x22) are never rewritten. We're
// assuming double quotes that are part of string content were properly escaped
// by whatever handed us the JSON; string boundaries are found by looking for
// double quotes that aren't preceded by an unescaped backslash.
//
// Empty input is returned as-is. Otherwise the result is a freshly allocated
// slice.
func JSONBytes(input []byte) []byte {
	if len(input) == 0 {
		return input
	}

	// Avoid most reallocations by just getting a buffer of the right size to
	// start with.
	// TODO(ashmrtn): We may actually want to overshoot this a little so we won't
	// cause a reallocation and possible doubling in size if we only need to
	// escape a few characters.
	buf := bytes.Buffer{}
	buf.Grow(len(input))

	var (
		// inString is true while we're between the opening and closing quote of
		// a JSON string.
		inString bool
		// escaped is true when the previous byte was a backslash that escapes the
		// current byte.
		escaped bool
	)

	for _, c := range input {
		switch {
		case c == '"' && !escaped:
			// Unescaped quote: we're entering or leaving a string literal.
			inString = !inString

			buf.WriteByte(c)

		case c >= 0x20, !inString && (c == '\t' || c == '\n' || c == '\r'):
			// Regular bytes and structural whitespace between tokens pass through
			// untouched.
			buf.WriteByte(c)

		default:
			// Escape character ranges taken from RFC 8259. We could use transforms
			// like linefeed -> \n, but it's actually easier, if a little less space
			// efficient, to just turn every control character into a
			// multi-character sequence denoting a unicode character.
			fmt.Fprintf(&buf, `\u%04X`, c)
		}

		// A backslash only escapes the next byte when it's inside a string and
		// isn't itself the target of an escape.
		escaped = inString && c == '\\' && !escaped
	}

	// Return a copy just so we don't hold a reference to internal bytes.Buffer
	// data.
	return slices.Clone(buf.Bytes())
}
|
||||||
88
src/internal/common/sanitize/json_test.go
Normal file
88
src/internal/common/sanitize/json_test.go
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
package sanitize_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/suite"
|
||||||
|
|
||||||
|
"github.com/alcionai/corso/src/internal/common/sanitize"
|
||||||
|
"github.com/alcionai/corso/src/internal/tester"
|
||||||
|
)
|
||||||
|
|
||||||
|
type SanitizeJSONUnitSuite struct {
|
||||||
|
tester.Suite
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSanitizeJSONUnitSuite(t *testing.T) {
|
||||||
|
suite.Run(t, &SanitizeJSONUnitSuite{Suite: tester.NewUnitSuite(t)})
|
||||||
|
}
|
||||||
|
|
||||||
|
type jsonTest struct {
|
||||||
|
name string
|
||||||
|
input []byte
|
||||||
|
expect []byte
|
||||||
|
expectValid assert.BoolAssertionFunc
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateCharacterTests() []jsonTest {
|
||||||
|
var (
|
||||||
|
res []jsonTest
|
||||||
|
|
||||||
|
baseTestName = "Escape0x%02X"
|
||||||
|
baseTestData = `{"foo":"ba%sr"}`
|
||||||
|
expect = `{"foo":"ba%s\u00%02Xr"}`
|
||||||
|
)
|
||||||
|
|
||||||
|
for i := 0; i < 0x20; i++ {
|
||||||
|
res = append(
|
||||||
|
res,
|
||||||
|
jsonTest{
|
||||||
|
name: fmt.Sprintf(baseTestName, i),
|
||||||
|
input: []byte(fmt.Sprintf(baseTestData, string(rune(i)))),
|
||||||
|
expect: []byte(fmt.Sprintf(expect, "", string(rune(i)))),
|
||||||
|
expectValid: assert.True,
|
||||||
|
},
|
||||||
|
jsonTest{
|
||||||
|
name: fmt.Sprintf(baseTestName, i) + " WithEscapedEscape",
|
||||||
|
input: []byte(fmt.Sprintf(baseTestData, `\\`+string(rune(i)))),
|
||||||
|
expect: []byte(fmt.Sprintf(expect, `\\`, string(rune(i)))),
|
||||||
|
expectValid: assert.True,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
||||||
|
func (suite *SanitizeJSONUnitSuite) TestJSONString() {
|
||||||
|
table := []jsonTest{
|
||||||
|
{
|
||||||
|
name: "AlreadyValid NoSpecialCharacters",
|
||||||
|
input: []byte(`{"foo":"bar"}`),
|
||||||
|
expect: []byte(`{"foo":"bar"}`),
|
||||||
|
expectValid: assert.True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "AlreadyValid SpecialCharacters",
|
||||||
|
input: []byte(`{"foo":"ba\\r\""}`),
|
||||||
|
expect: []byte(`{"foo":"ba\\r\""}`),
|
||||||
|
expectValid: assert.True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
allTests := append(generateCharacterTests(), table...)
|
||||||
|
|
||||||
|
for _, test := range allTests {
|
||||||
|
suite.Run(test.name, func() {
|
||||||
|
t := suite.T()
|
||||||
|
|
||||||
|
got := sanitize.JSONBytes(test.input)
|
||||||
|
|
||||||
|
assert.Equal(t, test.expect, got)
|
||||||
|
test.expectValid(t, json.Valid(got))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,6 +1,9 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"regexp"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -12,6 +15,7 @@ import (
|
|||||||
"github.com/stretchr/testify/suite"
|
"github.com/stretchr/testify/suite"
|
||||||
|
|
||||||
"github.com/alcionai/corso/src/internal/common/ptr"
|
"github.com/alcionai/corso/src/internal/common/ptr"
|
||||||
|
"github.com/alcionai/corso/src/internal/common/sanitize"
|
||||||
exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock"
|
exchMock "github.com/alcionai/corso/src/internal/m365/service/exchange/mock"
|
||||||
"github.com/alcionai/corso/src/internal/tester"
|
"github.com/alcionai/corso/src/internal/tester"
|
||||||
"github.com/alcionai/corso/src/internal/tester/tconfig"
|
"github.com/alcionai/corso/src/internal/tester/tconfig"
|
||||||
@ -523,3 +527,246 @@ func (suite *MailAPIIntgSuite) TestMail_GetContainerByName_mocked() {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func sendItemWithBodyAndGetSerialized(
|
||||||
|
t *testing.T,
|
||||||
|
ctx context.Context, //revive:disable-line:context-as-argument
|
||||||
|
msgs Mail,
|
||||||
|
userID string,
|
||||||
|
mailFolderID string,
|
||||||
|
subject string,
|
||||||
|
bodyContent string,
|
||||||
|
contentType models.BodyType,
|
||||||
|
) []byte {
|
||||||
|
msg := models.NewMessage()
|
||||||
|
msg.SetSubject(ptr.To(subject))
|
||||||
|
|
||||||
|
body := models.NewItemBody()
|
||||||
|
body.SetContent(ptr.To(bodyContent))
|
||||||
|
body.SetContentType(ptr.To(contentType))
|
||||||
|
|
||||||
|
msg.SetBody(body)
|
||||||
|
|
||||||
|
item, err := msgs.PostItem(ctx, userID, mailFolderID, msg)
|
||||||
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
fetched, _, err := msgs.GetItem(
|
||||||
|
ctx,
|
||||||
|
userID,
|
||||||
|
ptr.Val(item.GetId()),
|
||||||
|
false,
|
||||||
|
fault.New(true))
|
||||||
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
serialized, err := msgs.Serialize(
|
||||||
|
ctx,
|
||||||
|
fetched,
|
||||||
|
userID,
|
||||||
|
ptr.Val(item.GetId()))
|
||||||
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
return serialized
|
||||||
|
}
|
||||||
|
|
||||||
|
func sendSerializedItemAndGetSerialized(
|
||||||
|
t *testing.T,
|
||||||
|
ctx context.Context, //revive:disable-line:context-as-argument
|
||||||
|
msgs Mail,
|
||||||
|
userID string,
|
||||||
|
mailFolderID string,
|
||||||
|
serializedInput []byte,
|
||||||
|
) []byte {
|
||||||
|
msg, err := BytesToMessageable(serializedInput)
|
||||||
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
item, err := msgs.PostItem(ctx, userID, mailFolderID, msg)
|
||||||
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
fetched, _, err := msgs.GetItem(
|
||||||
|
ctx,
|
||||||
|
userID,
|
||||||
|
ptr.Val(item.GetId()),
|
||||||
|
false,
|
||||||
|
fault.New(true))
|
||||||
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
serialized, err := msgs.Serialize(
|
||||||
|
ctx,
|
||||||
|
fetched,
|
||||||
|
userID,
|
||||||
|
ptr.Val(item.GetId()))
|
||||||
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
return serialized
|
||||||
|
}
|
||||||
|
|
||||||
|
func (suite *MailAPIIntgSuite) TestMail_WithSpecialCharacters() {
|
||||||
|
t := suite.T()
|
||||||
|
|
||||||
|
ctx, flush := tester.NewContext(t)
|
||||||
|
defer flush()
|
||||||
|
|
||||||
|
contentRegex := regexp.MustCompile(`"content": ?"(.*?"?)",?`)
|
||||||
|
|
||||||
|
userID := tconfig.M365UserID(suite.T())
|
||||||
|
|
||||||
|
folderName := testdata.DefaultRestoreConfig("EscapeCharacters").Location
|
||||||
|
msgs := suite.its.ac.Mail()
|
||||||
|
mailfolder, err := msgs.CreateContainer(ctx, userID, MsgFolderRoot, folderName)
|
||||||
|
require.NoError(t, err, clues.ToCore(err))
|
||||||
|
|
||||||
|
escapeCharRanges := [][]int{
|
||||||
|
{0x0, 0x20},
|
||||||
|
{0x22, 0x23},
|
||||||
|
{0x5c, 0x5d},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, charRange := range escapeCharRanges {
|
||||||
|
for i := charRange[0]; i < charRange[1]; i++ {
|
||||||
|
subject := fmt.Sprintf("plain text character %x", i)
|
||||||
|
|
||||||
|
//suite.Run(subject, func() {
|
||||||
|
// t := suite.T()
|
||||||
|
|
||||||
|
// ctx, flush := tester.NewContext(t)
|
||||||
|
// defer flush()
|
||||||
|
|
||||||
|
bodyContent := string(rune(i))
|
||||||
|
|
||||||
|
serialized := sendItemWithBodyAndGetSerialized(
|
||||||
|
t,
|
||||||
|
ctx,
|
||||||
|
msgs,
|
||||||
|
userID,
|
||||||
|
ptr.Val(mailfolder.GetId()),
|
||||||
|
subject,
|
||||||
|
bodyContent,
|
||||||
|
models.TEXT_BODYTYPE)
|
||||||
|
|
||||||
|
matches := contentRegex.FindAllSubmatch(serialized, -1)
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case len(matches) == 0:
|
||||||
|
t.Logf("text of 0x%x wasn't found", i)
|
||||||
|
|
||||||
|
case len(matches[0]) < 2:
|
||||||
|
t.Logf("text of 0x%x was removed", i)
|
||||||
|
|
||||||
|
case bodyContent != string(matches[0][1]):
|
||||||
|
t.Logf("text of 0x%x has been transformed to %s", i, matches[0][1])
|
||||||
|
}
|
||||||
|
|
||||||
|
sanitized := sanitize.JSONBytes(serialized)
|
||||||
|
newSerialized := sendSerializedItemAndGetSerialized(
|
||||||
|
t,
|
||||||
|
ctx,
|
||||||
|
msgs,
|
||||||
|
userID,
|
||||||
|
ptr.Val(mailfolder.GetId()),
|
||||||
|
sanitized)
|
||||||
|
|
||||||
|
newMatches := contentRegex.FindAllSubmatch(newSerialized, -1)
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case len(newMatches) == 0:
|
||||||
|
t.Logf("sanitized text of 0x%x wasn't found", i)
|
||||||
|
|
||||||
|
case len(newMatches[0]) < 2:
|
||||||
|
t.Logf("sanitized text of 0x%x was removed", i)
|
||||||
|
|
||||||
|
case bodyContent != string(newMatches[0][1]):
|
||||||
|
t.Logf(
|
||||||
|
"sanitized text of 0x%x has been transformed to %s",
|
||||||
|
i,
|
||||||
|
newMatches[0][1])
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.Equal(t, matches[0][1], newMatches[0][1])
|
||||||
|
//})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
testSequences := []string{
|
||||||
|
// Character code for backspace
|
||||||
|
"\u0008",
|
||||||
|
"\\u0008",
|
||||||
|
"u0008",
|
||||||
|
// Character code for \
|
||||||
|
"\u005c",
|
||||||
|
"\\u005c",
|
||||||
|
"u005c",
|
||||||
|
// Character code for "
|
||||||
|
"\u0022",
|
||||||
|
"\\u0022",
|
||||||
|
"u0022",
|
||||||
|
// Character code for B
|
||||||
|
"\u0042",
|
||||||
|
"\\u0042",
|
||||||
|
"u0042",
|
||||||
|
"\\n",
|
||||||
|
"\\\n",
|
||||||
|
"n" + string(rune(0)),
|
||||||
|
"n" + string(rune(0)) + "n",
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, sequence := range testSequences {
|
||||||
|
subject := fmt.Sprintf("plain text sequence %d", i)
|
||||||
|
|
||||||
|
//suite.Run(subject, func() {
|
||||||
|
// t := suite.T()
|
||||||
|
|
||||||
|
// ctx, flush := tester.NewContext(t)
|
||||||
|
// defer flush()
|
||||||
|
|
||||||
|
serialized := sendItemWithBodyAndGetSerialized(
|
||||||
|
t,
|
||||||
|
ctx,
|
||||||
|
msgs,
|
||||||
|
userID,
|
||||||
|
ptr.Val(mailfolder.GetId()),
|
||||||
|
subject,
|
||||||
|
sequence,
|
||||||
|
models.TEXT_BODYTYPE)
|
||||||
|
|
||||||
|
matches := contentRegex.FindAllSubmatch(serialized, -1)
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case len(matches) == 0:
|
||||||
|
t.Logf("sequence %d wasn't found", i)
|
||||||
|
|
||||||
|
case len(matches[0]) < 2:
|
||||||
|
t.Logf("sequence %d was removed", i)
|
||||||
|
|
||||||
|
case sequence != string(matches[0][1]):
|
||||||
|
t.Logf("sequence %d has been transformed to %s", i, matches[0][1])
|
||||||
|
}
|
||||||
|
|
||||||
|
sanitized := sanitize.JSONBytes(serialized)
|
||||||
|
newSerialized := sendSerializedItemAndGetSerialized(
|
||||||
|
t,
|
||||||
|
ctx,
|
||||||
|
msgs,
|
||||||
|
userID,
|
||||||
|
ptr.Val(mailfolder.GetId()),
|
||||||
|
sanitized)
|
||||||
|
|
||||||
|
newMatches := contentRegex.FindAllSubmatch(newSerialized, -1)
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case len(newMatches) == 0:
|
||||||
|
t.Logf("sanitized sequence %d wasn't found", i)
|
||||||
|
|
||||||
|
case len(newMatches[0]) < 2:
|
||||||
|
t.Logf("sanitized sequence %d was removed", i)
|
||||||
|
|
||||||
|
case sequence != string(newMatches[0][1]):
|
||||||
|
t.Logf(
|
||||||
|
"sanitized sequence %d has been transformed to %s",
|
||||||
|
i,
|
||||||
|
newMatches[0][1])
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.Equal(t, matches[0][1], newMatches[0][1])
|
||||||
|
//})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user