// JSONBytes takes a []byte containing JSON as input and returns a []byte with
// the same content, except that raw control characters (bytes < 0x20) that
// would make the document invalid are replaced by their six-character
// `\u00XX` escape sequence.
//
// Escaping rules, following RFC 8259:
//   - Inside a string literal every byte < 0x20 is escaped, since JSON strings
//     may not contain raw control characters.
//   - Outside a string literal tab, line feed, and carriage return are legal
//     insignificant whitespace (e.g. pretty-printed JSON) and are passed
//     through untouched; escaping them would turn valid JSON into invalid
//     JSON. Any other control character is still escaped.
//
// Backslashes (0x5c) and double quotes (0x22) are never rewritten. They are
// only inspected to track whether the current byte is inside a string, on the
// assumption that whatever produced the JSON escaped them properly.
//
// Empty input is returned as-is. Otherwise the result is a freshly allocated
// slice that does not alias input.
func JSONBytes(input []byte) []byte {
	if len(input) == 0 {
		return input
	}

	var (
		buf bytes.Buffer
		// inString is true while the scan is between an opening and a closing
		// double quote.
		inString bool
		// escaped is true when the previous byte was an unescaped backslash
		// inside a string, meaning the current byte belongs to that escape
		// sequence and cannot terminate the string.
		escaped bool
	)

	// Avoid most reallocations by starting with a buffer of the input's size.
	// TODO(ashmrtn): We may actually want to overshoot this a little so we won't
	// cause a reallocation and possible doubling in size if we only need to
	// escape a few characters.
	buf.Grow(len(input))

	for _, c := range input {
		afterBackslash := escaped
		escaped = false

		switch {
		case c < 0x20 && (inString || (c != '\t' && c != '\n' && c != '\r')):
			// We could map to the short forms (e.g. linefeed -> \n), but it's
			// simpler, if slightly less space efficient, to always emit the
			// generic unicode escape. Writes to a bytes.Buffer never fail, so the
			// returned error is intentionally not checked.
			fmt.Fprintf(&buf, `\u%04X`, c)
			continue

		case c == '"' && !afterBackslash:
			// An unescaped quote opens or closes a string literal.
			inString = !inString

		case c == '\\' && inString && !afterBackslash:
			// Start of an escape sequence; the next byte must not be treated as
			// a string delimiter or as the start of another escape.
			escaped = true
		}

		buf.WriteByte(c)
	}

	// Return a copy just so we don't hold a reference to internal bytes.Buffer
	// data.
	return slices.Clone(buf.Bytes())
}
+ + for _, test := range allTests { + suite.Run(test.name, func() { + t := suite.T() + + got := sanitize.JSONBytes(test.input) + + assert.Equal(t, test.expect, got) + test.expectValid(t, json.Valid(got)) + }) + } +}