Simple sanitizer function for JSON

Convert unescaped characters < 0x20 in JSON input to their escaped form so
the input is considered valid JSON again.
This commit is contained in:
Ashlie Martinez 2023-12-21 16:52:51 -07:00
parent f94efecb23
commit 67dd227fb5
2 changed files with 136 additions and 0 deletions

View File

@ -0,0 +1,48 @@
package sanitize
import (
"bytes"
"fmt"
"golang.org/x/exp/slices"
)
// JSONBytes takes a []byte containing JSON as input and returns a []byte
// containing the same content but with raw character codes < 0x20 replaced by
// their \u00XX escape sequences.
//
// Control characters found inside a JSON string are always escaped. Tab, line
// feed, and carriage return found outside of a string are insignificant
// whitespace according to RFC 8259 (e.g. the formatting of pretty-printed
// JSON) and are passed through unchanged; escaping them there would turn valid
// JSON into invalid JSON. Any other control character outside of a string is
// still escaped.
//
// String boundaries are tracked by looking for double quotes that aren't
// preceded by an escape character, so this assumes double quotes inside
// strings were properly escaped by whatever handed us the JSON.
//
// Empty input is returned as-is. For non-empty input the returned slice is a
// fresh copy that doesn't alias input.
func JSONBytes(input []byte) []byte {
	if len(input) == 0 {
		return input
	}

	// Avoid most reallocations by just getting a buffer of the right size to
	// start with.
	// TODO(ashmrtn): We may actually want to overshoot this a little so we won't
	// cause a reallocation and possible doubling in size if we only need to
	// escape a few characters.
	buf := bytes.Buffer{}
	buf.Grow(len(input))

	var (
		// inString is true while we're between the quotes of a JSON string.
		inString bool
		// escaped is true when the previous byte was an unescaped escape
		// character (0x5c) inside of a string.
		escaped bool
	)

	for _, c := range input {
		switch {
		case c < 0x20:
			// Escape character ranges taken from RFC 8259. A raw control character
			// can never be the target of an escape so drop any pending escape.
			escaped = false

			if !inString && (c == '\t' || c == '\n' || c == '\r') {
				// Formatting whitespace between JSON tokens. It's already valid and
				// escaping it would break the document.
				buf.WriteByte(c)
				break
			}

			// We need to escape the character and transform it (e.g. linefeed ->
			// \n). We could use transforms like linefeed to \n, but it's actually
			// easier, if a little less space efficient, to just turn them into
			// multi-character sequences denoting a unicode character.
			fmt.Fprintf(&buf, `\u%04X`, c)

		case escaped:
			// This byte is consumed by the preceding escape character, so it can't
			// open or close a string nor start another escape.
			escaped = false

			buf.WriteByte(c)

		case inString && c == '\\':
			escaped = true

			buf.WriteByte(c)

		case c == '"':
			inString = !inString

			buf.WriteByte(c)

		default:
			buf.WriteByte(c)
		}
	}

	// Return a copy just so we don't hold a reference to internal bytes.Buffer
	// data.
	return slices.Clone(buf.Bytes())
}

View File

@ -0,0 +1,88 @@
package sanitize_test
import (
"encoding/json"
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/common/sanitize"
"github.com/alcionai/corso/src/internal/tester"
)
// SanitizeJSONUnitSuite groups the unit tests for the JSON sanitizer. It
// embeds tester.Suite, which is supplied by tester.NewUnitSuite when the suite
// is run.
type SanitizeJSONUnitSuite struct {
tester.Suite
}
func TestSanitizeJSONUnitSuite(t *testing.T) {
suite.Run(t, &SanitizeJSONUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// jsonTest describes a single table-driven case for the JSON sanitizer.
type jsonTest struct {
// name is used as the subtest name.
name string
// input is the raw data handed to the sanitizer.
input []byte
// expect is the exact output the sanitizer should produce for input.
expect []byte
// expectValid is applied to the result of json.Valid on the sanitizer's
// output.
expectValid assert.BoolAssertionFunc
}
// generateCharacterTests builds two test cases for every character code in
// [0x00, 0x20): one where the raw character sits directly inside a JSON string
// and one where it directly follows an escaped escape character (`\\`). In
// both cases the expected output has the raw character replaced with its
// \u00XX form and is expected to be valid JSON.
func generateCharacterTests() []jsonTest {
	const (
		nameFmt   = "Escape0x%02X"
		inputFmt  = `{"foo":"ba%sr"}`
		expectFmt = `{"foo":"ba%s\u00%02Xr"}`
	)

	// Each character code is tested with each of these variants, in order.
	variants := []struct {
		nameSuffix string
		prefix     string
	}{
		{nameSuffix: "", prefix: ""},
		{nameSuffix: " WithEscapedEscape", prefix: `\\`},
	}

	tests := make([]jsonTest, 0, 0x20*len(variants))

	for code := 0; code < 0x20; code++ {
		var (
			caseName = fmt.Sprintf(nameFmt, code)
			rawChar  = string(rune(code))
		)

		for _, v := range variants {
			tests = append(tests, jsonTest{
				name:  caseName + v.nameSuffix,
				input: []byte(fmt.Sprintf(inputFmt, v.prefix+rawChar)),
				// Formatting the code with %02X yields the same two hex digits as
				// formatting the single-byte string form of the character.
				expect:      []byte(fmt.Sprintf(expectFmt, v.prefix, code)),
				expectValid: assert.True,
			})
		}
	}

	return tests
}
// TestJSONString runs sanitize.JSONBytes over the generated control-character
// cases followed by a couple of hand-written inputs that are already valid,
// checking both the exact output bytes and whether the output parses as JSON.
func (suite *SanitizeJSONUnitSuite) TestJSONString() {
	// Inputs that need no sanitizing and should come back untouched.
	alreadyValid := []jsonTest{
		{
			name:        "AlreadyValid NoSpecialCharacters",
			input:       []byte(`{"foo":"bar"}`),
			expect:      []byte(`{"foo":"bar"}`),
			expectValid: assert.True,
		},
		{
			name:        "AlreadyValid SpecialCharacters",
			input:       []byte(`{"foo":"ba\\r\""}`),
			expect:      []byte(`{"foo":"ba\\r\""}`),
			expectValid: assert.True,
		},
	}

	cases := generateCharacterTests()
	cases = append(cases, alreadyValid...)

	for _, tc := range cases {
		suite.Run(tc.name, func() {
			t := suite.T()

			sanitized := sanitize.JSONBytes(tc.input)

			assert.Equal(t, tc.expect, sanitized)
			tc.expectValid(t, json.Valid(sanitized))
		})
	}
}