Simple sanitizer function for JSON
Convert unescaped characters < 0x20 in JSON input to their escaped form so that the input is considered valid JSON again.
This commit is contained in:
parent
f94efecb23
commit
67dd227fb5
48
src/internal/common/sanitize/json.go
Normal file
48
src/internal/common/sanitize/json.go
Normal file
@ -0,0 +1,48 @@
|
||||
package sanitize
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
// JSONBytes takes a []byte containing JSON as input and returns a []byte
// containing the same content but with any unescaped character codes < 0x20
// that appear inside JSON string literals replaced by a \uXXXX escape
// sequence.
//
// Tab, line feed, and carriage return bytes that appear outside of string
// literals are legal JSON whitespace (RFC 8259) and are passed through
// unchanged so that already-valid, pretty-printed JSON stays valid. Any other
// control character found outside of a string is still escaped.
//
// String boundaries are found by looking for double quotes (0x22) that are not
// preceded by an escape character (0x5c). The input is assumed to have its
// double quotes properly escaped by whatever handed us the JSON.
//
// Empty (or nil) input is returned as-is. Otherwise the result is a newly
// allocated slice that does not share memory with input.
func JSONBytes(input []byte) []byte {
	if len(input) == 0 {
		return input
	}

	// Avoid most reallocations by just getting a buffer of the right size to
	// start with.
	// TODO(ashmrtn): We may actually want to overshoot this a little so we won't
	// cause a reallocation and possible doubling in size if we only need to
	// escape a few characters.
	buf := bytes.Buffer{}
	buf.Grow(len(input))

	var (
		// inString is true while we're between the opening and closing quotes of
		// a JSON string literal.
		inString bool
		// escaped is true when the previous byte was an escape character (0x5c)
		// that starts an escape sequence inside a string literal.
		escaped bool
	)

	for _, c := range input {
		switch {
		case c < 0x20:
			// Escape character ranges taken from RFC 8259.
			if !inString && (c == '\t' || c == '\n' || c == '\r') {
				// Insignificant whitespace between tokens. Escaping it would turn
				// valid JSON into invalid JSON.
				buf.WriteByte(c)
				continue
			}

			// We could use transforms like linefeed to \n, but it's actually easier,
			// if a little less space efficient, to just turn the character into a
			// multi-character sequence denoting a unicode character.
			fmt.Fprintf(&buf, `\u%04X`, c)

			// If an escape character preceded this byte, it now pairs with the
			// backslash we just wrote, so the escape sequence is finished.
			escaped = false

		case !inString:
			if c == '"' {
				inString = true
			}

			buf.WriteByte(c)

		case escaped:
			// Second byte of an escape sequence (e.g. the quote in \"). It never
			// ends the string or starts another escape sequence.
			escaped = false

			buf.WriteByte(c)

		case c == '\\':
			escaped = true

			buf.WriteByte(c)

		case c == '"':
			inString = false

			buf.WriteByte(c)

		default:
			buf.WriteByte(c)
		}
	}

	// Return a copy just so we don't hold a reference to internal bytes.Buffer
	// data.
	return slices.Clone(buf.Bytes())
}
|
||||
88
src/internal/common/sanitize/json_test.go
Normal file
88
src/internal/common/sanitize/json_test.go
Normal file
@ -0,0 +1,88 @@
|
||||
package sanitize_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/suite"
|
||||
|
||||
"github.com/alcionai/corso/src/internal/common/sanitize"
|
||||
"github.com/alcionai/corso/src/internal/tester"
|
||||
)
|
||||
|
||||
type SanitizeJSONUnitSuite struct {
|
||||
tester.Suite
|
||||
}
|
||||
|
||||
func TestSanitizeJSONUnitSuite(t *testing.T) {
|
||||
suite.Run(t, &SanitizeJSONUnitSuite{Suite: tester.NewUnitSuite(t)})
|
||||
}
|
||||
|
||||
type jsonTest struct {
|
||||
name string
|
||||
input []byte
|
||||
expect []byte
|
||||
expectValid assert.BoolAssertionFunc
|
||||
}
|
||||
|
||||
func generateCharacterTests() []jsonTest {
|
||||
var (
|
||||
res []jsonTest
|
||||
|
||||
baseTestName = "Escape0x%02X"
|
||||
baseTestData = `{"foo":"ba%sr"}`
|
||||
expect = `{"foo":"ba%s\u00%02Xr"}`
|
||||
)
|
||||
|
||||
for i := 0; i < 0x20; i++ {
|
||||
res = append(
|
||||
res,
|
||||
jsonTest{
|
||||
name: fmt.Sprintf(baseTestName, i),
|
||||
input: []byte(fmt.Sprintf(baseTestData, string(rune(i)))),
|
||||
expect: []byte(fmt.Sprintf(expect, "", string(rune(i)))),
|
||||
expectValid: assert.True,
|
||||
},
|
||||
jsonTest{
|
||||
name: fmt.Sprintf(baseTestName, i) + " WithEscapedEscape",
|
||||
input: []byte(fmt.Sprintf(baseTestData, `\\`+string(rune(i)))),
|
||||
expect: []byte(fmt.Sprintf(expect, `\\`, string(rune(i)))),
|
||||
expectValid: assert.True,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func (suite *SanitizeJSONUnitSuite) TestJSONString() {
|
||||
table := []jsonTest{
|
||||
{
|
||||
name: "AlreadyValid NoSpecialCharacters",
|
||||
input: []byte(`{"foo":"bar"}`),
|
||||
expect: []byte(`{"foo":"bar"}`),
|
||||
expectValid: assert.True,
|
||||
},
|
||||
{
|
||||
name: "AlreadyValid SpecialCharacters",
|
||||
input: []byte(`{"foo":"ba\\r\""}`),
|
||||
expect: []byte(`{"foo":"ba\\r\""}`),
|
||||
expectValid: assert.True,
|
||||
},
|
||||
}
|
||||
|
||||
allTests := append(generateCharacterTests(), table...)
|
||||
|
||||
for _, test := range allTests {
|
||||
suite.Run(test.name, func() {
|
||||
t := suite.T()
|
||||
|
||||
got := sanitize.JSONBytes(test.input)
|
||||
|
||||
assert.Equal(t, test.expect, got)
|
||||
test.expectValid(t, json.Valid(got))
|
||||
})
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user