Simple sanitizer function for JSON
Convert JSON input that has unescaped characters < 0x20 to their escaped version so they're considered valid again.
This commit is contained in:
parent
f94efecb23
commit
67dd227fb5
48
src/internal/common/sanitize/json.go
Normal file
48
src/internal/common/sanitize/json.go
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
package sanitize
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"golang.org/x/exp/slices"
|
||||||
|
)
|
||||||
|
|
||||||
|
// JSONBytes takes a []byte containing JSON as input and returns a []byte
// containing the same content but with any character codes < 0x20 that weren't
// escaped in the original escaped properly.
//
// Control characters found inside a JSON string are rewritten as \u00XX
// sequences. Tab, line feed, and carriage return found outside of a string are
// legal insignificant whitespace per RFC 8259 and are copied through untouched
// so that pretty-printed JSON stays valid. Any other control character is
// escaped no matter where it appears.
//
// An empty (or nil) input is returned as-is. Otherwise the result is a freshly
// allocated slice that doesn't alias input.
func JSONBytes(input []byte) []byte {
	if len(input) == 0 {
		return input
	}

	// Avoid most reallocations by just getting a buffer of the right size to
	// start with.
	// TODO(ashmrtn): We may actually want to overshoot this a little so we won't
	// cause a reallocation and possible doubling in size if we only need to
	// escape a few characters.
	buf := bytes.Buffer{}
	buf.Grow(len(input))

	var (
		// inString is true while we're between the opening and closing double
		// quote of a JSON string.
		inString bool
		// escaped is true when the previous byte was a backslash inside a string
		// that hasn't yet been paired with the byte it escapes.
		escaped bool
	)

	for _, c := range input {
		switch {
		case c < 0x20:
			// Escape character ranges taken from RFC 8259. We're assuming double
			// quotes are properly escaped by whatever handed us the JSON, which is
			// what makes the inString tracking trustworthy.
			if !inString && (c == '\t' || c == '\n' || c == '\r') {
				// Formatting whitespace between tokens. Escaping it would turn valid
				// JSON into invalid JSON.
				buf.WriteByte(c)
				continue
			}

			// We could use transforms like linefeed to \n, but it's actually easier,
			// if a little less space efficient, to just turn them into
			// multi-character sequences denoting a unicode character.
			fmt.Fprintf(&buf, `\u%04X`, c)

			// If a backslash preceded this byte it now pairs with the backslash we
			// just emitted, so nothing is pending anymore.
			escaped = false

		case escaped:
			// This byte completes an escape sequence (e.x. \" or \\) and so can't
			// open or close a string.
			escaped = false

			buf.WriteByte(c)

		case inString && c == '\\':
			escaped = true

			buf.WriteByte(c)

		case c == '"':
			inString = !inString

			buf.WriteByte(c)

		default:
			buf.WriteByte(c)
		}
	}

	// Return a copy just so we don't hold a reference to internal bytes.Buffer
	// data.
	return slices.Clone(buf.Bytes())
}
|
||||||
88
src/internal/common/sanitize/json_test.go
Normal file
88
src/internal/common/sanitize/json_test.go
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
package sanitize_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/suite"
|
||||||
|
|
||||||
|
"github.com/alcionai/corso/src/internal/common/sanitize"
|
||||||
|
"github.com/alcionai/corso/src/internal/tester"
|
||||||
|
)
|
||||||
|
|
||||||
|
type SanitizeJSONUnitSuite struct {
|
||||||
|
tester.Suite
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSanitizeJSONUnitSuite(t *testing.T) {
|
||||||
|
suite.Run(t, &SanitizeJSONUnitSuite{Suite: tester.NewUnitSuite(t)})
|
||||||
|
}
|
||||||
|
|
||||||
|
type jsonTest struct {
|
||||||
|
name string
|
||||||
|
input []byte
|
||||||
|
expect []byte
|
||||||
|
expectValid assert.BoolAssertionFunc
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateCharacterTests() []jsonTest {
|
||||||
|
var (
|
||||||
|
res []jsonTest
|
||||||
|
|
||||||
|
baseTestName = "Escape0x%02X"
|
||||||
|
baseTestData = `{"foo":"ba%sr"}`
|
||||||
|
expect = `{"foo":"ba%s\u00%02Xr"}`
|
||||||
|
)
|
||||||
|
|
||||||
|
for i := 0; i < 0x20; i++ {
|
||||||
|
res = append(
|
||||||
|
res,
|
||||||
|
jsonTest{
|
||||||
|
name: fmt.Sprintf(baseTestName, i),
|
||||||
|
input: []byte(fmt.Sprintf(baseTestData, string(rune(i)))),
|
||||||
|
expect: []byte(fmt.Sprintf(expect, "", string(rune(i)))),
|
||||||
|
expectValid: assert.True,
|
||||||
|
},
|
||||||
|
jsonTest{
|
||||||
|
name: fmt.Sprintf(baseTestName, i) + " WithEscapedEscape",
|
||||||
|
input: []byte(fmt.Sprintf(baseTestData, `\\`+string(rune(i)))),
|
||||||
|
expect: []byte(fmt.Sprintf(expect, `\\`, string(rune(i)))),
|
||||||
|
expectValid: assert.True,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
||||||
|
func (suite *SanitizeJSONUnitSuite) TestJSONString() {
|
||||||
|
table := []jsonTest{
|
||||||
|
{
|
||||||
|
name: "AlreadyValid NoSpecialCharacters",
|
||||||
|
input: []byte(`{"foo":"bar"}`),
|
||||||
|
expect: []byte(`{"foo":"bar"}`),
|
||||||
|
expectValid: assert.True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "AlreadyValid SpecialCharacters",
|
||||||
|
input: []byte(`{"foo":"ba\\r\""}`),
|
||||||
|
expect: []byte(`{"foo":"ba\\r\""}`),
|
||||||
|
expectValid: assert.True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
allTests := append(generateCharacterTests(), table...)
|
||||||
|
|
||||||
|
for _, test := range allTests {
|
||||||
|
suite.Run(test.name, func() {
|
||||||
|
t := suite.T()
|
||||||
|
|
||||||
|
got := sanitize.JSONBytes(test.input)
|
||||||
|
|
||||||
|
assert.Equal(t, test.expect, got)
|
||||||
|
test.expectValid(t, json.Valid(got))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user