corso/src/internal/common/sanitize/json.go

package sanitize

import (
	"bytes"
	"fmt"

	"golang.org/x/exp/slices"
)

// JSONBytes returns a copy of input in which every byte below 0x20, other than
// line feed, horizontal tab, and carriage return, is replaced with a six-byte
// \u00XX escape sequence. All other bytes are copied through unchanged.
//
// The function does not parse JSON: it cannot tell whether a byte sits inside
// a string literal, and it leaves backslashes (0x5c) and double quotes (0x22)
// untouched on the assumption that whatever produced the JSON already escaped
// them properly.
//
// An empty (or nil) input is returned as-is without being copied.
func JSONBytes(input []byte) []byte {
	if len(input) == 0 {
		return input
	}

	// Avoid most reallocations by just getting a buffer of the right size to
	// start with.
	// TODO(ashmrtn): We may actually want to overshoot this a little so we won't
	// cause a reallocation and possible doubling in size if we only need to
	// escape a few characters.
	var buf bytes.Buffer
	buf.Grow(len(input))

	for _, c := range input {
		switch {
		case c == '\n' || c == '\t' || c == '\r':
			// These are the whitespace characters RFC 8259 allows between JSON
			// tokens (space is also allowed, but it is 0x20 and so never reaches
			// the escaping case below). They are passed through untouched because
			// they may just be formatting around the JSON fields, and escaping
			// them there would result in invalid JSON. Occurrences inside string
			// values appear to already be escaped by the producer.
			buf.WriteByte(c)

		case c < 0x20:
			// Control character range taken from RFC 8259. Rather than mapping
			// bytes to short escapes where they exist (e.g. backspace -> \b), every
			// byte is written as a \uXXXX sequence: slightly less space efficient
			// but uniform. Fprintf writes directly into buf, avoiding the temporary
			// string that Sprintf would allocate for each escaped byte.
			fmt.Fprintf(&buf, `\u%04X`, c)

		default:
			buf.WriteByte(c)
		}
	}

	// Return a copy just so we don't hold a reference to internal bytes.Buffer
	// data.
	return slices.Clone(buf.Bytes())
}