fill in more path functions (#401)

Segment-based ctor and tests
This commit is contained in:
ashmrtn 2022-07-27 08:24:37 -07:00 committed by GitHub
parent 3e792e69eb
commit 9b28d71705
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 278 additions and 3 deletions

View File

@ -78,7 +78,8 @@ type Base struct {
}
// newPath takes a path that is broken into segments and elements in the segment
// and returns a Base. Each element in the input will get escaped.
// Example: [this, is\, a, path] will transform into [this, is\\, a, path].
func newPath(segments [][]string) Base {
if len(segments) == 0 {
return Base{}
@ -104,9 +105,35 @@ func newPath(segments [][]string) Base {
// newPathFromEscapedSegments takes already escaped segments of a path, verifies
// the segments are escaped properly, and returns a new Base struct. If the
// final segment ends in an unescaped '/' that separator is removed. This
// function is safe to use with escaped user input where each chunk is a
// segment. For example, the input [this, is\//a, path] will produce:
// segments: [this, is\//a, path]
// elements: [this, is\/, a, path].
//
// An empty input yields an empty Base and a nil error, the same result newPath
// gives for an empty input. A non-nil error is returned when validateSegments
// rejects any segment. The input slice is never modified.
func newPathFromEscapedSegments(segments []string) (Base, error) {
	b := Base{}

	// Nothing to validate or split; also keeps the last-element index below
	// in range.
	if len(segments) == 0 {
		return b, nil
	}

	if err := validateSegments(segments); err != nil {
		return b, errors.Wrap(err, "validating escaped path")
	}

	// Make a copy of the input so we don't modify the original slice.
	tmpSegments := make([]string, len(segments))
	copy(tmpSegments, segments)

	// Only the final segment is trimmed. An empty final segment has no
	// trailing separator to inspect, so it is left alone.
	if last := len(tmpSegments) - 1; len(tmpSegments[last]) > 0 {
		tmpSegments[last] = trimTrailingSlash(tmpSegments[last])
	}

	for _, s := range tmpSegments {
		newElems := split(s)
		if len(newElems) == 0 {
			// A segment without elements does not start a new segment.
			continue
		}

		// Record the index in the flat element list where this segment begins.
		b.segmentIdx = append(b.segmentIdx, len(b.elements))
		b.elements = append(b.elements, newElems...)
	}

	return b, nil
}
// String returns a string that contains all path segments joined
@ -169,6 +196,65 @@ func escapeElement(element string) string {
return b.String()
}
// validateSegments takes a slice of segments and ensures that escaped
// sequences match the set of characters that need escaping and that there
// aren't hanging escape characters at the end of a segment.
func validateSegments(segments []string) error {
for _, segment := range segments {
prevWasEscape := false
for _, c := range segment {
switch prevWasEscape {
case true:
prevWasEscape = false
if _, ok := charactersToEscape[c]; !ok {
return errors.Errorf(
"bad escape sequence in path: '%c%c'", escapeCharacter, c)
}
case false:
if c == escapeCharacter {
prevWasEscape = true
}
}
}
if prevWasEscape {
return errors.New("trailing escape character in segment")
}
}
return nil
}
// trimTrailingSlash takes an escaped path element and returns an escaped path
// element with the trailing path separator character removed if it was not
// escaped. If the element is empty, has no trailing path separator character,
// or the trailing separator is escaped, the input is returned unchanged.
func trimTrailingSlash(element string) string {
	// An empty element has nothing to trim; returning early keeps the index
	// below in range.
	if len(element) == 0 {
		return element
	}

	lastIdx := len(element) - 1
	if element[lastIdx] != pathSeparator {
		return element
	}

	// Count the run of escape characters directly before the separator. An odd
	// count means the last of them escapes the separator; an even count means
	// they only escape each other and the separator is a real one.
	numEscapes := 0
	for i := lastIdx - 1; i >= 0 && element[i] == escapeCharacter; i-- {
		numEscapes++
	}

	if numEscapes%2 != 0 {
		return element
	}

	return element[:lastIdx]
}
// join returns a string containing the given elements joined by the path
// separator '/'.
func join(elements []string) string {
@ -176,3 +262,52 @@ func join(elements []string) string {
// '\' according to the escaping rules.
return strings.Join(elements, string(pathSeparator))
}
// split returns a slice of path elements for the given segment when the segment
// is split on the path separator according to the escaping rules. A separator
// preceded by an odd number of escape characters is escaped and stays inside
// its element. Leading, repeated, and trailing unescaped separators never
// produce empty elements, so an empty segment (or one made only of
// separators) yields an empty, non-nil slice.
func split(segment string) []string {
	res := make([]string, 0)
	// Length of the run of escape characters immediately before the current
	// rune.
	numEscapes := 0
	// Byte offset where the element currently being scanned begins.
	startIdx := 0
	// Start with true to ignore leading separator.
	prevWasSeparator := true

	for i, c := range segment {
		if c == escapeCharacter {
			numEscapes++
			prevWasSeparator = false

			continue
		}

		// The escape run ends at this rune; remember whether it escapes it.
		escaped := numEscapes%2 != 0
		numEscapes = 0

		// Ordinary characters and escaped separators are element content.
		if c != pathSeparator || escaped {
			prevWasSeparator = false
			continue
		}

		// Ignore leading separator characters and don't add elements that would
		// be empty.
		if !prevWasSeparator {
			res = append(res, segment[startIdx:i])
		}

		// We don't want to include the path separator in the result.
		startIdx = i + 1
		prevWasSeparator = true
	}

	// Add the final element because the loop above won't catch it. Skip it when
	// nothing follows the last separator (or the segment is empty) so no empty
	// element is produced.
	if startIdx < len(segment) {
		res = append(res, segment[startIdx:])
	}

	return res
}

View File

@ -1,9 +1,12 @@
package path
import (
"fmt"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
)
@ -109,3 +112,140 @@ func (suite *PathUnitSuite) TestPathEscapingAndSegments() {
})
}
}
// TestPathSplitsEscapedPath feeds well-formed escaped segments to
// newPathFromEscapedSegments and checks both the string form of the resulting
// path and the contents of each expected segment.
func (suite *PathUnitSuite) TestPathSplitsEscapedPath() {
	type testCase struct {
		name             string
		input            []string
		expected         string
		expectedSegments []string
	}

	cases := []testCase{
		{
			name:             "SimplePath",
			input:            []string{`this`, `is/a`, `path`},
			expected:         "this/is/a/path",
			expectedSegments: []string{`this`, `is/a`, `path`},
		},
		{
			name:             "EscapeSeparator",
			input:            []string{`this`, `is\/a`, `path`},
			expected:         `this/is\/a/path`,
			expectedSegments: []string{`this`, `is\/a`, `path`},
		},
		{
			name:             "EscapeEscapeChar",
			input:            []string{`this`, `is\\/a`, `path`},
			expected:         `this/is\\/a/path`,
			expectedSegments: []string{`this`, `is\\/a`, `path`},
		},
		{
			name:             "EmptyInternalElement",
			input:            []string{`this`, `is//a`, `path`},
			expected:         "this/is/a/path",
			expectedSegments: []string{`this`, `is/a`, `path`},
		},
		{
			name:             "SeparatorAtEndOfElement",
			input:            []string{`this`, `is\//a`, `path`},
			expected:         `this/is\//a/path`,
			expectedSegments: []string{`this`, `is\//a`, `path`},
		},
		{
			name:             "SeparatorAtEndOfPath",
			input:            []string{`this`, `is/a`, `path\/`},
			expected:         `this/is/a/path\/`,
			expectedSegments: []string{`this`, `is/a`, `path\/`},
		},
		{
			name:             "TrailingSeparator",
			input:            []string{`this`, `is/a`, `path/`},
			expected:         `this/is/a/path`,
			expectedSegments: []string{`this`, `is/a`, `path`},
		},
		{
			name:             "TrailingSeparator2",
			input:            []string{`this`, `is/a`, `path\\\\/`},
			expected:         `this/is/a/path\\\\`,
			expectedSegments: []string{`this`, `is/a`, `path\\\\`},
		},
		{
			name:             "ManyEscapesNotSeparator",
			input:            []string{`this`, `is\\\\/a`, `path/`},
			expected:         `this/is\\\\/a/path`,
			expectedSegments: []string{`this`, `is\\\\/a`, `path`},
		},
		{
			name:             "ManyEscapesAndSeparator",
			input:            []string{`this`, `is\\\/a`, `path`},
			expected:         `this/is\\\/a/path`,
			expectedSegments: []string{`this`, `is\\\/a`, `path`},
		},
	}

	for _, tc := range cases {
		suite.T().Run(tc.name, func(t *testing.T) {
			p, err := newPathFromEscapedSegments(tc.input)
			require.NoError(t, err)

			assert.Equal(t, tc.expected, p.String())

			for idx, want := range tc.expectedSegments {
				var got string

				// Wrap the lookup so a panic is reported as a failure of this
				// subtest.
				require.NotPanics(t, func() {
					got = p.segment(idx)
				})

				assert.Equal(t, want, got)
			}
		})
	}
}
// TestEscapedFailure places each character from charactersToEscape, unescaped,
// into the middle segment of a path and expects newPathFromEscapedSegments to
// return an error. The path separator is excluded from the check.
func (suite *PathUnitSuite) TestEscapedFailure() {
	const template = "i_s/a"

	for special := range charactersToEscape {
		// Extra path separators in the path will just lead to more segments,
		// not a validation error.
		if special == pathSeparator {
			continue
		}

		segment := strings.ReplaceAll(template, "_", string(special))

		_, err := newPathFromEscapedSegments([]string{"this", segment, "path"})
		assert.Error(suite.T(), err, "path with unescaped %s did not error", string(special))
	}
}
// TestBadEscapeSequenceErrors puts an escape character in front of several
// characters the test treats as not escapable and expects
// newPathFromEscapedSegments to return an error for each.
func (suite *PathUnitSuite) TestBadEscapeSequenceErrors() {
	const template = `i\_s/a`

	for _, r := range []rune{'a', 'b', '#', '%'} {
		segment := strings.ReplaceAll(template, "_", string(r))

		_, err := newPathFromEscapedSegments([]string{"this", segment, "path"})
		assert.Error(
			suite.T(),
			err,
			"path with bad escape sequence %c%c did not error",
			escapeCharacter,
			r,
		)
	}
}
// TestTrailingEscapeChar appends a lone escape character to each segment of a
// valid path in turn and expects newPathFromEscapedSegments to return an
// error. Each position runs as its own subtest.
func (suite *PathUnitSuite) TestTrailingEscapeChar() {
	base := []string{"this", "is", "a", "path"}

	for i := range base {
		suite.T().Run(fmt.Sprintf("Segment%v", i), func(t *testing.T) {
			// Work on a copy so each subtest starts from the unmodified path.
			path := make([]string, len(base))
			copy(path, base)
			path[i] += string(escapeCharacter)

			_, err := newPathFromEscapedSegments(path)
			// Assert against the subtest's t so a failure is attributed to the
			// segment that caused it.
			assert.Error(t, err)
		})
	}
}