add Concealer compliance to paths (#3017)

Adds compliance with clues.Concealer to the paths
package.  Also introduces a new struct in the same
space: Elements, which is a thin wrapper around
a slice of strings so that subsections of a path
or builder can carry the same pii behavior
without additional work on the consumer's end.

---

#### Does this PR need a docs update or release note?

- [x] 🕐 Yes, but in a later PR

#### Type of change

- [x] 🌻 Feature
- [x] 🤖 Supportability/Tests

#### Issue(s)

* #2024

#### Test Plan

- [x]  Unit test
- [x] 💚 E2E
This commit is contained in:
Keepers 2023-04-11 16:04:34 -06:00 committed by GitHub
parent 9cea2d5333
commit 50e92b65c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 420 additions and 44 deletions

View File

@ -1,6 +1,11 @@
package pii
import "strings"
import (
"strings"
"github.com/alcionai/clues"
"golang.org/x/exp/slices"
)
// MapWithPlurls places the toLower value of each string
// into a map[string]struct{}, along with a copy of the that
@ -16,3 +21,23 @@ func MapWithPlurals(ss ...string) map[string]struct{} {
return mss
}
// ConcealElements conceals each element in elems that does not appear in
// the safe map. A copy of elems containing the changes is returned.
func ConcealElements(elems []string, safe map[string]struct{}) []string {
if len(elems) == 0 {
return []string{}
}
ces := slices.Clone(elems)
for i := range ces {
ce := ces[i]
if _, ok := safe[strings.ToLower(ce)]; !ok {
ces[i] = clues.Conceal(ce)
}
}
return ces
}

View File

@ -0,0 +1,97 @@
package pii_test
import (
"testing"
"github.com/alcionai/clues"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/common/pii"
"github.com/alcionai/corso/src/internal/tester"
)
type PIIUnitSuite struct {
tester.Suite
}
func TestPIIUnitSuite(t *testing.T) {
suite.Run(t, &PIIUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// set the clues hashing to mask for the span of this suite
func (suite *PIIUnitSuite) SetupSuite() {
clues.SetHasher(clues.HashCfg{HashAlg: clues.Flatmask})
}
// revert clues hashing to plaintext for all other tests
func (suite *PIIUnitSuite) TeardownSuite() {
clues.SetHasher(clues.NoHash())
}
func (suite *PIIUnitSuite) TestMapWithPlurals() {
t := suite.T()
mwp := pii.MapWithPlurals()
assert.Equal(t, map[string]struct{}{}, mwp)
mwp = pii.MapWithPlurals("")
assert.Equal(t, map[string]struct{}{"": {}, "s": {}}, mwp)
mwp = pii.MapWithPlurals(" ")
assert.Equal(t, map[string]struct{}{" ": {}, " s": {}}, mwp)
mwp = pii.MapWithPlurals("foo", "bar")
expect := map[string]struct{}{
"foo": {},
"foos": {},
"bar": {},
"bars": {},
}
assert.Equal(t, expect, mwp)
}
func (suite *PIIUnitSuite) TestConcealElements() {
table := []struct {
name string
in []string
safe map[string]struct{}
expect []string
}{
{
name: "nil",
expect: []string{},
},
{
name: "no safe words",
in: []string{"fnords", "smarfs"},
expect: []string{"***", "***"},
},
{
name: "safe words",
in: []string{"fnords", "smarfs"},
safe: map[string]struct{}{"fnords": {}},
expect: []string{"fnords", "***"},
},
{
name: "non-matching safe words",
in: []string{"fnords", "smarfs"},
safe: map[string]struct{}{"beaux": {}},
expect: []string{"***", "***"},
},
{
name: "case insensitivity",
in: []string{"FNORDS", "SMARFS"},
safe: map[string]struct{}{"fnords": {}},
expect: []string{"FNORDS", "***"},
},
}
for _, test := range table {
suite.Run(test.name, func() {
t := suite.T()
result := pii.ConcealElements(test.in, test.safe)
assert.ElementsMatch(t, test.expect, result)
})
}
}

View File

@ -7,7 +7,6 @@ import (
"github.com/alcionai/clues"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
)
// SafeURL complies with the clues.Concealer and fmt.Stringer
@ -42,17 +41,7 @@ func (u SafeURL) Conceal() string {
return "malformed-URL"
}
elems := slices.Clone(strings.Split(p.EscapedPath(), "/"))
// conceal any non-safe path elem
for i := range elems {
e := elems[i]
if _, ok := u.SafePathElems[strings.ToLower(e)]; !ok {
elems[i] = clues.Conceal(e)
}
}
elems := ConcealElements(strings.Split(p.EscapedPath(), "/"), u.SafePathElems)
qry := maps.Clone(p.Query())
// conceal any non-safe query param values

View File

@ -30,7 +30,7 @@ func (suite *URLUnitSuite) TeardownSuite() {
clues.SetHasher(clues.NoHash())
}
func (suite *URLUnitSuite) TestDoesThings() {
func (suite *URLUnitSuite) TestSafeURL() {
stubURL := "https://host.com/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard"
table := []struct {

View File

@ -419,8 +419,7 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
folderProgress, colCloser := observe.ProgressWithCount(
ctx,
observe.ItemQueueMsg,
// TODO(keepers): conceal compliance in path, drop Hide()
clues.Hide(queuedPath),
path.NewElements(queuedPath),
int64(len(oc.driveItems)))
defer colCloser()
defer close(folderProgress)

View File

@ -77,7 +77,7 @@ func RestoreCollections(
ctx,
"resource_owner", clues.Hide(dc.FullPath().ResourceOwner()),
"category", dc.FullPath().Category(),
"path", dc.FullPath()) // TODO: pii, path needs concealer compliance
"path", dc.FullPath())
)
metrics, folderMetas, err = RestoreCollection(

View File

@ -186,8 +186,7 @@ func (sc *Collection) runPopulate(ctx context.Context, errs *fault.Bus) (support
colProgress, closer := observe.CollectionProgress(
ctx,
sc.fullPath.Category().String(),
// TODO(keepers): conceal compliance in path, drop Hide()
clues.Hide(sc.fullPath.Folder(false)))
sc.fullPath.Folders())
go closer()
defer func() {

View File

@ -26,7 +26,7 @@ const (
var (
wg sync.WaitGroup
// TODO: Revisit this being a global nd make it a parameter to the progress methods
// TODO: Revisit this being a global and make it a parameter to the progress methods
// so that each bar can be initialized with different contexts if needed.
contxt context.Context
writer io.Writer

View File

@ -568,7 +568,7 @@ func mergeDetails(
if err != nil {
return clues.New("parsing base item info path").
WithClues(mctx).
With("repo_ref", entry.RepoRef) // todo: pii, path needs concealer compliance
With("repo_ref", path.NewElements(entry.RepoRef))
}
// Although this base has an entry it may not be the most recent. Check

88
src/pkg/path/elements.go Normal file
View File

@ -0,0 +1,88 @@
package path
import (
"fmt"
"github.com/alcionai/clues"
"github.com/alcionai/corso/src/internal/common/pii"
)
var piiSafePathElems = pii.MapWithPlurals(
// services
UnknownService.String(),
ExchangeService.String(),
OneDriveService.String(),
SharePointService.String(),
ExchangeMetadataService.String(),
OneDriveMetadataService.String(),
SharePointMetadataService.String(),
// categories
UnknownCategory.String(),
EmailCategory.String(),
ContactsCategory.String(),
EventsCategory.String(),
FilesCategory.String(),
ListsCategory.String(),
LibrariesCategory.String(),
PagesCategory.String(),
DetailsCategory.String(),
)
var (
// interface compliance required for handling PII
_ clues.Concealer = &Elements{}
_ fmt.Stringer = &Elements{}
// interface compliance for the observe package to display
// values without concealing PII.
_ clues.PlainStringer = &Elements{}
)
// Elements are a PII Concealer-compliant slice of elements within a path.
type Elements []string
// NewElements creates a new Elements slice by splitting the provided string.
func NewElements(p string) Elements {
return Split(p)
}
// Conceal produces a concealed representation of the elements, suitable for
// logging, storing in errors, and other output.
func (el Elements) Conceal() string {
escaped := make([]string, 0, len(el))
for _, e := range el {
escaped = append(escaped, escapeElement(e))
}
return join(pii.ConcealElements(escaped, piiSafePathElems))
}
// Format produces a concealed representation of the elements, even when
// used within a PrintF, suitable for logging, storing in errors,
// and other output.
func (el Elements) Format(fs fmt.State, _ rune) {
fmt.Fprint(fs, el.Conceal())
}
// String returns a string that contains all path elements joined together.
// Elements that need escaping are escaped. The result is not concealed, and
// is not suitable for logging or structured errors.
func (el Elements) String() string {
escaped := make([]string, 0, len(el))
for _, e := range el {
escaped = append(escaped, escapeElement(e))
}
return join(escaped)
}
// PlainString returns an unescaped, unmodified string of the joined elements.
// The result is not concealed, and is not suitable for logging or structured
// errors.
func (el Elements) PlainString() string {
return join(el)
}

View File

@ -0,0 +1,100 @@
package path
import (
"fmt"
"testing"
"github.com/alcionai/clues"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/tester"
)
type ElementsUnitSuite struct {
tester.Suite
}
func TestElementsUnitSuite(t *testing.T) {
suite.Run(t, &ElementsUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// set the clues hashing to mask for the span of this suite
func (suite *ElementsUnitSuite) SetupSuite() {
clues.SetHasher(clues.HashCfg{HashAlg: clues.Flatmask})
}
// revert clues hashing to plaintext for all other tests
func (suite *ElementsUnitSuite) TeardownSuite() {
clues.SetHasher(clues.NoHash())
}
func (suite *ElementsUnitSuite) TestNewElements() {
t := suite.T()
result := NewElements("")
assert.Equal(t, Elements{""}, result)
result = NewElements("fnords")
assert.Equal(t, Elements{"fnords"}, result)
result = NewElements("fnords/smarf")
assert.Equal(t, Elements{"fnords", "smarf"}, result)
}
func (suite *ElementsUnitSuite) TestElements_piiHandling() {
table := []struct {
name string
elems Elements
expect string
expectString string
expectPlain string
}{
{
name: "all concealed",
elems: Elements{"foo", "bar/", "baz"},
expect: "***/***/***",
expectString: `foo/bar\//baz`,
expectPlain: `foo/bar//baz`,
},
{
name: "all safe",
elems: Elements{UnknownService.String(), UnknownCategory.String(), ExchangeMetadataService.String()},
expect: "UnknownService/UnknownCategory/exchangeMetadata",
expectString: "UnknownService/UnknownCategory/exchangeMetadata",
expectPlain: "UnknownService/UnknownCategory/exchangeMetadata",
},
{
name: "mixed",
elems: Elements{UnknownService.String(), "smarf", ExchangeMetadataService.String()},
expect: "UnknownService/***/exchangeMetadata",
expectString: "UnknownService/smarf/exchangeMetadata",
expectPlain: "UnknownService/smarf/exchangeMetadata",
},
{
name: "empty elements",
elems: Elements{},
expect: "",
expectString: "",
expectPlain: "",
},
{
name: "empty string",
elems: Elements{""},
expect: "",
expectString: "",
expectPlain: "",
},
}
for _, test := range table {
suite.Run(test.name, func() {
t := suite.T()
assert.Equal(t, test.expect, test.elems.Conceal(), "conceal")
assert.Equal(t, test.expectString, test.elems.String(), "string")
assert.Equal(t, test.expect, fmt.Sprintf("%s", test.elems), "fmt %%s")
assert.Equal(t, test.expect, fmt.Sprintf("%+v", test.elems), "fmt %%+v")
assert.Equal(t, test.expectPlain, join(test.elems), "plain")
})
}
}

View File

@ -10,7 +10,7 @@ import "github.com/alcionai/clues"
// folders[] is []{"Folder1", "Folder2"}
type DrivePath struct {
DriveID string
Folders []string
Folders Elements
}
func ToOneDrivePath(p Path) (*DrivePath, error) {

View File

@ -86,7 +86,7 @@ type Path interface {
Tenant() string
ResourceOwner() string
Folder(bool) string
Folders() []string
Folders() Elements
Item() string
// UpdateParent updates parent from old to new if the item/folder was
// parented by old path
@ -102,7 +102,7 @@ type Path interface {
// Elements returns all the elements in the path. This is a temporary function
// and will likely be updated to handle encoded elements instead of clear-text
// elements in the future.
Elements() []string
Elements() Elements
// Append returns a new Path object with the given element added to the end of
// the old Path if possible. If the old Path is an item Path then Append
// returns an error.
@ -113,8 +113,23 @@ type Path interface {
ShortRef() string
// ToBuilder returns a Builder instance that represents the current Path.
ToBuilder() *Builder
// Every path needs to comply with these funcs to ensure that PII
// is appropriately hidden from logging, errors, and other outputs.
clues.Concealer
fmt.Stringer
// In the rare case that the path needs to get printed as a plain string,
// without obscuring values for PII.
clues.PlainStringer
}
// interface compliance required for handling PII
var (
_ clues.Concealer = &Builder{}
_ fmt.Stringer = &Builder{}
)
// Builder is a simple path representation that only tracks path elements. It
// can join, escape, and unescape elements. Higher-level packages are expected
// to wrap this struct to build resource-speicific contexts (e.x. an
@ -125,7 +140,7 @@ type Path interface {
// tenant ID, service, user ID, etc).
type Builder struct {
// Unescaped version of elements.
elements []string
elements Elements
}
// UnescapeAndAppend creates a copy of this Builder and adds one or more already
@ -223,18 +238,6 @@ func (pb Builder) LastElem() string {
return pb.elements[len(pb.elements)-1]
}
// String returns a string that contains all path elements joined together.
// Elements of the path that need escaping are escaped.
func (pb Builder) String() string {
escaped := make([]string, 0, len(pb.elements))
for _, e := range pb.elements {
escaped = append(escaped, escapeElement(e))
}
return join(escaped)
}
func (pb Builder) ShortRef() string {
if len(pb.elements) == 0 {
return ""
@ -261,8 +264,8 @@ func (pb Builder) ShortRef() string {
// Elements returns all the elements in the path. This is a temporary function
// and will likely be updated to handle encoded elements instead of clear-text
// elements in the future.
func (pb Builder) Elements() []string {
return append([]string{}, pb.elements...)
func (pb Builder) Elements() Elements {
return append(Elements{}, pb.elements...)
}
func verifyInputValues(tenant, resourceOwner string) error {
@ -658,3 +661,35 @@ func Split(segment string) []string {
return res
}
// ---------------------------------------------------------------------------
// PII Concealer Compliance
// ---------------------------------------------------------------------------
// Conceal produces a concealed representation of the builder, suitable for
// logging, storing in errors, and other output.
func (pb Builder) Conceal() string {
return pb.elements.Conceal()
}
// Format produces a concealed representation of the builder, even when
// used within a PrintF, suitable for logging, storing in errors,
// and other output.
func (pb Builder) Format(fs fmt.State, _ rune) {
fmt.Fprint(fs, pb.Conceal())
}
// String returns a string that contains all path elements joined together.
// Elements of the path that need escaping are escaped.
// The result is not concealed, and is not suitable for logging or structured
// errors.
func (pb Builder) String() string {
return pb.elements.String()
}
// PlainString returns an unescaped, unmodified string of the builder.
// The result is not concealed, and is not suitable for logging or structured
// errors.
func (pb Builder) PlainString() string {
return pb.elements.PlainString()
}

View File

@ -223,6 +223,16 @@ func TestPathUnitSuite(t *testing.T) {
suite.Run(t, &PathUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// set the clues hashing to mask for the span of this suite
func (suite *PathUnitSuite) SetupSuite() {
clues.SetHasher(clues.HashCfg{HashAlg: clues.Flatmask})
}
// revert clues hashing to plaintext for all other tests
func (suite *PathUnitSuite) TeardownSuite() {
clues.SetHasher(clues.NoHash())
}
func (suite *PathUnitSuite) TestAppend() {
table := append(append([]testData{}, genericCases...), basicUnescapedInputs...)
for _, test := range table {
@ -338,7 +348,7 @@ func (suite *PathUnitSuite) TestElements() {
p, err := test.pathFunc(test.input)
require.NoError(t, err, clues.ToCore(err))
assert.Equal(t, test.output, p.Elements())
assert.Equal(t, Elements(test.output), p.Elements())
})
}
}
@ -709,3 +719,33 @@ func (suite *PathUnitSuite) TestFromString() {
}
}
}
func (suite *PathUnitSuite) TestPath_piiHandling() {
p, err := Build("t", "ro", ExchangeService, EventsCategory, true, "dir", "item")
require.NoError(suite.T(), err)
table := []struct {
name string
p Path
expect string
expectPlain string
}{
{
name: "standard path",
p: p,
expect: "***/exchange/***/events/***/***",
expectPlain: "t/exchange/ro/events/dir/item",
},
}
for _, test := range table {
suite.Run(test.name, func() {
t := suite.T()
assert.Equal(t, test.expect, test.p.Conceal(), "conceal")
assert.Equal(t, test.expectPlain, test.p.String(), "string")
assert.Equal(t, test.expect, fmt.Sprintf("%s", test.p), "fmt %%s")
assert.Equal(t, test.expect, fmt.Sprintf("%+v", test.p), "fmt %%+v")
assert.Equal(t, test.expectPlain, test.p.PlainString(), "plain")
})
}
}

View File

@ -218,7 +218,7 @@ func (rp dataLayerResourcePath) Folder(escape bool) string {
// Folders returns the individual folder elements embedded in the
// dataLayerResourcePath.
func (rp dataLayerResourcePath) Folders() []string {
func (rp dataLayerResourcePath) Folders() Elements {
endIdx := rp.lastFolderIdx()
if endIdx == 4 {
return nil
@ -239,7 +239,7 @@ func (rp dataLayerResourcePath) Item() string {
func (rp dataLayerResourcePath) Dir() (Path, error) {
if len(rp.elements) <= 4 {
return nil, clues.New("unable to shorten path").With("path", fmt.Sprintf("%q", rp))
return nil, clues.New("unable to shorten path").With("path", rp)
}
return &dataLayerResourcePath{

View File

@ -136,6 +136,10 @@ func TestDataLayerResourcePath(t *testing.T) {
suite.Run(t, &DataLayerResourcePath{Suite: tester.NewUnitSuite(t)})
}
func (suite *DataLayerResourcePath) SetupSuite() {
clues.SetHasher(clues.NoHash())
}
func (suite *DataLayerResourcePath) TestMissingInfoErrors() {
for _, types := range serviceCategories {
suite.Run(types.service.String()+types.category.String(), func() {
@ -402,7 +406,7 @@ func (suite *DataLayerResourcePath) TestToExchangePathForCategory() {
assert.Equal(t, test.category, p.Category())
assert.Equal(t, testUser, p.ResourceOwner())
assert.Equal(t, strings.Join(m.expectedFolders, "/"), p.Folder(false))
assert.Equal(t, m.expectedFolders, p.Folders())
assert.Equal(t, path.Elements(m.expectedFolders), p.Folders())
assert.Equal(t, m.expectedItem, p.Item())
})
}
@ -496,7 +500,7 @@ func (suite *PopulatedDataLayerResourcePath) TestFolders() {
suite.Run(m.name, func() {
t := suite.T()
assert.Equal(t, m.expectedFolders, suite.paths[m.isItem].Folders())
assert.Equal(t, path.Elements(m.expectedFolders), suite.paths[m.isItem].Folders())
})
}
}