From 50e92b65c636deec9c51d44f2c84012591fc9795 Mon Sep 17 00:00:00 2001 From: Keepers Date: Tue, 11 Apr 2023 16:04:34 -0600 Subject: [PATCH] add Concealer compliance to paths (#3017) Adds compliance with clues.Concealer to the paths package. Also introduces a new struct in the same space: Elements, which is a thin wrapper around a slice of strings so that subsections of a path or builder can carry the same pii behavior without additional work on the consumer's end. --- #### Does this PR need a docs update or release note? - [x] :clock1: Yes, but in a later PR #### Type of change - [x] :sunflower: Feature - [x] :robot: Supportability/Tests #### Issue(s) * #2024 #### Test Plan - [x] :zap: Unit test - [x] :green_heart: E2E --- src/internal/common/pii/pii.go | 27 ++++- src/internal/common/pii/pii_test.go | 97 +++++++++++++++++ src/internal/common/pii/url.go | 13 +-- src/internal/common/pii/url_test.go | 2 +- src/internal/connector/onedrive/collection.go | 3 +- src/internal/connector/onedrive/restore.go | 2 +- .../connector/sharepoint/collection.go | 3 +- src/internal/observe/observe.go | 2 +- src/internal/operations/backup.go | 2 +- src/pkg/path/elements.go | 88 +++++++++++++++ src/pkg/path/elements_test.go | 100 ++++++++++++++++++ src/pkg/path/onedrive.go | 2 +- src/pkg/path/path.go | 69 +++++++++--- src/pkg/path/path_test.go | 42 +++++++- src/pkg/path/resource_path.go | 4 +- src/pkg/path/resource_path_test.go | 8 +- 16 files changed, 420 insertions(+), 44 deletions(-) create mode 100644 src/internal/common/pii/pii_test.go create mode 100644 src/pkg/path/elements.go create mode 100644 src/pkg/path/elements_test.go diff --git a/src/internal/common/pii/pii.go b/src/internal/common/pii/pii.go index 102d782d1..c2bd88330 100644 --- a/src/internal/common/pii/pii.go +++ b/src/internal/common/pii/pii.go @@ -1,6 +1,11 @@ package pii -import "strings" +import ( + "strings" + + "github.com/alcionai/clues" + "golang.org/x/exp/slices" +) // MapWithPlurls places the toLower value of each string // into a map[string]struct{}, along with a copy of the that @@ -16,3 +21,23 @@ func MapWithPlurals(ss ...string) map[string]struct{} { return mss } + +// ConcealElements conceals each element in elems that does not appear in +// the safe map. A copy of elems containing the changes is returned. +func ConcealElements(elems []string, safe map[string]struct{}) []string { + if len(elems) == 0 { + return []string{} + } + + ces := slices.Clone(elems) + + for i := range ces { + ce := ces[i] + + if _, ok := safe[strings.ToLower(ce)]; !ok { + ces[i] = clues.Conceal(ce) + } + } + + return ces +} diff --git a/src/internal/common/pii/pii_test.go b/src/internal/common/pii/pii_test.go new file mode 100644 index 000000000..5cac32ad1 --- /dev/null +++ b/src/internal/common/pii/pii_test.go @@ -0,0 +1,97 @@ +package pii_test + +import ( + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/common/pii" + "github.com/alcionai/corso/src/internal/tester" +) + +type PIIUnitSuite struct { + tester.Suite +} + +func TestPIIUnitSuite(t *testing.T) { + suite.Run(t, &PIIUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +// set the clues hashing to mask for the span of this suite +func (suite *PIIUnitSuite) SetupSuite() { + clues.SetHasher(clues.HashCfg{HashAlg: clues.Flatmask}) +} + +// revert clues hashing to plaintext for all other tests +func (suite *PIIUnitSuite) TeardownSuite() { + clues.SetHasher(clues.NoHash()) +} + +func (suite *PIIUnitSuite) TestMapWithPlurals() { + t := suite.T() + + mwp := pii.MapWithPlurals() + assert.Equal(t, map[string]struct{}{}, mwp) + + mwp = pii.MapWithPlurals("") + assert.Equal(t, map[string]struct{}{"": {}, "s": {}}, mwp) + + mwp = pii.MapWithPlurals(" ") + assert.Equal(t, map[string]struct{}{" ": {}, " s": {}}, mwp) + + mwp = pii.MapWithPlurals("foo", "bar") + expect := map[string]struct{}{ + "foo": {}, + "foos": {}, + "bar": {}, + "bars": {}, + } + assert.Equal(t, expect, mwp) +} + +func (suite *PIIUnitSuite) TestConcealElements() { + table := []struct { + name string + in []string + safe map[string]struct{} + expect []string + }{ + { + name: "nil", + expect: []string{}, + }, + { + name: "no safe words", + in: []string{"fnords", "smarfs"}, + expect: []string{"***", "***"}, + }, + { + name: "safe words", + in: []string{"fnords", "smarfs"}, + safe: map[string]struct{}{"fnords": {}}, + expect: []string{"fnords", "***"}, + }, + { + name: "non-matching safe words", + in: []string{"fnords", "smarfs"}, + safe: map[string]struct{}{"beaux": {}}, + expect: []string{"***", "***"}, + }, + { + name: "case insensitivity", + in: []string{"FNORDS", "SMARFS"}, + safe: map[string]struct{}{"fnords": {}}, + expect: []string{"FNORDS", "***"}, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + result := pii.ConcealElements(test.in, test.safe) + assert.ElementsMatch(t, test.expect, result) + }) + } +} diff --git a/src/internal/common/pii/url.go b/src/internal/common/pii/url.go index 34707a360..339bee19f 100644 --- a/src/internal/common/pii/url.go +++ b/src/internal/common/pii/url.go @@ -7,7 +7,6 @@ import ( "github.com/alcionai/clues" "golang.org/x/exp/maps" - "golang.org/x/exp/slices" ) // SafeURL complies with the clues.Concealer and fmt.Stringer @@ -42,17 +41,7 @@ func (u SafeURL) Conceal() string { return "malformed-URL" } - elems := slices.Clone(strings.Split(p.EscapedPath(), "/")) - - // conceal any non-safe path elem - for i := range elems { - e := elems[i] - - if _, ok := u.SafePathElems[strings.ToLower(e)]; !ok { - elems[i] = clues.Conceal(e) - } - } - + elems := ConcealElements(strings.Split(p.EscapedPath(), "/"), u.SafePathElems) qry := maps.Clone(p.Query()) // conceal any non-safe query param values diff --git a/src/internal/common/pii/url_test.go b/src/internal/common/pii/url_test.go index a89fd2d26..8b6467b31 100644 --- a/src/internal/common/pii/url_test.go +++ b/src/internal/common/pii/url_test.go @@ -30,7 +30,7 @@ func (suite *URLUnitSuite) TeardownSuite() { clues.SetHasher(clues.NoHash()) } -func (suite *URLUnitSuite) TestDoesThings() { +func (suite *URLUnitSuite) TestSafeURL() { stubURL := "https://host.com/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard" table := []struct { diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go index 6f45eb9bf..66c4396af 100644 --- a/src/internal/connector/onedrive/collection.go +++ b/src/internal/connector/onedrive/collection.go @@ -419,8 +419,7 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { folderProgress, colCloser := observe.ProgressWithCount( ctx, observe.ItemQueueMsg, - // TODO(keepers): conceal compliance in path, drop Hide() - clues.Hide(queuedPath), + path.NewElements(queuedPath), int64(len(oc.driveItems))) defer colCloser() defer close(folderProgress) diff --git a/src/internal/connector/onedrive/restore.go b/src/internal/connector/onedrive/restore.go index 765bc38b9..4f35267af 100644 --- a/src/internal/connector/onedrive/restore.go +++ b/src/internal/connector/onedrive/restore.go @@ -77,7 +77,7 @@ func RestoreCollections( ctx, "resource_owner", clues.Hide(dc.FullPath().ResourceOwner()), "category", dc.FullPath().Category(), - "path", dc.FullPath()) // TODO: pii, path needs concealer compliance + "path", dc.FullPath()) ) metrics, folderMetas, err = RestoreCollection( diff --git a/src/internal/connector/sharepoint/collection.go b/src/internal/connector/sharepoint/collection.go index 0350601e7..8ff364510 100644 --- a/src/internal/connector/sharepoint/collection.go +++ b/src/internal/connector/sharepoint/collection.go @@ -186,8 +186,7 @@ func (sc *Collection) runPopulate(ctx context.Context, errs *fault.Bus) (support colProgress, closer := observe.CollectionProgress( ctx, sc.fullPath.Category().String(), - // TODO(keepers): conceal compliance in path, drop Hide() - clues.Hide(sc.fullPath.Folder(false))) + sc.fullPath.Folders()) go closer() defer func() { diff --git a/src/internal/observe/observe.go b/src/internal/observe/observe.go index 67db8b7a2..ea822f431 100644 --- a/src/internal/observe/observe.go +++ b/src/internal/observe/observe.go @@ -26,7 +26,7 @@ const ( var ( wg sync.WaitGroup - // TODO: Revisit this being a global nd make it a parameter to the progress methods + // TODO: Revisit this being a global and make it a parameter to the progress methods // so that each bar can be initialized with different contexts if needed. contxt context.Context writer io.Writer diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 0236c1c12..7afa0f7fa 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -568,7 +568,7 @@ func mergeDetails( if err != nil { return clues.New("parsing base item info path"). WithClues(mctx). - With("repo_ref", entry.RepoRef) // todo: pii, path needs concealer compliance + With("repo_ref", path.NewElements(entry.RepoRef)) } // Although this base has an entry it may not be the most recent. Check diff --git a/src/pkg/path/elements.go b/src/pkg/path/elements.go new file mode 100644 index 000000000..0a55bd8e4 --- /dev/null +++ b/src/pkg/path/elements.go @@ -0,0 +1,88 @@ +package path + +import ( + "fmt" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/common/pii" +) + +var piiSafePathElems = pii.MapWithPlurals( + // services + UnknownService.String(), + ExchangeService.String(), + OneDriveService.String(), + SharePointService.String(), + ExchangeMetadataService.String(), + OneDriveMetadataService.String(), + SharePointMetadataService.String(), + + // categories + UnknownCategory.String(), + EmailCategory.String(), + ContactsCategory.String(), + EventsCategory.String(), + FilesCategory.String(), + ListsCategory.String(), + LibrariesCategory.String(), + PagesCategory.String(), + DetailsCategory.String(), +) + +var ( + // interface compliance required for handling PII + _ clues.Concealer = &Elements{} + _ fmt.Stringer = &Elements{} + + // interface compliance for the observe package to display + // values without concealing PII. + _ clues.PlainStringer = &Elements{} +) + +// Elements are a PII Concealer-compliant slice of elements within a path. +type Elements []string + +// NewElements creates a new Elements slice by splitting the provided string. +func NewElements(p string) Elements { + return Split(p) +} + +// Conceal produces a concealed representation of the elements, suitable for +// logging, storing in errors, and other output. +func (el Elements) Conceal() string { + escaped := make([]string, 0, len(el)) + + for _, e := range el { + escaped = append(escaped, escapeElement(e)) + } + + return join(pii.ConcealElements(escaped, piiSafePathElems)) +} + +// Format produces a concealed representation of the elements, even when +// used within a PrintF, suitable for logging, storing in errors, +// and other output. +func (el Elements) Format(fs fmt.State, _ rune) { + fmt.Fprint(fs, el.Conceal()) +} + +// String returns a string that contains all path elements joined together. +// Elements that need escaping are escaped. The result is not concealed, and +// is not suitable for logging or structured errors. +func (el Elements) String() string { + escaped := make([]string, 0, len(el)) + + for _, e := range el { + escaped = append(escaped, escapeElement(e)) + } + + return join(escaped) +} + +// PlainString returns an unescaped, unmodified string of the joined elements. +// The result is not concealed, and is not suitable for logging or structured +// errors. +func (el Elements) PlainString() string { + return join(el) +} diff --git a/src/pkg/path/elements_test.go b/src/pkg/path/elements_test.go new file mode 100644 index 000000000..f9f4c1d1a --- /dev/null +++ b/src/pkg/path/elements_test.go @@ -0,0 +1,100 @@ +package path + +import ( + "fmt" + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" +) + +type ElementsUnitSuite struct { + tester.Suite +} + +func TestElementsUnitSuite(t *testing.T) { + suite.Run(t, &ElementsUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +// set the clues hashing to mask for the span of this suite +func (suite *ElementsUnitSuite) SetupSuite() { + clues.SetHasher(clues.HashCfg{HashAlg: clues.Flatmask}) +} + +// revert clues hashing to plaintext for all other tests +func (suite *ElementsUnitSuite) TeardownSuite() { + clues.SetHasher(clues.NoHash()) +} + +func (suite *ElementsUnitSuite) TestNewElements() { + t := suite.T() + + result := NewElements("") + assert.Equal(t, Elements{""}, result) + + result = NewElements("fnords") + assert.Equal(t, Elements{"fnords"}, result) + + result = NewElements("fnords/smarf") + assert.Equal(t, Elements{"fnords", "smarf"}, result) +} + +func (suite *ElementsUnitSuite) TestElements_piiHandling() { + table := []struct { + name string + elems Elements + expect string + expectString string + expectPlain string + }{ + { + name: "all concealed", + elems: Elements{"foo", "bar/", "baz"}, + expect: "***/***/***", + expectString: `foo/bar\//baz`, + expectPlain: `foo/bar//baz`, + }, + { + name: "all safe", + elems: Elements{UnknownService.String(), UnknownCategory.String(), ExchangeMetadataService.String()}, + expect: "UnknownService/UnknownCategory/exchangeMetadata", + expectString: "UnknownService/UnknownCategory/exchangeMetadata", + expectPlain: "UnknownService/UnknownCategory/exchangeMetadata", + }, + { + name: "mixed", + elems: Elements{UnknownService.String(), "smarf", ExchangeMetadataService.String()}, + expect: "UnknownService/***/exchangeMetadata", + expectString: "UnknownService/smarf/exchangeMetadata", + expectPlain: "UnknownService/smarf/exchangeMetadata", + }, + { + name: "empty elements", + elems: Elements{}, + expect: "", + expectString: "", + expectPlain: "", + }, + { + name: "empty string", + elems: Elements{""}, + expect: "", + expectString: "", + expectPlain: "", + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + assert.Equal(t, test.expect, test.elems.Conceal(), "conceal") + assert.Equal(t, test.expectString, test.elems.String(), "string") + assert.Equal(t, test.expect, fmt.Sprintf("%s", test.elems), "fmt %%s") + assert.Equal(t, test.expect, fmt.Sprintf("%+v", test.elems), "fmt %%+v") + assert.Equal(t, test.expectPlain, join(test.elems), "plain") + }) + } +} diff --git a/src/pkg/path/onedrive.go b/src/pkg/path/onedrive.go index 5f2adf2db..76d5300be 100644 --- a/src/pkg/path/onedrive.go +++ b/src/pkg/path/onedrive.go @@ -10,7 +10,7 @@ import "github.com/alcionai/clues" // folders[] is []{"Folder1", "Folder2"} type DrivePath struct { DriveID string - Folders []string + Folders Elements } func ToOneDrivePath(p Path) (*DrivePath, error) { diff --git a/src/pkg/path/path.go b/src/pkg/path/path.go index 1af05a8c8..0a1decfa2 100644 --- a/src/pkg/path/path.go +++ b/src/pkg/path/path.go @@ -86,7 +86,7 @@ type Path interface { Tenant() string ResourceOwner() string Folder(bool) string - Folders() []string + Folders() Elements Item() string // UpdateParent updates parent from old to new if the item/folder was // parented by old path @@ -102,7 +102,7 @@ type Path interface { // Elements returns all the elements in the path. This is a temporary function // and will likely be updated to handle encoded elements instead of clear-text // elements in the future. - Elements() []string + Elements() Elements // Append returns a new Path object with the given element added to the end of // the old Path if possible. If the old Path is an item Path then Append // returns an error. @@ -113,8 +113,23 @@ type Path interface { ShortRef() string // ToBuilder returns a Builder instance that represents the current Path. ToBuilder() *Builder + + // Every path needs to comply with these funcs to ensure that PII + // is appropriately hidden from logging, errors, and other outputs. + clues.Concealer + fmt.Stringer + + // In the rare case that the path needs to get printed as a plain string, + // without obscuring values for PII. + clues.PlainStringer } +// interface compliance required for handling PII +var ( + _ clues.Concealer = &Builder{} + _ fmt.Stringer = &Builder{} +) + // Builder is a simple path representation that only tracks path elements. It // can join, escape, and unescape elements. Higher-level packages are expected // to wrap this struct to build resource-speicific contexts (e.x. an @@ -125,7 +140,7 @@ type Path interface { // tenant ID, service, user ID, etc). type Builder struct { // Unescaped version of elements. - elements []string + elements Elements } // UnescapeAndAppend creates a copy of this Builder and adds one or more already @@ -223,18 +238,6 @@ func (pb Builder) LastElem() string { return pb.elements[len(pb.elements)-1] } -// String returns a string that contains all path elements joined together. -// Elements of the path that need escaping are escaped. -func (pb Builder) String() string { - escaped := make([]string, 0, len(pb.elements)) - - for _, e := range pb.elements { - escaped = append(escaped, escapeElement(e)) - } - - return join(escaped) -} - func (pb Builder) ShortRef() string { if len(pb.elements) == 0 { return "" @@ -261,8 +264,8 @@ func (pb Builder) ShortRef() string { // Elements returns all the elements in the path. This is a temporary function // and will likely be updated to handle encoded elements instead of clear-text // elements in the future. -func (pb Builder) Elements() []string { - return append([]string{}, pb.elements...) +func (pb Builder) Elements() Elements { + return append(Elements{}, pb.elements...) } func verifyInputValues(tenant, resourceOwner string) error { @@ -658,3 +661,35 @@ func Split(segment string) []string { return res } + +// --------------------------------------------------------------------------- +// PII Concealer Compliance +// --------------------------------------------------------------------------- + +// Conceal produces a concealed representation of the builder, suitable for +// logging, storing in errors, and other output. +func (pb Builder) Conceal() string { + return pb.elements.Conceal() +} + +// Format produces a concealed representation of the builder, even when +// used within a PrintF, suitable for logging, storing in errors, +// and other output. +func (pb Builder) Format(fs fmt.State, _ rune) { + fmt.Fprint(fs, pb.Conceal()) +} + +// String returns a string that contains all path elements joined together. +// Elements of the path that need escaping are escaped. +// The result is not concealed, and is not suitable for logging or structured +// errors. +func (pb Builder) String() string { + return pb.elements.String() +} + +// PlainString returns an unescaped, unmodified string of the builder. +// The result is not concealed, and is not suitable for logging or structured +// errors. +func (pb Builder) PlainString() string { + return pb.elements.PlainString() +} diff --git a/src/pkg/path/path_test.go b/src/pkg/path/path_test.go index 51f5801cf..6af2b2b0e 100644 --- a/src/pkg/path/path_test.go +++ b/src/pkg/path/path_test.go @@ -223,6 +223,16 @@ func TestPathUnitSuite(t *testing.T) { suite.Run(t, &PathUnitSuite{Suite: tester.NewUnitSuite(t)}) } +// set the clues hashing to mask for the span of this suite +func (suite *PathUnitSuite) SetupSuite() { + clues.SetHasher(clues.HashCfg{HashAlg: clues.Flatmask}) +} + +// revert clues hashing to plaintext for all other tests +func (suite *PathUnitSuite) TeardownSuite() { + clues.SetHasher(clues.NoHash()) +} + func (suite *PathUnitSuite) TestAppend() { table := append(append([]testData{}, genericCases...), basicUnescapedInputs...) for _, test := range table { @@ -338,7 +348,7 @@ func (suite *PathUnitSuite) TestElements() { p, err := test.pathFunc(test.input) require.NoError(t, err, clues.ToCore(err)) - assert.Equal(t, test.output, p.Elements()) + assert.Equal(t, Elements(test.output), p.Elements()) }) } } @@ -709,3 +719,33 @@ func (suite *PathUnitSuite) TestFromString() { } } } + +func (suite *PathUnitSuite) TestPath_piiHandling() { + p, err := Build("t", "ro", ExchangeService, EventsCategory, true, "dir", "item") + require.NoError(suite.T(), err) + + table := []struct { + name string + p Path + expect string + expectPlain string + }{ + { + name: "standard path", + p: p, + expect: "***/exchange/***/events/***/***", + expectPlain: "t/exchange/ro/events/dir/item", + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + assert.Equal(t, test.expect, test.p.Conceal(), "conceal") + assert.Equal(t, test.expectPlain, test.p.String(), "string") + assert.Equal(t, test.expect, fmt.Sprintf("%s", test.p), "fmt %%s") + assert.Equal(t, test.expect, fmt.Sprintf("%+v", test.p), "fmt %%+v") + assert.Equal(t, test.expectPlain, test.p.PlainString(), "plain") + }) + } +} diff --git a/src/pkg/path/resource_path.go b/src/pkg/path/resource_path.go index c7f570b14..a9db84d88 100644 --- a/src/pkg/path/resource_path.go +++ b/src/pkg/path/resource_path.go @@ -218,7 +218,7 @@ func (rp dataLayerResourcePath) Folder(escape bool) string { // Folders returns the individual folder elements embedded in the // dataLayerResourcePath. -func (rp dataLayerResourcePath) Folders() []string { +func (rp dataLayerResourcePath) Folders() Elements { endIdx := rp.lastFolderIdx() if endIdx == 4 { return nil @@ -239,7 +239,7 @@ func (rp dataLayerResourcePath) Item() string { func (rp dataLayerResourcePath) Dir() (Path, error) { if len(rp.elements) <= 4 { - return nil, clues.New("unable to shorten path").With("path", fmt.Sprintf("%q", rp)) + return nil, clues.New("unable to shorten path").With("path", rp) } return &dataLayerResourcePath{ diff --git a/src/pkg/path/resource_path_test.go b/src/pkg/path/resource_path_test.go index 84cc79761..afad4148c 100644 --- a/src/pkg/path/resource_path_test.go +++ b/src/pkg/path/resource_path_test.go @@ -136,6 +136,10 @@ func TestDataLayerResourcePath(t *testing.T) { suite.Run(t, &DataLayerResourcePath{Suite: tester.NewUnitSuite(t)}) } +func (suite *DataLayerResourcePath) SetupSuite() { + clues.SetHasher(clues.NoHash()) +} + func (suite *DataLayerResourcePath) TestMissingInfoErrors() { for _, types := range serviceCategories { suite.Run(types.service.String()+types.category.String(), func() { @@ -402,7 +406,7 @@ func (suite *DataLayerResourcePath) TestToExchangePathForCategory() { assert.Equal(t, test.category, p.Category()) assert.Equal(t, testUser, p.ResourceOwner()) assert.Equal(t, strings.Join(m.expectedFolders, "/"), p.Folder(false)) - assert.Equal(t, m.expectedFolders, p.Folders()) + assert.Equal(t, path.Elements(m.expectedFolders), p.Folders()) assert.Equal(t, m.expectedItem, p.Item()) }) } @@ -496,7 +500,7 @@ func (suite *PopulatedDataLayerResourcePath) TestFolders() { suite.Run(m.name, func() { t := suite.T() - assert.Equal(t, m.expectedFolders, suite.paths[m.isItem].Folders()) + assert.Equal(t, path.Elements(m.expectedFolders), suite.paths[m.isItem].Folders()) }) } }