conceal url pii (#3015)

Adds a new package (pii) to common, containing a
clues.Concealer-compliant struct that can log and report
URL values with a mix of hidden PII and exposed
public values. Exposed values are based on
well-known Graph API path and query params.

---

#### Does this PR need a docs update or release note?

- [x] 🕐 Yes, but in a later PR

#### Type of change

- [x] 🤖 Supportability/Tests

#### Issue(s)

* #2024

#### Test Plan

- [x]  Unit test
This commit is contained in:
Keepers 2023-04-04 15:01:37 -06:00 committed by GitHub
parent 2341d61842
commit 74e4a094e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 322 additions and 15 deletions

View File

@ -0,0 +1,18 @@
package pii
import "strings"
// MapWithPlurals builds a set (map[string]struct{}) that contains,
// for every input string, its lower-cased form plus a naive plural
// made by appending "s" to that lower-cased form
// (ex: FoO => foo, foos).
func MapWithPlurals(ss ...string) map[string]struct{} {
	var present struct{}

	// two entries (singular + plural) per input
	set := make(map[string]struct{}, 2*len(ss))

	for idx := range ss {
		singular := strings.ToLower(ss[idx])
		set[singular], set[singular+"s"] = present, present
	}

	return set
}

View File

@ -0,0 +1,96 @@
package pii
import (
"fmt"
"net/url"
"strings"
"github.com/alcionai/clues"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
)
// SafeURL complies with the clues.Concealer and fmt.Stringer
// interfaces to produce a safely loggable version of the URL.
// Path elements that equal a SafePathElems entry will show in
// plain text. All other path elements will get concealed by clues.
// Query parameters that match a key in SafeQueryKeys will have
// their values displayed in plain text. All other query parameter
// values will get concealed by clues.
type SafeURL struct {
// the original URL
URL string
// path elements that do not need to be hidden
// keys should be lower-cased: each path element is
// lower-cased before it is looked up in this set
SafePathElems map[string]struct{}
// query parameters that do not need to be hidden
// keys should be lower-cased: each query key is
// lower-cased before it is looked up in this set
SafeQueryKeys map[string]struct{}
}
// compile-time check that SafeURL satisfies clues.Concealer
var _ clues.Concealer = &SafeURL{}
// Conceal renders the URL as a string in which every path element
// and every query value that is not marked safe has been passed
// through clues.Conceal.
//
// An empty URL yields an empty string, and a URL that url.Parse
// rejects yields the literal "malformed-URL".  The result is rebuilt
// from the parsed scheme, hostname, path, and query only.
func (u SafeURL) Conceal() string {
	if u.URL == "" {
		return ""
	}

	parsed, err := url.Parse(u.URL)
	if err != nil {
		return "malformed-URL"
	}

	// conceal every path segment that isn't in the safe set
	segments := slices.Clone(strings.Split(parsed.EscapedPath(), "/"))
	for idx, seg := range segments {
		if _, safe := u.SafePathElems[strings.ToLower(seg)]; safe {
			continue
		}

		segments[idx] = clues.Conceal(seg)
	}

	// conceal the values of every query param whose key isn't in the safe set
	params := maps.Clone(parsed.Query())
	for key, vals := range parsed.Query() {
		if _, safe := u.SafeQueryKeys[strings.ToLower(key)]; safe {
			continue
		}

		for idx, val := range vals {
			vals[idx] = clues.Conceal(val)
		}

		params[key] = vals
	}

	out := parsed.Scheme + "://" + parsed.Hostname() + strings.Join(segments, "/")
	if len(params) > 0 {
		out += "?" + params.Encode()
	}

	// prefer the unescaped form for readability; if unescaping
	// fails, fall back to the escaped string.
	if plain, err := url.QueryUnescape(out); err == nil {
		return plain
	}

	return out
}
// Format implements fmt.Formatter so that a SafeURL handed to any
// Printf-style call is always written in its Conceal() form; the
// verb is ignored.
func (u SafeURL) Format(fs fmt.State, _ rune) {
	concealed := u.Conceal()
	_, _ = fs.Write([]byte(concealed))
}
// String implements fmt.Stringer.  Converting a SafeURL to a string
// always yields the Conceal() version of the url.
func (u SafeURL) String() string {
	concealed := u.Conceal()
	return concealed
}

View File

@ -0,0 +1,123 @@
package pii_test
import (
"fmt"
"testing"
"github.com/alcionai/clues"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/common/pii"
"github.com/alcionai/corso/src/internal/tester"
)
// URLUnitSuite is the test suite for the pii.SafeURL unit tests.
// It embeds tester.Suite, which TestURLUnitSuite populates with
// tester.NewUnitSuite.
type URLUnitSuite struct {
tester.Suite
}
func TestURLUnitSuite(t *testing.T) {
suite.Run(t, &URLUnitSuite{Suite: tester.NewUnitSuite(t)})
}
// SetupSuite switches the clues hasher to the Flatmask algorithm
// for the span of this suite.
func (suite *URLUnitSuite) SetupSuite() {
	maskCfg := clues.HashCfg{HashAlg: clues.Flatmask}
	clues.SetHasher(maskCfg)
}
// TearDownSuite reverts clues hashing to plaintext for all other tests.
//
// The name must be spelled TearDownSuite (capital D): testify only
// invokes this hook through its TearDownAllSuite interface, so the
// previous "TeardownSuite" spelling was never called and the mask
// hasher stayed installed after this suite finished.
func (suite *URLUnitSuite) TearDownSuite() {
	clues.SetHasher(clues.NoHash())
}
// TestDoesThings runs pii.SafeURL through a table of inputs and
// checks that Conceal(), String(), and the %s and %+v fmt verbs all
// produce the same expected output for each case.
func (suite *URLUnitSuite) TestDoesThings() {
	const stubURL = "https://host.com/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard"

	type testCase struct {
		name      string
		input     string
		expect    string
		safePath  map[string]struct{}
		safeQuery map[string]struct{}
	}

	// safe sets shared by several cases; they are only ever read.
	var (
		fooBazPaths = map[string]struct{}{"foo": {}, "baz": {}}
		beauxQuery  = map[string]struct{}{"beaux": {}}
	)

	cases := []testCase{
		{
			name:   "no safety",
			input:  stubURL,
			expect: "https://host.com/***/***/***/***?beaux=***&fnords=***&fnords=***",
		},
		{
			name:     "safe paths",
			input:    stubURL,
			expect:   "https://host.com/foo/***/baz/***?beaux=***&fnords=***&fnords=***",
			safePath: fooBazPaths,
		},
		{
			name:      "safe query",
			input:     stubURL,
			expect:    "https://host.com/***/***/***/***?beaux=regard&fnords=***&fnords=***",
			safeQuery: beauxQuery,
		},
		{
			name:      "safe path and query",
			input:     stubURL,
			expect:    "https://host.com/foo/***/baz/***?beaux=regard&fnords=***&fnords=***",
			safePath:  fooBazPaths,
			safeQuery: beauxQuery,
		},
		{
			name:     "empty elements",
			input:    "https://host.com/foo//baz/?fnords=&beaux=",
			expect:   "https://host.com/foo//baz/?beaux=&fnords=",
			safePath: fooBazPaths,
		},
		{
			name:   "no path",
			input:  "https://host.com/",
			expect: "https://host.com/",
		},
		{
			name:   "no path with query",
			input:  "https://host.com/?fnords=smarfs&fnords=brunhilda&beaux=regard",
			expect: "https://host.com/?beaux=***&fnords=***&fnords=***",
		},
		{
			name:   "relative path",
			input:  "/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard",
			expect: ":///***/***/***/***?beaux=***&fnords=***&fnords=***",
		},
		{
			name:   "malformed url",
			input:  "i am not a url",
			expect: "://***",
		},
		{
			name:   "empty url",
			input:  "",
			expect: "",
		},
	}

	for _, tc := range cases {
		suite.Run(tc.name, func() {
			t := suite.T()

			su := pii.SafeURL{
				URL:           tc.input,
				SafePathElems: tc.safePath,
				SafeQueryKeys: tc.safeQuery,
			}

			// every rendering route must agree on the same output
			assert.Equal(t, tc.expect, su.Conceal(), "Conceal()")
			assert.Equal(t, tc.expect, su.String(), "String()")
			assert.Equal(t, tc.expect, fmt.Sprintf("%s", su), "fmt %%s")
			assert.Equal(t, tc.expect, fmt.Sprintf("%+v", su), "fmt %%+v")
		})
	}
}

View File

@ -9,6 +9,7 @@ import (
"github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/connector/uploadsession" "github.com/alcionai/corso/src/internal/connector/uploadsession"
"github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/logger"
@ -104,8 +105,7 @@ func uploadLargeAttachment(
url := ptr.Val(session.GetUploadUrl()) url := ptr.Val(session.GetUploadUrl())
aw := uploadsession.NewWriter(uploader.getItemID(), url, size) aw := uploadsession.NewWriter(uploader.getItemID(), url, size)
// TODO: url pii refinementt logger.Ctx(ctx).Debugw("uploading large attachment", "attachment_url", graph.LoggableURL(url))
logger.Ctx(ctx).Debugw("uploading large attachment", "attachment_url", clues.Hide(url))
// Upload the stream data // Upload the stream data
copyBuffer := make([]byte, attachmentChunkSize) copyBuffer := make([]byte, attachmentChunkSize)

View File

@ -19,6 +19,7 @@ import (
msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core" msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core"
"golang.org/x/time/rate" "golang.org/x/time/rate"
"github.com/alcionai/corso/src/internal/common/pii"
"github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/internal/events"
"github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/logger"
@ -271,20 +272,86 @@ type Servicer interface {
// LoggingMiddleware can be used to log the http request sent by the graph client // LoggingMiddleware can be used to log the http request sent by the graph client
type LoggingMiddleware struct{} type LoggingMiddleware struct{}
// safePathParams is the set of well-known path names used by graph
// api calls.  It is used to un-hide path elements in a pii.SafeURL.
// pii.MapWithPlurals lower-cases each name listed here and also adds
// its "s"-suffixed plural, so only the singular form is listed.
var safePathParams = pii.MapWithPlurals(
//nolint:misspell
"alltime",
"analytics",
"archive",
"beta",
"calendargroup",
"calendar",
"calendarview",
"channel",
"childfolder",
"children",
"clone",
"column",
"contactfolder",
"contact",
"contenttype",
"delta",
"drive",
"event",
"group",
"inbox",
"instance",
"invitation",
"item",
"joinedteam",
"label",
"list",
"mailfolder",
"member",
"message",
"notification",
"page",
"primarychannel",
"root",
"security",
"site",
"subscription",
"team",
"unarchive",
"user",
"v1.0")
// safeQueryParams is the set of well-known safe query parameters
// used by graph api calls.  Keys are written in lower case.
//
// used to un-hide query params in a pii.SafeURL
var safeQueryParams = map[string]struct{}{
"deltatoken": {},
"startdatetime": {},
"enddatetime": {},
"$count": {},
"$expand": {},
"$filter": {},
"$select": {},
"$top": {},
}
// LoggableURL wraps the url in a pii.SafeURL configured with the
// package's safePathParams and safeQueryParams sets.
func LoggableURL(url string) pii.SafeURL {
	safe := pii.SafeURL{
		SafePathElems: safePathParams,
		SafeQueryKeys: safeQueryParams,
	}
	safe.URL = url

	return safe
}
func (handler *LoggingMiddleware) Intercept( func (handler *LoggingMiddleware) Intercept(
pipeline khttp.Pipeline, pipeline khttp.Pipeline,
middlewareIndex int, middlewareIndex int,
req *http.Request, req *http.Request,
) (*http.Response, error) { ) (*http.Response, error) {
var ( ctx := clues.Add(
ctx = clues.Add( req.Context(),
req.Context(), "method", req.Method,
"method", req.Method, "url", LoggableURL(req.URL.String()),
"url", req.URL, // TODO: pii, not hasing yet because we want debuggable urls "request_len", req.ContentLength)
"request_len", req.ContentLength,
) // call the next middleware
resp, err = pipeline.Next(req, middlewareIndex) resp, err := pipeline.Next(req, middlewareIndex)
)
if strings.Contains(req.URL.String(), "users//") { if strings.Contains(req.URL.String(), "users//") {
logger.Ctx(ctx).Error("malformed request url: missing resource") logger.Ctx(ctx).Error("malformed request url: missing resource")

View File

@ -40,8 +40,9 @@ func NewWriter(id, url string, size int64) *writer {
// https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession // https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession
func (iw *writer) Write(p []byte) (int, error) { func (iw *writer) Write(p []byte) (int, error) {
rangeLength := len(p) rangeLength := len(p)
logger.Ctx(context.Background()).Debugf("WRITE for %s. Size:%d, Offset: %d, TotalSize: %d", logger.Ctx(context.Background()).
iw.id, rangeLength, iw.lastWrittenOffset, iw.contentLength) Debugf("WRITE for %s. Size:%d, Offset: %d, TotalSize: %d",
iw.id, rangeLength, iw.lastWrittenOffset, iw.contentLength)
endOffset := iw.lastWrittenOffset + int64(rangeLength) endOffset := iw.lastWrittenOffset + int64(rangeLength)
@ -49,13 +50,15 @@ func (iw *writer) Write(p []byte) (int, error) {
// data in the current request // data in the current request
_, err := iw.client.R(). _, err := iw.client.R().
SetHeaders(map[string]string{ SetHeaders(map[string]string{
contentRangeHeaderKey: fmt.Sprintf(contentRangeHeaderValueFmt, contentRangeHeaderKey: fmt.Sprintf(
contentRangeHeaderValueFmt,
iw.lastWrittenOffset, iw.lastWrittenOffset,
endOffset-1, endOffset-1,
iw.contentLength), iw.contentLength),
contentLengthHeaderKey: fmt.Sprintf("%d", rangeLength), contentLengthHeaderKey: fmt.Sprintf("%d", rangeLength),
}). }).
SetBody(bytes.NewReader(p)).Put(iw.url) SetBody(bytes.NewReader(p)).
Put(iw.url)
if err != nil { if err != nil {
return 0, clues.Wrap(err, "uploading item").With( return 0, clues.Wrap(err, "uploading item").With(
"upload_id", iw.id, "upload_id", iw.id,