// file: src/internal/common/pii/pii.go

// MapWithPlurals places the lower-cased value of each string into a
// map[string]struct{}, along with a naive plural of that value formed
// by appending "s" (ex: FoO => foo, foos).
//
// An empty input string is kept as the "" key but is never pluralized,
// since doing so would add the unrelated bare key "s" to the set.
// The returned map is always non-nil, even when no strings are passed.
func MapWithPlurals(ss ...string) map[string]struct{} {
	// at most two entries per input: the value and its plural
	mss := make(map[string]struct{}, len(ss)*2)

	for _, s := range ss {
		tl := strings.ToLower(s)
		mss[tl] = struct{}{}

		// pluralizing "" would produce the key "s", which is a
		// different word rather than a plural of the input.
		if len(tl) > 0 {
			mss[tl+"s"] = struct{}{}
		}
	}

	return mss
}

// file: src/internal/common/pii/url.go

// SafeURL complies with the clues.Concealer and fmt.Stringer
// interfaces to produce a safely loggable version of the URL.
// Path elements whose lower-cased form is a key in SafePathElems
// will show in plain text. All other path elements get concealed
// by clues.
// Query parameters whose lower-cased key is in SafeQueryKeys will
// have their values displayed in plain text. All other query param
// values get concealed by clues.
type SafeURL struct {
	// the original URL
	URL string
	// path elements that do not need to be hidden
	// keys should be lower-cased
	SafePathElems map[string]struct{}
	// query parameters that do not need to be hidden
	// keys should be lower-cased
	SafeQueryKeys map[string]struct{}
}
+func (u SafeURL) Conceal() string { + if len(u.URL) == 0 { + return "" + } + + p, err := url.Parse(u.URL) + if err != nil { + return "malformed-URL" + } + + elems := slices.Clone(strings.Split(p.EscapedPath(), "/")) + + // conceal any non-safe path elem + for i := range elems { + e := elems[i] + + if _, ok := u.SafePathElems[strings.ToLower(e)]; !ok { + elems[i] = clues.Conceal(e) + } + } + + qry := maps.Clone(p.Query()) + + // conceal any non-safe query param values + for k, v := range p.Query() { + if _, ok := u.SafeQueryKeys[strings.ToLower(k)]; ok { + continue + } + + for i := range v { + v[i] = clues.Conceal(v[i]) + } + + qry[k] = v + } + + je := strings.Join(elems, "/") + esc := p.Scheme + "://" + p.Hostname() + je + + if len(qry) > 0 { + esc += "?" + qry.Encode() + } + + unesc, err := url.QueryUnescape(esc) + if err != nil { + return esc + } + + return unesc +} + +// Format ensures the safeURL will output the Conceal() version +// even when used in a PrintF. +func (u SafeURL) Format(fs fmt.State, _ rune) { + fmt.Fprint(fs, u.Conceal()) +} + +// String complies with Stringer to ensure the Conceal() version +// of the url is printed anytime it gets transformed to a string. 
+func (u SafeURL) String() string { + return u.Conceal() +} diff --git a/src/internal/common/pii/url_test.go b/src/internal/common/pii/url_test.go new file mode 100644 index 000000000..a89fd2d26 --- /dev/null +++ b/src/internal/common/pii/url_test.go @@ -0,0 +1,123 @@ +package pii_test + +import ( + "fmt" + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/common/pii" + "github.com/alcionai/corso/src/internal/tester" +) + +type URLUnitSuite struct { + tester.Suite +} + +func TestURLUnitSuite(t *testing.T) { + suite.Run(t, &URLUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +// set the clues hashing to mask for the span of this suite +func (suite *URLUnitSuite) SetupSuite() { + clues.SetHasher(clues.HashCfg{HashAlg: clues.Flatmask}) +} + +// revert clues hashing to plaintext for all other tests +func (suite *URLUnitSuite) TeardownSuite() { + clues.SetHasher(clues.NoHash()) +} + +func (suite *URLUnitSuite) TestDoesThings() { + stubURL := "https://host.com/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard" + + table := []struct { + name string + input string + expect string + safePath map[string]struct{} + safeQuery map[string]struct{} + }{ + { + name: "no safety", + input: stubURL, + expect: "https://host.com/***/***/***/***?beaux=***&fnords=***&fnords=***", + }, + { + name: "safe paths", + input: stubURL, + expect: "https://host.com/foo/***/baz/***?beaux=***&fnords=***&fnords=***", + safePath: map[string]struct{}{"foo": {}, "baz": {}}, + }, + { + name: "safe query", + input: stubURL, + expect: "https://host.com/***/***/***/***?beaux=regard&fnords=***&fnords=***", + safeQuery: map[string]struct{}{"beaux": {}}, + }, + { + name: "safe path and query", + input: stubURL, + expect: "https://host.com/foo/***/baz/***?beaux=regard&fnords=***&fnords=***", + safePath: map[string]struct{}{"foo": {}, "baz": {}}, + safeQuery: map[string]struct{}{"beaux": {}}, 
+ }, + { + name: "empty elements", + input: "https://host.com/foo//baz/?fnords=&beaux=", + expect: "https://host.com/foo//baz/?beaux=&fnords=", + safePath: map[string]struct{}{"foo": {}, "baz": {}}, + }, + { + name: "no path", + input: "https://host.com/", + expect: "https://host.com/", + }, + { + name: "no path with query", + input: "https://host.com/?fnords=smarfs&fnords=brunhilda&beaux=regard", + expect: "https://host.com/?beaux=***&fnords=***&fnords=***", + }, + { + name: "relative path", + input: "/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard", + expect: ":///***/***/***/***?beaux=***&fnords=***&fnords=***", + }, + { + name: "malformed url", + input: "i am not a url", + expect: "://***", + }, + { + name: "empty url", + input: "", + expect: "", + }, + } + for _, test := range table { + suite.Run(test.name, func() { + var ( + t = suite.T() + su = pii.SafeURL{ + URL: test.input, + SafePathElems: test.safePath, + SafeQueryKeys: test.safeQuery, + } + ) + + result := su.Conceal() + assert.Equal(t, test.expect, result, "Conceal()") + + result = su.String() + assert.Equal(t, test.expect, result, "String()") + + result = fmt.Sprintf("%s", su) + assert.Equal(t, test.expect, result, "fmt %%s") + + result = fmt.Sprintf("%+v", su) + assert.Equal(t, test.expect, result, "fmt %%+v") + }) + } +} diff --git a/src/internal/connector/exchange/attachment.go b/src/internal/connector/exchange/attachment.go index f42f3348d..4c6c99d13 100644 --- a/src/internal/connector/exchange/attachment.go +++ b/src/internal/connector/exchange/attachment.go @@ -9,6 +9,7 @@ import ( "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/connector/uploadsession" "github.com/alcionai/corso/src/pkg/logger" @@ -104,8 +105,7 @@ func uploadLargeAttachment( url := 
ptr.Val(session.GetUploadUrl()) aw := uploadsession.NewWriter(uploader.getItemID(), url, size) - // TODO: url pii refinementt - logger.Ctx(ctx).Debugw("uploading large attachment", "attachment_url", clues.Hide(url)) + logger.Ctx(ctx).Debugw("uploading large attachment", "attachment_url", graph.LoggableURL(url)) // Upload the stream data copyBuffer := make([]byte, attachmentChunkSize) diff --git a/src/internal/connector/graph/service.go b/src/internal/connector/graph/service.go index bbe6de0aa..4bf449044 100644 --- a/src/internal/connector/graph/service.go +++ b/src/internal/connector/graph/service.go @@ -19,6 +19,7 @@ import ( msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core" "golang.org/x/time/rate" + "github.com/alcionai/corso/src/internal/common/pii" "github.com/alcionai/corso/src/internal/events" "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/logger" @@ -271,20 +272,86 @@ type Servicer interface { // LoggingMiddleware can be used to log the http request sent by the graph client type LoggingMiddleware struct{} +// well-known path names used by graph api calls +// used to un-hide path elements in a pii.SafeURL +var safePathParams = pii.MapWithPlurals( + //nolint:misspell + "alltime", + "analytics", + "archive", + "beta", + "calendargroup", + "calendar", + "calendarview", + "channel", + "childfolder", + "children", + "clone", + "column", + "contactfolder", + "contact", + "contenttype", + "delta", + "drive", + "event", + "group", + "inbox", + "instance", + "invitation", + "item", + "joinedteam", + "label", + "list", + "mailfolder", + "member", + "message", + "notification", + "page", + "primarychannel", + "root", + "security", + "site", + "subscription", + "team", + "unarchive", + "user", + "v1.0") + +// well-known safe query parameters used by graph api calls +// +// used to un-hide query params in a pii.SafeURL +var safeQueryParams = map[string]struct{}{ + "deltatoken": {}, + "startdatetime": {}, + "enddatetime": {}, + 
"$count": {}, + "$expand": {}, + "$filter": {}, + "$select": {}, + "$top": {}, +} + +func LoggableURL(url string) pii.SafeURL { + return pii.SafeURL{ + URL: url, + SafePathElems: safePathParams, + SafeQueryKeys: safeQueryParams, + } +} + func (handler *LoggingMiddleware) Intercept( pipeline khttp.Pipeline, middlewareIndex int, req *http.Request, ) (*http.Response, error) { - var ( - ctx = clues.Add( - req.Context(), - "method", req.Method, - "url", req.URL, // TODO: pii, not hasing yet because we want debuggable urls - "request_len", req.ContentLength, - ) - resp, err = pipeline.Next(req, middlewareIndex) - ) + ctx := clues.Add( + req.Context(), + "method", req.Method, + "url", LoggableURL(req.URL.String()), + "request_len", req.ContentLength) + + // call the next middleware + resp, err := pipeline.Next(req, middlewareIndex) if strings.Contains(req.URL.String(), "users//") { logger.Ctx(ctx).Error("malformed request url: missing resource") diff --git a/src/internal/connector/uploadsession/uploadsession.go b/src/internal/connector/uploadsession/uploadsession.go index 60f84f6ef..210abe018 100644 --- a/src/internal/connector/uploadsession/uploadsession.go +++ b/src/internal/connector/uploadsession/uploadsession.go @@ -40,8 +40,9 @@ func NewWriter(id, url string, size int64) *writer { // https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession func (iw *writer) Write(p []byte) (int, error) { rangeLength := len(p) - logger.Ctx(context.Background()).Debugf("WRITE for %s. Size:%d, Offset: %d, TotalSize: %d", - iw.id, rangeLength, iw.lastWrittenOffset, iw.contentLength) + logger.Ctx(context.Background()). + Debugf("WRITE for %s. Size:%d, Offset: %d, TotalSize: %d", + iw.id, rangeLength, iw.lastWrittenOffset, iw.contentLength) endOffset := iw.lastWrittenOffset + int64(rangeLength) @@ -49,13 +50,15 @@ func (iw *writer) Write(p []byte) (int, error) { // data in the current request _, err := iw.client.R(). 
SetHeaders(map[string]string{ - contentRangeHeaderKey: fmt.Sprintf(contentRangeHeaderValueFmt, + contentRangeHeaderKey: fmt.Sprintf( + contentRangeHeaderValueFmt, iw.lastWrittenOffset, endOffset-1, iw.contentLength), contentLengthHeaderKey: fmt.Sprintf("%d", rangeLength), }). - SetBody(bytes.NewReader(p)).Put(iw.url) + SetBody(bytes.NewReader(p)). + Put(iw.url) if err != nil { return 0, clues.Wrap(err, "uploading item").With( "upload_id", iw.id,