conceal url pii (#3015)
Adds a new package to common (pii) with a clues.Concealer-compliant struct that can log and report URL values with a mix of hidden PII and exposed public values. Exposed values are based on well-known graph api path and query params. --- #### Does this PR need a docs update or release note? - [x] 🕐 Yes, but in a later PR #### Type of change - [x] 🤖 Supportability/Tests #### Issue(s) * #2024 #### Test Plan - [x] ⚡ Unit test
This commit is contained in:
parent
2341d61842
commit
74e4a094e0
18
src/internal/common/pii/pii.go
Normal file
18
src/internal/common/pii/pii.go
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
package pii
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
|
// MapWithPlurals builds a set (map[string]struct{}) from the provided
// strings. Each non-empty string is lower-cased and added to the set twice:
// once as-is and once with an "s" suffix as a naive plural
// (ex: FoO => foo, foos).
//
// Empty strings are skipped; otherwise an empty input would add both ""
// and a bare "s" to the set. The returned map is never nil.
func MapWithPlurals(ss ...string) map[string]struct{} {
	// every retained input contributes two keys
	mss := make(map[string]struct{}, len(ss)*2)

	for _, s := range ss {
		if len(s) == 0 {
			continue
		}

		tl := strings.ToLower(s)
		mss[tl] = struct{}{}
		mss[tl+"s"] = struct{}{}
	}

	return mss
}
|
||||||
96
src/internal/common/pii/url.go
Normal file
96
src/internal/common/pii/url.go
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
package pii
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/alcionai/clues"
|
||||||
|
"golang.org/x/exp/maps"
|
||||||
|
"golang.org/x/exp/slices"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SafeURL complies with the clues.Concealer and fmt.Stringer
// interfaces to produce a safely loggable version of the URL.
// Path elements that equal a SafePathElems entry will show in
// plain text. All other path elements will get hashed by clues.
// Query parameters whose key matches a SafeQueryKeys entry will
// have their values displayed in plain text. All other query
// parameter values will get hashed by clues.
type SafeURL struct {
	// the original URL
	URL string
	// path elements that do not need to be hidden
	// keys should be lower-cased
	SafePathElems map[string]struct{}
	// query parameters that do not need to be hidden
	// keys should be lower-cased
	SafeQueryKeys map[string]struct{}
}
|
||||||
|
|
||||||
|
// compile-time check that *SafeURL satisfies clues.Concealer
var _ clues.Concealer = (*SafeURL)(nil)
|
||||||
|
|
||||||
|
// Conceal renders the url with its sensitive parts obscured.
//
// Every path element that is not a key in SafePathElems, and every value
// of a query parameter whose key is not in SafeQueryKeys, is passed through
// clues.Conceal. Both lookups use the lower-cased element or key. Query
// keys themselves are always left as-is.
//
// The output is rebuilt from the scheme, hostname, path, and query only.
// An empty URL yields "", and a URL that url.Parse rejects yields the
// literal "malformed-URL".
func (u SafeURL) Conceal() string {
	if u.URL == "" {
		return ""
	}

	parsed, err := url.Parse(u.URL)
	if err != nil {
		return "malformed-URL"
	}

	// path: swap out every segment that isn't allow-listed
	segments := slices.Clone(strings.Split(parsed.EscapedPath(), "/"))

	for i, seg := range segments {
		if _, safe := u.SafePathElems[strings.ToLower(seg)]; safe {
			continue
		}

		segments[i] = clues.Conceal(seg)
	}

	// query: swap out every value whose key isn't allow-listed
	params := maps.Clone(parsed.Query())

	for key, vals := range parsed.Query() {
		if _, safe := u.SafeQueryKeys[strings.ToLower(key)]; safe {
			continue
		}

		for i, val := range vals {
			vals[i] = clues.Conceal(val)
		}

		params[key] = vals
	}

	result := parsed.Scheme + "://" + parsed.Hostname() + strings.Join(segments, "/")

	if len(params) > 0 {
		result += "?" + params.Encode()
	}

	// prefer the unescaped form for readability; if unescaping
	// fails, fall back to the escaped string.
	if decoded, err := url.QueryUnescape(result); err == nil {
		return decoded
	}

	return result
}
|
||||||
|
|
||||||
|
// Format ensures the safeURL will output the Conceal() version
|
||||||
|
// even when used in a PrintF.
|
||||||
|
func (u SafeURL) Format(fs fmt.State, _ rune) {
|
||||||
|
fmt.Fprint(fs, u.Conceal())
|
||||||
|
}
|
||||||
|
|
||||||
|
// String complies with Stringer to ensure the Conceal() version
|
||||||
|
// of the url is printed anytime it gets transformed to a string.
|
||||||
|
func (u SafeURL) String() string {
|
||||||
|
return u.Conceal()
|
||||||
|
}
|
||||||
123
src/internal/common/pii/url_test.go
Normal file
123
src/internal/common/pii/url_test.go
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
package pii_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/alcionai/clues"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/suite"
|
||||||
|
|
||||||
|
"github.com/alcionai/corso/src/internal/common/pii"
|
||||||
|
"github.com/alcionai/corso/src/internal/tester"
|
||||||
|
)
|
||||||
|
|
||||||
|
type URLUnitSuite struct {
|
||||||
|
tester.Suite
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestURLUnitSuite(t *testing.T) {
|
||||||
|
suite.Run(t, &URLUnitSuite{Suite: tester.NewUnitSuite(t)})
|
||||||
|
}
|
||||||
|
|
||||||
|
// set the clues hashing to mask for the span of this suite
|
||||||
|
func (suite *URLUnitSuite) SetupSuite() {
|
||||||
|
clues.SetHasher(clues.HashCfg{HashAlg: clues.Flatmask})
|
||||||
|
}
|
||||||
|
|
||||||
|
// revert clues hashing to plaintext for all other tests
|
||||||
|
func (suite *URLUnitSuite) TeardownSuite() {
|
||||||
|
clues.SetHasher(clues.NoHash())
|
||||||
|
}
|
||||||
|
|
||||||
|
func (suite *URLUnitSuite) TestDoesThings() {
|
||||||
|
stubURL := "https://host.com/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard"
|
||||||
|
|
||||||
|
table := []struct {
|
||||||
|
name string
|
||||||
|
input string
|
||||||
|
expect string
|
||||||
|
safePath map[string]struct{}
|
||||||
|
safeQuery map[string]struct{}
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "no safety",
|
||||||
|
input: stubURL,
|
||||||
|
expect: "https://host.com/***/***/***/***?beaux=***&fnords=***&fnords=***",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "safe paths",
|
||||||
|
input: stubURL,
|
||||||
|
expect: "https://host.com/foo/***/baz/***?beaux=***&fnords=***&fnords=***",
|
||||||
|
safePath: map[string]struct{}{"foo": {}, "baz": {}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "safe query",
|
||||||
|
input: stubURL,
|
||||||
|
expect: "https://host.com/***/***/***/***?beaux=regard&fnords=***&fnords=***",
|
||||||
|
safeQuery: map[string]struct{}{"beaux": {}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "safe path and query",
|
||||||
|
input: stubURL,
|
||||||
|
expect: "https://host.com/foo/***/baz/***?beaux=regard&fnords=***&fnords=***",
|
||||||
|
safePath: map[string]struct{}{"foo": {}, "baz": {}},
|
||||||
|
safeQuery: map[string]struct{}{"beaux": {}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty elements",
|
||||||
|
input: "https://host.com/foo//baz/?fnords=&beaux=",
|
||||||
|
expect: "https://host.com/foo//baz/?beaux=&fnords=",
|
||||||
|
safePath: map[string]struct{}{"foo": {}, "baz": {}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no path",
|
||||||
|
input: "https://host.com/",
|
||||||
|
expect: "https://host.com/",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no path with query",
|
||||||
|
input: "https://host.com/?fnords=smarfs&fnords=brunhilda&beaux=regard",
|
||||||
|
expect: "https://host.com/?beaux=***&fnords=***&fnords=***",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "relative path",
|
||||||
|
input: "/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard",
|
||||||
|
expect: ":///***/***/***/***?beaux=***&fnords=***&fnords=***",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "malformed url",
|
||||||
|
input: "i am not a url",
|
||||||
|
expect: "://***",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty url",
|
||||||
|
input: "",
|
||||||
|
expect: "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, test := range table {
|
||||||
|
suite.Run(test.name, func() {
|
||||||
|
var (
|
||||||
|
t = suite.T()
|
||||||
|
su = pii.SafeURL{
|
||||||
|
URL: test.input,
|
||||||
|
SafePathElems: test.safePath,
|
||||||
|
SafeQueryKeys: test.safeQuery,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
result := su.Conceal()
|
||||||
|
assert.Equal(t, test.expect, result, "Conceal()")
|
||||||
|
|
||||||
|
result = su.String()
|
||||||
|
assert.Equal(t, test.expect, result, "String()")
|
||||||
|
|
||||||
|
result = fmt.Sprintf("%s", su)
|
||||||
|
assert.Equal(t, test.expect, result, "fmt %%s")
|
||||||
|
|
||||||
|
result = fmt.Sprintf("%+v", su)
|
||||||
|
assert.Equal(t, test.expect, result, "fmt %%+v")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -9,6 +9,7 @@ import (
|
|||||||
"github.com/microsoftgraph/msgraph-sdk-go/models"
|
"github.com/microsoftgraph/msgraph-sdk-go/models"
|
||||||
|
|
||||||
"github.com/alcionai/corso/src/internal/common/ptr"
|
"github.com/alcionai/corso/src/internal/common/ptr"
|
||||||
|
"github.com/alcionai/corso/src/internal/connector/graph"
|
||||||
"github.com/alcionai/corso/src/internal/connector/support"
|
"github.com/alcionai/corso/src/internal/connector/support"
|
||||||
"github.com/alcionai/corso/src/internal/connector/uploadsession"
|
"github.com/alcionai/corso/src/internal/connector/uploadsession"
|
||||||
"github.com/alcionai/corso/src/pkg/logger"
|
"github.com/alcionai/corso/src/pkg/logger"
|
||||||
@ -104,8 +105,7 @@ func uploadLargeAttachment(
|
|||||||
|
|
||||||
url := ptr.Val(session.GetUploadUrl())
|
url := ptr.Val(session.GetUploadUrl())
|
||||||
aw := uploadsession.NewWriter(uploader.getItemID(), url, size)
|
aw := uploadsession.NewWriter(uploader.getItemID(), url, size)
|
||||||
// TODO: url pii refinement
|
logger.Ctx(ctx).Debugw("uploading large attachment", "attachment_url", graph.LoggableURL(url))
|
||||||
logger.Ctx(ctx).Debugw("uploading large attachment", "attachment_url", clues.Hide(url))
|
|
||||||
|
|
||||||
// Upload the stream data
|
// Upload the stream data
|
||||||
copyBuffer := make([]byte, attachmentChunkSize)
|
copyBuffer := make([]byte, attachmentChunkSize)
|
||||||
|
|||||||
@ -19,6 +19,7 @@ import (
|
|||||||
msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core"
|
msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core"
|
||||||
"golang.org/x/time/rate"
|
"golang.org/x/time/rate"
|
||||||
|
|
||||||
|
"github.com/alcionai/corso/src/internal/common/pii"
|
||||||
"github.com/alcionai/corso/src/internal/events"
|
"github.com/alcionai/corso/src/internal/events"
|
||||||
"github.com/alcionai/corso/src/pkg/account"
|
"github.com/alcionai/corso/src/pkg/account"
|
||||||
"github.com/alcionai/corso/src/pkg/logger"
|
"github.com/alcionai/corso/src/pkg/logger"
|
||||||
@ -271,20 +272,86 @@ type Servicer interface {
|
|||||||
// LoggingMiddleware can be used to log the http request sent by the graph client
|
// LoggingMiddleware can be used to log the http request sent by the graph client
|
||||||
type LoggingMiddleware struct{}
|
type LoggingMiddleware struct{}
|
||||||
|
|
||||||
|
// well-known path names used by graph api calls
|
||||||
|
// used to un-hide path elements in a pii.SafeURL
|
||||||
|
var safePathParams = pii.MapWithPlurals(
|
||||||
|
//nolint:misspell
|
||||||
|
"alltime",
|
||||||
|
"analytics",
|
||||||
|
"archive",
|
||||||
|
"beta",
|
||||||
|
"calendargroup",
|
||||||
|
"calendar",
|
||||||
|
"calendarview",
|
||||||
|
"channel",
|
||||||
|
"childfolder",
|
||||||
|
"children",
|
||||||
|
"clone",
|
||||||
|
"column",
|
||||||
|
"contactfolder",
|
||||||
|
"contact",
|
||||||
|
"contenttype",
|
||||||
|
"delta",
|
||||||
|
"drive",
|
||||||
|
"event",
|
||||||
|
"group",
|
||||||
|
"inbox",
|
||||||
|
"instance",
|
||||||
|
"invitation",
|
||||||
|
"item",
|
||||||
|
"joinedteam",
|
||||||
|
"label",
|
||||||
|
"list",
|
||||||
|
"mailfolder",
|
||||||
|
"member",
|
||||||
|
"message",
|
||||||
|
"notification",
|
||||||
|
"page",
|
||||||
|
"primarychannel",
|
||||||
|
"root",
|
||||||
|
"security",
|
||||||
|
"site",
|
||||||
|
"subscription",
|
||||||
|
"team",
|
||||||
|
"unarchive",
|
||||||
|
"user",
|
||||||
|
"v1.0")
|
||||||
|
|
||||||
|
// safeQueryParams is the set of well-known safe query parameter keys used
// by graph api calls. It is used to un-hide query param values in a
// pii.SafeURL. Keys are lower-case because pii.SafeURL lower-cases each
// query key before looking it up.
var safeQueryParams = map[string]struct{}{
	"$count":        {},
	"$expand":       {},
	"$filter":       {},
	"$select":       {},
	"$top":          {},
	"deltatoken":    {},
	"enddatetime":   {},
	"startdatetime": {},
}
|
||||||
|
|
||||||
|
func LoggableURL(url string) pii.SafeURL {
|
||||||
|
return pii.SafeURL{
|
||||||
|
URL: url,
|
||||||
|
SafePathElems: safePathParams,
|
||||||
|
SafeQueryKeys: safeQueryParams,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (handler *LoggingMiddleware) Intercept(
|
func (handler *LoggingMiddleware) Intercept(
|
||||||
pipeline khttp.Pipeline,
|
pipeline khttp.Pipeline,
|
||||||
middlewareIndex int,
|
middlewareIndex int,
|
||||||
req *http.Request,
|
req *http.Request,
|
||||||
) (*http.Response, error) {
|
) (*http.Response, error) {
|
||||||
var (
|
ctx := clues.Add(
|
||||||
ctx = clues.Add(
|
req.Context(),
|
||||||
req.Context(),
|
"method", req.Method,
|
||||||
"method", req.Method,
|
"url", LoggableURL(req.URL.String()),
|
||||||
"url", req.URL, // TODO: pii, not hasing yet because we want debuggable urls
|
"request_len", req.ContentLength)
|
||||||
"request_len", req.ContentLength,
|
|
||||||
)
|
// call the next middleware
|
||||||
resp, err = pipeline.Next(req, middlewareIndex)
|
resp, err := pipeline.Next(req, middlewareIndex)
|
||||||
)
|
|
||||||
|
|
||||||
if strings.Contains(req.URL.String(), "users//") {
|
if strings.Contains(req.URL.String(), "users//") {
|
||||||
logger.Ctx(ctx).Error("malformed request url: missing resource")
|
logger.Ctx(ctx).Error("malformed request url: missing resource")
|
||||||
|
|||||||
@ -40,8 +40,9 @@ func NewWriter(id, url string, size int64) *writer {
|
|||||||
// https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession
|
// https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession
|
||||||
func (iw *writer) Write(p []byte) (int, error) {
|
func (iw *writer) Write(p []byte) (int, error) {
|
||||||
rangeLength := len(p)
|
rangeLength := len(p)
|
||||||
logger.Ctx(context.Background()).Debugf("WRITE for %s. Size:%d, Offset: %d, TotalSize: %d",
|
logger.Ctx(context.Background()).
|
||||||
iw.id, rangeLength, iw.lastWrittenOffset, iw.contentLength)
|
Debugf("WRITE for %s. Size:%d, Offset: %d, TotalSize: %d",
|
||||||
|
iw.id, rangeLength, iw.lastWrittenOffset, iw.contentLength)
|
||||||
|
|
||||||
endOffset := iw.lastWrittenOffset + int64(rangeLength)
|
endOffset := iw.lastWrittenOffset + int64(rangeLength)
|
||||||
|
|
||||||
@ -49,13 +50,15 @@ func (iw *writer) Write(p []byte) (int, error) {
|
|||||||
// data in the current request
|
// data in the current request
|
||||||
_, err := iw.client.R().
|
_, err := iw.client.R().
|
||||||
SetHeaders(map[string]string{
|
SetHeaders(map[string]string{
|
||||||
contentRangeHeaderKey: fmt.Sprintf(contentRangeHeaderValueFmt,
|
contentRangeHeaderKey: fmt.Sprintf(
|
||||||
|
contentRangeHeaderValueFmt,
|
||||||
iw.lastWrittenOffset,
|
iw.lastWrittenOffset,
|
||||||
endOffset-1,
|
endOffset-1,
|
||||||
iw.contentLength),
|
iw.contentLength),
|
||||||
contentLengthHeaderKey: fmt.Sprintf("%d", rangeLength),
|
contentLengthHeaderKey: fmt.Sprintf("%d", rangeLength),
|
||||||
}).
|
}).
|
||||||
SetBody(bytes.NewReader(p)).Put(iw.url)
|
SetBody(bytes.NewReader(p)).
|
||||||
|
Put(iw.url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, clues.Wrap(err, "uploading item").With(
|
return 0, clues.Wrap(err, "uploading item").With(
|
||||||
"upload_id", iw.id,
|
"upload_id", iw.id,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user