conceal url pii (#3015)
Adds a new package to common (pii) with a clues.Concealer-compliant struct that can log and report URL values with a mix of hidden PII and exposed public values. Exposed values are based on well-known graph api path and query params. --- #### Does this PR need a docs update or release note? - [x] 🕐 Yes, but in a later PR #### Type of change - [x] 🤖 Supportability/Tests #### Issue(s) * #2024 #### Test Plan - [x] ⚡ Unit test
This commit is contained in:
parent
2341d61842
commit
74e4a094e0
18
src/internal/common/pii/pii.go
Normal file
18
src/internal/common/pii/pii.go
Normal file
@ -0,0 +1,18 @@
|
||||
package pii
|
||||
|
||||
import "strings"
|
||||
|
||||
// MapWithPlurals places the lower-cased value of each string
// into a map[string]struct{}, along with a copy of that string
// with an "s" appended as a naive plural (ex: FoO => foo, foos).
// Calling it with no arguments returns an empty, non-nil map.
func MapWithPlurals(ss ...string) map[string]struct{} {
	// every input contributes two keys: the singular and the plural.
	mss := make(map[string]struct{}, len(ss)*2)

	for _, s := range ss {
		tl := strings.ToLower(s)
		mss[tl] = struct{}{}
		mss[tl+"s"] = struct{}{}
	}

	return mss
}
|
||||
96
src/internal/common/pii/url.go
Normal file
96
src/internal/common/pii/url.go
Normal file
@ -0,0 +1,96 @@
|
||||
package pii
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/alcionai/clues"
|
||||
"golang.org/x/exp/maps"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
// SafeURL complies with the clues.Concealer and fmt.Stringer
// interfaces to produce a safely loggable version of the URL.
// Path elements that equal a SafePathElems key will show in
// plain text. All other path elements get concealed by clues.
// Query parameters whose key matches a SafeQueryKeys entry will
// have their values displayed in plain text. All other query
// param values get concealed by clues.
type SafeURL struct {
	// the original URL
	URL string
	// path elements that do not need to be hidden
	// keys should be lower-cased
	SafePathElems map[string]struct{}
	// query parameters that do not need to be hidden
	// keys should be lower-cased
	SafeQueryKeys map[string]struct{}
}
|
||||
|
||||
// compile-time assertion that SafeURL satisfies clues.Concealer.
var _ clues.Concealer = &SafeURL{}
|
||||
|
||||
// Conceal produces a string of the url with the sensitive info
|
||||
// obscured (hashed or replaced).
|
||||
func (u SafeURL) Conceal() string {
|
||||
if len(u.URL) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
p, err := url.Parse(u.URL)
|
||||
if err != nil {
|
||||
return "malformed-URL"
|
||||
}
|
||||
|
||||
elems := slices.Clone(strings.Split(p.EscapedPath(), "/"))
|
||||
|
||||
// conceal any non-safe path elem
|
||||
for i := range elems {
|
||||
e := elems[i]
|
||||
|
||||
if _, ok := u.SafePathElems[strings.ToLower(e)]; !ok {
|
||||
elems[i] = clues.Conceal(e)
|
||||
}
|
||||
}
|
||||
|
||||
qry := maps.Clone(p.Query())
|
||||
|
||||
// conceal any non-safe query param values
|
||||
for k, v := range p.Query() {
|
||||
if _, ok := u.SafeQueryKeys[strings.ToLower(k)]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
for i := range v {
|
||||
v[i] = clues.Conceal(v[i])
|
||||
}
|
||||
|
||||
qry[k] = v
|
||||
}
|
||||
|
||||
je := strings.Join(elems, "/")
|
||||
esc := p.Scheme + "://" + p.Hostname() + je
|
||||
|
||||
if len(qry) > 0 {
|
||||
esc += "?" + qry.Encode()
|
||||
}
|
||||
|
||||
unesc, err := url.QueryUnescape(esc)
|
||||
if err != nil {
|
||||
return esc
|
||||
}
|
||||
|
||||
return unesc
|
||||
}
|
||||
|
||||
// Format ensures the safeURL will output the Conceal() version
|
||||
// even when used in a PrintF.
|
||||
func (u SafeURL) Format(fs fmt.State, _ rune) {
|
||||
fmt.Fprint(fs, u.Conceal())
|
||||
}
|
||||
|
||||
// String complies with fmt.Stringer by returning the Conceal()
|
||||
// version of the url whenever the value is transformed to a string.
|
||||
func (u SafeURL) String() string {
|
||||
return u.Conceal()
|
||||
}
|
||||
123
src/internal/common/pii/url_test.go
Normal file
123
src/internal/common/pii/url_test.go
Normal file
@ -0,0 +1,123 @@
|
||||
package pii_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/alcionai/clues"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/suite"
|
||||
|
||||
"github.com/alcionai/corso/src/internal/common/pii"
|
||||
"github.com/alcionai/corso/src/internal/tester"
|
||||
)
|
||||
|
||||
// URLUnitSuite groups the unit tests for pii.SafeURL.
type URLUnitSuite struct {
|
||||
tester.Suite
|
||||
}
|
||||
|
||||
// TestURLUnitSuite is the go test entry point; it runs URLUnitSuite
// through testify's suite runner.
func TestURLUnitSuite(t *testing.T) {
|
||||
suite.Run(t, &URLUnitSuite{Suite: tester.NewUnitSuite(t)})
|
||||
}
|
||||
|
||||
// SetupSuite sets the clues hasher to Flatmask for the span of this suite.
|
||||
func (suite *URLUnitSuite) SetupSuite() {
|
||||
clues.SetHasher(clues.HashCfg{HashAlg: clues.Flatmask})
|
||||
}
|
||||
|
||||
// revert clues hashing to plaintext for all other tests
|
||||
func (suite *URLUnitSuite) TeardownSuite() {
|
||||
clues.SetHasher(clues.NoHash())
|
||||
}
|
||||
|
||||
func (suite *URLUnitSuite) TestDoesThings() {
|
||||
stubURL := "https://host.com/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard"
|
||||
|
||||
table := []struct {
|
||||
name string
|
||||
input string
|
||||
expect string
|
||||
safePath map[string]struct{}
|
||||
safeQuery map[string]struct{}
|
||||
}{
|
||||
{
|
||||
name: "no safety",
|
||||
input: stubURL,
|
||||
expect: "https://host.com/***/***/***/***?beaux=***&fnords=***&fnords=***",
|
||||
},
|
||||
{
|
||||
name: "safe paths",
|
||||
input: stubURL,
|
||||
expect: "https://host.com/foo/***/baz/***?beaux=***&fnords=***&fnords=***",
|
||||
safePath: map[string]struct{}{"foo": {}, "baz": {}},
|
||||
},
|
||||
{
|
||||
name: "safe query",
|
||||
input: stubURL,
|
||||
expect: "https://host.com/***/***/***/***?beaux=regard&fnords=***&fnords=***",
|
||||
safeQuery: map[string]struct{}{"beaux": {}},
|
||||
},
|
||||
{
|
||||
name: "safe path and query",
|
||||
input: stubURL,
|
||||
expect: "https://host.com/foo/***/baz/***?beaux=regard&fnords=***&fnords=***",
|
||||
safePath: map[string]struct{}{"foo": {}, "baz": {}},
|
||||
safeQuery: map[string]struct{}{"beaux": {}},
|
||||
},
|
||||
{
|
||||
name: "empty elements",
|
||||
input: "https://host.com/foo//baz/?fnords=&beaux=",
|
||||
expect: "https://host.com/foo//baz/?beaux=&fnords=",
|
||||
safePath: map[string]struct{}{"foo": {}, "baz": {}},
|
||||
},
|
||||
{
|
||||
name: "no path",
|
||||
input: "https://host.com/",
|
||||
expect: "https://host.com/",
|
||||
},
|
||||
{
|
||||
name: "no path with query",
|
||||
input: "https://host.com/?fnords=smarfs&fnords=brunhilda&beaux=regard",
|
||||
expect: "https://host.com/?beaux=***&fnords=***&fnords=***",
|
||||
},
|
||||
{
|
||||
name: "relative path",
|
||||
input: "/foo/bar/baz/qux?fnords=smarfs&fnords=brunhilda&beaux=regard",
|
||||
expect: ":///***/***/***/***?beaux=***&fnords=***&fnords=***",
|
||||
},
|
||||
{
|
||||
name: "malformed url",
|
||||
input: "i am not a url",
|
||||
expect: "://***",
|
||||
},
|
||||
{
|
||||
name: "empty url",
|
||||
input: "",
|
||||
expect: "",
|
||||
},
|
||||
}
|
||||
for _, test := range table {
|
||||
suite.Run(test.name, func() {
|
||||
var (
|
||||
t = suite.T()
|
||||
su = pii.SafeURL{
|
||||
URL: test.input,
|
||||
SafePathElems: test.safePath,
|
||||
SafeQueryKeys: test.safeQuery,
|
||||
}
|
||||
)
|
||||
|
||||
result := su.Conceal()
|
||||
assert.Equal(t, test.expect, result, "Conceal()")
|
||||
|
||||
result = su.String()
|
||||
assert.Equal(t, test.expect, result, "String()")
|
||||
|
||||
result = fmt.Sprintf("%s", su)
|
||||
assert.Equal(t, test.expect, result, "fmt %%s")
|
||||
|
||||
result = fmt.Sprintf("%+v", su)
|
||||
assert.Equal(t, test.expect, result, "fmt %%+v")
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -9,6 +9,7 @@ import (
|
||||
"github.com/microsoftgraph/msgraph-sdk-go/models"
|
||||
|
||||
"github.com/alcionai/corso/src/internal/common/ptr"
|
||||
"github.com/alcionai/corso/src/internal/connector/graph"
|
||||
"github.com/alcionai/corso/src/internal/connector/support"
|
||||
"github.com/alcionai/corso/src/internal/connector/uploadsession"
|
||||
"github.com/alcionai/corso/src/pkg/logger"
|
||||
@ -104,8 +105,7 @@ func uploadLargeAttachment(
|
||||
|
||||
url := ptr.Val(session.GetUploadUrl())
|
||||
aw := uploadsession.NewWriter(uploader.getItemID(), url, size)
|
||||
// TODO: url pii refinement
|
||||
logger.Ctx(ctx).Debugw("uploading large attachment", "attachment_url", clues.Hide(url))
|
||||
logger.Ctx(ctx).Debugw("uploading large attachment", "attachment_url", graph.LoggableURL(url))
|
||||
|
||||
// Upload the stream data
|
||||
copyBuffer := make([]byte, attachmentChunkSize)
|
||||
|
||||
@ -19,6 +19,7 @@ import (
|
||||
msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core"
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/alcionai/corso/src/internal/common/pii"
|
||||
"github.com/alcionai/corso/src/internal/events"
|
||||
"github.com/alcionai/corso/src/pkg/account"
|
||||
"github.com/alcionai/corso/src/pkg/logger"
|
||||
@ -271,20 +272,86 @@ type Servicer interface {
|
||||
// LoggingMiddleware can be used to log the http request sent by the graph client
|
||||
type LoggingMiddleware struct{}
|
||||
|
||||
// well-known path names used by graph api calls.
|
||||
// used to un-hide path elements in a pii.SafeURL. Each entry is passed through pii.MapWithPlurals.
|
||||
var safePathParams = pii.MapWithPlurals(
|
||||
//nolint:misspell
|
||||
"alltime",
|
||||
"analytics",
|
||||
"archive",
|
||||
"beta",
|
||||
"calendargroup",
|
||||
"calendar",
|
||||
"calendarview",
|
||||
"channel",
|
||||
"childfolder",
|
||||
"children",
|
||||
"clone",
|
||||
"column",
|
||||
"contactfolder",
|
||||
"contact",
|
||||
"contenttype",
|
||||
"delta",
|
||||
"drive",
|
||||
"event",
|
||||
"group",
|
||||
"inbox",
|
||||
"instance",
|
||||
"invitation",
|
||||
"item",
|
||||
"joinedteam",
|
||||
"label",
|
||||
"list",
|
||||
"mailfolder",
|
||||
"member",
|
||||
"message",
|
||||
"notification",
|
||||
"page",
|
||||
"primarychannel",
|
||||
"root",
|
||||
"security",
|
||||
"site",
|
||||
"subscription",
|
||||
"team",
|
||||
"unarchive",
|
||||
"user",
|
||||
"v1.0")
|
||||
|
||||
// well-known safe query parameters used by graph api calls.
|
||||
// keys are written in lower case.
|
||||
// used to un-hide query params in a pii.SafeURL
|
||||
var safeQueryParams = map[string]struct{}{
|
||||
"deltatoken": {},
|
||||
"startdatetime": {},
|
||||
"enddatetime": {},
|
||||
"$count": {},
|
||||
"$expand": {},
|
||||
"$filter": {},
|
||||
"$select": {},
|
||||
"$top": {},
|
||||
}
|
||||
|
||||
// LoggableURL wraps the url in a pii.SafeURL configured with the
// safePathParams path elements and safeQueryParams query keys.
func LoggableURL(url string) pii.SafeURL {
|
||||
return pii.SafeURL{
|
||||
URL: url,
|
||||
SafePathElems: safePathParams,
|
||||
SafeQueryKeys: safeQueryParams,
|
||||
}
|
||||
}
|
||||
|
||||
func (handler *LoggingMiddleware) Intercept(
|
||||
pipeline khttp.Pipeline,
|
||||
middlewareIndex int,
|
||||
req *http.Request,
|
||||
) (*http.Response, error) {
|
||||
var (
|
||||
ctx = clues.Add(
|
||||
ctx := clues.Add(
|
||||
req.Context(),
|
||||
"method", req.Method,
|
||||
"url", req.URL, // TODO: pii, not hasing yet because we want debuggable urls
|
||||
"request_len", req.ContentLength,
|
||||
)
|
||||
resp, err = pipeline.Next(req, middlewareIndex)
|
||||
)
|
||||
"url", LoggableURL(req.URL.String()),
|
||||
"request_len", req.ContentLength)
|
||||
|
||||
// call the next middleware
|
||||
resp, err := pipeline.Next(req, middlewareIndex)
|
||||
|
||||
if strings.Contains(req.URL.String(), "users//") {
|
||||
logger.Ctx(ctx).Error("malformed request url: missing resource")
|
||||
|
||||
@ -40,7 +40,8 @@ func NewWriter(id, url string, size int64) *writer {
|
||||
// https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession
|
||||
func (iw *writer) Write(p []byte) (int, error) {
|
||||
rangeLength := len(p)
|
||||
logger.Ctx(context.Background()).Debugf("WRITE for %s. Size:%d, Offset: %d, TotalSize: %d",
|
||||
logger.Ctx(context.Background()).
|
||||
Debugf("WRITE for %s. Size:%d, Offset: %d, TotalSize: %d",
|
||||
iw.id, rangeLength, iw.lastWrittenOffset, iw.contentLength)
|
||||
|
||||
endOffset := iw.lastWrittenOffset + int64(rangeLength)
|
||||
@ -49,13 +50,15 @@ func (iw *writer) Write(p []byte) (int, error) {
|
||||
// data in the current request
|
||||
_, err := iw.client.R().
|
||||
SetHeaders(map[string]string{
|
||||
contentRangeHeaderKey: fmt.Sprintf(contentRangeHeaderValueFmt,
|
||||
contentRangeHeaderKey: fmt.Sprintf(
|
||||
contentRangeHeaderValueFmt,
|
||||
iw.lastWrittenOffset,
|
||||
endOffset-1,
|
||||
iw.contentLength),
|
||||
contentLengthHeaderKey: fmt.Sprintf("%d", rangeLength),
|
||||
}).
|
||||
SetBody(bytes.NewReader(p)).Put(iw.url)
|
||||
SetBody(bytes.NewReader(p)).
|
||||
Put(iw.url)
|
||||
if err != nil {
|
||||
return 0, clues.Wrap(err, "uploading item").With(
|
||||
"upload_id", iw.id,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user