From b6ae9c6d07c2dbe7d41ae3296469292f5e38fb89 Mon Sep 17 00:00:00 2001 From: Georgi Matev Date: Mon, 10 Apr 2023 09:44:23 -0700 Subject: [PATCH] Switching user identification and hashing (#3071) Encompasses the following changes: * distinct ID for users and events set to m365 tenant ID hash * still record local repoId * switches the hashing method for the m365 tenant ID to truncated sha256 * continue logging deprecated md5 hash for a few releases to facilitate event merge --- #### Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No #### Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * # #### Test Plan - [x] :muscle: Manual - [ ] :zap: Unit test - [ ] :green_heart: E2E --- src/internal/events/events.go | 50 +++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/src/internal/events/events.go b/src/internal/events/events.go index da7f8cfe3..47a15f5e9 100644 --- a/src/internal/events/events.go +++ b/src/internal/events/events.go @@ -3,8 +3,10 @@ package events import ( "context" "crypto/md5" + "crypto/sha256" "fmt" "io" + "math" "os" "time" @@ -20,9 +22,10 @@ import ( // keys for ease of use const ( - corsoVersion = "corso_version" - repoID = "repo_id" - tenantID = "m365_tenant_hash" + corsoVersion = "corso_version" + repoID = "repo_id" + tenantID = "m365_tenant_hash" + tenantIDDeprecated = "m365_tenant_hash_deprecated" // Event Keys CorsoStart = "Corso Start" @@ -49,6 +52,11 @@ const ( Status = "status" ) +const ( + sha256OutputLength = 64 + truncatedHashLength = 32 +) + type Eventer interface { Event(context.Context, string, map[string]any) Close() error @@ -58,9 +66,10 @@ type Eventer interface { type Bus struct { client analytics.Client - repoID string // one-way hash 
that uniquely identifies the repo. - tenant string // one-way hash that uniquely identifies the tenant. - version string // the Corso release version + repoID string // one-way hash that uniquely identifies the repo. + tenant string // one-way hash that uniquely identifies the tenant. + tenantDeprecated string // one-way hash that uniquely identified the tenant (old hashing algo for continuity). + version string // the Corso release version } var ( @@ -100,9 +109,10 @@ func NewBus(ctx context.Context, s storage.Storage, tenID string, opts control.O } return Bus{ - client: client, - tenant: tenantHash(tenID), - version: version.Version, + client: client, + tenant: sha256Truncated(tenID), + tenantDeprecated: tenantHash(tenID), + version: version.Version, }, nil } @@ -123,19 +133,22 @@ func (b Bus) Event(ctx context.Context, key string, data map[string]any) { NewProperties(). Set(repoID, b.repoID). Set(tenantID, b.tenant). + Set(tenantIDDeprecated, b.tenantDeprecated). Set(corsoVersion, b.version) for k, v := range data { props.Set(k, v) } - // need to setup identity when initializing a new repo - if key == RepoInit { + // need to setup identity when initializing or connecting to a repo + if key == RepoInit || key == RepoConnect { err := b.client.Enqueue(analytics.Identify{ - UserId: b.repoID, + UserId: b.tenant, Traits: analytics.NewTraits(). SetName(b.tenant). - Set(tenantID, b.tenant), + Set(tenantID, b.tenant). + Set(tenantIDDeprecated, b.tenantDeprecated). 
+ Set(repoID, b.repoID), }) if err != nil { logger.CtxErr(ctx, err).Debug("analytics event failure: repo identity") @@ -144,7 +157,7 @@ func (b Bus) Event(ctx context.Context, key string, data map[string]any) { err := b.client.Enqueue(analytics.Track{ Event: key, - UserId: b.repoID, + UserId: b.tenant, Timestamp: time.Now().UTC(), Properties: props, }) @@ -157,6 +170,15 @@ func (b *Bus) SetRepoID(hash string) { b.repoID = hash } +func sha256Truncated(tenID string) string { + outputLength := int(math.Min(truncatedHashLength, sha256OutputLength)) + + hash := sha256.Sum256([]byte(tenID)) + hexHash := fmt.Sprintf("%x", hash) + + return hexHash[0:outputLength] +} + func tenantHash(tenID string) string { sum := md5.Sum([]byte(tenID)) return fmt.Sprintf("%x", sum)