Switching user identification and hashing (#3071)

Encompasses the following changes:
* distinct ID for users and events set to m365 tenant ID hash
* still record local repoId
* switches the hashing method for the m365 tenant ID to truncated sha256
* continue logging deprecated md5 hash for a few releases to facilitate event merge

---

#### Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No

#### Type of change

<!--- Please check the type of change your PR introduces: --->
- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* #<issue>

#### Test Plan

<!-- How will this be tested prior to merging.-->
- [x] 💪 Manual
- [ ]  Unit test
- [ ] 💚 E2E
This commit is contained in:
Georgi Matev 2023-04-10 09:44:23 -07:00 committed by GitHub
parent 896c05f623
commit b6ae9c6d07
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -3,8 +3,10 @@ package events
import (
"context"
"crypto/md5"
"crypto/sha256"
"fmt"
"io"
"math"
"os"
"time"
@ -20,9 +22,10 @@ import (
// keys for ease of use
const (
corsoVersion = "corso_version"
repoID = "repo_id"
tenantID = "m365_tenant_hash"
corsoVersion = "corso_version"
repoID = "repo_id"
tenantID = "m365_tenant_hash"
tenantIDDeprecated = "m365_tenant_hash_deprecated"
// Event Keys
CorsoStart = "Corso Start"
@ -49,6 +52,11 @@ const (
Status = "status"
)
// Lengths, counted in hex characters, used when deriving the truncated
// sha256 tenant hash.
const (
// sha256OutputLength is the length of a hex-encoded sha256 digest; it caps
// how many characters sha256Truncated may slice from the hex string.
sha256OutputLength = 64
// truncatedHashLength is the number of leading hex characters that
// sha256Truncated keeps.
truncatedHashLength = 32
)
type Eventer interface {
Event(context.Context, string, map[string]any)
Close() error
@ -58,9 +66,10 @@ type Eventer interface {
// Bus holds the analytics client together with the identifying values
// (repo hash, tenant hashes, release version) that Event attaches to the
// properties of each enqueued event.
type Bus struct {
client analytics.Client
repoID string // one-way hash that uniquely identifies the repo.
tenant string // one-way hash that uniquely identifies the tenant.
version string // the Corso release version
repoID string // one-way hash that uniquely identifies the repo.
tenant string // one-way hash that uniquely identifies the tenant.
tenantDeprecated string // one-way hash that uniquely identifies the tenant (old hashing algo, kept for continuity).
version string // the Corso release version
}
var (
@ -100,9 +109,10 @@ func NewBus(ctx context.Context, s storage.Storage, tenID string, opts control.O
}
return Bus{
client: client,
tenant: tenantHash(tenID),
version: version.Version,
client: client,
tenant: sha256Truncated(tenID),
tenantDeprecated: tenantHash(tenID),
version: version.Version,
}, nil
}
@ -123,19 +133,22 @@ func (b Bus) Event(ctx context.Context, key string, data map[string]any) {
NewProperties().
Set(repoID, b.repoID).
Set(tenantID, b.tenant).
Set(tenantIDDeprecated, b.tenantDeprecated).
Set(corsoVersion, b.version)
for k, v := range data {
props.Set(k, v)
}
// need to setup identity when initializing a new repo
if key == RepoInit {
// need to setup identity when initializing or connecting to a repo
if key == RepoInit || key == RepoConnect {
err := b.client.Enqueue(analytics.Identify{
UserId: b.repoID,
UserId: b.tenant,
Traits: analytics.NewTraits().
SetName(b.tenant).
Set(tenantID, b.tenant),
Set(tenantID, b.tenant).
Set(tenantIDDeprecated, b.tenantDeprecated).
Set(repoID, b.repoID),
})
if err != nil {
logger.CtxErr(ctx, err).Debug("analytics event failure: repo identity")
@ -144,7 +157,7 @@ func (b Bus) Event(ctx context.Context, key string, data map[string]any) {
err := b.client.Enqueue(analytics.Track{
Event: key,
UserId: b.repoID,
UserId: b.tenant,
Timestamp: time.Now().UTC(),
Properties: props,
})
@ -157,6 +170,15 @@ func (b *Bus) SetRepoID(hash string) {
b.repoID = hash
}
// sha256Truncated hashes tenID with sha256, hex-encodes the digest, and
// returns only its leading characters. The number of characters kept is
// truncatedHashLength, capped at sha256OutputLength so the slice can never
// run past the end of the hex string.
func sha256Truncated(tenID string) string {
	// Hex-encode the full digest first; truncation happens on the text form.
	digest := fmt.Sprintf("%x", sha256.Sum256([]byte(tenID)))

	keep := int(math.Min(truncatedHashLength, sha256OutputLength))

	return digest[:keep]
}
func tenantHash(tenID string) string {
sum := md5.Sum([]byte(tenID))
return fmt.Sprintf("%x", sum)