From ce4d97ef482804ce445a4e8ddb3b49ff9d97d297 Mon Sep 17 00:00:00 2001 From: Keepers Date: Wed, 21 Sep 2022 11:43:16 -0600 Subject: [PATCH] report totalFileSize on backup (#921) ## Description Extend the kopia metrics to include the snapshot's TotalFileSize parameter. This value also gets used as the metric for BytesWritten at the end of a backup. ## Type of change - [x] :sunflower: Feature ## Issue(s) * #894 ## Test Plan - [x] :zap: Unit test - [x] :green_heart: E2E --- src/internal/events/events.go | 21 +++-- src/internal/kopia/wrapper.go | 22 ++++- src/internal/kopia/wrapper_test.go | 123 ++++++++++++++++--------- src/internal/operations/backup.go | 17 ++-- src/internal/operations/backup_test.go | 19 ++-- src/internal/operations/restore.go | 18 ++-- src/internal/stats/stats.go | 1 + 7 files changed, 141 insertions(+), 80 deletions(-) diff --git a/src/internal/events/events.go b/src/internal/events/events.go index 0f37e1130..1bc6b9e29 100644 --- a/src/internal/events/events.go +++ b/src/internal/events/events.go @@ -28,16 +28,17 @@ const ( RestoreEnd = "restore-end" // Event Data Keys - BackupID = "backup-id" - ExchangeResources = "exchange-resources" - ExchangeDataRetrieved = "exchange-data-retrieved" - ExchangeDataStored = "exchange-data-stored" - EndTime = "end-time" - StartTime = "start-time" - Duration = "duration" - Status = "status" - ItemsRead = "items-read" - ItemsWritten = "items-written" + BackupID = "backup-id" + DataRetrieved = "data-retrieved" + DataStored = "data-stored" + Duration = "duration" + EndTime = "end-time" + ItemsRead = "items-read" + ItemsWritten = "items-written" + Resources = "resources" + Service = "service" + StartTime = "start-time" + Status = "status" ) type Eventer interface { diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index 621254ad5..d4c213c7f 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -3,6 +3,7 @@ package kopia import ( "context" "sync" + "sync/atomic" "github.com/hashicorp/go-multierror" "github.com/kopia/kopia/fs" @@ -35,6 +36,7 @@ var ( type BackupStats struct { SnapshotID string TotalFileCount int + TotalHashedBytes int64 TotalDirectoryCount int IgnoredErrorCount int ErrorCount int @@ -42,10 +44,11 @@ type BackupStats struct { IncompleteReason string } -func manifestToStats(man *snapshot.Manifest) BackupStats { +func manifestToStats(man *snapshot.Manifest, progress *corsoProgress) BackupStats { return BackupStats{ SnapshotID: string(man.ID), TotalFileCount: int(man.Stats.TotalFileCount), + TotalHashedBytes: progress.totalBytes, TotalDirectoryCount: int(man.Stats.TotalDirectoryCount), IgnoredErrorCount: int(man.Stats.IgnoredErrorCount), ErrorCount: int(man.Stats.ErrorCount), @@ -61,9 +64,10 @@ type itemDetails struct { type corsoProgress struct { snapshotfs.UploadProgress - pending map[string]*itemDetails - deets *details.Details - mu sync.RWMutex + pending map[string]*itemDetails + deets *details.Details + mu sync.RWMutex + totalBytes int64 } // Kopia interface function used as a callback when kopia finishes processing a @@ -120,6 +124,14 @@ func (cp *corsoProgress) FinishedFile(relativePath string, err error) { cp.deets.AddFolders(folders) } +// Kopia interface function used as a callback when kopia finishes hashing a file. +func (cp *corsoProgress) FinishedHashingFile(fname string, bytes int64) { + // Pass the call through as well so we don't break expected functionality. + defer cp.UploadProgress.FinishedHashingFile(fname, bytes) + + atomic.AddInt64(&cp.totalBytes, bytes) +} + func (cp *corsoProgress) put(k string, v *itemDetails) { cp.mu.Lock() defer cp.mu.Unlock() @@ -441,7 +453,7 @@ func (w Wrapper) makeSnapshotWithRoot( return nil, errors.Wrap(err, "kopia backup") } - res := manifestToStats(man) + res := manifestToStats(man, progress) return &res, nil } diff --git a/src/internal/kopia/wrapper_test.go b/src/internal/kopia/wrapper_test.go index a0e22b88d..4ad36a13a 100644 --- a/src/internal/kopia/wrapper_test.go +++ b/src/internal/kopia/wrapper_test.go @@ -143,56 +143,69 @@ func (suite *CorsoProgressUnitSuite) SetupSuite() { suite.targetFileName = suite.targetFilePath.ToBuilder().Dir().String() } -func (suite *CorsoProgressUnitSuite) TestFinishedFile() { - type testInfo struct { - info *itemDetails - err error - } +type testInfo struct { + info *itemDetails + err error + totalBytes int64 +} - deets := &itemDetails{details.ItemInfo{}, suite.targetFilePath} - - table := []struct { - name string - cachedItems map[string]testInfo - expectedLen int - }{ - { - name: "DetailsExist", - cachedItems: map[string]testInfo{ - suite.targetFileName: { - info: deets, - err: nil, +var finishedFileTable = []struct { + name string + cachedItems func(fname string, fpath path.Path) map[string]testInfo + expectedBytes int64 + expectedNumEntries int + err error +}{ + { + name: "DetailsExist", + cachedItems: func(fname string, fpath path.Path) map[string]testInfo { + return map[string]testInfo{ + fname: { + info: &itemDetails{details.ItemInfo{}, fpath}, + err: nil, + totalBytes: 100, }, - }, - // 1 file and 5 folders. - expectedLen: 6, + } }, - { - name: "PendingNoDetails", - cachedItems: map[string]testInfo{ - suite.targetFileName: { + expectedBytes: 100, + // 1 file and 5 folders. + expectedNumEntries: 6, + }, + { + name: "PendingNoDetails", + cachedItems: func(fname string, fpath path.Path) map[string]testInfo { + return map[string]testInfo{ + fname: { info: nil, err: nil, }, - }, - expectedLen: 0, + } }, - { - name: "HadError", - cachedItems: map[string]testInfo{ - suite.targetFileName: { - info: deets, + expectedNumEntries: 0, + }, + { + name: "HadError", + cachedItems: func(fname string, fpath path.Path) map[string]testInfo { + return map[string]testInfo{ + fname: { + info: &itemDetails{details.ItemInfo{}, fpath}, err: assert.AnError, }, - }, - expectedLen: 0, + } }, - { - name: "NotPending", - expectedLen: 0, + expectedNumEntries: 0, + }, + { + name: "NotPending", + cachedItems: func(fname string, fpath path.Path) map[string]testInfo { + return nil }, - } - for _, test := range table { + expectedNumEntries: 0, + }, +} + +func (suite *CorsoProgressUnitSuite) TestFinishedFile() { + for _, test := range finishedFileTable { suite.T().Run(test.name, func(t *testing.T) { bd := &details.Details{} cp := corsoProgress{ @@ -201,18 +214,20 @@ func (suite *CorsoProgressUnitSuite) TestFinishedFile() { pending: map[string]*itemDetails{}, } - for k, v := range test.cachedItems { + ci := test.cachedItems(suite.targetFileName, suite.targetFilePath) + + for k, v := range ci { cp.put(k, v.info) } - require.Len(t, cp.pending, len(test.cachedItems)) + require.Len(t, cp.pending, len(ci)) - for k, v := range test.cachedItems { + for k, v := range ci { cp.FinishedFile(k, v.err) } assert.Empty(t, cp.pending) - assert.Len(t, bd.Entries, test.expectedLen) + assert.Len(t, bd.Entries, test.expectedNumEntries) }) } } @@ -276,6 +291,28 @@ func (suite *CorsoProgressUnitSuite) TestFinishedFileBuildsHierarchy() { assert.Empty(t, rootRef.ParentRef) } +func (suite *CorsoProgressUnitSuite) TestFinishedHashingFile() { + for _, test := range finishedFileTable { + suite.T().Run(test.name, func(t *testing.T) { + bd := &details.Details{} + cp := corsoProgress{ + UploadProgress: &snapshotfs.NullUploadProgress{}, + deets: bd, + pending: map[string]*itemDetails{}, + } + + ci := test.cachedItems(suite.targetFileName, suite.targetFilePath) + + for k, v := range ci { + cp.FinishedHashingFile(k, v.totalBytes) + } + + assert.Empty(t, cp.pending) + assert.Equal(t, test.expectedBytes, cp.totalBytes) + }) + } +} + type KopiaUnitSuite struct { suite.Suite testPath path.Path diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 6f8288ca4..d840f6f2c 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -93,6 +93,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { events.BackupStart, map[string]any{ events.StartTime: startTime, + events.Service: op.Selectors.Service.String(), // TODO: initial backup ID, // TODO: events.ExchangeResources: , }, @@ -167,6 +168,7 @@ func (op *BackupOperation) persistResults( op.Results.ReadErrors = opStats.readErr op.Results.WriteErrors = opStats.writeErr + op.Results.BytesWritten = opStats.k.TotalHashedBytes op.Results.ItemsRead = opStats.gc.Successful op.Results.ItemsWritten = opStats.k.TotalFileCount op.Results.ResourceOwners = opStats.resourceCount @@ -207,14 +209,15 @@ func (op *BackupOperation) createBackupModels( ctx, events.BackupEnd, map[string]any{ - events.BackupID: b.ID, - events.Status: op.Status, - events.StartTime: op.Results.StartedAt, - events.EndTime: op.Results.CompletedAt, - events.Duration: op.Results.CompletedAt.Sub(op.Results.StartedAt), - events.ExchangeResources: op.Results.ResourceOwners, + events.BackupID: b.ID, + events.Service: op.Selectors.Service.String(), + events.Status: op.Status, + events.StartTime: op.Results.StartedAt, + events.EndTime: op.Results.CompletedAt, + events.Duration: op.Results.CompletedAt.Sub(op.Results.StartedAt), + events.DataStored: op.Results.BytesWritten, + events.Resources: op.Results.ResourceOwners, // TODO: events.ExchangeDataObserved: , - // TODO: events.ExchangeDataStored: , }, ) diff --git a/src/internal/operations/backup_test.go b/src/internal/operations/backup_test.go index a80beb098..4fe152a17 100644 --- a/src/internal/operations/backup_test.go +++ b/src/internal/operations/backup_test.go @@ -43,11 +43,13 @@ func (suite *BackupOpSuite) TestBackupOperation_PersistResults() { acct = account.Account{} now = time.Now() stats = backupStats{ - started: true, - readErr: multierror.Append(nil, assert.AnError), - writeErr: assert.AnError, + started: true, + readErr: multierror.Append(nil, assert.AnError), + writeErr: assert.AnError, + resourceCount: 1, k: &kopia.BackupStats{ - TotalFileCount: 1, + TotalFileCount: 1, + TotalHashedBytes: 1, }, gc: &support.ConnectorOperationStatus{ Successful: 1, @@ -71,7 +73,8 @@ func (suite *BackupOpSuite) TestBackupOperation_PersistResults() { assert.Equal(t, op.Results.ItemsRead, stats.gc.Successful, "items read") assert.Equal(t, op.Results.ReadErrors, stats.readErr, "read errors") assert.Equal(t, op.Results.ItemsWritten, stats.k.TotalFileCount, "items written") - assert.Equal(t, 0, op.Results.ResourceOwners, "resource owners") + assert.Equal(t, op.Results.BytesWritten, stats.k.TotalHashedBytes, "bytes written") + assert.Equal(t, op.Results.ResourceOwners, stats.resourceCount, "resource owners") assert.Equal(t, op.Results.WriteErrors, stats.writeErr, "write errors") assert.Equal(t, op.Results.StartedAt, now, "started at") assert.Less(t, now, op.Results.CompletedAt, "completed at") @@ -213,8 +216,9 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run() { require.NotEmpty(t, bo.Results) require.NotEmpty(t, bo.Results.BackupID) assert.Equal(t, bo.Status, Completed) - assert.Greater(t, bo.Results.ItemsRead, 0) - assert.Greater(t, bo.Results.ItemsWritten, 0) + assert.Less(t, 0, bo.Results.ItemsRead) + assert.Less(t, 0, bo.Results.ItemsWritten) + assert.Less(t, int64(0), bo.Results.BytesWritten) assert.Equal(t, 1, bo.Results.ResourceOwners) assert.Zero(t, bo.Results.ReadErrors) assert.Zero(t, bo.Results.WriteErrors) @@ -273,6 +277,7 @@ func (suite *BackupOpIntegrationSuite) TestBackupOneDrive_Run() { require.NotEmpty(t, bo.Results.BackupID) assert.Equal(t, bo.Status, Completed) assert.Equal(t, bo.Results.ItemsRead, bo.Results.ItemsWritten) + assert.Less(t, int64(0), bo.Results.BytesWritten) assert.Equal(t, 1, bo.Results.ResourceOwners) assert.NoError(t, bo.Results.ReadErrors) assert.NoError(t, bo.Results.WriteErrors) diff --git a/src/internal/operations/restore.go b/src/internal/operations/restore.go index 7965ff742..ff6bc8ccb 100644 --- a/src/internal/operations/restore.go +++ b/src/internal/operations/restore.go @@ -92,6 +92,7 @@ func (op *RestoreOperation) Run(ctx context.Context) (err error) { events.RestoreStart, map[string]any{ events.StartTime: startTime, + events.Service: op.Selectors.Service.String(), events.BackupID: op.BackupID, // TODO: initial backup ID, // TODO: events.ExchangeResources: , @@ -231,14 +232,15 @@ func (op *RestoreOperation) persistResults( events.RestoreEnd, map[string]any{ // TODO: RestoreID - events.BackupID: op.BackupID, - events.Status: op.Status, - events.StartTime: op.Results.StartedAt, - events.EndTime: op.Results.CompletedAt, - events.Duration: op.Results.CompletedAt.Sub(op.Results.StartedAt), - events.ItemsRead: op.Results.ItemsRead, - events.ItemsWritten: op.Results.ItemsWritten, - events.ExchangeResources: op.Results.ResourceOwners, + events.BackupID: op.BackupID, + events.Service: op.Selectors.Service.String(), + events.Status: op.Status, + events.StartTime: op.Results.StartedAt, + events.EndTime: op.Results.CompletedAt, + events.Duration: op.Results.CompletedAt.Sub(op.Results.StartedAt), + events.ItemsRead: op.Results.ItemsRead, + events.ItemsWritten: op.Results.ItemsWritten, + events.Resources: op.Results.ResourceOwners, // TODO: events.ExchangeDataObserved: , }, ) diff --git a/src/internal/stats/stats.go b/src/internal/stats/stats.go index 92b0299a1..3d8eced1c 100644 --- a/src/internal/stats/stats.go +++ b/src/internal/stats/stats.go @@ -7,6 +7,7 @@ import "time" // assumed to be successful, so the total count of items involved // would be ItemsRead+ReadErrors. type ReadWrites struct { + BytesWritten int64 `json:"bytesWritten,omitempty"` ItemsRead int `json:"itemsRead,omitempty"` ItemsWritten int `json:"itemsWritten,omitempty"` ReadErrors error `json:"readErrors,omitempty"`