From fa2cf046bbf083521c797dee0fc68a5924438e66 Mon Sep 17 00:00:00 2001 From: Keepers Date: Mon, 20 Mar 2023 00:01:04 -0600 Subject: [PATCH] fixes, logging improvements for fault.Errors (#2848) A last-second change in 2708 caused us to pass along the wrong fault.Errors into backup persistence, thus slicing the count of skipped items. That's been fixed, along with improved end-of-operation logging of fault errors. --- #### Does this PR need a docs update or release note? - [x] :no_entry: No #### Type of change - [x] :bug: Bugfix #### Issue(s) * #2708 #### Test Plan - [x] :muscle: Manual - [x] :green_heart: E2E --- .../connector/onedrive/collections.go | 2 +- src/internal/operations/backup.go | 28 ++++--------- src/internal/operations/logging.go | 42 +++++++++++++++++++ src/internal/operations/restore.go | 9 +--- src/pkg/backup/backup.go | 22 ++++++---- src/pkg/fault/fault.go | 12 ++++-- .../repository/repository_unexported_test.go | 6 +-- 7 files changed, 77 insertions(+), 44 deletions(-) create mode 100644 src/internal/operations/logging.go diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 1bb7ffbf2..044b3f0ad 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -673,7 +673,7 @@ func (c *Collections) UpdateCollections( // Skip items that don't match the folder selectors we were given. if shouldSkipDrive(ctx, collectionPath, c.matcher, driveName) { - logger.Ctx(ictx).Infow("Skipping path", "skipped_path", collectionPath.String()) + logger.Ctx(ictx).Debugw("Skipping drive path", "skipped_path", collectionPath.String()) continue } diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index a5d2080fa..c3d2e73c2 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -161,19 +161,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) { op.Errors.Fail(errors.Wrap(err, "doing backup")) } - // TODO: the consumer (sdk or cli) should run this, not operations. - recoverableCount := len(op.Errors.Recovered()) - for i, err := range op.Errors.Recovered() { - logger.Ctx(ctx). - With("error", err). - With(clues.InErr(err).Slice()...). - Errorf("doing backup: recoverable error %d of %d", i+1, recoverableCount) - } - - skippedCount := len(op.Errors.Skipped()) - for i, skip := range op.Errors.Skipped() { - logger.Ctx(ctx).With("skip", skip).Infof("doing backup: skipped item %d of %d", i+1, skippedCount) - } + LogFaultErrors(ctx, op.Errors.Errors(), "doing backup") // ----- // Persistence @@ -693,7 +681,6 @@ func (op *BackupOperation) createBackupModels( return clues.Wrap(err, "persisting details and errors").WithClues(ctx) } - ctx = clues.Add(ctx, "streamstore_snapshot_id", ssid) b := backup.New( snapID, ssid, op.Status.String(), @@ -701,21 +688,24 @@ func (op *BackupOperation) createBackupModels( op.Selectors, op.Results.ReadWrites, op.Results.StartAndEndTime, - errs) + op.Errors.Errors()) - if err = op.store.Put(ctx, model.BackupSchema, b); err != nil { + err = op.store.Put( + clues.Add(ctx, "streamstore_snapshot_id", ssid), + model.BackupSchema, + b) + + if err != nil { return clues.Wrap(err, "creating backup model").WithClues(ctx) } - dur := op.Results.CompletedAt.Sub(op.Results.StartedAt) - op.bus.Event( ctx, events.BackupEnd, map[string]any{ events.BackupID: b.ID, events.DataStored: op.Results.BytesUploaded, - events.Duration: dur, + events.Duration: op.Results.CompletedAt.Sub(op.Results.StartedAt), events.EndTime: common.FormatTime(op.Results.CompletedAt), events.Resources: op.Results.ResourceOwners, events.Service: op.Selectors.PathService().String(), diff --git a/src/internal/operations/logging.go b/src/internal/operations/logging.go new file mode 100644 index 000000000..e0e0cd51d --- /dev/null +++ b/src/internal/operations/logging.go @@ -0,0 +1,42 @@ +package operations + +import ( + "context" + + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/logger" +) + +// LogFaultErrors is a helper function that logs all entries in the Errors struct. +func LogFaultErrors(ctx context.Context, fe *fault.Errors, prefix string) { + if fe == nil { + return + } + + var ( + log = logger.Ctx(ctx) + pfxMsg = prefix + ":" + li, ls, lr = len(fe.Items), len(fe.Skipped), len(fe.Recovered) + ) + + if fe.Failure == nil && li+ls+lr == 0 { + log.Info(pfxMsg, "no errors") + return + } + + if fe.Failure != nil { + log.With("error", fe.Failure).Error(pfxMsg, "primary failure") + } + + for i, item := range fe.Items { + log.With("failed_item", item).Errorf("%s item failure %d of %d", pfxMsg, i+1, li) + } + + for i, item := range fe.Skipped { + log.With("skipped_item", item).Errorf("%s skipped item %d of %d", pfxMsg, i+1, ls) + } + + for i, err := range fe.Recovered { + log.With("recovered_error", err).Errorf("%s recoverable error %d of %d", pfxMsg, i+1, lr) + } +} diff --git a/src/internal/operations/restore.go b/src/internal/operations/restore.go index 36986c8fb..c956d7514 100644 --- a/src/internal/operations/restore.go +++ b/src/internal/operations/restore.go @@ -153,14 +153,7 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De op.Errors.Fail(errors.Wrap(err, "doing restore")) } - // TODO: the consumer (sdk or cli) should run this, not operations. - recoverableCount := len(op.Errors.Recovered()) - for i, err := range op.Errors.Recovered() { - logger.Ctx(ctx). - With("error", err). - With(clues.InErr(err).Slice()...). - Errorf("doing restore: recoverable error %d of %d", i+1, recoverableCount) - } + LogFaultErrors(ctx, op.Errors.Errors(), "doing restore") // ----- // Persistence diff --git a/src/pkg/backup/backup.go b/src/pkg/backup/backup.go index 67e5a9955..d08ccfbb6 100644 --- a/src/pkg/backup/backup.go +++ b/src/pkg/backup/backup.go @@ -63,22 +63,25 @@ func New( selector selectors.Selector, rw stats.ReadWrites, se stats.StartAndEndTime, - errs *fault.Bus, + fe *fault.Errors, ) *Backup { + if fe == nil { + fe = &fault.Errors{} + } + var ( - ee = errs.Errors() - // TODO: count errData.Items(), not all recovered errors. - errCount = len(ee.Recovered) + errCount = len(fe.Items) + skipCount = len(fe.Skipped) failMsg string malware, notFound, otherSkips int ) - if ee.Failure != nil { - failMsg = ee.Failure.Msg + if fe.Failure != nil { + failMsg = fe.Failure.Msg errCount++ } - for _, s := range ee.Skipped { + for _, s := range fe.Skipped { switch true { case s.HasCause(fault.SkipMalware): malware++ @@ -105,7 +108,7 @@ func New( Status: status, Selector: selector, - FailFast: errs.FailFast(), + FailFast: fe.FailFast, ErrorCount: errCount, Failure: failMsg, @@ -113,8 +116,9 @@ func New( ReadWrites: rw, StartAndEndTime: se, SkippedCounts: stats.SkippedCounts{ - TotalSkippedItems: len(ee.Skipped), + TotalSkippedItems: skipCount, SkippedMalware: malware, + SkippedNotFound: notFound, }, } } diff --git a/src/pkg/fault/fault.go b/src/pkg/fault/fault.go index a9c83c321..a228bd5a2 100644 --- a/src/pkg/fault/fault.go +++ b/src/pkg/fault/fault.go @@ -294,15 +294,15 @@ func (e *Errors) PrintItems(ctx context.Context, ignoreErrors, ignoreSkips, igno var _ print.Printable = &printableErrCore{} type printableErrCore struct { - msg string + *clues.ErrCore } func errCoreToPrintable(ec *clues.ErrCore) printableErrCore { if ec == nil { - return printableErrCore{""} + return printableErrCore{ErrCore: &clues.ErrCore{Msg: ""}} } - return printableErrCore{ec.Msg} + return printableErrCore{ErrCore: ec} } func (pec printableErrCore) MinimumPrintable() any { @@ -314,7 +314,11 @@ func (pec printableErrCore) Headers() []string { } func (pec printableErrCore) Values() []string { - return []string{pec.msg} + if pec.ErrCore == nil { + return []string{""} + } + + return []string{pec.Msg} } // --------------------------------------------------------------------------- diff --git a/src/pkg/repository/repository_unexported_test.go b/src/pkg/repository/repository_unexported_test.go index 6ad5672c9..d9d900fc7 100644 --- a/src/pkg/repository/repository_unexported_test.go +++ b/src/pkg/repository/repository_unexported_test.go @@ -128,7 +128,7 @@ func writeBackup( tID, snapID, backupID string, sel selectors.Selector, deets *details.Details, - errors *fault.Errors, + fe *fault.Errors, errs *fault.Bus, ) *backup.Backup { var ( @@ -139,7 +139,7 @@ func writeBackup( err := sstore.Collect(ctx, streamstore.DetailsCollector(deets)) require.NoError(t, err, "collecting details in streamstore") - err = sstore.Collect(ctx, streamstore.FaultErrorsCollector(errors)) + err = sstore.Collect(ctx, streamstore.FaultErrorsCollector(fe)) require.NoError(t, err, "collecting errors in streamstore") ssid, err := sstore.Write(ctx, errs) @@ -152,7 +152,7 @@ func writeBackup( sel, stats.ReadWrites{}, stats.StartAndEndTime{}, - errs) + fe) err = sw.Put(ctx, model.BackupSchema, b) require.NoError(t, err)