Fixes and logging improvements for fault.Errors (#2848)

A last-second change in #2708 caused us to
pass the wrong fault.Errors into backup
persistence, dropping the count of skipped
items. That's been fixed, along with improved
end-of-operation logging of fault errors.
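
For context, here's a minimal sketch of the failure mode, using simplified stand-in types rather than the real fault.Bus/fault.Errors from github.com/alcionai/corso/src/pkg/fault: persistence can only count what's in the snapshot it's handed, so handing it a snapshot from anything other than the operation's own error bus silently loses the skipped items.

```go
package main

import "fmt"

// errorsSnapshot and bus are simplified stand-ins for fault.Errors and
// fault.Bus; they only model the skipped-item bookkeeping.
type errorsSnapshot struct {
	Skipped []string
}

type bus struct {
	skipped []string
}

func (b *bus) AddSkip(item string) { b.skipped = append(b.skipped, item) }

// Errors returns a point-in-time snapshot, mirroring fault.Bus.Errors().
func (b *bus) Errors() *errorsSnapshot { return &errorsSnapshot{Skipped: b.skipped} }

// persistBackup stands in for backup-model persistence: it can only count
// the skips present in the snapshot it receives.
func persistBackup(fe *errorsSnapshot) int { return len(fe.Skipped) }

func main() {
	opErrors := &bus{} // the operation's own error bus
	opErrors.AddSkip("malware.exe")
	opErrors.AddSkip("not-found.docx")

	wrongBus := &bus{} // an empty bus passed along by mistake

	fmt.Println(persistBackup(wrongBus.Errors())) // 0 -- skipped count lost (the bug)
	fmt.Println(persistBackup(opErrors.Errors())) // 2 -- persist op.Errors.Errors() instead (the fix)
}
```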

---

#### Does this PR need a docs update or release note?

- [x] No

#### Type of change

- [x] 🐛 Bugfix

#### Issue(s)

* #2708

#### Test Plan

- [x] 💪 Manual
- [x] 💚 E2E
Commit fa2cf046bb (parent bf1c824f4a), authored by Keepers on 2023-03-20 00:01:04 -06:00 and committed by GitHub.
7 changed files with 77 additions and 44 deletions

View File

@@ -673,7 +673,7 @@ func (c *Collections) UpdateCollections(
// Skip items that don't match the folder selectors we were given.
if shouldSkipDrive(ctx, collectionPath, c.matcher, driveName) {
- logger.Ctx(ictx).Infow("Skipping path", "skipped_path", collectionPath.String())
+ logger.Ctx(ictx).Debugw("Skipping drive path", "skipped_path", collectionPath.String())
continue
}

View File

@@ -161,19 +161,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
op.Errors.Fail(errors.Wrap(err, "doing backup"))
}
// TODO: the consumer (sdk or cli) should run this, not operations.
- recoverableCount := len(op.Errors.Recovered())
- for i, err := range op.Errors.Recovered() {
- logger.Ctx(ctx).
- With("error", err).
- With(clues.InErr(err).Slice()...).
- Errorf("doing backup: recoverable error %d of %d", i+1, recoverableCount)
- }
- skippedCount := len(op.Errors.Skipped())
- for i, skip := range op.Errors.Skipped() {
- logger.Ctx(ctx).With("skip", skip).Infof("doing backup: skipped item %d of %d", i+1, skippedCount)
- }
+ LogFaultErrors(ctx, op.Errors.Errors(), "doing backup")
// -----
// Persistence
@@ -693,7 +681,6 @@ func (op *BackupOperation) createBackupModels(
return clues.Wrap(err, "persisting details and errors").WithClues(ctx)
}
- ctx = clues.Add(ctx, "streamstore_snapshot_id", ssid)
b := backup.New(
snapID, ssid,
op.Status.String(),
@@ -701,21 +688,24 @@
op.Selectors,
op.Results.ReadWrites,
op.Results.StartAndEndTime,
- errs)
+ op.Errors.Errors())
- if err = op.store.Put(ctx, model.BackupSchema, b); err != nil {
+ err = op.store.Put(
+ clues.Add(ctx, "streamstore_snapshot_id", ssid),
+ model.BackupSchema,
+ b)
+ if err != nil {
return clues.Wrap(err, "creating backup model").WithClues(ctx)
}
- dur := op.Results.CompletedAt.Sub(op.Results.StartedAt)
op.bus.Event(
ctx,
events.BackupEnd,
map[string]any{
events.BackupID: b.ID,
events.DataStored: op.Results.BytesUploaded,
- events.Duration: dur,
+ events.Duration: op.Results.CompletedAt.Sub(op.Results.StartedAt),
events.EndTime: common.FormatTime(op.Results.CompletedAt),
events.Resources: op.Results.ResourceOwners,
events.Service: op.Selectors.PathService().String(),

View File

@@ -0,0 +1,42 @@
+ package operations
+
+ import (
+ "context"
+
+ "github.com/alcionai/corso/src/pkg/fault"
+ "github.com/alcionai/corso/src/pkg/logger"
+ )
+
+ // LogFaultErrors is a helper function that logs all entries in the Errors struct.
+ func LogFaultErrors(ctx context.Context, fe *fault.Errors, prefix string) {
+ if fe == nil {
+ return
+ }
+
+ var (
+ log = logger.Ctx(ctx)
+ pfxMsg = prefix + ":"
+ li, ls, lr = len(fe.Items), len(fe.Skipped), len(fe.Recovered)
+ )
+
+ if fe.Failure == nil && li+ls+lr == 0 {
+ log.Info(pfxMsg, "no errors")
+ return
+ }
+
+ if fe.Failure != nil {
+ log.With("error", fe.Failure).Error(pfxMsg, "primary failure")
+ }
+
+ for i, item := range fe.Items {
+ log.With("failed_item", item).Errorf("%s item failure %d of %d", pfxMsg, i+1, li)
+ }
+
+ for i, item := range fe.Skipped {
+ log.With("skipped_item", item).Errorf("%s skipped item %d of %d", pfxMsg, i+1, ls)
+ }
+
+ for i, err := range fe.Recovered {
+ log.With("recovered_error", err).Errorf("%s recoverable error %d of %d", pfxMsg, i+1, lr)
+ }
+ }
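
With the helper above, the hand-rolled logging loops at the end of each operation's Run collapse into a single call; the backup and restore hunks elsewhere in this commit both switch to the pattern below (lines taken from the diffs in this PR):

```go
// end of BackupOperation.Run
LogFaultErrors(ctx, op.Errors.Errors(), "doing backup")

// end of RestoreOperation.Run
LogFaultErrors(ctx, op.Errors.Errors(), "doing restore")
```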

View File

@@ -153,14 +153,7 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De
op.Errors.Fail(errors.Wrap(err, "doing restore"))
}
// TODO: the consumer (sdk or cli) should run this, not operations.
- recoverableCount := len(op.Errors.Recovered())
- for i, err := range op.Errors.Recovered() {
- logger.Ctx(ctx).
- With("error", err).
- With(clues.InErr(err).Slice()...).
- Errorf("doing restore: recoverable error %d of %d", i+1, recoverableCount)
- }
+ LogFaultErrors(ctx, op.Errors.Errors(), "doing restore")
// -----
// Persistence

View File

@@ -63,22 +63,25 @@ func New(
selector selectors.Selector,
rw stats.ReadWrites,
se stats.StartAndEndTime,
- errs *fault.Bus,
+ fe *fault.Errors,
) *Backup {
+ if fe == nil {
+ fe = &fault.Errors{}
+ }
var (
- ee = errs.Errors()
- // TODO: count errData.Items(), not all recovered errors.
- errCount = len(ee.Recovered)
+ errCount = len(fe.Items)
+ skipCount = len(fe.Skipped)
failMsg string
malware, notFound, otherSkips int
)
- if ee.Failure != nil {
- failMsg = ee.Failure.Msg
+ if fe.Failure != nil {
+ failMsg = fe.Failure.Msg
errCount++
}
- for _, s := range ee.Skipped {
+ for _, s := range fe.Skipped {
switch true {
case s.HasCause(fault.SkipMalware):
malware++
@@ -105,7 +108,7 @@ func New(
Status: status,
Selector: selector,
- FailFast: errs.FailFast(),
+ FailFast: fe.FailFast,
ErrorCount: errCount,
Failure: failMsg,
@@ -113,8 +116,9 @@
ReadWrites: rw,
StartAndEndTime: se,
SkippedCounts: stats.SkippedCounts{
- TotalSkippedItems: len(ee.Skipped),
+ TotalSkippedItems: skipCount,
SkippedMalware: malware,
SkippedNotFound: notFound,
},
}
}

View File

@@ -294,15 +294,15 @@ func (e *Errors) PrintItems(ctx context.Context, ignoreErrors, ignoreSkips, igno
var _ print.Printable = &printableErrCore{}
type printableErrCore struct {
- msg string
+ *clues.ErrCore
}
func errCoreToPrintable(ec *clues.ErrCore) printableErrCore {
if ec == nil {
- return printableErrCore{"<nil>"}
+ return printableErrCore{ErrCore: &clues.ErrCore{Msg: "<nil>"}}
}
- return printableErrCore{ec.Msg}
+ return printableErrCore{ErrCore: ec}
}
func (pec printableErrCore) MinimumPrintable() any {
@@ -314,7 +314,11 @@ func (pec printableErrCore) Headers() []string {
}
func (pec printableErrCore) Values() []string {
- return []string{pec.msg}
+ if pec.ErrCore == nil {
+ return []string{"<nil>"}
+ }
+ return []string{pec.Msg}
}
// ---------------------------------------------------------------------------
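
The change above swaps a copied message string for an embedded *clues.ErrCore pointer, nil-checking at the point of use. A minimal sketch of that pattern with a stand-in type (not the real clues.ErrCore):

```go
package main

import "fmt"

// errCore is a stand-in for clues.ErrCore.
type errCore struct{ Msg string }

type printable struct {
	*errCore // embedded pointer; may be nil
}

// Values nil-checks the embedded pointer before promoting its fields.
func (p printable) Values() []string {
	if p.errCore == nil {
		return []string{"<nil>"}
	}

	return []string{p.Msg}
}

func main() {
	fmt.Println(printable{}.Values())                      // [<nil>]
	fmt.Println(printable{&errCore{Msg: "oops"}}.Values()) // [oops]
}
```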

View File

@@ -128,7 +128,7 @@ func writeBackup(
tID, snapID, backupID string,
sel selectors.Selector,
deets *details.Details,
- errors *fault.Errors,
+ fe *fault.Errors,
errs *fault.Bus,
) *backup.Backup {
var (
@@ -139,7 +139,7 @@ func writeBackup(
err := sstore.Collect(ctx, streamstore.DetailsCollector(deets))
require.NoError(t, err, "collecting details in streamstore")
- err = sstore.Collect(ctx, streamstore.FaultErrorsCollector(errors))
+ err = sstore.Collect(ctx, streamstore.FaultErrorsCollector(fe))
require.NoError(t, err, "collecting errors in streamstore")
ssid, err := sstore.Write(ctx, errs)
@@ -152,7 +152,7 @@ func writeBackup(
sel,
stats.ReadWrites{},
stats.StartAndEndTime{},
- errs)
+ fe)
err = sw.Put(ctx, model.BackupSchema, b)
require.NoError(t, err)