Fixes and logging improvements for fault.Errors (#2848)
A last-second change in #2708 caused us to pass the wrong fault.Errors into backup persistence, truncating the count of skipped items. That's now fixed, along with improved end-of-operation logging of fault errors.

---

#### Does this PR need a docs update or release note?
- [x] ⛔ No

#### Type of change
- [x] 🐛 Bugfix

#### Issue(s)
* #2708

#### Test Plan
- [x] 💪 Manual
- [x] 💚 E2E
This commit is contained in:
parent
bf1c824f4a
commit
fa2cf046bb
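For context, here is the shape of the fix in one place: a consolidated sketch, with argument lists elided via `/* ... */`; only the types and calls shown in the hunks below are taken from the real code.

```go
// Before: backup.New received the live fault bus and derived its counts
// from it internally.
//
//	func New( /* ... */, errs *fault.Bus) *Backup { ee := errs.Errors(); /* ... */ }
//
// After: the operation snapshots its errors as *fault.Errors and hands that
// same data to persistence and to end-of-run logging.
fe := op.Errors.Errors() // snapshot: Failure, Items, Skipped, Recovered

b := backup.New( /* ..., */ fe)         // persisted error/skip counts come from fe
LogFaultErrors(ctx, fe, "doing backup") // the log output is driven by the same data
```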
@@ -673,7 +673,7 @@ func (c *Collections) UpdateCollections(
 
 		// Skip items that don't match the folder selectors we were given.
 		if shouldSkipDrive(ctx, collectionPath, c.matcher, driveName) {
-			logger.Ctx(ictx).Infow("Skipping path", "skipped_path", collectionPath.String())
+			logger.Ctx(ictx).Debugw("Skipping drive path", "skipped_path", collectionPath.String())
 			continue
 		}
 
@@ -161,19 +161,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
 		op.Errors.Fail(errors.Wrap(err, "doing backup"))
 	}
 
-	// TODO: the consumer (sdk or cli) should run this, not operations.
-	recoverableCount := len(op.Errors.Recovered())
-	for i, err := range op.Errors.Recovered() {
-		logger.Ctx(ctx).
-			With("error", err).
-			With(clues.InErr(err).Slice()...).
-			Errorf("doing backup: recoverable error %d of %d", i+1, recoverableCount)
-	}
-
-	skippedCount := len(op.Errors.Skipped())
-	for i, skip := range op.Errors.Skipped() {
-		logger.Ctx(ctx).With("skip", skip).Infof("doing backup: skipped item %d of %d", i+1, skippedCount)
-	}
+	LogFaultErrors(ctx, op.Errors.Errors(), "doing backup")
 
 	// -----
 	// Persistence
@@ -693,7 +681,6 @@ func (op *BackupOperation) createBackupModels(
 		return clues.Wrap(err, "persisting details and errors").WithClues(ctx)
 	}
 
-	ctx = clues.Add(ctx, "streamstore_snapshot_id", ssid)
 	b := backup.New(
 		snapID, ssid,
 		op.Status.String(),
@@ -701,21 +688,24 @@ func (op *BackupOperation) createBackupModels(
 		op.Selectors,
 		op.Results.ReadWrites,
 		op.Results.StartAndEndTime,
-		errs)
+		op.Errors.Errors())
 
-	if err = op.store.Put(ctx, model.BackupSchema, b); err != nil {
+	err = op.store.Put(
+		clues.Add(ctx, "streamstore_snapshot_id", ssid),
+		model.BackupSchema,
+		b)
+
+	if err != nil {
 		return clues.Wrap(err, "creating backup model").WithClues(ctx)
 	}
 
-	dur := op.Results.CompletedAt.Sub(op.Results.StartedAt)
-
 	op.bus.Event(
 		ctx,
 		events.BackupEnd,
 		map[string]any{
 			events.BackupID:   b.ID,
 			events.DataStored: op.Results.BytesUploaded,
-			events.Duration:   dur,
+			events.Duration:   op.Results.CompletedAt.Sub(op.Results.StartedAt),
 			events.EndTime:    common.FormatTime(op.Results.CompletedAt),
 			events.Resources:  op.Results.ResourceOwners,
 			events.Service:    op.Selectors.PathService().String(),
src/internal/operations/logging.go (new file, 42 lines)
@@ -0,0 +1,42 @@
+package operations
+
+import (
+	"context"
+
+	"github.com/alcionai/corso/src/pkg/fault"
+	"github.com/alcionai/corso/src/pkg/logger"
+)
+
+// LogFaultErrors is a helper function that logs all entries in the Errors struct.
+func LogFaultErrors(ctx context.Context, fe *fault.Errors, prefix string) {
+	if fe == nil {
+		return
+	}
+
+	var (
+		log        = logger.Ctx(ctx)
+		pfxMsg     = prefix + ":"
+		li, ls, lr = len(fe.Items), len(fe.Skipped), len(fe.Recovered)
+	)
+
+	if fe.Failure == nil && li+ls+lr == 0 {
+		log.Info(pfxMsg, "no errors")
+		return
+	}
+
+	if fe.Failure != nil {
+		log.With("error", fe.Failure).Error(pfxMsg, "primary failure")
+	}
+
+	for i, item := range fe.Items {
+		log.With("failed_item", item).Errorf("%s item failure %d of %d", pfxMsg, i+1, li)
+	}
+
+	for i, item := range fe.Skipped {
+		log.With("skipped_item", item).Errorf("%s skipped item %d of %d", pfxMsg, i+1, ls)
+	}
+
+	for i, err := range fe.Recovered {
+		log.With("recovered_error", err).Errorf("%s recoverable error %d of %d", pfxMsg, i+1, lr)
+	}
+}
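The helper above replaces the hand-rolled logging loops removed from the backup and restore operations (see the hunks directly above and below). A hedged usage sketch, with a hypothetical wrapper name:

```go
// logOutcome shows the intended call pattern: snapshot the fault bus once at
// the end of an operation's Run and log everything it accumulated.
// LogFaultErrors is a no-op for a nil snapshot and logs "no errors" when the
// snapshot is empty.
func logOutcome(ctx context.Context, errs *fault.Bus, prefix string) {
	LogFaultErrors(ctx, errs.Errors(), prefix)
}
```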
@@ -153,14 +153,7 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De
 		op.Errors.Fail(errors.Wrap(err, "doing restore"))
 	}
 
-	// TODO: the consumer (sdk or cli) should run this, not operations.
-	recoverableCount := len(op.Errors.Recovered())
-	for i, err := range op.Errors.Recovered() {
-		logger.Ctx(ctx).
-			With("error", err).
-			With(clues.InErr(err).Slice()...).
-			Errorf("doing restore: recoverable error %d of %d", i+1, recoverableCount)
-	}
+	LogFaultErrors(ctx, op.Errors.Errors(), "doing restore")
 
 	// -----
 	// Persistence
@@ -63,22 +63,25 @@ func New(
 	selector selectors.Selector,
 	rw stats.ReadWrites,
 	se stats.StartAndEndTime,
-	errs *fault.Bus,
+	fe *fault.Errors,
 ) *Backup {
+	if fe == nil {
+		fe = &fault.Errors{}
+	}
+
 	var (
-		ee = errs.Errors()
-		// TODO: count errData.Items(), not all recovered errors.
-		errCount = len(ee.Recovered)
+		errCount  = len(fe.Items)
+		skipCount = len(fe.Skipped)
 		failMsg string
 		malware, notFound, otherSkips int
 	)
 
-	if ee.Failure != nil {
-		failMsg = ee.Failure.Msg
+	if fe.Failure != nil {
+		failMsg = fe.Failure.Msg
 		errCount++
 	}
 
-	for _, s := range ee.Skipped {
+	for _, s := range fe.Skipped {
 		switch true {
 		case s.HasCause(fault.SkipMalware):
 			malware++
@@ -105,7 +108,7 @@ func New(
 		Status: status,
 
 		Selector: selector,
-		FailFast: errs.FailFast(),
+		FailFast: fe.FailFast,
 
 		ErrorCount: errCount,
 		Failure:    failMsg,
@@ -113,8 +116,9 @@ func New(
 		ReadWrites:      rw,
 		StartAndEndTime: se,
 		SkippedCounts: stats.SkippedCounts{
-			TotalSkippedItems: len(ee.Skipped),
+			TotalSkippedItems: skipCount,
 			SkippedMalware:    malware,
 			SkippedNotFound:   notFound,
 		},
 	}
 }
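For reference, the counts that backup.New now derives from the snapshot, restated in one place (a comment-only summary of the hunk above, not new behavior):

```go
// Given fe *fault.Errors (a nil fe is replaced with &fault.Errors{}):
//   ErrorCount        = len(fe.Items), plus 1 when fe.Failure != nil
//   Failure           = fe.Failure.Msg, when a failure is present
//   TotalSkippedItems = len(fe.Skipped)
//   FailFast          = fe.FailFast
```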
@@ -294,15 +294,15 @@ func (e *Errors) PrintItems(ctx context.Context, ignoreErrors, ignoreSkips, igno
 var _ print.Printable = &printableErrCore{}
 
 type printableErrCore struct {
-	msg string
+	*clues.ErrCore
 }
 
 func errCoreToPrintable(ec *clues.ErrCore) printableErrCore {
 	if ec == nil {
-		return printableErrCore{"<nil>"}
+		return printableErrCore{ErrCore: &clues.ErrCore{Msg: "<nil>"}}
 	}
 
-	return printableErrCore{ec.Msg}
+	return printableErrCore{ErrCore: ec}
 }
 
 func (pec printableErrCore) MinimumPrintable() any {
@@ -314,7 +314,11 @@ func (pec printableErrCore) Headers() []string {
 }
 
 func (pec printableErrCore) Values() []string {
-	return []string{pec.msg}
+	if pec.ErrCore == nil {
+		return []string{"<nil>"}
+	}
+
+	return []string{pec.Msg}
 }
 
 // ---------------------------------------------------------------------------
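A small illustration of the nil handling added to printableErrCore; both helpers are unexported, so this would only run inside the fault package, and the snippet itself is illustrative:

```go
p := errCoreToPrintable(nil)
fmt.Println(p.Values()) // prints [<nil>] instead of dereferencing a nil *clues.ErrCore

q := errCoreToPrintable(&clues.ErrCore{Msg: "creating backup model"})
fmt.Println(q.Values()) // prints [creating backup model]
```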
@@ -128,7 +128,7 @@ func writeBackup(
 	tID, snapID, backupID string,
 	sel selectors.Selector,
 	deets *details.Details,
-	errors *fault.Errors,
+	fe *fault.Errors,
 	errs *fault.Bus,
 ) *backup.Backup {
 	var (
@@ -139,7 +139,7 @@ func writeBackup(
 	err := sstore.Collect(ctx, streamstore.DetailsCollector(deets))
 	require.NoError(t, err, "collecting details in streamstore")
 
-	err = sstore.Collect(ctx, streamstore.FaultErrorsCollector(errors))
+	err = sstore.Collect(ctx, streamstore.FaultErrorsCollector(fe))
 	require.NoError(t, err, "collecting errors in streamstore")
 
 	ssid, err := sstore.Write(ctx, errs)
@@ -152,7 +152,7 @@ func writeBackup(
 		sel,
 		stats.ReadWrites{},
 		stats.StartAndEndTime{},
-		errs)
+		fe)
 
 	err = sw.Put(ctx, model.BackupSchema, b)
 	require.NoError(t, err)