fixes, logging improvements for fault.Errors (#2848)
A last-second change in #2708 caused us to pass the wrong fault.Errors into backup persistence, which undercounted the skipped items. That's been fixed, along with improved end-of-operation logging of fault errors.

---

#### Does this PR need a docs update or release note?

- [x] ⛔ No

#### Type of change

- [x] 🐛 Bugfix

#### Issue(s)

* #2708

#### Test Plan

- [x] 💪 Manual
- [x] 💚 E2E
parent bf1c824f4a · commit fa2cf046bb
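For orientation, here is a minimal, self-contained sketch of the behavior the fix restores: persistence should derive its error and skip counts from the fault.Errors snapshot it is handed, so the skipped-item count survives into the stored backup model. The types below are simplified stand-ins that mirror names from the diff, not the real corso packages.

```go
package main

import "fmt"

// errorsSnapshot stands in for *fault.Errors: the point-in-time record that
// backup persistence receives (simplified for illustration).
type errorsSnapshot struct {
	Items   []string // non-recoverable item failures
	Skipped []string // skipped items
}

// bus stands in for *fault.Bus: the collector an operation writes to while it runs.
type bus struct {
	items   []string
	skipped []string
}

func (b *bus) AddSkip(s string)    { b.skipped = append(b.skipped, s) }
func (b *bus) AddItemErr(s string) { b.items = append(b.items, s) }

// Errors produces the snapshot, mirroring fault.Bus.Errors() in the diff below.
func (b *bus) Errors() *errorsSnapshot {
	return &errorsSnapshot{Items: b.items, Skipped: b.skipped}
}

// newBackupCounts mirrors the fixed backup.New: it receives the snapshot and
// counts directly from its fields, with a nil guard.
func newBackupCounts(fe *errorsSnapshot) (errCount, skipCount int) {
	if fe == nil {
		fe = &errorsSnapshot{}
	}

	return len(fe.Items), len(fe.Skipped)
}

func main() {
	b := &bus{}
	b.AddSkip("malware-flagged file")
	b.AddItemErr("failed download")

	errCount, skipCount := newBackupCounts(b.Errors())
	fmt.Printf("errors=%d skipped=%d\n", errCount, skipCount) // errors=1 skipped=1
}
```

The actual fix is visible in the createBackupModels and backup.New hunks below, where backup.New now receives op.Errors.Errors() and counts fe.Items and fe.Skipped directly.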
@@ -673,7 +673,7 @@ func (c *Collections) UpdateCollections(
         // Skip items that don't match the folder selectors we were given.
         if shouldSkipDrive(ctx, collectionPath, c.matcher, driveName) {
-            logger.Ctx(ictx).Infow("Skipping path", "skipped_path", collectionPath.String())
+            logger.Ctx(ictx).Debugw("Skipping drive path", "skipped_path", collectionPath.String())
             continue
         }
 
@@ -161,19 +161,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
         op.Errors.Fail(errors.Wrap(err, "doing backup"))
     }
 
-    recoverableCount := len(op.Errors.Recovered())
-    for i, err := range op.Errors.Recovered() {
-        logger.Ctx(ctx).
-            With("error", err).
-            With(clues.InErr(err).Slice()...).
-            Errorf("doing backup: recoverable error %d of %d", i+1, recoverableCount)
-    }
-
-    skippedCount := len(op.Errors.Skipped())
-    for i, skip := range op.Errors.Skipped() {
-        logger.Ctx(ctx).With("skip", skip).Infof("doing backup: skipped item %d of %d", i+1, skippedCount)
-    }
+    // TODO: the consumer (sdk or cli) should run this, not operations.
+    LogFaultErrors(ctx, op.Errors.Errors(), "doing backup")
 
     // -----
     // Persistence
@@ -693,7 +681,6 @@ func (op *BackupOperation) createBackupModels(
         return clues.Wrap(err, "persisting details and errors").WithClues(ctx)
     }
 
-    ctx = clues.Add(ctx, "streamstore_snapshot_id", ssid)
     b := backup.New(
         snapID, ssid,
         op.Status.String(),
@@ -701,21 +688,24 @@ func (op *BackupOperation) createBackupModels(
         op.Selectors,
         op.Results.ReadWrites,
         op.Results.StartAndEndTime,
-        errs)
+        op.Errors.Errors())
 
-    if err = op.store.Put(ctx, model.BackupSchema, b); err != nil {
+    err = op.store.Put(
+        clues.Add(ctx, "streamstore_snapshot_id", ssid),
+        model.BackupSchema,
+        b)
+
+    if err != nil {
         return clues.Wrap(err, "creating backup model").WithClues(ctx)
     }
 
-    dur := op.Results.CompletedAt.Sub(op.Results.StartedAt)
-
     op.bus.Event(
         ctx,
         events.BackupEnd,
         map[string]any{
             events.BackupID:   b.ID,
             events.DataStored: op.Results.BytesUploaded,
-            events.Duration:   dur,
+            events.Duration:   op.Results.CompletedAt.Sub(op.Results.StartedAt),
             events.EndTime:    common.FormatTime(op.Results.CompletedAt),
             events.Resources:  op.Results.ResourceOwners,
             events.Service:    op.Selectors.PathService().String(),
src/internal/operations/logging.go (new file, 42 lines)
@@ -0,0 +1,42 @@
+package operations
+
+import (
+    "context"
+
+    "github.com/alcionai/corso/src/pkg/fault"
+    "github.com/alcionai/corso/src/pkg/logger"
+)
+
+// LogFaultErrors is a helper function that logs all entries in the Errors struct.
+func LogFaultErrors(ctx context.Context, fe *fault.Errors, prefix string) {
+    if fe == nil {
+        return
+    }
+
+    var (
+        log        = logger.Ctx(ctx)
+        pfxMsg     = prefix + ":"
+        li, ls, lr = len(fe.Items), len(fe.Skipped), len(fe.Recovered)
+    )
+
+    if fe.Failure == nil && li+ls+lr == 0 {
+        log.Info(pfxMsg, "no errors")
+        return
+    }
+
+    if fe.Failure != nil {
+        log.With("error", fe.Failure).Error(pfxMsg, "primary failure")
+    }
+
+    for i, item := range fe.Items {
+        log.With("failed_item", item).Errorf("%s item failure %d of %d", pfxMsg, i+1, li)
+    }
+
+    for i, item := range fe.Skipped {
+        log.With("skipped_item", item).Errorf("%s skipped item %d of %d", pfxMsg, i+1, ls)
+    }
+
+    for i, err := range fe.Recovered {
+        log.With("recovered_error", err).Errorf("%s recoverable error %d of %d", pfxMsg, i+1, lr)
+    }
+}
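The TODO at the call sites above notes that the consumer (SDK or CLI) should eventually run this logging instead of the operations layer. A hypothetical in-repo call site might look like the sketch below; the wrapper function, its package name, and the operations import path are assumptions for illustration, not part of this commit.

```go
package cli // hypothetical consumer-side package, for illustration only

import (
	"context"

	// Assumed import path, inferred from the file location shown above.
	"github.com/alcionai/corso/src/internal/operations"
)

// logBackupFaults sketches consumer-side fault logging: the CLI/SDK decides
// when to dump fault entries, rather than the operations layer doing it.
func logBackupFaults(ctx context.Context, op *operations.BackupOperation) {
	// op.Errors is the operation's fault bus; Errors() produces the
	// *fault.Errors snapshot that LogFaultErrors expects.
	operations.LogFaultErrors(ctx, op.Errors.Errors(), "backup")
}
```

Until that refactor happens, the operations layer keeps calling LogFaultErrors itself, as the backup and restore hunks show.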
@@ -153,14 +153,7 @@ func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.De
         op.Errors.Fail(errors.Wrap(err, "doing restore"))
     }
 
-    recoverableCount := len(op.Errors.Recovered())
-    for i, err := range op.Errors.Recovered() {
-        logger.Ctx(ctx).
-            With("error", err).
-            With(clues.InErr(err).Slice()...).
-            Errorf("doing restore: recoverable error %d of %d", i+1, recoverableCount)
-    }
+    // TODO: the consumer (sdk or cli) should run this, not operations.
+    LogFaultErrors(ctx, op.Errors.Errors(), "doing restore")
 
     // -----
     // Persistence
@@ -63,22 +63,25 @@ func New(
     selector selectors.Selector,
     rw stats.ReadWrites,
     se stats.StartAndEndTime,
-    errs *fault.Bus,
+    fe *fault.Errors,
 ) *Backup {
+    if fe == nil {
+        fe = &fault.Errors{}
+    }
+
     var (
-        ee = errs.Errors()
-        // TODO: count errData.Items(), not all recovered errors.
-        errCount = len(ee.Recovered)
+        errCount  = len(fe.Items)
+        skipCount = len(fe.Skipped)
 
         failMsg string
 
         malware, notFound, otherSkips int
     )
 
-    if ee.Failure != nil {
-        failMsg = ee.Failure.Msg
+    if fe.Failure != nil {
+        failMsg = fe.Failure.Msg
         errCount++
     }
 
-    for _, s := range ee.Skipped {
+    for _, s := range fe.Skipped {
         switch true {
         case s.HasCause(fault.SkipMalware):
             malware++
@@ -105,7 +108,7 @@ func New(
         Status: status,
 
         Selector: selector,
-        FailFast: errs.FailFast(),
+        FailFast: fe.FailFast,
 
         ErrorCount: errCount,
         Failure:    failMsg,
@@ -113,8 +116,9 @@ func New(
         ReadWrites:      rw,
         StartAndEndTime: se,
         SkippedCounts: stats.SkippedCounts{
-            TotalSkippedItems: len(ee.Skipped),
+            TotalSkippedItems: skipCount,
             SkippedMalware:    malware,
+            SkippedNotFound:   notFound,
         },
     }
 }
@@ -294,15 +294,15 @@ func (e *Errors) PrintItems(ctx context.Context, ignoreErrors, ignoreSkips, igno
 var _ print.Printable = &printableErrCore{}
 
 type printableErrCore struct {
-    msg string
+    *clues.ErrCore
 }
 
 func errCoreToPrintable(ec *clues.ErrCore) printableErrCore {
     if ec == nil {
-        return printableErrCore{"<nil>"}
+        return printableErrCore{ErrCore: &clues.ErrCore{Msg: "<nil>"}}
     }
 
-    return printableErrCore{ec.Msg}
+    return printableErrCore{ErrCore: ec}
 }
 
 func (pec printableErrCore) MinimumPrintable() any {
@@ -314,7 +314,11 @@ func (pec printableErrCore) Headers() []string {
 }
 
 func (pec printableErrCore) Values() []string {
-    return []string{pec.msg}
+    if pec.ErrCore == nil {
+        return []string{"<nil>"}
+    }
+
+    return []string{pec.Msg}
 }
 
 // ---------------------------------------------------------------------------
@@ -128,7 +128,7 @@ func writeBackup(
     tID, snapID, backupID string,
     sel selectors.Selector,
     deets *details.Details,
-    errors *fault.Errors,
+    fe *fault.Errors,
     errs *fault.Bus,
 ) *backup.Backup {
     var (
@@ -139,7 +139,7 @@ func writeBackup(
     err := sstore.Collect(ctx, streamstore.DetailsCollector(deets))
     require.NoError(t, err, "collecting details in streamstore")
 
-    err = sstore.Collect(ctx, streamstore.FaultErrorsCollector(errors))
+    err = sstore.Collect(ctx, streamstore.FaultErrorsCollector(fe))
     require.NoError(t, err, "collecting errors in streamstore")
 
     ssid, err := sstore.Write(ctx, errs)
@@ -152,7 +152,7 @@ func writeBackup(
         sel,
         stats.ReadWrites{},
         stats.StartAndEndTime{},
-        errs)
+        fe)
 
     err = sw.Put(ctx, model.BackupSchema, b)
     require.NoError(t, err)