extra panic protection in operations (#2383)
## Does this PR need a docs update or release note?

- [x] ⛔ No

## Type of change

- [x] 🧹 Tech Debt/Cleanup

## Test Plan

- [x] 💚 E2E
This commit is contained in:
parent
b00b41a6bd
commit
0436e0d128
@ -2,6 +2,7 @@ package operations
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"runtime/debug"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/alcionai/clues"
|
"github.com/alcionai/clues"
|
||||||
@ -106,7 +107,13 @@ type detailsWriter interface {
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
// Run begins a synchronous backup operation.
|
// Run begins a synchronous backup operation.
|
||||||
func (op *BackupOperation) Run(ctx context.Context) error {
|
func (op *BackupOperation) Run(ctx context.Context) (err error) {
|
||||||
|
defer func() {
|
||||||
|
if r := recover(); r != nil {
|
||||||
|
err = clues.Wrap(r.(error), "panic recovery").WithClues(ctx).With("stacktrace", debug.Stack())
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
ctx, end := D.Span(ctx, "operations:backup:run")
|
ctx, end := D.Span(ctx, "operations:backup:run")
|
||||||
defer func() {
|
defer func() {
|
||||||
end()
|
end()
|
||||||
@ -189,6 +196,8 @@ func (op *BackupOperation) do(ctx context.Context) (err error) {
|
|||||||
op.Errors.Fail(errors.Wrap(err, "collecting manifest heuristics"))
|
op.Errors.Fail(errors.Wrap(err, "collecting manifest heuristics"))
|
||||||
opStats.readErr = op.Errors.Err()
|
opStats.readErr = op.Errors.Err()
|
||||||
|
|
||||||
|
logger.Ctx(ctx).With("err", err).Errorw("producing manifests and metadata", clues.InErr(err).Slice()...)
|
||||||
|
|
||||||
return opStats.readErr
|
return opStats.readErr
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -197,6 +206,8 @@ func (op *BackupOperation) do(ctx context.Context) (err error) {
|
|||||||
op.Errors.Fail(errors.Wrap(err, "connecting to m365"))
|
op.Errors.Fail(errors.Wrap(err, "connecting to m365"))
|
||||||
opStats.readErr = op.Errors.Err()
|
opStats.readErr = op.Errors.Err()
|
||||||
|
|
||||||
|
logger.Ctx(ctx).With("err", err).Errorw("connectng to m365", clues.InErr(err).Slice()...)
|
||||||
|
|
||||||
return opStats.readErr
|
return opStats.readErr
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -205,6 +216,8 @@ func (op *BackupOperation) do(ctx context.Context) (err error) {
|
|||||||
op.Errors.Fail(errors.Wrap(err, "retrieving data to backup"))
|
op.Errors.Fail(errors.Wrap(err, "retrieving data to backup"))
|
||||||
opStats.readErr = op.Errors.Err()
|
opStats.readErr = op.Errors.Err()
|
||||||
|
|
||||||
|
logger.Ctx(ctx).With("err", err).Errorw("producing backup data collections", clues.InErr(err).Slice()...)
|
||||||
|
|
||||||
return opStats.readErr
|
return opStats.readErr
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -223,6 +236,8 @@ func (op *BackupOperation) do(ctx context.Context) (err error) {
|
|||||||
op.Errors.Fail(errors.Wrap(err, "backing up service data"))
|
op.Errors.Fail(errors.Wrap(err, "backing up service data"))
|
||||||
opStats.writeErr = op.Errors.Err()
|
opStats.writeErr = op.Errors.Err()
|
||||||
|
|
||||||
|
logger.Ctx(ctx).With("err", err).Errorw("persisting collection backups", clues.InErr(err).Slice()...)
|
||||||
|
|
||||||
return opStats.writeErr
|
return opStats.writeErr
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -237,6 +252,8 @@ func (op *BackupOperation) do(ctx context.Context) (err error) {
|
|||||||
op.Errors.Fail(errors.Wrap(err, "merging backup details"))
|
op.Errors.Fail(errors.Wrap(err, "merging backup details"))
|
||||||
opStats.writeErr = op.Errors.Err()
|
opStats.writeErr = op.Errors.Err()
|
||||||
|
|
||||||
|
logger.Ctx(ctx).With("err", err).Errorw("merging details", clues.InErr(err).Slice()...)
|
||||||
|
|
||||||
return opStats.writeErr
|
return opStats.writeErr
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -589,15 +606,21 @@ func (op *BackupOperation) persistResults(
|
|||||||
opStats.writeErr)
|
opStats.writeErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
op.Results.BytesRead = opStats.k.TotalHashedBytes
|
||||||
|
op.Results.BytesUploaded = opStats.k.TotalUploadedBytes
|
||||||
|
op.Results.ItemsWritten = opStats.k.TotalFileCount
|
||||||
|
op.Results.ResourceOwners = opStats.resourceCount
|
||||||
|
|
||||||
|
if opStats.gc == nil {
|
||||||
|
op.Status = Failed
|
||||||
|
return errors.New("data population never completed")
|
||||||
|
}
|
||||||
|
|
||||||
if opStats.readErr == nil && opStats.writeErr == nil && opStats.gc.Successful == 0 {
|
if opStats.readErr == nil && opStats.writeErr == nil && opStats.gc.Successful == 0 {
|
||||||
op.Status = NoData
|
op.Status = NoData
|
||||||
}
|
}
|
||||||
|
|
||||||
op.Results.BytesRead = opStats.k.TotalHashedBytes
|
|
||||||
op.Results.BytesUploaded = opStats.k.TotalUploadedBytes
|
|
||||||
op.Results.ItemsRead = opStats.gc.Successful
|
op.Results.ItemsRead = opStats.gc.Successful
|
||||||
op.Results.ItemsWritten = opStats.k.TotalFileCount
|
|
||||||
op.Results.ResourceOwners = opStats.resourceCount
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,6 +3,7 @@ package operations
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"runtime/debug"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/alcionai/clues"
|
"github.com/alcionai/clues"
|
||||||
@ -106,6 +107,12 @@ type restorer interface {
|
|||||||
|
|
||||||
// Run begins a synchronous restore operation.
|
// Run begins a synchronous restore operation.
|
||||||
func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.Details, err error) {
|
func (op *RestoreOperation) Run(ctx context.Context) (restoreDetails *details.Details, err error) {
|
||||||
|
defer func() {
|
||||||
|
if r := recover(); r != nil {
|
||||||
|
err = clues.Wrap(r.(error), "panic recovery").WithClues(ctx).With("stacktrace", debug.Stack())
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
ctx, end := D.Span(ctx, "operations:restore:run")
|
ctx, end := D.Span(ctx, "operations:restore:run")
|
||||||
defer func() {
|
defer func() {
|
||||||
end()
|
end()
|
||||||
@ -250,14 +257,20 @@ func (op *RestoreOperation) persistResults(
|
|||||||
opStats.writeErr)
|
opStats.writeErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
op.Results.BytesRead = opStats.bytesRead.NumBytes
|
||||||
|
op.Results.ItemsRead = len(opStats.cs) // TODO: file count, not collection count
|
||||||
|
op.Results.ResourceOwners = opStats.resourceCount
|
||||||
|
|
||||||
|
if opStats.gc == nil {
|
||||||
|
op.Status = Failed
|
||||||
|
return errors.New("data restoration never completed")
|
||||||
|
}
|
||||||
|
|
||||||
if opStats.readErr == nil && opStats.writeErr == nil && opStats.gc.Successful == 0 {
|
if opStats.readErr == nil && opStats.writeErr == nil && opStats.gc.Successful == 0 {
|
||||||
op.Status = NoData
|
op.Status = NoData
|
||||||
}
|
}
|
||||||
|
|
||||||
op.Results.BytesRead = opStats.bytesRead.NumBytes
|
|
||||||
op.Results.ItemsRead = len(opStats.cs) // TODO: file count, not collection count
|
|
||||||
op.Results.ItemsWritten = opStats.gc.Successful
|
op.Results.ItemsWritten = opStats.gc.Successful
|
||||||
op.Results.ResourceOwners = opStats.resourceCount
|
|
||||||
|
|
||||||
dur := op.Results.CompletedAt.Sub(op.Results.StartedAt)
|
dur := op.Results.CompletedAt.Sub(op.Results.StartedAt)
|
||||||
|
|
||||||
|
|||||||
@ -87,11 +87,12 @@ func (e *Errors) Fail(err error) *Errors {
|
|||||||
// setErr handles setting errors.err. Sync locking gets
|
// setErr handles setting errors.err. Sync locking gets
|
||||||
// handled upstream of this call.
|
// handled upstream of this call.
|
||||||
func (e *Errors) setErr(err error) *Errors {
|
func (e *Errors) setErr(err error) *Errors {
|
||||||
if e.err != nil {
|
if e.err == nil {
|
||||||
return e.addErr(err)
|
e.err = err
|
||||||
|
return e
|
||||||
}
|
}
|
||||||
|
|
||||||
e.err = err
|
e.errs = append(e.errs, err)
|
||||||
|
|
||||||
return e
|
return e
|
||||||
}
|
}
|
||||||
|
|||||||
@ -73,6 +73,8 @@ func (suite *FaultErrorsUnitSuite) TestErr() {
|
|||||||
suite.T().Run(test.name, func(t *testing.T) {
|
suite.T().Run(test.name, func(t *testing.T) {
|
||||||
n := fault.New(test.failFast)
|
n := fault.New(test.failFast)
|
||||||
require.NotNil(t, n)
|
require.NotNil(t, n)
|
||||||
|
require.NoError(t, n.Err())
|
||||||
|
require.Empty(t, n.Errs())
|
||||||
|
|
||||||
e := n.Fail(test.fail)
|
e := n.Fail(test.fail)
|
||||||
require.NotNil(t, e)
|
require.NotNil(t, e)
|
||||||
@ -90,6 +92,8 @@ func (suite *FaultErrorsUnitSuite) TestFail() {
|
|||||||
|
|
||||||
n := fault.New(false)
|
n := fault.New(false)
|
||||||
require.NotNil(t, n)
|
require.NotNil(t, n)
|
||||||
|
require.NoError(t, n.Err())
|
||||||
|
require.Empty(t, n.Errs())
|
||||||
|
|
||||||
n.Fail(assert.AnError)
|
n.Fail(assert.AnError)
|
||||||
assert.Error(t, n.Err())
|
assert.Error(t, n.Err())
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user