Add retries for posting item attachments (#4840)
Add exponential backoff and some retries when posting item attachments. Manual testing was showing upwards of 50 retries in some cases when ~250-300 attachments total were being added. Of the retries, none were at the max amount. However, all tests were for relatively small datasets --- #### Does this PR need a docs update or release note? - [x] ✅ Yes, it's included - [ ] 🕐 Yes, but in a later PR - [ ] ⛔ No #### Type of change - [ ] 🌻 Feature - [x] 🐛 Bugfix - [ ] 🗺️ Documentation - [ ] 🤖 Supportability/Tests - [ ] 💻 CI/Deployment - [ ] 🧹 Tech Debt/Cleanup #### Test Plan - [x] 💪 Manual - [ ] ⚡ Unit test - [ ] 💚 E2E
This commit is contained in:
parent
d16528be50
commit
4b64faef70
@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
### Fixed
|
### Fixed
|
||||||
- Handle the case where an email cannot be retrieved from Exchange due to an `ErrorInvalidRecipients` error. In
|
- Handle the case where an email cannot be retrieved from Exchange due to an `ErrorInvalidRecipients` error. In
|
||||||
this case, Corso will skip over the item but report this in the backup summary.
|
this case, Corso will skip over the item but report this in the backup summary.
|
||||||
|
- Guarantee Exchange email restoration when restoring multiple attachments. Some previous restores were failing with `ErrorItemNotFound`.
|
||||||
|
|
||||||
## [v0.17.0] (beta) - 2023-12-11
|
## [v0.17.0] (beta) - 2023-12-11
|
||||||
|
|
||||||
|
|||||||
@ -4,8 +4,10 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"runtime/trace"
|
"runtime/trace"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/alcionai/clues"
|
"github.com/alcionai/clues"
|
||||||
|
"github.com/cenkalti/backoff/v4"
|
||||||
"github.com/microsoftgraph/msgraph-sdk-go/models"
|
"github.com/microsoftgraph/msgraph-sdk-go/models"
|
||||||
|
|
||||||
"github.com/alcionai/corso/src/internal/common/ptr"
|
"github.com/alcionai/corso/src/internal/common/ptr"
|
||||||
@ -213,6 +215,14 @@ func getOrPopulateContainer(
|
|||||||
return folderID, nil
|
return folderID, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
maxRetries = 3
|
||||||
|
retryInterval = 750 * time.Millisecond
|
||||||
|
retryMultiplier = 2
|
||||||
|
retryMaxInterval = 5 * time.Second
|
||||||
|
retryMaxElapsedTime = 0
|
||||||
|
)
|
||||||
|
|
||||||
func uploadAttachments(
|
func uploadAttachments(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
ap attachmentPoster,
|
ap attachmentPoster,
|
||||||
@ -222,18 +232,57 @@ func uploadAttachments(
|
|||||||
) error {
|
) error {
|
||||||
el := errs.Local()
|
el := errs.Local()
|
||||||
|
|
||||||
for _, a := range as {
|
// Manual testing showed that even small delays between retries could get us
|
||||||
|
// past the issues. Use an exponential backoff that will allow us to keep
|
||||||
|
// runtimes low if there's just one failure for a given attachment but still
|
||||||
|
// waits a reasonable amount of time if there's multiple failures.
|
||||||
|
retryBackoff := backoff.NewExponentialBackOff()
|
||||||
|
retryBackoff.InitialInterval = retryInterval
|
||||||
|
retryBackoff.Multiplier = retryMultiplier
|
||||||
|
retryBackoff.MaxInterval = retryMaxInterval
|
||||||
|
retryBackoff.MaxElapsedTime = retryMaxElapsedTime
|
||||||
|
|
||||||
|
for i, a := range as {
|
||||||
if el.Failure() != nil {
|
if el.Failure() != nil {
|
||||||
return el.Failure()
|
return el.Failure()
|
||||||
}
|
}
|
||||||
|
|
||||||
err := uploadAttachment(
|
actx := clues.Add(ctx, "attachment_num", i)
|
||||||
ctx,
|
|
||||||
|
var (
|
||||||
|
retry int
|
||||||
|
err error
|
||||||
|
)
|
||||||
|
|
||||||
|
retryBackoff.Reset()
|
||||||
|
|
||||||
|
for (retry == 0 || graph.IsErrItemNotFound(err)) && retry <= maxRetries {
|
||||||
|
retry++
|
||||||
|
|
||||||
|
ictx := clues.Add(actx, "attempt_num", retry)
|
||||||
|
|
||||||
|
err = uploadAttachment(
|
||||||
|
ictx,
|
||||||
ap,
|
ap,
|
||||||
resourceID,
|
resourceID,
|
||||||
destinationID,
|
destinationID,
|
||||||
itemID,
|
itemID,
|
||||||
a)
|
a)
|
||||||
|
|
||||||
|
// Sometimes graph returns a 404 when we try to post the attachment.
|
||||||
|
// We're not sure why, but maybe it has to do with attaching many items.
|
||||||
|
// In any case, wait a little while and try again.
|
||||||
|
if graph.IsErrItemNotFound(err) && retry <= maxRetries {
|
||||||
|
waitTime := retryBackoff.NextBackOff()
|
||||||
|
|
||||||
|
logger.Ctx(ictx).Infow(
|
||||||
|
"error uploading attachment, retrying",
|
||||||
|
"retry_after", waitTime)
|
||||||
|
|
||||||
|
time.Sleep(waitTime)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// FIXME: I don't know why we're swallowing this error case.
|
// FIXME: I don't know why we're swallowing this error case.
|
||||||
// It needs investigation: https://github.com/alcionai/corso/issues/3498
|
// It needs investigation: https://github.com/alcionai/corso/issues/3498
|
||||||
@ -247,7 +296,7 @@ func uploadAttachments(
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
el.AddRecoverable(ctx, clues.WrapWC(ctx, err, "uploading mail attachment"))
|
el.AddRecoverable(actx, clues.WrapWC(ctx, err, "uploading mail attachment"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -166,7 +166,7 @@ func IsErrDeletedInFlight(err error) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func IsErrItemNotFound(err error) bool {
|
func IsErrItemNotFound(err error) bool {
|
||||||
return hasErrorCode(err, itemNotFound)
|
return hasErrorCode(err, itemNotFound, errorItemNotFound)
|
||||||
}
|
}
|
||||||
|
|
||||||
func IsErrInvalidDelta(err error) bool {
|
func IsErrInvalidDelta(err error) bool {
|
||||||
|
|||||||
@ -858,10 +858,15 @@ func (suite *GraphErrorsUnitSuite) TestIsErrItemNotFound() {
|
|||||||
expect: assert.False,
|
expect: assert.False,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "item nott found oDataErr",
|
name: "item not found oDataErr",
|
||||||
err: graphTD.ODataErr(string(itemNotFound)),
|
err: graphTD.ODataErr(string(itemNotFound)),
|
||||||
expect: assert.True,
|
expect: assert.True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "error item not found oDataErr",
|
||||||
|
err: graphTD.ODataErr(string(errorItemNotFound)),
|
||||||
|
expect: assert.True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, test := range table {
|
for _, test := range table {
|
||||||
suite.Run(test.name, func() {
|
suite.Run(test.name, func() {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user