retry on http/2 stream errors (#3802)
http/2 stream error failures don't allow us to leverage client middleware. As a result we're lacking a retry handler. This change adds retries for http_wrapper in the explicit case of a http/2 stream error. --- #### Does this PR need a docs update or release note? - [x] ✅ Yes, it's included #### Type of change - [x] 🐛 Bugfix #### Test Plan - [x] 💪 Manual - [x] 💚 E2E
This commit is contained in:
parent
ab1cb1ebbc
commit
fd6dff3270
@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- Return a ServiceNotEnabled error when a tenant has no active SharePoint license.
|
- Return a ServiceNotEnabled error when a tenant has no active SharePoint license.
|
||||||
|
- Added retries for http/2 stream connection failures when downloading large item content.
|
||||||
|
|
||||||
### Known issues
|
### Known issues
|
||||||
- If a link share is created for an item with inheritance disabled
|
- If a link share is created for an item with inheritance disabled
|
||||||
|
|||||||
@ -194,13 +194,13 @@ func getDriveItemContent(
|
|||||||
|
|
||||||
resp, err := gr.Request(ctx, http.MethodGet, *url, nil, nil)
|
resp, err := gr.Request(ctx, http.MethodGet, *url, nil, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, clues.New("downloading item").With("error", err)
|
return nil, clues.New("requesting item content").With("error", err)
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
content, err := io.ReadAll(resp.Body)
|
content, err := io.ReadAll(resp.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, clues.New("read downloaded item").With("error", err)
|
return nil, clues.New("reading item content").With("error", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return content, nil
|
return content, nil
|
||||||
|
|||||||
@ -4,11 +4,15 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"regexp"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/alcionai/clues"
|
"github.com/alcionai/clues"
|
||||||
khttp "github.com/microsoft/kiota-http-go"
|
khttp "github.com/microsoft/kiota-http-go"
|
||||||
|
|
||||||
|
"github.com/alcionai/corso/src/internal/events"
|
||||||
"github.com/alcionai/corso/src/internal/version"
|
"github.com/alcionai/corso/src/internal/version"
|
||||||
|
"github.com/alcionai/corso/src/pkg/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@ -70,6 +74,8 @@ func NewNoTimeoutHTTPWrapper(opts ...Option) *httpWrapper {
|
|||||||
// requests
|
// requests
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
var streamErrRE = regexp.MustCompile(`stream error: stream ID \d+; .+; received from peer`)
|
||||||
|
|
||||||
// Request does the provided request.
|
// Request does the provided request.
|
||||||
func (hw httpWrapper) Request(
|
func (hw httpWrapper) Request(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
@ -91,7 +97,35 @@ func (hw httpWrapper) Request(
|
|||||||
// See https://learn.microsoft.com/en-us/sharepoint/dev/general-development/how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online#how-to-decorate-your-http-traffic
|
// See https://learn.microsoft.com/en-us/sharepoint/dev/general-development/how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online#how-to-decorate-your-http-traffic
|
||||||
req.Header.Set("User-Agent", "ISV|Alcion|Corso/"+version.Version)
|
req.Header.Set("User-Agent", "ISV|Alcion|Corso/"+version.Version)
|
||||||
|
|
||||||
resp, err := hw.client.Do(req)
|
var resp *http.Response
|
||||||
|
|
||||||
|
i := 0
|
||||||
|
|
||||||
|
// stream errors from http/2 will fail before we reach
|
||||||
|
// client middleware handling, therefore we don't get to
|
||||||
|
// make use of the retry middleware. This external
|
||||||
|
// retry wrapper is unsophisticated, but should only
|
||||||
|
// retry in the event of a `stream error`, which is not
|
||||||
|
// a common expectation.
|
||||||
|
for i < 3 {
|
||||||
|
ictx := clues.Add(ctx, "request_retry_iter", i)
|
||||||
|
|
||||||
|
resp, err = hw.client.Do(req)
|
||||||
|
if err != nil && !streamErrRE.MatchString(err.Error()) {
|
||||||
|
return nil, Stack(ictx, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Ctx(ictx).Debug("retrying after stream error")
|
||||||
|
events.Inc(events.APICall, "streamerror")
|
||||||
|
|
||||||
|
time.Sleep(3 * time.Second)
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, Stack(ctx, err)
|
return nil, Stack(ctx, err)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -271,21 +271,20 @@ func (oc *Collection) getDriveItemContent(
|
|||||||
var (
|
var (
|
||||||
itemID = ptr.Val(item.GetId())
|
itemID = ptr.Val(item.GetId())
|
||||||
itemName = ptr.Val(item.GetName())
|
itemName = ptr.Val(item.GetName())
|
||||||
el = errs.Local()
|
|
||||||
)
|
)
|
||||||
|
|
||||||
itemData, err := downloadContent(ctx, oc.handler, oc.urlCache, item, oc.driveID)
|
itemData, err := downloadContent(ctx, oc.handler, oc.urlCache, item, oc.driveID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if clues.HasLabel(err, graph.LabelsMalware) || (item != nil && item.GetMalware() != nil) {
|
if clues.HasLabel(err, graph.LabelsMalware) || (item != nil && item.GetMalware() != nil) {
|
||||||
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipMalware).Info("item flagged as malware")
|
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipMalware).Info("item flagged as malware")
|
||||||
el.AddSkip(ctx, fault.FileSkip(fault.SkipMalware, driveID, itemID, itemName, graph.ItemInfo(item)))
|
errs.AddSkip(ctx, fault.FileSkip(fault.SkipMalware, driveID, itemID, itemName, graph.ItemInfo(item)))
|
||||||
|
|
||||||
return nil, clues.Wrap(err, "malware item").Label(graph.LabelsSkippable)
|
return nil, clues.Wrap(err, "malware item").Label(graph.LabelsSkippable)
|
||||||
}
|
}
|
||||||
|
|
||||||
if clues.HasLabel(err, graph.LabelStatus(http.StatusNotFound)) || graph.IsErrDeletedInFlight(err) {
|
if clues.HasLabel(err, graph.LabelStatus(http.StatusNotFound)) || graph.IsErrDeletedInFlight(err) {
|
||||||
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipNotFound).Info("item not found")
|
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipNotFound).Info("item not found")
|
||||||
el.AddSkip(ctx, fault.FileSkip(fault.SkipNotFound, driveID, itemID, itemName, graph.ItemInfo(item)))
|
errs.AddSkip(ctx, fault.FileSkip(fault.SkipNotFound, driveID, itemID, itemName, graph.ItemInfo(item)))
|
||||||
|
|
||||||
return nil, clues.Wrap(err, "deleted item").Label(graph.LabelsSkippable)
|
return nil, clues.Wrap(err, "deleted item").Label(graph.LabelsSkippable)
|
||||||
}
|
}
|
||||||
@ -300,13 +299,13 @@ func (oc *Collection) getDriveItemContent(
|
|||||||
// restore, or we have to handle it separately by somehow
|
// restore, or we have to handle it separately by somehow
|
||||||
// deleting the entire collection.
|
// deleting the entire collection.
|
||||||
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipBigOneNote).Info("max OneNote file size exceeded")
|
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipBigOneNote).Info("max OneNote file size exceeded")
|
||||||
el.AddSkip(ctx, fault.FileSkip(fault.SkipBigOneNote, driveID, itemID, itemName, graph.ItemInfo(item)))
|
errs.AddSkip(ctx, fault.FileSkip(fault.SkipBigOneNote, driveID, itemID, itemName, graph.ItemInfo(item)))
|
||||||
|
|
||||||
return nil, clues.Wrap(err, "max oneNote item").Label(graph.LabelsSkippable)
|
return nil, clues.Wrap(err, "max oneNote item").Label(graph.LabelsSkippable)
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.CtxErr(ctx, err).Error("downloading item")
|
logger.CtxErr(ctx, err).Error("downloading item content")
|
||||||
el.AddRecoverable(ctx, clues.Stack(err).WithClues(ctx).Label(fault.LabelForceNoBackupCreation))
|
errs.AddRecoverable(ctx, clues.Stack(err).WithClues(ctx).Label(fault.LabelForceNoBackupCreation))
|
||||||
|
|
||||||
// return err, not el.Err(), because the lazy reader needs to communicate to
|
// return err, not el.Err(), because the lazy reader needs to communicate to
|
||||||
// the data consumer that this item is unreadable, regardless of the fault state.
|
// the data consumer that this item is unreadable, regardless of the fault state.
|
||||||
@ -416,7 +415,6 @@ type driveStats struct {
|
|||||||
// and uses the collection `itemReader` to read the item
|
// and uses the collection `itemReader` to read the item
|
||||||
func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
|
func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
|
||||||
var (
|
var (
|
||||||
el = errs.Local()
|
|
||||||
stats driveStats
|
stats driveStats
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
)
|
)
|
||||||
@ -442,7 +440,7 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
|
|||||||
defer close(semaphoreCh)
|
defer close(semaphoreCh)
|
||||||
|
|
||||||
for _, item := range oc.driveItems {
|
for _, item := range oc.driveItems {
|
||||||
if el.Failure() != nil {
|
if errs.Failure() != nil {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -481,7 +479,6 @@ func (oc *Collection) populateDriveItem(
|
|||||||
errs *fault.Bus,
|
errs *fault.Bus,
|
||||||
) {
|
) {
|
||||||
var (
|
var (
|
||||||
el = errs.Local()
|
|
||||||
itemID = ptr.Val(item.GetId())
|
itemID = ptr.Val(item.GetId())
|
||||||
itemName = ptr.Val(item.GetName())
|
itemName = ptr.Val(item.GetName())
|
||||||
itemSize = ptr.Val(item.GetSize())
|
itemSize = ptr.Val(item.GetSize())
|
||||||
@ -518,7 +515,7 @@ func (oc *Collection) populateDriveItem(
|
|||||||
// Fetch metadata for the file
|
// Fetch metadata for the file
|
||||||
itemMeta, itemMetaSize, err = downloadItemMeta(ctx, oc.handler, oc.driveID, item)
|
itemMeta, itemMetaSize, err = downloadItemMeta(ctx, oc.handler, oc.driveID, item)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
el.AddRecoverable(ctx, clues.Wrap(err, "getting item metadata").Label(fault.LabelForceNoBackupCreation))
|
errs.AddRecoverable(ctx, clues.Wrap(err, "getting item metadata").Label(fault.LabelForceNoBackupCreation))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -546,7 +543,7 @@ func (oc *Collection) populateDriveItem(
|
|||||||
itemExtensionFactory)
|
itemExtensionFactory)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err := clues.Wrap(err, "adding extensions").Label(fault.LabelForceNoBackupCreation)
|
err := clues.Wrap(err, "adding extensions").Label(fault.LabelForceNoBackupCreation)
|
||||||
el.AddRecoverable(ctx, err)
|
errs.AddRecoverable(ctx, err)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -137,9 +137,9 @@ func (e *Bus) logAndAddRecoverable(ctx context.Context, err error, skip int) {
|
|||||||
isFail := e.addRecoverableErr(err)
|
isFail := e.addRecoverableErr(err)
|
||||||
|
|
||||||
if isFail {
|
if isFail {
|
||||||
log.Error("recoverable error")
|
log.Errorf("recoverable error: %v", err)
|
||||||
} else {
|
} else {
|
||||||
log.Info("recoverable error")
|
log.Infof("recoverable error: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -34,7 +34,7 @@ var serviceToPathType = map[service]path.ServiceType{
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
ErrorBadSelectorCast = clues.New("wrong selector service type")
|
ErrorBadSelectorCast = clues.New("wrong selector service type")
|
||||||
ErrorNoMatchingItems = clues.New("no items match the specified selectors")
|
ErrorNoMatchingItems = clues.New("no items match the provided selectors")
|
||||||
ErrorUnrecognizedService = clues.New("unrecognized service")
|
ErrorUnrecognizedService = clues.New("unrecognized service")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user