Skip items returning 404 (#2805)
Treat a 404 as an unavailable item and skip it similar to malware items. --- #### Does this PR need a docs update or release note? - [x] ✅ Yes, it's included - [ ] 🕐 Yes, but in a later PR - [ ] ⛔ No #### Type of change <!--- Please check the type of change your PR introduces: ---> - [ ] 🌻 Feature - [x] 🐛 Bugfix - [ ] 🗺️ Documentation - [ ] 🤖 Test - [ ] 💻 CI/Deployment - [ ] 🧹 Tech Debt/Cleanup #### Issue(s) <!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. --> * #<issue> #### Test Plan <!-- How will this be tested prior to merging.--> - [ ] 💪 Manual - [x] ⚡ Unit test - [ ] 💚 E2E Treat a 404 as an unavailable item and skip it similar to malware items.
This commit is contained in:
parent
766dbb2c94
commit
0125876192
@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
|
|
||||||
### Added
|
### Added
|
||||||
- Sharepoint library (document files) support: backup, list, details, and restore.
|
- Sharepoint library (document files) support: backup, list, details, and restore.
|
||||||
|
- OneDrive item downloads that return 404 during backup (normally due to external deletion while Corso processes) are now skipped instead of quietly dropped. These items will appear in the skipped list alongside other skipped cases such as malware detection.
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- Fix repo connect not working without a config file
|
- Fix repo connect not working without a config file
|
||||||
|
|||||||
@ -46,6 +46,9 @@ const (
|
|||||||
const (
|
const (
|
||||||
LabelsMalware = "malware_detected"
|
LabelsMalware = "malware_detected"
|
||||||
LabelsMysiteNotFound = "mysite_not_found"
|
LabelsMysiteNotFound = "mysite_not_found"
|
||||||
|
|
||||||
|
// LabelsSkippable is used to determine if an error is skippable
|
||||||
|
LabelsSkippable = "skippable_errors"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -56,7 +59,7 @@ var (
|
|||||||
// Delta tokens can be desycned or expired. In either case, the token
|
// Delta tokens can be desycned or expired. In either case, the token
|
||||||
// becomes invalid, and cannot be used again.
|
// becomes invalid, and cannot be used again.
|
||||||
// https://learn.microsoft.com/en-us/graph/errors#code-property
|
// https://learn.microsoft.com/en-us/graph/errors#code-property
|
||||||
ErrInvalidDelta = clues.New("inalid delta token")
|
ErrInvalidDelta = clues.New("invalid delta token")
|
||||||
|
|
||||||
// Timeout errors are identified for tracking the need to retry calls.
|
// Timeout errors are identified for tracking the need to retry calls.
|
||||||
// Other delay errors, like throttling, are already handled by the
|
// Other delay errors, like throttling, are already handled by the
|
||||||
@ -255,9 +258,9 @@ func appendIf(a []any, k string, v *string) []any {
|
|||||||
return append(a, k, *v)
|
return append(a, k, *v)
|
||||||
}
|
}
|
||||||
|
|
||||||
// MalwareInfo gathers potentially useful information about a malware infected
|
// ItemInfo gathers potentially useful information about a drive item,
|
||||||
// drive item, and aggregates that data into a map.
|
// and aggregates that data into a map.
|
||||||
func MalwareInfo(item models.DriveItemable) map[string]any {
|
func ItemInfo(item models.DriveItemable) map[string]any {
|
||||||
m := map[string]any{}
|
m := map[string]any{}
|
||||||
|
|
||||||
creator := item.GetCreatedByUser()
|
creator := item.GetCreatedByUser()
|
||||||
|
|||||||
@ -259,5 +259,5 @@ func (suite *GraphErrorsUnitSuite) TestMalwareInfo() {
|
|||||||
fault.AddtlMalwareDesc: malDesc,
|
fault.AddtlMalwareDesc: malDesc,
|
||||||
}
|
}
|
||||||
|
|
||||||
assert.Equal(suite.T(), expect, MalwareInfo(&i))
|
assert.Equal(suite.T(), expect, ItemInfo(&i))
|
||||||
}
|
}
|
||||||
|
|||||||
@ -318,7 +318,6 @@ func (oc *Collection) getDriveItemContent(
|
|||||||
// Initial try with url from delta + 2 retries
|
// Initial try with url from delta + 2 retries
|
||||||
for i := 1; i <= maxDownloadRetires; i++ {
|
for i := 1; i <= maxDownloadRetires; i++ {
|
||||||
_, itemData, err = oc.itemReader(ctx, oc.itemClient, item)
|
_, itemData, err = oc.itemReader(ctx, oc.itemClient, item)
|
||||||
|
|
||||||
if err == nil || !graph.IsErrUnauthorized(err) {
|
if err == nil || !graph.IsErrUnauthorized(err) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@ -339,16 +338,25 @@ func (oc *Collection) getDriveItemContent(
|
|||||||
// check for errors following retries
|
// check for errors following retries
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if clues.HasLabel(err, graph.LabelsMalware) || (item != nil && item.GetMalware() != nil) {
|
if clues.HasLabel(err, graph.LabelsMalware) || (item != nil && item.GetMalware() != nil) {
|
||||||
logger.Ctx(ctx).With("error", err.Error(), "malware", true).Error("downloading item")
|
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipMalware).Info("item flagged as malware")
|
||||||
el.AddSkip(fault.FileSkip(fault.SkipMalware, itemID, itemName, graph.MalwareInfo(item)))
|
el.AddSkip(fault.FileSkip(fault.SkipMalware, itemID, itemName, graph.ItemInfo(item)))
|
||||||
} else {
|
|
||||||
logger.Ctx(ctx).With("error", err.Error()).Error("downloading item")
|
return nil, clues.Wrap(err, "downloading item").Label(graph.LabelsSkippable)
|
||||||
el.AddRecoverable(clues.Stack(err).WithClues(ctx).Label(fault.LabelForceNoBackupCreation))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if clues.HasLabel(err, graph.LabelStatus(http.StatusNotFound)) || graph.IsErrDeletedInFlight(err) {
|
||||||
|
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipNotFound).Error("item not found")
|
||||||
|
el.AddSkip(fault.FileSkip(fault.SkipNotFound, itemID, itemName, graph.ItemInfo(item)))
|
||||||
|
|
||||||
|
return nil, clues.Wrap(err, "downloading item").Label(graph.LabelsSkippable)
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.CtxErr(ctx, err).Error("downloading item")
|
||||||
|
el.AddRecoverable(clues.Stack(err).WithClues(ctx).Label(fault.LabelForceNoBackupCreation))
|
||||||
|
|
||||||
// return err, not el.Err(), because the lazy reader needs to communicate to
|
// return err, not el.Err(), because the lazy reader needs to communicate to
|
||||||
// the data consumer that this item is unreadable, regardless of the fault state.
|
// the data consumer that this item is unreadable, regardless of the fault state.
|
||||||
return nil, err
|
return nil, clues.Wrap(err, "downloading item")
|
||||||
}
|
}
|
||||||
|
|
||||||
return itemData, nil
|
return itemData, nil
|
||||||
|
|||||||
@ -90,7 +90,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
|
|||||||
itemDeets nst
|
itemDeets nst
|
||||||
infoFrom func(*testing.T, details.ItemInfo) (string, string)
|
infoFrom func(*testing.T, details.ItemInfo) (string, string)
|
||||||
expectErr require.ErrorAssertionFunc
|
expectErr require.ErrorAssertionFunc
|
||||||
expectLabel string
|
expectLabels []string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "oneDrive, no duplicates",
|
name: "oneDrive, no duplicates",
|
||||||
@ -136,8 +136,24 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
|
|||||||
require.NotNil(t, dii.OneDrive)
|
require.NotNil(t, dii.OneDrive)
|
||||||
return dii.OneDrive.ItemName, dii.OneDrive.ParentPath
|
return dii.OneDrive.ItemName, dii.OneDrive.ParentPath
|
||||||
},
|
},
|
||||||
expectErr: require.Error,
|
expectErr: require.Error,
|
||||||
expectLabel: graph.LabelsMalware,
|
expectLabels: []string{graph.LabelsMalware, graph.LabelsSkippable},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "oneDrive, not found",
|
||||||
|
numInstances: 3,
|
||||||
|
source: OneDriveSource,
|
||||||
|
itemDeets: nst{testItemName, 42, now},
|
||||||
|
// Usually `Not Found` is returned from itemGetter and not itemReader
|
||||||
|
itemReader: func(context.Context, *http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
|
||||||
|
return details.ItemInfo{}, nil, clues.New("test not found").Label(graph.LabelStatus(http.StatusNotFound))
|
||||||
|
},
|
||||||
|
infoFrom: func(t *testing.T, dii details.ItemInfo) (string, string) {
|
||||||
|
require.NotNil(t, dii.OneDrive)
|
||||||
|
return dii.OneDrive.ItemName, dii.OneDrive.ParentPath
|
||||||
|
},
|
||||||
|
expectErr: require.Error,
|
||||||
|
expectLabels: []string{graph.LabelStatus(http.StatusNotFound), graph.LabelsSkippable},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "sharePoint, no duplicates",
|
name: "sharePoint, no duplicates",
|
||||||
@ -267,8 +283,8 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
|
|||||||
test.expectErr(t, err)
|
test.expectErr(t, err)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if len(test.expectLabel) > 0 {
|
for _, label := range test.expectLabels {
|
||||||
assert.True(t, clues.HasLabel(err, test.expectLabel), "has clues label:", test.expectLabel)
|
assert.True(t, clues.HasLabel(err, label), "has clues label:", label)
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|||||||
@ -635,7 +635,7 @@ func (c *Collections) UpdateCollections(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if item.GetMalware() != nil {
|
if item.GetMalware() != nil {
|
||||||
addtl := graph.MalwareInfo(item)
|
addtl := graph.ItemInfo(item)
|
||||||
skip := fault.FileSkip(fault.SkipMalware, itemID, itemName, addtl)
|
skip := fault.FileSkip(fault.SkipMalware, itemID, itemName, addtl)
|
||||||
|
|
||||||
if isFolder {
|
if isFolder {
|
||||||
|
|||||||
@ -286,11 +286,13 @@ func (cp *corsoProgress) CachedFile(fname string, size int64) {
|
|||||||
// during the upload process. This could be from reading a file or something
|
// during the upload process. This could be from reading a file or something
|
||||||
// else.
|
// else.
|
||||||
func (cp *corsoProgress) Error(relpath string, err error, isIgnored bool) {
|
func (cp *corsoProgress) Error(relpath string, err error, isIgnored bool) {
|
||||||
// The malware case is an artifact of being unable to skip the item
|
// LabelsSkippable is set of malware items or not found items.
|
||||||
// if we catch detection at a late enough stage in collection enumeration.
|
// The malware case is an artifact of being unable to skip the
|
||||||
// This is our next point of error handling, where we can identify and
|
// item if we catch detection at a late enough stage in collection
|
||||||
// skip over the case.
|
// enumeration. The not found could be items deleted in between a
|
||||||
if clues.HasLabel(err, graph.LabelsMalware) {
|
// delta query and a fetch. This is our next point of error
|
||||||
|
// handling, where we can identify and skip over the case.
|
||||||
|
if clues.HasLabel(err, graph.LabelsSkippable) {
|
||||||
cp.incExpectedErrs()
|
cp.incExpectedErrs()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@ -32,4 +32,5 @@ func (bc *ByteCounter) Count(i int64) {
|
|||||||
type SkippedCounts struct {
|
type SkippedCounts struct {
|
||||||
TotalSkippedItems int `json:"totalSkippedItems"`
|
TotalSkippedItems int `json:"totalSkippedItems"`
|
||||||
SkippedMalware int `json:"skippedMalware"`
|
SkippedMalware int `json:"skippedMalware"`
|
||||||
|
SkippedNotFound int `json:"skippedNotFound"`
|
||||||
}
|
}
|
||||||
|
|||||||
@ -63,10 +63,9 @@ func New(
|
|||||||
var (
|
var (
|
||||||
ee = errs.Errors()
|
ee = errs.Errors()
|
||||||
// TODO: count errData.Items(), not all recovered errors.
|
// TODO: count errData.Items(), not all recovered errors.
|
||||||
errCount = len(ee.Recovered)
|
errCount = len(ee.Recovered)
|
||||||
failMsg string
|
failMsg string
|
||||||
malware int
|
malware, notFound, otherSkips int
|
||||||
otherSkips int
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if ee.Failure != nil {
|
if ee.Failure != nil {
|
||||||
@ -75,9 +74,12 @@ func New(
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, s := range ee.Skipped {
|
for _, s := range ee.Skipped {
|
||||||
if s.HasCause(fault.SkipMalware) {
|
switch true {
|
||||||
|
case s.HasCause(fault.SkipMalware):
|
||||||
malware++
|
malware++
|
||||||
} else {
|
case s.HasCause(fault.SkipNotFound):
|
||||||
|
notFound++
|
||||||
|
default:
|
||||||
otherSkips++
|
otherSkips++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -194,14 +196,22 @@ func (b Backup) Values() []string {
|
|||||||
|
|
||||||
if b.TotalSkippedItems > 0 {
|
if b.TotalSkippedItems > 0 {
|
||||||
status += fmt.Sprintf("%d skipped", b.TotalSkippedItems)
|
status += fmt.Sprintf("%d skipped", b.TotalSkippedItems)
|
||||||
}
|
|
||||||
|
|
||||||
if b.TotalSkippedItems > 0 && b.SkippedMalware > 0 {
|
if b.SkippedMalware+b.SkippedNotFound > 0 {
|
||||||
status += ": "
|
status += ": "
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if b.SkippedMalware > 0 {
|
if b.SkippedMalware > 0 {
|
||||||
status += fmt.Sprintf("%d malware", b.SkippedMalware)
|
status += fmt.Sprintf("%d malware", b.SkippedMalware)
|
||||||
|
|
||||||
|
if b.SkippedNotFound > 0 {
|
||||||
|
status += ", "
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.SkippedNotFound > 0 {
|
||||||
|
status += fmt.Sprintf("%d not found", b.SkippedNotFound)
|
||||||
}
|
}
|
||||||
|
|
||||||
if errCount+b.TotalSkippedItems > 0 {
|
if errCount+b.TotalSkippedItems > 0 {
|
||||||
|
|||||||
@ -117,6 +117,17 @@ func (suite *BackupUnitSuite) TestBackup_Values_statusVariations() {
|
|||||||
},
|
},
|
||||||
expect: "test (2 skipped: 1 malware)",
|
expect: "test (2 skipped: 1 malware)",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "not found",
|
||||||
|
bup: backup.Backup{
|
||||||
|
Status: "test",
|
||||||
|
SkippedCounts: stats.SkippedCounts{
|
||||||
|
TotalSkippedItems: 2,
|
||||||
|
SkippedNotFound: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expect: "test (2 skipped: 1 not found)",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "errors and malware",
|
name: "errors and malware",
|
||||||
bup: backup.Backup{
|
bup: backup.Backup{
|
||||||
@ -129,6 +140,31 @@ func (suite *BackupUnitSuite) TestBackup_Values_statusVariations() {
|
|||||||
},
|
},
|
||||||
expect: "test (42 errors, 1 skipped: 1 malware)",
|
expect: "test (42 errors, 1 skipped: 1 malware)",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "errors and not found",
|
||||||
|
bup: backup.Backup{
|
||||||
|
Status: "test",
|
||||||
|
ErrorCount: 42,
|
||||||
|
SkippedCounts: stats.SkippedCounts{
|
||||||
|
TotalSkippedItems: 1,
|
||||||
|
SkippedNotFound: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expect: "test (42 errors, 1 skipped: 1 not found)",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "errors, malware, notFound",
|
||||||
|
bup: backup.Backup{
|
||||||
|
Status: "test",
|
||||||
|
ErrorCount: 42,
|
||||||
|
SkippedCounts: stats.SkippedCounts{
|
||||||
|
TotalSkippedItems: 1,
|
||||||
|
SkippedMalware: 1,
|
||||||
|
SkippedNotFound: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expect: "test (42 errors, 1 skipped: 1 malware, 1 not found)",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, test := range table {
|
for _, test := range table {
|
||||||
suite.Run(test.name, func() {
|
suite.Run(test.name, func() {
|
||||||
|
|||||||
@ -109,6 +109,10 @@ type skipCause string
|
|||||||
// fail any attempts to backup or restore.
|
// fail any attempts to backup or restore.
|
||||||
const SkipMalware skipCause = "malware_detected"
|
const SkipMalware skipCause = "malware_detected"
|
||||||
|
|
||||||
|
// SkipNotFound identifies that a file was skipped because we could
|
||||||
|
// not find it when trying to download contents
|
||||||
|
const SkipNotFound skipCause = "file_not_found"
|
||||||
|
|
||||||
// Skipped items are permanently unprocessable due to well-known conditions.
|
// Skipped items are permanently unprocessable due to well-known conditions.
|
||||||
// In order to skip an item, the following conditions should be met:
|
// In order to skip an item, the following conditions should be met:
|
||||||
// 1. The conditions for skipping the item are well-known and
|
// 1. The conditions for skipping the item are well-known and
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user