From 21a572994724cfbbc8d82ebf5c68f6e670c1e7a8 Mon Sep 17 00:00:00 2001 From: Keepers Date: Wed, 8 Mar 2023 11:13:30 -0700 Subject: [PATCH] backup malware catch (#2703) assuming it's possible for a graph item to skip the malware detection, we still want to catch and handle 400's from attempted malware downloads downstream. --- #### Does this PR need a docs update or release note? - [x] :no_entry: No #### Type of change - [ ] :sunflower: Feature #### Issue(s) * #2701 #### Test Plan - [x] :muscle: Manual - [x] :zap: Unit test --- src/internal/connector/graph/errors.go | 39 ++++++++++++++++--- src/internal/connector/onedrive/collection.go | 6 +-- .../connector/onedrive/collection_test.go | 32 ++++++++++++++- src/internal/connector/onedrive/drive.go | 2 +- src/internal/connector/onedrive/item.go | 14 ++++++- src/internal/kopia/upload.go | 25 +++++++++++- src/internal/kopia/wrapper.go | 8 +++- src/internal/operations/backup.go | 3 +- 8 files changed, 112 insertions(+), 17 deletions(-) diff --git a/src/internal/connector/graph/errors.go b/src/internal/connector/graph/errors.go index a4910e975..ee345030c 100644 --- a/src/internal/connector/graph/errors.go +++ b/src/internal/connector/graph/errors.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net/http" + "net/http/httputil" "net/url" "os" "strings" @@ -15,6 +16,7 @@ import ( "github.com/alcionai/clues" "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/pkg/logger" ) // --------------------------------------------------------------------------- @@ -27,6 +29,7 @@ const ( errCodeItemNotFoundShort = "itemNotFound" errCodeEmailFolderNotFound = "ErrorSyncFolderNotFound" errCodeResyncRequired = "ResyncRequired" // alt: resyncRequired + errCodeMalwareDetected = "malwareDetected" errCodeSyncFolderNotFound = "ErrorSyncFolderNotFound" errCodeSyncStateNotFound = "SyncStateNotFound" errCodeResourceNotFound = "ResourceNotFound" @@ -50,11 +53,10 @@ var ( mysiteNotFound = "user's mysite not found" ) -var Labels = struct { - MysiteNotFound string -}{ - MysiteNotFound: "mysite_not_found", -} +const ( + LabelsMalware = "malware_detected" + LabelsMysiteNotFound = "mysite_not_found" +) // The folder or item was deleted between the time we identified // it and when we tried to fetch data for it. @@ -183,6 +185,31 @@ func IsInternalServerError(err error) bool { return errors.As(err, &e) } +// IsMalware is true if the graphAPI returns a "malware detected" error code. +func IsMalware(err error) bool { + return hasErrorCode(err, errCodeMalwareDetected) +} + +func IsMalwareResp(ctx context.Context, resp *http.Response) bool { + // https://learn.microsoft.com/en-us/openspecs/sharepoint_protocols/ms-wsshp/ba4ee7a8-704c-4e9c-ab14-fa44c574bdf4 + // https://learn.microsoft.com/en-us/openspecs/sharepoint_protocols/ms-wdvmoduu/6fa6d4a9-ac18-4cd7-b696-8a3b14a98291 + if resp.Header.Get("X-Virus-Infected") == "true" { + return true + } + + respDump, err := httputil.DumpResponse(resp, true) + if err != nil { + logger.Ctx(ctx).Errorw("dumping http response", "error", err) + return false + } + + if strings.Contains(string(respDump), errCodeMalwareDetected) { + return true + } + + return false +} + // --------------------------------------------------------------------------- // error parsers // --------------------------------------------------------------------------- @@ -245,7 +272,7 @@ func Stack(ctx context.Context, e error) *clues.Err { func setLabels(err *clues.Err, msg string) *clues.Err { if strings.Contains(msg, mysiteNotFound) || strings.Contains(msg, mysiteURLNotFound) { - err = err.Label(Labels.MysiteNotFound) + err = err.Label(LabelsMysiteNotFound) } return err diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go index ca7a5d64a..2ff31c173 100644 --- a/src/internal/connector/onedrive/collection.go +++ b/src/internal/connector/onedrive/collection.go @@ -416,13 +416,13 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { // check for errors following retries if err != nil { - if item.GetMalware() != nil { - logger.Ctx(ctx).With("error", err.Error(), "malware", true).Error("downloading item") + if clues.HasLabel(err, graph.LabelsMalware) { + logger.Ctx(ctx).Infow("malware item", clues.InErr(err).Slice()...) } else { logger.Ctx(ctx).With("error", err.Error()).Error("downloading item") + el.AddRecoverable(clues.Stack(err).WithClues(ctx).Label(fault.LabelForceNoBackupCreation)) } - el.AddRecoverable(clues.Stack(err).WithClues(ctx).Label(fault.LabelForceNoBackupCreation)) return nil, err } diff --git a/src/internal/connector/onedrive/collection_test.go b/src/internal/connector/onedrive/collection_test.go index 4cb13e8dd..1fdcb73a7 100644 --- a/src/internal/connector/onedrive/collection_test.go +++ b/src/internal/connector/onedrive/collection_test.go @@ -17,6 +17,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/alcionai/clues" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" @@ -88,6 +89,8 @@ func (suite *CollectionUnitTestSuite) TestCollection() { itemReader itemReaderFunc itemDeets nst infoFrom func(*testing.T, details.ItemInfo) (string, string) + expectErr require.ErrorAssertionFunc + expectLabel string }{ { name: "oneDrive, no duplicates", @@ -103,6 +106,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { require.NotNil(t, dii.OneDrive) return dii.OneDrive.ItemName, dii.OneDrive.ParentPath }, + expectErr: require.NoError, }, { name: "oneDrive, duplicates", @@ -118,6 +122,22 @@ func (suite *CollectionUnitTestSuite) TestCollection() { require.NotNil(t, dii.OneDrive) return dii.OneDrive.ItemName, dii.OneDrive.ParentPath }, + expectErr: require.NoError, + }, + { + name: "oneDrive, malware", + numInstances: 3, + source: OneDriveSource, + itemDeets: nst{testItemName, 42, now}, + itemReader: func(context.Context, *http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) { + return details.ItemInfo{}, nil, clues.New("test malware").Label(graph.LabelsMalware) + }, + infoFrom: func(t *testing.T, dii details.ItemInfo) (string, string) { + require.NotNil(t, dii.OneDrive) + return dii.OneDrive.ItemName, dii.OneDrive.ParentPath + }, + expectErr: require.Error, + expectLabel: graph.LabelsMalware, }, { name: "sharePoint, no duplicates", @@ -133,6 +153,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { require.NotNil(t, dii.SharePoint) return dii.SharePoint.ItemName, dii.SharePoint.ParentPath }, + expectErr: require.NoError, }, { name: "sharePoint, duplicates", @@ -148,6 +169,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() { require.NotNil(t, dii.SharePoint) return dii.SharePoint.ItemName, dii.SharePoint.ParentPath }, + expectErr: require.NoError, }, } for _, test := range table { @@ -242,7 +264,15 @@ func (suite *CollectionUnitTestSuite) TestCollection() { assert.Equal(t, now, mt.ModTime()) readData, err := io.ReadAll(readItem.ToReader()) - require.NoError(t, err) + test.expectErr(t, err) + + if err != nil { + if len(test.expectLabel) > 0 { + assert.True(t, clues.HasLabel(err, test.expectLabel), "has clues label:", test.expectLabel) + } + + return + } name, parentPath := test.infoFrom(t, readItemInfo.Info()) diff --git a/src/internal/connector/onedrive/drive.go b/src/internal/connector/onedrive/drive.go index e3c985fbf..d33c332fa 100644 --- a/src/internal/connector/onedrive/drive.go +++ b/src/internal/connector/onedrive/drive.go @@ -91,7 +91,7 @@ func drives( for i := 0; i <= numberOfRetries; i++ { page, err = pager.GetPage(ctx) if err != nil { - if clues.HasLabel(err, graph.Labels.MysiteNotFound) { + if clues.HasLabel(err, graph.LabelsMysiteNotFound) { logger.Ctx(ctx).Infof("resource owner does not have a drive") return make([]models.Driveable, 0), nil // no license or drives. } diff --git a/src/internal/connector/onedrive/item.go b/src/internal/connector/onedrive/item.go index dacd26af6..76f66b1f6 100644 --- a/src/internal/connector/onedrive/item.go +++ b/src/internal/connector/onedrive/item.go @@ -140,7 +140,7 @@ func downloadItem(ctx context.Context, hc *http.Client, item models.DriveItemabl req, err := http.NewRequest(http.MethodGet, *url, nil) if err != nil { - return nil, graph.Wrap(ctx, err, "new request") + return nil, graph.Wrap(ctx, err, "new item download request") } //nolint:lll @@ -150,13 +150,23 @@ func downloadItem(ctx context.Context, hc *http.Client, item models.DriveItemabl resp, err := hc.Do(req) if err != nil { - return nil, err + cerr := graph.Wrap(ctx, err, "downloading item") + + if graph.IsMalware(err) { + cerr = cerr.Label(graph.LabelsMalware) + } + + return nil, cerr } if (resp.StatusCode / 100) == 2 { return resp, nil } + if graph.IsMalwareResp(context.Background(), resp) { + return nil, clues.New("malware detected").Label(graph.LabelsMalware) + } + if resp.StatusCode == http.StatusTooManyRequests { return resp, graph.Err429TooManyRequests } diff --git a/src/internal/kopia/upload.go b/src/internal/kopia/upload.go index 0e1531840..52ba99419 100644 --- a/src/internal/kopia/upload.go +++ b/src/internal/kopia/upload.go @@ -22,6 +22,7 @@ import ( "github.com/kopia/kopia/snapshot/snapshotfs" "github.com/pkg/errors" + "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/graph/metadata" "github.com/alcionai/corso/src/internal/data" D "github.com/alcionai/corso/src/internal/diagnostics" @@ -141,6 +142,18 @@ type corsoProgress struct { mu sync.RWMutex totalBytes int64 errs *fault.Bus + // expectedIgnoredErrors is a count of error cases caught in the Error wrapper + // which are well known and actually ignorable. At the end of a run, if the + // manifest ignored error count is equal to this count, then everything is good. + expectedIgnoredErrors int +} + +// mutexted wrapper around expectedIgnoredErrors++ +func (cp *corsoProgress) incExpectedErrs() { + cp.mu.Lock() + defer cp.mu.Unlock() + + cp.expectedIgnoredErrors++ } // Kopia interface function used as a callback when kopia finishes processing a @@ -262,6 +275,15 @@ func (cp *corsoProgress) CachedFile(fname string, size int64) { // during the upload process. This could be from reading a file or something // else. func (cp *corsoProgress) Error(relpath string, err error, isIgnored bool) { + // The malware case is an artifact of being unable to skip the item + // if we catch detection at a late enough stage in collection enumeration. + // This is our next point of error handling, where we can identify and + // skip over the case. + if clues.HasLabel(err, graph.LabelsMalware) { + cp.incExpectedErrs() + return + } + defer cp.UploadProgress.Error(relpath, err, isIgnored) cp.errs.AddRecoverable(clues.Wrap(err, "kopia reported error"). @@ -382,7 +404,8 @@ func collectionEntries( modTime, newBackupStreamReader(serializationVersion, e.ToReader())) - if err := cb(ctx, entry); err != nil { + err = cb(ctx, entry) + if err != nil { // Kopia's uploader swallows errors in most cases, so if we see // something here it's probably a big issue and we should return. return seen, clues.Wrap(err, "executing callback").WithClues(ctx).With("item_path", itemPath) diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index ba4d5f267..a82c4c273 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -52,9 +52,11 @@ type BackupStats struct { CachedFileCount int UncachedFileCount int TotalDirectoryCount int - IgnoredErrorCount int ErrorCount int + IgnoredErrorCount int + ExpectedIgnoredErrorCount int + Incomplete bool IncompleteReason string } @@ -74,9 +76,11 @@ func manifestToStats( CachedFileCount: int(man.Stats.CachedFiles), UncachedFileCount: int(man.Stats.NonCachedFiles), TotalDirectoryCount: int(man.Stats.TotalDirectoryCount), - IgnoredErrorCount: int(man.Stats.IgnoredErrorCount), ErrorCount: int(man.Stats.ErrorCount), + IgnoredErrorCount: int(man.Stats.IgnoredErrorCount), + ExpectedIgnoredErrorCount: progress.expectedIgnoredErrors, + Incomplete: man.IncompleteReason != "", IncompleteReason: man.IncompleteReason, } diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 946445584..ceaf08dc6 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -470,7 +470,8 @@ func consumeBackupDataCollections( "kopia_ignored_errors", kopiaStats.IgnoredErrorCount) } - if kopiaStats.ErrorCount > 0 || kopiaStats.IgnoredErrorCount > 0 { + if kopiaStats.ErrorCount > 0 || + (kopiaStats.IgnoredErrorCount > kopiaStats.ExpectedIgnoredErrorCount) { err = clues.New("building kopia snapshot").With( "kopia_errors", kopiaStats.ErrorCount, "kopia_ignored_errors", kopiaStats.IgnoredErrorCount)