backup malware catch (#2703)

assuming it's possible for a graph
item to skip the malware detection,
we still want to catch and handle
400's from attempted malware
downloads downstream.

---

#### Does this PR need a docs update or release note?

- [x]  No

#### Type of change

- [ ] 🌻 Feature

#### Issue(s)

* #2701

#### Test Plan

- [x] 💪 Manual
- [x]  Unit test
This commit is contained in:
Keepers 2023-03-08 11:13:30 -07:00 committed by GitHub
parent b211892a80
commit 21a5729947
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 112 additions and 17 deletions

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"fmt" "fmt"
"net/http" "net/http"
"net/http/httputil"
"net/url" "net/url"
"os" "os"
"strings" "strings"
@ -15,6 +16,7 @@ import (
"github.com/alcionai/clues" "github.com/alcionai/clues"
"github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/common"
"github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/pkg/logger"
) )
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -27,6 +29,7 @@ const (
errCodeItemNotFoundShort = "itemNotFound" errCodeItemNotFoundShort = "itemNotFound"
errCodeEmailFolderNotFound = "ErrorSyncFolderNotFound" errCodeEmailFolderNotFound = "ErrorSyncFolderNotFound"
errCodeResyncRequired = "ResyncRequired" // alt: resyncRequired errCodeResyncRequired = "ResyncRequired" // alt: resyncRequired
errCodeMalwareDetected = "malwareDetected"
errCodeSyncFolderNotFound = "ErrorSyncFolderNotFound" errCodeSyncFolderNotFound = "ErrorSyncFolderNotFound"
errCodeSyncStateNotFound = "SyncStateNotFound" errCodeSyncStateNotFound = "SyncStateNotFound"
errCodeResourceNotFound = "ResourceNotFound" errCodeResourceNotFound = "ResourceNotFound"
@ -50,11 +53,10 @@ var (
mysiteNotFound = "user's mysite not found" mysiteNotFound = "user's mysite not found"
) )
var Labels = struct { const (
MysiteNotFound string LabelsMalware = "malware_detected"
}{ LabelsMysiteNotFound = "mysite_not_found"
MysiteNotFound: "mysite_not_found", )
}
// The folder or item was deleted between the time we identified // The folder or item was deleted between the time we identified
// it and when we tried to fetch data for it. // it and when we tried to fetch data for it.
@ -183,6 +185,31 @@ func IsInternalServerError(err error) bool {
return errors.As(err, &e) return errors.As(err, &e)
} }
// IsMalware is true if the graphAPI returns a "malware detected" error code.
func IsMalware(err error) bool {
return hasErrorCode(err, errCodeMalwareDetected)
}
func IsMalwareResp(ctx context.Context, resp *http.Response) bool {
// https://learn.microsoft.com/en-us/openspecs/sharepoint_protocols/ms-wsshp/ba4ee7a8-704c-4e9c-ab14-fa44c574bdf4
// https://learn.microsoft.com/en-us/openspecs/sharepoint_protocols/ms-wdvmoduu/6fa6d4a9-ac18-4cd7-b696-8a3b14a98291
if resp.Header.Get("X-Virus-Infected") == "true" {
return true
}
respDump, err := httputil.DumpResponse(resp, true)
if err != nil {
logger.Ctx(ctx).Errorw("dumping http response", "error", err)
return false
}
if strings.Contains(string(respDump), errCodeMalwareDetected) {
return true
}
return false
}
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// error parsers // error parsers
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -245,7 +272,7 @@ func Stack(ctx context.Context, e error) *clues.Err {
func setLabels(err *clues.Err, msg string) *clues.Err { func setLabels(err *clues.Err, msg string) *clues.Err {
if strings.Contains(msg, mysiteNotFound) || strings.Contains(msg, mysiteURLNotFound) { if strings.Contains(msg, mysiteNotFound) || strings.Contains(msg, mysiteURLNotFound) {
err = err.Label(Labels.MysiteNotFound) err = err.Label(LabelsMysiteNotFound)
} }
return err return err

View File

@ -416,13 +416,13 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
// check for errors following retries // check for errors following retries
if err != nil { if err != nil {
if item.GetMalware() != nil { if clues.HasLabel(err, graph.LabelsMalware) {
logger.Ctx(ctx).With("error", err.Error(), "malware", true).Error("downloading item") logger.Ctx(ctx).Infow("malware item", clues.InErr(err).Slice()...)
} else { } else {
logger.Ctx(ctx).With("error", err.Error()).Error("downloading item") logger.Ctx(ctx).With("error", err.Error()).Error("downloading item")
el.AddRecoverable(clues.Stack(err).WithClues(ctx).Label(fault.LabelForceNoBackupCreation))
} }
el.AddRecoverable(clues.Stack(err).WithClues(ctx).Label(fault.LabelForceNoBackupCreation))
return nil, err return nil, err
} }

View File

@ -17,6 +17,7 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
"github.com/alcionai/clues"
"github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/connector/support"
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
@ -88,6 +89,8 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
itemReader itemReaderFunc itemReader itemReaderFunc
itemDeets nst itemDeets nst
infoFrom func(*testing.T, details.ItemInfo) (string, string) infoFrom func(*testing.T, details.ItemInfo) (string, string)
expectErr require.ErrorAssertionFunc
expectLabel string
}{ }{
{ {
name: "oneDrive, no duplicates", name: "oneDrive, no duplicates",
@ -103,6 +106,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
require.NotNil(t, dii.OneDrive) require.NotNil(t, dii.OneDrive)
return dii.OneDrive.ItemName, dii.OneDrive.ParentPath return dii.OneDrive.ItemName, dii.OneDrive.ParentPath
}, },
expectErr: require.NoError,
}, },
{ {
name: "oneDrive, duplicates", name: "oneDrive, duplicates",
@ -118,6 +122,22 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
require.NotNil(t, dii.OneDrive) require.NotNil(t, dii.OneDrive)
return dii.OneDrive.ItemName, dii.OneDrive.ParentPath return dii.OneDrive.ItemName, dii.OneDrive.ParentPath
}, },
expectErr: require.NoError,
},
{
name: "oneDrive, malware",
numInstances: 3,
source: OneDriveSource,
itemDeets: nst{testItemName, 42, now},
itemReader: func(context.Context, *http.Client, models.DriveItemable) (details.ItemInfo, io.ReadCloser, error) {
return details.ItemInfo{}, nil, clues.New("test malware").Label(graph.LabelsMalware)
},
infoFrom: func(t *testing.T, dii details.ItemInfo) (string, string) {
require.NotNil(t, dii.OneDrive)
return dii.OneDrive.ItemName, dii.OneDrive.ParentPath
},
expectErr: require.Error,
expectLabel: graph.LabelsMalware,
}, },
{ {
name: "sharePoint, no duplicates", name: "sharePoint, no duplicates",
@ -133,6 +153,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
require.NotNil(t, dii.SharePoint) require.NotNil(t, dii.SharePoint)
return dii.SharePoint.ItemName, dii.SharePoint.ParentPath return dii.SharePoint.ItemName, dii.SharePoint.ParentPath
}, },
expectErr: require.NoError,
}, },
{ {
name: "sharePoint, duplicates", name: "sharePoint, duplicates",
@ -148,6 +169,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
require.NotNil(t, dii.SharePoint) require.NotNil(t, dii.SharePoint)
return dii.SharePoint.ItemName, dii.SharePoint.ParentPath return dii.SharePoint.ItemName, dii.SharePoint.ParentPath
}, },
expectErr: require.NoError,
}, },
} }
for _, test := range table { for _, test := range table {
@ -242,7 +264,15 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
assert.Equal(t, now, mt.ModTime()) assert.Equal(t, now, mt.ModTime())
readData, err := io.ReadAll(readItem.ToReader()) readData, err := io.ReadAll(readItem.ToReader())
require.NoError(t, err) test.expectErr(t, err)
if err != nil {
if len(test.expectLabel) > 0 {
assert.True(t, clues.HasLabel(err, test.expectLabel), "has clues label:", test.expectLabel)
}
return
}
name, parentPath := test.infoFrom(t, readItemInfo.Info()) name, parentPath := test.infoFrom(t, readItemInfo.Info())

View File

@ -91,7 +91,7 @@ func drives(
for i := 0; i <= numberOfRetries; i++ { for i := 0; i <= numberOfRetries; i++ {
page, err = pager.GetPage(ctx) page, err = pager.GetPage(ctx)
if err != nil { if err != nil {
if clues.HasLabel(err, graph.Labels.MysiteNotFound) { if clues.HasLabel(err, graph.LabelsMysiteNotFound) {
logger.Ctx(ctx).Infof("resource owner does not have a drive") logger.Ctx(ctx).Infof("resource owner does not have a drive")
return make([]models.Driveable, 0), nil // no license or drives. return make([]models.Driveable, 0), nil // no license or drives.
} }

View File

@ -140,7 +140,7 @@ func downloadItem(ctx context.Context, hc *http.Client, item models.DriveItemabl
req, err := http.NewRequest(http.MethodGet, *url, nil) req, err := http.NewRequest(http.MethodGet, *url, nil)
if err != nil { if err != nil {
return nil, graph.Wrap(ctx, err, "new request") return nil, graph.Wrap(ctx, err, "new item download request")
} }
//nolint:lll //nolint:lll
@ -150,13 +150,23 @@ func downloadItem(ctx context.Context, hc *http.Client, item models.DriveItemabl
resp, err := hc.Do(req) resp, err := hc.Do(req)
if err != nil { if err != nil {
return nil, err cerr := graph.Wrap(ctx, err, "downloading item")
if graph.IsMalware(err) {
cerr = cerr.Label(graph.LabelsMalware)
}
return nil, cerr
} }
if (resp.StatusCode / 100) == 2 { if (resp.StatusCode / 100) == 2 {
return resp, nil return resp, nil
} }
if graph.IsMalwareResp(context.Background(), resp) {
return nil, clues.New("malware detected").Label(graph.LabelsMalware)
}
if resp.StatusCode == http.StatusTooManyRequests { if resp.StatusCode == http.StatusTooManyRequests {
return resp, graph.Err429TooManyRequests return resp, graph.Err429TooManyRequests
} }

View File

@ -22,6 +22,7 @@ import (
"github.com/kopia/kopia/snapshot/snapshotfs" "github.com/kopia/kopia/snapshot/snapshotfs"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/alcionai/corso/src/internal/connector/graph"
"github.com/alcionai/corso/src/internal/connector/graph/metadata" "github.com/alcionai/corso/src/internal/connector/graph/metadata"
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
D "github.com/alcionai/corso/src/internal/diagnostics" D "github.com/alcionai/corso/src/internal/diagnostics"
@ -141,6 +142,18 @@ type corsoProgress struct {
mu sync.RWMutex mu sync.RWMutex
totalBytes int64 totalBytes int64
errs *fault.Bus errs *fault.Bus
// expectedIgnoredErrors is a count of error cases caught in the Error wrapper
// which are well known and actually ignorable. At the end of a run, if the
// manifest ignored error count is equal to this count, then everything is good.
expectedIgnoredErrors int
}
// mutexted wrapper around expectedIgnoredErrors++
func (cp *corsoProgress) incExpectedErrs() {
cp.mu.Lock()
defer cp.mu.Unlock()
cp.expectedIgnoredErrors++
} }
// Kopia interface function used as a callback when kopia finishes processing a // Kopia interface function used as a callback when kopia finishes processing a
@ -262,6 +275,15 @@ func (cp *corsoProgress) CachedFile(fname string, size int64) {
// during the upload process. This could be from reading a file or something // during the upload process. This could be from reading a file or something
// else. // else.
func (cp *corsoProgress) Error(relpath string, err error, isIgnored bool) { func (cp *corsoProgress) Error(relpath string, err error, isIgnored bool) {
// The malware case is an artifact of being unable to skip the item
// if we catch detection at a late enough stage in collection enumeration.
// This is our next point of error handling, where we can identify and
// skip over the case.
if clues.HasLabel(err, graph.LabelsMalware) {
cp.incExpectedErrs()
return
}
defer cp.UploadProgress.Error(relpath, err, isIgnored) defer cp.UploadProgress.Error(relpath, err, isIgnored)
cp.errs.AddRecoverable(clues.Wrap(err, "kopia reported error"). cp.errs.AddRecoverable(clues.Wrap(err, "kopia reported error").
@ -382,7 +404,8 @@ func collectionEntries(
modTime, modTime,
newBackupStreamReader(serializationVersion, e.ToReader())) newBackupStreamReader(serializationVersion, e.ToReader()))
if err := cb(ctx, entry); err != nil { err = cb(ctx, entry)
if err != nil {
// Kopia's uploader swallows errors in most cases, so if we see // Kopia's uploader swallows errors in most cases, so if we see
// something here it's probably a big issue and we should return. // something here it's probably a big issue and we should return.
return seen, clues.Wrap(err, "executing callback").WithClues(ctx).With("item_path", itemPath) return seen, clues.Wrap(err, "executing callback").WithClues(ctx).With("item_path", itemPath)

View File

@ -52,9 +52,11 @@ type BackupStats struct {
CachedFileCount int CachedFileCount int
UncachedFileCount int UncachedFileCount int
TotalDirectoryCount int TotalDirectoryCount int
IgnoredErrorCount int
ErrorCount int ErrorCount int
IgnoredErrorCount int
ExpectedIgnoredErrorCount int
Incomplete bool Incomplete bool
IncompleteReason string IncompleteReason string
} }
@ -74,9 +76,11 @@ func manifestToStats(
CachedFileCount: int(man.Stats.CachedFiles), CachedFileCount: int(man.Stats.CachedFiles),
UncachedFileCount: int(man.Stats.NonCachedFiles), UncachedFileCount: int(man.Stats.NonCachedFiles),
TotalDirectoryCount: int(man.Stats.TotalDirectoryCount), TotalDirectoryCount: int(man.Stats.TotalDirectoryCount),
IgnoredErrorCount: int(man.Stats.IgnoredErrorCount),
ErrorCount: int(man.Stats.ErrorCount), ErrorCount: int(man.Stats.ErrorCount),
IgnoredErrorCount: int(man.Stats.IgnoredErrorCount),
ExpectedIgnoredErrorCount: progress.expectedIgnoredErrors,
Incomplete: man.IncompleteReason != "", Incomplete: man.IncompleteReason != "",
IncompleteReason: man.IncompleteReason, IncompleteReason: man.IncompleteReason,
} }

View File

@ -470,7 +470,8 @@ func consumeBackupDataCollections(
"kopia_ignored_errors", kopiaStats.IgnoredErrorCount) "kopia_ignored_errors", kopiaStats.IgnoredErrorCount)
} }
if kopiaStats.ErrorCount > 0 || kopiaStats.IgnoredErrorCount > 0 { if kopiaStats.ErrorCount > 0 ||
(kopiaStats.IgnoredErrorCount > kopiaStats.ExpectedIgnoredErrorCount) {
err = clues.New("building kopia snapshot").With( err = clues.New("building kopia snapshot").With(
"kopia_errors", kopiaStats.ErrorCount, "kopia_errors", kopiaStats.ErrorCount,
"kopia_ignored_errors", kopiaStats.IgnoredErrorCount) "kopia_ignored_errors", kopiaStats.IgnoredErrorCount)