From 57accfc9c4879c03398c43ca09e9a72d00cb4cae Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Fri, 27 Jan 2023 14:48:30 -0800 Subject: [PATCH] Use mocks to test getting drive info from Graph API (#2306) ## Description Make a separate interface for fetching drive information from Graph API. Interface allows for better testing via mocks Merges SharePoint and OneDrive code for getting drives. Also fixes potential bug where not all drives would be fetched. This could have occurred because the previous implementation for both SharePoint and OneDrive were not checking for paginated results Viewing by commit is recommended ## Does this PR need a docs update or release note? - [x] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [ ] :no_entry: No ## Type of change - [ ] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [x] :robot: Test - [ ] :computer: CI/Deployment - [x] :broom: Tech Debt/Cleanup ## Issue(s) * #2264 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- CHANGELOG.md | 1 + src/cmd/purge/purge.go | 7 +- src/internal/connector/onedrive/api/drive.go | 108 ++++++ .../connector/onedrive/collections.go | 9 +- src/internal/connector/onedrive/drive.go | 146 ++++---- src/internal/connector/onedrive/drive_test.go | 312 +++++++++++++++++- src/internal/connector/onedrive/item_test.go | 5 +- 7 files changed, 520 insertions(+), 68 deletions(-) create mode 100644 src/internal/connector/onedrive/api/drive.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f1eb0fd2..696b33e9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove the M365 license guid check in OneDrive backup which wasn't reliable. - Reduced extra socket consumption while downloading multiple drive files. - Extended timeout boundaries for exchange attachment downloads, reducing risk of cancellation on large files. +- Identify all drives associated with a user or SharePoint site instead of just the results on the first page returned by Graph API. ## [v0.1.0] (alpha) - 2023-01-13 diff --git a/src/cmd/purge/purge.go b/src/cmd/purge/purge.go index 3d7074835..cb9c9976f 100644 --- a/src/cmd/purge/purge.go +++ b/src/cmd/purge/purge.go @@ -151,7 +151,12 @@ func purgeOneDriveFolders( uid string, ) error { getter := func(gs graph.Servicer, uid, prefix string) ([]purgable, error) { - cfs, err := onedrive.GetAllFolders(ctx, gs, uid, prefix) + pager, err := onedrive.PagerForSource(onedrive.OneDriveSource, gs, uid, nil) + if err != nil { + return nil, err + } + + cfs, err := onedrive.GetAllFolders(ctx, gs, pager, prefix) if err != nil { return nil, err } diff --git a/src/internal/connector/onedrive/api/drive.go b/src/internal/connector/onedrive/api/drive.go new file mode 100644 index 000000000..7e5cfcaac --- /dev/null +++ b/src/internal/connector/onedrive/api/drive.go @@ -0,0 +1,108 @@ +package api + +import ( + "context" + + "github.com/microsoftgraph/msgraph-sdk-go/models" + mssites "github.com/microsoftgraph/msgraph-sdk-go/sites" + msusers "github.com/microsoftgraph/msgraph-sdk-go/users" + "github.com/pkg/errors" + + "github.com/alcionai/corso/src/internal/connector/graph" +) + +type PageLinker interface { + GetOdataNextLink() *string +} + +type userDrivePager struct { + gs graph.Servicer + builder *msusers.ItemDrivesRequestBuilder + options *msusers.ItemDrivesRequestBuilderGetRequestConfiguration +} + +func NewUserDrivePager( + gs graph.Servicer, + userID string, + fields []string, +) *userDrivePager { + requestConfig := &msusers.ItemDrivesRequestBuilderGetRequestConfiguration{ + QueryParameters: &msusers.ItemDrivesRequestBuilderGetQueryParameters{ + Select: fields, + }, + } + + res := &userDrivePager{ + gs: gs, + options: requestConfig, + builder: gs.Client().UsersById(userID).Drives(), + } + + return res +} + +func (p *userDrivePager) GetPage(ctx context.Context) (PageLinker, error) { + return p.builder.Get(ctx, p.options) +} + +func (p *userDrivePager) SetNext(link string) { + p.builder = msusers.NewItemDrivesRequestBuilder(link, p.gs.Adapter()) +} + +func (p *userDrivePager) ValuesIn(l PageLinker) ([]models.Driveable, error) { + page, ok := l.(interface{ GetValue() []models.Driveable }) + if !ok { + return nil, errors.Errorf( + "response of type [%T] does not comply with GetValue() interface", + l, + ) + } + + return page.GetValue(), nil +} + +type siteDrivePager struct { + gs graph.Servicer + builder *mssites.ItemDrivesRequestBuilder + options *mssites.ItemDrivesRequestBuilderGetRequestConfiguration +} + +func NewSiteDrivePager( + gs graph.Servicer, + siteID string, + fields []string, +) *siteDrivePager { + requestConfig := &mssites.ItemDrivesRequestBuilderGetRequestConfiguration{ + QueryParameters: &mssites.ItemDrivesRequestBuilderGetQueryParameters{ + Select: fields, + }, + } + + res := &siteDrivePager{ + gs: gs, + options: requestConfig, + builder: gs.Client().SitesById(siteID).Drives(), + } + + return res +} + +func (p *siteDrivePager) GetPage(ctx context.Context) (PageLinker, error) { + return p.builder.Get(ctx, p.options) +} + +func (p *siteDrivePager) SetNext(link string) { + p.builder = mssites.NewItemDrivesRequestBuilder(link, p.gs.Adapter()) +} + +func (p *siteDrivePager) ValuesIn(l PageLinker) ([]models.Driveable, error) { + page, ok := l.(interface{ GetValue() []models.Driveable }) + if !ok { + return nil, errors.Errorf( + "response of type [%T] does not comply with GetValue() interface", + l, + ) + } + + return page.GetValue(), nil +} diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 8761a37ca..0813c13ed 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -95,7 +95,14 @@ func NewCollections( // Retrieves drive data as set of `data.Collections` func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { // Enumerate drives for the specified resourceOwner - drives, err := drives(ctx, c.service, c.resourceOwner, c.source) + pager, err := PagerForSource(c.source, c.service, c.resourceOwner, nil) + if err != nil { + return nil, err + } + + retry := c.source == OneDriveSource + + drives, err := drives(ctx, pager, retry) if err != nil { return nil, err } diff --git a/src/internal/connector/onedrive/drive.go b/src/internal/connector/onedrive/drive.go index c765e3719..4bc56aec0 100644 --- a/src/internal/connector/onedrive/drive.go +++ b/src/internal/connector/onedrive/drive.go @@ -10,11 +10,11 @@ import ( msdrives "github.com/microsoftgraph/msgraph-sdk-go/drives" "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/microsoftgraph/msgraph-sdk-go/models/odataerrors" - "github.com/microsoftgraph/msgraph-sdk-go/sites" "github.com/pkg/errors" "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/internal/connector/onedrive/api" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/pkg/logger" ) @@ -22,86 +22,106 @@ import ( var errFolderNotFound = errors.New("folder not found") const ( + getDrivesRetries = 3 + // nextLinkKey is used to find the next link in a paged // graph response - nextLinkKey = "@odata.nextLink" - itemChildrenRawURLFmt = "https://graph.microsoft.com/v1.0/drives/%s/items/%s/children" - itemByPathRawURLFmt = "https://graph.microsoft.com/v1.0/drives/%s/items/%s:/%s" - itemNotFoundErrorCode = "itemNotFound" - userMysiteURLNotFound = "BadRequest Unable to retrieve user's mysite URL" - userMysiteNotFound = "ResourceNotFound User's mysite not found" + nextLinkKey = "@odata.nextLink" + itemChildrenRawURLFmt = "https://graph.microsoft.com/v1.0/drives/%s/items/%s/children" + itemByPathRawURLFmt = "https://graph.microsoft.com/v1.0/drives/%s/items/%s:/%s" + itemNotFoundErrorCode = "itemNotFound" + userMysiteURLNotFound = "BadRequest Unable to retrieve user's mysite URL" + userMysiteNotFound = "ResourceNotFound User's mysite not found" + contextDeadlineExceeded = "context deadline exceeded" ) -// Enumerates the drives for the specified user -func drives( - ctx context.Context, - service graph.Servicer, - resourceOwner string, +type drivePager interface { + GetPage(context.Context) (api.PageLinker, error) + SetNext(nextLink string) + ValuesIn(api.PageLinker) ([]models.Driveable, error) +} + +func PagerForSource( source driveSource, -) ([]models.Driveable, error) { + servicer graph.Servicer, + resourceOwner string, + fields []string, +) (drivePager, error) { switch source { case OneDriveSource: - return userDrives(ctx, service, resourceOwner) + return api.NewUserDrivePager(servicer, resourceOwner, fields), nil case SharePointSource: - return siteDrives(ctx, service, resourceOwner) + return api.NewSiteDrivePager(servicer, resourceOwner, fields), nil default: return nil, errors.Errorf("unrecognized drive data source") } } -func siteDrives(ctx context.Context, service graph.Servicer, site string) ([]models.Driveable, error) { - options := &sites.ItemDrivesRequestBuilderGetRequestConfiguration{ - QueryParameters: &sites.ItemDrivesRequestBuilderGetQueryParameters{ - Select: []string{"id", "name", "weburl", "system"}, - }, - } - - r, err := service.Client().SitesById(site).Drives().Get(ctx, options) - if err != nil { - return nil, errors.Wrapf(err, "failed to retrieve site drives. site: %s, details: %s", - site, support.ConnectorStackErrorTrace(err)) - } - - return r.GetValue(), nil -} - -func userDrives(ctx context.Context, service graph.Servicer, user string) ([]models.Driveable, error) { +func drives( + ctx context.Context, + pager drivePager, + retry bool, +) ([]models.Driveable, error) { var ( - numberOfRetries = 3 - r models.DriveCollectionResponseable err error + page api.PageLinker + numberOfRetries = getDrivesRetries + drives = []models.Driveable{} ) - // Retry Loop for Drive retrieval. Request can timeout - for i := 0; i <= numberOfRetries; i++ { - r, err = service.Client().UsersById(user).Drives().Get(ctx, nil) - if err != nil { - detailedError := support.ConnectorStackErrorTrace(err) - if strings.Contains(detailedError, userMysiteURLNotFound) || - strings.Contains(detailedError, userMysiteNotFound) { - logger.Ctx(ctx).Infof("User %s does not have a drive", user) - return make([]models.Driveable, 0), nil // no license - } - - if strings.Contains(detailedError, "context deadline exceeded") && i < numberOfRetries { - time.Sleep(time.Duration(3*(i+1)) * time.Second) - continue - } - - return nil, errors.Wrapf( - err, - "failed to retrieve user drives. user: %s, details: %s", - user, - detailedError, - ) - } - - break + if !retry { + numberOfRetries = 0 } - logger.Ctx(ctx).Debugf("Found %d drives for user %s", len(r.GetValue()), user) + // Loop through all pages returned by Graph API. + for { + // Retry Loop for Drive retrieval. Request can timeout + for i := 0; i <= numberOfRetries; i++ { + page, err = pager.GetPage(ctx) + if err != nil { + // Various error handling. May return an error or perform a retry. + detailedError := support.ConnectorStackErrorTrace(err) + if strings.Contains(detailedError, userMysiteURLNotFound) || + strings.Contains(detailedError, userMysiteNotFound) { + logger.Ctx(ctx).Infof("resource owner does not have a drive") + return make([]models.Driveable, 0), nil // no license or drives. + } - return r.GetValue(), nil + if strings.Contains(detailedError, contextDeadlineExceeded) && i < numberOfRetries { + time.Sleep(time.Duration(3*(i+1)) * time.Second) + continue + } + + return nil, errors.Wrapf( + err, + "failed to retrieve drives. details: %s", + detailedError, + ) + } + + // No error encountered, break the retry loop so we can extract results + // and see if there's another page to fetch. + break + } + + tmp, err := pager.ValuesIn(page) + if err != nil { + return nil, errors.Wrap(err, "extracting drives from response") + } + + drives = append(drives, tmp...) + + nextLink := page.GetOdataNextLink() + if nextLink == nil || len(*nextLink) == 0 { + break + } + + pager.SetNext(*nextLink) + } + + logger.Ctx(ctx).Debugf("Found %d drives", len(drives)) + + return drives, nil } // itemCollector functions collect the items found in a drive @@ -284,10 +304,10 @@ func (op *Displayable) GetDisplayName() *string { func GetAllFolders( ctx context.Context, gs graph.Servicer, - userID string, + pager drivePager, prefix string, ) ([]*Displayable, error) { - drives, err := drives(ctx, gs, userID, OneDriveSource) + drives, err := drives(ctx, pager, true) if err != nil { return nil, errors.Wrap(err, "getting OneDrive folders") } diff --git a/src/internal/connector/onedrive/drive_test.go b/src/internal/connector/onedrive/drive_test.go index 631381240..6ace0c3c2 100644 --- a/src/internal/connector/onedrive/drive_test.go +++ b/src/internal/connector/onedrive/drive_test.go @@ -1,21 +1,323 @@ package onedrive import ( + "context" "strings" "testing" + "github.com/google/uuid" + "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/microsoftgraph/msgraph-sdk-go/models/odataerrors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/internal/connector/onedrive/api" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/selectors" ) +type mockPageLinker struct { + link *string +} + +func (pl *mockPageLinker) GetOdataNextLink() *string { + return pl.link +} + +type pagerResult struct { + drives []models.Driveable + nextLink *string + err error +} + +type mockDrivePager struct { + toReturn []pagerResult + getIdx int +} + +func (p *mockDrivePager) GetPage(context.Context) (api.PageLinker, error) { + if len(p.toReturn) <= p.getIdx { + return nil, assert.AnError + } + + idx := p.getIdx + p.getIdx++ + + return &mockPageLinker{p.toReturn[idx].nextLink}, p.toReturn[idx].err +} + +func (p *mockDrivePager) SetNext(string) {} + +func (p *mockDrivePager) ValuesIn(api.PageLinker) ([]models.Driveable, error) { + idx := p.getIdx + if idx > 0 { + // Return values lag by one since we increment in GetPage(). + idx-- + } + + if len(p.toReturn) <= idx { + return nil, assert.AnError + } + + return p.toReturn[idx].drives, nil +} + +// Unit tests +type OneDriveUnitSuite struct { + suite.Suite +} + +func TestOneDriveUnitSuite(t *testing.T) { + suite.Run(t, new(OneDriveUnitSuite)) +} + +func (suite *OneDriveUnitSuite) TestDrives() { + numDriveResults := 4 + emptyLink := "" + link := "foo" + + // These errors won't be the "correct" format when compared to what graph + // returns, but they're close enough to have the same info when the inner + // details are extracted via support package. + tmp := userMysiteURLNotFound + tmpMySiteURLNotFound := odataerrors.NewMainError() + tmpMySiteURLNotFound.SetMessage(&tmp) + + mySiteURLNotFound := odataerrors.NewODataError() + mySiteURLNotFound.SetError(tmpMySiteURLNotFound) + + tmp2 := userMysiteNotFound + tmpMySiteNotFound := odataerrors.NewMainError() + tmpMySiteNotFound.SetMessage(&tmp2) + + mySiteNotFound := odataerrors.NewODataError() + mySiteNotFound.SetError(tmpMySiteNotFound) + + tmp3 := contextDeadlineExceeded + tmpDeadlineExceeded := odataerrors.NewMainError() + tmpDeadlineExceeded.SetMessage(&tmp3) + + deadlineExceeded := odataerrors.NewODataError() + deadlineExceeded.SetError(tmpDeadlineExceeded) + + resultDrives := make([]models.Driveable, 0, numDriveResults) + + for i := 0; i < numDriveResults; i++ { + d := models.NewDrive() + id := uuid.NewString() + d.SetId(&id) + + resultDrives = append(resultDrives, d) + } + + tooManyRetries := make([]pagerResult, 0, getDrivesRetries+1) + + for i := 0; i < getDrivesRetries+1; i++ { + tooManyRetries = append(tooManyRetries, pagerResult{ + err: deadlineExceeded, + }) + } + + table := []struct { + name string + pagerResults []pagerResult + retry bool + expectedErr assert.ErrorAssertionFunc + expectedResults []models.Driveable + }{ + { + name: "AllOneResultNilNextLink", + pagerResults: []pagerResult{ + { + drives: resultDrives, + nextLink: nil, + err: nil, + }, + }, + retry: false, + expectedErr: assert.NoError, + expectedResults: resultDrives, + }, + { + name: "AllOneResultEmptyNextLink", + pagerResults: []pagerResult{ + { + drives: resultDrives, + nextLink: &emptyLink, + err: nil, + }, + }, + retry: false, + expectedErr: assert.NoError, + expectedResults: resultDrives, + }, + { + name: "SplitResultsNilNextLink", + pagerResults: []pagerResult{ + { + drives: resultDrives[:numDriveResults/2], + nextLink: &link, + err: nil, + }, + { + drives: resultDrives[numDriveResults/2:], + nextLink: nil, + err: nil, + }, + }, + retry: false, + expectedErr: assert.NoError, + expectedResults: resultDrives, + }, + { + name: "SplitResultsEmptyNextLink", + pagerResults: []pagerResult{ + { + drives: resultDrives[:numDriveResults/2], + nextLink: &link, + err: nil, + }, + { + drives: resultDrives[numDriveResults/2:], + nextLink: &emptyLink, + err: nil, + }, + }, + retry: false, + expectedErr: assert.NoError, + expectedResults: resultDrives, + }, + { + name: "NonRetryableError", + pagerResults: []pagerResult{ + { + drives: resultDrives, + nextLink: &link, + err: nil, + }, + { + drives: nil, + nextLink: nil, + err: assert.AnError, + }, + }, + retry: true, + expectedErr: assert.Error, + expectedResults: nil, + }, + { + name: "SiteURLNotFound", + pagerResults: []pagerResult{ + { + drives: nil, + nextLink: nil, + err: mySiteURLNotFound, + }, + }, + retry: true, + expectedErr: assert.NoError, + expectedResults: nil, + }, + { + name: "SiteNotFound", + pagerResults: []pagerResult{ + { + drives: nil, + nextLink: nil, + err: mySiteNotFound, + }, + }, + retry: true, + expectedErr: assert.NoError, + expectedResults: nil, + }, + { + name: "SplitResultsContextTimeoutWithRetries", + pagerResults: []pagerResult{ + { + drives: resultDrives[:numDriveResults/2], + nextLink: &link, + err: nil, + }, + { + drives: nil, + nextLink: nil, + err: deadlineExceeded, + }, + { + drives: resultDrives[numDriveResults/2:], + nextLink: &emptyLink, + err: nil, + }, + }, + retry: true, + expectedErr: assert.NoError, + expectedResults: resultDrives, + }, + { + name: "SplitResultsContextTimeoutNoRetries", + pagerResults: []pagerResult{ + { + drives: resultDrives[:numDriveResults/2], + nextLink: &link, + err: nil, + }, + { + drives: nil, + nextLink: nil, + err: deadlineExceeded, + }, + { + drives: resultDrives[numDriveResults/2:], + nextLink: &emptyLink, + err: nil, + }, + }, + retry: false, + expectedErr: assert.Error, + expectedResults: nil, + }, + { + name: "TooManyRetries", + pagerResults: append( + []pagerResult{ + { + drives: resultDrives[:numDriveResults/2], + nextLink: &link, + err: nil, + }, + }, + tooManyRetries..., + ), + retry: true, + expectedErr: assert.Error, + expectedResults: nil, + }, + } + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + ctx, flush := tester.NewContext() + defer flush() + + pager := &mockDrivePager{ + toReturn: test.pagerResults, + } + + drives, err := drives(ctx, pager, test.retry) + test.expectedErr(t, err) + + assert.ElementsMatch(t, test.expectedResults, drives) + }) + } +} + +// Integration tests + type OneDriveSuite struct { suite.Suite userID string @@ -44,7 +346,10 @@ func (suite *OneDriveSuite) TestCreateGetDeleteFolder() { folderElements := []string{folderName1} gs := loadTestService(t) - drives, err := drives(ctx, gs, suite.userID, OneDriveSource) + pager, err := PagerForSource(OneDriveSource, gs, suite.userID, nil) + require.NoError(t, err) + + drives, err := drives(ctx, pager, true) require.NoError(t, err) require.NotEmpty(t, drives) @@ -89,7 +394,10 @@ func (suite *OneDriveSuite) TestCreateGetDeleteFolder() { for _, test := range table { suite.T().Run(test.name, func(t *testing.T) { - allFolders, err := GetAllFolders(ctx, gs, suite.userID, test.prefix) + pager, err := PagerForSource(OneDriveSource, gs, suite.userID, nil) + require.NoError(t, err) + + allFolders, err := GetAllFolders(ctx, gs, pager, test.prefix) require.NoError(t, err) foundFolderIDs := []string{} diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index 5364d543c..938748ca2 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -75,7 +75,10 @@ func (suite *ItemIntegrationSuite) SetupSuite() { suite.user = tester.SecondaryM365UserID(t) - odDrives, err := drives(ctx, suite, suite.user, OneDriveSource) + pager, err := PagerForSource(OneDriveSource, suite, suite.user, nil) + require.NoError(t, err) + + odDrives, err := drives(ctx, pager, true) require.NoError(t, err) // Test Requirement 1: Need a drive require.Greaterf(t, len(odDrives), 0, "user %s does not have a drive", suite.user)