diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f1eb0fd2..696b33e9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove the M365 license guid check in OneDrive backup which wasn't reliable. - Reduced extra socket consumption while downloading multiple drive files. - Extended timeout boundaries for exchange attachment downloads, reducing risk of cancellation on large files. +- Identify all drives associated with a user or SharePoint site instead of just the results on the first page returned by Graph API. ## [v0.1.0] (alpha) - 2023-01-13 diff --git a/src/cmd/purge/purge.go b/src/cmd/purge/purge.go index 3d7074835..cb9c9976f 100644 --- a/src/cmd/purge/purge.go +++ b/src/cmd/purge/purge.go @@ -151,7 +151,12 @@ func purgeOneDriveFolders( uid string, ) error { getter := func(gs graph.Servicer, uid, prefix string) ([]purgable, error) { - cfs, err := onedrive.GetAllFolders(ctx, gs, uid, prefix) + pager, err := onedrive.PagerForSource(onedrive.OneDriveSource, gs, uid, nil) + if err != nil { + return nil, err + } + + cfs, err := onedrive.GetAllFolders(ctx, gs, pager, prefix) if err != nil { return nil, err } diff --git a/src/internal/connector/onedrive/api/drive.go b/src/internal/connector/onedrive/api/drive.go new file mode 100644 index 000000000..7e5cfcaac --- /dev/null +++ b/src/internal/connector/onedrive/api/drive.go @@ -0,0 +1,108 @@ +package api + +import ( + "context" + + "github.com/microsoftgraph/msgraph-sdk-go/models" + mssites "github.com/microsoftgraph/msgraph-sdk-go/sites" + msusers "github.com/microsoftgraph/msgraph-sdk-go/users" + "github.com/pkg/errors" + + "github.com/alcionai/corso/src/internal/connector/graph" +) + +type PageLinker interface { + GetOdataNextLink() *string +} + +type userDrivePager struct { + gs graph.Servicer + builder *msusers.ItemDrivesRequestBuilder + options *msusers.ItemDrivesRequestBuilderGetRequestConfiguration +} + +func NewUserDrivePager( + gs graph.Servicer, + userID string, + fields []string, +) *userDrivePager { + requestConfig := &msusers.ItemDrivesRequestBuilderGetRequestConfiguration{ + QueryParameters: &msusers.ItemDrivesRequestBuilderGetQueryParameters{ + Select: fields, + }, + } + + res := &userDrivePager{ + gs: gs, + options: requestConfig, + builder: gs.Client().UsersById(userID).Drives(), + } + + return res +} + +func (p *userDrivePager) GetPage(ctx context.Context) (PageLinker, error) { + return p.builder.Get(ctx, p.options) +} + +func (p *userDrivePager) SetNext(link string) { + p.builder = msusers.NewItemDrivesRequestBuilder(link, p.gs.Adapter()) +} + +func (p *userDrivePager) ValuesIn(l PageLinker) ([]models.Driveable, error) { + page, ok := l.(interface{ GetValue() []models.Driveable }) + if !ok { + return nil, errors.Errorf( + "response of type [%T] does not comply with GetValue() interface", + l, + ) + } + + return page.GetValue(), nil +} + +type siteDrivePager struct { + gs graph.Servicer + builder *mssites.ItemDrivesRequestBuilder + options *mssites.ItemDrivesRequestBuilderGetRequestConfiguration +} + +func NewSiteDrivePager( + gs graph.Servicer, + siteID string, + fields []string, +) *siteDrivePager { + requestConfig := &mssites.ItemDrivesRequestBuilderGetRequestConfiguration{ + QueryParameters: &mssites.ItemDrivesRequestBuilderGetQueryParameters{ + Select: fields, + }, + } + + res := &siteDrivePager{ + gs: gs, + options: requestConfig, + builder: gs.Client().SitesById(siteID).Drives(), + } + + return res +} + +func (p *siteDrivePager) GetPage(ctx context.Context) (PageLinker, error) { + return p.builder.Get(ctx, p.options) +} + +func (p *siteDrivePager) SetNext(link string) { + p.builder = mssites.NewItemDrivesRequestBuilder(link, p.gs.Adapter()) +} + +func (p *siteDrivePager) ValuesIn(l PageLinker) ([]models.Driveable, error) { + page, ok := l.(interface{ GetValue() []models.Driveable }) + if !ok { + return nil, errors.Errorf( + "response of type [%T] does not comply with GetValue() interface", + l, + ) + } + + return page.GetValue(), nil +} diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 8761a37ca..0813c13ed 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -95,7 +95,14 @@ func NewCollections( // Retrieves drive data as set of `data.Collections` func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { // Enumerate drives for the specified resourceOwner - drives, err := drives(ctx, c.service, c.resourceOwner, c.source) + pager, err := PagerForSource(c.source, c.service, c.resourceOwner, nil) + if err != nil { + return nil, err + } + + retry := c.source == OneDriveSource + + drives, err := drives(ctx, pager, retry) if err != nil { return nil, err } diff --git a/src/internal/connector/onedrive/drive.go b/src/internal/connector/onedrive/drive.go index c765e3719..4bc56aec0 100644 --- a/src/internal/connector/onedrive/drive.go +++ b/src/internal/connector/onedrive/drive.go @@ -10,11 +10,11 @@ import ( msdrives "github.com/microsoftgraph/msgraph-sdk-go/drives" "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/microsoftgraph/msgraph-sdk-go/models/odataerrors" - "github.com/microsoftgraph/msgraph-sdk-go/sites" "github.com/pkg/errors" "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/internal/connector/onedrive/api" "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/pkg/logger" ) @@ -22,86 +22,106 @@ import ( var errFolderNotFound = errors.New("folder not found") const ( + getDrivesRetries = 3 + // nextLinkKey is used to find the next link in a paged // graph response - nextLinkKey = "@odata.nextLink" - itemChildrenRawURLFmt = "https://graph.microsoft.com/v1.0/drives/%s/items/%s/children" - itemByPathRawURLFmt = "https://graph.microsoft.com/v1.0/drives/%s/items/%s:/%s" - itemNotFoundErrorCode = "itemNotFound" - userMysiteURLNotFound = "BadRequest Unable to retrieve user's mysite URL" - userMysiteNotFound = "ResourceNotFound User's mysite not found" + nextLinkKey = "@odata.nextLink" + itemChildrenRawURLFmt = "https://graph.microsoft.com/v1.0/drives/%s/items/%s/children" + itemByPathRawURLFmt = "https://graph.microsoft.com/v1.0/drives/%s/items/%s:/%s" + itemNotFoundErrorCode = "itemNotFound" + userMysiteURLNotFound = "BadRequest Unable to retrieve user's mysite URL" + userMysiteNotFound = "ResourceNotFound User's mysite not found" + contextDeadlineExceeded = "context deadline exceeded" ) -// Enumerates the drives for the specified user -func drives( - ctx context.Context, - service graph.Servicer, - resourceOwner string, +type drivePager interface { + GetPage(context.Context) (api.PageLinker, error) + SetNext(nextLink string) + ValuesIn(api.PageLinker) ([]models.Driveable, error) +} + +func PagerForSource( source driveSource, -) ([]models.Driveable, error) { + servicer graph.Servicer, + resourceOwner string, + fields []string, +) (drivePager, error) { switch source { case OneDriveSource: - return userDrives(ctx, service, resourceOwner) + return api.NewUserDrivePager(servicer, resourceOwner, fields), nil case SharePointSource: - return siteDrives(ctx, service, resourceOwner) + return api.NewSiteDrivePager(servicer, resourceOwner, fields), nil default: return nil, errors.Errorf("unrecognized drive data source") } } -func siteDrives(ctx context.Context, service graph.Servicer, site string) ([]models.Driveable, error) { - options := &sites.ItemDrivesRequestBuilderGetRequestConfiguration{ - QueryParameters: &sites.ItemDrivesRequestBuilderGetQueryParameters{ - Select: []string{"id", "name", "weburl", "system"}, - }, - } - - r, err := service.Client().SitesById(site).Drives().Get(ctx, options) - if err != nil { - return nil, errors.Wrapf(err, "failed to retrieve site drives. site: %s, details: %s", - site, support.ConnectorStackErrorTrace(err)) - } - - return r.GetValue(), nil -} - -func userDrives(ctx context.Context, service graph.Servicer, user string) ([]models.Driveable, error) { +func drives( + ctx context.Context, + pager drivePager, + retry bool, +) ([]models.Driveable, error) { var ( - numberOfRetries = 3 - r models.DriveCollectionResponseable err error + page api.PageLinker + numberOfRetries = getDrivesRetries + drives = []models.Driveable{} ) - // Retry Loop for Drive retrieval. Request can timeout - for i := 0; i <= numberOfRetries; i++ { - r, err = service.Client().UsersById(user).Drives().Get(ctx, nil) - if err != nil { - detailedError := support.ConnectorStackErrorTrace(err) - if strings.Contains(detailedError, userMysiteURLNotFound) || - strings.Contains(detailedError, userMysiteNotFound) { - logger.Ctx(ctx).Infof("User %s does not have a drive", user) - return make([]models.Driveable, 0), nil // no license - } - - if strings.Contains(detailedError, "context deadline exceeded") && i < numberOfRetries { - time.Sleep(time.Duration(3*(i+1)) * time.Second) - continue - } - - return nil, errors.Wrapf( - err, - "failed to retrieve user drives. user: %s, details: %s", - user, - detailedError, - ) - } - - break + if !retry { + numberOfRetries = 0 } - logger.Ctx(ctx).Debugf("Found %d drives for user %s", len(r.GetValue()), user) + // Loop through all pages returned by Graph API. + for { + // Retry Loop for Drive retrieval. Request can timeout + for i := 0; i <= numberOfRetries; i++ { + page, err = pager.GetPage(ctx) + if err != nil { + // Various error handling. May return an error or perform a retry. + detailedError := support.ConnectorStackErrorTrace(err) + if strings.Contains(detailedError, userMysiteURLNotFound) || + strings.Contains(detailedError, userMysiteNotFound) { + logger.Ctx(ctx).Infof("resource owner does not have a drive") + return make([]models.Driveable, 0), nil // no license or drives. + } - return r.GetValue(), nil + if strings.Contains(detailedError, contextDeadlineExceeded) && i < numberOfRetries { + time.Sleep(time.Duration(3*(i+1)) * time.Second) + continue + } + + return nil, errors.Wrapf( + err, + "failed to retrieve drives. details: %s", + detailedError, + ) + } + + // No error encountered, break the retry loop so we can extract results + // and see if there's another page to fetch. + break + } + + tmp, err := pager.ValuesIn(page) + if err != nil { + return nil, errors.Wrap(err, "extracting drives from response") + } + + drives = append(drives, tmp...) + + nextLink := page.GetOdataNextLink() + if nextLink == nil || len(*nextLink) == 0 { + break + } + + pager.SetNext(*nextLink) + } + + logger.Ctx(ctx).Debugf("Found %d drives", len(drives)) + + return drives, nil } // itemCollector functions collect the items found in a drive @@ -284,10 +304,10 @@ func (op *Displayable) GetDisplayName() *string { func GetAllFolders( ctx context.Context, gs graph.Servicer, - userID string, + pager drivePager, prefix string, ) ([]*Displayable, error) { - drives, err := drives(ctx, gs, userID, OneDriveSource) + drives, err := drives(ctx, pager, true) if err != nil { return nil, errors.Wrap(err, "getting OneDrive folders") } diff --git a/src/internal/connector/onedrive/drive_test.go b/src/internal/connector/onedrive/drive_test.go index 631381240..6ace0c3c2 100644 --- a/src/internal/connector/onedrive/drive_test.go +++ b/src/internal/connector/onedrive/drive_test.go @@ -1,21 +1,323 @@ package onedrive import ( + "context" "strings" "testing" + "github.com/google/uuid" + "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/microsoftgraph/msgraph-sdk-go/models/odataerrors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/alcionai/corso/src/internal/common" "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/internal/connector/onedrive/api" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/selectors" ) +type mockPageLinker struct { + link *string +} + +func (pl *mockPageLinker) GetOdataNextLink() *string { + return pl.link +} + +type pagerResult struct { + drives []models.Driveable + nextLink *string + err error +} + +type mockDrivePager struct { + toReturn []pagerResult + getIdx int +} + +func (p *mockDrivePager) GetPage(context.Context) (api.PageLinker, error) { + if len(p.toReturn) <= p.getIdx { + return nil, assert.AnError + } + + idx := p.getIdx + p.getIdx++ + + return &mockPageLinker{p.toReturn[idx].nextLink}, p.toReturn[idx].err +} + +func (p *mockDrivePager) SetNext(string) {} + +func (p *mockDrivePager) ValuesIn(api.PageLinker) ([]models.Driveable, error) { + idx := p.getIdx + if idx > 0 { + // Return values lag by one since we increment in GetPage(). + idx-- + } + + if len(p.toReturn) <= idx { + return nil, assert.AnError + } + + return p.toReturn[idx].drives, nil +} + +// Unit tests +type OneDriveUnitSuite struct { + suite.Suite +} + +func TestOneDriveUnitSuite(t *testing.T) { + suite.Run(t, new(OneDriveUnitSuite)) +} + +func (suite *OneDriveUnitSuite) TestDrives() { + numDriveResults := 4 + emptyLink := "" + link := "foo" + + // These errors won't be the "correct" format when compared to what graph + // returns, but they're close enough to have the same info when the inner + // details are extracted via support package. + tmp := userMysiteURLNotFound + tmpMySiteURLNotFound := odataerrors.NewMainError() + tmpMySiteURLNotFound.SetMessage(&tmp) + + mySiteURLNotFound := odataerrors.NewODataError() + mySiteURLNotFound.SetError(tmpMySiteURLNotFound) + + tmp2 := userMysiteNotFound + tmpMySiteNotFound := odataerrors.NewMainError() + tmpMySiteNotFound.SetMessage(&tmp2) + + mySiteNotFound := odataerrors.NewODataError() + mySiteNotFound.SetError(tmpMySiteNotFound) + + tmp3 := contextDeadlineExceeded + tmpDeadlineExceeded := odataerrors.NewMainError() + tmpDeadlineExceeded.SetMessage(&tmp3) + + deadlineExceeded := odataerrors.NewODataError() + deadlineExceeded.SetError(tmpDeadlineExceeded) + + resultDrives := make([]models.Driveable, 0, numDriveResults) + + for i := 0; i < numDriveResults; i++ { + d := models.NewDrive() + id := uuid.NewString() + d.SetId(&id) + + resultDrives = append(resultDrives, d) + } + + tooManyRetries := make([]pagerResult, 0, getDrivesRetries+1) + + for i := 0; i < getDrivesRetries+1; i++ { + tooManyRetries = append(tooManyRetries, pagerResult{ + err: deadlineExceeded, + }) + } + + table := []struct { + name string + pagerResults []pagerResult + retry bool + expectedErr assert.ErrorAssertionFunc + expectedResults []models.Driveable + }{ + { + name: "AllOneResultNilNextLink", + pagerResults: []pagerResult{ + { + drives: resultDrives, + nextLink: nil, + err: nil, + }, + }, + retry: false, + expectedErr: assert.NoError, + expectedResults: resultDrives, + }, + { + name: "AllOneResultEmptyNextLink", + pagerResults: []pagerResult{ + { + drives: resultDrives, + nextLink: &emptyLink, + err: nil, + }, + }, + retry: false, + expectedErr: assert.NoError, + expectedResults: resultDrives, + }, + { + name: "SplitResultsNilNextLink", + pagerResults: []pagerResult{ + { + drives: resultDrives[:numDriveResults/2], + nextLink: &link, + err: nil, + }, + { + drives: resultDrives[numDriveResults/2:], + nextLink: nil, + err: nil, + }, + }, + retry: false, + expectedErr: assert.NoError, + expectedResults: resultDrives, + }, + { + name: "SplitResultsEmptyNextLink", + pagerResults: []pagerResult{ + { + drives: resultDrives[:numDriveResults/2], + nextLink: &link, + err: nil, + }, + { + drives: resultDrives[numDriveResults/2:], + nextLink: &emptyLink, + err: nil, + }, + }, + retry: false, + expectedErr: assert.NoError, + expectedResults: resultDrives, + }, + { + name: "NonRetryableError", + pagerResults: []pagerResult{ + { + drives: resultDrives, + nextLink: &link, + err: nil, + }, + { + drives: nil, + nextLink: nil, + err: assert.AnError, + }, + }, + retry: true, + expectedErr: assert.Error, + expectedResults: nil, + }, + { + name: "SiteURLNotFound", + pagerResults: []pagerResult{ + { + drives: nil, + nextLink: nil, + err: mySiteURLNotFound, + }, + }, + retry: true, + expectedErr: assert.NoError, + expectedResults: nil, + }, + { + name: "SiteNotFound", + pagerResults: []pagerResult{ + { + drives: nil, + nextLink: nil, + err: mySiteNotFound, + }, + }, + retry: true, + expectedErr: assert.NoError, + expectedResults: nil, + }, + { + name: "SplitResultsContextTimeoutWithRetries", + pagerResults: []pagerResult{ + { + drives: resultDrives[:numDriveResults/2], + nextLink: &link, + err: nil, + }, + { + drives: nil, + nextLink: nil, + err: deadlineExceeded, + }, + { + drives: resultDrives[numDriveResults/2:], + nextLink: &emptyLink, + err: nil, + }, + }, + retry: true, + expectedErr: assert.NoError, + expectedResults: resultDrives, + }, + { + name: "SplitResultsContextTimeoutNoRetries", + pagerResults: []pagerResult{ + { + drives: resultDrives[:numDriveResults/2], + nextLink: &link, + err: nil, + }, + { + drives: nil, + nextLink: nil, + err: deadlineExceeded, + }, + { + drives: resultDrives[numDriveResults/2:], + nextLink: &emptyLink, + err: nil, + }, + }, + retry: false, + expectedErr: assert.Error, + expectedResults: nil, + }, + { + name: "TooManyRetries", + pagerResults: append( + []pagerResult{ + { + drives: resultDrives[:numDriveResults/2], + nextLink: &link, + err: nil, + }, + }, + tooManyRetries..., + ), + retry: true, + expectedErr: assert.Error, + expectedResults: nil, + }, + } + for _, test := range table { + suite.T().Run(test.name, func(t *testing.T) { + ctx, flush := tester.NewContext() + defer flush() + + pager := &mockDrivePager{ + toReturn: test.pagerResults, + } + + drives, err := drives(ctx, pager, test.retry) + test.expectedErr(t, err) + + assert.ElementsMatch(t, test.expectedResults, drives) + }) + } +} + +// Integration tests + type OneDriveSuite struct { suite.Suite userID string @@ -44,7 +346,10 @@ func (suite *OneDriveSuite) TestCreateGetDeleteFolder() { folderElements := []string{folderName1} gs := loadTestService(t) - drives, err := drives(ctx, gs, suite.userID, OneDriveSource) + pager, err := PagerForSource(OneDriveSource, gs, suite.userID, nil) + require.NoError(t, err) + + drives, err := drives(ctx, pager, true) require.NoError(t, err) require.NotEmpty(t, drives) @@ -89,7 +394,10 @@ func (suite *OneDriveSuite) TestCreateGetDeleteFolder() { for _, test := range table { suite.T().Run(test.name, func(t *testing.T) { - allFolders, err := GetAllFolders(ctx, gs, suite.userID, test.prefix) + pager, err := PagerForSource(OneDriveSource, gs, suite.userID, nil) + require.NoError(t, err) + + allFolders, err := GetAllFolders(ctx, gs, pager, test.prefix) require.NoError(t, err) foundFolderIDs := []string{} diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index 5364d543c..938748ca2 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -75,7 +75,10 @@ func (suite *ItemIntegrationSuite) SetupSuite() { suite.user = tester.SecondaryM365UserID(t) - odDrives, err := drives(ctx, suite, suite.user, OneDriveSource) + pager, err := PagerForSource(OneDriveSource, suite, suite.user, nil) + require.NoError(t, err) + + odDrives, err := drives(ctx, pager, true) require.NoError(t, err) // Test Requirement 1: Need a drive require.Greaterf(t, len(odDrives), 0, "user %s does not have a drive", suite.user)