From 1246f51b22ca9042cd189c635004fc25bffbae17 Mon Sep 17 00:00:00 2001
From: Vaibhav Kamra
Date: Wed, 7 Sep 2022 16:47:12 -0700
Subject: [PATCH] OneDrive Backup Operation (#738)

## Description

Wires up the OneDrive collection logic to `operation.Backup`

Includes an integration test that runs against the test domain

Two bug fixes:
- Skip the "root" item that is returned by the delta query
- Fix incorrect usage of the `filepath.SplitList` function which does not split a path into components. Instead use `strings.Split`. This is ok because the paths returned here are not OS specific. Regardless - this logic will be refactored when we use the `path` pkg.

## Type of change

Please check the type of change your PR introduces:
- [x] :sunflower: Feature
- [x] :bug: Bugfix
- [ ] :world_map: Documentation
- [ ] :robot: Test
- [ ] :hamster: Trivial/Minor

## Issue(s)
#548

## Test Plan

- [ ] :muscle: Manual
- [x] :zap: Unit test
- [x] :green_heart: E2E
---
 src/internal/connector/graph_connector.go     | 51 +++++++++++++++++++
 src/internal/connector/onedrive/collection.go |  7 ++-
 .../connector/onedrive/collection_test.go     |  3 +-
 .../connector/onedrive/collections.go         |  8 ++-
 src/internal/operations/backup.go             |  2 +-
 src/internal/operations/backup_test.go        | 50 ++++++++++++++++++
 6 files changed, 114 insertions(+), 7 deletions(-)

diff --git a/src/internal/connector/graph_connector.go b/src/internal/connector/graph_connector.go
index 51b01c512..d615a1be1 100644
--- a/src/internal/connector/graph_connector.go
+++ b/src/internal/connector/graph_connector.go
@@ -16,11 +16,13 @@ import (
 
 	"github.com/alcionai/corso/src/internal/connector/exchange"
 	"github.com/alcionai/corso/src/internal/connector/graph"
+	"github.com/alcionai/corso/src/internal/connector/onedrive"
 	"github.com/alcionai/corso/src/internal/connector/support"
 	"github.com/alcionai/corso/src/internal/data"
 	"github.com/alcionai/corso/src/internal/path"
 	"github.com/alcionai/corso/src/pkg/account"
 	"github.com/alcionai/corso/src/pkg/control"
+	"github.com/alcionai/corso/src/pkg/logger"
 	"github.com/alcionai/corso/src/pkg/selectors"
 )
 
@@ -418,3 +420,52 @@ func IsNonRecoverableError(e error) bool {
 	var nonRecoverable support.NonRecoverableGCError
 	return errors.As(e, &nonRecoverable)
 }
+
+func (gc *GraphConnector) DataCollections(ctx context.Context, sels selectors.Selector) ([]data.Collection, error) {
+	switch sels.Service {
+	case selectors.ServiceExchange:
+		return gc.ExchangeDataCollection(ctx, sels)
+	case selectors.ServiceOneDrive:
+		return gc.OneDriveDataCollections(ctx, sels)
+	default:
+		return nil, errors.Errorf("Service %v not supported", sels.Service)
+	}
+}
+
+// OneDriveDataCollections returns the data.Collection set holding the OneDrive data
+// of every user matched by the selector; the first user that fails aborts the call.
+func (gc *GraphConnector) OneDriveDataCollections(
+	ctx context.Context,
+	selector selectors.Selector,
+) ([]data.Collection, error) {
+	odb, err := selector.ToOneDriveBackup()
+	if err != nil {
+		return nil, errors.Wrap(err, "collecting onedrive data")
+	}
+
+	collections := []data.Collection{}
+
+	scopes := odb.DiscreteScopes(gc.GetUsers())
+
+	var errs error
+
+	// for each scope that includes oneDrive items, get all
+	for _, scope := range scopes {
+		for _, user := range scope.Get(selectors.OneDriveUser) {
+			logger.Ctx(ctx).With("user", user).Debug("Creating OneDrive collections")
+
+			odcs, err := onedrive.NewCollections(user, &gc.graphService, gc.UpdateStatus).Get(ctx)
+			if err != nil {
+				return nil, support.WrapAndAppend(user, err, errs)
+			}
+
+			collections = append(collections, odcs...)
+		}
+	}
+
+	for range collections {
+		gc.incrementAwaitingMessages()
+	}
+
+	return collections, errs
+}
diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go
index 60f71a213..2c3488f05 100644
--- a/src/internal/connector/onedrive/collection.go
+++ b/src/internal/connector/onedrive/collection.go
@@ -4,7 +4,7 @@ package onedrive
 import (
 	"context"
 	"io"
-	"path/filepath"
+	"strings"
 
 	"github.com/alcionai/corso/src/internal/connector/graph"
 	"github.com/alcionai/corso/src/internal/connector/support"
@@ -77,7 +77,10 @@ func (oc *Collection) Items() <-chan data.Stream {
 }
 
 func (oc *Collection) FullPath() []string {
-	return filepath.SplitList(oc.folderPath)
+	path := oc.folderPath
+	// Remove leading `/` if any so that Split
+	// doesn't return a ""
+	return strings.Split(strings.TrimPrefix(path, "/"), "/")
 }
 
 // Item represents a single item retrieved from OneDrive
diff --git a/src/internal/connector/onedrive/collection_test.go b/src/internal/connector/onedrive/collection_test.go
index e66b03406..8cb0a64bb 100644
--- a/src/internal/connector/onedrive/collection_test.go
+++ b/src/internal/connector/onedrive/collection_test.go
@@ -5,7 +5,6 @@ import (
 	"context"
 	"errors"
 	"io"
-	"path/filepath"
 	"sync"
 	"testing"
 
@@ -60,7 +59,7 @@ func (suite *OneDriveCollectionSuite) TestOneDriveCollection() {
 	folderPath := "dir1/dir2/dir3"
 	coll := NewCollection(folderPath, "fakeDriveID", suite, suite.testStatusUpdater(&wg, &collStatus))
 	require.NotNil(suite.T(), coll)
-	assert.Equal(suite.T(), filepath.SplitList(folderPath), coll.FullPath())
+	assert.Equal(suite.T(), []string{"dir1", "dir2", "dir3"}, coll.FullPath())
 
 	testItemID := "fakeItemID"
 	testItemName := "itemName"
diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go
index 411d2cc44..166da2ea1 100644
--- a/src/internal/connector/onedrive/collections.go
+++ b/src/internal/connector/onedrive/collections.go
@@ -59,8 +59,8 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) {
 	}
 
 	collections := make([]data.Collection, 0, len(c.collectionMap))
-	for _, c := range c.collectionMap {
-		collections = append(collections, c)
+	for _, coll := range c.collectionMap {
+		collections = append(collections, coll)
 	}
 
 	return collections, nil
@@ -74,6 +74,10 @@ func (c *Collections) updateCollections(ctx context.Context, driveID string, ite
 		if err != nil {
 			return err
 		}
+		if item.GetRoot() != nil {
+			// Skip the root item
+			continue
+		}
 		if item.GetParentReference() == nil || item.GetParentReference().GetPath() == nil {
 			return errors.Errorf("item does not have a parent reference. item name : %s", *item.GetName())
 		}
diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go
index 0472b73ce..96fbdcc44 100644
--- a/src/internal/operations/backup.go
+++ b/src/internal/operations/backup.go
@@ -106,7 +106,7 @@ func (op *BackupOperation) Run(ctx context.Context) (err error) {
 		return err
 	}
 
-	cs, err := gc.ExchangeDataCollection(ctx, op.Selectors)
+	cs, err := gc.DataCollections(ctx, op.Selectors)
 	if err != nil {
 		err = errors.Wrap(err, "retrieving service data")
 		opStats.readErr = err
diff --git a/src/internal/operations/backup_test.go b/src/internal/operations/backup_test.go
index d355b4355..fe7cd3354 100644
--- a/src/internal/operations/backup_test.go
+++ b/src/internal/operations/backup_test.go
@@ -207,3 +207,53 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run() {
 		})
 	}
 }
+
+func (suite *BackupOpIntegrationSuite) TestBackupOneDrive_Run() {
+	t := suite.T()
+	ctx := context.Background()
+
+	m365UserID := tester.M365UserID(t)
+	acct := tester.NewM365Account(t)
+
+	// need to initialize the repository before we can test connecting to it.
+	st := tester.NewPrefixedS3Storage(t)
+
+	k := kopia.NewConn(st)
+	require.NoError(t, k.Initialize(ctx))
+
+	// kopiaRef comes with a count of 1 and Wrapper bumps it again so safe
+	// to close here.
+	defer k.Close(ctx)
+
+	kw, err := kopia.NewWrapper(k)
+	require.NoError(t, err)
+
+	defer kw.Close(ctx)
+
+	ms, err := kopia.NewModelStore(k)
+	require.NoError(t, err)
+
+	defer ms.Close(ctx)
+
+	sw := store.NewKopiaStore(ms)
+
+	sel := selectors.NewOneDriveBackup()
+	sel.Include(sel.Users([]string{m365UserID}))
+
+	bo, err := NewBackupOperation(
+		ctx,
+		control.Options{},
+		kw,
+		sw,
+		acct,
+		sel.Selector)
+	require.NoError(t, err)
+
+	require.NoError(t, bo.Run(ctx))
+	require.NotEmpty(t, bo.Results)
+	require.NotEmpty(t, bo.Results.BackupID)
+	assert.Equal(t, Completed, bo.Status)
+	assert.Equal(t, bo.Results.ItemsRead, bo.Results.ItemsWritten)
+	assert.NoError(t, bo.Results.ReadErrors)
+	assert.NoError(t, bo.Results.WriteErrors)
+}