From 2458508969c05e6e6b099fc470c227d35d8a262c Mon Sep 17 00:00:00 2001 From: ashmrtn Date: Mon, 30 Jan 2023 11:44:06 -0800 Subject: [PATCH] Wire up GraphConnector <-> BackupOp item exclude list (#2245) ## Description Return an item exclude list from GraphConnector to BackupOp. BackupOp does not yet pass this to kopia wrapper. Returned list is set to nil (eventually) by all components so even if this were wired to kopia wrapper it wouldn't change the current behavior of the system ## Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No ## Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Test - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup ## Issue(s) * #2243 merge after: * #2143 ## Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- src/internal/connector/data_collections.go | 36 ++++++++++--------- .../connector/data_collections_test.go | 19 +++++++--- .../connector/exchange/data_collections.go | 12 ++++--- .../connector/graph_connector_test.go | 8 +++-- .../connector/onedrive/collections.go | 14 ++++---- src/internal/connector/onedrive/drive_test.go | 4 ++- .../connector/sharepoint/data_collections.go | 20 +++++------ src/internal/operations/backup.go | 6 +++- 8 files changed, 73 insertions(+), 46 deletions(-) diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go index 0b6d20b27..7d187a854 100644 --- a/src/internal/connector/data_collections.go +++ b/src/internal/connector/data_collections.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/pkg/errors" + "golang.org/x/exp/maps" "github.com/alcionai/corso/src/internal/connector/discovery" "github.com/alcionai/corso/src/internal/connector/discovery/api" @@ -35,27 +36,27 @@ func (gc *GraphConnector) DataCollections( sels selectors.Selector, metadata []data.Collection, ctrlOpts control.Options, -) ([]data.Collection, error) { +) ([]data.Collection, map[string]struct{}, error) { ctx, end := D.Span(ctx, "gc:dataCollections", D.Index("service", sels.Service.String())) defer end() err := verifyBackupInputs(sels, gc.GetUsers(), gc.GetSiteIDs()) if err != nil { - return nil, err + return nil, nil, err } serviceEnabled, err := checkServiceEnabled(ctx, gc.Owners.Users(), path.ServiceType(sels.Service), sels.DiscreteOwner) if err != nil { - return nil, err + return nil, nil, err } if !serviceEnabled { - return []data.Collection{}, nil + return []data.Collection{}, nil, nil } switch sels.Service { case selectors.ServiceExchange: - colls, err := exchange.DataCollections( + colls, excludes, err := exchange.DataCollections( ctx, sels, metadata, @@ -64,7 +65,7 @@ func (gc *GraphConnector) DataCollections( gc.UpdateStatus, ctrlOpts) if err != nil { - return nil, err + return nil, nil, err } for _, c := range colls { @@ -79,13 +80,13 @@ func (gc *GraphConnector) DataCollections( } } - return colls, nil + return colls, excludes, nil case selectors.ServiceOneDrive: return gc.OneDriveDataCollections(ctx, sels, ctrlOpts) case selectors.ServiceSharePoint: - colls, err := sharepoint.DataCollections( + colls, excludes, err := sharepoint.DataCollections( ctx, gc.itemClient, sels, @@ -94,17 +95,17 @@ func (gc *GraphConnector) DataCollections( gc, ctrlOpts) if err != nil { - return nil, err + return nil, nil, err } for range colls { gc.incrementAwaitingMessages() } - return colls, nil + return colls, excludes, nil default: - return nil, errors.Errorf("service %s not supported", sels.Service.String()) + return nil, nil, errors.Errorf("service %s not supported", sels.Service.String()) } } @@ -182,15 +183,16 @@ func (gc *GraphConnector) OneDriveDataCollections( ctx context.Context, selector selectors.Selector, ctrlOpts control.Options, -) ([]data.Collection, error) { +) ([]data.Collection, map[string]struct{}, error) { odb, err := selector.ToOneDriveBackup() if err != nil { - return nil, errors.Wrap(err, "oneDriveDataCollection: parsing selector") + return nil, nil, errors.Wrap(err, "oneDriveDataCollection: parsing selector") } var ( user = selector.DiscreteOwner collections = []data.Collection{} + allExcludes = map[string]struct{}{} errs error ) @@ -198,7 +200,7 @@ func (gc *GraphConnector) OneDriveDataCollections( for _, scope := range odb.Scopes() { logger.Ctx(ctx).With("user", user).Debug("Creating OneDrive collections") - odcs, err := onedrive.NewCollections( + odcs, excludes, err := onedrive.NewCollections( gc.itemClient, gc.credentials.AzureTenantID, user, @@ -209,15 +211,17 @@ func (gc *GraphConnector) OneDriveDataCollections( ctrlOpts, ).Get(ctx) if err != nil { - return nil, support.WrapAndAppend(user, err, errs) + return nil, nil, support.WrapAndAppend(user, err, errs) } collections = append(collections, odcs...) + + maps.Copy(allExcludes, excludes) } for range collections { gc.incrementAwaitingMessages() } - return collections, errs + return collections, allExcludes, errs } diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index 877975bb5..c90bee511 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -99,7 +99,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestExchangeDataCollection for _, test := range tests { suite.T().Run(test.name, func(t *testing.T) { - collections, err := exchange.DataCollections( + collections, excludes, err := exchange.DataCollections( ctx, test.getSelector(t), nil, @@ -108,6 +108,8 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestExchangeDataCollection control.Options{}) require.NoError(t, err) + assert.Empty(t, excludes) + for range collections { connector.incrementAwaitingMessages() } @@ -199,9 +201,10 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestDataCollections_invali for _, test := range tests { suite.T().Run(test.name, func(t *testing.T) { - collections, err := connector.DataCollections(ctx, test.getSelector(t), nil, control.Options{}) + collections, excludes, err := connector.DataCollections(ctx, test.getSelector(t), nil, control.Options{}) assert.Error(t, err) assert.Empty(t, collections) + assert.Empty(t, excludes) }) } } @@ -242,7 +245,7 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti for _, test := range tests { suite.T().Run(test.name, func(t *testing.T) { - collections, err := sharepoint.DataCollections( + collections, excludes, err := sharepoint.DataCollections( ctx, graph.HTTPClient(graph.NoTimeout()), test.getSelector(), @@ -251,6 +254,8 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestSharePointDataCollecti connector, control.Options{}) require.NoError(t, err) + // Not expecting excludes as this isn't an incremental backup. + assert.Empty(t, excludes) for range collections { connector.incrementAwaitingMessages() @@ -320,9 +325,11 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar sel := selectors.NewSharePointBackup(siteIDs) sel.Include(sel.Libraries([]string{"foo"}, selectors.PrefixMatch())) - cols, err := gc.DataCollections(ctx, sel.Selector, nil, control.Options{}) + cols, excludes, err := gc.DataCollections(ctx, sel.Selector, nil, control.Options{}) require.NoError(t, err) assert.Len(t, cols, 1) + // No excludes yet as this isn't an incremental backup. + assert.Empty(t, excludes) for _, collection := range cols { t.Logf("Path: %s\n", collection.FullPath().String()) @@ -344,9 +351,11 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar sel := selectors.NewSharePointBackup(siteIDs) sel.Include(sel.Lists(selectors.Any(), selectors.PrefixMatch())) - cols, err := gc.DataCollections(ctx, sel.Selector, nil, control.Options{}) + cols, excludes, err := gc.DataCollections(ctx, sel.Selector, nil, control.Options{}) require.NoError(t, err) assert.Less(t, 0, len(cols)) + // No excludes yet as this isn't an incremental backup. + assert.Empty(t, excludes) for _, collection := range cols { t.Logf("Path: %s\n", collection.FullPath().String()) diff --git a/src/internal/connector/exchange/data_collections.go b/src/internal/connector/exchange/data_collections.go index 619d75cee..41bc16301 100644 --- a/src/internal/connector/exchange/data_collections.go +++ b/src/internal/connector/exchange/data_collections.go @@ -167,10 +167,10 @@ func DataCollections( acct account.M365Config, su support.StatusUpdater, ctrlOpts control.Options, -) ([]data.Collection, error) { +) ([]data.Collection, map[string]struct{}, error) { eb, err := selector.ToExchangeBackup() if err != nil { - return nil, errors.Wrap(err, "exchangeDataCollection: parsing selector") + return nil, nil, errors.Wrap(err, "exchangeDataCollection: parsing selector") } var ( @@ -181,7 +181,7 @@ func DataCollections( cdps, err := parseMetadataCollections(ctx, metadata) if err != nil { - return nil, err + return nil, nil, err } for _, scope := range eb.Scopes() { @@ -196,13 +196,15 @@ func DataCollections( ctrlOpts, su) if err != nil { - return nil, support.WrapAndAppend(user, err, errs) + return nil, nil, support.WrapAndAppend(user, err, errs) } collections = append(collections, dcs...) } - return collections, errs + // Exchange does not require adding items to the global exclude list so always + // return nil. + return collections, nil, errs } func getterByType(ac api.Client, category path.CategoryType) (addedAndRemovedItemIDsGetter, error) { diff --git a/src/internal/connector/graph_connector_test.go b/src/internal/connector/graph_connector_test.go index 1ed353be6..be1439c35 100644 --- a/src/internal/connector/graph_connector_test.go +++ b/src/internal/connector/graph_connector_test.go @@ -425,8 +425,10 @@ func runRestoreBackupTest( t.Logf("Selective backup of %s\n", backupSel) start = time.Now() - dcs, err := backupGC.DataCollections(ctx, backupSel, nil, control.Options{}) + dcs, excludes, err := backupGC.DataCollections(ctx, backupSel, nil, control.Options{}) require.NoError(t, err) + // No excludes yet because this isn't an incremental backup. + assert.Empty(t, excludes) t.Logf("Backup enumeration complete in %v\n", time.Since(start)) @@ -898,8 +900,10 @@ func (suite *GraphConnectorIntegrationSuite) TestMultiFolderBackupDifferentNames backupSel := backupSelectorForExpected(t, test.service, expectedDests) t.Log("Selective backup of", backupSel) - dcs, err := backupGC.DataCollections(ctx, backupSel, nil, control.Options{}) + dcs, excludes, err := backupGC.DataCollections(ctx, backupSel, nil, control.Options{}) require.NoError(t, err) + // No excludes yet because this isn't an incremental backup. + assert.Empty(t, excludes) t.Log("Backup enumeration complete") diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index 0813c13ed..f83ce342a 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -92,19 +92,20 @@ func NewCollections( } } -// Retrieves drive data as set of `data.Collections` -func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { +// Retrieves drive data as set of `data.Collections` and a set of item names to +// be excluded from the upcoming backup. +func (c *Collections) Get(ctx context.Context) ([]data.Collection, map[string]struct{}, error) { // Enumerate drives for the specified resourceOwner pager, err := PagerForSource(c.source, c.service, c.resourceOwner, nil) if err != nil { - return nil, err + return nil, nil, err } retry := c.source == OneDriveSource drives, err := drives(ctx, pager, retry) if err != nil { - return nil, err + return nil, nil, err } var ( @@ -133,7 +134,7 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { c.UpdateCollections, ) if err != nil { - return nil, err + return nil, nil, err } if len(delta) > 0 { @@ -185,7 +186,8 @@ func (c *Collections) Get(ctx context.Context) ([]data.Collection, error) { collections = append(collections, metadata) } - return collections, nil + // TODO(ashmrtn): Track and return the set of items to exclude. + return collections, nil, nil } // UpdateCollections initializes and adds the provided drive items to Collections diff --git a/src/internal/connector/onedrive/drive_test.go b/src/internal/connector/onedrive/drive_test.go index 66449a917..36fef30ab 100644 --- a/src/internal/connector/onedrive/drive_test.go +++ b/src/internal/connector/onedrive/drive_test.go @@ -454,7 +454,7 @@ func (suite *OneDriveSuite) TestOneDriveNewCollections() { scope := selectors. NewOneDriveBackup([]string{test.user}). AllData()[0] - odcs, err := NewCollections( + odcs, excludes, err := NewCollections( graph.HTTPClient(graph.NoTimeout()), creds.AzureTenantID, test.user, @@ -465,6 +465,8 @@ func (suite *OneDriveSuite) TestOneDriveNewCollections() { control.Options{}, ).Get(ctx) assert.NoError(t, err) + // Don't expect excludes as this isn't an incremental backup. + assert.Empty(t, excludes) for _, entry := range odcs { assert.NotEmpty(t, entry.FullPath()) diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index d64542271..6011c32a0 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -31,10 +31,10 @@ func DataCollections( serv graph.Servicer, su statusUpdater, ctrlOpts control.Options, -) ([]data.Collection, error) { +) ([]data.Collection, map[string]struct{}, error) { b, err := selector.ToSharePointBackup() if err != nil { - return nil, errors.Wrap(err, "sharePointDataCollection: parsing selector") + return nil, nil, errors.Wrap(err, "sharePointDataCollection: parsing selector") } var ( @@ -63,11 +63,11 @@ func DataCollections( su, ctrlOpts) if err != nil { - return nil, support.WrapAndAppend(site, err, errs) + return nil, nil, support.WrapAndAppend(site, err, errs) } case path.LibrariesCategory: - spcs, err = collectLibraries( + spcs, _, err = collectLibraries( ctx, itemClient, serv, @@ -77,7 +77,7 @@ func DataCollections( su, ctrlOpts) if err != nil { - return nil, support.WrapAndAppend(site, err, errs) + return nil, nil, support.WrapAndAppend(site, err, errs) } } @@ -85,7 +85,7 @@ func DataCollections( foldersComplete <- struct{}{} } - return collections, errs + return collections, nil, errs } func collectLists( @@ -134,7 +134,7 @@ func collectLibraries( scope selectors.SharePointScope, updater statusUpdater, ctrlOpts control.Options, -) ([]data.Collection, error) { +) ([]data.Collection, map[string]struct{}, error) { var ( collections = []data.Collection{} errs error @@ -152,12 +152,12 @@ func collectLibraries( updater.UpdateStatus, ctrlOpts) - odcs, err := colls.Get(ctx) + odcs, excludes, err := colls.Get(ctx) if err != nil { - return nil, support.WrapAndAppend(siteID, err, errs) + return nil, nil, support.WrapAndAppend(siteID, err, errs) } - return append(collections, odcs...), errs + return append(collections, odcs...), excludes, errs } type folderMatcher struct { diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 7d6c14748..247f9859b 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -261,7 +261,11 @@ func produceBackupDataCollections( closer() }() - return gc.DataCollections(ctx, sel, metadata, ctrlOpts) + // TODO(ashmrtn): When we're ready to wire up the global exclude list return + // all values. + cols, _, errs := gc.DataCollections(ctx, sel, metadata, ctrlOpts) + + return cols, errs } // ---------------------------------------------------------------------------