From b82c994b91d9893117ef08836754b49d5a24bc16 Mon Sep 17 00:00:00 2001 From: Keepers Date: Thu, 23 Mar 2023 18:04:03 -0600 Subject: [PATCH] Always make empty collections for prefix directories (#2929) Use empty collections to ensure the prefix directories are available in kopia no matter what user data we find. This helps ensure we have some set of things we can assume about non-failed backups including: They have a valid snapshot ID for data They have a valid snapshot ID for backup details Eventually, this will allow us to say that backups using a base that doesn't have a prefix directory are invalid in some way SharePoint tests are disabled because lists and pages ignore the none target in selectors --- #### Does this PR need a docs update or release note? - [x] :no_entry: No #### Type of change - [x] :bug: Bugfix #### Issue(s) * #2550 #### Test Plan - [x] :zap: Unit test - [x] :green_heart: E2E --- .../connector/data_collections_test.go | 24 ++-- .../connector/exchange/data_collections.go | 19 +++ src/internal/connector/graph/collections.go | 91 ++++++++++++ .../connector/graph_connector_test.go | 131 ++++++++++++++++++ .../connector/onedrive/data_collections.go | 20 +++ .../connector/sharepoint/data_collections.go | 19 +++ 6 files changed, 294 insertions(+), 10 deletions(-) create mode 100644 src/internal/connector/graph/collections.go diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index 87b28e2ae..8a4c18f39 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -117,9 +117,9 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestExchangeDataCollection } // Categories with delta endpoints will produce a collection for metadata - // as well as the actual data pulled. + // as well as the actual data pulled, and the "temp" root collection. 
assert.GreaterOrEqual(t, len(collections), 1, "expected 1 <= num collections <= 2") - assert.GreaterOrEqual(t, 2, len(collections), "expected 1 <= num collections <= 2") + assert.GreaterOrEqual(t, 3, len(collections), "expected 1 <= num collections <= 3") for _, col := range collections { for object := range col.Items(ctx, fault.New(true)) { @@ -343,17 +343,21 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar control.Options{}, fault.New(true)) require.NoError(t, err, clues.ToCore(err)) - assert.Len(t, cols, 1) + require.Len(t, cols, 2) // 1 collection, 1 path prefix directory to ensure the root path exists. // No excludes yet as this isn't an incremental backup. assert.Empty(t, excludes) - for _, collection := range cols { - t.Logf("Path: %s\n", collection.FullPath().String()) - assert.Equal( - t, - path.SharePointMetadataService.String(), - collection.FullPath().Service().String()) - } + t.Logf("cols[0] Path: %s\n", cols[0].FullPath().String()) + assert.Equal( + t, + path.SharePointMetadataService.String(), + cols[0].FullPath().Service().String()) + + t.Logf("cols[1] Path: %s\n", cols[1].FullPath().String()) + assert.Equal( + t, + path.SharePointService.String(), + cols[1].FullPath().Service().String()) } func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection_Lists() { diff --git a/src/internal/connector/exchange/data_collections.go b/src/internal/connector/exchange/data_collections.go index 24037db7b..ef20f3e59 100644 --- a/src/internal/connector/exchange/data_collections.go +++ b/src/internal/connector/exchange/data_collections.go @@ -179,6 +179,7 @@ func DataCollections( user = selector.DiscreteOwner collections = []data.BackupCollection{} el = errs.Local() + categories = map[path.CategoryType]struct{}{} ) cdps, err := parseMetadataCollections(ctx, metadata, errs) @@ -205,9 +206,27 @@ func DataCollections( continue } + categories[scope.Category().PathType()] = struct{}{} + collections 
= append(collections, dcs...) } + if len(collections) > 0 { + baseCols, err := graph.BaseCollections( + ctx, + acct.AzureTenantID, + user, + path.ExchangeService, + categories, + su, + errs) + if err != nil { + return nil, nil, err + } + + collections = append(collections, baseCols...) + } + return collections, nil, el.Failure() } diff --git a/src/internal/connector/graph/collections.go b/src/internal/connector/graph/collections.go new file mode 100644 index 000000000..b57ca5b38 --- /dev/null +++ b/src/internal/connector/graph/collections.go @@ -0,0 +1,91 @@ +package graph + +import ( + "context" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/connector/support" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" +) + +var _ data.BackupCollection = emptyCollection{} + +type emptyCollection struct { + p path.Path + su support.StatusUpdater +} + +func (c emptyCollection) Items(ctx context.Context, _ *fault.Bus) <-chan data.Stream { + res := make(chan data.Stream) + close(res) + + s := support.CreateStatus(ctx, support.Backup, 0, support.CollectionMetrics{}, "") + c.su(s) + + return res +} + +func (c emptyCollection) FullPath() path.Path { + return c.p +} + +func (c emptyCollection) PreviousPath() path.Path { + return c.p +} + +func (c emptyCollection) State() data.CollectionState { + // This assumes we won't change the prefix path. Could probably use MovedState + // as well if we do need to change things around. 
+ return data.NotMovedState +} + +func (c emptyCollection) DoNotMergeItems() bool { + return false +} + +func BaseCollections( + ctx context.Context, + tenant, rOwner string, + service path.ServiceType, + categories map[path.CategoryType]struct{}, + su support.StatusUpdater, + errs *fault.Bus, +) ([]data.BackupCollection, error) { + var ( + res = []data.BackupCollection{} + el = errs.Local() + lastErr error + ) + + for cat := range categories { + ictx := clues.Add(ctx, "base_service", service, "base_category", cat) + + p, err := path.Build(tenant, rOwner, service, cat, false, "tmp") + if err != nil { + // Shouldn't happen. + err = clues.Wrap(err, "making path").WithClues(ictx) + el.AddRecoverable(err) + lastErr = err + + continue + } + + // Pop off the last path element because we just want the prefix. + p, err = p.Dir() + if err != nil { + // Shouldn't happen. + err = clues.Wrap(err, "getting base prefix").WithClues(ictx) + el.AddRecoverable(err) + lastErr = err + + continue + } + + res = append(res, emptyCollection{p: p, su: su}) + } + + return res, lastErr +} diff --git a/src/internal/connector/graph_connector_test.go b/src/internal/connector/graph_connector_test.go index 3702abb0a..7b0e21ce8 100644 --- a/src/internal/connector/graph_connector_test.go +++ b/src/internal/connector/graph_connector_test.go @@ -1054,3 +1054,134 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup_largeMailAttac }, ) } + +func (suite *GraphConnectorIntegrationSuite) TestBackup_CreatesPrefixCollections() { + table := []struct { + name string + resource resource + selectorFunc func(t *testing.T) selectors.Selector + service path.ServiceType + categories []string + }{ + { + name: "Exchange", + resource: Users, + selectorFunc: func(t *testing.T) selectors.Selector { + sel := selectors.NewExchangeBackup([]string{suite.user}) + sel.Include( + sel.ContactFolders([]string{selectors.NoneTgt}), + sel.EventCalendars([]string{selectors.NoneTgt}), + 
sel.MailFolders([]string{selectors.NoneTgt}), + ) + + return sel.Selector + }, + service: path.ExchangeService, + categories: []string{ + path.EmailCategory.String(), + path.ContactsCategory.String(), + path.EventsCategory.String(), + }, + }, + { + name: "OneDrive", + resource: Users, + selectorFunc: func(t *testing.T) selectors.Selector { + sel := selectors.NewOneDriveBackup([]string{suite.user}) + sel.Include( + sel.Folders([]string{selectors.NoneTgt}), + ) + + return sel.Selector + }, + service: path.OneDriveService, + categories: []string{ + path.FilesCategory.String(), + }, + }, + // SharePoint lists and pages don't seem to check selectors as expected. + //{ + // name: "SharePoint", + // resource: Sites, + // selectorFunc: func(t *testing.T) selectors.Selector { + // sel := selectors.NewSharePointBackup([]string{tester.M365SiteID(t)}) + // sel.Include( + // sel.Pages([]string{selectors.NoneTgt}), + // sel.Lists([]string{selectors.NoneTgt}), + // sel.Libraries([]string{selectors.NoneTgt}), + // ) + + // return sel.Selector + // }, + // service: path.SharePointService, + // categories: []string{ + // path.PagesCategory.String(), + // path.ListsCategory.String(), + // path.LibrariesCategory.String(), + // }, + //}, + } + + for _, test := range table { + suite.Run(test.name, func() { + ctx, flush := tester.NewContext() + defer flush() + + var ( + t = suite.T() + backupGC = loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource) + backupSel = test.selectorFunc(t) + errs = fault.New(true) + start = time.Now() + ) + + dcs, excludes, err := backupGC.DataCollections( + ctx, + backupSel, + nil, + control.Options{ + RestorePermissions: false, + ToggleFeatures: control.Toggles{EnablePermissionsBackup: false}, + }, + fault.New(true)) + require.NoError(t, err) + // No excludes yet because this isn't an incremental backup. + assert.Empty(t, excludes) + + t.Logf("Backup enumeration complete in %v\n", time.Since(start)) + + // Use a map to find duplicates. 
+ foundCategories := []string{} + for _, col := range dcs { + // TODO(ashmrtn): We should be able to remove the below if we change how + // status updates are done. Ideally we shouldn't have to fetch items in + // these collections to avoid deadlocking. + var found int + + // Need to iterate through this before the continue below else we'll + // hang checking the status. + for range col.Items(ctx, errs) { + found++ + } + + // Ignore metadata collections. + fullPath := col.FullPath() + if fullPath.Service() != test.service { + continue + } + + assert.Empty(t, fullPath.Folders(), "non-prefix collection") + assert.NotEqual(t, col.State(), data.NewState, "prefix collection marked as new") + foundCategories = append(foundCategories, fullPath.Category().String()) + + assert.Zero(t, found, "non-empty collection") + } + + assert.ElementsMatch(t, test.categories, foundCategories) + + backupGC.AwaitStatus() + + assert.NoError(t, errs.Failure()) + }) + } +} diff --git a/src/internal/connector/onedrive/data_collections.go b/src/internal/connector/onedrive/data_collections.go index ec0315aa9..8da845503 100644 --- a/src/internal/connector/onedrive/data_collections.go +++ b/src/internal/connector/onedrive/data_collections.go @@ -11,6 +11,7 @@ import ( "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" "golang.org/x/exp/maps" ) @@ -48,6 +49,7 @@ func DataCollections( var ( el = errs.Local() user = selector.DiscreteOwner + categories = map[path.CategoryType]struct{}{} collections = []data.BackupCollection{} allExcludes = map[string]map[string]struct{}{} ) @@ -75,6 +77,8 @@ func DataCollections( el.AddRecoverable(clues.Stack(err).Label(fault.LabelForceNoBackupCreation)) } + categories[scope.Category().PathType()] = struct{}{} + collections = append(collections, odcs...) 
for k, ex := range excludes { @@ -86,5 +90,21 @@ func DataCollections( } } + if len(collections) > 0 { + baseCols, err := graph.BaseCollections( + ctx, + tenant, + user, + path.OneDriveService, + categories, + su, + errs) + if err != nil { + return nil, nil, err + } + + collections = append(collections, baseCols...) + } + return collections, allExcludes, el.Failure() } diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index 83fa9ede3..9ee099c37 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -47,6 +47,7 @@ func DataCollections( el = errs.Local() site = b.DiscreteOwner collections = []data.BackupCollection{} + categories = map[path.CategoryType]struct{}{} ) for _, scope := range b.Scopes() { @@ -110,6 +111,24 @@ func DataCollections( collections = append(collections, spcs...) foldersComplete <- struct{}{} + + categories[scope.Category().PathType()] = struct{}{} + } + + if len(collections) > 0 { + baseCols, err := graph.BaseCollections( + ctx, + creds.AzureTenantID, + site, + path.SharePointService, + categories, + su.UpdateStatus, + errs) + if err != nil { + return nil, nil, err + } + + collections = append(collections, baseCols...) } return collections, nil, el.Failure()