diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index 87b28e2ae..8a4c18f39 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -117,9 +117,9 @@ func (suite *ConnectorDataCollectionIntegrationSuite) TestExchangeDataCollection } // Categories with delta endpoints will produce a collection for metadata - // as well as the actual data pulled. + // as well as the actual data pulled, and the "temp" root collection. assert.GreaterOrEqual(t, len(collections), 1, "expected 1 <= num collections <= 2") - assert.GreaterOrEqual(t, 2, len(collections), "expected 1 <= num collections <= 2") + assert.GreaterOrEqual(t, 3, len(collections), "expected 1 <= num collections <= 3") for _, col := range collections { for object := range col.Items(ctx, fault.New(true)) { @@ -343,17 +343,21 @@ func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateShar control.Options{}, fault.New(true)) require.NoError(t, err, clues.ToCore(err)) - assert.Len(t, cols, 1) + require.Len(t, cols, 2) // 1 collection, 1 path prefix directory to ensure the root path exists. // No excludes yet as this isn't an incremental backup. assert.Empty(t, excludes) - for _, collection := range cols { - t.Logf("Path: %s\n", collection.FullPath().String()) - assert.Equal( - t, - path.SharePointMetadataService.String(), - collection.FullPath().Service().String()) - } + t.Logf("cols[0] Path: %s\n", cols[0].FullPath().String()) + assert.Equal( + t, + path.SharePointMetadataService.String(), + cols[0].FullPath().Service().String()) + + t.Logf("cols[1] Path: %s\n", cols[1].FullPath().String()) + assert.Equal( + t, + path.SharePointService.String(), + cols[1].FullPath().Service().String()) } func (suite *ConnectorCreateSharePointCollectionIntegrationSuite) TestCreateSharePointCollection_Lists() { diff --git a/src/internal/connector/exchange/data_collections.go b/src/internal/connector/exchange/data_collections.go index 24037db7b..ef20f3e59 100644 --- a/src/internal/connector/exchange/data_collections.go +++ b/src/internal/connector/exchange/data_collections.go @@ -179,6 +179,7 @@ func DataCollections( user = selector.DiscreteOwner collections = []data.BackupCollection{} el = errs.Local() + categories = map[path.CategoryType]struct{}{} ) cdps, err := parseMetadataCollections(ctx, metadata, errs) @@ -205,9 +206,27 @@ func DataCollections( continue } + categories[scope.Category().PathType()] = struct{}{} + collections = append(collections, dcs...) } + if len(collections) > 0 { + baseCols, err := graph.BaseCollections( + ctx, + acct.AzureTenantID, + user, + path.ExchangeService, + categories, + su, + errs) + if err != nil { + return nil, nil, err + } + + collections = append(collections, baseCols...) + } + return collections, nil, el.Failure() } diff --git a/src/internal/connector/graph/collections.go b/src/internal/connector/graph/collections.go new file mode 100644 index 000000000..b57ca5b38 --- /dev/null +++ b/src/internal/connector/graph/collections.go @@ -0,0 +1,91 @@ +package graph + +import ( + "context" + + "github.com/alcionai/clues" + + "github.com/alcionai/corso/src/internal/connector/support" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" +) + +var _ data.BackupCollection = emptyCollection{} + +type emptyCollection struct { + p path.Path + su support.StatusUpdater +} + +func (c emptyCollection) Items(ctx context.Context, _ *fault.Bus) <-chan data.Stream { + res := make(chan data.Stream) + close(res) + + s := support.CreateStatus(ctx, support.Backup, 0, support.CollectionMetrics{}, "") + c.su(s) + + return res +} + +func (c emptyCollection) FullPath() path.Path { + return c.p +} + +func (c emptyCollection) PreviousPath() path.Path { + return c.p +} + +func (c emptyCollection) State() data.CollectionState { + // This assumes we won't change the prefix path. Could probably use MovedState + // as well if we do need to change things around. + return data.NotMovedState +} + +func (c emptyCollection) DoNotMergeItems() bool { + return false +} + +func BaseCollections( + ctx context.Context, + tenant, rOwner string, + service path.ServiceType, + categories map[path.CategoryType]struct{}, + su support.StatusUpdater, + errs *fault.Bus, +) ([]data.BackupCollection, error) { + var ( + res = []data.BackupCollection{} + el = errs.Local() + lastErr error + ) + + for cat := range categories { + ictx := clues.Add(ctx, "base_service", service, "base_category", cat) + + p, err := path.Build(tenant, rOwner, service, cat, false, "tmp") + if err != nil { + // Shouldn't happen. + err = clues.Wrap(err, "making path").WithClues(ictx) + el.AddRecoverable(err) + lastErr = err + + continue + } + + // Pop off the last path element because we just want the prefix. + p, err = p.Dir() + if err != nil { + // Shouldn't happen. + err = clues.Wrap(err, "getting base prefix").WithClues(ictx) + el.AddRecoverable(err) + lastErr = err + + continue + } + + res = append(res, emptyCollection{p: p, su: su}) + } + + return res, lastErr +} diff --git a/src/internal/connector/graph_connector_test.go b/src/internal/connector/graph_connector_test.go index 3702abb0a..7b0e21ce8 100644 --- a/src/internal/connector/graph_connector_test.go +++ b/src/internal/connector/graph_connector_test.go @@ -1054,3 +1054,134 @@ func (suite *GraphConnectorIntegrationSuite) TestRestoreAndBackup_largeMailAttac }, ) } + +func (suite *GraphConnectorIntegrationSuite) TestBackup_CreatesPrefixCollections() { + table := []struct { + name string + resource resource + selectorFunc func(t *testing.T) selectors.Selector + service path.ServiceType + categories []string + }{ + { + name: "Exchange", + resource: Users, + selectorFunc: func(t *testing.T) selectors.Selector { + sel := selectors.NewExchangeBackup([]string{suite.user}) + sel.Include( + sel.ContactFolders([]string{selectors.NoneTgt}), + sel.EventCalendars([]string{selectors.NoneTgt}), + sel.MailFolders([]string{selectors.NoneTgt}), + ) + + return sel.Selector + }, + service: path.ExchangeService, + categories: []string{ + path.EmailCategory.String(), + path.ContactsCategory.String(), + path.EventsCategory.String(), + }, + }, + { + name: "OneDrive", + resource: Users, + selectorFunc: func(t *testing.T) selectors.Selector { + sel := selectors.NewOneDriveBackup([]string{suite.user}) + sel.Include( + sel.Folders([]string{selectors.NoneTgt}), + ) + + return sel.Selector + }, + service: path.OneDriveService, + categories: []string{ + path.FilesCategory.String(), + }, + }, + // SharePoint lists and pages don't seem to check selectors as expected. + //{ + // name: "SharePoint", + // resource: Sites, + // selectorFunc: func(t *testing.T) selectors.Selector { + // sel := selectors.NewSharePointBackup([]string{tester.M365SiteID(t)}) + // sel.Include( + // sel.Pages([]string{selectors.NoneTgt}), + // sel.Lists([]string{selectors.NoneTgt}), + // sel.Libraries([]string{selectors.NoneTgt}), + // ) + + // return sel.Selector + // }, + // service: path.SharePointService, + // categories: []string{ + // path.PagesCategory.String(), + // path.ListsCategory.String(), + // path.LibrariesCategory.String(), + // }, + //}, + } + + for _, test := range table { + suite.Run(test.name, func() { + ctx, flush := tester.NewContext() + defer flush() + + var ( + t = suite.T() + backupGC = loadConnector(ctx, t, graph.HTTPClient(graph.NoTimeout()), test.resource) + backupSel = test.selectorFunc(t) + errs = fault.New(true) + start = time.Now() + ) + + dcs, excludes, err := backupGC.DataCollections( + ctx, + backupSel, + nil, + control.Options{ + RestorePermissions: false, + ToggleFeatures: control.Toggles{EnablePermissionsBackup: false}, + }, + fault.New(true)) + require.NoError(t, err) + // No excludes yet because this isn't an incremental backup. + assert.Empty(t, excludes) + + t.Logf("Backup enumeration complete in %v\n", time.Since(start)) + + // Use a map to find duplicates. + foundCategories := []string{} + for _, col := range dcs { + // TODO(ashmrtn): We should be able to remove the below if we change how + // status updates are done. Ideally we shouldn't have to fetch items in + // these collections to avoid deadlocking. + var found int + + // Need to iterate through this before the continue below else we'll + // hang checking the status. + for range col.Items(ctx, errs) { + found++ + } + + // Ignore metadata collections. + fullPath := col.FullPath() + if fullPath.Service() != test.service { + continue + } + + assert.Empty(t, fullPath.Folders(), "non-prefix collection") + assert.NotEqual(t, col.State(), data.NewState, "prefix collection marked as new") + foundCategories = append(foundCategories, fullPath.Category().String()) + + assert.Zero(t, found, "non-empty collection") + } + + assert.ElementsMatch(t, test.categories, foundCategories) + + backupGC.AwaitStatus() + + assert.NoError(t, errs.Failure()) + }) + } +} diff --git a/src/internal/connector/onedrive/data_collections.go b/src/internal/connector/onedrive/data_collections.go index ec0315aa9..8da845503 100644 --- a/src/internal/connector/onedrive/data_collections.go +++ b/src/internal/connector/onedrive/data_collections.go @@ -11,6 +11,7 @@ import ( "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" "golang.org/x/exp/maps" ) @@ -48,6 +49,7 @@ func DataCollections( var ( el = errs.Local() user = selector.DiscreteOwner + categories = map[path.CategoryType]struct{}{} collections = []data.BackupCollection{} allExcludes = map[string]map[string]struct{}{} ) @@ -75,6 +77,8 @@ func DataCollections( el.AddRecoverable(clues.Stack(err).Label(fault.LabelForceNoBackupCreation)) } + categories[scope.Category().PathType()] = struct{}{} + collections = append(collections, odcs...) for k, ex := range excludes { @@ -86,5 +90,21 @@ func DataCollections( } } + if len(collections) > 0 { + baseCols, err := graph.BaseCollections( + ctx, + tenant, + user, + path.OneDriveService, + categories, + su, + errs) + if err != nil { + return nil, nil, err + } + + collections = append(collections, baseCols...) + } + return collections, allExcludes, el.Failure() } diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index 83fa9ede3..9ee099c37 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -47,6 +47,7 @@ func DataCollections( el = errs.Local() site = b.DiscreteOwner collections = []data.BackupCollection{} + categories = map[path.CategoryType]struct{}{} ) for _, scope := range b.Scopes() { @@ -110,6 +111,24 @@ func DataCollections( collections = append(collections, spcs...) foldersComplete <- struct{}{} + + categories[scope.Category().PathType()] = struct{}{} + } + + if len(collections) > 0 { + baseCols, err := graph.BaseCollections( + ctx, + creds.AzureTenantID, + site, + path.SharePointService, + categories, + su.UpdateStatus, + errs) + if err != nil { + return nil, nil, err + } + + collections = append(collections, baseCols...) } return collections, nil, el.Failure()