From 6aff258c8bb8f4f74be9e2328711b0bb162b6282 Mon Sep 17 00:00:00 2001
From: Keepers
Date: Tue, 28 Nov 2023 14:07:49 -0700
Subject: [PATCH] add boilerplate for drive tree processing (#4716)

begins implementation of the drive delta tree support by adding
boilerplate funcs for getting the backup data, and adding a framework
of unit tests that will be used to bring testing parity up to par with
current tests.

---

#### Does this PR need a docs update or release note?

- [x] :no_entry: No

#### Issue(s)

* #4689

#### Test Plan

- [x] :zap: Unit test
---
 src/internal/data/implementations.go          |   17 +
 .../m365/collection/drive/collections.go      |   94 +-
 .../m365/collection/drive/collections_test.go | 1016 +----------------
 .../m365/collection/drive/collections_tree.go |  326 ++++++
 .../collection/drive/collections_tree_test.go |  510 +++++++++
 src/internal/m365/collection/drive/limiter.go |   95 ++
 .../m365/collection/drive/limiter_test.go     | 1010 ++++++++++++++++
 .../m365/service/onedrive/mock/handlers.go    |   15 +-
 src/pkg/count/testdata/count.go               |   30 +
 9 files changed, 2054 insertions(+), 1059 deletions(-)
 create mode 100644 src/internal/m365/collection/drive/collections_tree.go
 create mode 100644 src/internal/m365/collection/drive/collections_tree_test.go
 create mode 100644 src/internal/m365/collection/drive/limiter.go
 create mode 100644 src/internal/m365/collection/drive/limiter_test.go
 create mode 100644 src/pkg/count/testdata/count.go

diff --git a/src/internal/data/implementations.go b/src/internal/data/implementations.go
index f497d51dd..771633eee 100644
--- a/src/internal/data/implementations.go
+++ b/src/internal/data/implementations.go
@@ -25,6 +25,23 @@ const (
 	DeletedState CollectionState = 3
 )
 
+func (cs CollectionState) String() string {
+	s := "Unknown State"
+
+	switch cs {
+	case 0:
+		s = "New"
+	case 1:
+		s = "Not Moved"
+	case 2:
+		s = "Moved"
+	case 3:
+		s = "Deleted"
+	}
+
+	return s
+}
+
 type FetchRestoreCollection struct {
 	Collection
 	FetchItemByNamer
diff --git a/src/internal/m365/collection/drive/collections.go b/src/internal/m365/collection/drive/collections.go
index d501519c5..1c199025e 100644
--- a/src/internal/m365/collection/drive/collections.go
+++ b/src/internal/m365/collection/drive/collections.go
@@ -291,6 +291,17 @@ func (c *Collections) Get(
 	ssmb *prefixmatcher.StringSetMatchBuilder,
 	errs *fault.Bus,
 ) ([]data.BackupCollection, bool, error) {
+	if c.ctrl.ToggleFeatures.UseDeltaTree {
+		_, _, err := c.getTree(ctx, prevMetadata, ssmb, errs)
+		if err != nil {
+			return nil, false, clues.Wrap(err, "processing backup using tree")
+		}
+
+		return nil,
+			false,
+			clues.New("forced error: cannot run tree-based backup: incomplete implementation")
+	}
+
 	deltasByDriveID, prevPathsByDriveID, canUsePrevBackup, err := deserializeAndValidateMetadata(
 		ctx,
 		prevMetadata,
@@ -750,87 +761,6 @@ func (c *Collections) getCollectionPath(
 	return collectionPath, nil
 }
 
-type driveEnumerationStats struct {
-	numPages      int
-	numAddedFiles int
-	numContainers int
-	numBytes      int64
-}
-
-func newPagerLimiter(opts control.Options) *pagerLimiter {
-	res := &pagerLimiter{limits: opts.PreviewLimits}
-
-	if res.limits.MaxContainers == 0 {
-		res.limits.MaxContainers = defaultPreviewMaxContainers
-	}
-
-	if res.limits.MaxItemsPerContainer == 0 {
-		res.limits.MaxItemsPerContainer = defaultPreviewMaxItemsPerContainer
-	}
-
-	if res.limits.MaxItems == 0 {
-		res.limits.MaxItems = defaultPreviewMaxItems
-	}
-
-	if res.limits.MaxBytes == 0 {
-		res.limits.MaxBytes = defaultPreviewMaxBytes
-	}
-
-	if res.limits.MaxPages == 0 {
-
res.limits.MaxPages = defaultPreviewMaxPages - } - - return res -} - -type pagerLimiter struct { - limits control.PreviewItemLimits -} - -func (l pagerLimiter) effectiveLimits() control.PreviewItemLimits { - return l.limits -} - -func (l pagerLimiter) enabled() bool { - return l.limits.Enabled -} - -// sizeLimit returns the total number of bytes this backup should try to -// contain. -func (l pagerLimiter) sizeLimit() int64 { - return l.limits.MaxBytes -} - -// atItemLimit returns true if the limiter is enabled and has reached the limit -// for individual items added to collections for this backup. -func (l pagerLimiter) atItemLimit(stats *driveEnumerationStats) bool { - return l.enabled() && - (stats.numAddedFiles >= l.limits.MaxItems || - stats.numBytes >= l.limits.MaxBytes) -} - -// atContainerItemsLimit returns true if the limiter is enabled and the current -// number of items is above the limit for the number of items for a container -// for this backup. -func (l pagerLimiter) atContainerItemsLimit(numItems int) bool { - return l.enabled() && numItems >= l.limits.MaxItemsPerContainer -} - -// atContainerPageLimit returns true if the limiter is enabled and the number of -// pages processed so far is beyond the limit for this backup. -func (l pagerLimiter) atPageLimit(stats *driveEnumerationStats) bool { - return l.enabled() && stats.numPages >= l.limits.MaxPages -} - -// atLimit returns true if the limiter is enabled and meets any of the -// conditions for max items, containers, etc for this backup. -func (l pagerLimiter) atLimit(stats *driveEnumerationStats) bool { - return l.enabled() && - (l.atItemLimit(stats) || - stats.numContainers >= l.limits.MaxContainers || - stats.numPages >= l.limits.MaxPages) -} - // PopulateDriveCollections initializes and adds the provided drive items to Collections // A new collection is created for every drive folder. // Along with populating the collection items and updating the excluded item IDs, this func @@ -926,7 +856,7 @@ func (c *Collections) PopulateDriveCollections( // Don't check for containers we've already seen. if _, ok := c.CollectionMap[driveID][id]; !ok { if id != lastContainerID { - if limiter.atLimit(stats) { + if limiter.atLimit(stats, ignoreMe) { break } diff --git a/src/internal/m365/collection/drive/collections_test.go b/src/internal/m365/collection/drive/collections_test.go index 919b0c869..077eedc4d 100644 --- a/src/internal/m365/collection/drive/collections_test.go +++ b/src/internal/m365/collection/drive/collections_test.go @@ -260,6 +260,12 @@ func parent(driveID any, elems ...string) string { elems...)...) 
} +// just for readability +const ( + doMergeItems = true + doNotMergeItems = false +) + // common item names const ( bar = "bar" @@ -1671,11 +1677,44 @@ func (suite *CollectionsUnitSuite) TestDeserializeMetadata_ReadFailure() { fc := failingColl{} - _, _, canUsePreviousBackup, err := deserializeAndValidateMetadata(ctx, []data.RestoreCollection{fc}, count.New(), fault.New(true)) + _, _, canUsePreviousBackup, err := deserializeAndValidateMetadata( + ctx, + []data.RestoreCollection{fc}, + count.New(), + fault.New(true)) require.NoError(t, err) require.False(t, canUsePreviousBackup) } +func (suite *CollectionsUnitSuite) TestGet_treeCannotBeUsedWhileIncomplete() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + drive := models.NewDrive() + drive.SetId(ptr.To("id")) + drive.SetName(ptr.To("name")) + + mbh := mock.DefaultOneDriveBH(user) + opts := control.DefaultOptions() + opts.ToggleFeatures.UseDeltaTree = true + + mockDrivePager := &apiMock.Pager[models.Driveable]{ + ToReturn: []apiMock.PagerResult[models.Driveable]{ + {Values: []models.Driveable{drive}}, + }, + } + + mbh.DrivePagerV = mockDrivePager + + c := collWithMBH(mbh) + c.ctrl = opts + + _, _, err := c.Get(ctx, nil, nil, fault.New(true)) + require.ErrorContains(t, err, "not yet implemented", clues.ToCore(err)) +} + func (suite *CollectionsUnitSuite) TestGet() { metadataPath, err := path.BuildMetadata( tenant, @@ -3564,981 +3603,6 @@ func (suite *CollectionsUnitSuite) TestGet() { } } -// TestGet_PreviewLimits checks that the limits set for preview backups in -// control.Options.ItemLimits are respected. These tests run a reduced set of -// checks that don't examine metadata, collection states, etc. They really just -// check the expected items appear. -func (suite *CollectionsUnitSuite) TestGet_PreviewLimits() { - metadataPath, err := path.BuildMetadata( - tenant, - user, - path.OneDriveService, - path.FilesCategory, - false) - require.NoError(suite.T(), err, "making metadata path", clues.ToCore(err)) - - drive1 := models.NewDrive() - drive1.SetId(ptr.To(idx(drive, 1))) - drive1.SetName(ptr.To(namex(drive, 1))) - - drive2 := models.NewDrive() - drive2.SetId(ptr.To(idx(drive, 2))) - drive2.SetName(ptr.To(namex(drive, 2))) - - table := []struct { - name string - limits control.PreviewItemLimits - drives []models.Driveable - enumerator mock.EnumerateItemsDeltaByDrive - // Collection name -> set of item IDs. We can't check item data because - // that's not mocked out. Metadata is checked separately. 
- expectedCollections map[string][]string - }{ - { - name: "OneDrive SinglePage ExcludeItemsOverMaxSize", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 999, - MaxItemsPerContainer: 999, - MaxContainers: 999, - MaxBytes: 5, - MaxPages: 999, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{{ - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItemWithSize(idx(file, 1), namex(file, 1), parent(1), rootID, 7, isFile), - driveItemWithSize(idx(file, 2), namex(file, 2), parent(1), rootID, 1, isFile), - driveItemWithSize(idx(file, 3), namex(file, 3), parent(1), rootID, 1, isFile), - }, - }}, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 2), idx(file, 3)}, - }, - }, - { - name: "OneDrive SinglePage SingleFolder ExcludeCombinedItemsOverMaxSize", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 999, - MaxItemsPerContainer: 999, - MaxContainers: 999, - MaxBytes: 3, - MaxPages: 999, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{{ - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItemWithSize(idx(file, 1), namex(file, 1), parent(1), rootID, 1, isFile), - driveItemWithSize(idx(file, 2), namex(file, 2), parent(1), rootID, 2, isFile), - driveItemWithSize(idx(file, 3), namex(file, 3), parent(1), rootID, 1, isFile), - }, - }}, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 1), idx(file, 2)}, - }, - }, - { - name: "OneDrive SinglePage MultipleFolders ExcludeCombinedItemsOverMaxSize", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 999, - MaxItemsPerContainer: 999, - MaxContainers: 999, - MaxBytes: 3, - MaxPages: 999, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{{ - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItemWithSize(idx(file, 1), namex(file, 1), parent(1), rootID, 1, isFile), - driveItemWithSize(idx(folder, 1), namex(folder, 1), parent(1), rootID, 1, isFolder), - driveItemWithSize(idx(file, 2), namex(file, 2), parent(1, namex(folder, 1)), idx(folder, 1), 2, isFile), - driveItemWithSize(idx(file, 3), namex(file, 3), parent(1, namex(folder, 1)), idx(folder, 1), 1, isFile), - }, - }}, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 1)}, - fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 2)}, - }, - }, - { - name: "OneDrive SinglePage SingleFolder ItemLimit", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 3, - MaxItemsPerContainer: 999, - MaxContainers: 999, - MaxBytes: 999999, - MaxPages: 999, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{{ - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(file, 1), namex(file, 1), 
parent(1), rootID, isFile), - driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), - driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), - driveItem(idx(file, 4), namex(file, 4), parent(1), rootID, isFile), - driveItem(idx(file, 5), namex(file, 5), parent(1), rootID, isFile), - driveItem(idx(file, 6), namex(file, 6), parent(1), rootID, isFile), - }, - }}, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, - }, - }, - { - name: "OneDrive MultiplePages MultipleFolders ItemLimit WithRepeatedItem", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 3, - MaxItemsPerContainer: 999, - MaxContainers: 999, - MaxBytes: 999999, - MaxPages: 999, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{ - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), - driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), - }, - }, - { - Items: []models.DriveItemable{ - // Repeated items shouldn't count against the limit. - driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), - driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), - driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - driveItem(idx(file, 6), namex(file, 6), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - }, - }, - }, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 1), idx(file, 2)}, - fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 3)}, - }, - }, - { - name: "OneDrive MultiplePages PageLimit", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 999, - MaxItemsPerContainer: 999, - MaxContainers: 999, - MaxBytes: 999999, - MaxPages: 1, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{ - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), - driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), - }, - }, - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), - driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - driveItem(idx(file, 6), namex(file, 6), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - }, - }, - }, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 1), idx(file, 2)}, - }, - }, - { - name: "OneDrive MultiplePages PerContainerItemLimit", - limits: 
control.PreviewItemLimits{ - Enabled: true, - MaxItems: 999, - MaxItemsPerContainer: 1, - MaxContainers: 999, - MaxBytes: 999999, - MaxPages: 999, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{ - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), - driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), - driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), - }, - }, - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), - driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - }, - }, - }, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - // Root has an additional item. It's hard to fix that in the code - // though. - fullPath(1): {idx(file, 1), idx(file, 2)}, - fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4)}, - }, - }, - { - name: "OneDrive MultiplePages PerContainerItemLimit ItemUpdated", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 999, - MaxItemsPerContainer: 3, - MaxContainers: 999, - MaxBytes: 999999, - MaxPages: 999, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{ - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), - driveItem(idx(file, 1), namex(file, 1), parent(1, namex(folder, 0)), idx(folder, 0), isFile), - driveItem(idx(file, 2), namex(file, 2), parent(1, namex(folder, 0)), idx(folder, 0), isFile), - }, - }, - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), - // Updated item that shouldn't count against the limit a second time. 
- driveItem(idx(file, 2), namex(file, 2), parent(1, namex(folder, 0)), idx(folder, 0), isFile), - driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 0)), idx(folder, 0), isFile), - driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 0)), idx(folder, 0), isFile), - }, - }, - }, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {}, - fullPath(1, namex(folder, 0)): {idx(folder, 0), idx(file, 1), idx(file, 2), idx(file, 3)}, - }, - }, - { - name: "OneDrive MultiplePages PerContainerItemLimit MoveItemBetweenFolders", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 999, - MaxItemsPerContainer: 2, - MaxContainers: 999, - MaxBytes: 999999, - MaxPages: 999, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{ - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), - driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), - // Put folder 0 at limit. - driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), - driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 0)), idx(folder, 0), isFile), - driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 0)), idx(folder, 0), isFile), - }, - }, - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), - // Try to move item from root to folder 0 which is already at the limit. - driveItem(idx(file, 1), namex(file, 1), parent(1, namex(folder, 0)), idx(folder, 0), isFile), - }, - }, - }, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 1), idx(file, 2)}, - fullPath(1, namex(folder, 0)): {idx(folder, 0), idx(file, 3), idx(file, 4)}, - }, - }, - { - name: "OneDrive MultiplePages ContainerLimit LastContainerSplitAcrossPages", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 999, - MaxItemsPerContainer: 999, - MaxContainers: 2, - MaxBytes: 999999, - MaxPages: 999, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{ - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), - driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), - driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), - }, - }, - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), - driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - }, - }, - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), - driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - }, - }, - }, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 1), idx(file, 2), 
idx(file, 3)}, - fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)}, - }, - }, - { - name: "OneDrive MultiplePages ContainerLimit NextContainerOnSamePage", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 999, - MaxItemsPerContainer: 999, - MaxContainers: 2, - MaxBytes: 999999, - MaxPages: 999, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{ - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), - driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), - driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), - }, - }, - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), - driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - // This container shouldn't be returned. - driveItem(idx(folder, 2), namex(folder, 2), parent(1), rootID, isFolder), - driveItem(idx(file, 7), namex(file, 7), parent(1, namex(folder, 2)), idx(folder, 2), isFile), - driveItem(idx(file, 8), namex(file, 8), parent(1, namex(folder, 2)), idx(folder, 2), isFile), - driveItem(idx(file, 9), namex(file, 9), parent(1, namex(folder, 2)), idx(folder, 2), isFile), - }, - }, - }, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, - fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)}, - }, - }, - { - name: "OneDrive MultiplePages ContainerLimit NextContainerOnNextPage", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 999, - MaxItemsPerContainer: 999, - MaxContainers: 2, - MaxBytes: 999999, - MaxPages: 999, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{ - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), - driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), - driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), - }, - }, - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), - driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - }, - }, - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - // This container shouldn't be returned. 
- driveItem(idx(folder, 2), namex(folder, 2), parent(1), rootID, isFolder), - driveItem(idx(file, 7), namex(file, 7), parent(1, namex(folder, 2)), idx(folder, 2), isFile), - driveItem(idx(file, 8), namex(file, 8), parent(1, namex(folder, 2)), idx(folder, 2), isFile), - driveItem(idx(file, 9), namex(file, 9), parent(1, namex(folder, 2)), idx(folder, 2), isFile), - }, - }, - }, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, - fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)}, - }, - }, - { - name: "TwoDrives SeparateLimitAccounting", - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 3, - MaxItemsPerContainer: 999, - MaxContainers: 999, - MaxBytes: 999999, - MaxPages: 999, - }, - drives: []models.Driveable{drive1, drive2}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{ - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), - driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), - driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), - driveItem(idx(file, 4), namex(file, 4), parent(1), rootID, isFile), - driveItem(idx(file, 5), namex(file, 5), parent(1), rootID, isFile), - }, - }, - }, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - idx(drive, 2): { - Pages: []mock.NextPage{ - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(file, 1), namex(file, 1), parent(2), rootID, isFile), - driveItem(idx(file, 2), namex(file, 2), parent(2), rootID, isFile), - driveItem(idx(file, 3), namex(file, 3), parent(2), rootID, isFile), - driveItem(idx(file, 4), namex(file, 4), parent(2), rootID, isFile), - driveItem(idx(file, 5), namex(file, 5), parent(2), rootID, isFile), - }, - }, - }, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, - fullPath(2): {idx(file, 1), idx(file, 2), idx(file, 3)}, - }, - }, - { - name: "OneDrive PreviewDisabled MinimumLimitsIgnored", - limits: control.PreviewItemLimits{ - MaxItems: 1, - MaxItemsPerContainer: 1, - MaxContainers: 1, - MaxBytes: 1, - MaxPages: 1, - }, - drives: []models.Driveable{drive1}, - enumerator: mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: []mock.NextPage{ - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), - driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), - driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), - }, - }, - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), - driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - }, - }, - { - Items: []models.DriveItemable{ - driveRootItem(rootID), // will be present, not needed - driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), - driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), - }, - }, - }, - 
DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - }, - expectedCollections: map[string][]string{ - fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, - fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)}, - }, - }, - } - for _, test := range table { - suite.Run(test.name, func() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - mockDrivePager := &apiMock.Pager[models.Driveable]{ - ToReturn: []apiMock.PagerResult[models.Driveable]{ - {Values: test.drives}, - }, - } - - mbh := mock.DefaultOneDriveBH(user) - mbh.DrivePagerV = mockDrivePager - mbh.DriveItemEnumeration = test.enumerator - - opts := control.DefaultOptions() - opts.PreviewLimits = test.limits - - c := NewCollections( - mbh, - tenant, - idname.NewProvider(user, user), - func(*support.ControllerOperationStatus) {}, - opts, - count.New()) - - errs := fault.New(true) - - delList := prefixmatcher.NewStringSetBuilder() - - cols, canUsePreviousBackup, err := c.Get(ctx, nil, delList, errs) - require.NoError(t, err, clues.ToCore(err)) - - assert.True(t, canUsePreviousBackup, "can use previous backup") - assert.Empty(t, errs.Skipped()) - - collPaths := []string{} - - for _, baseCol := range cols { - // There shouldn't be any deleted collections. - if !assert.NotEqual( - t, - data.DeletedState, - baseCol.State(), - "collection marked deleted") { - continue - } - - folderPath := baseCol.FullPath().String() - - if folderPath == metadataPath.String() { - continue - } - - collPaths = append(collPaths, folderPath) - - // TODO: We should really be getting items in the collection - // via the Items() channel. The lack of that makes this check a bit more - // bittle since internal details can change. The wiring to support - // mocked GetItems is available. We just haven't plugged it in yet. - col, ok := baseCol.(*Collection) - require.True(t, ok, "getting onedrive.Collection handle") - - itemIDs := make([]string, 0, len(col.driveItems)) - - for id := range col.driveItems { - itemIDs = append(itemIDs, id) - } - - assert.ElementsMatchf( - t, - test.expectedCollections[folderPath], - itemIDs, - "expected elements to match in collection with path %q", - folderPath) - } - - assert.ElementsMatch( - t, - maps.Keys(test.expectedCollections), - collPaths, - "collection paths") - }) - } -} - -// TestGet_PreviewLimits_Defaults checks that default values are used when -// making a preview backup if the user didn't provide some options. -// These tests run a reduced set of checks that really just look for item counts -// and such. Other tests are expected to provide more comprehensive checks. -func (suite *CollectionsUnitSuite) TestGet_PreviewLimits_Defaults() { - // Add a check that will fail if we make the default smaller than expected. - require.LessOrEqual( - suite.T(), - int64(1024*1024), - defaultPreviewMaxBytes, - "default number of bytes changed; DefaultNumBytes test case may need updating!") - require.Zero( - suite.T(), - defaultPreviewMaxBytes%(1024*1024), - "default number of bytes isn't divisible by 1MB; DefaultNumBytes test case may need updating!") - - // The number of pages returned can be indirectly tested by checking how many - // containers/items were returned. 
- type expected struct { - numItems int - numContainers int - numItemsPerContainer int - } - - metadataPath, err := path.BuildMetadata( - tenant, - user, - path.OneDriveService, - path.FilesCategory, - false) - require.NoError(suite.T(), err, "making metadata path", clues.ToCore(err)) - - drive1 := models.NewDrive() - drive1.SetId(ptr.To(idx(drive, 1))) - drive1.SetName(ptr.To(namex(drive, 1))) - - // The number of pages the test generates can be controlled by setting the - // number of containers. The test will put one (non-root) container per page. - table := []struct { - name string - numContainers int - numItemsPerContainer int - itemSize int64 - limits control.PreviewItemLimits - expect expected - }{ - { - name: "DefaultNumItems", - numContainers: 1, - numItemsPerContainer: defaultPreviewMaxItems + 1, - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItemsPerContainer: 99999999, - MaxContainers: 99999999, - MaxBytes: 99999999, - MaxPages: 99999999, - }, - expect: expected{ - numItems: defaultPreviewMaxItems, - numContainers: 1, - numItemsPerContainer: defaultPreviewMaxItems, - }, - }, - { - name: "DefaultNumContainers", - numContainers: defaultPreviewMaxContainers + 1, - numItemsPerContainer: 1, - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 99999999, - MaxItemsPerContainer: 99999999, - MaxBytes: 99999999, - MaxPages: 99999999, - }, - expect: expected{ - // Root is counted as a container in the code but won't be counted or - // have items in the test. - numItems: defaultPreviewMaxContainers - 1, - numContainers: defaultPreviewMaxContainers - 1, - numItemsPerContainer: 1, - }, - }, - { - name: "DefaultNumItemsPerContainer", - numContainers: 1, - numItemsPerContainer: defaultPreviewMaxItemsPerContainer + 1, - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 99999999, - MaxContainers: 99999999, - MaxBytes: 99999999, - MaxPages: 99999999, - }, - expect: expected{ - numItems: defaultPreviewMaxItemsPerContainer, - numContainers: 1, - numItemsPerContainer: defaultPreviewMaxItemsPerContainer, - }, - }, - { - name: "DefaultNumPages", - numContainers: defaultPreviewMaxPages + 1, - numItemsPerContainer: 1, - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 99999999, - MaxContainers: 99999999, - MaxItemsPerContainer: 99999999, - MaxBytes: 99999999, - }, - expect: expected{ - numItems: defaultPreviewMaxPages, - numContainers: defaultPreviewMaxPages, - numItemsPerContainer: 1, - }, - }, - { - name: "DefaultNumBytes", - numContainers: 1, - numItemsPerContainer: int(defaultPreviewMaxBytes/1024/1024) + 1, - itemSize: 1024 * 1024, - limits: control.PreviewItemLimits{ - Enabled: true, - MaxItems: 99999999, - MaxContainers: 99999999, - MaxItemsPerContainer: 99999999, - MaxPages: 99999999, - }, - expect: expected{ - numItems: int(defaultPreviewMaxBytes) / 1024 / 1024, - numContainers: 1, - numItemsPerContainer: int(defaultPreviewMaxBytes) / 1024 / 1024, - }, - }, - } - for _, test := range table { - suite.Run(test.name, func() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - mockDrivePager := &apiMock.Pager[models.Driveable]{ - ToReturn: []apiMock.PagerResult[models.Driveable]{ - {Values: []models.Driveable{drive1}}, - }, - } - - mbh := mock.DefaultOneDriveBH(user) - mbh.DrivePagerV = mockDrivePager - - pages := make([]mock.NextPage, 0, test.numContainers) - - for containerIdx := 0; containerIdx < test.numContainers; containerIdx++ { - page := mock.NextPage{ - Items: []models.DriveItemable{ - driveRootItem(rootID), - 
driveItem( - idx(folder, containerIdx), - namex(folder, containerIdx), - parent(1), - rootID, - isFolder), - }, - } - - for itemIdx := 0; itemIdx < test.numItemsPerContainer; itemIdx++ { - itemSuffix := fmt.Sprintf("%d-%d", containerIdx, itemIdx) - - page.Items = append(page.Items, driveItemWithSize( - idx(file, itemSuffix), - namex(file, itemSuffix), - parent(1, namex(folder, containerIdx)), - idx(folder, containerIdx), - test.itemSize, - isFile)) - } - - pages = append(pages, page) - } - - mbh.DriveItemEnumeration = mock.EnumerateItemsDeltaByDrive{ - DrivePagers: map[string]*mock.DriveItemsDeltaPager{ - idx(drive, 1): { - Pages: pages, - DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, - }, - }, - } - - opts := control.DefaultOptions() - opts.PreviewLimits = test.limits - - c := NewCollections( - mbh, - tenant, - idname.NewProvider(user, user), - func(*support.ControllerOperationStatus) {}, - opts, - count.New()) - - errs := fault.New(true) - - delList := prefixmatcher.NewStringSetBuilder() - - cols, canUsePreviousBackup, err := c.Get(ctx, nil, delList, errs) - require.NoError(t, err, clues.ToCore(err)) - - assert.True(t, canUsePreviousBackup, "can use previous backup") - assert.Empty(t, errs.Skipped()) - - var ( - numContainers int - numItems int - ) - - for _, baseCol := range cols { - // There shouldn't be any deleted collections. - if !assert.NotEqual( - t, - data.DeletedState, - baseCol.State(), - "collection marked deleted") { - continue - } - - folderPath := baseCol.FullPath().String() - - if folderPath == metadataPath.String() { - continue - } - - // Skip the root container and don't count it because we don't put - // anything in it. - dp, err := path.ToDrivePath(baseCol.FullPath()) - require.NoError(t, err, clues.ToCore(err)) - - if len(dp.Folders) == 0 { - continue - } - - numContainers++ - - // TODO: We should really be getting items in the collection - // via the Items() channel. The lack of that makes this check a bit more - // bittle since internal details can change. The wiring to support - // mocked GetItems is available. We just haven't plugged it in yet. - col, ok := baseCol.(*Collection) - require.True(t, ok, "getting onedrive.Collection handle") - - numItems += len(col.driveItems) - - // Add one to account for the folder permissions item. - assert.Len( - t, - col.driveItems, - test.expect.numItemsPerContainer+1, - "items in container %v", - col.FullPath()) - } - - assert.Equal( - t, - test.expect.numContainers, - numContainers, - "total containers") - - // Each container also gets an item so account for that here. 
- assert.Equal( - t, - test.expect.numItems+test.expect.numContainers, - numItems, - "total items across all containers") - }) - } -} - func (suite *CollectionsUnitSuite) TestAddURLCacheToDriveCollections() { drive1 := models.NewDrive() drive1.SetId(ptr.To(idx(drive, 1))) diff --git a/src/internal/m365/collection/drive/collections_tree.go b/src/internal/m365/collection/drive/collections_tree.go new file mode 100644 index 000000000..c28064c11 --- /dev/null +++ b/src/internal/m365/collection/drive/collections_tree.go @@ -0,0 +1,326 @@ +package drive + +import ( + "context" + + "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" + + "github.com/alcionai/corso/src/internal/common/prefixmatcher" + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/data" + bupMD "github.com/alcionai/corso/src/pkg/backup/metadata" + "github.com/alcionai/corso/src/pkg/count" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/services/m365/api" + "github.com/alcionai/corso/src/pkg/services/m365/api/graph" + "github.com/alcionai/corso/src/pkg/services/m365/api/pagers" +) + +// this file is used to separate the collections handling between the previous +// (list-based) design, and the in-progress (tree-based) redesign. +// see: https://github.com/alcionai/corso/issues/4688 + +func (c *Collections) getTree( + ctx context.Context, + prevMetadata []data.RestoreCollection, + ssmb *prefixmatcher.StringSetMatchBuilder, + errs *fault.Bus, +) ([]data.BackupCollection, bool, error) { + ctx = clues.AddTraceName(ctx, "GetTree") + + // extract the previous backup's metadata like: deltaToken urls and previousPath maps. + // We'll need these to reconstruct / ensure the correct state of the world, after + // enumerating through all the delta changes. + deltasByDriveID, prevPathsByDriveID, canUsePrevBackup, err := deserializeAndValidateMetadata( + ctx, + prevMetadata, + c.counter, + errs) + if err != nil { + return nil, false, err + } + + ctx = clues.Add(ctx, "can_use_previous_backup", canUsePrevBackup) + + // in sharepoint, it's possible to delete an entire drive. + // if we don't see a previously-existing drive in the drives enumeration, + // we assume it was deleted and will remove it from storage using a tombstone. + driveTombstones := map[string]struct{}{} + for driveID := range prevPathsByDriveID { + driveTombstones[driveID] = struct{}{} + } + + pager := c.handler.NewDrivePager(c.protectedResource.ID(), nil) + + drives, err := api.GetAllDrives(ctx, pager) + if err != nil { + return nil, false, err + } + + c.counter.Add(count.Drives, int64(len(drives))) + c.counter.Add(count.PrevDeltas, int64(len(deltasByDriveID))) + + var ( + el = errs.Local() + collections = []data.BackupCollection{} + driveIDToNewDeltaLink = map[string]string{} + driveIDToNewPrevPaths = map[string]map[string]string{} + ) + + // each drive owns its own delta history. We can't go more granular than that. + // so our first order of business is to enumerate each drive's delta data, and + // to use that as the basis for our backups. + for _, drv := range drives { + if el.Failure() != nil { + break + } + + var ( + driveID = ptr.Val(drv.GetId()) + cl = c.counter.Local() + ictx = clues.Add( + ctx, + "drive_id", driveID, + "drive_name", clues.Hide(ptr.Val(drv.GetName()))) + ) + + ictx = clues.AddLabelCounter(ictx, cl.PlainAdder()) + + // all the magic happens here. 
expecations are that this process will: + // - iterate over all data (new or delta, as needed) in the drive + // - condense that data into a set of collections to backup + // - stitch the new and previous path data into a new prevPaths map + // - report the latest delta token details + colls, newPrevPaths, du, err := c.makeDriveCollections( + ictx, + drv, + prevPathsByDriveID[driveID], + cl, + el.Local()) + if err != nil { + el.AddRecoverable(ictx, clues.Stack(err)) + continue + } + + // add all the freshly aggregated data into our results + collections = append(collections, colls...) + driveIDToNewPrevPaths[driveID] = newPrevPaths + driveIDToNewDeltaLink[driveID] = du.URL + + // this drive is still in use, so we'd better not delete it. + delete(driveTombstones, driveID) + } + + if el.Failure() != nil { + return nil, false, clues.Stack(el.Failure()) + } + + alertIfPrevPathsHaveCollisions(ctx, driveIDToNewPrevPaths, c.counter, errs) + + // clean up any drives that have been deleted since the last backup. + dts, err := c.makeDriveTombstones(ctx, driveTombstones, errs) + if err != nil { + return nil, false, clues.Stack(err) + } + + collections = append(collections, dts...) + + // persist our updated metadata for use on the next backup + colls := c.makeMetadataCollections( + ctx, + driveIDToNewDeltaLink, + driveIDToNewPrevPaths) + + collections = append(collections, colls...) + + logger.Ctx(ctx).Infow("produced collections", "count_collections", len(collections)) + + return collections, canUsePrevBackup, nil +} + +func (c *Collections) makeDriveCollections( + ctx context.Context, + d models.Driveable, + prevPaths map[string]string, + counter *count.Bus, + errs *fault.Bus, +) ([]data.BackupCollection, map[string]string, pagers.DeltaUpdate, error) { + cl := c.counter.Local() + + cl.Add(count.PrevPaths, int64(len(prevPaths))) + logger.Ctx(ctx).Infow( + "previous metadata for drive", + "count_old_prev_paths", len(prevPaths)) + + // TODO(keepers): leaving this code around for now as a guide + // while implementation progresses. + + // --- pager aggregation + + // du, newPrevPaths, err := c.PopulateDriveCollections( + // ctx, + // d, + // tree, + // cl.Local(), + // errs) + // if err != nil { + // return nil, false, clues.Stack(err) + // } + + // numDriveItems := c.NumItems - numPrevItems + // numPrevItems = c.NumItems + + // cl.Add(count.NewPrevPaths, int64(len(newPrevPaths))) + + // --- prev path incorporation + + // For both cases we don't need to do set difference on folder map if the + // delta token was valid because we should see all the changes. + // if !du.Reset { + // if len(excludedItemIDs) == 0 { + // continue + // } + + // p, err := c.handler.CanonicalPath(odConsts.DriveFolderPrefixBuilder(driveID), c.tenantID) + // if err != nil { + // return nil, false, clues.WrapWC(ictx, err, "making exclude prefix") + // } + + // ssmb.Add(p.String(), excludedItemIDs) + + // continue + // } + + // Set all folders in previous backup but not in the current one with state + // deleted. Need to compare by ID because it's possible to make new folders + // with the same path as deleted old folders. We shouldn't merge items or + // subtrees if that happens though. + + // --- post-processing + + // Attach an url cache to the drive if the number of discovered items is + // below the threshold. Attaching cache to larger drives can cause + // performance issues since cache delta queries start taking up majority of + // the hour the refreshed URLs are valid for. 
+ + // if numDriveItems < urlCacheDriveItemThreshold { + // logger.Ctx(ictx).Infow( + // "adding url cache for drive", + // "num_drive_items", numDriveItems) + + // uc, err := newURLCache( + // driveID, + // prevDeltaLink, + // urlCacheRefreshInterval, + // c.handler, + // cl, + // errs) + // if err != nil { + // return nil, false, clues.Stack(err) + // } + + // // Set the URL cache instance for all collections in this drive. + // for id := range c.CollectionMap[driveID] { + // c.CollectionMap[driveID][id].urlCache = uc + // } + // } + + return nil, nil, pagers.DeltaUpdate{}, clues.New("not yet implemented") +} + +// quality-of-life wrapper that transforms each tombstone in the map +// into a backup collection that marks the backup as deleted. +func (c *Collections) makeDriveTombstones( + ctx context.Context, + driveTombstones map[string]struct{}, + errs *fault.Bus, +) ([]data.BackupCollection, error) { + c.counter.Add(count.DriveTombstones, int64(len(driveTombstones))) + + var ( + colls = make([]data.BackupCollection, 0, len(driveTombstones)) + el = errs.Local() + ) + + // generate tombstones for drives that were removed. + for driveID := range driveTombstones { + if el.Failure() != nil { + break + } + + prevDrivePath, err := c.handler.PathPrefix(c.tenantID, driveID) + if err != nil { + err = clues.WrapWC(ctx, err, "making drive tombstone for previous path").Label(count.BadPathPrefix) + el.AddRecoverable(ctx, err) + + continue + } + + // TODO: call NewTombstoneCollection + coll, err := NewCollection( + c.handler, + c.protectedResource, + nil, // delete the drive + prevDrivePath, + driveID, + c.statusUpdater, + c.ctrl, + false, + true, + nil, + c.counter.Local()) + if err != nil { + err = clues.WrapWC(ctx, err, "making drive tombstone") + el.AddRecoverable(ctx, err) + + continue + } + + colls = append(colls, coll) + } + + return colls, el.Failure() +} + +// quality-of-life wrapper that transforms the delta tokens and previous paths +// into a backup collections for persitence. +func (c *Collections) makeMetadataCollections( + ctx context.Context, + deltaTokens map[string]string, + prevPaths map[string]map[string]string, +) []data.BackupCollection { + colls := []data.BackupCollection{} + + pathPrefix, err := c.handler.MetadataPathPrefix(c.tenantID) + if err != nil { + logger.CtxErr(ctx, err).Info("making metadata collection path prefixes") + + // It's safe to return here because the logic for starting an + // incremental backup should eventually find that the metadata files are + // empty/missing and default to a full backup. + return colls + } + + entries := []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry(bupMD.DeltaURLsFileName, deltaTokens), + graph.NewMetadataEntry(bupMD.PreviousPathFileName, prevPaths), + } + + md, err := graph.MakeMetadataCollection( + pathPrefix, + entries, + c.statusUpdater, + c.counter.Local()) + if err != nil { + logger.CtxErr(ctx, err).Info("making metadata collection for future incremental backups") + + // Technically it's safe to continue here because the logic for starting an + // incremental backup should eventually find that the metadata files are + // empty/missing and default to a full backup. 
+ return colls + } + + return append(colls, md) +} diff --git a/src/internal/m365/collection/drive/collections_tree_test.go b/src/internal/m365/collection/drive/collections_tree_test.go new file mode 100644 index 000000000..3cb9b1fa3 --- /dev/null +++ b/src/internal/m365/collection/drive/collections_tree_test.go @@ -0,0 +1,510 @@ +package drive + +import ( + "testing" + + "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/data" + dataMock "github.com/alcionai/corso/src/internal/data/mock" + "github.com/alcionai/corso/src/internal/m365/service/onedrive/mock" + "github.com/alcionai/corso/src/internal/m365/support" + "github.com/alcionai/corso/src/internal/tester" + bupMD "github.com/alcionai/corso/src/pkg/backup/metadata" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/count" + countTD "github.com/alcionai/corso/src/pkg/count/testdata" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api/graph" + apiMock "github.com/alcionai/corso/src/pkg/services/m365/api/mock" +) + +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + +func collWithMBH(mbh BackupHandler) *Collections { + return NewCollections( + mbh, + tenant, + idname.NewProvider(user, user), + func(*support.ControllerOperationStatus) {}, + control.Options{ToggleFeatures: control.Toggles{ + UseDeltaTree: true, + }}, + count.New()) +} + +func fullOrPrevPath( + t *testing.T, + coll data.BackupCollection, +) path.Path { + var collPath path.Path + + if coll.State() != data.DeletedState { + collPath = coll.FullPath() + } else { + collPath = coll.PreviousPath() + } + + require.False( + t, + len(collPath.Elements()) < 4, + "malformed or missing collection path") + + return collPath +} + +func pagerForDrives(drives ...models.Driveable) *apiMock.Pager[models.Driveable] { + return &apiMock.Pager[models.Driveable]{ + ToReturn: []apiMock.PagerResult[models.Driveable]{ + {Values: drives}, + }, + } +} + +func makePrevMetadataColls( + t *testing.T, + mbh BackupHandler, + previousPaths map[string]map[string]string, +) []data.RestoreCollection { + pathPrefix, err := mbh.MetadataPathPrefix(tenant) + require.NoError(t, err, clues.ToCore(err)) + + prevDeltas := map[string]string{} + + for driveID := range previousPaths { + prevDeltas[driveID] = idx(delta, "prev") + } + + mdColl, err := graph.MakeMetadataCollection( + pathPrefix, + []graph.MetadataCollectionEntry{ + graph.NewMetadataEntry(bupMD.DeltaURLsFileName, prevDeltas), + graph.NewMetadataEntry(bupMD.PreviousPathFileName, previousPaths), + }, + func(*support.ControllerOperationStatus) {}, + count.New()) + require.NoError(t, err, "creating metadata collection", clues.ToCore(err)) + + return []data.RestoreCollection{ + dataMock.NewUnversionedRestoreCollection(t, data.NoFetchRestoreCollection{Collection: mdColl}), + } +} + +func compareMetadata( + t *testing.T, + mdColl data.Collection, + expectDeltas map[string]string, + expectPrevPaths map[string]map[string]string, +) { + ctx, flush := tester.NewContext(t) 
+ defer flush() + + colls := []data.RestoreCollection{ + dataMock.NewUnversionedRestoreCollection(t, data.NoFetchRestoreCollection{Collection: mdColl}), + } + + deltas, prevs, _, err := deserializeAndValidateMetadata( + ctx, + colls, + count.New(), + fault.New(true)) + require.NoError(t, err, "deserializing metadata", clues.ToCore(err)) + assert.Equal(t, expectDeltas, deltas, "delta urls") + assert.Equal(t, expectPrevPaths, prevs, "previous paths") +} + +// for comparisons done by collection state +type stateAssertion struct { + itemIDs []string + // should never get set by the user. + // this flag gets flipped when calling assertions.compare. + // any unseen collection will error on requireNoUnseenCollections + sawCollection bool +} + +// for comparisons done by a given collection path +type collectionAssertion struct { + doNotMerge assert.BoolAssertionFunc + states map[data.CollectionState]*stateAssertion + excludedItems map[string]struct{} +} + +type statesToItemIDs map[data.CollectionState][]string + +// TODO(keepers): move excludeItems to a more global position. +func newCollAssertion( + doNotMerge bool, + itemsByState statesToItemIDs, + excludeItems ...string, +) collectionAssertion { + states := map[data.CollectionState]*stateAssertion{} + + for state, itemIDs := range itemsByState { + states[state] = &stateAssertion{ + itemIDs: itemIDs, + } + } + + dnm := assert.False + if doNotMerge { + dnm = assert.True + } + + return collectionAssertion{ + doNotMerge: dnm, + states: states, + excludedItems: makeExcludeMap(excludeItems...), + } +} + +// to aggregate all collection-related expectations in the backup +// map collection path -> collection state -> assertion +type collectionAssertions map[string]collectionAssertion + +// ensure the provided collection matches expectations as set by the test. +func (cas collectionAssertions) compare( + t *testing.T, + coll data.BackupCollection, + excludes *prefixmatcher.StringSetMatchBuilder, +) { + ctx, flush := tester.NewContext(t) + defer flush() + + var ( + itemCh = coll.Items(ctx, fault.New(true)) + itemIDs = []string{} + ) + + p := fullOrPrevPath(t, coll) + + for itm := range itemCh { + itemIDs = append(itemIDs, itm.ID()) + } + + expect := cas[p.String()] + expectState := expect.states[coll.State()] + expectState.sawCollection = true + + assert.ElementsMatchf( + t, + expectState.itemIDs, + itemIDs, + "expected all items to match in collection with:\nstate %q\npath %q", + coll.State(), + p) + + expect.doNotMerge( + t, + coll.DoNotMergeItems(), + "expected collection to have the appropariate doNotMerge flag") + + if result, ok := excludes.Get(p.String()); ok { + assert.Equal( + t, + expect.excludedItems, + result, + "excluded items") + } +} + +// ensure that no collections in the expected set are still flagged +// as sawCollection == false. 
+func (cas collectionAssertions) requireNoUnseenCollections( + t *testing.T, +) { + for p, withPath := range cas { + for _, state := range withPath.states { + require.True( + t, + state.sawCollection, + "results should have contained collection:\n\t%q\t\n%q", + state, p) + } + } +} + +// --------------------------------------------------------------------------- +// tests +// --------------------------------------------------------------------------- + +type CollectionsTreeUnitSuite struct { + tester.Suite +} + +func TestCollectionsTreeUnitSuite(t *testing.T) { + suite.Run(t, &CollectionsTreeUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +func (suite *CollectionsTreeUnitSuite) TestCollections_MakeDriveTombstones() { + badPfxMBH := mock.DefaultOneDriveBH(user) + badPfxMBH.PathPrefixErr = assert.AnError + + twostones := map[string]struct{}{ + "t1": {}, + "t2": {}, + } + + table := []struct { + name string + tombstones map[string]struct{} + c *Collections + expectErr assert.ErrorAssertionFunc + expect assert.ValueAssertionFunc + }{ + { + name: "nil", + tombstones: nil, + c: collWithMBH(mock.DefaultOneDriveBH(user)), + expectErr: assert.NoError, + expect: assert.Empty, + }, + { + name: "none", + tombstones: map[string]struct{}{}, + c: collWithMBH(mock.DefaultOneDriveBH(user)), + expectErr: assert.NoError, + expect: assert.Empty, + }, + { + name: "some tombstones", + tombstones: twostones, + c: collWithMBH(mock.DefaultOneDriveBH(user)), + expectErr: assert.NoError, + expect: assert.NotEmpty, + }, + { + name: "bad prefix path", + tombstones: twostones, + c: collWithMBH(badPfxMBH), + expectErr: assert.Error, + expect: assert.Empty, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + colls, err := test.c.makeDriveTombstones(ctx, test.tombstones, fault.New(true)) + test.expectErr(t, err, clues.ToCore(err)) + test.expect(t, colls) + + for _, coll := range colls { + assert.Equal(t, data.DeletedState, coll.State(), "tombstones should always delete data") + } + }) + } +} + +func (suite *CollectionsTreeUnitSuite) TestCollections_MakeMetadataCollections() { + badMetaPfxMBH := mock.DefaultOneDriveBH(user) + badMetaPfxMBH.MetadataPathPrefixErr = assert.AnError + + table := []struct { + name string + c *Collections + expect assert.ValueAssertionFunc + }{ + { + name: "no errors", + c: collWithMBH(mock.DefaultOneDriveBH(user)), + expect: assert.NotEmpty, + }, + { + name: "bad prefix path", + c: collWithMBH(badMetaPfxMBH), + expect: assert.Empty, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + var ( + t = suite.T() + deltaTokens = map[string]string{} + prevPaths = map[string]map[string]string{} + ) + + ctx, flush := tester.NewContext(t) + defer flush() + + colls := test.c.makeMetadataCollections(ctx, deltaTokens, prevPaths) + test.expect(t, colls) + + for _, coll := range colls { + assert.NotEqual(t, data.DeletedState, coll.State(), "metadata is never deleted") + } + }) + } +} + +// TODO(keepers): implement tree version of populateDriveCollections tests + +// TODO(keepers): implement tree version of TestGet single-drive tests + +func (suite *CollectionsTreeUnitSuite) TestCollections_MakeDriveCollections() { + drive1 := models.NewDrive() + drive1.SetId(ptr.To(idx(drive, 1))) + drive1.SetName(ptr.To(namex(drive, 1))) + + table := []struct { + name string + c *Collections + drive models.Driveable + prevPaths map[string]string + expectErr require.ErrorAssertionFunc + expectCounts 
countTD.Expected + }{ + { + name: "not yet implemented", + c: collWithMBH(mock.DefaultOneDriveBH(user)), + drive: drive1, + expectErr: require.Error, + expectCounts: countTD.Expected{ + count.PrevPaths: 0, + }, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + colls, paths, delta, err := test.c.makeDriveCollections( + ctx, + test.drive, + test.prevPaths, + test.c.counter, + fault.New(true)) + + // TODO(keepers): awaiting implementation + test.expectErr(t, err, clues.ToCore(err)) + assert.Empty(t, colls) + assert.Empty(t, paths) + assert.Empty(t, delta.URL) + + test.expectCounts.Compare(t, test.c.counter) + }) + } +} + +// TODO(keepers): implement tree version of TestGet multi-drive tests + +func (suite *CollectionsTreeUnitSuite) TestCollections_GetTree() { + metadataPath, err := path.BuildMetadata( + tenant, + user, + path.OneDriveService, + path.FilesCategory, + false) + require.NoError(suite.T(), err, "making metadata path", clues.ToCore(err)) + + drive1 := models.NewDrive() + drive1.SetId(ptr.To(idx(drive, 1))) + drive1.SetName(ptr.To(namex(drive, 1))) + + type expected struct { + canUsePrevBackup assert.BoolAssertionFunc + collAssertions collectionAssertions + counts countTD.Expected + deltas map[string]string + err require.ErrorAssertionFunc + prevPaths map[string]map[string]string + skips int + } + + table := []struct { + name string + drivePager *apiMock.Pager[models.Driveable] + enumerator mock.EnumerateItemsDeltaByDrive + previousPaths map[string]map[string]string + + metadata []data.RestoreCollection + expect expected + }{ + { + name: "not yet implemented", + drivePager: pagerForDrives(drive1), + expect: expected{ + canUsePrevBackup: assert.False, + collAssertions: collectionAssertions{ + fullPath(1): newCollAssertion( + doNotMergeItems, + statesToItemIDs{data.NotMovedState: {}}, + id(file)), + }, + counts: countTD.Expected{ + count.PrevPaths: 0, + }, + deltas: map[string]string{}, + err: require.Error, + prevPaths: map[string]map[string]string{}, + skips: 0, + }, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + var ( + mbh = mock.DefaultDriveBHWith(user, test.drivePager, test.enumerator) + c = collWithMBH(mbh) + prevMetadata = makePrevMetadataColls(t, mbh, test.previousPaths) + globalExcludes = prefixmatcher.NewStringSetBuilder() + errs = fault.New(true) + ) + + colls, canUsePrevBackup, err := c.getTree( + ctx, + prevMetadata, + globalExcludes, + errs) + + test.expect.err(t, err, clues.ToCore(err)) + // TODO(keepers): awaiting implementation + assert.Empty(t, colls) + assert.Equal(t, test.expect.skips, len(errs.Skipped())) + test.expect.canUsePrevBackup(t, canUsePrevBackup) + test.expect.counts.Compare(t, c.counter) + + if err != nil { + return + } + + for _, coll := range colls { + collPath := fullOrPrevPath(t, coll) + + if collPath.String() == metadataPath.String() { + compareMetadata( + t, + coll, + test.expect.deltas, + test.expect.prevPaths) + + continue + } + + test.expect.collAssertions.compare(t, coll, globalExcludes) + } + + test.expect.collAssertions.requireNoUnseenCollections(t) + }) + } +} diff --git a/src/internal/m365/collection/drive/limiter.go b/src/internal/m365/collection/drive/limiter.go new file mode 100644 index 000000000..7acf8f62a --- /dev/null +++ b/src/internal/m365/collection/drive/limiter.go @@ -0,0 +1,95 @@ +package drive + +import 
"github.com/alcionai/corso/src/pkg/control" + +// used to mark an unused variable while we transition handling. +const ignoreMe = -1 + +type driveEnumerationStats struct { + numPages int + numAddedFiles int + numContainers int + numBytes int64 +} + +func newPagerLimiter(opts control.Options) *pagerLimiter { + res := &pagerLimiter{limits: opts.PreviewLimits} + + if res.limits.MaxContainers == 0 { + res.limits.MaxContainers = defaultPreviewMaxContainers + } + + if res.limits.MaxItemsPerContainer == 0 { + res.limits.MaxItemsPerContainer = defaultPreviewMaxItemsPerContainer + } + + if res.limits.MaxItems == 0 { + res.limits.MaxItems = defaultPreviewMaxItems + } + + if res.limits.MaxBytes == 0 { + res.limits.MaxBytes = defaultPreviewMaxBytes + } + + if res.limits.MaxPages == 0 { + res.limits.MaxPages = defaultPreviewMaxPages + } + + return res +} + +type pagerLimiter struct { + limits control.PreviewItemLimits +} + +func (l pagerLimiter) effectiveLimits() control.PreviewItemLimits { + return l.limits +} + +func (l pagerLimiter) enabled() bool { + return l.limits.Enabled +} + +// sizeLimit returns the total number of bytes this backup should try to +// contain. +func (l pagerLimiter) sizeLimit() int64 { + return l.limits.MaxBytes +} + +// atItemLimit returns true if the limiter is enabled and has reached the limit +// for individual items added to collections for this backup. +func (l pagerLimiter) atItemLimit(stats *driveEnumerationStats) bool { + return l.enabled() && + (stats.numAddedFiles >= l.limits.MaxItems || + stats.numBytes >= l.limits.MaxBytes) +} + +// atContainerItemsLimit returns true if the limiter is enabled and the current +// number of items is above the limit for the number of items for a container +// for this backup. +func (l pagerLimiter) atContainerItemsLimit(numItems int) bool { + return l.enabled() && numItems >= l.limits.MaxItemsPerContainer +} + +// atContainerPageLimit returns true if the limiter is enabled and the number of +// pages processed so far is beyond the limit for this backup. +func (l pagerLimiter) atPageLimit(stats *driveEnumerationStats) bool { + return l.enabled() && stats.numPages >= l.limits.MaxPages +} + +// atLimit returns true if the limiter is enabled and meets any of the +// conditions for max items, containers, etc for this backup. 
+func (l pagerLimiter) atLimit( + stats *driveEnumerationStats, + containerCount int, +) bool { + nc := stats.numContainers + if nc == 0 && containerCount > 0 { + nc = containerCount + } + + return l.enabled() && + (l.atItemLimit(stats) || + nc >= l.limits.MaxContainers || + stats.numPages >= l.limits.MaxPages) +} diff --git a/src/internal/m365/collection/drive/limiter_test.go b/src/internal/m365/collection/drive/limiter_test.go new file mode 100644 index 000000000..47f070489 --- /dev/null +++ b/src/internal/m365/collection/drive/limiter_test.go @@ -0,0 +1,1010 @@ +package drive + +import ( + "fmt" + "testing" + + "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + "golang.org/x/exp/maps" + + "github.com/alcionai/corso/src/internal/common/idname" + "github.com/alcionai/corso/src/internal/common/prefixmatcher" + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/data" + "github.com/alcionai/corso/src/internal/m365/service/onedrive/mock" + "github.com/alcionai/corso/src/internal/m365/support" + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/control" + "github.com/alcionai/corso/src/pkg/count" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + apiMock "github.com/alcionai/corso/src/pkg/services/m365/api/mock" + "github.com/alcionai/corso/src/pkg/services/m365/api/pagers" +) + +type LimiterUnitSuite struct { + tester.Suite +} + +func TestLimiterUnitSuite(t *testing.T) { + suite.Run(t, &LimiterUnitSuite{Suite: tester.NewUnitSuite(t)}) +} + +// TestGet_PreviewLimits checks that the limits set for preview backups in +// control.Options.ItemLimits are respected. These tests run a reduced set of +// checks that don't examine metadata, collection states, etc. They really just +// check the expected items appear. +func (suite *LimiterUnitSuite) TestGet_PreviewLimits() { + metadataPath, err := path.BuildMetadata( + tenant, + user, + path.OneDriveService, + path.FilesCategory, + false) + require.NoError(suite.T(), err, "making metadata path", clues.ToCore(err)) + + drive1 := models.NewDrive() + drive1.SetId(ptr.To(idx(drive, 1))) + drive1.SetName(ptr.To(namex(drive, 1))) + + drive2 := models.NewDrive() + drive2.SetId(ptr.To(idx(drive, 2))) + drive2.SetName(ptr.To(namex(drive, 2))) + + table := []struct { + name string + limits control.PreviewItemLimits + drives []models.Driveable + enumerator mock.EnumerateItemsDeltaByDrive + // Collection name -> set of item IDs. We can't check item data because + // that's not mocked out. Metadata is checked separately. 
+ expectedCollections map[string][]string + }{ + { + name: "OneDrive SinglePage ExcludeItemsOverMaxSize", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 5, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{{ + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItemWithSize(idx(file, 1), namex(file, 1), parent(1), rootID, 7, isFile), + driveItemWithSize(idx(file, 2), namex(file, 2), parent(1), rootID, 1, isFile), + driveItemWithSize(idx(file, 3), namex(file, 3), parent(1), rootID, 1, isFile), + }, + }}, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 2), idx(file, 3)}, + }, + }, + { + name: "OneDrive SinglePage SingleFolder ExcludeCombinedItemsOverMaxSize", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 3, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{{ + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItemWithSize(idx(file, 1), namex(file, 1), parent(1), rootID, 1, isFile), + driveItemWithSize(idx(file, 2), namex(file, 2), parent(1), rootID, 2, isFile), + driveItemWithSize(idx(file, 3), namex(file, 3), parent(1), rootID, 1, isFile), + }, + }}, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2)}, + }, + }, + { + name: "OneDrive SinglePage MultipleFolders ExcludeCombinedItemsOverMaxSize", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 3, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{{ + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItemWithSize(idx(file, 1), namex(file, 1), parent(1), rootID, 1, isFile), + driveItemWithSize(idx(folder, 1), namex(folder, 1), parent(1), rootID, 1, isFolder), + driveItemWithSize(idx(file, 2), namex(file, 2), parent(1, namex(folder, 1)), idx(folder, 1), 2, isFile), + driveItemWithSize(idx(file, 3), namex(file, 3), parent(1, namex(folder, 1)), idx(folder, 1), 1, isFile), + }, + }}, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 2)}, + }, + }, + { + name: "OneDrive SinglePage SingleFolder ItemLimit", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 3, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{{ + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), 
parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1), rootID, isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1), rootID, isFile), + driveItem(idx(file, 6), namex(file, 6), parent(1), rootID, isFile), + }, + }}, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, + }, + }, + { + name: "OneDrive MultiplePages MultipleFolders ItemLimit WithRepeatedItem", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 3, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + // Repeated items shouldn't count against the limit. + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 6), namex(file, 6), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 3)}, + }, + }, + { + name: "OneDrive MultiplePages PageLimit", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 1, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 6), namex(file, 6), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2)}, + }, + }, + { + name: "OneDrive MultiplePages PerContainerItemLimit", + limits: 
control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 1, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + // Root has an additional item. It's hard to fix that in the code + // though. + fullPath(1): {idx(file, 1), idx(file, 2)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4)}, + }, + }, + { + name: "OneDrive MultiplePages PerContainerItemLimit ItemUpdated", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 3, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), + driveItem(idx(file, 1), namex(file, 1), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), + // Updated item that shouldn't count against the limit a second time. 
+ driveItem(idx(file, 2), namex(file, 2), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {}, + fullPath(1, namex(folder, 0)): {idx(folder, 0), idx(file, 1), idx(file, 2), idx(file, 3)}, + }, + }, + { + name: "OneDrive MultiplePages PerContainerItemLimit MoveItemBetweenFolders", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 2, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + // Put folder 0 at limit. + driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), + driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), + // Try to move item from root to folder 0 which is already at the limit. + driveItem(idx(file, 1), namex(file, 1), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2)}, + fullPath(1, namex(folder, 0)): {idx(folder, 0), idx(file, 3), idx(file, 4)}, + }, + }, + { + name: "OneDrive MultiplePages ContainerLimit LastContainerSplitAcrossPages", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 2, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2), 
idx(file, 3)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)}, + }, + }, + { + name: "OneDrive MultiplePages ContainerLimit NextContainerOnSamePage", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 2, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + // This container shouldn't be returned. + driveItem(idx(folder, 2), namex(folder, 2), parent(1), rootID, isFolder), + driveItem(idx(file, 7), namex(file, 7), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + driveItem(idx(file, 8), namex(file, 8), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + driveItem(idx(file, 9), namex(file, 9), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)}, + }, + }, + { + name: "OneDrive MultiplePages ContainerLimit NextContainerOnNextPage", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 2, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + // This container shouldn't be returned. 
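+ // (the root and folder 1 already count toward MaxContainers=2, so folder 2 is past the limit)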
+ driveItem(idx(folder, 2), namex(folder, 2), parent(1), rootID, isFolder), + driveItem(idx(file, 7), namex(file, 7), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + driveItem(idx(file, 8), namex(file, 8), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + driveItem(idx(file, 9), namex(file, 9), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)}, + }, + }, + { + name: "TwoDrives SeparateLimitAccounting", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 3, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1, drive2}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1), rootID, isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1), rootID, isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + idx(drive, 2): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(2), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(2), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(2), rootID, isFile), + driveItem(idx(file, 4), namex(file, 4), parent(2), rootID, isFile), + driveItem(idx(file, 5), namex(file, 5), parent(2), rootID, isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, + fullPath(2): {idx(file, 1), idx(file, 2), idx(file, 3)}, + }, + }, + { + name: "OneDrive PreviewDisabled MinimumLimitsIgnored", + limits: control.PreviewItemLimits{ + MaxItems: 1, + MaxItemsPerContainer: 1, + MaxContainers: 1, + MaxBytes: 1, + MaxPages: 1, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + }, + 
DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)}, + }, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + mockDrivePager := &apiMock.Pager[models.Driveable]{ + ToReturn: []apiMock.PagerResult[models.Driveable]{ + {Values: test.drives}, + }, + } + + mbh := mock.DefaultOneDriveBH(user) + mbh.DrivePagerV = mockDrivePager + mbh.DriveItemEnumeration = test.enumerator + + opts := control.DefaultOptions() + opts.PreviewLimits = test.limits + + c := NewCollections( + mbh, + tenant, + idname.NewProvider(user, user), + func(*support.ControllerOperationStatus) {}, + opts, + count.New()) + + errs := fault.New(true) + + delList := prefixmatcher.NewStringSetBuilder() + + cols, canUsePreviousBackup, err := c.Get(ctx, nil, delList, errs) + require.NoError(t, err, clues.ToCore(err)) + + assert.True(t, canUsePreviousBackup, "can use previous backup") + assert.Empty(t, errs.Skipped()) + + collPaths := []string{} + + for _, baseCol := range cols { + // There shouldn't be any deleted collections. + if !assert.NotEqual( + t, + data.DeletedState, + baseCol.State(), + "collection marked deleted") { + continue + } + + folderPath := baseCol.FullPath().String() + + if folderPath == metadataPath.String() { + continue + } + + collPaths = append(collPaths, folderPath) + + // TODO: We should really be getting items in the collection + // via the Items() channel. The lack of that makes this check a bit more + // brittle since internal details can change. The wiring to support + // mocked GetItems is available. We just haven't plugged it in yet. + col, ok := baseCol.(*Collection) + require.True(t, ok, "getting onedrive.Collection handle") + + itemIDs := make([]string, 0, len(col.driveItems)) + + for id := range col.driveItems { + itemIDs = append(itemIDs, id) + } + + assert.ElementsMatchf( + t, + test.expectedCollections[folderPath], + itemIDs, + "expected elements to match in collection with path %q", + folderPath) + } + + assert.ElementsMatch( + t, + maps.Keys(test.expectedCollections), + collPaths, + "collection paths") + }) + } +} + +// TestGet_PreviewLimits_Defaults checks that default values are used when +// making a preview backup if the user didn't provide some options. +// These tests run a reduced set of checks that really just look for item counts +// and such. Other tests are expected to provide more comprehensive checks. +func (suite *LimiterUnitSuite) TestGet_PreviewLimits_Defaults() { + // Add a check that will fail if we make the default smaller than expected. + require.LessOrEqual( + suite.T(), + int64(1024*1024), + defaultPreviewMaxBytes, + "default number of bytes changed; DefaultNumBytes test case may need updating!") + require.Zero( + suite.T(), + defaultPreviewMaxBytes%(1024*1024), + "default number of bytes isn't divisible by 1MB; DefaultNumBytes test case may need updating!") + + // The number of pages returned can be indirectly tested by checking how many + // containers/items were returned.
+ type expected struct { + numItems int + numContainers int + numItemsPerContainer int + } + + metadataPath, err := path.BuildMetadata( + tenant, + user, + path.OneDriveService, + path.FilesCategory, + false) + require.NoError(suite.T(), err, "making metadata path", clues.ToCore(err)) + + drive1 := models.NewDrive() + drive1.SetId(ptr.To(idx(drive, 1))) + drive1.SetName(ptr.To(namex(drive, 1))) + + // The number of pages the test generates can be controlled by setting the + // number of containers. The test will put one (non-root) container per page. + table := []struct { + name string + numContainers int + numItemsPerContainer int + itemSize int64 + limits control.PreviewItemLimits + expect expected + }{ + { + name: "DefaultNumItems", + numContainers: 1, + numItemsPerContainer: defaultPreviewMaxItems + 1, + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItemsPerContainer: 99999999, + MaxContainers: 99999999, + MaxBytes: 99999999, + MaxPages: 99999999, + }, + expect: expected{ + numItems: defaultPreviewMaxItems, + numContainers: 1, + numItemsPerContainer: defaultPreviewMaxItems, + }, + }, + { + name: "DefaultNumContainers", + numContainers: defaultPreviewMaxContainers + 1, + numItemsPerContainer: 1, + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 99999999, + MaxItemsPerContainer: 99999999, + MaxBytes: 99999999, + MaxPages: 99999999, + }, + expect: expected{ + // Root is counted as a container in the code but won't be counted or + // have items in the test. + numItems: defaultPreviewMaxContainers - 1, + numContainers: defaultPreviewMaxContainers - 1, + numItemsPerContainer: 1, + }, + }, + { + name: "DefaultNumItemsPerContainer", + numContainers: 1, + numItemsPerContainer: defaultPreviewMaxItemsPerContainer + 1, + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 99999999, + MaxContainers: 99999999, + MaxBytes: 99999999, + MaxPages: 99999999, + }, + expect: expected{ + numItems: defaultPreviewMaxItemsPerContainer, + numContainers: 1, + numItemsPerContainer: defaultPreviewMaxItemsPerContainer, + }, + }, + { + name: "DefaultNumPages", + numContainers: defaultPreviewMaxPages + 1, + numItemsPerContainer: 1, + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 99999999, + MaxContainers: 99999999, + MaxItemsPerContainer: 99999999, + MaxBytes: 99999999, + }, + expect: expected{ + numItems: defaultPreviewMaxPages, + numContainers: defaultPreviewMaxPages, + numItemsPerContainer: 1, + }, + }, + { + name: "DefaultNumBytes", + numContainers: 1, + numItemsPerContainer: int(defaultPreviewMaxBytes/1024/1024) + 1, + itemSize: 1024 * 1024, + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 99999999, + MaxContainers: 99999999, + MaxItemsPerContainer: 99999999, + MaxPages: 99999999, + }, + expect: expected{ + numItems: int(defaultPreviewMaxBytes) / 1024 / 1024, + numContainers: 1, + numItemsPerContainer: int(defaultPreviewMaxBytes) / 1024 / 1024, + }, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + mockDrivePager := &apiMock.Pager[models.Driveable]{ + ToReturn: []apiMock.PagerResult[models.Driveable]{ + {Values: []models.Driveable{drive1}}, + }, + } + + mbh := mock.DefaultOneDriveBH(user) + mbh.DrivePagerV = mockDrivePager + + pages := make([]mock.NextPage, 0, test.numContainers) + + for containerIdx := 0; containerIdx < test.numContainers; containerIdx++ { + page := mock.NextPage{ + Items: []models.DriveItemable{ + driveRootItem(rootID), + 
driveItem( + idx(folder, containerIdx), + namex(folder, containerIdx), + parent(1), + rootID, + isFolder), + }, + } + + for itemIdx := 0; itemIdx < test.numItemsPerContainer; itemIdx++ { + itemSuffix := fmt.Sprintf("%d-%d", containerIdx, itemIdx) + + page.Items = append(page.Items, driveItemWithSize( + idx(file, itemSuffix), + namex(file, itemSuffix), + parent(1, namex(folder, containerIdx)), + idx(folder, containerIdx), + test.itemSize, + isFile)) + } + + pages = append(pages, page) + } + + mbh.DriveItemEnumeration = mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: pages, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + } + + opts := control.DefaultOptions() + opts.PreviewLimits = test.limits + + c := NewCollections( + mbh, + tenant, + idname.NewProvider(user, user), + func(*support.ControllerOperationStatus) {}, + opts, + count.New()) + + errs := fault.New(true) + + delList := prefixmatcher.NewStringSetBuilder() + + cols, canUsePreviousBackup, err := c.Get(ctx, nil, delList, errs) + require.NoError(t, err, clues.ToCore(err)) + + assert.True(t, canUsePreviousBackup, "can use previous backup") + assert.Empty(t, errs.Skipped()) + + var ( + numContainers int + numItems int + ) + + for _, baseCol := range cols { + // There shouldn't be any deleted collections. + if !assert.NotEqual( + t, + data.DeletedState, + baseCol.State(), + "collection marked deleted") { + continue + } + + folderPath := baseCol.FullPath().String() + + if folderPath == metadataPath.String() { + continue + } + + // Skip the root container and don't count it because we don't put + // anything in it. + dp, err := path.ToDrivePath(baseCol.FullPath()) + require.NoError(t, err, clues.ToCore(err)) + + if len(dp.Folders) == 0 { + continue + } + + numContainers++ + + // TODO: We should really be getting items in the collection + // via the Items() channel. The lack of that makes this check a bit more + // brittle since internal details can change. The wiring to support + // mocked GetItems is available. We just haven't plugged it in yet. + col, ok := baseCol.(*Collection) + require.True(t, ok, "getting onedrive.Collection handle") + + numItems += len(col.driveItems) + + // Add one to account for the folder permissions item. + assert.Len( + t, + col.driveItems, + test.expect.numItemsPerContainer+1, + "items in container %v", + col.FullPath()) + } + + assert.Equal( + t, + test.expect.numContainers, + numContainers, + "total containers") + + // Each container also gets an item so account for that here.
+ assert.Equal( + t, + test.expect.numItems+test.expect.numContainers, + numItems, + "total items across all containers") + }) + } +} diff --git a/src/internal/m365/service/onedrive/mock/handlers.go b/src/internal/m365/service/onedrive/mock/handlers.go index acffd04af..f46011800 100644 --- a/src/internal/m365/service/onedrive/mock/handlers.go +++ b/src/internal/m365/service/onedrive/mock/handlers.go @@ -15,6 +15,7 @@ import ( "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" "github.com/alcionai/corso/src/pkg/services/m365/api" + apiMock "github.com/alcionai/corso/src/pkg/services/m365/api/mock" "github.com/alcionai/corso/src/pkg/services/m365/api/pagers" ) @@ -106,6 +107,18 @@ func DefaultSharePointBH(resourceOwner string) *BackupHandler[models.DriveItemab } } +func DefaultDriveBHWith( + resource string, + drivePager *apiMock.Pager[models.Driveable], + enumerator EnumerateItemsDeltaByDrive, +) *BackupHandler[models.DriveItemable] { + mbh := DefaultOneDriveBH(resource) + mbh.DrivePagerV = drivePager + mbh.DriveItemEnumeration = enumerator + + return mbh +} + func (h BackupHandler[T]) PathPrefix(tID, driveID string) (path.Path, error) { pp, err := h.PathPrefixFn(tID, h.ProtectedResource.ID(), driveID) if err != nil { @@ -121,7 +134,7 @@ func (h BackupHandler[T]) MetadataPathPrefix(tID string) (path.Path, error) { return nil, err } - return pp, h.PathPrefixErr + return pp, h.MetadataPathPrefixErr } func (h BackupHandler[T]) CanonicalPath(pb *path.Builder, tID string) (path.Path, error) { diff --git a/src/pkg/count/testdata/count.go b/src/pkg/count/testdata/count.go new file mode 100644 index 000000000..0f81318b9 --- /dev/null +++ b/src/pkg/count/testdata/count.go @@ -0,0 +1,30 @@ +package testdata + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/alcionai/corso/src/pkg/count" +) + +type Expected map[count.Key]int64 + +func (e Expected) Compare( + t *testing.T, + bus *count.Bus, +) { + vs := bus.Values() + results := map[count.Key]int64{} + + for k := range e { + results[k] = bus.Get(k) + delete(vs, string(k)) + } + + for k, v := range vs { + t.Logf("unchecked count %q: %d", k, v) + } + + assert.Equal(t, e, Expected(results)) +}
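+ +// Example (illustrative, not part of this change): given whatever *count.Bus the code +// under test produced, a test seeds Expected with only the counters it cares about and +// compares them once the operation has run: +// +//	expect := Expected{count.PrevPaths: 2} +//	expect.Compare(t, bus)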