From bcf290d628ce72c4625498f73a83373242855bdf Mon Sep 17 00:00:00 2001
From: ashmrtn <3891298+ashmrtn@users.noreply.github.com>
Date: Fri, 17 Nov 2023 16:46:36 -0800
Subject: [PATCH] Preview backup for drive-backed services (#4699)

Add logic and tests for preview backups in drive-backed services. Also
slightly change a few of the options for preview backup limits.

---

#### Does this PR need a docs update or release note?

- [ ] :white_check_mark: Yes, it's included
- [ ] :clock1: Yes, but in a later PR
- [x] :no_entry: No

#### Type of change

- [x] :sunflower: Feature
- [ ] :bug: Bugfix
- [ ] :world_map: Documentation
- [ ] :robot: Supportability/Tests
- [ ] :computer: CI/Deployment
- [ ] :broom: Tech Debt/Cleanup

#### Test Plan

- [ ] :muscle: Manual
- [x] :zap: Unit test
- [ ] :green_heart: E2E
---
 .../m365/collection/drive/collection.go       |  16 +
 .../m365/collection/drive/collections.go      | 171 ++++-
 .../m365/collection/drive/collections_test.go | 709 ++++++++++++++++++
 .../m365/service/onedrive/mock/handlers.go    |   2 +
 src/pkg/control/options.go                    |   3 +-
 src/pkg/services/m365/api/pagers/pagers.go    |   1 +
 6 files changed, 900 insertions(+), 2 deletions(-)

diff --git a/src/internal/m365/collection/drive/collection.go b/src/internal/m365/collection/drive/collection.go
index b416648aa..6375a71b1 100644
--- a/src/internal/m365/collection/drive/collection.go
+++ b/src/internal/m365/collection/drive/collection.go
@@ -215,6 +215,22 @@ func (oc *Collection) IsEmpty() bool {
 	return len(oc.driveItems) == 0
 }
 
+// ContainsItem returns true if the collection has the given item as one of its
+// children.
+func (oc Collection) ContainsItem(item models.DriveItemable) bool {
+	_, ok := oc.driveItems[ptr.Val(item.GetId())]
+	return ok
+}
+
+// CountAddedItems returns the number of non-deleted items in the collection.
+func (oc Collection) CountAddedItems() int {
+	// Subtract one since the folder is added to the collection so we get folder
+	// metadata. The collection of the root folder of the drive doesn't have its
+	// own folder reference since it doesn't have permissions the user can change,
+	// but it's close enough for our purposes.
+	return len(oc.driveItems) - 1
+}
+
 // Items() returns the channel containing M365 Exchange objects
 func (oc *Collection) Items(
 	ctx context.Context,
diff --git a/src/internal/m365/collection/drive/collections.go b/src/internal/m365/collection/drive/collections.go
index ce93a9fa8..dbbb7761b 100644
--- a/src/internal/m365/collection/drive/collections.go
+++ b/src/internal/m365/collection/drive/collections.go
@@ -29,7 +29,15 @@ import (
 	"github.com/alcionai/corso/src/pkg/services/m365/api/pagers"
 )
 
-const restrictedDirectory = "Site Pages"
+const (
+	restrictedDirectory = "Site Pages"
+
+	defaultPreviewNumContainers              = 5
+	defaultPreviewNumItemsPerContainer       = 10
+	defaultPreviewNumItems                   = defaultPreviewNumContainers * defaultPreviewNumItemsPerContainer
+	defaultPreviewNumBytes             int64 = 100 * 1024 * 1024
+	defaultPreviewNumPages                   = 50
+)
 
 // Collections is used to retrieve drive data for a
 // resource owner, which can be either a user or a sharepoint site.
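A minimal sketch of how a caller opts into these limits through `control.Options`, based on the `PreviewItemLimits` fields in this patch; the values below are illustrative only, and any limit left at zero falls back to the `defaultPreview*` constants above when the limiter is built:

```go
package main

import "github.com/alcionai/corso/src/pkg/control"

func previewBackupOptions() control.Options {
	opts := control.DefaultOptions()

	// Enabled must be set explicitly; zero-valued limits get defaults.
	opts.PreviewLimits = control.PreviewItemLimits{
		Enabled:              true,
		MaxContainers:        5,
		MaxItemsPerContainer: 10,
		MaxItems:             50,
		MaxBytes:             100 * 1024 * 1024,
		MaxPages:             50,
	}

	return opts
}
```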
@@ -742,6 +750,83 @@ func (c *Collections) getCollectionPath(
 	return collectionPath, nil
 }
 
+type driveEnumerationStats struct {
+	numPages      int
+	numAddedFiles int
+	numContainers int
+	numBytes      int64
+}
+
+func newPagerLimiter(opts control.Options) *pagerLimiter {
+	res := &pagerLimiter{limits: opts.PreviewLimits}
+
+	if res.limits.MaxContainers == 0 {
+		res.limits.MaxContainers = defaultPreviewNumContainers
+	}
+
+	if res.limits.MaxItemsPerContainer == 0 {
+		res.limits.MaxItemsPerContainer = defaultPreviewNumItemsPerContainer
+	}
+
+	if res.limits.MaxItems == 0 {
+		res.limits.MaxItems = defaultPreviewNumItems
+	}
+
+	if res.limits.MaxBytes == 0 {
+		res.limits.MaxBytes = defaultPreviewNumBytes
+	}
+
+	if res.limits.MaxPages == 0 {
+		res.limits.MaxPages = defaultPreviewNumPages
+	}
+
+	return res
+}
+
+type pagerLimiter struct {
+	limits control.PreviewItemLimits
+}
+
+func (l pagerLimiter) enabled() bool {
+	return l.limits.Enabled
+}
+
+// sizeLimit returns the total number of bytes this backup should try to
+// contain.
+func (l pagerLimiter) sizeLimit() int64 {
+	return l.limits.MaxBytes
+}
+
+// atItemLimit returns true if the limiter is enabled and has reached the limit
+// for individual items added to collections for this backup.
+func (l pagerLimiter) atItemLimit(stats *driveEnumerationStats) bool {
+	return l.enabled() &&
+		(stats.numAddedFiles >= l.limits.MaxItems ||
+			stats.numBytes >= l.limits.MaxBytes)
+}
+
+// atContainerItemsLimit returns true if the limiter is enabled and the current
+// number of items has reached the per-container item limit for this backup.
+func (l pagerLimiter) atContainerItemsLimit(numItems int) bool {
+	return l.enabled() && numItems >= l.limits.MaxItemsPerContainer
+}
+
+// atPageLimit returns true if the limiter is enabled and the number of pages
+// processed so far is at or beyond the limit for this backup.
+func (l pagerLimiter) atPageLimit(stats *driveEnumerationStats) bool {
+	return l.enabled() && stats.numPages >= l.limits.MaxPages
+}
+
+// atLimit returns true if the limiter is enabled and has met any of the
+// limits (max items, containers, pages, etc.) for this backup.
+func (l pagerLimiter) atLimit(stats *driveEnumerationStats) bool {
+	return l.enabled() &&
+		(l.atItemLimit(stats) ||
+			stats.numContainers >= l.limits.MaxContainers ||
+			stats.numPages >= l.limits.MaxPages)
+}
+
 // PopulateDriveCollections initializes and adds the provided drive items to Collections
 // A new collection is created for every drive folder.
 // Along with populating the collection items and updating the excluded item IDs, this func
@@ -772,9 +857,13 @@ func (c *Collections) PopulateDriveCollections(
 		// already seen. This will help us track in case a folder was
 		// recreated multiple times in between a run.
 		seenFolders = map[string]string{}
+
+		limiter = newPagerLimiter(c.ctrl)
+		stats   = &driveEnumerationStats{}
 	)
 
 	ctx = clues.Add(ctx, "invalid_prev_delta", invalidPrevDelta)
+	logger.Ctx(ctx).Infow("running backup with limiter", "limiter", limiter)
 
 	if !invalidPrevDelta {
 		maps.Copy(newPrevPaths, oldPrevPaths)
@@ -788,6 +877,12 @@ func (c *Collections) PopulateDriveCollections(
 		Select: api.DefaultDriveItemProps(),
 	})
 
+	// Needed since folders are mixed in with items. This allows us to handle
+	// hitting the maxContainer limit while (hopefully) still adding items to the
+	// container we reached the limit on. It may not behave as expected across
+	// page boundaries if items in other folders have also changed.
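+	// For example: if the limit-hitting container's items straddle a page
+	// boundary and the next page lists changes from a different folder first,
+	// the rest of that container's items on the page may be dropped.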
+	var lastContainerID string
+
 	for page, reset, done := pager.NextPage(); !done; page, reset, done = pager.NextPage() {
 		if el.Failure() != nil {
 			break
@@ -805,7 +900,10 @@ func (c *Collections) PopulateDriveCollections(
 			c.CollectionMap[driveID] = map[string]*Collection{}
 			invalidPrevDelta = true
 
+			// Reset collections and stats counts since we're starting over.
 			c.resetStats()
+
+			stats = &driveEnumerationStats{}
 		}
 
 		for _, item := range page {
@@ -813,6 +911,24 @@ func (c *Collections) PopulateDriveCollections(
 				break
 			}
 
+			// Check if we got the max number of containers we're looking for and also
+			// processed items for the final container.
+			if item.GetFolder() != nil || item.GetPackageEscaped() != nil {
+				id := ptr.Val(item.GetId())
+
+				// Don't check for containers we've already seen.
+				if _, ok := c.CollectionMap[driveID][id]; !ok {
+					if id != lastContainerID {
+						if limiter.atLimit(stats) {
+							break
+						}
+
+						lastContainerID = id
+						stats.numContainers++
+					}
+				}
+			}
+
 			err := c.processItem(
 				ctx,
 				item,
@@ -826,13 +942,45 @@ func (c *Collections) PopulateDriveCollections(
 				topLevelPackages,
 				invalidPrevDelta,
 				counter,
+				stats,
+				limiter,
 				el)
 			if err != nil {
 				el.AddRecoverable(ctx, clues.Stack(err))
 			}
+
+			// Check if we reached the item or size limit while processing this page.
+			// The check after this loop will get us out of the pager.
+			// We don't want to check all limits because it's possible we've reached
+			// the container limit but haven't reached the item limit, or haven't yet
+			// added items to the last container we found.
+			if limiter.atItemLimit(stats) {
+				break
+			}
 		}
 
+		stats.numPages++
+
+		// Stop enumeration early if we've reached the item or page limit. Do this
+		// at the end of the loop so we don't request another page in the
+		// background.
+		//
+		// We don't want to break on just the container limit here because it's
+		// possible that there are more items in the current (final) container that
+		// we're processing. We need to see the next page to determine if we've
+		// reached the end of the container. Note that this doesn't take into
+		// account the number of items in the current container, so it's possible it
+		// will fetch more data when it doesn't really need to.
+		if limiter.atPageLimit(stats) || limiter.atItemLimit(stats) {
+			break
+		}
 	}
 
+	// Always cancel the pager so that even if we exit early from the loop above
+	// we don't deadlock. Cancelling a pager that's already completed is
+	// essentially a noop.
+	pager.Cancel()
+
 	du, err := pager.Results()
 	if err != nil {
 		return du, nil, clues.Stack(err)
@@ -853,6 +1001,8 @@ func (c *Collections) processItem(
 	topLevelPackages map[string]struct{},
 	invalidPrevDelta bool,
 	counter *count.Bus,
+	stats *driveEnumerationStats,
+	limiter *pagerLimiter,
 	skipper fault.AddSkipper,
 ) error {
 	var (
@@ -1037,6 +1187,23 @@ func (c *Collections) processItem(
 		return clues.NewWC(ctx, "item seen before parent folder").Label(count.ItemBeforeParent)
 	}
 
+	// Don't move items if the new collection has already reached its limit. This
+	// helps ensure we don't get some pathological case where we end up dropping
+	// a bunch of items that got moved.
+	//
+	// We need to check if the collection already contains the item though since
+	// it could be an item update instead of a move.
+	if !collection.ContainsItem(item) &&
+		limiter.atContainerItemsLimit(collection.CountAddedItems()) {
+		return nil
+	}
+
+	// Skip large files that don't fit within the size limit.
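+	// For example, with MaxBytes = 5 a 7-byte file is skipped outright, while
+	// later 1-byte files still fit (see the ExcludeItemsOverMaxSize test).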
+	if limiter.enabled() &&
+		limiter.sizeLimit() < ptr.Val(item.GetSize())+stats.numBytes {
+		return nil
+	}
+
 	// This will only kick in if the file was moved multiple times
 	// within a single delta query. We delete the file from the previous
 	// collection so that it doesn't appear in two places.
@@ -1061,6 +1228,8 @@ func (c *Collections) processItem(
 	if collection.Add(item) && !alreadyAdded {
 		c.NumItems++
 		c.NumFiles++
+		stats.numAddedFiles++
+		stats.numBytes += ptr.Val(item.GetSize())
 	}
 
 	// Do this after adding the file to the collection so if we fail to add
diff --git a/src/internal/m365/collection/drive/collections_test.go b/src/internal/m365/collection/drive/collections_test.go
index 4065b39d3..277da771e 100644
--- a/src/internal/m365/collection/drive/collections_test.go
+++ b/src/internal/m365/collection/drive/collections_test.go
@@ -129,6 +129,17 @@ func driveItem(
 	return coreItem(id, name, parentPath, parentID, it)
 }
 
+func driveItemWithSize(
+	id, name, parentPath, parentID string,
+	size int64,
+	it itemType,
+) models.DriveItemable {
+	res := coreItem(id, name, parentPath, parentID, it)
+	res.SetSize(ptr.To(size))
+
+	return res
+}
+
 func fileItem(
 	id, name, parentPath, parentID, url string,
 	deleted bool,
@@ -3553,6 +3564,704 @@ func (suite *CollectionsUnitSuite) TestGet() {
 	}
 }
 
+// TestGet_PreviewLimits checks that the limits set for preview backups in
+// control.Options.PreviewLimits are respected. These tests run a reduced set
+// of checks that don't examine metadata, collection states, etc. They really
+// just check that the expected items appear.
+func (suite *CollectionsUnitSuite) TestGet_PreviewLimits() {
+	metadataPath, err := path.BuildMetadata(
+		tenant,
+		user,
+		path.OneDriveService,
+		path.FilesCategory,
+		false)
+	require.NoError(suite.T(), err, "making metadata path", clues.ToCore(err))
+
+	drive1 := models.NewDrive()
+	drive1.SetId(ptr.To(idx(drive, 1)))
+	drive1.SetName(ptr.To(namex(drive, 1)))
+
+	drive2 := models.NewDrive()
+	drive2.SetId(ptr.To(idx(drive, 2)))
+	drive2.SetName(ptr.To(namex(drive, 2)))
+
+	table := []struct {
+		name       string
+		limits     control.PreviewItemLimits
+		drives     []models.Driveable
+		enumerator mock.EnumerateItemsDeltaByDrive
+		// Collection name -> set of item IDs. We can't check item data because
+		// that's not mocked out. Metadata is checked separately.
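+		// Note that folder collections also list their own folder ID, since the
+		// folder item is added to its collection for metadata.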
+ expectedCollections map[string][]string + }{ + { + name: "OneDrive SinglePage ExcludeItemsOverMaxSize", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 5, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{{ + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItemWithSize(idx(file, 1), namex(file, 1), parent(1), rootID, 7, isFile), + driveItemWithSize(idx(file, 2), namex(file, 2), parent(1), rootID, 1, isFile), + driveItemWithSize(idx(file, 3), namex(file, 3), parent(1), rootID, 1, isFile), + }, + }}, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 2), idx(file, 3)}, + }, + }, + { + name: "OneDrive SinglePage SingleFolder ExcludeCombinedItemsOverMaxSize", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 3, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{{ + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItemWithSize(idx(file, 1), namex(file, 1), parent(1), rootID, 1, isFile), + driveItemWithSize(idx(file, 2), namex(file, 2), parent(1), rootID, 2, isFile), + driveItemWithSize(idx(file, 3), namex(file, 3), parent(1), rootID, 1, isFile), + }, + }}, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2)}, + }, + }, + { + name: "OneDrive SinglePage MultipleFolders ExcludeCombinedItemsOverMaxSize", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 3, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{{ + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItemWithSize(idx(file, 1), namex(file, 1), parent(1), rootID, 1, isFile), + driveItemWithSize(idx(folder, 1), namex(folder, 1), parent(1), rootID, 1, isFolder), + driveItemWithSize(idx(file, 2), namex(file, 2), parent(1, namex(folder, 1)), idx(folder, 1), 2, isFile), + driveItemWithSize(idx(file, 3), namex(file, 3), parent(1, namex(folder, 1)), idx(folder, 1), 1, isFile), + }, + }}, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 2)}, + }, + }, + { + name: "OneDrive SinglePage SingleFolder ItemLimit", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 3, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{{ + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), 
parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1), rootID, isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1), rootID, isFile), + driveItem(idx(file, 6), namex(file, 6), parent(1), rootID, isFile), + }, + }}, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, + }, + }, + { + name: "OneDrive MultiplePages MultipleFolders ItemLimit WithRepeatedItem", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 3, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + // Repeated items shouldn't count against the limit. + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 6), namex(file, 6), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 3)}, + }, + }, + { + name: "OneDrive MultiplePages PageLimit", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 1, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 6), namex(file, 6), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2)}, + }, + }, + { + name: "OneDrive MultiplePages PerContainerItemLimit", + limits: 
control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 1, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + // Root has an additional item. It's hard to fix that in the code + // though. + fullPath(1): {idx(file, 1), idx(file, 2)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4)}, + }, + }, + { + name: "OneDrive MultiplePages PerContainerItemLimit ItemUpdated", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 3, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), + driveItem(idx(file, 1), namex(file, 1), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), + // Updated item that shouldn't count against the limit a second time. 
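+							// Updates are detected via Collection.ContainsItem, so
+							// they don't trip the per-container limit again.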
+ driveItem(idx(file, 2), namex(file, 2), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {}, + fullPath(1, namex(folder, 0)): {idx(folder, 0), idx(file, 1), idx(file, 2), idx(file, 3)}, + }, + }, + { + name: "OneDrive MultiplePages PerContainerItemLimit MoveItemBetweenFolders", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 2, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + // Put folder 0 at limit. + driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), + driveItem(idx(file, 3), namex(file, 3), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 0), namex(folder, 0), parent(1), rootID, isFolder), + // Try to move item from root to folder 0 which is already at the limit. + driveItem(idx(file, 1), namex(file, 1), parent(1, namex(folder, 0)), idx(folder, 0), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2)}, + fullPath(1, namex(folder, 0)): {idx(folder, 0), idx(file, 3), idx(file, 4)}, + }, + }, + { + name: "OneDrive MultiplePages ContainerLimit LastContainerSplitAcrossPages", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 2, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2), 
idx(file, 3)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)}, + }, + }, + { + name: "OneDrive MultiplePages ContainerLimit NextContainerOnSamePage", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 2, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + // This container shouldn't be returned. + driveItem(idx(folder, 2), namex(folder, 2), parent(1), rootID, isFolder), + driveItem(idx(file, 7), namex(file, 7), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + driveItem(idx(file, 8), namex(file, 8), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + driveItem(idx(file, 9), namex(file, 9), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)}, + }, + }, + { + name: "OneDrive MultiplePages ContainerLimit NextContainerOnNextPage", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 999, + MaxItemsPerContainer: 999, + MaxContainers: 2, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + // This container shouldn't be returned. 
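+							// With MaxContainers = 2, the root and folder 1 already
+							// exhaust the container budget.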
+ driveItem(idx(folder, 2), namex(folder, 2), parent(1), rootID, isFolder), + driveItem(idx(file, 7), namex(file, 7), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + driveItem(idx(file, 8), namex(file, 8), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + driveItem(idx(file, 9), namex(file, 9), parent(1, namex(folder, 2)), idx(folder, 2), isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, + fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)}, + }, + }, + { + name: "TwoDrives SeparateLimitAccounting", + limits: control.PreviewItemLimits{ + Enabled: true, + MaxItems: 3, + MaxItemsPerContainer: 999, + MaxContainers: 999, + MaxBytes: 999999, + MaxPages: 999, + }, + drives: []models.Driveable{drive1, drive2}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + driveItem(idx(file, 4), namex(file, 4), parent(1), rootID, isFile), + driveItem(idx(file, 5), namex(file, 5), parent(1), rootID, isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + idx(drive, 2): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(2), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(2), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(2), rootID, isFile), + driveItem(idx(file, 4), namex(file, 4), parent(2), rootID, isFile), + driveItem(idx(file, 5), namex(file, 5), parent(2), rootID, isFile), + }, + }, + }, + DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)}, + }, + }, + }, + expectedCollections: map[string][]string{ + fullPath(1): {idx(file, 1), idx(file, 2), idx(file, 3)}, + fullPath(2): {idx(file, 1), idx(file, 2), idx(file, 3)}, + }, + }, + { + name: "OneDrive PreviewDisabled MinimumLimitsIgnored", + limits: control.PreviewItemLimits{ + MaxItems: 1, + MaxItemsPerContainer: 1, + MaxContainers: 1, + MaxBytes: 1, + MaxPages: 1, + }, + drives: []models.Driveable{drive1}, + enumerator: mock.EnumerateItemsDeltaByDrive{ + DrivePagers: map[string]*mock.DriveItemsDeltaPager{ + idx(drive, 1): { + Pages: []mock.NextPage{ + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(file, 1), namex(file, 1), parent(1), rootID, isFile), + driveItem(idx(file, 2), namex(file, 2), parent(1), rootID, isFile), + driveItem(idx(file, 3), namex(file, 3), parent(1), rootID, isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 4), namex(file, 4), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + { + Items: []models.DriveItemable{ + driveRootItem(rootID), // will be present, not needed + driveItem(idx(folder, 1), namex(folder, 1), parent(1), rootID, isFolder), + driveItem(idx(file, 5), namex(file, 5), parent(1, namex(folder, 1)), idx(folder, 1), isFile), + }, + }, + }, + 
						DeltaUpdate: pagers.DeltaUpdate{URL: id(delta)},
+					},
+				},
+			},
+			expectedCollections: map[string][]string{
+				fullPath(1):                   {idx(file, 1), idx(file, 2), idx(file, 3)},
+				fullPath(1, namex(folder, 1)): {idx(folder, 1), idx(file, 4), idx(file, 5)},
+			},
+		},
+	}
+	for _, test := range table {
+		suite.Run(test.name, func() {
+			t := suite.T()
+
+			ctx, flush := tester.NewContext(t)
+			defer flush()
+
+			mockDrivePager := &apiMock.Pager[models.Driveable]{
+				ToReturn: []apiMock.PagerResult[models.Driveable]{
+					{Values: test.drives},
+				},
+			}
+
+			mbh := mock.DefaultOneDriveBH(user)
+			mbh.DrivePagerV = mockDrivePager
+			mbh.DriveItemEnumeration = test.enumerator
+
+			opts := control.DefaultOptions()
+			opts.PreviewLimits = test.limits
+
+			c := NewCollections(
+				mbh,
+				tenant,
+				idname.NewProvider(user, user),
+				func(*support.ControllerOperationStatus) {},
+				opts,
+				count.New())
+
+			errs := fault.New(true)
+
+			delList := prefixmatcher.NewStringSetBuilder()
+
+			cols, canUsePreviousBackup, err := c.Get(ctx, nil, delList, errs)
+			require.NoError(t, err, clues.ToCore(err))
+
+			assert.True(t, canUsePreviousBackup, "can use previous backup")
+			assert.Empty(t, errs.Skipped())
+
+			collPaths := []string{}
+
+			for _, baseCol := range cols {
+				// There shouldn't be any deleted collections.
+				if !assert.NotEqual(
+					t,
+					data.DeletedState,
+					baseCol.State(),
+					"collection marked deleted") {
+					continue
+				}
+
+				folderPath := baseCol.FullPath().String()
+
+				if folderPath == metadataPath.String() {
+					continue
+				}
+
+				collPaths = append(collPaths, folderPath)
+
+				// TODO: We should really be getting items in the collection
+				// via the Items() channel. The lack of that makes this check a bit more
+				// brittle since internal details can change. The wiring to support
+				// mocked GetItems is available. We just haven't plugged it in yet.
+ col, ok := baseCol.(*Collection) + require.True(t, ok, "getting onedrive.Collection handle") + + itemIDs := make([]string, 0, len(col.driveItems)) + + for id := range col.driveItems { + itemIDs = append(itemIDs, id) + } + + assert.ElementsMatchf( + t, + test.expectedCollections[folderPath], + itemIDs, + "expected elements to match in collection with path %q", + folderPath) + } + + assert.ElementsMatch( + t, + maps.Keys(test.expectedCollections), + collPaths, + "collection paths") + }) + } +} + func (suite *CollectionsUnitSuite) TestAddURLCacheToDriveCollections() { drive1 := models.NewDrive() drive1.SetId(ptr.To(idx(drive, 1))) diff --git a/src/internal/m365/service/onedrive/mock/handlers.go b/src/internal/m365/service/onedrive/mock/handlers.go index cbb304730..acffd04af 100644 --- a/src/internal/m365/service/onedrive/mock/handlers.go +++ b/src/internal/m365/service/onedrive/mock/handlers.go @@ -331,6 +331,8 @@ func (edi *DriveItemsDeltaPager) NextPage() ([]models.DriveItemable, bool, bool) return np.Items, np.Reset, false } +func (edi *DriveItemsDeltaPager) Cancel() {} + func (edi *DriveItemsDeltaPager) Results() (pagers.DeltaUpdate, error) { return edi.DeltaUpdate, edi.Err } diff --git a/src/pkg/control/options.go b/src/pkg/control/options.go index d7f15f164..f086856f2 100644 --- a/src/pkg/control/options.go +++ b/src/pkg/control/options.go @@ -43,7 +43,8 @@ type PreviewItemLimits struct { MaxItems int MaxItemsPerContainer int MaxContainers int - MaxBytes int + MaxBytes int64 + MaxPages int Enabled bool } diff --git a/src/pkg/services/m365/api/pagers/pagers.go b/src/pkg/services/m365/api/pagers/pagers.go index ad713209d..32a66b527 100644 --- a/src/pkg/services/m365/api/pagers/pagers.go +++ b/src/pkg/services/m365/api/pagers/pagers.go @@ -51,6 +51,7 @@ type nextPage[T any] struct { type NextPageResulter[T any] interface { NextPager[T] + Cancel() Results() (DeltaUpdate, error) }
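Since `NextPageResulter` now requires `Cancel`, early exits like the limiter's must release the pager before collecting results. A rough sketch of the consumption pattern, mirroring the loop in `PopulateDriveCollections`; the `drainPages` helper and `stopEarly` predicate are hypothetical stand-ins for the limiter checks, and this assumes `NextPage` returns (items, reset, done) as in the mock above:

```go
package example

import "github.com/alcionai/corso/src/pkg/services/m365/api/pagers"

// drainPages consumes pages until the pager is done or the caller's predicate
// asks to stop early, then cancels and returns the delta results.
func drainPages[T any](
	pager pagers.NextPageResulter[T],
	stopEarly func(page []T) bool,
) (pagers.DeltaUpdate, error) {
	for page, _, done := pager.NextPage(); !done; page, _, done = pager.NextPage() {
		if stopEarly(page) {
			break // limit hit; don't request further pages
		}
	}

	// Always cancel so an early break doesn't deadlock the pager; cancelling
	// a pager that already completed is essentially a noop.
	pager.Cancel()

	return pager.Results()
}
```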