From 4edd1c51652a006b021aa680742b9cbab551619e Mon Sep 17 00:00:00 2001 From: ryanfkeepers Date: Wed, 29 Nov 2023 16:11:17 -0700 Subject: [PATCH] add multi-delta unit tests adds testing (and some minor tweaks) to multi-delta enumeration within the collection tree processor. --- .../m365/collection/drive/collections.go | 2 +- .../m365/collection/drive/collections_tree.go | 14 +- .../collection/drive/collections_tree_test.go | 426 ++++++++++++++---- src/pkg/count/keys.go | 3 +- 4 files changed, 338 insertions(+), 107 deletions(-) diff --git a/src/internal/m365/collection/drive/collections.go b/src/internal/m365/collection/drive/collections.go index 59e651341..4d0d4e500 100644 --- a/src/internal/m365/collection/drive/collections.go +++ b/src/internal/m365/collection/drive/collections.go @@ -828,7 +828,7 @@ func (c *Collections) PopulateDriveCollections( break } - counter.Inc(count.PagesEnumerated) + counter.Inc(count.TotalPagesEnumerated) if reset { counter.Inc(count.PagerResets) diff --git a/src/internal/m365/collection/drive/collections_tree.go b/src/internal/m365/collection/drive/collections_tree.go index 8c97dc10d..64c70738a 100644 --- a/src/internal/m365/collection/drive/collections_tree.go +++ b/src/internal/m365/collection/drive/collections_tree.go @@ -286,6 +286,8 @@ func (c *Collections) populateTree( ) for !hitLimit && !finished && el.Failure() == nil { + counter.Inc(count.TotalDeltasProcessed) + var ( pageCount int pageItemCount int @@ -316,6 +318,8 @@ func (c *Collections) populateTree( pageCount = 0 pageItemCount = 0 hadReset = true + } else { + counter.Inc(count.TotalPagesEnumerated) } err = c.enumeratePageOfItems( @@ -335,19 +339,17 @@ func (c *Collections) populateTree( el.AddRecoverable(ctx, clues.Stack(err)) } - counter.Inc(count.PagesEnumerated) + pageCount++ + + pageItemCount += len(page) // Stop enumeration early if we've reached the page limit. Keep this // at the end of the loop so we don't request another page (pager.NextPage) // before seeing we've passed the limit. - if limiter.hitPageLimit(int(counter.Get(count.PagesEnumerated))) { + if limiter.hitPageLimit(pageCount) { hitLimit = true break } - - pageCount++ - - pageItemCount += len(page) } // Always cancel the pager so that even if we exit early from the loop above diff --git a/src/internal/m365/collection/drive/collections_tree_test.go b/src/internal/m365/collection/drive/collections_tree_test.go index a4c381565..06cbb7de9 100644 --- a/src/internal/m365/collection/drive/collections_tree_test.go +++ b/src/internal/m365/collection/drive/collections_tree_test.go @@ -557,32 +557,34 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_MakeDriveCollections() { } } -// This test focuses on the population of a tree using delta enumeration data, -// and is not concerned with unifying previous paths or post-processing collections. -func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { +type populateTreeExpected struct { + counts countTD.Expected + err require.ErrorAssertionFunc + numLiveFiles int + numLiveFolders int + shouldHitLimit bool + sizeBytes int64 + treeContainsFolderIDs []string + treeContainsTombstoneIDs []string + treeContainsFileIDsWithParent map[string]string +} + +type populateTreeTest struct { + name string + enumerator mock.EnumerateDriveItemsDelta + tree *folderyMcFolderFace + limiter *pagerLimiter + expect populateTreeExpected +} + +// this test focuses on the population of a tree using a single delta's enumeration data. +// It is not concerned with unifying previous paths or post-processing collections. +func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_singleDelta() { drv := models.NewDrive() drv.SetId(ptr.To(id(drive))) drv.SetName(ptr.To(name(drive))) - type expected struct { - counts countTD.Expected - err require.ErrorAssertionFunc - numLiveFiles int - numLiveFolders int - shouldHitLimit bool - sizeBytes int64 - treeContainsFolderIDs []string - treeContainsTombstoneIDs []string - treeContainsFileIDsWithParent map[string]string - } - - table := []struct { - name string - enumerator mock.EnumerateItemsDeltaByDrive - tree *folderyMcFolderFace - limiter *pagerLimiter - expect expected - }{ + table := []populateTreeTest{ { name: "nil page", tree: newFolderyMcFolderFace(nil, rootID), @@ -595,7 +597,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { }, }, limiter: newPagerLimiter(control.DefaultOptions()), - expect: expected{ + expect: populateTreeExpected{ counts: countTD.Expected{}, err: require.NoError, numLiveFiles: 0, @@ -618,11 +620,11 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { }, }, limiter: newPagerLimiter(control.DefaultOptions()), - expect: expected{ + expect: populateTreeExpected{ counts: countTD.Expected{ count.TotalFoldersProcessed: 1, count.TotalFilesProcessed: 0, - count.PagesEnumerated: 1, + count.TotalPagesEnumerated: 2, }, err: require.NoError, numLiveFiles: 0, @@ -647,11 +649,11 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { }, }, limiter: newPagerLimiter(control.DefaultOptions()), - expect: expected{ + expect: populateTreeExpected{ counts: countTD.Expected{ count.TotalFoldersProcessed: 2, count.TotalFilesProcessed: 0, - count.PagesEnumerated: 2, + count.TotalPagesEnumerated: 3, }, err: require.NoError, numLiveFiles: 0, @@ -681,10 +683,10 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { }, }, limiter: newPagerLimiter(control.DefaultOptions()), - expect: expected{ + expect: populateTreeExpected{ counts: countTD.Expected{ count.TotalFoldersProcessed: 7, - count.PagesEnumerated: 3, + count.TotalPagesEnumerated: 4, count.TotalFilesProcessed: 0, }, err: require.NoError, @@ -723,11 +725,11 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { }, }, limiter: newPagerLimiter(control.DefaultOptions()), - expect: expected{ + expect: populateTreeExpected{ counts: countTD.Expected{ count.TotalFoldersProcessed: 7, count.TotalFilesProcessed: 3, - count.PagesEnumerated: 3, + count.TotalPagesEnumerated: 4, }, err: require.NoError, numLiveFiles: 3, @@ -741,9 +743,50 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { }, treeContainsTombstoneIDs: []string{}, treeContainsFileIDsWithParent: map[string]string{ - id(file): id(folder), - idx(file, "sib"): idx(folder, "sib"), - idx(file, "chld"): idx(folder, "chld"), + id(file): id(folder), + idx(file, "fsib"): idx(folder, "sib"), + idx(file, "fchld"): idx(folder, "chld"), + }, + }, + }, + { + name: "many folders with files across multiple deltas", + tree: newFolderyMcFolderFace(nil, rootID), + enumerator: mock.DriveEnumerator( + mock.Drive(id(drive)).With( + mock.Delta(id(delta), nil).With(aPage( + folderAtRoot(), + fileAt(folder))), + mock.Delta(id(delta), nil).With(aPage( + folderxAtRoot("sib"), + filexAt("fsib", "sib"))), + mock.Delta(id(delta), nil).With(aPage( + folderAtRoot(), + folderxAt("chld", folder), + filexAt("fchld", "chld"))), + )), + limiter: newPagerLimiter(control.DefaultOptions()), + expect: populateTreeExpected{ + counts: countTD.Expected{ + count.TotalFoldersProcessed: 7, + count.TotalFilesProcessed: 3, + count.TotalPagesEnumerated: 4, + }, + err: require.NoError, + numLiveFiles: 3, + numLiveFolders: 4, + sizeBytes: 3 * 42, + treeContainsFolderIDs: []string{ + rootID, + id(folder), + idx(folder, "sib"), + idx(folder, "chld"), + }, + treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{ + id(file): id(folder), + idx(file, "fsib"): idx(folder, "sib"), + idx(file, "fchld"): idx(folder, "chld"), }, }, }, @@ -765,12 +808,12 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { }, }, limiter: newPagerLimiter(control.DefaultOptions()), - expect: expected{ + expect: populateTreeExpected{ counts: countTD.Expected{ count.TotalFoldersProcessed: 3, count.TotalFilesProcessed: 1, count.TotalDeleteFoldersProcessed: 1, - count.PagesEnumerated: 2, + count.TotalPagesEnumerated: 3, }, err: require.NoError, numLiveFiles: 0, @@ -804,12 +847,12 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { }, }, limiter: newPagerLimiter(control.DefaultOptions()), - expect: expected{ + expect: populateTreeExpected{ counts: countTD.Expected{ count.TotalFoldersProcessed: 4, count.TotalDeleteFoldersProcessed: 1, count.TotalFilesProcessed: 1, - count.PagesEnumerated: 2, + count.TotalPagesEnumerated: 3, }, err: require.NoError, numLiveFiles: 0, @@ -849,12 +892,12 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { }, }, limiter: newPagerLimiter(minimumLimitOpts()), - expect: expected{ + expect: populateTreeExpected{ counts: countTD.Expected{ count.TotalDeleteFoldersProcessed: 0, count.TotalFoldersProcessed: 1, count.TotalFilesProcessed: 0, - count.PagesEnumerated: 0, + count.TotalPagesEnumerated: 1, }, err: require.NoError, shouldHitLimit: true, @@ -890,12 +933,12 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { }, }, limiter: newPagerLimiter(minimumLimitOpts()), - expect: expected{ + expect: populateTreeExpected{ counts: countTD.Expected{ count.TotalDeleteFoldersProcessed: 0, count.TotalFoldersProcessed: 1, count.TotalFilesProcessed: 0, - count.PagesEnumerated: 0, + count.TotalPagesEnumerated: 1, }, err: require.NoError, shouldHitLimit: true, @@ -912,67 +955,252 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree() { } for _, test := range table { suite.Run(test.name, func() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - mockDrivePager := &apiMock.Pager[models.Driveable]{ - ToReturn: []apiMock.PagerResult[models.Driveable]{ - {Values: []models.Driveable{drv}}, - }, - } - - mbh := mock.DefaultDriveBHWith(user, mockDrivePager, test.enumerator) - c := collWithMBH(mbh) - counter := count.New() - - _, err := c.populateTree( - ctx, - test.tree, - drv, - id(delta), - test.limiter, - counter, - fault.New(true)) - - test.expect.err(t, err, clues.ToCore(err)) - - assert.Equal( - t, - test.expect.numLiveFolders, - test.tree.countLiveFolders(), - "count folders in tree") - - countSize := test.tree.countLiveFilesAndSizes() - assert.Equal( - t, - test.expect.numLiveFiles, - countSize.numFiles, - "count files in tree") - assert.Equal( - t, - test.expect.sizeBytes, - countSize.totalBytes, - "count total bytes in tree") - test.expect.counts.Compare(t, counter) - - for _, id := range test.expect.treeContainsFolderIDs { - assert.NotNil(t, test.tree.folderIDToNode[id], "node exists") - } - - for _, id := range test.expect.treeContainsTombstoneIDs { - assert.NotNil(t, test.tree.tombstones[id], "tombstone exists") - } - - for iID, pID := range test.expect.treeContainsFileIDsWithParent { - assert.Contains(t, test.tree.fileIDToParentID, iID, "file should exist in tree") - assert.Equal(t, pID, test.tree.fileIDToParentID[iID], "file should reference correct parent") - } + runPopulateTreeTest(suite.T(), drv, test) }) } } +// this test focuses on quirks that can only arise from cases that occur across +// multiple delta enumerations. +// It is not concerned with unifying previous paths or post-processing collections. +func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_multiDelta() { + drv := models.NewDrive() + drv.SetId(ptr.To(id(drive))) + drv.SetName(ptr.To(name(drive))) + + table := []populateTreeTest{ + { + name: "sanity case: normal enumeration split across multiple deltas", + tree: newFolderyMcFolderFace(nil, rootID), + enumerator: mock.DriveEnumerator( + mock.Drive(id(drive)).With( + mock.Delta(id(delta), nil).With(aPage( + folderAtRoot(), + fileAt(folder))), + mock.Delta(id(delta), nil).With(aPage( + folderxAtRoot("sib"), + filexAt("fsib", "sib"))), + mock.Delta(id(delta), nil).With(aPage( + folderAtRoot(), + folderxAt("chld", folder), + filexAt("fchld", "chld"))), + )), + limiter: newPagerLimiter(control.DefaultOptions()), + expect: populateTreeExpected{ + counts: countTD.Expected{ + count.TotalDeltasProcessed: 4, + count.TotalDeleteFoldersProcessed: 0, + count.TotalDeleteFilesProcessed: 0, + count.TotalFilesProcessed: 3, + count.TotalFoldersProcessed: 7, + count.TotalPagesEnumerated: 4, + }, + err: require.NoError, + numLiveFiles: 3, + numLiveFolders: 4, + sizeBytes: 3 * 42, + treeContainsFolderIDs: []string{ + rootID, + id(folder), + idx(folder, "sib"), + idx(folder, "chld"), + }, + treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{ + id(file): id(folder), + idx(file, "fsib"): idx(folder, "sib"), + idx(file, "fchld"): idx(folder, "chld"), + }, + }, + }, + { + name: "create->delete,create", + tree: newFolderyMcFolderFace(nil, rootID), + enumerator: mock.DriveEnumerator( + mock.Drive(id(drive)).With( + mock.Delta(id(delta), nil).With(aPage( + folderAtRoot(), + fileAt(folder))), + // a (delete,create) pair in the same delta can occur when + // a user deletes and restores an item in-between deltas. + mock.Delta(id(delta), nil).With( + aPage( + delItem(id(folder), rootID, isFolder), + delItem(id(file), id(folder), isFile)), + aPage( + folderAtRoot(), + fileAt(folder))), + )), + limiter: newPagerLimiter(control.DefaultOptions()), + expect: populateTreeExpected{ + counts: countTD.Expected{ + count.TotalDeltasProcessed: 3, + count.TotalDeleteFoldersProcessed: 1, + count.TotalDeleteFilesProcessed: 1, + count.TotalFilesProcessed: 2, + count.TotalFoldersProcessed: 5, + count.TotalPagesEnumerated: 4, + }, + err: require.NoError, + numLiveFiles: 1, + numLiveFolders: 2, + sizeBytes: 42, + treeContainsFolderIDs: []string{ + rootID, + id(folder), + }, + treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{}, + }, + }, + { + name: "visit->rename", + tree: newFolderyMcFolderFace(nil, rootID), + enumerator: mock.DriveEnumerator( + mock.Drive(id(drive)).With( + mock.Delta(id(delta), nil).With(aPage( + folderAtRoot(), + fileAt(folder))), + mock.Delta(id(delta), nil).With(aPage( + driveItem(id(folder), namex(folder, "rename"), parentDir(), rootID, isFolder), + driveItem(id(file), namex(file, "rename"), parentDir(namex(folder, "rename")), id(folder), isFile))), + )), + limiter: newPagerLimiter(control.DefaultOptions()), + expect: populateTreeExpected{ + counts: countTD.Expected{ + count.TotalDeltasProcessed: 3, + count.TotalDeleteFilesProcessed: 0, + count.TotalDeleteFoldersProcessed: 0, + count.TotalFilesProcessed: 2, + count.TotalFoldersProcessed: 4, + count.TotalPagesEnumerated: 3, + }, + err: require.NoError, + numLiveFiles: 1, + numLiveFolders: 2, + sizeBytes: 42, + treeContainsFolderIDs: []string{ + rootID, + id(folder), + }, + treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{ + id(file): id(folder), + }, + }, + }, + { + name: "duplicate folder name from deferred delete marker", + tree: newFolderyMcFolderFace(nil, rootID), + enumerator: mock.DriveEnumerator( + mock.Drive(id(drive)).With( + mock.Delta(id(delta), nil).With( + // first page: create /root/folder and /root/folder/file + aPage( + folderAtRoot(), + fileAt(folder)), + // assume the user makes changes at this point: + // 1. delete /root/folder + // 2. create a new /root/folder + // 3. move /root/folder/file from old to new folder (same file ID) + // in drive deltas, this will show up as another folder creation sharing + // the same dirname, but we won't see the delete until... + aPage( + driveItem(idx(folder, 2), name(folder), parentDir(), rootID, isFolder), + driveItem(id(file), name(file), parentDir(name(folder)), idx(folder, 2), isFile))), + // the next delta, containing the delete marker for the original /root/folder + mock.Delta(id(delta), nil).With(aPage( + delItem(id(folder), rootID, isFolder), + )), + )), + limiter: newPagerLimiter(control.DefaultOptions()), + expect: populateTreeExpected{ + counts: countTD.Expected{ + count.TotalDeltasProcessed: 3, + count.TotalDeleteFilesProcessed: 0, + count.TotalDeleteFoldersProcessed: 1, + count.TotalFilesProcessed: 2, + count.TotalFoldersProcessed: 5, + count.TotalPagesEnumerated: 4, + }, + err: require.NoError, + numLiveFiles: 1, + numLiveFolders: 2, + sizeBytes: 42, + treeContainsFolderIDs: []string{ + rootID, + idx(folder, 2), + }, + treeContainsTombstoneIDs: []string{}, + treeContainsFileIDsWithParent: map[string]string{ + id(file): idx(folder, 2), + }, + }, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + runPopulateTreeTest(suite.T(), drv, test) + }) + } +} + +func runPopulateTreeTest( + t *testing.T, + drv models.Driveable, + test populateTreeTest, +) { + ctx, flush := tester.NewContext(t) + defer flush() + + mbh := mock.DefaultDriveBHWith(user, pagerForDrives(drv), test.enumerator) + c := collWithMBH(mbh) + counter := count.New() + + _, err := c.populateTree( + ctx, + test.tree, + drv, + id(delta), + test.limiter, + counter, + fault.New(true)) + + test.expect.err(t, err, clues.ToCore(err)) + + assert.Equal( + t, + test.expect.numLiveFolders, + test.tree.countLiveFolders(), + "count live folders in tree") + + cAndS := test.tree.countLiveFilesAndSizes() + assert.Equal( + t, + test.expect.numLiveFiles, + cAndS.numFiles, + "count live files in tree") + assert.Equal( + t, + test.expect.sizeBytes, + cAndS.totalBytes, + "count total bytes in tree") + test.expect.counts.Compare(t, counter) + + for _, id := range test.expect.treeContainsFolderIDs { + assert.NotNil(t, test.tree.folderIDToNode[id], "node exists") + } + + for _, id := range test.expect.treeContainsTombstoneIDs { + assert.NotNil(t, test.tree.tombstones[id], "tombstone exists") + } + + for iID, pID := range test.expect.treeContainsFileIDsWithParent { + assert.Contains(t, test.tree.fileIDToParentID, iID, "file should exist in tree") + assert.Equal(t, pID, test.tree.fileIDToParentID[iID], "file should reference correct parent") + } +} + // --------------------------------------------------------------------------- // folder tests // --------------------------------------------------------------------------- diff --git a/src/pkg/count/keys.go b/src/pkg/count/keys.go index 1896a7db6..6a5f48932 100644 --- a/src/pkg/count/keys.go +++ b/src/pkg/count/keys.go @@ -50,7 +50,6 @@ const ( NoDeltaQueries Key = "cannot-make-delta-queries" Packages Key = "packages" PagerResets Key = "pager-resets" - PagesEnumerated Key = "pages-enumerated" PrevDeltas Key = "previous-deltas" PrevPaths Key = "previous-paths" PreviousPathMetadataCollision Key = "previous-path-metadata-collision" @@ -80,10 +79,12 @@ const ( const ( TotalDeleteFilesProcessed Key = "total-delete-files-processed" TotalDeleteFoldersProcessed Key = "total-delete-folders-processed" + TotalDeltasProcessed Key = "total-deltas-processed" TotalFilesProcessed Key = "total-files-processed" TotalFoldersProcessed Key = "total-folders-processed" TotalMalwareProcessed Key = "total-malware-processed" TotalPackagesProcessed Key = "total-packages-processed" + TotalPagesEnumerated Key = "total-pages-enumerated" ) // miscellaneous