add boilerplate for drive tree processing (#4716)

Begins implementation of the drive delta tree support by adding boilerplate funcs for getting the backup data, and adding a framework of unit tests that will be used to bring testing parity up to par with the current tests.

---

#### Does this PR need a docs update or release note?

- [x] ⛔ No

#### Issue(s)

* #4689

#### Test Plan

- [x] ⚡ Unit test
parent 793658c790
commit 6aff258c8b
@@ -25,6 +25,23 @@ const (
	DeletedState CollectionState = 3
)

func (cs CollectionState) String() string {
	s := "Unknown State"

	switch cs {
	case 0:
		s = "New"
	case 1:
		s = "Not Moved"
	case 2:
		s = "Moved"
	case 3:
		s = "Deleted"
	}

	return s
}

type FetchRestoreCollection struct {
	Collection
	FetchItemByNamer
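For reference, a minimal standalone sketch of the same Stringer written against named constants instead of raw integers. NotMovedState and DeletedState appear elsewhere in this commit; NewState and MovedState are assumed names used only for this illustration.

package main

import "fmt"

// CollectionState mirrors the type in the hunk above.
type CollectionState int

const (
	NewState      CollectionState = 0 // assumed name
	NotMovedState CollectionState = 1
	MovedState    CollectionState = 2 // assumed name
	DeletedState  CollectionState = 3
)

// String keys the switch on the named constants, so the mapping stays readable
// if the constants are ever reordered.
func (cs CollectionState) String() string {
	switch cs {
	case NewState:
		return "New"
	case NotMovedState:
		return "Not Moved"
	case MovedState:
		return "Moved"
	case DeletedState:
		return "Deleted"
	default:
		return "Unknown State"
	}
}

func main() {
	// CollectionState satisfies fmt.Stringer, so %v prints the label.
	fmt.Printf("state: %v\n", DeletedState) // state: Deleted
}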
@@ -291,6 +291,17 @@ func (c *Collections) Get(
	ssmb *prefixmatcher.StringSetMatchBuilder,
	errs *fault.Bus,
) ([]data.BackupCollection, bool, error) {
	if c.ctrl.ToggleFeatures.UseDeltaTree {
		_, _, err := c.getTree(ctx, prevMetadata, ssmb, errs)
		if err != nil {
			return nil, false, clues.Wrap(err, "processing backup using tree")
		}

		return nil,
			false,
			clues.New("forced error: cannot run tree-based backup: incomplete implementation")
	}

	deltasByDriveID, prevPathsByDriveID, canUsePrevBackup, err := deserializeAndValidateMetadata(
		ctx,
		prevMetadata,
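Callers opt into the tree-based path through the feature toggle carried on control.Options, which is exactly how the test helper later in this commit builds its Collections. A minimal sketch, assuming only that the control package builds on its own:

package main

import (
	"fmt"

	"github.com/alcionai/corso/src/pkg/control"
)

func main() {
	// Enabling the toggle routes Collections.Get through getTree above. Until
	// the tree walk is implemented, that path deliberately returns a forced error.
	opts := control.Options{
		ToggleFeatures: control.Toggles{
			UseDeltaTree: true,
		},
	}

	fmt.Println("tree-based drive backup enabled:", opts.ToggleFeatures.UseDeltaTree)
}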
@@ -750,87 +761,6 @@ func (c *Collections) getCollectionPath(
	return collectionPath, nil
}

type driveEnumerationStats struct {
	numPages      int
	numAddedFiles int
	numContainers int
	numBytes      int64
}

func newPagerLimiter(opts control.Options) *pagerLimiter {
	res := &pagerLimiter{limits: opts.PreviewLimits}

	if res.limits.MaxContainers == 0 {
		res.limits.MaxContainers = defaultPreviewMaxContainers
	}

	if res.limits.MaxItemsPerContainer == 0 {
		res.limits.MaxItemsPerContainer = defaultPreviewMaxItemsPerContainer
	}

	if res.limits.MaxItems == 0 {
		res.limits.MaxItems = defaultPreviewMaxItems
	}

	if res.limits.MaxBytes == 0 {
		res.limits.MaxBytes = defaultPreviewMaxBytes
	}

	if res.limits.MaxPages == 0 {
		res.limits.MaxPages = defaultPreviewMaxPages
	}

	return res
}

type pagerLimiter struct {
	limits control.PreviewItemLimits
}

func (l pagerLimiter) effectiveLimits() control.PreviewItemLimits {
	return l.limits
}

func (l pagerLimiter) enabled() bool {
	return l.limits.Enabled
}

// sizeLimit returns the total number of bytes this backup should try to
// contain.
func (l pagerLimiter) sizeLimit() int64 {
	return l.limits.MaxBytes
}

// atItemLimit returns true if the limiter is enabled and has reached the limit
// for individual items added to collections for this backup.
func (l pagerLimiter) atItemLimit(stats *driveEnumerationStats) bool {
	return l.enabled() &&
		(stats.numAddedFiles >= l.limits.MaxItems ||
			stats.numBytes >= l.limits.MaxBytes)
}

// atContainerItemsLimit returns true if the limiter is enabled and the current
// number of items is above the limit for the number of items for a container
// for this backup.
func (l pagerLimiter) atContainerItemsLimit(numItems int) bool {
	return l.enabled() && numItems >= l.limits.MaxItemsPerContainer
}

// atContainerPageLimit returns true if the limiter is enabled and the number of
// pages processed so far is beyond the limit for this backup.
func (l pagerLimiter) atPageLimit(stats *driveEnumerationStats) bool {
	return l.enabled() && stats.numPages >= l.limits.MaxPages
}

// atLimit returns true if the limiter is enabled and meets any of the
// conditions for max items, containers, etc for this backup.
func (l pagerLimiter) atLimit(stats *driveEnumerationStats) bool {
	return l.enabled() &&
		(l.atItemLimit(stats) ||
			stats.numContainers >= l.limits.MaxContainers ||
			stats.numPages >= l.limits.MaxPages)
}

// PopulateDriveCollections initializes and adds the provided drive items to Collections
// A new collection is created for every drive folder.
// Along with populating the collection items and updating the excluded item IDs, this func

@@ -926,7 +856,7 @@ func (c *Collections) PopulateDriveCollections(
		// Don't check for containers we've already seen.
		if _, ok := c.CollectionMap[driveID][id]; !ok {
			if id != lastContainerID {
				if limiter.atLimit(stats) {
				if limiter.atLimit(stats, ignoreMe) {
					break
				}
File diff suppressed because it is too large
src/internal/m365/collection/drive/collections_tree.go (new file, 326 lines)
@@ -0,0 +1,326 @@
package drive

import (
	"context"

	"github.com/alcionai/clues"
	"github.com/microsoftgraph/msgraph-sdk-go/models"

	"github.com/alcionai/corso/src/internal/common/prefixmatcher"
	"github.com/alcionai/corso/src/internal/common/ptr"
	"github.com/alcionai/corso/src/internal/data"
	bupMD "github.com/alcionai/corso/src/pkg/backup/metadata"
	"github.com/alcionai/corso/src/pkg/count"
	"github.com/alcionai/corso/src/pkg/fault"
	"github.com/alcionai/corso/src/pkg/logger"
	"github.com/alcionai/corso/src/pkg/services/m365/api"
	"github.com/alcionai/corso/src/pkg/services/m365/api/graph"
	"github.com/alcionai/corso/src/pkg/services/m365/api/pagers"
)

// this file is used to separate the collections handling between the previous
// (list-based) design, and the in-progress (tree-based) redesign.
// see: https://github.com/alcionai/corso/issues/4688

func (c *Collections) getTree(
	ctx context.Context,
	prevMetadata []data.RestoreCollection,
	ssmb *prefixmatcher.StringSetMatchBuilder,
	errs *fault.Bus,
) ([]data.BackupCollection, bool, error) {
	ctx = clues.AddTraceName(ctx, "GetTree")

	// extract the previous backup's metadata, like the deltaToken urls and previousPath maps.
	// We'll need these to reconstruct / ensure the correct state of the world, after
	// enumerating through all the delta changes.
	deltasByDriveID, prevPathsByDriveID, canUsePrevBackup, err := deserializeAndValidateMetadata(
		ctx,
		prevMetadata,
		c.counter,
		errs)
	if err != nil {
		return nil, false, err
	}

	ctx = clues.Add(ctx, "can_use_previous_backup", canUsePrevBackup)

	// in sharepoint, it's possible to delete an entire drive.
	// if we don't see a previously-existing drive in the drives enumeration,
	// we assume it was deleted and will remove it from storage using a tombstone.
	driveTombstones := map[string]struct{}{}

	for driveID := range prevPathsByDriveID {
		driveTombstones[driveID] = struct{}{}
	}

	pager := c.handler.NewDrivePager(c.protectedResource.ID(), nil)

	drives, err := api.GetAllDrives(ctx, pager)
	if err != nil {
		return nil, false, err
	}

	c.counter.Add(count.Drives, int64(len(drives)))
	c.counter.Add(count.PrevDeltas, int64(len(deltasByDriveID)))

	var (
		el                    = errs.Local()
		collections           = []data.BackupCollection{}
		driveIDToNewDeltaLink = map[string]string{}
		driveIDToNewPrevPaths = map[string]map[string]string{}
	)

	// each drive owns its own delta history. We can't go more granular than that.
	// so our first order of business is to enumerate each drive's delta data, and
	// to use that as the basis for our backups.
	for _, drv := range drives {
		if el.Failure() != nil {
			break
		}

		var (
			driveID = ptr.Val(drv.GetId())
			cl      = c.counter.Local()
			ictx    = clues.Add(
				ctx,
				"drive_id", driveID,
				"drive_name", clues.Hide(ptr.Val(drv.GetName())))
		)

		ictx = clues.AddLabelCounter(ictx, cl.PlainAdder())

		// all the magic happens here. expectations are that this process will:
		// - iterate over all data (new or delta, as needed) in the drive
		// - condense that data into a set of collections to backup
		// - stitch the new and previous path data into a new prevPaths map
		// - report the latest delta token details
		colls, newPrevPaths, du, err := c.makeDriveCollections(
			ictx,
			drv,
			prevPathsByDriveID[driveID],
			cl,
			el.Local())
		if err != nil {
			el.AddRecoverable(ictx, clues.Stack(err))
			continue
		}

		// add all the freshly aggregated data into our results
		collections = append(collections, colls...)
		driveIDToNewPrevPaths[driveID] = newPrevPaths
		driveIDToNewDeltaLink[driveID] = du.URL

		// this drive is still in use, so we'd better not delete it.
		delete(driveTombstones, driveID)
	}

	if el.Failure() != nil {
		return nil, false, clues.Stack(el.Failure())
	}

	alertIfPrevPathsHaveCollisions(ctx, driveIDToNewPrevPaths, c.counter, errs)

	// clean up any drives that have been deleted since the last backup.
	dts, err := c.makeDriveTombstones(ctx, driveTombstones, errs)
	if err != nil {
		return nil, false, clues.Stack(err)
	}

	collections = append(collections, dts...)

	// persist our updated metadata for use on the next backup
	colls := c.makeMetadataCollections(
		ctx,
		driveIDToNewDeltaLink,
		driveIDToNewPrevPaths)

	collections = append(collections, colls...)

	logger.Ctx(ctx).Infow("produced collections", "count_collections", len(collections))

	return collections, canUsePrevBackup, nil
}

func (c *Collections) makeDriveCollections(
	ctx context.Context,
	d models.Driveable,
	prevPaths map[string]string,
	counter *count.Bus,
	errs *fault.Bus,
) ([]data.BackupCollection, map[string]string, pagers.DeltaUpdate, error) {
	cl := c.counter.Local()

	cl.Add(count.PrevPaths, int64(len(prevPaths)))
	logger.Ctx(ctx).Infow(
		"previous metadata for drive",
		"count_old_prev_paths", len(prevPaths))

	// TODO(keepers): leaving this code around for now as a guide
	// while implementation progresses.

	// --- pager aggregation

	// du, newPrevPaths, err := c.PopulateDriveCollections(
	// 	ctx,
	// 	d,
	// 	tree,
	// 	cl.Local(),
	// 	errs)
	// if err != nil {
	// 	return nil, false, clues.Stack(err)
	// }

	// numDriveItems := c.NumItems - numPrevItems
	// numPrevItems = c.NumItems

	// cl.Add(count.NewPrevPaths, int64(len(newPrevPaths)))

	// --- prev path incorporation

	// For both cases we don't need to do set difference on folder map if the
	// delta token was valid because we should see all the changes.
	// if !du.Reset {
	// 	if len(excludedItemIDs) == 0 {
	// 		continue
	// 	}

	// 	p, err := c.handler.CanonicalPath(odConsts.DriveFolderPrefixBuilder(driveID), c.tenantID)
	// 	if err != nil {
	// 		return nil, false, clues.WrapWC(ictx, err, "making exclude prefix")
	// 	}

	// 	ssmb.Add(p.String(), excludedItemIDs)

	// 	continue
	// }

	// Set all folders in previous backup but not in the current one with state
	// deleted. Need to compare by ID because it's possible to make new folders
	// with the same path as deleted old folders. We shouldn't merge items or
	// subtrees if that happens though.

	// --- post-processing

	// Attach a URL cache to the drive if the number of discovered items is
	// below the threshold. Attaching the cache to larger drives can cause
	// performance issues, since cache delta queries start taking up the majority
	// of the hour that the refreshed URLs are valid for.

	// if numDriveItems < urlCacheDriveItemThreshold {
	// 	logger.Ctx(ictx).Infow(
	// 		"adding url cache for drive",
	// 		"num_drive_items", numDriveItems)

	// 	uc, err := newURLCache(
	// 		driveID,
	// 		prevDeltaLink,
	// 		urlCacheRefreshInterval,
	// 		c.handler,
	// 		cl,
	// 		errs)
	// 	if err != nil {
	// 		return nil, false, clues.Stack(err)
	// 	}

	// 	// Set the URL cache instance for all collections in this drive.
	// 	for id := range c.CollectionMap[driveID] {
	// 		c.CollectionMap[driveID][id].urlCache = uc
	// 	}
	// }

	return nil, nil, pagers.DeltaUpdate{}, clues.New("not yet implemented")
}

// quality-of-life wrapper that transforms each tombstone in the map
// into a backup collection that marks the backup as deleted.
func (c *Collections) makeDriveTombstones(
	ctx context.Context,
	driveTombstones map[string]struct{},
	errs *fault.Bus,
) ([]data.BackupCollection, error) {
	c.counter.Add(count.DriveTombstones, int64(len(driveTombstones)))

	var (
		colls = make([]data.BackupCollection, 0, len(driveTombstones))
		el    = errs.Local()
	)

	// generate tombstones for drives that were removed.
	for driveID := range driveTombstones {
		if el.Failure() != nil {
			break
		}

		prevDrivePath, err := c.handler.PathPrefix(c.tenantID, driveID)
		if err != nil {
			err = clues.WrapWC(ctx, err, "making drive tombstone for previous path").Label(count.BadPathPrefix)
			el.AddRecoverable(ctx, err)

			continue
		}

		// TODO: call NewTombstoneCollection
		coll, err := NewCollection(
			c.handler,
			c.protectedResource,
			nil, // delete the drive
			prevDrivePath,
			driveID,
			c.statusUpdater,
			c.ctrl,
			false,
			true,
			nil,
			c.counter.Local())
		if err != nil {
			err = clues.WrapWC(ctx, err, "making drive tombstone")
			el.AddRecoverable(ctx, err)

			continue
		}

		colls = append(colls, coll)
	}

	return colls, el.Failure()
}

// quality-of-life wrapper that transforms the delta tokens and previous paths
// into backup collections for persistence.
func (c *Collections) makeMetadataCollections(
	ctx context.Context,
	deltaTokens map[string]string,
	prevPaths map[string]map[string]string,
) []data.BackupCollection {
	colls := []data.BackupCollection{}

	pathPrefix, err := c.handler.MetadataPathPrefix(c.tenantID)
	if err != nil {
		logger.CtxErr(ctx, err).Info("making metadata collection path prefixes")

		// It's safe to return here because the logic for starting an
		// incremental backup should eventually find that the metadata files are
		// empty/missing and default to a full backup.
		return colls
	}

	entries := []graph.MetadataCollectionEntry{
		graph.NewMetadataEntry(bupMD.DeltaURLsFileName, deltaTokens),
		graph.NewMetadataEntry(bupMD.PreviousPathFileName, prevPaths),
	}

	md, err := graph.MakeMetadataCollection(
		pathPrefix,
		entries,
		c.statusUpdater,
		c.counter.Local())
	if err != nil {
		logger.CtxErr(ctx, err).Info("making metadata collection for future incremental backups")

		// Technically it's safe to continue here because the logic for starting an
		// incremental backup should eventually find that the metadata files are
		// empty/missing and default to a full backup.
		return colls
	}

	return append(colls, md)
}
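The tombstone bookkeeping in getTree is a plain set difference over drive IDs: seed the set from the previous backup's paths, remove each drive the current enumeration re-discovers, and whatever remains is presumed deleted. A standalone sketch of the same pattern, with hypothetical drive IDs and no Graph calls:

package main

import "fmt"

func main() {
	// previous backup knew about two drives
	prevPathsByDriveID := map[string]map[string]string{
		"drive-1": {"folder-id": "/old/path"},
		"drive-2": {"folder-id": "/old/path"},
	}

	// the current enumeration only returned one of them
	currentDriveIDs := []string{"drive-1"}

	// every previously-known drive starts as a tombstone candidate
	driveTombstones := map[string]struct{}{}
	for driveID := range prevPathsByDriveID {
		driveTombstones[driveID] = struct{}{}
	}

	// drives that still enumerate are cleared from the candidate set
	for _, driveID := range currentDriveIDs {
		delete(driveTombstones, driveID)
	}

	for driveID := range driveTombstones {
		fmt.Println("tombstone:", driveID) // tombstone: drive-2
	}
}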
src/internal/m365/collection/drive/collections_tree_test.go (new file, 510 lines)
@@ -0,0 +1,510 @@
package drive

import (
	"testing"

	"github.com/alcionai/clues"
	"github.com/microsoftgraph/msgraph-sdk-go/models"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/stretchr/testify/suite"

	"github.com/alcionai/corso/src/internal/common/idname"
	"github.com/alcionai/corso/src/internal/common/prefixmatcher"
	"github.com/alcionai/corso/src/internal/common/ptr"
	"github.com/alcionai/corso/src/internal/data"
	dataMock "github.com/alcionai/corso/src/internal/data/mock"
	"github.com/alcionai/corso/src/internal/m365/service/onedrive/mock"
	"github.com/alcionai/corso/src/internal/m365/support"
	"github.com/alcionai/corso/src/internal/tester"
	bupMD "github.com/alcionai/corso/src/pkg/backup/metadata"
	"github.com/alcionai/corso/src/pkg/control"
	"github.com/alcionai/corso/src/pkg/count"
	countTD "github.com/alcionai/corso/src/pkg/count/testdata"
	"github.com/alcionai/corso/src/pkg/fault"
	"github.com/alcionai/corso/src/pkg/path"
	"github.com/alcionai/corso/src/pkg/services/m365/api/graph"
	apiMock "github.com/alcionai/corso/src/pkg/services/m365/api/mock"
)

// ---------------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------------

func collWithMBH(mbh BackupHandler) *Collections {
	return NewCollections(
		mbh,
		tenant,
		idname.NewProvider(user, user),
		func(*support.ControllerOperationStatus) {},
		control.Options{ToggleFeatures: control.Toggles{
			UseDeltaTree: true,
		}},
		count.New())
}

func fullOrPrevPath(
	t *testing.T,
	coll data.BackupCollection,
) path.Path {
	var collPath path.Path

	if coll.State() != data.DeletedState {
		collPath = coll.FullPath()
	} else {
		collPath = coll.PreviousPath()
	}

	require.False(
		t,
		len(collPath.Elements()) < 4,
		"malformed or missing collection path")

	return collPath
}

func pagerForDrives(drives ...models.Driveable) *apiMock.Pager[models.Driveable] {
	return &apiMock.Pager[models.Driveable]{
		ToReturn: []apiMock.PagerResult[models.Driveable]{
			{Values: drives},
		},
	}
}

func makePrevMetadataColls(
	t *testing.T,
	mbh BackupHandler,
	previousPaths map[string]map[string]string,
) []data.RestoreCollection {
	pathPrefix, err := mbh.MetadataPathPrefix(tenant)
	require.NoError(t, err, clues.ToCore(err))

	prevDeltas := map[string]string{}

	for driveID := range previousPaths {
		prevDeltas[driveID] = idx(delta, "prev")
	}

	mdColl, err := graph.MakeMetadataCollection(
		pathPrefix,
		[]graph.MetadataCollectionEntry{
			graph.NewMetadataEntry(bupMD.DeltaURLsFileName, prevDeltas),
			graph.NewMetadataEntry(bupMD.PreviousPathFileName, previousPaths),
		},
		func(*support.ControllerOperationStatus) {},
		count.New())
	require.NoError(t, err, "creating metadata collection", clues.ToCore(err))

	return []data.RestoreCollection{
		dataMock.NewUnversionedRestoreCollection(t, data.NoFetchRestoreCollection{Collection: mdColl}),
	}
}

func compareMetadata(
	t *testing.T,
	mdColl data.Collection,
	expectDeltas map[string]string,
	expectPrevPaths map[string]map[string]string,
) {
	ctx, flush := tester.NewContext(t)
	defer flush()

	colls := []data.RestoreCollection{
		dataMock.NewUnversionedRestoreCollection(t, data.NoFetchRestoreCollection{Collection: mdColl}),
	}

	deltas, prevs, _, err := deserializeAndValidateMetadata(
		ctx,
		colls,
		count.New(),
		fault.New(true))
	require.NoError(t, err, "deserializing metadata", clues.ToCore(err))
	assert.Equal(t, expectDeltas, deltas, "delta urls")
	assert.Equal(t, expectPrevPaths, prevs, "previous paths")
}

// for comparisons done by collection state
type stateAssertion struct {
	itemIDs []string
	// should never get set by the user.
	// this flag gets flipped when calling assertions.compare.
	// any unseen collection will error on requireNoUnseenCollections
	sawCollection bool
}

// for comparisons done by a given collection path
type collectionAssertion struct {
	doNotMerge    assert.BoolAssertionFunc
	states        map[data.CollectionState]*stateAssertion
	excludedItems map[string]struct{}
}

type statesToItemIDs map[data.CollectionState][]string

// TODO(keepers): move excludeItems to a more global position.
func newCollAssertion(
	doNotMerge bool,
	itemsByState statesToItemIDs,
	excludeItems ...string,
) collectionAssertion {
	states := map[data.CollectionState]*stateAssertion{}

	for state, itemIDs := range itemsByState {
		states[state] = &stateAssertion{
			itemIDs: itemIDs,
		}
	}

	dnm := assert.False
	if doNotMerge {
		dnm = assert.True
	}

	return collectionAssertion{
		doNotMerge:    dnm,
		states:        states,
		excludedItems: makeExcludeMap(excludeItems...),
	}
}

// to aggregate all collection-related expectations in the backup
// map collection path -> collection state -> assertion
type collectionAssertions map[string]collectionAssertion

// ensure the provided collection matches expectations as set by the test.
func (cas collectionAssertions) compare(
	t *testing.T,
	coll data.BackupCollection,
	excludes *prefixmatcher.StringSetMatchBuilder,
) {
	ctx, flush := tester.NewContext(t)
	defer flush()

	var (
		itemCh  = coll.Items(ctx, fault.New(true))
		itemIDs = []string{}
	)

	p := fullOrPrevPath(t, coll)

	for itm := range itemCh {
		itemIDs = append(itemIDs, itm.ID())
	}

	expect := cas[p.String()]
	expectState := expect.states[coll.State()]
	expectState.sawCollection = true

	assert.ElementsMatchf(
		t,
		expectState.itemIDs,
		itemIDs,
		"expected all items to match in collection with:\nstate %q\npath %q",
		coll.State(),
		p)

	expect.doNotMerge(
		t,
		coll.DoNotMergeItems(),
		"expected collection to have the appropriate doNotMerge flag")

	if result, ok := excludes.Get(p.String()); ok {
		assert.Equal(
			t,
			expect.excludedItems,
			result,
			"excluded items")
	}
}

// ensure that no collections in the expected set are still flagged
// as sawCollection == false.
func (cas collectionAssertions) requireNoUnseenCollections(
	t *testing.T,
) {
	for p, withPath := range cas {
		for _, state := range withPath.states {
			require.True(
				t,
				state.sawCollection,
				"results should have contained collection:\n\t%q\t\n%q",
				state, p)
		}
	}
}

// ---------------------------------------------------------------------------
// tests
// ---------------------------------------------------------------------------

type CollectionsTreeUnitSuite struct {
	tester.Suite
}

func TestCollectionsTreeUnitSuite(t *testing.T) {
	suite.Run(t, &CollectionsTreeUnitSuite{Suite: tester.NewUnitSuite(t)})
}

func (suite *CollectionsTreeUnitSuite) TestCollections_MakeDriveTombstones() {
	badPfxMBH := mock.DefaultOneDriveBH(user)
	badPfxMBH.PathPrefixErr = assert.AnError

	twostones := map[string]struct{}{
		"t1": {},
		"t2": {},
	}

	table := []struct {
		name       string
		tombstones map[string]struct{}
		c          *Collections
		expectErr  assert.ErrorAssertionFunc
		expect     assert.ValueAssertionFunc
	}{
		{
			name:       "nil",
			tombstones: nil,
			c:          collWithMBH(mock.DefaultOneDriveBH(user)),
			expectErr:  assert.NoError,
			expect:     assert.Empty,
		},
		{
			name:       "none",
			tombstones: map[string]struct{}{},
			c:          collWithMBH(mock.DefaultOneDriveBH(user)),
			expectErr:  assert.NoError,
			expect:     assert.Empty,
		},
		{
			name:       "some tombstones",
			tombstones: twostones,
			c:          collWithMBH(mock.DefaultOneDriveBH(user)),
			expectErr:  assert.NoError,
			expect:     assert.NotEmpty,
		},
		{
			name:       "bad prefix path",
			tombstones: twostones,
			c:          collWithMBH(badPfxMBH),
			expectErr:  assert.Error,
			expect:     assert.Empty,
		},
	}
	for _, test := range table {
		suite.Run(test.name, func() {
			t := suite.T()

			ctx, flush := tester.NewContext(t)
			defer flush()

			colls, err := test.c.makeDriveTombstones(ctx, test.tombstones, fault.New(true))
			test.expectErr(t, err, clues.ToCore(err))
			test.expect(t, colls)

			for _, coll := range colls {
				assert.Equal(t, data.DeletedState, coll.State(), "tombstones should always delete data")
			}
		})
	}
}

func (suite *CollectionsTreeUnitSuite) TestCollections_MakeMetadataCollections() {
	badMetaPfxMBH := mock.DefaultOneDriveBH(user)
	badMetaPfxMBH.MetadataPathPrefixErr = assert.AnError

	table := []struct {
		name   string
		c      *Collections
		expect assert.ValueAssertionFunc
	}{
		{
			name:   "no errors",
			c:      collWithMBH(mock.DefaultOneDriveBH(user)),
			expect: assert.NotEmpty,
		},
		{
			name:   "bad prefix path",
			c:      collWithMBH(badMetaPfxMBH),
			expect: assert.Empty,
		},
	}
	for _, test := range table {
		suite.Run(test.name, func() {
			var (
				t           = suite.T()
				deltaTokens = map[string]string{}
				prevPaths   = map[string]map[string]string{}
			)

			ctx, flush := tester.NewContext(t)
			defer flush()

			colls := test.c.makeMetadataCollections(ctx, deltaTokens, prevPaths)
			test.expect(t, colls)

			for _, coll := range colls {
				assert.NotEqual(t, data.DeletedState, coll.State(), "metadata is never deleted")
			}
		})
	}
}

// TODO(keepers): implement tree version of populateDriveCollections tests

// TODO(keepers): implement tree version of TestGet single-drive tests

func (suite *CollectionsTreeUnitSuite) TestCollections_MakeDriveCollections() {
	drive1 := models.NewDrive()
	drive1.SetId(ptr.To(idx(drive, 1)))
	drive1.SetName(ptr.To(namex(drive, 1)))

	table := []struct {
		name         string
		c            *Collections
		drive        models.Driveable
		prevPaths    map[string]string
		expectErr    require.ErrorAssertionFunc
		expectCounts countTD.Expected
	}{
		{
			name:      "not yet implemented",
			c:         collWithMBH(mock.DefaultOneDriveBH(user)),
			drive:     drive1,
			expectErr: require.Error,
			expectCounts: countTD.Expected{
				count.PrevPaths: 0,
			},
		},
	}
	for _, test := range table {
		suite.Run(test.name, func() {
			t := suite.T()

			ctx, flush := tester.NewContext(t)
			defer flush()

			colls, paths, delta, err := test.c.makeDriveCollections(
				ctx,
				test.drive,
				test.prevPaths,
				test.c.counter,
				fault.New(true))

			// TODO(keepers): awaiting implementation
			test.expectErr(t, err, clues.ToCore(err))
			assert.Empty(t, colls)
			assert.Empty(t, paths)
			assert.Empty(t, delta.URL)

			test.expectCounts.Compare(t, test.c.counter)
		})
	}
}

// TODO(keepers): implement tree version of TestGet multi-drive tests

func (suite *CollectionsTreeUnitSuite) TestCollections_GetTree() {
	metadataPath, err := path.BuildMetadata(
		tenant,
		user,
		path.OneDriveService,
		path.FilesCategory,
		false)
	require.NoError(suite.T(), err, "making metadata path", clues.ToCore(err))

	drive1 := models.NewDrive()
	drive1.SetId(ptr.To(idx(drive, 1)))
	drive1.SetName(ptr.To(namex(drive, 1)))

	type expected struct {
		canUsePrevBackup assert.BoolAssertionFunc
		collAssertions   collectionAssertions
		counts           countTD.Expected
		deltas           map[string]string
		err              require.ErrorAssertionFunc
		prevPaths        map[string]map[string]string
		skips            int
	}

	table := []struct {
		name          string
		drivePager    *apiMock.Pager[models.Driveable]
		enumerator    mock.EnumerateItemsDeltaByDrive
		previousPaths map[string]map[string]string

		metadata []data.RestoreCollection
		expect   expected
	}{
		{
			name:       "not yet implemented",
			drivePager: pagerForDrives(drive1),
			expect: expected{
				canUsePrevBackup: assert.False,
				collAssertions: collectionAssertions{
					fullPath(1): newCollAssertion(
						doNotMergeItems,
						statesToItemIDs{data.NotMovedState: {}},
						id(file)),
				},
				counts: countTD.Expected{
					count.PrevPaths: 0,
				},
				deltas:    map[string]string{},
				err:       require.Error,
				prevPaths: map[string]map[string]string{},
				skips:     0,
			},
		},
	}
	for _, test := range table {
		suite.Run(test.name, func() {
			t := suite.T()

			ctx, flush := tester.NewContext(t)
			defer flush()

			var (
				mbh            = mock.DefaultDriveBHWith(user, test.drivePager, test.enumerator)
				c              = collWithMBH(mbh)
				prevMetadata   = makePrevMetadataColls(t, mbh, test.previousPaths)
				globalExcludes = prefixmatcher.NewStringSetBuilder()
				errs           = fault.New(true)
			)

			colls, canUsePrevBackup, err := c.getTree(
				ctx,
				prevMetadata,
				globalExcludes,
				errs)

			test.expect.err(t, err, clues.ToCore(err))
			// TODO(keepers): awaiting implementation
			assert.Empty(t, colls)
			assert.Equal(t, test.expect.skips, len(errs.Skipped()))
			test.expect.canUsePrevBackup(t, canUsePrevBackup)
			test.expect.counts.Compare(t, c.counter)

			if err != nil {
				return
			}

			for _, coll := range colls {
				collPath := fullOrPrevPath(t, coll)

				if collPath.String() == metadataPath.String() {
					compareMetadata(
						t,
						coll,
						test.expect.deltas,
						test.expect.prevPaths)

					continue
				}

				test.expect.collAssertions.compare(t, coll, globalExcludes)
			}

			test.expect.collAssertions.requireNoUnseenCollections(t)
		})
	}
}
src/internal/m365/collection/drive/limiter.go (new file, 95 lines)
@@ -0,0 +1,95 @@
package drive

import "github.com/alcionai/corso/src/pkg/control"

// used to mark an unused variable while we transition handling.
const ignoreMe = -1

type driveEnumerationStats struct {
	numPages      int
	numAddedFiles int
	numContainers int
	numBytes      int64
}

func newPagerLimiter(opts control.Options) *pagerLimiter {
	res := &pagerLimiter{limits: opts.PreviewLimits}

	if res.limits.MaxContainers == 0 {
		res.limits.MaxContainers = defaultPreviewMaxContainers
	}

	if res.limits.MaxItemsPerContainer == 0 {
		res.limits.MaxItemsPerContainer = defaultPreviewMaxItemsPerContainer
	}

	if res.limits.MaxItems == 0 {
		res.limits.MaxItems = defaultPreviewMaxItems
	}

	if res.limits.MaxBytes == 0 {
		res.limits.MaxBytes = defaultPreviewMaxBytes
	}

	if res.limits.MaxPages == 0 {
		res.limits.MaxPages = defaultPreviewMaxPages
	}

	return res
}

type pagerLimiter struct {
	limits control.PreviewItemLimits
}

func (l pagerLimiter) effectiveLimits() control.PreviewItemLimits {
	return l.limits
}

func (l pagerLimiter) enabled() bool {
	return l.limits.Enabled
}

// sizeLimit returns the total number of bytes this backup should try to
// contain.
func (l pagerLimiter) sizeLimit() int64 {
	return l.limits.MaxBytes
}

// atItemLimit returns true if the limiter is enabled and has reached the limit
// for individual items added to collections for this backup.
func (l pagerLimiter) atItemLimit(stats *driveEnumerationStats) bool {
	return l.enabled() &&
		(stats.numAddedFiles >= l.limits.MaxItems ||
			stats.numBytes >= l.limits.MaxBytes)
}

// atContainerItemsLimit returns true if the limiter is enabled and the current
// number of items is above the limit for the number of items for a container
// for this backup.
func (l pagerLimiter) atContainerItemsLimit(numItems int) bool {
	return l.enabled() && numItems >= l.limits.MaxItemsPerContainer
}

// atPageLimit returns true if the limiter is enabled and the number of
// pages processed so far is beyond the limit for this backup.
func (l pagerLimiter) atPageLimit(stats *driveEnumerationStats) bool {
	return l.enabled() && stats.numPages >= l.limits.MaxPages
}

// atLimit returns true if the limiter is enabled and meets any of the
// conditions for max items, containers, etc for this backup.
func (l pagerLimiter) atLimit(
	stats *driveEnumerationStats,
	containerCount int,
) bool {
	nc := stats.numContainers
	if nc == 0 && containerCount > 0 {
		nc = containerCount
	}

	return l.enabled() &&
		(l.atItemLimit(stats) ||
			nc >= l.limits.MaxContainers ||
			stats.numPages >= l.limits.MaxPages)
}
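Since pagerLimiter and its default thresholds are unexported, here is a simplified standalone mirror of the limit check for illustration only. The field names follow control.PreviewItemLimits, the threshold values are invented, and the containerCount fallback (ignoreMe) is omitted.

package main

import "fmt"

// previewLimits stands in for control.PreviewItemLimits in this sketch.
type previewLimits struct {
	Enabled              bool
	MaxItems             int
	MaxItemsPerContainer int
	MaxContainers        int
	MaxPages             int
	MaxBytes             int64
}

// enumStats mirrors driveEnumerationStats above.
type enumStats struct {
	numPages      int
	numAddedFiles int
	numContainers int
	numBytes      int64
}

// atLimit reports whether any enabled ceiling has been reached.
func atLimit(l previewLimits, s enumStats) bool {
	return l.Enabled &&
		(s.numAddedFiles >= l.MaxItems ||
			s.numBytes >= l.MaxBytes ||
			s.numContainers >= l.MaxContainers ||
			s.numPages >= l.MaxPages)
}

func main() {
	limits := previewLimits{Enabled: true, MaxItems: 100, MaxContainers: 5, MaxPages: 10, MaxBytes: 1 << 20}
	stats := enumStats{numAddedFiles: 100}

	// Enumeration would stop here: the item ceiling has been reached.
	fmt.Println("stop enumerating:", atLimit(limits, stats))
}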
src/internal/m365/collection/drive/limiter_test.go (new file, 1010 lines; diff suppressed because it is too large)
@@ -15,6 +15,7 @@ import (
	"github.com/alcionai/corso/src/pkg/path"
	"github.com/alcionai/corso/src/pkg/selectors"
	"github.com/alcionai/corso/src/pkg/services/m365/api"
	apiMock "github.com/alcionai/corso/src/pkg/services/m365/api/mock"
	"github.com/alcionai/corso/src/pkg/services/m365/api/pagers"
)

@@ -106,6 +107,18 @@ func DefaultSharePointBH(resourceOwner string) *BackupHandler[models.DriveItemable] {
	}
}

func DefaultDriveBHWith(
	resource string,
	drivePager *apiMock.Pager[models.Driveable],
	enumerator EnumerateItemsDeltaByDrive,
) *BackupHandler[models.DriveItemable] {
	mbh := DefaultOneDriveBH(resource)
	mbh.DrivePagerV = drivePager
	mbh.DriveItemEnumeration = enumerator

	return mbh
}

func (h BackupHandler[T]) PathPrefix(tID, driveID string) (path.Path, error) {
	pp, err := h.PathPrefixFn(tID, h.ProtectedResource.ID(), driveID)
	if err != nil {

@@ -121,7 +134,7 @@ func (h BackupHandler[T]) MetadataPathPrefix(tID string) (path.Path, error) {
		return nil, err
	}

	return pp, h.PathPrefixErr
	return pp, h.MetadataPathPrefixErr
}

func (h BackupHandler[T]) CanonicalPath(pb *path.Builder, tID string) (path.Path, error) {
src/pkg/count/testdata/count.go (new file, 30 lines)
@@ -0,0 +1,30 @@
package testdata

import (
	"testing"

	"github.com/stretchr/testify/assert"

	"github.com/alcionai/corso/src/pkg/count"
)

type Expected map[count.Key]int64

func (e Expected) Compare(
	t *testing.T,
	bus *count.Bus,
) {
	vs := bus.Values()
	results := map[count.Key]int64{}

	for k := range e {
		results[k] = bus.Get(k)
		delete(vs, string(k))
	}

	for k, v := range vs {
		t.Logf("unchecked count %q: %d", k, v)
	}

	assert.Equal(t, e, Expected(results))
}
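A sketch of how the new helper is exercised by the tree tests above, assuming an external test package; count.New, Bus.Add, and the count.Drives key all appear elsewhere in this commit.

package testdata_test

import (
	"testing"

	"github.com/alcionai/corso/src/pkg/count"
	countTD "github.com/alcionai/corso/src/pkg/count/testdata"
)

// TestExpectedCompare records a count on a bus, then asserts the exact value.
// Keys present on the bus but absent from the expectation map are only logged.
func TestExpectedCompare(t *testing.T) {
	bus := count.New()
	bus.Add(count.Drives, 2)

	expected := countTD.Expected{
		count.Drives: 2,
	}
	expected.Compare(t, bus)
}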