diff --git a/src/internal/m365/collection/drive/collections.go b/src/internal/m365/collection/drive/collections.go index c64c6314d..2f85e0750 100644 --- a/src/internal/m365/collection/drive/collections.go +++ b/src/internal/m365/collection/drive/collections.go @@ -292,11 +292,11 @@ func DeserializeMap[T any](reader io.ReadCloser, alreadyFound map[string]T) erro func (c *Collections) Get( ctx context.Context, prevMetadata []data.RestoreCollection, - ssmb *prefixmatcher.StringSetMatchBuilder, + globalExcludeItemIDs *prefixmatcher.StringSetMatchBuilder, errs *fault.Bus, ) ([]data.BackupCollection, bool, error) { if c.ctrl.ToggleFeatures.UseDeltaTree { - colls, canUsePrevBackup, err := c.getTree(ctx, prevMetadata, ssmb, errs) + colls, canUsePrevBackup, err := c.getTree(ctx, prevMetadata, globalExcludeItemIDs, errs) if err != nil && !errors.Is(err, errGetTreeNotImplemented) { return nil, false, clues.Wrap(err, "processing backup using tree") } @@ -457,7 +457,7 @@ func (c *Collections) Get( return nil, false, clues.WrapWC(ictx, err, "making exclude prefix") } - ssmb.Add(p.String(), excludedItemIDs) + globalExcludeItemIDs.Add(p.String(), excludedItemIDs) continue } diff --git a/src/internal/m365/collection/drive/collections_tree.go b/src/internal/m365/collection/drive/collections_tree.go index a8ea5fb29..ae6800389 100644 --- a/src/internal/m365/collection/drive/collections_tree.go +++ b/src/internal/m365/collection/drive/collections_tree.go @@ -2,6 +2,7 @@ package drive import ( "context" + "fmt" "github.com/alcionai/clues" "github.com/microsoftgraph/msgraph-sdk-go/models" @@ -33,7 +34,7 @@ import ( func (c *Collections) getTree( ctx context.Context, prevMetadata []data.RestoreCollection, - ssmb *prefixmatcher.StringSetMatchBuilder, + globalExcludeItemIDsByDrivePrefix *prefixmatcher.StringSetMatchBuilder, errs *fault.Bus, ) ([]data.BackupCollection, bool, error) { ctx = clues.AddTraceName(ctx, "GetTree") @@ -114,6 +115,7 @@ func (c *Collections) getTree( prevPathsByDriveID[driveID], deltasByDriveID[driveID], limiter, + globalExcludeItemIDsByDrivePrefix, cl, el) if err != nil { @@ -168,15 +170,18 @@ func (c *Collections) makeDriveCollections( prevPaths map[string]string, prevDeltaLink string, limiter *pagerLimiter, + globalExcludeItemIDsByDrivePrefix *prefixmatcher.StringSetMatchBuilder, counter *count.Bus, errs *fault.Bus, ) ([]data.BackupCollection, map[string]string, pagers.DeltaUpdate, error) { - ppfx, err := c.handler.PathPrefix(c.tenantID, ptr.Val(drv.GetId())) + driveID := ptr.Val(drv.GetId()) + + ppfx, err := c.handler.PathPrefix(c.tenantID, driveID) if err != nil { return nil, nil, pagers.DeltaUpdate{}, clues.Wrap(err, "generating backup tree prefix") } - root, err := c.handler.GetRootFolder(ctx, ptr.Val(drv.GetId())) + root, err := c.handler.GetRootFolder(ctx, driveID) if err != nil { return nil, nil, pagers.DeltaUpdate{}, clues.Wrap(err, "getting root folder") } @@ -187,7 +192,7 @@ func (c *Collections) makeDriveCollections( // --- delta item aggregation - du, err := c.populateTree( + du, countPagesInDelta, err := c.populateTree( ctx, tree, drv, @@ -199,74 +204,44 @@ func (c *Collections) makeDriveCollections( return nil, nil, pagers.DeltaUpdate{}, clues.Stack(err) } - // numDriveItems := c.NumItems - numPrevItems - // numPrevItems = c.NumItems - - // cl.Add(count.NewPrevPaths, int64(len(newPrevPaths))) - // --- prev path incorporation - for folderID, p := range prevPaths { - // no check for errs.Failure here, despite the addRecoverable below. 
- // it's fine if we run through all of the collection generation even - // with failures present, and let the backup finish out. - prevPath, err := path.FromDataLayerPath(p, false) - if err != nil { - errs.AddRecoverable(ctx, clues.WrapWC(ctx, err, "invalid previous path"). - With("folderID", folderID, "prev_path", p). - Label(fault.LabelForceNoBackupCreation)) - - continue - } - - err = tree.setPreviousPath(folderID, prevPath) - if err != nil { - errs.AddRecoverable(ctx, clues.WrapWC(ctx, err, "setting previous path"). - With("folderID", folderID, "prev_path", p). - Label(fault.LabelForceNoBackupCreation)) - - continue - } + err = addPrevPathsToTree( + ctx, + tree, + prevPaths, + errs) + if err != nil { + return nil, nil, pagers.DeltaUpdate{}, clues.Stack(err).Label(fault.LabelForceNoBackupCreation) } - // TODO(keepers): leaving this code around for now as a guide - // while implementation progresses. - // --- post-processing - // Attach an url cache to the drive if the number of discovered items is - // below the threshold. Attaching cache to larger drives can cause - // performance issues since cache delta queries start taking up majority of - // the hour the refreshed URLs are valid for. - - // if numDriveItems < urlCacheDriveItemThreshold { - // logger.Ctx(ictx).Infow( - // "adding url cache for drive", - // "num_drive_items", numDriveItems) - - // uc, err := newURLCache( - // driveID, - // prevDeltaLink, - // urlCacheRefreshInterval, - // c.handler, - // cl, - // errs) - // if err != nil { - // return nil, false, clues.Stack(err) - // } - - // // Set the URL cache instance for all collections in this drive. - // for id := range c.CollectionMap[driveID] { - // c.CollectionMap[driveID][id].urlCache = uc - // } - // } - - // this is a dumb hack to satisfy the linter. - if ctx == nil { - return nil, nil, du, nil + collections, newPrevs, excludedItemIDs, err := c.turnTreeIntoCollections( + ctx, + tree, + driveID, + prevDeltaLink, + countPagesInDelta, + errs) + if err != nil { + return nil, nil, pagers.DeltaUpdate{}, clues.Stack(err).Label(fault.LabelForceNoBackupCreation) } - return nil, nil, du, errGetTreeNotImplemented + // only populate the global excluded items if no delta reset occurred. + // if a reset did occur, the collections should already be marked as + // "do not merge", therefore everything will get processed as a new addition. + if !tree.hadReset { + p, err := c.handler.CanonicalPath(odConsts.DriveFolderPrefixBuilder(driveID), c.tenantID) + if err != nil { + err = clues.WrapWC(ctx, err, "making canonical path for item exclusions") + return nil, nil, pagers.DeltaUpdate{}, err + } + + globalExcludeItemIDsByDrivePrefix.Add(p.String(), excludedItemIDs) + } + + return collections, newPrevs, du, nil } // populateTree constructs a new tree and populates it with items @@ -279,8 +254,8 @@ func (c *Collections) populateTree( limiter *pagerLimiter, counter *count.Bus, errs *fault.Bus, -) (pagers.DeltaUpdate, error) { - ctx = clues.Add(ctx, "invalid_prev_delta", len(prevDeltaLink) == 0) +) (pagers.DeltaUpdate, int, error) { + ctx = clues.Add(ctx, "has_prev_delta", len(prevDeltaLink) > 0) var ( currDeltaLink = prevDeltaLink @@ -290,24 +265,49 @@ func (c *Collections) populateTree( finished bool hitLimit bool // TODO: plug this into the limiter - maxDeltas = 100 - countDeltas = 0 + maximumTotalDeltasAllowed int64 = 100 + // pageCounter is intended as a separate local instance + // compared to the counter we use for other item tracking. + // IE: don't pass it around into other funcs. 
+		//
+		// This allows us to reset pageCounter on a reset without
+		// cross-contaminating other counts.
+		//
+		// We use this to track three keys: 1. the total number of
+		// deltas enumerated (so that we don't hit an infinite
+		// loop); 2. the number of pages in each delta (for the
+		// limiter, but also for the URL cache so that it knows
+		// if we have too many pages for it to efficiently operate);
+		// and 3. the number of items in each delta (to know if we're
+		// done enumerating delta queries).
+		pageCounter = counter.Local()
+	)
+
+	const (
+		// track the exact number of pages across all deltas (correct across resets)
+		// so that the url cache knows if it can operate within performance bounds.
+		truePageCount count.Key = "pages-with-items-across-all-deltas"
 	)
 
 	// enumerate through multiple deltas until we either:
 	// 1. hit a consistent state (ie: no changes since last delta enum)
-	// 2. hit the limit
+	// 2. hit the limit based on the limiter
+	// 3. run 100 total delta enumerations without hitting 1. (no infinite loops)
 	for !hitLimit && !finished && el.Failure() == nil {
 		counter.Inc(count.TotalDeltasProcessed)
 
 		var (
-			pageCount     int
-			pageItemCount int
-			err           error
+			// this is used to track stats on the total number of items
+			// processed in each delta. Since delta queries don't give
+			// us a plain flag for "no changes occurred", we check for
+			// 0 items in the delta as the "no changes occurred" state.
+			// The final page of any delta query may also return 0 items,
+			// so we need to combine both the item count and the per-delta
+			// page count to get a correct flag.
+			iPageCounter = pageCounter.Local()
+			err error
 		)
 
-		countDeltas++
-
 		pager := c.handler.EnumerateDriveItemsDelta(
 			ctx,
 			driveID,
@@ -318,19 +318,22 @@
 		for page, reset, done := pager.NextPage(); !done; page, reset, done = pager.NextPage() {
 			if el.Failure() != nil {
-				return du, el.Failure()
+				return du, 0, el.Failure()
 			}
 
+			// track the exact number of pages within a single delta (correct across resets)
+			// so that we can check for "no changes occurred" results.
+			// Note: don't inc `count.TotalPagesEnumerated` outside of this (ie, for the
+			// truePageCount), or else we'll double up on the inc.
+			iPageCounter.Inc(count.TotalPagesEnumerated)
+
 			if reset {
 				counter.Inc(count.PagerResets)
 				tree.reset()
 				c.resetStats()
 
-				pageCount = 0
-				pageItemCount = 0
-				countDeltas = 0
-			} else {
-				counter.Inc(count.TotalPagesEnumerated)
+				pageCounter = counter.Local()
+				iPageCounter = pageCounter.Local()
 			}
 
 			err = c.enumeratePageOfItems(
@@ -350,14 +353,17 @@
 				el.AddRecoverable(ctx, clues.Stack(err))
 			}
 
-			pageCount++
+			itemCount := int64(len(page))
+			iPageCounter.Add(count.TotalItemsProcessed, itemCount)
 
-			pageItemCount += len(page)
+			if itemCount > 0 {
+				pageCounter.Inc(truePageCount)
+			}
 
-			// Stop enumeration early if we've reached the page limit. Keep this
+			// Stop enumeration early if we've reached the total page limit. Keep this
 			// at the end of the loop so we don't request another page (pager.NextPage)
 			// before seeing we've passed the limit.
-			if limiter.hitPageLimit(pageCount) {
+			if limiter.hitPageLimit(int(pageCounter.Get(truePageCount))) {
 				hitLimit = true
 				break
 			}
@@ -370,23 +376,32 @@
 		du, err = pager.Results()
 		if err != nil {
-			return du, clues.Stack(err)
+			return du, 0, clues.Stack(err)
 		}
 
 		currDeltaLink = du.URL
 
 		// 0 pages is never expected. We should at least have one (empty) page to
 		// consume. But checking pageCount == 1 is brittle in a non-helpful way.
-		finished = pageCount < 2 && pageItemCount == 0
+		finished = iPageCounter.Get(count.TotalPagesEnumerated) < 2 &&
+			iPageCounter.Get(count.TotalItemsProcessed) == 0
 
-		if countDeltas >= maxDeltas {
-			return pagers.DeltaUpdate{}, clues.New("unable to produce consistent delta after 100 queries")
+		// ensure we don't enumerate more than the maximum allotted count of deltas.
+		if counter.Get(count.TotalDeltasProcessed) >= maximumTotalDeltasAllowed {
+			err := clues.NewWC(
+				ctx,
+				fmt.Sprintf("unable to produce consistent delta after %d queries", maximumTotalDeltasAllowed))
+
+			return pagers.DeltaUpdate{}, 0, err
 		}
 	}
 
-	logger.Ctx(ctx).Infow("enumerated collection delta", "stats", counter.Values())
+	logger.Ctx(ctx).Infow(
+		"enumerated collection delta",
+		"stats", counter.Values(),
+		"delta_stats", pageCounter.Values())
 
-	return du, el.Failure()
+	return du, int(pageCounter.Get(truePageCount)), el.Failure()
 }
 
 func (c *Collections) enumeratePageOfItems(
@@ -401,12 +416,13 @@
 	ctx = clues.Add(ctx, "page_lenth", len(page))
 
 	el := errs.Local()
 
-	for i, item := range page {
+	for i, driveItem := range page {
 		if el.Failure() != nil {
 			break
 		}
 
 		var (
+			item       = custom.ToCustomDriveItem(driveItem)
 			isFolder   = item.GetFolder() != nil || item.GetPackageEscaped() != nil
 			isFile     = item.GetFile() != nil
 			itemID     = ptr.Val(item.GetId())
@@ -452,7 +468,7 @@
 func (c *Collections) addFolderToTree(
 	ctx context.Context,
 	tree *folderyMcFolderFace,
 	drv models.Driveable,
-	folder models.DriveItemable,
+	folder *custom.DriveItem,
 	limiter *pagerLimiter,
 	counter *count.Bus,
 ) (*fault.Skipped, error) {
@@ -501,7 +517,7 @@
 		driveID,
 		folderID,
 		folderName,
-		graph.ItemInfo(custom.ToCustomDriveItem(folder)))
+		graph.ItemInfo(folder))
 
 	logger.Ctx(ctx).Infow("malware folder detected")
 
@@ -533,7 +549,7 @@
 func (c *Collections) makeFolderCollectionPath(
 	ctx context.Context,
 	driveID string,
-	folder models.DriveItemable,
+	folder *custom.DriveItem,
 ) (path.Path, error) {
 	if folder.GetRoot() != nil {
 		pb := odConsts.DriveFolderPrefixBuilder(driveID)
@@ -565,20 +581,19 @@
 func (c *Collections) addFileToTree(
 	ctx context.Context,
 	tree *folderyMcFolderFace,
 	drv models.Driveable,
-	file models.DriveItemable,
+	file *custom.DriveItem,
 	limiter *pagerLimiter,
 	counter *count.Bus,
 ) (*fault.Skipped, error) {
 	var (
-		driveID      = ptr.Val(drv.GetId())
-		fileID       = ptr.Val(file.GetId())
-		fileName     = ptr.Val(file.GetName())
-		fileSize     = ptr.Val(file.GetSize())
-		lastModified = ptr.Val(file.GetLastModifiedDateTime())
-		isDeleted    = file.GetDeleted() != nil
-		isMalware    = file.GetMalware() != nil
-		parent       = file.GetParentReference()
-		parentID     string
+		driveID   = ptr.Val(drv.GetId())
+		fileID    = ptr.Val(file.GetId())
+		fileName  = ptr.Val(file.GetName())
+		fileSize  = ptr.Val(file.GetSize())
+		isDeleted = file.GetDeleted() != nil
+		isMalware = file.GetMalware() != nil
+		parent    = file.GetParentReference()
+		parentID  string
 	)
 
 	if parent != nil {
@@ -602,7 +617,7 @@
 		driveID,
 		fileID,
 		fileName,
-		graph.ItemInfo(custom.ToCustomDriveItem(file)))
+		graph.ItemInfo(file))
 
 	logger.Ctx(ctx).Infow("malware file detected")
 
@@ -635,7 +650,7 @@
 		}
 	}
 
-	err := tree.addFile(parentID, fileID, lastModified, fileSize)
+	err := tree.addFile(parentID, fileID, file)
 	if err != nil {
 		return nil, clues.StackWC(ctx, err)
 	}
@@ -737,3 +752,121 @@ func (c *Collections) makeMetadataCollections(
 	return append(colls, md)
 }
+
+func addPrevPathsToTree(
+	ctx context.Context,
+	tree *folderyMcFolderFace,
+	prevPaths map[string]string,
+	errs *fault.Bus,
+) error {
+	el := errs.Local()
+
+	for folderID, p := range prevPaths {
+		if el.Failure() != nil {
+			break
+		}
+
+		prevPath, err := path.FromDataLayerPath(p, false)
+		if err != nil {
+			el.AddRecoverable(ctx, clues.WrapWC(ctx, err, "invalid previous path").
+				With("folderID", folderID, "prev_path", p).
+				Label(count.BadPrevPath))
+
+			continue
+		}
+
+		err = tree.setPreviousPath(folderID, prevPath)
+		if err != nil {
+			el.AddRecoverable(ctx, clues.WrapWC(ctx, err, "setting previous path").
+				With("folderID", folderID, "prev_path", p))
+
+			continue
+		}
+	}
+
+	return el.Failure()
+}
+
+func (c *Collections) turnTreeIntoCollections(
+	ctx context.Context,
+	tree *folderyMcFolderFace,
+	driveID string,
+	prevDeltaLink string,
+	countPagesInDelta int,
+	errs *fault.Bus,
+) (
+	[]data.BackupCollection,
+	map[string]string,
+	map[string]struct{},
+	error,
+) {
+	collectables, err := tree.generateCollectables()
+	if err != nil {
+		err = clues.WrapWC(ctx, err, "generating backup collection data")
+		return nil, nil, nil, err
+	}
+
+	var (
+		collections  = []data.BackupCollection{}
+		newPrevPaths = map[string]string{}
+		uc           *urlCache
+		el           = errs.Local()
+	)
+
+	// Attach a URL cache to the drive if the number of discovered items is
+	// below the threshold. Attaching the cache to larger drives can cause
+	// performance issues, since cache delta queries start taking up the
+	// majority of the hour that the refreshed URLs are valid for.
+	if countPagesInDelta < urlCacheDriveItemThreshold {
+		logger.Ctx(ctx).Info("adding url cache for drive collections")
+
+		uc, err = newURLCache(
+			driveID,
+			// we need the original prevDeltaLink here; a cache update will need
+			// to process all changes since the start of the backup. On the bright
+			// side, instead of running multiple delta enumerations, all changes
+			// in the backup should get compressed into the single delta query, which
+			// ensures the two states are sufficiently consistent with just the
+			// original delta token.
+ prevDeltaLink, + urlCacheRefreshInterval, + c.handler, + c.counter.Local(), + errs) + if err != nil { + return nil, nil, nil, clues.StackWC(ctx, err) + } + } + + for id, cbl := range collectables { + if el.Failure() != nil { + break + } + + if cbl.currPath != nil { + newPrevPaths[id] = cbl.currPath.String() + } + + coll, err := NewCollection( + c.handler, + c.protectedResource, + cbl.currPath, + cbl.prevPath, + driveID, + c.statusUpdater, + c.ctrl, + cbl.isPackageOrChildOfPackage, + tree.hadReset, + uc, + c.counter.Local()) + if err != nil { + return nil, nil, nil, clues.StackWC(ctx, err) + } + + coll.driveItems = cbl.files + + collections = append(collections, coll) + } + + return collections, newPrevPaths, tree.generateExcludeItemIDs(), el.Failure() +} diff --git a/src/internal/m365/collection/drive/collections_tree_test.go b/src/internal/m365/collection/drive/collections_tree_test.go index f73c738cf..d1bc1b355 100644 --- a/src/internal/m365/collection/drive/collections_tree_test.go +++ b/src/internal/m365/collection/drive/collections_tree_test.go @@ -21,6 +21,7 @@ import ( "github.com/alcionai/corso/src/pkg/fault" apiMock "github.com/alcionai/corso/src/pkg/services/m365/api/mock" "github.com/alcionai/corso/src/pkg/services/m365/api/pagers" + "github.com/alcionai/corso/src/pkg/services/m365/custom" ) type CollectionsTreeUnitSuite struct { @@ -151,7 +152,6 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_GetTree() { type expected struct { canUsePrevBackup assert.BoolAssertionFunc - collAssertions collectionAssertions counts countTD.Expected deltas map[string]string prevPaths map[string]map[string]string @@ -176,12 +176,6 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_GetTree() { aPage()))), expect: expected{ canUsePrevBackup: assert.False, - collAssertions: collectionAssertions{ - driveFullPath(1): newCollAssertion( - doNotMergeItems, - statesToItemIDs{data.NotMovedState: {}}, - id(file)), - }, counts: countTD.Expected{ count.PrevPaths: 0, }, @@ -281,7 +275,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_MakeDriveCollections() { mock.Delta(id(delta), nil).With( aPage()))), prevPaths: map[string]string{ - id(folder): fullPath(id(folder)), + id(folder): fullPath(name(folder)), }, expectCounts: countTD.Expected{ count.PrevPaths: 1, @@ -307,7 +301,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_MakeDriveCollections() { mock.Delta(id(delta), nil).With( aPage(folderAtRoot(), fileAt(folder))))), prevPaths: map[string]string{ - id(folder): fullPath(id(folder)), + id(folder): fullPath(name(folder)), }, expectCounts: countTD.Expected{ count.PrevPaths: 1, @@ -335,7 +329,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_MakeDriveCollections() { aReset(), aPage()))), prevPaths: map[string]string{ - id(folder): fullPath(id(folder)), + id(folder): fullPath(name(folder)), }, expectCounts: countTD.Expected{ count.PrevPaths: 1, @@ -363,7 +357,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_MakeDriveCollections() { aReset(), aPage(folderAtRoot(), fileAt(folder))))), prevPaths: map[string]string{ - id(folder): fullPath(id(folder)), + id(folder): fullPath(name(folder)), }, expectCounts: countTD.Expected{ count.PrevPaths: 1, @@ -389,18 +383,301 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_MakeDriveCollections() { test.prevPaths, idx(delta, "prev"), newPagerLimiter(control.DefaultOptions()), + prefixmatcher.NewStringSetBuilder(), c.counter, fault.New(true)) - - // TODO(keepers): implementation is incomplete - // an error check is 
the best we can get at the moment. - require.ErrorIs(t, err, errGetTreeNotImplemented, clues.ToCore(err)) + require.NoError(t, err, clues.ToCore(err)) test.expectCounts.Compare(t, c.counter) }) } } +func (suite *CollectionsTreeUnitSuite) TestCollections_AddPrevPathsToTree_errors() { + table := []struct { + name string + tree func(t *testing.T) *folderyMcFolderFace + prevPaths map[string]string + expectErr require.ErrorAssertionFunc + }{ + { + name: "no error - normal usage", + tree: treeWithFolders, + prevPaths: map[string]string{ + idx(folder, "parent"): fullPath(namex(folder, "parent")), + id(folder): fullPath(namex(folder, "parent"), name(folder)), + }, + expectErr: require.NoError, + }, + { + name: "no error - prev paths are empty", + tree: treeWithFolders, + prevPaths: map[string]string{}, + expectErr: require.NoError, + }, + { + name: "no error - folder not visited in this delta", + tree: treeWithFolders, + prevPaths: map[string]string{ + id("santa"): fullPath(name("santa")), + }, + expectErr: require.NoError, + }, + { + name: "empty key in previous paths", + tree: treeWithFolders, + prevPaths: map[string]string{ + "": fullPath(namex(folder, "parent")), + }, + expectErr: require.Error, + }, + { + name: "empty value in previous paths", + tree: treeWithFolders, + prevPaths: map[string]string{ + id(folder): "", + }, + expectErr: require.Error, + }, + { + name: "malformed value in previous paths", + tree: treeWithFolders, + prevPaths: map[string]string{ + id(folder): "not a path", + }, + expectErr: require.Error, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + tree := test.tree(t) + + err := addPrevPathsToTree( + ctx, + tree, + test.prevPaths, + fault.New(true)) + test.expectErr(t, err, clues.ToCore(err)) + }) + } +} + +func (suite *CollectionsTreeUnitSuite) TestCollections_TurnTreeIntoCollections() { + type expected struct { + prevPaths map[string]string + collections func(t *testing.T) expectedCollections + globalExcludedFileIDs map[string]struct{} + } + + table := []struct { + name string + tree func(t *testing.T) *folderyMcFolderFace + prevPaths map[string]string + enableURLCache bool + expect expected + }{ + { + name: "all new collections", + tree: fullTree, + prevPaths: map[string]string{}, + enableURLCache: true, + expect: expected{ + prevPaths: map[string]string{ + rootID: fullPath(), + idx(folder, "parent"): fullPath(namex(folder, "parent")), + id(folder): fullPath(namex(folder, "parent"), name(folder)), + }, + collections: func(t *testing.T) expectedCollections { + return expectCollections( + false, + true, + aColl( + fullPathPath(t), + nil, + idx(file, "r")), + aColl( + fullPathPath(t, namex(folder, "parent")), + nil, + idx(file, "p")), + aColl( + fullPathPath(t, namex(folder, "parent"), name(folder)), + nil, + id(file))) + }, + globalExcludedFileIDs: makeExcludeMap( + idx(file, "r"), + idx(file, "p"), + idx(file, "d"), + id(file)), + }, + }, + { + name: "all folders moved", + tree: fullTree, + enableURLCache: true, + prevPaths: map[string]string{ + rootID: fullPath(), + idx(folder, "parent"): fullPath(namex(folder, "parent-prev")), + id(folder): fullPath(namex(folder, "parent-prev"), name(folder)), + idx(folder, "tombstone"): fullPath(namex(folder, "tombstone-prev")), + }, + expect: expected{ + prevPaths: map[string]string{ + rootID: fullPath(), + idx(folder, "parent"): fullPath(namex(folder, "parent")), + id(folder): fullPath(namex(folder, "parent"), name(folder)), + }, + 
collections: func(t *testing.T) expectedCollections { + return expectCollections( + false, + true, + aColl( + fullPathPath(t), + fullPathPath(t), + idx(file, "r")), + aColl( + fullPathPath(t, namex(folder, "parent")), + fullPathPath(t, namex(folder, "parent-prev")), + idx(file, "p")), + aColl( + fullPathPath(t, namex(folder, "parent"), name(folder)), + fullPathPath(t, namex(folder, "parent-prev"), name(folder)), + id(file)), + aColl(nil, fullPathPath(t, namex(folder, "tombstone-prev")))) + }, + globalExcludedFileIDs: makeExcludeMap( + idx(file, "r"), + idx(file, "p"), + idx(file, "d"), + id(file)), + }, + }, + { + name: "all folders moved - todo: path separator string check", + tree: fullTreeWithNames("parent", "tombstone"), + enableURLCache: true, + prevPaths: map[string]string{ + rootID: fullPath(), + idx(folder, "parent"): fullPath(namex(folder, "parent-prev")), + id(folder): fullPath(namex(folder, "parent-prev"), name(folder)), + idx(folder, "tombstone"): fullPath(namex(folder, "tombstone-prev")), + }, + expect: expected{ + prevPaths: map[string]string{ + rootID: fullPath(), + idx(folder, "parent"): fullPath(namex(folder, "parent")), + id(folder): fullPath(namex(folder, "parent"), name(folder)), + }, + collections: func(t *testing.T) expectedCollections { + return expectCollections( + false, + true, + aColl( + fullPathPath(t), + fullPathPath(t), + idx(file, "r")), + aColl( + fullPathPath(t, namex(folder, "parent")), + fullPathPath(t, namex(folder, "parent-prev")), + idx(file, "p")), + aColl( + fullPathPath(t, namex(folder, "parent"), name(folder)), + fullPathPath(t, namex(folder, "parent-prev"), name(folder)), + id(file)), + aColl(nil, fullPathPath(t, namex(folder, "tombstone-prev")))) + }, + globalExcludedFileIDs: makeExcludeMap( + idx(file, "r"), + idx(file, "p"), + idx(file, "d"), + id(file)), + }, + }, + { + name: "no folders moved", + tree: fullTree, + enableURLCache: true, + prevPaths: map[string]string{ + rootID: fullPath(), + idx(folder, "parent"): fullPath(namex(folder, "parent")), + id(folder): fullPath(namex(folder, "parent"), name(folder)), + idx(folder, "tombstone"): fullPath(namex(folder, "tombstone")), + }, + expect: expected{ + prevPaths: map[string]string{ + rootID: fullPath(), + idx(folder, "parent"): fullPath(namex(folder, "parent")), + id(folder): fullPath(namex(folder, "parent"), name(folder)), + }, + collections: func(t *testing.T) expectedCollections { + return expectCollections( + false, + true, + aColl( + fullPathPath(t), + fullPathPath(t), + idx(file, "r")), + aColl( + fullPathPath(t, namex(folder, "parent")), + fullPathPath(t, namex(folder, "parent")), + idx(file, "p")), + aColl( + fullPathPath(t, namex(folder, "parent"), name(folder)), + fullPathPath(t, namex(folder, "parent"), name(folder)), + id(file)), + aColl(nil, fullPathPath(t, namex(folder, "tombstone")))) + }, + globalExcludedFileIDs: makeExcludeMap( + idx(file, "r"), + idx(file, "p"), + idx(file, "d"), + id(file)), + }, + }, + } + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + tree := test.tree(t) + + err := addPrevPathsToTree(ctx, tree, test.prevPaths, fault.New(true)) + require.NoError(t, err, clues.ToCore(err)) + + c := collWithMBH(mock.DefaultOneDriveBH(user)) + + countPages := 9001 + if test.enableURLCache { + countPages = 1 + } + + colls, newPrevPaths, excluded, err := c.turnTreeIntoCollections( + ctx, + tree, + id(drive), + delta, + countPages, + fault.New(true)) + require.NoError(t, err, 
clues.ToCore(err)) + assert.Equal(t, test.expect.prevPaths, newPrevPaths, "new previous paths") + + expectColls := test.expect.collections(t) + expectColls.compare(t, colls) + expectColls.requireNoUnseenCollections(t) + + assert.Equal(t, test.expect.globalExcludedFileIDs, excluded) + }) + } +} + type populateTreeExpected struct { counts countTD.Expected err require.ErrorAssertionFunc @@ -416,7 +693,7 @@ type populateTreeExpected struct { type populateTreeTest struct { name string enumerator mock.EnumerateDriveItemsDelta - tree *folderyMcFolderFace + tree func(t *testing.T) *folderyMcFolderFace limiter *pagerLimiter expect populateTreeExpected } @@ -431,7 +708,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_singleDelta( table := []populateTreeTest{ { name: "nil page", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, // special case enumerator to generate a null page. // otherwise all enumerators should be DriveEnumerator()s. enumerator: mock.EnumerateDriveItemsDelta{ @@ -459,7 +736,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_singleDelta( }, { name: "root only", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -484,7 +761,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_singleDelta( }, { name: "root only on two pages", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -510,7 +787,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_singleDelta( }, { name: "many folders in a hierarchy across multiple pages", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -542,7 +819,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_singleDelta( }, { name: "many folders with files", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -583,7 +860,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_singleDelta( }, { name: "many folders with files across multiple deltas", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With(aPage( @@ -625,7 +902,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_singleDelta( // technically you won't see this behavior from graph deltas, since deletes always // precede creates/updates. But it's worth checking that we can handle it anyways. name: "create, delete on next page", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -658,7 +935,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_singleDelta( // technically you won't see this behavior from graph deltas, since deletes always // precede creates/updates. But it's worth checking that we can handle it anyways. 
name: "move->delete folder with populated tree", - tree: treeWithFolders(), + tree: treeWithFolders, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -693,7 +970,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_singleDelta( }, { name: "at folder limit before enumeration", - tree: treeWithFileAtRoot(), + tree: treeWithFileAtRoot, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -729,7 +1006,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_singleDelta( }, { name: "hit folder limit during enumeration", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -782,7 +1059,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_multiDelta() table := []populateTreeTest{ { name: "sanity case: normal enumeration split across multiple deltas", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -828,7 +1105,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_multiDelta() }, { name: "create->delete,create", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -868,7 +1145,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_multiDelta() }, { name: "visit->rename", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -905,7 +1182,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_PopulateTree_multiDelta() }, { name: "duplicate folder name from deferred delete marker", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, enumerator: mock.DriveEnumerator( mock.Drive(id(drive)).With( mock.Delta(id(delta), nil).With( @@ -966,13 +1243,16 @@ func runPopulateTreeTest( ctx, flush := tester.NewContext(t) defer flush() - mbh := mock.DefaultDriveBHWith(user, pagerForDrives(drv), test.enumerator) - c := collWithMBH(mbh) - counter := count.New() + var ( + mbh = mock.DefaultDriveBHWith(user, pagerForDrives(drv), test.enumerator) + c = collWithMBH(mbh) + counter = count.New() + tree = test.tree(t) + ) - _, err := c.populateTree( + _, _, err := c.populateTree( ctx, - test.tree, + tree, drv, id(delta), test.limiter, @@ -984,10 +1264,10 @@ func runPopulateTreeTest( assert.Equal( t, test.expect.numLiveFolders, - test.tree.countLiveFolders(), + tree.countLiveFolders(), "count live folders in tree") - cAndS := test.tree.countLiveFilesAndSizes() + cAndS := tree.countLiveFilesAndSizes() assert.Equal( t, test.expect.numLiveFiles, @@ -1001,16 +1281,16 @@ func runPopulateTreeTest( test.expect.counts.Compare(t, counter) for _, id := range test.expect.treeContainsFolderIDs { - assert.NotNil(t, test.tree.folderIDToNode[id], "node exists") + assert.NotNil(t, tree.folderIDToNode[id], "node exists") } for _, id := range test.expect.treeContainsTombstoneIDs { - assert.NotNil(t, test.tree.tombstones[id], "tombstone exists") + assert.NotNil(t, tree.tombstones[id], "tombstone exists") } for iID, pID := range test.expect.treeContainsFileIDsWithParent { - assert.Contains(t, test.tree.fileIDToParentID, iID, "file should exist in tree") - assert.Equal(t, pID, test.tree.fileIDToParentID[iID], "file should 
reference correct parent") + assert.Contains(t, tree.fileIDToParentID, iID, "file should exist in tree") + assert.Equal(t, pID, tree.fileIDToParentID[iID], "file should reference correct parent") } } @@ -1036,14 +1316,14 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold table := []struct { name string - tree *folderyMcFolderFace + tree func(t *testing.T) *folderyMcFolderFace page mock.NextPage limiter *pagerLimiter expect expected }{ { name: "nil page", - tree: treeWithRoot(), + tree: treeWithRoot, page: mock.NextPage{}, limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1058,7 +1338,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold }, { name: "empty page", - tree: treeWithRoot(), + tree: treeWithRoot, page: mock.NextPage{Items: []models.DriveItemable{}}, limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1073,7 +1353,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold }, { name: "root only", - tree: treeWithRoot(), + tree: treeWithRoot, page: aPage(), limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1090,7 +1370,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold }, { name: "many folders in a hierarchy", - tree: treeWithRoot(), + tree: treeWithRoot, page: aPage( folderAtRoot(), folderxAtRoot("sib"), @@ -1113,7 +1393,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold }, { name: "create->delete", - tree: treeWithRoot(), + tree: treeWithRoot, page: aPage( folderAtRoot(), delItem(id(folder), rootID, isFolder)), @@ -1133,7 +1413,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold }, { name: "move->delete", - tree: treeWithFolders(), + tree: treeWithFolders, page: aPage( folderxAtRoot("parent"), driveItem(id(folder), namex(folder, "moved"), parentDir(namex(folder, "parent")), idx(folder, "parent"), isFolder), @@ -1157,7 +1437,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold }, { name: "delete->create with previous path", - tree: treeWithRoot(), + tree: treeWithRoot, page: aPage( delItem(id(folder), rootID, isFolder), folderAtRoot()), @@ -1178,7 +1458,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold }, { name: "delete->create without previous path", - tree: treeWithRoot(), + tree: treeWithRoot, page: aPage( delItem(id(folder), rootID, isFolder), folderAtRoot()), @@ -1205,12 +1485,15 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold ctx, flush := tester.NewContext(t) defer flush() - c := collWithMBH(mock.DefaultOneDriveBH(user)) - counter := count.New() + var ( + c = collWithMBH(mock.DefaultOneDriveBH(user)) + counter = count.New() + tree = test.tree(t) + ) err := c.enumeratePageOfItems( ctx, - test.tree, + tree, drv, test.page.Items, test.limiter, @@ -1225,16 +1508,16 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_fold assert.Equal( t, test.expect.treeSize, - len(test.tree.tombstones)+test.tree.countLiveFolders(), + len(tree.tombstones)+tree.countLiveFolders(), "count folders in tree") test.expect.counts.Compare(t, counter) for _, id := range test.expect.treeContainsFolderIDs { - assert.NotNil(t, test.tree.folderIDToNode[id], "node exists") + assert.NotNil(t, tree.folderIDToNode[id], "node exists") } for _, id := range test.expect.treeContainsTombstoneIDs { - assert.NotNil(t, 
test.tree.tombstones[id], "tombstone exists") + assert.NotNil(t, tree.tombstones[id], "tombstone exists") } }) } @@ -1245,11 +1528,14 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { drv.SetId(ptr.To(id(drive))) drv.SetName(ptr.To(name(drive))) - fld := folderAtRoot() - subFld := folderAtDeep(driveParentDir(drv, namex(folder, "parent")), idx(folder, "parent")) - pack := driveItem(id(pkg), name(pkg), parentDir(), rootID, isPackage) - del := delItem(id(folder), rootID, isFolder) - mal := malwareItem(idx(folder, "mal"), namex(folder, "mal"), parentDir(), rootID, isFolder) + var ( + fld = custom.ToCustomDriveItem(folderAtRoot()) + subFld = custom.ToCustomDriveItem(folderAtDeep(driveParentDir(drv, namex(folder, "parent")), idx(folder, "parent"))) + pack = custom.ToCustomDriveItem(driveItem(id(pkg), name(pkg), parentDir(), rootID, isPackage)) + del = custom.ToCustomDriveItem(delItem(id(folder), rootID, isFolder)) + mal = custom.ToCustomDriveItem( + malwareItem(idx(folder, "mal"), namex(folder, "mal"), parentDir(), rootID, isFolder)) + ) type expected struct { countLiveFolders int @@ -1263,14 +1549,14 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { table := []struct { name string - tree *folderyMcFolderFace - folder models.DriveItemable + tree func(t *testing.T) *folderyMcFolderFace + folder *custom.DriveItem limiter *pagerLimiter expect expected }{ { name: "add folder", - tree: treeWithRoot(), + tree: treeWithRoot, folder: fld, limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1289,7 +1575,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { }, { name: "re-add folder that already exists", - tree: treeWithFolders(), + tree: treeWithFolders, folder: subFld, limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1308,7 +1594,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { }, { name: "add package", - tree: treeWithRoot(), + tree: treeWithRoot, folder: pack, limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1327,7 +1613,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { }, { name: "tombstone a folder in a populated tree", - tree: treeWithFolders(), + tree: treeWithFolders, folder: del, limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1346,7 +1632,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { }, { name: "tombstone new folder in unpopulated tree", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, folder: del, limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1364,7 +1650,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { }, { name: "re-add tombstone that already exists", - tree: treeWithTombstone(), + tree: treeWithTombstone, folder: del, limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1383,7 +1669,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { }, { name: "add malware", - tree: treeWithRoot(), + tree: treeWithRoot, folder: mal, limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1402,7 +1688,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { }, { name: "already over container limit, folder seen twice", - tree: treeWithFolders(), + tree: treeWithFolders, folder: fld, limiter: newPagerLimiter(minimumLimitOpts()), expect: expected{ @@ -1422,7 +1708,7 @@ func (suite 
*CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { }, { name: "already at container limit", - tree: treeWithRoot(), + tree: treeWithRoot, folder: fld, limiter: newPagerLimiter(minimumLimitOpts()), expect: expected{ @@ -1442,7 +1728,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { }, { name: "process tombstone when over folder limits", - tree: treeWithFolders(), + tree: treeWithFolders, folder: del, limiter: newPagerLimiter(minimumLimitOpts()), expect: expected{ @@ -1468,12 +1754,15 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { ctx, flush := tester.NewContext(t) defer flush() - c := collWithMBH(mock.DefaultOneDriveBH(user)) - counter := count.New() + var ( + c = collWithMBH(mock.DefaultOneDriveBH(user)) + counter = count.New() + tree = test.tree(t) + ) skipped, err := c.addFolderToTree( ctx, - test.tree, + tree, drv, test.folder, test.limiter, @@ -1487,13 +1776,13 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFolderToTree() { } test.expect.counts.Compare(t, counter) - assert.Equal(t, test.expect.countLiveFolders, test.tree.countLiveFolders(), "live folders") + assert.Equal(t, test.expect.countLiveFolders, tree.countLiveFolders(), "live folders") assert.Equal( t, test.expect.treeSize, - len(test.tree.tombstones)+test.tree.countLiveFolders(), + len(tree.tombstones)+tree.countLiveFolders(), "folders in tree") - test.expect.treeContainsFolder(t, test.tree.containsFolder(ptr.Val(test.folder.GetId()))) + test.expect.treeContainsFolder(t, tree.containsFolder(ptr.Val(test.folder.GetId()))) }) } } @@ -1539,7 +1828,10 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_MakeFolderCollectionPath( c := collWithMBH(mock.DefaultOneDriveBH(user)) - p, err := c.makeFolderCollectionPath(ctx, id(drive), test.folder) + p, err := c.makeFolderCollectionPath( + ctx, + id(drive), + custom.ToCustomDriveItem(test.folder)) test.expectErr(t, err, clues.ToCore(err)) if err == nil { @@ -1570,13 +1862,13 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file table := []struct { name string - tree *folderyMcFolderFace + tree func(t *testing.T) *folderyMcFolderFace page mock.NextPage expect expected }{ { name: "one file at root", - tree: treeWithRoot(), + tree: treeWithRoot, page: aPage(fileAtRoot()), expect: expected{ counts: countTD.Expected{ @@ -1594,7 +1886,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file }, { name: "one file in a folder", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, page: aPage( folderAtRoot(), fileAt(folder)), @@ -1614,7 +1906,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file }, { name: "many files in a hierarchy", - tree: treeWithRoot(), + tree: treeWithRoot, page: aPage( fileAtRoot(), folderAtRoot(), @@ -1636,7 +1928,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file }, { name: "many updates to the same file", - tree: treeWithRoot(), + tree: treeWithRoot, page: aPage( fileAtRoot(), driveItem(id(file), namex(file, 1), parentDir(), rootID, isFile), @@ -1657,7 +1949,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file }, { name: "delete an existing file", - tree: treeWithFileAtRoot(), + tree: treeWithFileAtRoot, page: aPage(delItem(id(file), rootID, isFile)), expect: expected{ counts: countTD.Expected{ @@ -1673,7 +1965,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file }, { name: "delete 
the same file twice", - tree: treeWithFileAtRoot(), + tree: treeWithFileAtRoot, page: aPage( delItem(id(file), rootID, isFile), delItem(id(file), rootID, isFile)), @@ -1691,7 +1983,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file }, { name: "create->delete", - tree: treeWithRoot(), + tree: treeWithRoot, page: aPage( fileAtRoot(), delItem(id(file), rootID, isFile)), @@ -1709,7 +2001,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file }, { name: "move->delete", - tree: treeWithFileAtRoot(), + tree: treeWithFileAtRoot, page: aPage( folderAtRoot(), fileAt(folder), @@ -1728,7 +2020,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file }, { name: "delete->create an existing file", - tree: treeWithFileAtRoot(), + tree: treeWithFileAtRoot, page: aPage( delItem(id(file), rootID, isFile), fileAtRoot()), @@ -1748,7 +2040,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file }, { name: "delete->create a non-existing file", - tree: treeWithRoot(), + tree: treeWithRoot, page: aPage( delItem(id(file), rootID, isFile), fileAtRoot()), @@ -1774,12 +2066,15 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file ctx, flush := tester.NewContext(t) defer flush() - c := collWithMBH(mock.DefaultOneDriveBH(user)) - counter := count.New() + var ( + c = collWithMBH(mock.DefaultOneDriveBH(user)) + counter = count.New() + tree = test.tree(t) + ) err := c.enumeratePageOfItems( ctx, - test.tree, + tree, drv, test.page.Items, newPagerLimiter(control.DefaultOptions()), @@ -1787,10 +2082,10 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_EnumeratePageOfItems_file fault.New(true)) test.expect.err(t, err, clues.ToCore(err)) - countSize := test.tree.countLiveFilesAndSizes() + countSize := tree.countLiveFilesAndSizes() assert.Equal(t, test.expect.countLiveFiles, countSize.numFiles, "count of files") assert.Equal(t, test.expect.countTotalBytes, countSize.totalBytes, "total size in bytes") - assert.Equal(t, test.expect.treeContainsFileIDsWithParent, test.tree.fileIDToParentID) + assert.Equal(t, test.expect.treeContainsFileIDsWithParent, tree.fileIDToParentID) test.expect.counts.Compare(t, counter) }) } @@ -1813,14 +2108,14 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { table := []struct { name string - tree *folderyMcFolderFace + tree func(t *testing.T) *folderyMcFolderFace file models.DriveItemable limiter *pagerLimiter expect expected }{ { name: "add new file", - tree: treeWithRoot(), + tree: treeWithRoot, file: fileAtRoot(), limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1838,7 +2133,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { }, { name: "duplicate file", - tree: treeWithFileAtRoot(), + tree: treeWithFileAtRoot, file: fileAtRoot(), limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1856,7 +2151,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { }, { name: "error file seen before parent", - tree: treeWithRoot(), + tree: treeWithRoot, file: fileAt(folder), limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1872,7 +2167,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { }, { name: "malware file", - tree: treeWithRoot(), + tree: treeWithRoot, file: malwareItem(id(file), name(file), parentDir(name(folder)), rootID, isFile), limiter: 
newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1888,7 +2183,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { }, { name: "delete non-existing file", - tree: treeWithRoot(), + tree: treeWithRoot, file: delItem(id(file), id(folder), isFile), limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1904,7 +2199,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { }, { name: "delete existing file", - tree: treeWithFileAtRoot(), + tree: treeWithFileAtRoot, file: delItem(id(file), rootID, isFile), limiter: newPagerLimiter(control.DefaultOptions()), expect: expected{ @@ -1920,7 +2215,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { }, { name: "already at container file limit", - tree: treeWithFileAtRoot(), + tree: treeWithFileAtRoot, file: filexAtRoot(2), limiter: newPagerLimiter(minimumLimitOpts()), expect: expected{ @@ -1939,7 +2234,7 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { }, { name: "goes over total byte limit", - tree: treeWithRoot(), + tree: treeWithRoot, file: fileAtRoot(), limiter: newPagerLimiter(minimumLimitOpts()), expect: expected{ @@ -1962,14 +2257,17 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { ctx, flush := tester.NewContext(t) defer flush() - c := collWithMBH(mock.DefaultOneDriveBH(user)) - counter := count.New() + var ( + c = collWithMBH(mock.DefaultOneDriveBH(user)) + counter = count.New() + tree = test.tree(t) + ) skipped, err := c.addFileToTree( ctx, - test.tree, + tree, drv, - test.file, + custom.ToCustomDriveItem(test.file), test.limiter, counter) @@ -1980,10 +2278,10 @@ func (suite *CollectionsTreeUnitSuite) TestCollections_AddFileToTree() { require.ErrorIs(t, err, errHitLimit, clues.ToCore(err)) } - assert.Equal(t, test.expect.treeContainsFileIDsWithParent, test.tree.fileIDToParentID) + assert.Equal(t, test.expect.treeContainsFileIDsWithParent, tree.fileIDToParentID) test.expect.counts.Compare(t, counter) - countSize := test.tree.countLiveFilesAndSizes() + countSize := tree.countLiveFilesAndSizes() assert.Equal(t, test.expect.countLiveFiles, countSize.numFiles, "count of files") assert.Equal(t, test.expect.countTotalBytes, countSize.totalBytes, "total size in bytes") }) diff --git a/src/internal/m365/collection/drive/delta_tree.go b/src/internal/m365/collection/drive/delta_tree.go index a888da1bd..45012bb53 100644 --- a/src/internal/m365/collection/drive/delta_tree.go +++ b/src/internal/m365/collection/drive/delta_tree.go @@ -2,12 +2,14 @@ package drive import ( "context" - "time" "github.com/alcionai/clues" + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/m365/collection/drive/metadata" "github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/custom" ) // folderyMcFolderFace owns our delta processing tree. 
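A minimal, self-contained sketch of the shape the delta_tree.go hunks below adopt: each tree node stores the full drive item per file (rather than a trimmed lastModified/contentSize pair), and aggregate counts and sizes are derived on demand by walking the tree with a nil-safe pointer dereference. The item, node, and val names here are hypothetical stand-ins for *custom.DriveItem, nodeyMcNodeFace, and ptr.Val; this is illustrative, not part of the patch.

package main

import "fmt"

// item stands in for *custom.DriveItem; only the size matters here.
type item struct {
	id   string
	size *int64 // pointer mimics the SDK's nullable getters
}

// val mimics ptr.Val: a nil-safe dereference with a zero fallback.
func val(p *int64) int64 {
	if p == nil {
		return 0
	}
	return *p
}

// node stands in for nodeyMcNodeFace: children by folder ID, files by item ID.
type node struct {
	children map[string]*node
	files    map[string]*item
}

// countFilesAndSizes walks the subtree, summing file counts and byte sizes.
func countFilesAndSizes(n *node) (int, int64) {
	if n == nil {
		return 0, 0
	}

	var (
		files int
		bytes int64
	)

	for _, c := range n.children {
		f, b := countFilesAndSizes(c)
		files += f
		bytes += b
	}

	for _, f := range n.files {
		files++
		bytes += val(f.size)
	}

	return files, bytes
}

func main() {
	sz := int64(42)
	root := &node{
		children: map[string]*node{
			"folder": {files: map[string]*item{"f1": {id: "f1", size: &sz}}},
		},
		files: map[string]*item{"f0": {id: "f0", size: nil}},
	}

	files, bytes := countFilesAndSizes(root)
	fmt.Println(files, bytes) // 2 42
}

Storing the full item keeps a single source of truth on the node; stats such as total bytes stay correct even when an item lacks a size, since the nil-safe dereference falls back to zero.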
@@ -86,7 +88,7 @@ type nodeyMcNodeFace struct { // folderID -> node children map[string]*nodeyMcNodeFace // file item ID -> file metadata - files map[string]fileyMcFileFace + files map[string]*custom.DriveItem // for special handling protocols around packages isPackage bool } @@ -101,16 +103,11 @@ func newNodeyMcNodeFace( id: id, name: name, children: map[string]*nodeyMcNodeFace{}, - files: map[string]fileyMcFileFace{}, + files: map[string]*custom.DriveItem{}, isPackage: isPackage, } } -type fileyMcFileFace struct { - lastModified time.Time - contentSize int64 -} - // --------------------------------------------------------------------------- // folder handling // --------------------------------------------------------------------------- @@ -317,8 +314,7 @@ func (face *folderyMcFolderFace) setPreviousPath( // this func will update and/or clean up all the old references. func (face *folderyMcFolderFace) addFile( parentID, id string, - lastModified time.Time, - contentSize int64, + file *custom.DriveItem, ) error { if len(parentID) == 0 { return clues.New("item added without parent folder ID") @@ -347,10 +343,7 @@ func (face *folderyMcFolderFace) addFile( } face.fileIDToParentID[id] = parentID - parent.files[id] = fileyMcFileFace{ - lastModified: lastModified, - contentSize: contentSize, - } + parent.files[id] = file delete(face.deletedFileIDs, id) @@ -374,6 +367,114 @@ func (face *folderyMcFolderFace) deleteFile(id string) { face.deletedFileIDs[id] = struct{}{} } +// --------------------------------------------------------------------------- +// post-processing +// --------------------------------------------------------------------------- + +type collectable struct { + currPath path.Path + files map[string]*custom.DriveItem + folderID string + isPackageOrChildOfPackage bool + loc path.Elements + prevPath path.Path +} + +// produces a map of folderID -> collectable +func (face *folderyMcFolderFace) generateCollectables() (map[string]collectable, error) { + result := map[string]collectable{} + err := walkTreeAndBuildCollections( + face.root, + face.prefix, + &path.Builder{}, + false, + result) + + for id, tombstone := range face.tombstones { + // in case we got a folder deletion marker for a folder + // that has no previous path, drop the entry entirely. + // it doesn't exist in storage, so there's nothing to delete. + if tombstone.prev != nil { + result[id] = collectable{ + folderID: id, + prevPath: tombstone.prev, + } + } + } + + return result, clues.Stack(err).OrNil() +} + +func walkTreeAndBuildCollections( + node *nodeyMcNodeFace, + pathPfx path.Path, + parentPath *path.Builder, + isChildOfPackage bool, + result map[string]collectable, +) error { + if node == nil { + return nil + } + + parentLocation := parentPath.Elements() + currentLocation := parentPath.Append(node.name) + + for _, child := range node.children { + err := walkTreeAndBuildCollections( + child, + pathPfx, + currentLocation, + node.isPackage || isChildOfPackage, + result) + if err != nil { + return err + } + } + + collectionPath, err := pathPfx.Append(false, currentLocation.Elements()...) + if err != nil { + return clues.Wrap(err, "building collection path"). 
+ With( + "path_prefix", pathPfx, + "path_suffix", currentLocation.Elements()) + } + + cbl := collectable{ + currPath: collectionPath, + files: node.files, + folderID: node.id, + isPackageOrChildOfPackage: node.isPackage || isChildOfPackage, + loc: parentLocation, + prevPath: node.prev, + } + + result[node.id] = cbl + + return nil +} + +func (face *folderyMcFolderFace) generateExcludeItemIDs() map[string]struct{} { + result := map[string]struct{}{} + + for iID, pID := range face.fileIDToParentID { + if _, itsAlive := face.folderIDToNode[pID]; !itsAlive { + // don't worry about items whose parents are tombstoned. + // those will get handled in the delete cascade. + continue + } + + result[iID+metadata.DataFileSuffix] = struct{}{} + result[iID+metadata.MetaFileSuffix] = struct{}{} + } + + for iID := range face.deletedFileIDs { + result[iID+metadata.DataFileSuffix] = struct{}{} + result[iID+metadata.MetaFileSuffix] = struct{}{} + } + + return result +} + // --------------------------------------------------------------------------- // quantification // --------------------------------------------------------------------------- @@ -414,7 +515,7 @@ func countFilesAndSizes(nodey *nodeyMcNodeFace) countAndSize { } for _, file := range nodey.files { - sumContentSize += file.contentSize + sumContentSize += ptr.Val(file.GetSize()) } return countAndSize{ diff --git a/src/internal/m365/collection/drive/delta_tree_test.go b/src/internal/m365/collection/drive/delta_tree_test.go index 2a8934feb..9495ddf14 100644 --- a/src/internal/m365/collection/drive/delta_tree_test.go +++ b/src/internal/m365/collection/drive/delta_tree_test.go @@ -2,15 +2,17 @@ package drive import ( "testing" - "time" "github.com/alcionai/clues" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "golang.org/x/exp/maps" + "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/custom" ) // --------------------------------------------------------------------------- @@ -51,7 +53,7 @@ func (suite *DeltaTreeUnitSuite) TestNewNodeyMcNodeFace() { assert.Equal(t, parent, nodeFace.parent) assert.Equal(t, "id", nodeFace.id) assert.Equal(t, "name", nodeFace.name) - assert.NotEqual(t, loc, nodeFace.prev) + assert.Nil(t, nodeFace.prev) assert.True(t, nodeFace.isPackage) assert.NotNil(t, nodeFace.children) assert.NotNil(t, nodeFace.files) @@ -66,7 +68,7 @@ func (suite *DeltaTreeUnitSuite) TestNewNodeyMcNodeFace() { func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder() { table := []struct { tname string - tree *folderyMcFolderFace + tree func(t *testing.T) *folderyMcFolderFace parentID string id string name string @@ -75,7 +77,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder() { }{ { tname: "add root", - tree: newFolderyMcFolderFace(nil, rootID), + tree: newTree, id: rootID, name: rootName, isPackage: true, @@ -83,14 +85,14 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder() { }, { tname: "root already exists", - tree: treeWithRoot(), + tree: treeWithRoot, id: rootID, name: rootName, expectErr: assert.NoError, }, { tname: "add folder", - tree: treeWithRoot(), + tree: treeWithRoot, parentID: rootID, id: id(folder), name: name(folder), @@ -98,7 +100,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder() { }, { tname: "add package", - tree: treeWithRoot(), + tree: treeWithRoot, 
 			parentID:  rootID,
 			id:        id(folder),
 			name:      name(folder),
@@ -107,7 +109,7 @@
 		},
 		{
 			tname:     "missing ID",
-			tree:      treeWithRoot(),
+			tree:      treeWithRoot,
 			parentID:  rootID,
 			name:      name(folder),
 			isPackage: true,
@@ -115,7 +117,7 @@
 		},
 		{
 			tname:     "missing name",
-			tree:      treeWithRoot(),
+			tree:      treeWithRoot,
 			parentID:  rootID,
 			id:        id(folder),
 			isPackage: true,
@@ -123,7 +125,7 @@
 		},
 		{
 			tname:     "missing parentID",
-			tree:      treeWithRoot(),
+			tree:      treeWithRoot,
 			id:        id(folder),
 			name:      name(folder),
 			isPackage: true,
@@ -131,7 +133,7 @@
 		},
 		{
 			tname:     "already tombstoned",
-			tree:      treeWithTombstone(),
+			tree:      treeWithTombstone,
 			parentID:  rootID,
 			id:        id(folder),
 			name:      name(folder),
@@ -139,8 +141,10 @@
 		},
 		{
 			tname: "add folder before parent",
-			tree: &folderyMcFolderFace{
-				folderIDToNode: map[string]*nodeyMcNodeFace{},
+			tree: func(t *testing.T) *folderyMcFolderFace {
+				return &folderyMcFolderFace{
+					folderIDToNode: map[string]*nodeyMcNodeFace{},
+				}
 			},
 			parentID: rootID,
 			id:       id(folder),
@@ -150,7 +154,7 @@
 		},
 		{
 			tname:    "folder already exists",
-			tree:     treeWithFolders(),
+			tree:     treeWithFolders,
 			parentID: idx(folder, "parent"),
 			id:       id(folder),
 			name:     name(folder),
@@ -164,7 +168,9 @@
 			ctx, flush := tester.NewContext(t)
 			defer flush()
 
-			err := test.tree.setFolder(
+			tree := test.tree(t)
+
+			err := tree.setFolder(
 				ctx,
 				test.parentID,
 				test.id,
@@ -176,17 +182,17 @@
 				return
 			}
 
-			result := test.tree.folderIDToNode[test.id]
+			result := tree.folderIDToNode[test.id]
 			require.NotNil(t, result)
 			assert.Equal(t, test.id, result.id)
 			assert.Equal(t, test.name, result.name)
 			assert.Equal(t, test.isPackage, result.isPackage)
 
-			_, ded := test.tree.tombstones[test.id]
+			_, ded := tree.tombstones[test.id]
 			assert.False(t, ded)
 
 			if len(test.parentID) > 0 {
-				parent := test.tree.folderIDToNode[test.parentID]
+				parent := tree.folderIDToNode[test.parentID]
 				assert.Equal(t, parent, result.parent)
 			}
 		})
@@ -197,36 +203,36 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_AddTombstone() {
 	table := []struct {
 		name      string
 		id        string
-		tree      *folderyMcFolderFace
+		tree      func(t *testing.T) *folderyMcFolderFace
 		expectErr assert.ErrorAssertionFunc
 	}{
 		{
 			name:      "add tombstone",
 			id:        id(folder),
-			tree:      newFolderyMcFolderFace(nil, rootID),
+			tree:      newTree,
 			expectErr: assert.NoError,
 		},
 		{
 			name:      "duplicate tombstone",
 			id:        id(folder),
-			tree:      treeWithTombstone(),
+			tree:      treeWithTombstone,
 			expectErr: assert.NoError,
 		},
 		{
 			name:      "missing ID",
-			tree:      newFolderyMcFolderFace(nil, rootID),
+			tree:      newTree,
 			expectErr: assert.Error,
 		},
 		{
 			name:      "conflict: folder alive",
 			id:        id(folder),
-			tree:      treeWithTombstone(),
+			tree:      treeWithTombstone,
 			expectErr: assert.NoError,
 		},
 		{
 			name:      "already tombstoned",
 			id:        id(folder),
-			tree:      treeWithTombstone(),
+			tree:      treeWithTombstone,
 			expectErr: assert.NoError,
 		},
 	}
@@ -237,14 +243,16 @@
 			ctx, flush := tester.NewContext(t)
 			defer flush()
 
-			err := test.tree.setTombstone(ctx, test.id)
+			tree := test.tree(t)
+
+			err := tree.setTombstone(ctx, test.id)
 			test.expectErr(t, err, clues.ToCore(err))
 
 			if err != nil {
 				return
 			}
 
-			result := test.tree.tombstones[test.id]
+			result := tree.tombstones[test.id]
 			require.NotNil(t, result)
 		})
 	}
@@ -262,7 +270,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetPreviousPath() {
 		name            string
 		id              string
 		prev            path.Path
-		tree            *folderyMcFolderFace
+		tree            func(t *testing.T) *folderyMcFolderFace
 		expectErr       assert.ErrorAssertionFunc
 		expectLive      bool
 		expectTombstone bool
 	}{
@@ -270,8 +278,8 @@
 		{
 			name:            "no changes become a no-op",
 			id:              id(folder),
-			prev:            pathWith(loc),
-			tree:            newFolderyMcFolderFace(nil, rootID),
+			prev:            pathWith(defaultLoc()),
+			tree:            newTree,
 			expectErr:       assert.NoError,
 			expectLive:      false,
 			expectTombstone: false,
 		},
@@ -279,8 +287,8 @@
 		{
 			name:            "added folders after reset",
 			id:              id(folder),
-			prev:            pathWith(loc),
-			tree:            treeWithFoldersAfterReset(),
+			prev:            pathWith(defaultLoc()),
+			tree:            treeWithFoldersAfterReset,
 			expectErr:       assert.NoError,
 			expectLive:      true,
 			expectTombstone: false,
 		},
@@ -288,16 +296,16 @@
 		{
 			name:            "create tombstone after reset",
 			id:              id(folder),
-			prev:            pathWith(loc),
-			tree:            treeAfterReset(),
+			prev:            pathWith(defaultLoc()),
+			tree:            treeAfterReset,
 			expectErr:       assert.NoError,
 			expectLive:      false,
 			expectTombstone: true,
 		},
 		{
 			name:            "missing ID",
-			prev:            pathWith(loc),
-			tree:            newFolderyMcFolderFace(nil, rootID),
+			prev:            pathWith(defaultLoc()),
+			tree:            newTree,
 			expectErr:       assert.Error,
 			expectLive:      false,
 			expectTombstone: false,
 		},
@@ -305,7 +313,7 @@
 		{
 			name:            "missing prev",
 			id:              id(folder),
-			tree:            newFolderyMcFolderFace(nil, rootID),
+			tree:            newTree,
 			expectErr:       assert.Error,
 			expectLive:      false,
 			expectTombstone: false,
 		},
@@ -313,8 +321,8 @@
 		{
 			name:            "update live folder",
 			id:              id(folder),
-			prev:            pathWith(loc),
-			tree:            treeWithFolders(),
+			prev:            pathWith(defaultLoc()),
+			tree:            treeWithFolders,
 			expectErr:       assert.NoError,
 			expectLive:      true,
 			expectTombstone: false,
 		},
@@ -322,8 +330,8 @@
 		{
 			name:            "update tombstone",
 			id:              id(folder),
-			prev:            pathWith(loc),
-			tree:            treeWithTombstone(),
+			prev:            pathWith(defaultLoc()),
+			tree:            treeWithTombstone,
 			expectErr:       assert.NoError,
 			expectLive:      false,
 			expectTombstone: true,
 		},
@@ -332,22 +340,23 @@
 	}
 	for _, test := range table {
 		suite.Run(test.name, func() {
 			t := suite.T()
+			tree := test.tree(t)
 
-			err := test.tree.setPreviousPath(test.id, test.prev)
+			err := tree.setPreviousPath(test.id, test.prev)
 			test.expectErr(t, err, clues.ToCore(err))
 
 			if test.expectLive {
-				require.Contains(t, test.tree.folderIDToNode, test.id)
-				assert.Equal(t, test.prev, test.tree.folderIDToNode[test.id].prev)
+				require.Contains(t, tree.folderIDToNode, test.id)
+				assert.Equal(t, test.prev.String(), tree.folderIDToNode[test.id].prev.String())
 			} else {
-				require.NotContains(t, test.tree.folderIDToNode, test.id)
+				require.NotContains(t, tree.folderIDToNode, test.id)
 			}
 
 			if test.expectTombstone {
-				require.Contains(t, test.tree.tombstones, test.id)
-				assert.Equal(t, test.prev, test.tree.tombstones[test.id].prev)
+				require.Contains(t, tree.tombstones, test.id)
+				assert.Equal(t, test.prev, tree.tombstones[test.id].prev)
 			} else {
-				require.NotContains(t, test.tree.tombstones, test.id)
+				require.NotContains(t, tree.tombstones, test.id)
 			}
 		})
 	}
@@ -469,7 +478,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder_correctTree()
 	ctx, flush := tester.NewContext(t)
 	defer flush()
 
-	tree := treeWithRoot()
+	tree := treeWithRoot(t)
 
 	set := func(
 		parentID, fid, fname string,
@@ -555,7 +564,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder_correctTombst
 	ctx, flush := tester.NewContext(t)
 	defer flush()
 
-	tree := treeWithRoot()
+	tree := treeWithRoot(t)
 
 	set := func(
 		parentID, fid, fname string,
@@ -730,7 +739,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_SetFolder_correctTombst
 func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_AddFile() {
 	table := []struct {
 		tname       string
-		tree        *folderyMcFolderFace
+		tree        func(t *testing.T) *folderyMcFolderFace
 		oldParentID string
 		parentID    string
 		contentSize int64
@@ -739,7 +748,7 @@
 	}{
 		{
 			tname:       "add file to root",
-			tree:        treeWithRoot(),
+			tree:        treeWithRoot,
 			oldParentID: "",
 			parentID:    rootID,
 			contentSize: 42,
@@ -748,7 +757,7 @@
 		},
 		{
 			tname:       "add file to folder",
-			tree:        treeWithFolders(),
+			tree:        treeWithFolders,
 			oldParentID: "",
 			parentID:    id(folder),
 			contentSize: 24,
@@ -757,7 +766,7 @@
 		},
 		{
 			tname:       "re-add file at the same location",
-			tree:        treeWithFileAtRoot(),
+			tree:        treeWithFileAtRoot,
 			oldParentID: rootID,
 			parentID:    rootID,
 			contentSize: 84,
@@ -766,7 +775,7 @@
 		},
 		{
 			tname:       "move file from folder to root",
-			tree:        treeWithFileInFolder(),
+			tree:        treeWithFileInFolder,
 			oldParentID: id(folder),
 			parentID:    rootID,
 			contentSize: 48,
@@ -775,7 +784,7 @@
 		},
 		{
 			tname:       "move file from tombstone to root",
-			tree:        treeWithFileInTombstone(),
+			tree:        treeWithFileInTombstone,
 			oldParentID: id(folder),
 			parentID:    rootID,
 			contentSize: 2,
@@ -784,7 +793,7 @@
 		},
 		{
 			tname:       "error adding file to tombstone",
-			tree:        treeWithTombstone(),
+			tree:        treeWithTombstone,
 			oldParentID: "",
 			parentID:    id(folder),
 			contentSize: 4,
@@ -793,7 +802,7 @@
 		},
 		{
 			tname:       "error adding file before parent",
-			tree:        treeWithTombstone(),
+			tree:        treeWithTombstone,
 			oldParentID: "",
 			parentID:    idx(folder, 1),
 			contentSize: 8,
@@ -802,7 +811,7 @@
 		},
 		{
 			tname:       "error adding file without parent id",
-			tree:        treeWithTombstone(),
+			tree:        treeWithTombstone,
 			oldParentID: "",
 			parentID:    "",
 			contentSize: 16,
@@ -813,33 +822,33 @@
 	}
 	for _, test := range table {
 		suite.Run(test.tname, func() {
 			t := suite.T()
+			tree := test.tree(t)
 
-			err := test.tree.addFile(
+			df := driveFile(file, parentDir(), test.parentID)
+			df.SetSize(ptr.To(test.contentSize))
+
+			err := tree.addFile(
 				test.parentID,
 				id(file),
-				time.Now(),
-				test.contentSize)
+				custom.ToCustomDriveItem(df))
 			test.expectErr(t, err, clues.ToCore(err))
 
-			assert.Equal(t, test.expectFiles, test.tree.fileIDToParentID)
+			assert.Equal(t, test.expectFiles, tree.fileIDToParentID)
 
 			if err != nil {
 				return
 			}
 
-			parent := test.tree.getNode(test.parentID)
+			parent := tree.getNode(test.parentID)
 			require.NotNil(t, parent)
 			assert.Contains(t, parent.files, id(file))
 
-			countSize := test.tree.countLiveFilesAndSizes()
+			countSize := tree.countLiveFilesAndSizes()
 			assert.Equal(t, 1, countSize.numFiles, "should have one file in the tree")
 			assert.Equal(t, test.contentSize, countSize.totalBytes, "tree should be sized to test file contents")
 
 			if len(test.oldParentID) > 0 && test.oldParentID != test.parentID {
-				old, ok := test.tree.folderIDToNode[test.oldParentID]
-				if !ok {
-					old = test.tree.tombstones[test.oldParentID]
-				}
+				old := tree.getNode(test.oldParentID)
 				require.NotNil(t, old)
 
 				assert.NotContains(t, old.files, id(file))
@@ -851,49 +860,50 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_DeleteFile() {
 	table := []struct {
 		tname    string
-		tree     *folderyMcFolderFace
+		tree     func(t *testing.T) *folderyMcFolderFace
 		parentID string
 	}{
 		{
 			tname:    "delete unseen file",
-			tree:     treeWithRoot(),
+			tree:     treeWithRoot,
 			parentID: rootID,
 		},
 		{
 			tname:    "delete file from root",
-			tree:     treeWithFolders(),
+			tree:     treeWithFolders,
 			parentID: rootID,
 		},
 		{
 			tname:    "delete file from folder",
-			tree:     treeWithFileInFolder(),
+			tree:     treeWithFileInFolder,
 			parentID: id(folder),
 		},
 		{
 			tname:    "delete file from tombstone",
-			tree:     treeWithFileInTombstone(),
+			tree:     treeWithFileInTombstone,
 			parentID: id(folder),
 		},
 	}
 	for _, test := range table {
 		suite.Run(test.tname, func() {
 			t := suite.T()
+			tree := test.tree(t)
 
-			test.tree.deleteFile(id(file))
+			tree.deleteFile(id(file))
 
-			parent := test.tree.getNode(test.parentID)
+			parent := tree.getNode(test.parentID)
 			require.NotNil(t, parent)
 
 			assert.NotContains(t, parent.files, id(file))
-			assert.NotContains(t, test.tree.fileIDToParentID, id(file))
-			assert.Contains(t, test.tree.deletedFileIDs, id(file))
+			assert.NotContains(t, tree.fileIDToParentID, id(file))
+			assert.Contains(t, tree.deletedFileIDs, id(file))
 		})
 	}
 }
 
 func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_addAndDeleteFile() {
 	t := suite.T()
-	tree := treeWithRoot()
+	tree := treeWithRoot(t)
 	fID := id(file)
 
 	require.Len(t, tree.fileIDToParentID, 0)
@@ -906,7 +916,7 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_addAndDeleteFile() {
 	assert.Len(t, tree.deletedFileIDs, 1)
 	assert.Contains(t, tree.deletedFileIDs, fID)
 
-	err := tree.addFile(rootID, fID, time.Now(), defaultItemSize)
+	err := tree.addFile(rootID, fID, custom.ToCustomDriveItem(fileAtRoot()))
 	require.NoError(t, err, clues.ToCore(err))
 
 	assert.Len(t, tree.fileIDToParentID, 1)
@@ -921,3 +931,283 @@ func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_addAndDeleteFile() {
 	assert.Len(t, tree.deletedFileIDs, 1)
 	assert.Contains(t, tree.deletedFileIDs, fID)
 }
+
+func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_GenerateExcludeItemIDs() {
+	table := []struct {
+		name   string
+		tree   func(t *testing.T) *folderyMcFolderFace
+		expect map[string]struct{}
+	}{
+		{
+			name:   "no files",
+			tree:   treeWithRoot,
+			expect: map[string]struct{}{},
+		},
+		{
+			name:   "one file in a folder",
+			tree:   treeWithFileInFolder,
+			expect: makeExcludeMap(id(file)),
+		},
+		{
+			name:   "one file in a tombstone",
+			tree:   treeWithFileInTombstone,
+			expect: map[string]struct{}{},
+		},
+		{
+			name:   "one deleted file",
+			tree:   treeWithDeletedFile,
+			expect: makeExcludeMap(idx(file, "d")),
+		},
+		{
+			name: "files in folders and tombstones",
+			tree: fullTree,
+			expect: makeExcludeMap(
+				id(file),
+				idx(file, "r"),
+				idx(file, "p"),
+				idx(file, "d")),
+		},
+	}
+	for _, test := range table {
+		suite.Run(test.name, func() {
+			t := suite.T()
+			tree := test.tree(t)
+
+			result := tree.generateExcludeItemIDs()
+			assert.Equal(t, test.expect, result)
+		})
+	}
+}
+
+// ---------------------------------------------------------------------------
+// post-processing tests
+// ---------------------------------------------------------------------------
+
+func (suite *DeltaTreeUnitSuite) TestFolderyMcFolderFace_GenerateCollectables() {
+	t := suite.T()
+
+	table := []struct {
+		name      string
+		tree      func(t *testing.T) *folderyMcFolderFace
+		prevPaths map[string]string
+		expectErr require.ErrorAssertionFunc
+		expect    map[string]collectable
+	}{
+		{
+			name:      "empty tree",
+			tree:      newTree,
+			expectErr: require.NoError,
+			expect:    map[string]collectable{},
+		},
+		{
+			name:      "root only",
+			tree:      treeWithRoot,
+			expectErr: require.NoError,
+			expect: map[string]collectable{
+				rootID: {
+					currPath:                  fullPathPath(t),
+					files:                     map[string]*custom.DriveItem{},
+					folderID:                  rootID,
+					isPackageOrChildOfPackage: false,
+					loc:                       path.Elements{},
+				},
+			},
+		},
+		{
+			name:      "root with files",
+			tree:      treeWithFileAtRoot,
+			expectErr: require.NoError,
+			expect: map[string]collectable{
+				rootID: {
+					currPath: fullPathPath(t),
+					files: map[string]*custom.DriveItem{
+						id(file): custom.ToCustomDriveItem(fileAtRoot()),
+					},
+					folderID:                  rootID,
+					isPackageOrChildOfPackage: false,
+					loc:                       path.Elements{},
+				},
+			},
+		},
+		{
+			name:      "folder hierarchy, no previous",
+			tree:      treeWithFileInFolder,
+			expectErr: require.NoError,
+			expect: map[string]collectable{
+				rootID: {
+					currPath:                  fullPathPath(t),
+					files:                     map[string]*custom.DriveItem{},
+					folderID:                  rootID,
+					isPackageOrChildOfPackage: false,
+					loc:                       path.Elements{},
+				},
+				idx(folder, "parent"): {
+					currPath:                  fullPathPath(t, namex(folder, "parent")),
+					files:                     map[string]*custom.DriveItem{},
+					folderID:                  idx(folder, "parent"),
+					isPackageOrChildOfPackage: false,
+					loc:                       path.Elements{rootName},
+				},
+				id(folder): {
+					currPath: fullPathPath(t, namex(folder, "parent"), name(folder)),
+					files: map[string]*custom.DriveItem{
+						id(file): custom.ToCustomDriveItem(fileAt("parent")),
+					},
+					folderID:                  id(folder),
+					isPackageOrChildOfPackage: false,
+					loc:                       path.Elements{rootName, namex(folder, "parent")},
+				},
+			},
+		},
+		{
+			name: "package in hierarchy",
+			tree: func(t *testing.T) *folderyMcFolderFace {
+				ctx, flush := tester.NewContext(t)
+				defer flush()
+
+				tree := treeWithRoot(t)
+				err := tree.setFolder(ctx, rootID, id(pkg), name(pkg), true)
+				require.NoError(t, err, clues.ToCore(err))
+
+				err = tree.setFolder(ctx, id(pkg), id(folder), name(folder), false)
+				require.NoError(t, err, clues.ToCore(err))
+
+				return tree
+			},
+			expectErr: require.NoError,
+			expect: map[string]collectable{
+				rootID: {
+					currPath:                  fullPathPath(t),
+					files:                     map[string]*custom.DriveItem{},
+					folderID:                  rootID,
+					isPackageOrChildOfPackage: false,
+					loc:                       path.Elements{},
+				},
+				id(pkg): {
+					currPath:                  fullPathPath(t, name(pkg)),
+					files:                     map[string]*custom.DriveItem{},
+					folderID:                  id(pkg),
+					isPackageOrChildOfPackage: true,
+					loc:                       path.Elements{rootName},
+				},
+				id(folder): {
+					currPath:                  fullPathPath(t, name(pkg), name(folder)),
+					files:                     map[string]*custom.DriveItem{},
+					folderID:                  id(folder),
+					isPackageOrChildOfPackage: true,
+					loc:                       path.Elements{rootName, name(pkg)},
+				},
+			},
+		},
+		{
+			name:      "folder hierarchy with previous paths",
+			tree:      treeWithFileInFolder,
+			expectErr: require.NoError,
+			prevPaths: map[string]string{
+				rootID:                fullPath(),
+				idx(folder, "parent"): fullPath(namex(folder, "parent-prev")),
+				id(folder):            fullPath(namex(folder, "parent-prev"), name(folder)),
+			},
+			expect: map[string]collectable{
+				rootID: {
+					currPath:                  fullPathPath(t),
+					files:                     map[string]*custom.DriveItem{},
+					folderID:                  rootID,
+					isPackageOrChildOfPackage: false,
+					loc:                       path.Elements{},
+					prevPath:                  fullPathPath(t),
+				},
+				idx(folder, "parent"): {
+					currPath:                  fullPathPath(t, namex(folder, "parent")),
+					files:                     map[string]*custom.DriveItem{},
+					folderID:                  idx(folder, "parent"),
+					isPackageOrChildOfPackage: false,
+					loc:                       path.Elements{rootName},
+					prevPath:                  fullPathPath(t, namex(folder, "parent-prev")),
+				},
+				id(folder): {
+					currPath:                  fullPathPath(t, namex(folder, "parent"), name(folder)),
+					folderID:                  id(folder),
+					isPackageOrChildOfPackage: false,
+					files: map[string]*custom.DriveItem{
+						id(file): custom.ToCustomDriveItem(fileAt("parent")),
+					},
+					loc:      path.Elements{rootName, namex(folder, "parent")},
+					prevPath: fullPathPath(t, namex(folder, "parent-prev"), name(folder)),
+				},
+			},
+		},
+		{
+			name: "root and tombstones",
+			tree: treeWithFileInTombstone,
+			prevPaths: map[string]string{
+				rootID:     fullPath(),
+				id(folder): fullPath(name(folder)),
+			},
+			expectErr: require.NoError,
+			expect: map[string]collectable{
+				rootID: {
+					currPath:                  fullPathPath(t),
+					files:                     map[string]*custom.DriveItem{},
+					folderID:                  rootID,
+					isPackageOrChildOfPackage: false,
+					loc:                       path.Elements{},
+					prevPath:                  fullPathPath(t),
+				},
+				id(folder): {
+					files:                     map[string]*custom.DriveItem{},
+					folderID:                  id(folder),
+					isPackageOrChildOfPackage: false,
+					prevPath:                  fullPathPath(t, name(folder)),
+				},
+			},
+		},
+	}
+	for _, test := range table {
+		suite.Run(test.name, func() {
+			t := suite.T()
+			tree := test.tree(t)
+
+			if len(test.prevPaths) > 0 {
+				for id, ps := range test.prevPaths {
+					pp, err := path.FromDataLayerPath(ps, false)
+					require.NoError(t, err, clues.ToCore(err))
+
+					err = tree.setPreviousPath(id, pp)
+					require.NoError(t, err, clues.ToCore(err))
+				}
+			}
+
+			results, err := tree.generateCollectables()
+			test.expectErr(t, err, clues.ToCore(err))
+			assert.Len(t, results, len(test.expect))
+
+			for id, expect := range test.expect {
+				require.Contains(t, results, id)
+
+				result := results[id]
+				assert.Equal(t, id, result.folderID)
+
+				if expect.currPath == nil {
+					assert.Nil(t, result.currPath)
+				} else {
+					assert.Equal(t, expect.currPath.String(), result.currPath.String())
+				}
+
+				if expect.prevPath == nil {
+					assert.Nil(t, result.prevPath)
+				} else {
+					assert.Equal(t, expect.prevPath.String(), result.prevPath.String())
+				}
+
+				if expect.loc == nil {
+					assert.Nil(t, result.loc)
+				} else {
+					assert.Equal(t, expect.loc.PlainString(), result.loc.PlainString())
+				}
+
+				assert.ElementsMatch(t, maps.Keys(expect.files), maps.Keys(result.files))
+			}
+		})
+	}
+}
diff --git a/src/internal/m365/collection/drive/helper_test.go b/src/internal/m365/collection/drive/helper_test.go
index 727a32163..c60208083 100644
--- a/src/internal/m365/collection/drive/helper_test.go
+++ b/src/internal/m365/collection/drive/helper_test.go
@@ -19,6 +19,7 @@ import (
 	odConsts "github.com/alcionai/corso/src/internal/m365/service/onedrive/consts"
 	"github.com/alcionai/corso/src/internal/m365/service/onedrive/mock"
 	"github.com/alcionai/corso/src/internal/m365/support"
+	"github.com/alcionai/corso/src/internal/tester"
 	"github.com/alcionai/corso/src/internal/tester/tconfig"
 	"github.com/alcionai/corso/src/pkg/account"
 	bupMD "github.com/alcionai/corso/src/pkg/backup/metadata"
"github.com/alcionai/corso/src/pkg/backup/metadata" @@ -30,6 +31,7 @@ import ( "github.com/alcionai/corso/src/pkg/services/m365/api" "github.com/alcionai/corso/src/pkg/services/m365/api/graph" apiMock "github.com/alcionai/corso/src/pkg/services/m365/api/mock" + "github.com/alcionai/corso/src/pkg/services/m365/custom" ) const defaultItemSize int64 = 42 @@ -152,6 +154,7 @@ func coreItem( item := models.NewDriveItem() item.SetName(&name) item.SetId(&id) + item.SetLastModifiedDateTime(ptr.To(time.Now())) parentReference := models.NewItemReference() parentReference.SetPath(&parentPath) @@ -178,6 +181,21 @@ func driveItem( return coreItem(id, name, parentPath, parentID, it) } +func driveFile( + idX any, + parentPath, parentID string, +) models.DriveItemable { + i := id(file) + n := name(file) + + if idX != file { + i = idx(file, idX) + n = namex(file, idX) + } + + return driveItem(i, n, parentPath, parentID, isFile) +} + func fileAtRoot() models.DriveItemable { return driveItem(id(file), name(file), parentDir(), rootID, isFile) } @@ -444,6 +462,13 @@ func fullPath(elems ...string) string { elems...)...) } +func fullPathPath(t *testing.T, elems ...string) path.Path { + p, err := path.FromDataLayerPath(fullPath(elems...), false) + require.NoError(t, err, clues.ToCore(err)) + + return p +} + func driveFullPath(driveID any, elems ...string) string { return toPath(append( []string{ @@ -468,12 +493,6 @@ func driveParentDir(driveID any, elems ...string) string { elems...)...) } -// just for readability -const ( - doMergeItems = true - doNotMergeItems = false -) - // common item names const ( bar = "bar" @@ -564,26 +583,6 @@ func collWithMBHAndOpts( count.New()) } -// func fullOrPrevPath( -// t *testing.T, -// coll data.BackupCollection, -// ) path.Path { -// var collPath path.Path - -// if coll.State() != data.DeletedState { -// collPath = coll.FullPath() -// } else { -// collPath = coll.PreviousPath() -// } - -// require.False( -// t, -// len(collPath.Elements()) < 4, -// "malformed or missing collection path") - -// return collPath -// } - func pagerForDrives(drives ...models.Driveable) *apiMock.Pager[models.Driveable] { return &apiMock.Pager[models.Driveable]{ ToReturn: []apiMock.PagerResult[models.Driveable]{ @@ -592,6 +591,30 @@ func pagerForDrives(drives ...models.Driveable) *apiMock.Pager[models.Driveable] } } +func aPage(items ...models.DriveItemable) mock.NextPage { + return mock.NextPage{ + Items: append([]models.DriveItemable{driveRootItem()}, items...), + } +} + +func aPageWReset(items ...models.DriveItemable) mock.NextPage { + return mock.NextPage{ + Items: append([]models.DriveItemable{driveRootItem()}, items...), + Reset: true, + } +} + +func aReset(items ...models.DriveItemable) mock.NextPage { + return mock.NextPage{ + Items: []models.DriveItemable{}, + Reset: true, + } +} + +// --------------------------------------------------------------------------- +// metadata +// --------------------------------------------------------------------------- + func makePrevMetadataColls( t *testing.T, mbh BackupHandler, @@ -644,133 +667,150 @@ func makePrevMetadataColls( // assert.Equal(t, expectPrevPaths, prevs, "previous paths") // } -// for comparisons done by collection state -type stateAssertion struct { - itemIDs []string - // should never get set by the user. - // this flag gets flipped when calling assertions.compare. 
-	// any unseen collection will error on requireNoUnseenCollections
-	// sawCollection bool
-}
+// ---------------------------------------------------------------------------
+// collections
+// ---------------------------------------------------------------------------
 
 // for comparisons done by a given collection path
 type collectionAssertion struct {
-	doNotMerge    assert.BoolAssertionFunc
-	states        map[data.CollectionState]*stateAssertion
-	excludedItems map[string]struct{}
+	curr    path.Path
+	prev    path.Path
+	state   data.CollectionState
+	fileIDs []string
+	// should never get set by the user.
+	// this flag gets flipped when calling assertions.compare.
+	// any unseen collection will error on requireNoUnseenCollections
+	sawCollection bool
 }
 
-type statesToItemIDs map[data.CollectionState][]string
+func aColl(
+	curr, prev path.Path,
+	fileIDs ...string,
+) *collectionAssertion {
+	ids := make([]string, 0, 2*len(fileIDs))
 
-// TODO(keepers): move excludeItems to a more global position.
-func newCollAssertion(
-	doNotMerge bool,
-	itemsByState statesToItemIDs,
-	excludeItems ...string,
-) collectionAssertion {
-	states := map[data.CollectionState]*stateAssertion{}
-
-	for state, itemIDs := range itemsByState {
-		states[state] = &stateAssertion{
-			itemIDs: itemIDs,
-		}
+	for _, fID := range fileIDs {
+		ids = append(ids, fID+metadata.DataFileSuffix)
+		ids = append(ids, fID+metadata.MetaFileSuffix)
 	}
 
-	dnm := assert.False
-	if doNotMerge {
-		dnm = assert.True
-	}
-
-	return collectionAssertion{
-		doNotMerge:    dnm,
-		states:        states,
-		excludedItems: makeExcludeMap(excludeItems...),
+	return &collectionAssertion{
+		curr:    curr,
+		prev:    prev,
+		state:   data.StateOf(prev, curr, count.New()),
+		fileIDs: ids,
 	}
 }
 
 // to aggregate all collection-related expectations in the backup
-// map collection path -> collection state -> assertion
-type collectionAssertions map[string]collectionAssertion
+// map collection path -> assertion
+type expectedCollections struct {
+	assertions  map[string]*collectionAssertion
+	doNotMerge  assert.BoolAssertionFunc
+	hasURLCache assert.ValueAssertionFunc
+}
 
-// ensure the provided collection matches expectations as set by the test.
-// func (cas collectionAssertions) compare(
-// 	t *testing.T,
-// 	coll data.BackupCollection,
-// 	excludes *prefixmatcher.StringSetMatchBuilder,
-// ) {
-// 	ctx, flush := tester.NewContext(t)
-// 	defer flush()
+func expectCollections(
+	doNotMerge bool,
+	hasURLCache bool,
+	colls ...*collectionAssertion,
+) expectedCollections {
+	as := map[string]*collectionAssertion{}
 
-// 	var (
-// 		itemCh  = coll.Items(ctx, fault.New(true))
-// 		itemIDs = []string{}
-// 	)
+	for _, coll := range colls {
+		as[expectFullOrPrev(coll).String()] = coll
+	}
 
-// 	p := fullOrPrevPath(t, coll)
+	dontMerge := assert.False
+	if doNotMerge {
+		dontMerge = assert.True
+	}
 
-// 	for itm := range itemCh {
-// 		itemIDs = append(itemIDs, itm.ID())
-// 	}
+	hasCache := assert.Nil
+	if hasURLCache {
+		hasCache = assert.NotNil
+	}
 
-// 	expect := cas[p.String()]
-// 	expectState := expect.states[coll.State()]
-// 	expectState.sawCollection = true
+	return expectedCollections{
+		assertions:  as,
+		doNotMerge:  dontMerge,
+		hasURLCache: hasCache,
+	}
+}
 
-// 	assert.ElementsMatchf(
-// 		t,
-// 		expectState.itemIDs,
-// 		itemIDs,
-// 		"expected all items to match in collection with:\nstate %q\npath %q",
-// 		coll.State(),
-// 		p)
+func (ecs expectedCollections) compare(
+	t *testing.T,
+	colls []data.BackupCollection,
+) {
+	for _, coll := range colls {
+		ecs.compareColl(t, coll)
+	}
+}
 
-// 	expect.doNotMerge(
-// 		t,
-// 		coll.DoNotMergeItems(),
-// 		"expected collection to have the appropariate doNotMerge flag")
+func (ecs expectedCollections) compareColl(t *testing.T, coll data.BackupCollection) {
+	ctx, flush := tester.NewContext(t)
+	defer flush()
 
-// 	if result, ok := excludes.Get(p.String()); ok {
-// 		assert.Equal(
-// 			t,
-// 			expect.excludedItems,
-// 			result,
-// 			"excluded items")
-// 	}
-// }
+	var (
+		itemIDs = []string{}
+		p       = fullOrPrevPath(t, coll)
+	)
+
+	if coll.State() != data.DeletedState {
+		for itm := range coll.Items(ctx, fault.New(true)) {
+			itemIDs = append(itemIDs, itm.ID())
+		}
+	}
+
+	expect := ecs.assertions[p.String()]
+	require.NotNil(
+		t,
+		expect,
+		"test should have an expected entry for collection with:\n\tstate %q\n\tpath %q",
+		coll.State(),
+		p)
+
+	expect.sawCollection = true
+
+	assert.ElementsMatchf(
+		t,
+		expect.fileIDs,
+		itemIDs,
+		"expected all items to match in collection with:\n\tstate %q\n\tpath %q",
+		coll.State(),
+		p)
+
+	if expect.prev == nil {
+		assert.Nil(t, coll.PreviousPath(), "previous path")
+	} else {
+		assert.Equal(t, expect.prev, coll.PreviousPath())
+	}
+
+	if expect.curr == nil {
+		assert.Nil(t, coll.FullPath(), "collection path")
+	} else {
+		assert.Equal(t, expect.curr, coll.FullPath())
+	}
+
+	ecs.doNotMerge(
+		t,
+		coll.DoNotMergeItems(),
+		"expected collection to have the appropriate doNotMerge flag")
+
+	driveColl := coll.(*Collection)
+
+	ecs.hasURLCache(t, driveColl.urlCache, "has a populated url cache handler")
+}
 
 // ensure that no collections in the expected set are still flagged
 // as sawCollection == false.
-// func (cas collectionAssertions) requireNoUnseenCollections(
-// 	t *testing.T,
-// ) {
-// 	for p, withPath := range cas {
-// 		for _, state := range withPath.states {
-// 			require.True(
-// 				t,
-// 				state.sawCollection,
-// 				"results should have contained collection:\n\t%q\t\n%q",
-// 				state, p)
-// 		}
-// 	}
-// }
-
-func aPage(items ...models.DriveItemable) mock.NextPage {
-	return mock.NextPage{
-		Items: append([]models.DriveItemable{driveRootItem()}, items...),
-	}
-}
-
-func aPageWReset(items ...models.DriveItemable) mock.NextPage {
-	return mock.NextPage{
-		Items: append([]models.DriveItemable{driveRootItem()}, items...),
-		Reset: true,
-	}
-}
-
-func aReset(items ...models.DriveItemable) mock.NextPage {
-	return mock.NextPage{
-		Items: []models.DriveItemable{},
-		Reset: true,
+func (ecs expectedCollections) requireNoUnseenCollections(t *testing.T) {
+	for _, ca := range ecs.assertions {
+		require.True(
+			t,
+			ca.sawCollection,
+			"results did not include collection at:\n\tstate %q\n\tpath %q",
+			ca.state, expectFullOrPrev(ca))
 	}
 }
 
@@ -778,10 +818,33 @@
 // delta trees
 // ---------------------------------------------------------------------------
 
-var loc = path.NewElements("root:/foo/bar/baz/qux/fnords/smarf/voi/zumba/bangles/howdyhowdyhowdy")
+func defaultTreePfx(t *testing.T) path.Path {
+	fpb := fullPathPath(t).ToBuilder()
+	fpe := fpb.Elements()
+	fpe = fpe[:len(fpe)-1]
+	fpb = path.Builder{}.Append(fpe...)
 
-func treeWithRoot() *folderyMcFolderFace {
-	tree := newFolderyMcFolderFace(nil, rootID)
+	p, err := path.FromDataLayerPath(fpb.String(), false)
+	require.NoErrorf(
+		t,
+		err,
+		"err processing path:\n\terr %+v\n\tpath %q",
+		clues.ToCore(err),
+		fpb)
+
+	return p
+}
+
+func defaultLoc() path.Elements {
+	return path.NewElements("root:/foo/bar/baz/qux/fnords/smarf/voi/zumba/bangles/howdyhowdyhowdy")
+}
+
+func newTree(t *testing.T) *folderyMcFolderFace {
+	return newFolderyMcFolderFace(defaultTreePfx(t), rootID)
+}
+
+func treeWithRoot(t *testing.T) *folderyMcFolderFace {
+	tree := newFolderyMcFolderFace(defaultTreePfx(t), rootID)
 	rootey := newNodeyMcNodeFace(nil, rootID, rootName, false)
 	tree.root = rootey
 	tree.folderIDToNode[rootID] = rootey
@@ -789,29 +852,29 @@
 	return tree
 }
 
-func treeAfterReset() *folderyMcFolderFace {
-	tree := newFolderyMcFolderFace(nil, rootID)
+func treeAfterReset(t *testing.T) *folderyMcFolderFace {
+	tree := newFolderyMcFolderFace(defaultTreePfx(t), rootID)
 	tree.reset()
 
 	return tree
 }
 
-func treeWithFoldersAfterReset() *folderyMcFolderFace {
-	tree := treeWithFolders()
+func treeWithFoldersAfterReset(t *testing.T) *folderyMcFolderFace {
+	tree := treeWithFolders(t)
 	tree.hadReset = true
 
 	return tree
 }
 
-func treeWithTombstone() *folderyMcFolderFace {
-	tree := treeWithRoot()
+func treeWithTombstone(t *testing.T) *folderyMcFolderFace {
+	tree := treeWithRoot(t)
 	tree.tombstones[id(folder)] = newNodeyMcNodeFace(nil, id(folder), "", false)
 
 	return tree
 }
 
-func treeWithFolders() *folderyMcFolderFace {
-	tree := treeWithRoot()
+func treeWithFolders(t *testing.T) *folderyMcFolderFace {
	tree := treeWithRoot(t)
 	parent := newNodeyMcNodeFace(tree.root, idx(folder, "parent"), namex(folder, "parent"), true)
 	tree.folderIDToNode[parent.id] = parent
@@ -824,35 +887,146 @@
 	return tree
 }
 
-func treeWithFileAtRoot() *folderyMcFolderFace {
-	tree := treeWithRoot()
-	tree.root.files[id(file)] = fileyMcFileFace{
-		lastModified: time.Now(),
-		contentSize:  42,
-	}
+func treeWithFileAtRoot(t *testing.T) *folderyMcFolderFace {
+	tree := treeWithRoot(t)
+	tree.root.files[id(file)] = custom.ToCustomDriveItem(fileAtRoot())
 	tree.fileIDToParentID[id(file)] = rootID
 
 	return tree
 }
 
+func treeWithDeletedFile(t *testing.T) *folderyMcFolderFace {
+	tree := treeWithRoot(t)
+	tree.deleteFile(idx(file, "d"))
+
+	return tree
+}
+
-func treeWithFileInFolder() *folderyMcFolderFace {
-	tree := treeWithFolders()
-	tree.folderIDToNode[id(folder)].files[id(file)] = fileyMcFileFace{
-		lastModified: time.Now(),
-		contentSize:  42,
-	}
+func treeWithFileInFolder(t *testing.T) *folderyMcFolderFace {
+	tree := treeWithFolders(t)
+	tree.folderIDToNode[id(folder)].files[id(file)] = custom.ToCustomDriveItem(fileAt(folder))
 	tree.fileIDToParentID[id(file)] = id(folder)
 
 	return tree
 }
 
-func treeWithFileInTombstone() *folderyMcFolderFace {
-	tree := treeWithTombstone()
-	tree.tombstones[id(folder)].files[id(file)] = fileyMcFileFace{
-		lastModified: time.Now(),
-		contentSize:  42,
-	}
+func treeWithFileInTombstone(t *testing.T) *folderyMcFolderFace {
+	tree := treeWithTombstone(t)
+	tree.tombstones[id(folder)].files[id(file)] = custom.ToCustomDriveItem(fileAt("tombstone"))
 	tree.fileIDToParentID[id(file)] = id(folder)
 
 	return tree
 }
+
+// root -> idx(folder, parent) -> id(folder)
+// one item at each dir
+// one tombstone: idx(folder, tombstone)
+// one item in the tombstone
+// one deleted item
+func fullTree(t *testing.T) *folderyMcFolderFace {
+	return fullTreeWithNames("parent", "tombstone")(t)
+}
+
+func fullTreeWithNames(
+	parentFolderX, tombstoneX any,
+) func(t *testing.T) *folderyMcFolderFace {
+	return func(t *testing.T) *folderyMcFolderFace {
+		ctx, flush := tester.NewContext(t)
+		defer flush()
+
+		tree := treeWithRoot(t)
+
+		// file in root
+		df := driveFile("r", parentDir(), rootID)
+		err := tree.addFile(
+			rootID,
+			idx(file, "r"),
+			custom.ToCustomDriveItem(df))
+		require.NoError(t, err, clues.ToCore(err))
+
+		// root -> idx(folder, parent)
+		err = tree.setFolder(ctx, rootID, idx(folder, parentFolderX), namex(folder, parentFolderX), false)
+		require.NoError(t, err, clues.ToCore(err))
+
+		// file in idx(folder, parent)
+		df = driveFile("p", parentDir(namex(folder, parentFolderX)), idx(folder, parentFolderX))
+		err = tree.addFile(
+			idx(folder, parentFolderX),
+			idx(file, "p"),
+			custom.ToCustomDriveItem(df))
+		require.NoError(t, err, clues.ToCore(err))
+
+		// idx(folder, parent) -> id(folder)
+		err = tree.setFolder(ctx, idx(folder, parentFolderX), id(folder), name(folder), false)
+		require.NoError(t, err, clues.ToCore(err))
+
+		// file in id(folder)
+		df = driveFile(file, parentDir(name(folder)), id(folder))
+		err = tree.addFile(
+			id(folder),
+			id(file),
+			custom.ToCustomDriveItem(df))
+		require.NoError(t, err, clues.ToCore(err))
+
+		// tombstone - have to set a non-tombstone folder first,
+		// then add the item, then tombstone the folder
+		err = tree.setFolder(ctx, rootID, idx(folder, tombstoneX), namex(folder, tombstoneX), false)
+		require.NoError(t, err, clues.ToCore(err))
+
+		// file in tombstone
+		df = driveFile("t", parentDir(namex(folder, tombstoneX)), idx(folder, tombstoneX))
+		err = tree.addFile(
+			idx(folder, tombstoneX),
+			idx(file, "t"),
+			custom.ToCustomDriveItem(df))
+		require.NoError(t, err, clues.ToCore(err))
+
+		err = tree.setTombstone(ctx, idx(folder, tombstoneX))
+		require.NoError(t, err, clues.ToCore(err))
+
+		// deleted file
+		tree.deleteFile(idx(file, "d"))
+
+		return tree
+	}
+}
+
+// ---------------------------------------------------------------------------
+// misc
+// ---------------------------------------------------------------------------
+
+func expectFullOrPrev(ca *collectionAssertion) path.Path {
+	var p path.Path
+
+	if ca.state != data.DeletedState {
+		p = ca.curr
+	} else {
+		p = ca.prev
+	}
+
+	return p
+}
+
+func fullOrPrevPath(
+	t *testing.T,
+	coll data.BackupCollection,
+) path.Path {
+	var collPath path.Path
+
+	if coll.State() == data.DeletedState {
+		collPath = coll.PreviousPath()
+	} else {
+		collPath = coll.FullPath()
+	}
+
+	require.NotNil(
+		t,
+		collPath,
+		"full or prev path is nil for collection with state:\n\t%s",
+		coll.State())
+
+	require.False(
+		t,
+		len(collPath.Elements()) < 4,
+		"malformed or missing collection path")
+
+	return collPath
+}
diff --git a/src/internal/m365/collection/drive/url_cache.go b/src/internal/m365/collection/drive/url_cache.go
index 5fb705254..7a07c4672 100644
--- a/src/internal/m365/collection/drive/url_cache.go
+++ b/src/internal/m365/collection/drive/url_cache.go
@@ -19,7 +19,10 @@ import (
 
 const (
 	urlCacheDriveItemThreshold = 300 * 1000
-	urlCacheRefreshInterval    = 1 * time.Hour
+	// 600 pages = 300k items, since delta enumeration produces 500 items per page
+	// TODO: export standard page size and swap to 300k/defaultDeltaPageSize
+	urlCacheDrivePagesThreshold = 600
+	urlCacheRefreshInterval     = 1 * time.Hour
 )
 
 type getItemPropertyer interface {
diff --git a/src/pkg/count/keys.go b/src/pkg/count/keys.go
index 423a603c8..9aad8acf7 100644
--- a/src/pkg/count/keys.go
+++ b/src/pkg/count/keys.go
@@ -92,6 +92,7 @@ const (
 	TotalDeltasProcessed   Key = "total-deltas-processed"
 	TotalFilesProcessed    Key = "total-files-processed"
 	TotalFoldersProcessed  Key = "total-folders-processed"
+	TotalItemsProcessed    Key = "total-items-processed"
 	TotalMalwareProcessed  Key = "total-malware-processed"
 	TotalPackagesProcessed Key = "total-packages-processed"
 	TotalPagesEnumerated   Key = "total-pages-enumerated"
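
generateExcludeItemIDs pairs every excluded item ID with both of its serialized forms, one per backup file suffix, so that downstream merge logic can drop the content file and its metadata file together. A minimal sketch of that expansion, assuming makeExcludeMap (referenced by the tests but not defined in this patch) performs the same suffix pairing:

// sketch only: a hypothetical helper mirroring the suffix expansion used
// by generateExcludeItemIDs and aColl; not part of the patch.
func expandExcludeIDs(itemIDs ...string) map[string]struct{} {
	result := map[string]struct{}{}

	for _, itemID := range itemIDs {
		// one entry for the item's content file, one for its metadata file.
		result[itemID+metadata.DataFileSuffix] = struct{}{}
		result[itemID+metadata.MetaFileSuffix] = struct{}{}
	}

	return result
}

Files parented by tombstoned folders are deliberately absent from the set; the tombstone's delete cascade already removes that whole subtree, so excluding them again would be redundant.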
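The new expectation helpers are built to compose: expectCollections gathers path-keyed assertions, compare walks the produced collections against them, and requireNoUnseenCollections catches any expectation the results never touched. A minimal usage sketch, assuming a test body that has already produced colls []data.BackupCollection of *Collection values (paths and file IDs here are illustrative):

// sketch only: composes the helpers defined in helper_test.go above.
func assertDriveCollections(t *testing.T, colls []data.BackupCollection) {
	expect := expectCollections(
		false, // doNotMerge: collections may merge with the prior backup
		true,  // hasURLCache: produced collections should carry a url cache
		// an unmoved folder holding one file; aColl expands the file ID
		// into its data/meta suffix pair internally.
		aColl(fullPathPath(t), fullPathPath(t), id(file)),
		// a tombstoned folder: no current path, only a previous path.
		aColl(nil, fullPathPath(t, name(folder))))

	expect.compare(t, colls)
	expect.requireNoUnseenCollections(t)
}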
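The fixture builders also encode an ordering constraint of the tree API worth noting: a folder must exist as a live node before files can be parented to it, and tombstoning comes last (fullTreeWithNames follows exactly this order). A compact sketch of that sequence under the same fixture names, with the builder name itself being hypothetical:

// sketch only: building a one-folder tree that ends up tombstoned.
func treeWithTombstonedFolder(t *testing.T) *folderyMcFolderFace {
	ctx, flush := tester.NewContext(t)
	defer flush()

	tree := treeWithRoot(t)

	// the folder must be live before a file can be parented to it.
	err := tree.setFolder(ctx, rootID, id(folder), name(folder), false)
	require.NoError(t, err, clues.ToCore(err))

	err = tree.addFile(
		id(folder),
		id(file),
		custom.ToCustomDriveItem(fileAt(folder)))
	require.NoError(t, err, clues.ToCore(err))

	// tombstoning last moves the node, files included, out of the live set.
	err = tree.setTombstone(ctx, id(folder))
	require.NoError(t, err, clues.ToCore(err))

	return tree
}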