From 108c243e547914e17f72817276c9c8fd28f0dae6 Mon Sep 17 00:00:00 2001 From: ashmrtn <3891298+ashmrtn@users.noreply.github.com> Date: Tue, 19 Sep 2023 18:56:44 -0700 Subject: [PATCH] Implement selective subtree pruning (#4133) Determine if a subtree has any directory object changes and if it doesn't skip traversing that subtree during hierarchy merging. "Directory object changes" for a subtree are defined as: * moving/renaming a directory within the subtree * deleting a directory in the subtree * moving a directory into the subtree * creating a directory in the subtree * moving a directory out of the subtree The resulting snapshot still contains all data in the pruned subtree. --- #### Does this PR need a docs update or release note? - [x] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [ ] :no_entry: No #### Type of change - [x] :sunflower: Feature - [ ] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * #4117 #### Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [x] :green_heart: E2E --- CHANGELOG.md | 1 + src/internal/kopia/upload.go | 172 +++++-- src/internal/kopia/upload_test.go | 773 ++++++++++++++++++++++++++++++ 3 files changed, 908 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f81e4e096..5662a4f8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Enables local or network-attached storage for Corso repositories. +- Reduce backup runtime for OneDrive and SharePoint incremental backups that have no file changes. ## [v0.13.0] (beta) - 2023-09-18 diff --git a/src/internal/kopia/upload.go b/src/internal/kopia/upload.go index 5a28760eb..fe6cf3f51 100644 --- a/src/internal/kopia/upload.go +++ b/src/internal/kopia/upload.go @@ -452,6 +452,7 @@ func streamBaseEntries( dir fs.Directory, encodedSeen map[string]struct{}, globalExcludeSet prefixmatcher.StringSetReader, + streamDirEnts bool, progress *corsoProgress, ) error { if dir == nil { @@ -477,9 +478,51 @@ func streamBaseEntries( return clues.Stack(err).WithClues(ctx) } - // Don't walk subdirectories in this function. - if _, ok := entry.(fs.Directory); ok { - return nil + entName, err := decodeElement(entry.Name()) + if err != nil { + return clues.Wrap(err, "decoding entry name"). + WithClues(ctx). + With("entry_name", entry.Name()) + } + + if d, ok := entry.(fs.Directory); ok { + // Don't walk subdirectories in this function. + if !streamDirEnts { + return nil + } + + // Do walk subdirectories. The previous and current path of the directory + // can be generated by appending the directory name onto the previous and + // current path of this directory. Since the directory has no + // BackupCollection associated with it (part of the criteria for allowing + // walking directories in this function) there shouldn't be any + // LocationPath information associated with the directory. 
+ newP, err := curPath.Append(false, entName) + if err != nil { + return clues.Wrap(err, "getting current directory path").WithClues(ctx) + } + + oldP, err := prevPath.Append(false, entName) + if err != nil { + return clues.Wrap(err, "getting previous directory path").WithClues(ctx) + } + + e := virtualfs.NewStreamingDirectory( + entry.Name(), + getStreamItemFunc( + newP, + oldP, + nil, + nil, + d, + globalExcludeSet, + streamDirEnts, + progress)) + + return clues.Wrap(ctr(ctx, e), "executing callback on subdirectory"). + WithClues(ctx). + With("directory_path", newP). + OrNil() } // This entry was either updated or deleted. In either case, the external @@ -489,13 +532,6 @@ func streamBaseEntries( return nil } - entName, err := decodeElement(entry.Name()) - if err != nil { - return clues.Wrap(err, "decoding entry name"). - WithClues(ctx). - With("entry_name", entry.Name()) - } - // This entry was marked as deleted by a service that can't tell us the // previous path of deleted items, only the item ID. if _, ok := excludeSet[entName]; ok { @@ -563,6 +599,7 @@ func getStreamItemFunc( streamedEnts data.BackupCollection, baseDir fs.Directory, globalExcludeSet prefixmatcher.StringSetReader, + streamDirEnts bool, progress *corsoProgress, ) func(context.Context, func(context.Context, fs.Entry) error) error { return func(ctx context.Context, ctr func(context.Context, fs.Entry) error) error { @@ -596,6 +633,7 @@ func getStreamItemFunc( baseDir, seen, globalExcludeSet, + streamDirEnts, progress); err != nil { return clues.Wrap(err, "streaming base snapshot entries") } @@ -643,6 +681,7 @@ func buildKopiaDirs( dir.collection, dir.baseDir, globalExcludeSet, + !dir.subtreeChanged, progress)), nil } @@ -667,6 +706,12 @@ type treeMap struct { // be added to childDirs while building the hierarchy. They will be ignored // when iterating through the directory to hand items to kopia. baseDir fs.Directory + + // subtreeChanged denotes whether any directories under this node have been + // moved, renamed, deleted, or added. If not then we should return both the + // kopia files and the kopia directories in the base entry because we're also + // doing selective subtree pruning during hierarchy merging. + subtreeChanged bool } func newTreeMap() *treeMap { @@ -863,6 +908,50 @@ func inflateCollectionTree( return roots, updatedPaths, nil } +func subtreeChanged( + roots map[string]*treeMap, + updatedPaths map[string]path.Path, + oldDirPath *path.Builder, + currentPath *path.Builder, +) bool { + // Can't combine with the inner if-block because go evaluates the assignment + // prior to all conditional checks. + if currentPath != nil { + // Either there's an input collection for this path or there's some + // descendant that has a collection with this path as a prefix. + // + // The base traversal code is single-threaded and this check is before + // processing child base directories so we don't have to worry about + // something else creating the in-memory subtree for unchanged things. + // Having only one base per Reason also means we haven't added this branch + // of the in-memory tree in a base processed before this one. + if node := maybeGetTreeNode(roots, currentPath.Elements()); node != nil && + (node.collection != nil || len(node.childDirs) > 0) { + return true + } + } + + oldPath := oldDirPath.String() + + // We only need to check the old paths here because the check on the in-memory + // tree above will catch cases where the new path is in the subtree rooted at + // currentPath. 
We're mostly concerned with things being moved out of the + // subtree or deleted within the subtree in this block. Renames, moves within, + // and moves into the subtree will be caught by the above in-memory tree + // check. + // + // We can ignore exact matches on the old path because they would correspond + // to deleting the root of the subtree which we can handle as long as + // everything under the subtree root is also deleted. + for oldP := range updatedPaths { + if strings.HasPrefix(oldP, oldPath) && len(oldP) != len(oldPath) { + return true + } + } + + return false +} + // traverseBaseDir is an unoptimized function that reads items in a directory // and traverses subdirectories in the given directory. oldDirPath is the path // the directory would be at if the hierarchy was unchanged. expectedDirPath is the @@ -946,34 +1035,36 @@ func traverseBaseDir( ctx = clues.Add(ctx, "new_path", currentPath) - // TODO(ashmrtn): If we can do prefix matching on elements in updatedPaths and - // we know that the tree node for this directory has no collection reference - // and no child nodes then we can skip traversing this directory. This will - // only work if we know what directory deleted items used to belong in (e.x. - // it won't work for OneDrive because we only know the ID of the deleted - // item). + // Figure out if the subtree rooted at this directory is either unchanged or + // completely deleted. We can accomplish this by checking the in-memory tree + // and the updatedPaths map. + changed := subtreeChanged(roots, updatedPaths, oldDirPath, currentPath) var hasItems bool - err = dir.IterateEntries(ctx, func(innerCtx context.Context, entry fs.Entry) error { - dEntry, ok := entry.(fs.Directory) - if !ok { - hasItems = true - return nil - } + if changed { + err = dir.IterateEntries(ctx, func(innerCtx context.Context, entry fs.Entry) error { + dEntry, ok := entry.(fs.Directory) + if !ok { + hasItems = true + return nil + } - return traverseBaseDir( - innerCtx, - depth+1, - updatedPaths, - oldDirPath, - currentPath, - dEntry, - roots, - stats) - }) - if err != nil { - return clues.Wrap(err, "traversing base directory").WithClues(ctx) + return traverseBaseDir( + innerCtx, + depth+1, + updatedPaths, + oldDirPath, + currentPath, + dEntry, + roots, + stats) + }) + if err != nil { + return clues.Wrap(err, "traversing base directory").WithClues(ctx) + } + } else { + stats.Inc(statPruned) } // We only need to add this base directory to the tree we're building if it @@ -981,7 +1072,7 @@ func traverseBaseDir( // subdirectories. This optimization will not be valid if we dynamically // determine the subdirectories this directory has when handing items to // kopia. - if currentPath != nil && hasItems { + if currentPath != nil && (hasItems || !changed) { // Having this in the if-block has the effect of removing empty directories // from backups that have a base snapshot. 
If we'd like to preserve empty // directories across incremental backups, move getting the node outside of @@ -1014,12 +1105,12 @@ func traverseBaseDir( stats.Inc(statRecursiveMove) } - curP, err := path.FromDataLayerPath(currentPath.String(), false) + curP, err := path.PrefixOrPathFromDataLayerPath(currentPath.String(), false) if err != nil { return clues.New("converting current path to path.Path").WithClues(ctx) } - oldP, err := path.FromDataLayerPath(oldDirPath.String(), false) + oldP, err := path.PrefixOrPathFromDataLayerPath(oldDirPath.String(), false) if err != nil { return clues.New("converting old path to path.Path").WithClues(ctx) } @@ -1027,6 +1118,7 @@ func traverseBaseDir( node.baseDir = dir node.currentPath = curP node.prevPath = oldP + node.subtreeChanged = changed } return nil @@ -1070,6 +1162,9 @@ const ( // statSkipMerge denotes the number of directories that weren't merged because // they were marked either DoNotMerge or New. statSkipMerge = "directories_skipped_merging" + // statPruned denotes the number of subtree roots that selective subtree + // pruning applied to. + statPruned = "subtrees_pruned" ) func inflateBaseTree( @@ -1163,7 +1258,8 @@ func inflateBaseTree( statRecursiveMove, stats.Get(statRecursiveMove), statDel, stats.Get(statDel), statRecursiveDel, stats.Get(statRecursiveDel), - statSkipMerge, stats.Get(statSkipMerge)) + statSkipMerge, stats.Get(statSkipMerge), + statPruned, stats.Get(statPruned)) } return nil diff --git a/src/internal/kopia/upload_test.go b/src/internal/kopia/upload_test.go index 51d8c7410..55455c1b2 100644 --- a/src/internal/kopia/upload_test.go +++ b/src/internal/kopia/upload_test.go @@ -36,6 +36,14 @@ func makePath(t *testing.T, elements []string, isItem bool) path.Path { return p } +func newExpectedFile(name string, fileData []byte) *expectedNode { + return &expectedNode{ + name: name, + data: fileData, + children: []*expectedNode{}, + } +} + // baseWithChildren returns an fs.Entry hierarchy where the first len(basic) // levels are the encoded values of basic in order. All items in children are // used as the direct descendents of the final entry in basic. @@ -2952,3 +2960,768 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSelectsMigrateSubt expectTree(t, ctx, expected, dirTree) } + +func newMockStaticDirectory( + name string, + entries []fs.Entry, +) (fs.Directory, *int) { + res := &mockStaticDirectory{ + Directory: virtualfs.NewStaticDirectory(name, entries), + } + + return res, &res.iterateCount +} + +type mockStaticDirectory struct { + fs.Directory + iterateCount int +} + +func (msd *mockStaticDirectory) IterateEntries( + ctx context.Context, + callback func(context.Context, fs.Entry) error, +) error { + msd.iterateCount++ + return msd.Directory.IterateEntries(ctx, callback) +} + +func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_SelectiveSubtreePruning() { + var ( + tenant = "tenant-id" + service = path.OneDriveService.String() + user = "user-id" + category = path.FilesCategory.String() + + // Not using drive/drive-id/root folders for brevity. 
+ folderID1 = "folder1-id" + folderID2 = "folder2-id" + folderID3 = "folder3-id" + folderID4 = "folder4-id" + + folderName1 = "folder1-name" + folderName2 = "folder2-name" + folderName3 = "folder3-name" + folderName4 = "folder4-name" + + fileName1 = "file1" + fileName2 = "file2" + fileName3 = "file3" + fileName4 = "file4" + fileName5 = "file5" + fileName6 = "file6" + fileName7 = "file7" + fileName8 = "file8" + + fileData1 = []byte("1") + fileData2 = []byte("2") + fileData3 = []byte("3") + fileData4 = []byte("4") + fileData5 = []byte("5") + fileData6 = []byte("6") + fileData7 = []byte("7") + fileData8 = []byte("8") + ) + + var ( + folderPath1 = makePath( + suite.T(), + []string{tenant, service, user, category, folderID1}, + false) + folderLocPath1 = makePath( + suite.T(), + []string{tenant, service, user, category, folderName1}, + false) + + folderPath2 = makePath( + suite.T(), + append(folderPath1.Elements(), folderID2), + false) + folderLocPath2 = makePath( + suite.T(), + append(folderLocPath1.Elements(), folderName2), + false) + + folderPath3 = makePath( + suite.T(), + append(folderPath2.Elements(), folderID3), + false) + + folderPath4 = makePath( + suite.T(), + []string{tenant, service, user, category, folderID4}, + false) + folderLocPath4 = makePath( + suite.T(), + []string{tenant, service, user, category, folderName4}, + false) + + prefixFolders = []string{ + tenant, + service, + user, + category, + } + ) + + folder4Unchanged := exchMock.NewCollection(folderPath4, folderLocPath4, 0) + folder4Unchanged.PrevPath = folderPath4 + folder4Unchanged.ColState = data.NotMovedState + + // Must be a function that returns a new instance each time as StreamingFile + // can only return its Reader once. Each directory below the prefix directory + // is also wrapped in a mock so we can count the number of times + // IterateEntries was called on it. 
+ // baseSnapshot with the following layout: + // - tenant-id + // - onedrive + // - user-id + // - files + // - folder1-id + // - file1 + // - file2 + // - folder2-id + // - file3 + // - file4 + // - folder3-id + // - file5 + // - file6 + // - folder4-id + // - file7 + // - file8 + getBaseSnapshot := func() (fs.Entry, map[string]*int) { + counters := map[string]*int{} + + folder, count := newMockStaticDirectory( + encodeElements(folderID3)[0], + []fs.Entry{ + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(fileName5)[0], + time.Time{}, + newBackupStreamReader( + serializationVersion, + io.NopCloser(bytes.NewReader(fileData5)))), + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(fileName6)[0], + time.Time{}, + newBackupStreamReader( + serializationVersion, + io.NopCloser(bytes.NewReader(fileData6)))), + }) + counters[folderID3] = count + + folder, count = newMockStaticDirectory( + encodeElements(folderID2)[0], + []fs.Entry{ + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(fileName3)[0], + time.Time{}, + newBackupStreamReader( + serializationVersion, + io.NopCloser(bytes.NewReader(fileData3)))), + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(fileName4)[0], + time.Time{}, + newBackupStreamReader( + serializationVersion, + io.NopCloser(bytes.NewReader(fileData4)))), + folder, + }) + counters[folderID2] = count + + folder, count = newMockStaticDirectory( + encodeElements(folderID1)[0], + []fs.Entry{ + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(fileName1)[0], + time.Time{}, + newBackupStreamReader( + serializationVersion, + io.NopCloser(bytes.NewReader(fileData1)))), + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(fileName2)[0], + time.Time{}, + newBackupStreamReader( + serializationVersion, + io.NopCloser(bytes.NewReader(fileData2)))), + folder, + }) + counters[folderID1] = count + + folder2, count := newMockStaticDirectory( + encodeElements(folderID4)[0], + []fs.Entry{ + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(fileName7)[0], + time.Time{}, + newBackupStreamReader( + serializationVersion, + io.NopCloser(bytes.NewReader(fileData7)))), + virtualfs.StreamingFileWithModTimeFromReader( + encodeElements(fileName8)[0], + time.Time{}, + newBackupStreamReader( + serializationVersion, + io.NopCloser(bytes.NewReader(fileData8)))), + }) + counters[folderID4] = count + + return baseWithChildren( + prefixFolders, + []fs.Entry{ + folder, + folder2, + }), + counters + } + + table := []struct { + name string + inputCollections func(t *testing.T) []data.BackupCollection + inputExcludes *pmMock.PrefixMap + expected *expectedNode + expectedIterateCounts map[string]int + }{ + { + // Test that even if files are excluded in the subtree selective subtree + // pruning skips traversing, the file is properly excluded during upload. + // + // It's safe to prune the subtree during merging because the directory + // layout hasn't changed. We still require traversal of all directories + // during data upload which allows us to exclude the file properly. 
+ name: "NoDirectoryChanges ExcludedFile PrunesSubtree", + inputCollections: func(t *testing.T) []data.BackupCollection { + return []data.BackupCollection{folder4Unchanged} + }, + inputExcludes: pmMock.NewPrefixMap(map[string]map[string]struct{}{ + "": { + fileName3: {}, + }, + }), + expected: expectedTreeWithChildren( + prefixFolders, + []*expectedNode{ + { + name: folderID1, + children: []*expectedNode{ + newExpectedFile(fileName1, fileData1), + newExpectedFile(fileName2, fileData2), + { + name: folderID2, + children: []*expectedNode{ + newExpectedFile(fileName4, fileData4), + { + name: folderID3, + children: []*expectedNode{ + newExpectedFile(fileName5, fileData5), + newExpectedFile(fileName6, fileData6), + }, + }, + }, + }, + }, + }, + { + name: folderID4, + children: []*expectedNode{ + newExpectedFile(fileName7, fileData7), + newExpectedFile(fileName8, fileData8), + }, + }, + }), + expectedIterateCounts: map[string]int{ + folderID1: 0, + folderID2: 0, + folderID3: 0, + folderID4: 1, + }, + }, + { + // Test that if a subtree is deleted in its entirety selective subtree + // pruning skips traversing it during hierarchy merging. + name: "SubtreeDelete PrunesSubtree", + inputCollections: func(t *testing.T) []data.BackupCollection { + mc := exchMock.NewCollection(nil, nil, 0) + mc.PrevPath = folderPath1 + mc.ColState = data.DeletedState + + return []data.BackupCollection{folder4Unchanged, mc} + }, + expected: expectedTreeWithChildren( + prefixFolders, + []*expectedNode{ + { + name: folderID4, + children: []*expectedNode{ + newExpectedFile(fileName7, fileData7), + newExpectedFile(fileName8, fileData8), + }, + }, + }), + expectedIterateCounts: map[string]int{ + // Deleted collections aren't added to the in-memory tree. + folderID1: 0, + folderID2: 0, + folderID3: 0, + folderID4: 1, + }, + }, + { + // Test that if a directory is moved but the subtree rooted at the moved + // directory is unchanged selective subtree pruning skips traversing all + // directories under the moved directory during hierarchy merging even if + // a new directory is created at the path of one of the unchanged (pruned) + // subdirectories of the moved directory. 
+ name: "ParentMoved NewFolderAtOldPath PrunesSubtree", + inputCollections: func(t *testing.T) []data.BackupCollection { + newPath := makePath( + suite.T(), + []string{tenant, service, user, category, "foo-id"}, + false) + newLoc := makePath( + suite.T(), + []string{tenant, service, user, category, "foo"}, + false) + + mc := exchMock.NewCollection(newPath, newLoc, 0) + mc.PrevPath = folderPath1 + mc.ColState = data.MovedState + + newMC := exchMock.NewCollection(folderPath2, folderLocPath2, 0) + + return []data.BackupCollection{folder4Unchanged, mc, newMC} + }, + expected: expectedTreeWithChildren( + prefixFolders, + []*expectedNode{ + { + name: "foo-id", + children: []*expectedNode{ + newExpectedFile(fileName1, fileData1), + newExpectedFile(fileName2, fileData2), + { + name: folderID2, + children: []*expectedNode{ + newExpectedFile(fileName3, fileData3), + newExpectedFile(fileName4, fileData4), + { + name: folderID3, + children: []*expectedNode{ + newExpectedFile(fileName5, fileData5), + newExpectedFile(fileName6, fileData6), + }, + }, + }, + }, + }, + }, + { + name: folderID1, + children: []*expectedNode{ + { + name: folderID2, + }, + }, + }, + { + name: folderID4, + children: []*expectedNode{ + newExpectedFile(fileName7, fileData7), + newExpectedFile(fileName8, fileData8), + }, + }, + }), + expectedIterateCounts: map[string]int{ + folderID1: 1, + folderID2: 0, + folderID3: 0, + folderID4: 1, + }, + }, + { + // Test that if a directory and its subtree is deleted in its entirety + // selective subtree pruning skips traversing the subtree during hierarchy + // merging even if a new directory is created at the path of one of the + // deleted (pruned) directories. + name: "SubtreeDelete NewFolderAtOldPath PrunesSubtree", + inputCollections: func(t *testing.T) []data.BackupCollection { + mc := exchMock.NewCollection(nil, nil, 0) + mc.PrevPath = folderPath2 + mc.ColState = data.DeletedState + + newMC := exchMock.NewCollection(folderPath2, folderLocPath2, 0) + + return []data.BackupCollection{folder4Unchanged, mc, newMC} + }, + expected: expectedTreeWithChildren( + prefixFolders, + []*expectedNode{ + { + name: folderID1, + children: []*expectedNode{ + newExpectedFile(fileName1, fileData1), + newExpectedFile(fileName2, fileData2), + { + name: folderID2, + }, + }, + }, + { + name: folderID4, + children: []*expectedNode{ + newExpectedFile(fileName7, fileData7), + newExpectedFile(fileName8, fileData8), + }, + }, + }), + expectedIterateCounts: map[string]int{ + folderID1: 1, + // Deleted collections aren't added to the in-memory tree. + folderID2: 0, + folderID3: 0, + folderID4: 1, + }, + }, + // These tests check that subtree pruning isn't triggered. + { + // Test that creating a new directory in an otherwise unchanged subtree + // doesn't trigger selective subtree merging for any subtree of the full + // hierarchy that includes the new directory but does trigger selective + // subtree pruning for unchanged subtrees without the new directory. 
+ name: "NewDirectory DoesntPruneSubtree", + inputCollections: func(t *testing.T) []data.BackupCollection { + newP, err := folderPath2.Append(false, "foo-id") + require.NoError(t, err, clues.ToCore(err)) + + newL, err := folderLocPath2.Append(false, "foo") + require.NoError(t, err, clues.ToCore(err)) + + newMC := exchMock.NewCollection(newP, newL, 0) + + return []data.BackupCollection{folder4Unchanged, newMC} + }, + expected: expectedTreeWithChildren( + prefixFolders, + []*expectedNode{ + { + name: folderID1, + children: []*expectedNode{ + newExpectedFile(fileName1, fileData1), + newExpectedFile(fileName2, fileData2), + { + name: folderID2, + children: []*expectedNode{ + newExpectedFile(fileName3, fileData3), + newExpectedFile(fileName4, fileData4), + { + name: "foo-id", + }, + { + name: folderID3, + children: []*expectedNode{ + newExpectedFile(fileName5, fileData5), + newExpectedFile(fileName6, fileData6), + }, + }, + }, + }, + }, + }, + { + name: folderID4, + children: []*expectedNode{ + newExpectedFile(fileName7, fileData7), + newExpectedFile(fileName8, fileData8), + }, + }, + }), + expectedIterateCounts: map[string]int{ + folderID1: 1, + folderID2: 1, + // Folder 3 triggers pruning because it has nothing changed under it. + folderID3: 0, + folderID4: 1, + }, + }, + { + // Test that moving a directory within a subtree doesn't trigger selective + // subtree merging for any subtree of the full hierarchy that includes the + // moved directory. + name: "MoveWithinSubtree DoesntPruneSubtree", + inputCollections: func(t *testing.T) []data.BackupCollection { + newP, err := folderPath1.Append(false, folderID3) + require.NoError(t, err, clues.ToCore(err)) + + newL, err := folderLocPath1.Append(false, folderName3) + require.NoError(t, err, clues.ToCore(err)) + + mc := exchMock.NewCollection(newP, newL, 0) + mc.PrevPath = folderPath3 + mc.ColState = data.MovedState + + return []data.BackupCollection{folder4Unchanged, mc} + }, + expected: expectedTreeWithChildren( + prefixFolders, + []*expectedNode{ + { + name: folderID1, + children: []*expectedNode{ + newExpectedFile(fileName1, fileData1), + newExpectedFile(fileName2, fileData2), + { + name: folderID2, + children: []*expectedNode{ + newExpectedFile(fileName3, fileData3), + newExpectedFile(fileName4, fileData4), + }, + }, + { + name: folderID3, + children: []*expectedNode{ + newExpectedFile(fileName5, fileData5), + newExpectedFile(fileName6, fileData6), + }, + }, + }, + }, + { + name: folderID4, + children: []*expectedNode{ + newExpectedFile(fileName7, fileData7), + newExpectedFile(fileName8, fileData8), + }, + }, + }), + expectedIterateCounts: map[string]int{ + folderID1: 1, + // Folder 2 can't be pruned because there's subtree changes under it + // (folder3 move). + folderID2: 1, + // Folder 3 can't be pruned because it has a collection associated with + // it. + folderID3: 1, + folderID4: 1, + }, + }, + { + // Test that moving a directory out of a subtree doesn't trigger selective + // subtree merging for any subtree of the full hierarchy that includes the + // moved directory in the previous hierarchy. 
+ name: "MoveOutOfSubtree DoesntPruneSubtree", + inputCollections: func(t *testing.T) []data.BackupCollection { + newP := makePath( + suite.T(), + []string{tenant, service, user, category, folderID3}, + false) + newL := makePath( + suite.T(), + []string{tenant, service, user, category, folderName3}, + false) + + mc := exchMock.NewCollection(newP, newL, 0) + mc.PrevPath = folderPath3 + mc.ColState = data.MovedState + + return []data.BackupCollection{folder4Unchanged, mc} + }, + expected: expectedTreeWithChildren( + prefixFolders, + []*expectedNode{ + { + name: folderID1, + children: []*expectedNode{ + newExpectedFile(fileName1, fileData1), + newExpectedFile(fileName2, fileData2), + { + name: folderID2, + children: []*expectedNode{ + newExpectedFile(fileName3, fileData3), + newExpectedFile(fileName4, fileData4), + }, + }, + }, + }, + { + name: folderID3, + children: []*expectedNode{ + newExpectedFile(fileName5, fileData5), + newExpectedFile(fileName6, fileData6), + }, + }, + { + name: folderID4, + children: []*expectedNode{ + newExpectedFile(fileName7, fileData7), + newExpectedFile(fileName8, fileData8), + }, + }, + }), + expectedIterateCounts: map[string]int{ + folderID1: 1, + // Folder 2 can't be pruned because there's subtree changes under it + // (folder3 move). + folderID2: 1, + // Folder 3 can't be pruned because it has a collection associated with + // it. + folderID3: 1, + folderID4: 1, + }, + }, + { + // Test that deleting a directory in a subtree doesn't trigger selective + // subtree merging for any subtree of the full hierarchy that includes the + // deleted directory in the previous hierarchy. + name: "DeleteInSubtree DoesntPruneSubtree", + inputCollections: func(t *testing.T) []data.BackupCollection { + mc := exchMock.NewCollection(nil, nil, 0) + mc.PrevPath = folderPath3 + mc.ColState = data.DeletedState + + return []data.BackupCollection{folder4Unchanged, mc} + }, + expected: expectedTreeWithChildren( + prefixFolders, + []*expectedNode{ + { + name: folderID1, + children: []*expectedNode{ + newExpectedFile(fileName1, fileData1), + newExpectedFile(fileName2, fileData2), + { + name: folderID2, + children: []*expectedNode{ + newExpectedFile(fileName3, fileData3), + newExpectedFile(fileName4, fileData4), + }, + }, + }, + }, + { + name: folderID4, + children: []*expectedNode{ + newExpectedFile(fileName7, fileData7), + newExpectedFile(fileName8, fileData8), + }, + }, + }), + expectedIterateCounts: map[string]int{ + folderID1: 1, + // Folder 2 can't be pruned because there's subtree changes under it + // (folder3 delete). + folderID2: 1, + // Folder3 is pruned because there's no changes under it. + folderID3: 0, + folderID4: 1, + }, + }, + { + // Test that moving an existing directory into a subtree doesn't trigger + // selective subtree merging for any subtree of the full hierarchy that + // includes the moved directory in the current hierarchy. 
+ name: "MoveIntoSubtree DoesntPruneSubtree", + inputCollections: func(t *testing.T) []data.BackupCollection { + newP, err := folderPath1.Append(false, folderID4) + require.NoError(t, err, clues.ToCore(err)) + + newL, err := folderLocPath1.Append(false, folderName4) + require.NoError(t, err, clues.ToCore(err)) + + mc := exchMock.NewCollection(newP, newL, 0) + mc.PrevPath = folderPath4 + mc.ColState = data.MovedState + + return []data.BackupCollection{mc} + }, + expected: expectedTreeWithChildren( + prefixFolders, + []*expectedNode{ + { + name: folderID1, + children: []*expectedNode{ + newExpectedFile(fileName1, fileData1), + newExpectedFile(fileName2, fileData2), + { + name: folderID2, + children: []*expectedNode{ + newExpectedFile(fileName3, fileData3), + newExpectedFile(fileName4, fileData4), + { + name: folderID3, + children: []*expectedNode{ + newExpectedFile(fileName5, fileData5), + newExpectedFile(fileName6, fileData6), + }, + }, + }, + }, + { + name: folderID4, + children: []*expectedNode{ + newExpectedFile(fileName7, fileData7), + newExpectedFile(fileName8, fileData8), + }, + }, + }, + }, + }), + expectedIterateCounts: map[string]int{ + // Folder 1 can't be pruned because there's subtree changes under it + // (folder4 move). + folderID1: 1, + // Folder 2 is pruned because nothing changes below it and it has no + // collection associated with it. + folderID2: 0, + folderID3: 0, + // Folder 4 can't be pruned because it has a collection associated with + // it. + folderID4: 1, + }, + }, + } + + for _, test := range table { + suite.Run(test.name, func() { + t := suite.T() + + ctx, flush := tester.NewContext(t) + defer flush() + + progress := &corsoProgress{ + ctx: ctx, + pending: map[string]*itemDetails{}, + toMerge: newMergeDetails(), + errs: fault.New(true), + } + snapshotRoot, counters := getBaseSnapshot() + msw := &mockSnapshotWalker{ + snapshotRoot: snapshotRoot, + } + + ie := pmMock.NewPrefixMap(nil) + if test.inputExcludes != nil { + ie = test.inputExcludes + } + + dirTree, err := inflateDirTree( + ctx, + msw, + []ManifestEntry{ + makeManifestEntry("", tenant, user, path.OneDriveService, path.FilesCategory), + }, + test.inputCollections(t), + ie, + progress) + require.NoError(t, err, clues.ToCore(err)) + + // Check iterate counts before checking tree content as checking tree + // content can disturb the counter values. + for name, count := range test.expectedIterateCounts { + c, ok := counters[name] + assert.True(t, ok, "unexpected counter %q", name) + assert.Equal(t, count, *c, "folder %q iterate count", name) + } + + expectTree(t, ctx, test.expected, dirTree) + }) + } +}