Implement selective subtree pruning (#4133)

Determine if a subtree has any directory object changes and, if it
doesn't, skip traversing that subtree during hierarchy merging.
"Directory object changes" for a subtree are defined as:
  * moving/renaming a directory within the subtree
  * deleting a directory in the subtree
  * moving a directory into the subtree
  * creating a directory in the subtree
  * moving a directory out of the subtree

The resulting snapshot still contains all data in the
pruned subtree.

---

#### Does this PR need a docs update or release note?

- [x]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [ ]  No

#### Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

* #4117

#### Test Plan

- [ ] 💪 Manual
- [x]  Unit test
- [x] 💚 E2E
This commit is contained in:
ashmrtn 2023-09-19 18:56:44 -07:00 committed by GitHub
parent b00ac292c7
commit 108c243e54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 908 additions and 38 deletions

View File

@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Enables local or network-attached storage for Corso repositories.
- Reduce backup runtime for OneDrive and SharePoint incremental backups that have no file changes.
## [v0.13.0] (beta) - 2023-09-18

View File

@ -452,6 +452,7 @@ func streamBaseEntries(
dir fs.Directory,
encodedSeen map[string]struct{},
globalExcludeSet prefixmatcher.StringSetReader,
streamDirEnts bool,
progress *corsoProgress,
) error {
if dir == nil {
@ -477,11 +478,53 @@ func streamBaseEntries(
return clues.Stack(err).WithClues(ctx)
}
entName, err := decodeElement(entry.Name())
if err != nil {
return clues.Wrap(err, "decoding entry name").
WithClues(ctx).
With("entry_name", entry.Name())
}
if d, ok := entry.(fs.Directory); ok {
// Don't walk subdirectories in this function.
if _, ok := entry.(fs.Directory); ok {
if !streamDirEnts {
return nil
}
// Do walk subdirectories. The previous and current path of the directory
// can be generated by appending the directory name onto the previous and
// current path of this directory. Since the directory has no
// BackupCollection associated with it (part of the criteria for allowing
// walking directories in this function) there shouldn't be any
// LocationPath information associated with the directory.
newP, err := curPath.Append(false, entName)
if err != nil {
return clues.Wrap(err, "getting current directory path").WithClues(ctx)
}
oldP, err := prevPath.Append(false, entName)
if err != nil {
return clues.Wrap(err, "getting previous directory path").WithClues(ctx)
}
e := virtualfs.NewStreamingDirectory(
entry.Name(),
getStreamItemFunc(
newP,
oldP,
nil,
nil,
d,
globalExcludeSet,
streamDirEnts,
progress))
return clues.Wrap(ctr(ctx, e), "executing callback on subdirectory").
WithClues(ctx).
With("directory_path", newP).
OrNil()
}
// This entry was either updated or deleted. In either case, the external
// service notified us about it and it's already been handled so we should
// skip it here.
@ -489,13 +532,6 @@ func streamBaseEntries(
return nil
}
entName, err := decodeElement(entry.Name())
if err != nil {
return clues.Wrap(err, "decoding entry name").
WithClues(ctx).
With("entry_name", entry.Name())
}
// This entry was marked as deleted by a service that can't tell us the
// previous path of deleted items, only the item ID.
if _, ok := excludeSet[entName]; ok {
@ -563,6 +599,7 @@ func getStreamItemFunc(
streamedEnts data.BackupCollection,
baseDir fs.Directory,
globalExcludeSet prefixmatcher.StringSetReader,
streamDirEnts bool,
progress *corsoProgress,
) func(context.Context, func(context.Context, fs.Entry) error) error {
return func(ctx context.Context, ctr func(context.Context, fs.Entry) error) error {
@ -596,6 +633,7 @@ func getStreamItemFunc(
baseDir,
seen,
globalExcludeSet,
streamDirEnts,
progress); err != nil {
return clues.Wrap(err, "streaming base snapshot entries")
}
@ -643,6 +681,7 @@ func buildKopiaDirs(
dir.collection,
dir.baseDir,
globalExcludeSet,
!dir.subtreeChanged,
progress)), nil
}
@ -667,6 +706,12 @@ type treeMap struct {
// be added to childDirs while building the hierarchy. They will be ignored
// when iterating through the directory to hand items to kopia.
baseDir fs.Directory
// subtreeChanged denotes whether any directories under this node have been
// moved, renamed, deleted, or added. If not then we should return both the
// kopia files and the kopia directories in the base entry because we're also
// doing selective subtree pruning during hierarchy merging.
subtreeChanged bool
}
func newTreeMap() *treeMap {
@ -863,6 +908,50 @@ func inflateCollectionTree(
return roots, updatedPaths, nil
}
func subtreeChanged(
roots map[string]*treeMap,
updatedPaths map[string]path.Path,
oldDirPath *path.Builder,
currentPath *path.Builder,
) bool {
// Can't combine with the inner if-block because go evaluates the assignment
// prior to all conditional checks.
if currentPath != nil {
// Either there's an input collection for this path or there's some
// descendant that has a collection with this path as a prefix.
//
// The base traversal code is single-threaded and this check is before
// processing child base directories so we don't have to worry about
// something else creating the in-memory subtree for unchanged things.
// Having only one base per Reason also means we haven't added this branch
// of the in-memory tree in a base processed before this one.
if node := maybeGetTreeNode(roots, currentPath.Elements()); node != nil &&
(node.collection != nil || len(node.childDirs) > 0) {
return true
}
}
oldPath := oldDirPath.String()
// We only need to check the old paths here because the check on the in-memory
// tree above will catch cases where the new path is in the subtree rooted at
// currentPath. We're mostly concerned with things being moved out of the
// subtree or deleted within the subtree in this block. Renames, moves within,
// and moves into the subtree will be caught by the above in-memory tree
// check.
//
// We can ignore exact matches on the old path because they would correspond
// to deleting the root of the subtree which we can handle as long as
// everything under the subtree root is also deleted.
for oldP := range updatedPaths {
if strings.HasPrefix(oldP, oldPath) && len(oldP) != len(oldPath) {
return true
}
}
return false
}
// traverseBaseDir is an unoptimized function that reads items in a directory
// and traverses subdirectories in the given directory. oldDirPath is the path
// the directory would be at if the hierarchy was unchanged. expectedDirPath is the
@ -946,15 +1035,14 @@ func traverseBaseDir(
ctx = clues.Add(ctx, "new_path", currentPath)
// TODO(ashmrtn): If we can do prefix matching on elements in updatedPaths and
// we know that the tree node for this directory has no collection reference
// and no child nodes then we can skip traversing this directory. This will
// only work if we know what directory deleted items used to belong in (e.x.
// it won't work for OneDrive because we only know the ID of the deleted
// item).
// Figure out if the subtree rooted at this directory is either unchanged or
// completely deleted. We can accomplish this by checking the in-memory tree
// and the updatedPaths map.
changed := subtreeChanged(roots, updatedPaths, oldDirPath, currentPath)
var hasItems bool
if changed {
err = dir.IterateEntries(ctx, func(innerCtx context.Context, entry fs.Entry) error {
dEntry, ok := entry.(fs.Directory)
if !ok {
@ -975,13 +1063,16 @@ func traverseBaseDir(
if err != nil {
return clues.Wrap(err, "traversing base directory").WithClues(ctx)
}
} else {
stats.Inc(statPruned)
}
// We only need to add this base directory to the tree we're building if it
// has items in it. The traversal of the directory here just finds
// subdirectories. This optimization will not be valid if we dynamically
// determine the subdirectories this directory has when handing items to
// kopia.
if currentPath != nil && hasItems {
if currentPath != nil && (hasItems || !changed) {
// Having this in the if-block has the effect of removing empty directories
// from backups that have a base snapshot. If we'd like to preserve empty
// directories across incremental backups, move getting the node outside of
@ -1014,12 +1105,12 @@ func traverseBaseDir(
stats.Inc(statRecursiveMove)
}
curP, err := path.FromDataLayerPath(currentPath.String(), false)
curP, err := path.PrefixOrPathFromDataLayerPath(currentPath.String(), false)
if err != nil {
return clues.New("converting current path to path.Path").WithClues(ctx)
}
oldP, err := path.FromDataLayerPath(oldDirPath.String(), false)
oldP, err := path.PrefixOrPathFromDataLayerPath(oldDirPath.String(), false)
if err != nil {
return clues.New("converting old path to path.Path").WithClues(ctx)
}
@ -1027,6 +1118,7 @@ func traverseBaseDir(
node.baseDir = dir
node.currentPath = curP
node.prevPath = oldP
node.subtreeChanged = changed
}
return nil
@ -1070,6 +1162,9 @@ const (
// statSkipMerge denotes the number of directories that weren't merged because
// they were marked either DoNotMerge or New.
statSkipMerge = "directories_skipped_merging"
// statPruned denotes the number of subtree roots that selective subtree
// pruning applied to.
statPruned = "subtrees_pruned"
)
func inflateBaseTree(
@ -1163,7 +1258,8 @@ func inflateBaseTree(
statRecursiveMove, stats.Get(statRecursiveMove),
statDel, stats.Get(statDel),
statRecursiveDel, stats.Get(statRecursiveDel),
statSkipMerge, stats.Get(statSkipMerge))
statSkipMerge, stats.Get(statSkipMerge),
statPruned, stats.Get(statPruned))
}
return nil

View File

@ -36,6 +36,14 @@ func makePath(t *testing.T, elements []string, isItem bool) path.Path {
return p
}
func newExpectedFile(name string, fileData []byte) *expectedNode {
return &expectedNode{
name: name,
data: fileData,
children: []*expectedNode{},
}
}
// baseWithChildren returns an fs.Entry hierarchy where the first len(basic)
// levels are the encoded values of basic in order. All items in children are
// used as the direct descendents of the final entry in basic.
@ -2952,3 +2960,768 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSelectsMigrateSubt
expectTree(t, ctx, expected, dirTree)
}
func newMockStaticDirectory(
name string,
entries []fs.Entry,
) (fs.Directory, *int) {
res := &mockStaticDirectory{
Directory: virtualfs.NewStaticDirectory(name, entries),
}
return res, &res.iterateCount
}
type mockStaticDirectory struct {
fs.Directory
iterateCount int
}
func (msd *mockStaticDirectory) IterateEntries(
ctx context.Context,
callback func(context.Context, fs.Entry) error,
) error {
msd.iterateCount++
return msd.Directory.IterateEntries(ctx, callback)
}
func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_SelectiveSubtreePruning() {
var (
tenant = "tenant-id"
service = path.OneDriveService.String()
user = "user-id"
category = path.FilesCategory.String()
// Not using drive/drive-id/root folders for brevity.
folderID1 = "folder1-id"
folderID2 = "folder2-id"
folderID3 = "folder3-id"
folderID4 = "folder4-id"
folderName1 = "folder1-name"
folderName2 = "folder2-name"
folderName3 = "folder3-name"
folderName4 = "folder4-name"
fileName1 = "file1"
fileName2 = "file2"
fileName3 = "file3"
fileName4 = "file4"
fileName5 = "file5"
fileName6 = "file6"
fileName7 = "file7"
fileName8 = "file8"
fileData1 = []byte("1")
fileData2 = []byte("2")
fileData3 = []byte("3")
fileData4 = []byte("4")
fileData5 = []byte("5")
fileData6 = []byte("6")
fileData7 = []byte("7")
fileData8 = []byte("8")
)
var (
folderPath1 = makePath(
suite.T(),
[]string{tenant, service, user, category, folderID1},
false)
folderLocPath1 = makePath(
suite.T(),
[]string{tenant, service, user, category, folderName1},
false)
folderPath2 = makePath(
suite.T(),
append(folderPath1.Elements(), folderID2),
false)
folderLocPath2 = makePath(
suite.T(),
append(folderLocPath1.Elements(), folderName2),
false)
folderPath3 = makePath(
suite.T(),
append(folderPath2.Elements(), folderID3),
false)
folderPath4 = makePath(
suite.T(),
[]string{tenant, service, user, category, folderID4},
false)
folderLocPath4 = makePath(
suite.T(),
[]string{tenant, service, user, category, folderName4},
false)
prefixFolders = []string{
tenant,
service,
user,
category,
}
)
folder4Unchanged := exchMock.NewCollection(folderPath4, folderLocPath4, 0)
folder4Unchanged.PrevPath = folderPath4
folder4Unchanged.ColState = data.NotMovedState
// Must be a function that returns a new instance each time as StreamingFile
// can only return its Reader once. Each directory below the prefix directory
// is also wrapped in a mock so we can count the number of times
// IterateEntries was called on it.
// baseSnapshot with the following layout:
// - tenant-id
// - onedrive
// - user-id
// - files
// - folder1-id
// - file1
// - file2
// - folder2-id
// - file3
// - file4
// - folder3-id
// - file5
// - file6
// - folder4-id
// - file7
// - file8
getBaseSnapshot := func() (fs.Entry, map[string]*int) {
counters := map[string]*int{}
folder, count := newMockStaticDirectory(
encodeElements(folderID3)[0],
[]fs.Entry{
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(fileName5)[0],
time.Time{},
newBackupStreamReader(
serializationVersion,
io.NopCloser(bytes.NewReader(fileData5)))),
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(fileName6)[0],
time.Time{},
newBackupStreamReader(
serializationVersion,
io.NopCloser(bytes.NewReader(fileData6)))),
})
counters[folderID3] = count
folder, count = newMockStaticDirectory(
encodeElements(folderID2)[0],
[]fs.Entry{
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(fileName3)[0],
time.Time{},
newBackupStreamReader(
serializationVersion,
io.NopCloser(bytes.NewReader(fileData3)))),
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(fileName4)[0],
time.Time{},
newBackupStreamReader(
serializationVersion,
io.NopCloser(bytes.NewReader(fileData4)))),
folder,
})
counters[folderID2] = count
folder, count = newMockStaticDirectory(
encodeElements(folderID1)[0],
[]fs.Entry{
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(fileName1)[0],
time.Time{},
newBackupStreamReader(
serializationVersion,
io.NopCloser(bytes.NewReader(fileData1)))),
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(fileName2)[0],
time.Time{},
newBackupStreamReader(
serializationVersion,
io.NopCloser(bytes.NewReader(fileData2)))),
folder,
})
counters[folderID1] = count
folder2, count := newMockStaticDirectory(
encodeElements(folderID4)[0],
[]fs.Entry{
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(fileName7)[0],
time.Time{},
newBackupStreamReader(
serializationVersion,
io.NopCloser(bytes.NewReader(fileData7)))),
virtualfs.StreamingFileWithModTimeFromReader(
encodeElements(fileName8)[0],
time.Time{},
newBackupStreamReader(
serializationVersion,
io.NopCloser(bytes.NewReader(fileData8)))),
})
counters[folderID4] = count
return baseWithChildren(
prefixFolders,
[]fs.Entry{
folder,
folder2,
}),
counters
}
table := []struct {
name string
inputCollections func(t *testing.T) []data.BackupCollection
inputExcludes *pmMock.PrefixMap
expected *expectedNode
expectedIterateCounts map[string]int
}{
{
// Test that even if files are excluded in the subtree selective subtree
// pruning skips traversing, the file is properly excluded during upload.
//
// It's safe to prune the subtree during merging because the directory
// layout hasn't changed. We still require traversal of all directories
// during data upload which allows us to exclude the file properly.
name: "NoDirectoryChanges ExcludedFile PrunesSubtree",
inputCollections: func(t *testing.T) []data.BackupCollection {
return []data.BackupCollection{folder4Unchanged}
},
inputExcludes: pmMock.NewPrefixMap(map[string]map[string]struct{}{
"": {
fileName3: {},
},
}),
expected: expectedTreeWithChildren(
prefixFolders,
[]*expectedNode{
{
name: folderID1,
children: []*expectedNode{
newExpectedFile(fileName1, fileData1),
newExpectedFile(fileName2, fileData2),
{
name: folderID2,
children: []*expectedNode{
newExpectedFile(fileName4, fileData4),
{
name: folderID3,
children: []*expectedNode{
newExpectedFile(fileName5, fileData5),
newExpectedFile(fileName6, fileData6),
},
},
},
},
},
},
{
name: folderID4,
children: []*expectedNode{
newExpectedFile(fileName7, fileData7),
newExpectedFile(fileName8, fileData8),
},
},
}),
expectedIterateCounts: map[string]int{
folderID1: 0,
folderID2: 0,
folderID3: 0,
folderID4: 1,
},
},
{
// Test that if a subtree is deleted in its entirety selective subtree
// pruning skips traversing it during hierarchy merging.
name: "SubtreeDelete PrunesSubtree",
inputCollections: func(t *testing.T) []data.BackupCollection {
mc := exchMock.NewCollection(nil, nil, 0)
mc.PrevPath = folderPath1
mc.ColState = data.DeletedState
return []data.BackupCollection{folder4Unchanged, mc}
},
expected: expectedTreeWithChildren(
prefixFolders,
[]*expectedNode{
{
name: folderID4,
children: []*expectedNode{
newExpectedFile(fileName7, fileData7),
newExpectedFile(fileName8, fileData8),
},
},
}),
expectedIterateCounts: map[string]int{
// Deleted collections aren't added to the in-memory tree.
folderID1: 0,
folderID2: 0,
folderID3: 0,
folderID4: 1,
},
},
{
// Test that if a directory is moved but the subtree rooted at the moved
// directory is unchanged selective subtree pruning skips traversing all
// directories under the moved directory during hierarchy merging even if
// a new directory is created at the path of one of the unchanged (pruned)
// subdirectories of the moved directory.
name: "ParentMoved NewFolderAtOldPath PrunesSubtree",
inputCollections: func(t *testing.T) []data.BackupCollection {
newPath := makePath(
suite.T(),
[]string{tenant, service, user, category, "foo-id"},
false)
newLoc := makePath(
suite.T(),
[]string{tenant, service, user, category, "foo"},
false)
mc := exchMock.NewCollection(newPath, newLoc, 0)
mc.PrevPath = folderPath1
mc.ColState = data.MovedState
newMC := exchMock.NewCollection(folderPath2, folderLocPath2, 0)
return []data.BackupCollection{folder4Unchanged, mc, newMC}
},
expected: expectedTreeWithChildren(
prefixFolders,
[]*expectedNode{
{
name: "foo-id",
children: []*expectedNode{
newExpectedFile(fileName1, fileData1),
newExpectedFile(fileName2, fileData2),
{
name: folderID2,
children: []*expectedNode{
newExpectedFile(fileName3, fileData3),
newExpectedFile(fileName4, fileData4),
{
name: folderID3,
children: []*expectedNode{
newExpectedFile(fileName5, fileData5),
newExpectedFile(fileName6, fileData6),
},
},
},
},
},
},
{
name: folderID1,
children: []*expectedNode{
{
name: folderID2,
},
},
},
{
name: folderID4,
children: []*expectedNode{
newExpectedFile(fileName7, fileData7),
newExpectedFile(fileName8, fileData8),
},
},
}),
expectedIterateCounts: map[string]int{
folderID1: 1,
folderID2: 0,
folderID3: 0,
folderID4: 1,
},
},
{
// Test that if a directory and its subtree is deleted in its entirety
// selective subtree pruning skips traversing the subtree during hierarchy
// merging even if a new directory is created at the path of one of the
// deleted (pruned) directories.
name: "SubtreeDelete NewFolderAtOldPath PrunesSubtree",
inputCollections: func(t *testing.T) []data.BackupCollection {
mc := exchMock.NewCollection(nil, nil, 0)
mc.PrevPath = folderPath2
mc.ColState = data.DeletedState
newMC := exchMock.NewCollection(folderPath2, folderLocPath2, 0)
return []data.BackupCollection{folder4Unchanged, mc, newMC}
},
expected: expectedTreeWithChildren(
prefixFolders,
[]*expectedNode{
{
name: folderID1,
children: []*expectedNode{
newExpectedFile(fileName1, fileData1),
newExpectedFile(fileName2, fileData2),
{
name: folderID2,
},
},
},
{
name: folderID4,
children: []*expectedNode{
newExpectedFile(fileName7, fileData7),
newExpectedFile(fileName8, fileData8),
},
},
}),
expectedIterateCounts: map[string]int{
folderID1: 1,
// Deleted collections aren't added to the in-memory tree.
folderID2: 0,
folderID3: 0,
folderID4: 1,
},
},
// These tests check that subtree pruning isn't triggered.
{
// Test that creating a new directory in an otherwise unchanged subtree
// doesn't trigger selective subtree merging for any subtree of the full
// hierarchy that includes the new directory but does trigger selective
// subtree pruning for unchanged subtrees without the new directory.
name: "NewDirectory DoesntPruneSubtree",
inputCollections: func(t *testing.T) []data.BackupCollection {
newP, err := folderPath2.Append(false, "foo-id")
require.NoError(t, err, clues.ToCore(err))
newL, err := folderLocPath2.Append(false, "foo")
require.NoError(t, err, clues.ToCore(err))
newMC := exchMock.NewCollection(newP, newL, 0)
return []data.BackupCollection{folder4Unchanged, newMC}
},
expected: expectedTreeWithChildren(
prefixFolders,
[]*expectedNode{
{
name: folderID1,
children: []*expectedNode{
newExpectedFile(fileName1, fileData1),
newExpectedFile(fileName2, fileData2),
{
name: folderID2,
children: []*expectedNode{
newExpectedFile(fileName3, fileData3),
newExpectedFile(fileName4, fileData4),
{
name: "foo-id",
},
{
name: folderID3,
children: []*expectedNode{
newExpectedFile(fileName5, fileData5),
newExpectedFile(fileName6, fileData6),
},
},
},
},
},
},
{
name: folderID4,
children: []*expectedNode{
newExpectedFile(fileName7, fileData7),
newExpectedFile(fileName8, fileData8),
},
},
}),
expectedIterateCounts: map[string]int{
folderID1: 1,
folderID2: 1,
// Folder 3 triggers pruning because it has nothing changed under it.
folderID3: 0,
folderID4: 1,
},
},
{
// Test that moving a directory within a subtree doesn't trigger selective
// subtree merging for any subtree of the full hierarchy that includes the
// moved directory.
name: "MoveWithinSubtree DoesntPruneSubtree",
inputCollections: func(t *testing.T) []data.BackupCollection {
newP, err := folderPath1.Append(false, folderID3)
require.NoError(t, err, clues.ToCore(err))
newL, err := folderLocPath1.Append(false, folderName3)
require.NoError(t, err, clues.ToCore(err))
mc := exchMock.NewCollection(newP, newL, 0)
mc.PrevPath = folderPath3
mc.ColState = data.MovedState
return []data.BackupCollection{folder4Unchanged, mc}
},
expected: expectedTreeWithChildren(
prefixFolders,
[]*expectedNode{
{
name: folderID1,
children: []*expectedNode{
newExpectedFile(fileName1, fileData1),
newExpectedFile(fileName2, fileData2),
{
name: folderID2,
children: []*expectedNode{
newExpectedFile(fileName3, fileData3),
newExpectedFile(fileName4, fileData4),
},
},
{
name: folderID3,
children: []*expectedNode{
newExpectedFile(fileName5, fileData5),
newExpectedFile(fileName6, fileData6),
},
},
},
},
{
name: folderID4,
children: []*expectedNode{
newExpectedFile(fileName7, fileData7),
newExpectedFile(fileName8, fileData8),
},
},
}),
expectedIterateCounts: map[string]int{
folderID1: 1,
// Folder 2 can't be pruned because there's subtree changes under it
// (folder3 move).
folderID2: 1,
// Folder 3 can't be pruned because it has a collection associated with
// it.
folderID3: 1,
folderID4: 1,
},
},
{
// Test that moving a directory out of a subtree doesn't trigger selective
// subtree merging for any subtree of the full hierarchy that includes the
// moved directory in the previous hierarchy.
name: "MoveOutOfSubtree DoesntPruneSubtree",
inputCollections: func(t *testing.T) []data.BackupCollection {
newP := makePath(
suite.T(),
[]string{tenant, service, user, category, folderID3},
false)
newL := makePath(
suite.T(),
[]string{tenant, service, user, category, folderName3},
false)
mc := exchMock.NewCollection(newP, newL, 0)
mc.PrevPath = folderPath3
mc.ColState = data.MovedState
return []data.BackupCollection{folder4Unchanged, mc}
},
expected: expectedTreeWithChildren(
prefixFolders,
[]*expectedNode{
{
name: folderID1,
children: []*expectedNode{
newExpectedFile(fileName1, fileData1),
newExpectedFile(fileName2, fileData2),
{
name: folderID2,
children: []*expectedNode{
newExpectedFile(fileName3, fileData3),
newExpectedFile(fileName4, fileData4),
},
},
},
},
{
name: folderID3,
children: []*expectedNode{
newExpectedFile(fileName5, fileData5),
newExpectedFile(fileName6, fileData6),
},
},
{
name: folderID4,
children: []*expectedNode{
newExpectedFile(fileName7, fileData7),
newExpectedFile(fileName8, fileData8),
},
},
}),
expectedIterateCounts: map[string]int{
folderID1: 1,
// Folder 2 can't be pruned because there's subtree changes under it
// (folder3 move).
folderID2: 1,
// Folder 3 can't be pruned because it has a collection associated with
// it.
folderID3: 1,
folderID4: 1,
},
},
{
// Test that deleting a directory in a subtree doesn't trigger selective
// subtree merging for any subtree of the full hierarchy that includes the
// deleted directory in the previous hierarchy.
name: "DeleteInSubtree DoesntPruneSubtree",
inputCollections: func(t *testing.T) []data.BackupCollection {
mc := exchMock.NewCollection(nil, nil, 0)
mc.PrevPath = folderPath3
mc.ColState = data.DeletedState
return []data.BackupCollection{folder4Unchanged, mc}
},
expected: expectedTreeWithChildren(
prefixFolders,
[]*expectedNode{
{
name: folderID1,
children: []*expectedNode{
newExpectedFile(fileName1, fileData1),
newExpectedFile(fileName2, fileData2),
{
name: folderID2,
children: []*expectedNode{
newExpectedFile(fileName3, fileData3),
newExpectedFile(fileName4, fileData4),
},
},
},
},
{
name: folderID4,
children: []*expectedNode{
newExpectedFile(fileName7, fileData7),
newExpectedFile(fileName8, fileData8),
},
},
}),
expectedIterateCounts: map[string]int{
folderID1: 1,
// Folder 2 can't be pruned because there's subtree changes under it
// (folder3 delete).
folderID2: 1,
// Folder3 is pruned because there's no changes under it.
folderID3: 0,
folderID4: 1,
},
},
{
// Test that moving an existing directory into a subtree doesn't trigger
// selective subtree merging for any subtree of the full hierarchy that
// includes the moved directory in the current hierarchy.
name: "MoveIntoSubtree DoesntPruneSubtree",
inputCollections: func(t *testing.T) []data.BackupCollection {
newP, err := folderPath1.Append(false, folderID4)
require.NoError(t, err, clues.ToCore(err))
newL, err := folderLocPath1.Append(false, folderName4)
require.NoError(t, err, clues.ToCore(err))
mc := exchMock.NewCollection(newP, newL, 0)
mc.PrevPath = folderPath4
mc.ColState = data.MovedState
return []data.BackupCollection{mc}
},
expected: expectedTreeWithChildren(
prefixFolders,
[]*expectedNode{
{
name: folderID1,
children: []*expectedNode{
newExpectedFile(fileName1, fileData1),
newExpectedFile(fileName2, fileData2),
{
name: folderID2,
children: []*expectedNode{
newExpectedFile(fileName3, fileData3),
newExpectedFile(fileName4, fileData4),
{
name: folderID3,
children: []*expectedNode{
newExpectedFile(fileName5, fileData5),
newExpectedFile(fileName6, fileData6),
},
},
},
},
{
name: folderID4,
children: []*expectedNode{
newExpectedFile(fileName7, fileData7),
newExpectedFile(fileName8, fileData8),
},
},
},
},
}),
expectedIterateCounts: map[string]int{
// Folder 1 can't be pruned because there's subtree changes under it
// (folder4 move).
folderID1: 1,
// Folder 2 is pruned because nothing changes below it and it has no
// collection associated with it.
folderID2: 0,
folderID3: 0,
// Folder 4 can't be pruned because it has a collection associated with
// it.
folderID4: 1,
},
},
}
for _, test := range table {
suite.Run(test.name, func() {
t := suite.T()
ctx, flush := tester.NewContext(t)
defer flush()
progress := &corsoProgress{
ctx: ctx,
pending: map[string]*itemDetails{},
toMerge: newMergeDetails(),
errs: fault.New(true),
}
snapshotRoot, counters := getBaseSnapshot()
msw := &mockSnapshotWalker{
snapshotRoot: snapshotRoot,
}
ie := pmMock.NewPrefixMap(nil)
if test.inputExcludes != nil {
ie = test.inputExcludes
}
dirTree, err := inflateDirTree(
ctx,
msw,
[]ManifestEntry{
makeManifestEntry("", tenant, user, path.OneDriveService, path.FilesCategory),
},
test.inputCollections(t),
ie,
progress)
require.NoError(t, err, clues.ToCore(err))
// Check iterate counts before checking tree content as checking tree
// content can disturb the counter values.
for name, count := range test.expectedIterateCounts {
c, ok := counters[name]
assert.True(t, ok, "unexpected counter %q", name)
assert.Equal(t, count, *c, "folder %q iterate count", name)
}
expectTree(t, ctx, test.expected, dirTree)
})
}
}