Implement selective subtree pruning (#4133)
Determine if a subtree has any directory object changes and if it doesn't skip traversing that subtree during hierarchy merging. "Directory object changes" for a subtree are defined as: * moving/renaming a directory within the subtree * deleting a directory in the subtree * moving a directory into the subtree * creating a directory in the subtree * moving a directory out of the subtree The resulting snapshot still contains all data in the pruned subtree. --- #### Does this PR need a docs update or release note? - [x] ✅ Yes, it's included - [ ] 🕐 Yes, but in a later PR - [ ] ⛔ No #### Type of change - [x] 🌻 Feature - [ ] 🐛 Bugfix - [ ] 🗺️ Documentation - [ ] 🤖 Supportability/Tests - [ ] 💻 CI/Deployment - [ ] 🧹 Tech Debt/Cleanup #### Issue(s) * #4117 #### Test Plan - [ ] 💪 Manual - [x] ⚡ Unit test - [x] 💚 E2E
This commit is contained in:
parent
b00ac292c7
commit
108c243e54
@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Added
|
||||
- Enables local or network-attached storage for Corso repositories.
|
||||
- Reduce backup runtime for OneDrive and SharePoint incremental backups that have no file changes.
|
||||
|
||||
## [v0.13.0] (beta) - 2023-09-18
|
||||
|
||||
|
||||
@ -452,6 +452,7 @@ func streamBaseEntries(
|
||||
dir fs.Directory,
|
||||
encodedSeen map[string]struct{},
|
||||
globalExcludeSet prefixmatcher.StringSetReader,
|
||||
streamDirEnts bool,
|
||||
progress *corsoProgress,
|
||||
) error {
|
||||
if dir == nil {
|
||||
@ -477,9 +478,51 @@ func streamBaseEntries(
|
||||
return clues.Stack(err).WithClues(ctx)
|
||||
}
|
||||
|
||||
// Don't walk subdirectories in this function.
|
||||
if _, ok := entry.(fs.Directory); ok {
|
||||
return nil
|
||||
entName, err := decodeElement(entry.Name())
|
||||
if err != nil {
|
||||
return clues.Wrap(err, "decoding entry name").
|
||||
WithClues(ctx).
|
||||
With("entry_name", entry.Name())
|
||||
}
|
||||
|
||||
if d, ok := entry.(fs.Directory); ok {
|
||||
// Don't walk subdirectories in this function.
|
||||
if !streamDirEnts {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Do walk subdirectories. The previous and current path of the directory
|
||||
// can be generated by appending the directory name onto the previous and
|
||||
// current path of this directory. Since the directory has no
|
||||
// BackupCollection associated with it (part of the criteria for allowing
|
||||
// walking directories in this function) there shouldn't be any
|
||||
// LocationPath information associated with the directory.
|
||||
newP, err := curPath.Append(false, entName)
|
||||
if err != nil {
|
||||
return clues.Wrap(err, "getting current directory path").WithClues(ctx)
|
||||
}
|
||||
|
||||
oldP, err := prevPath.Append(false, entName)
|
||||
if err != nil {
|
||||
return clues.Wrap(err, "getting previous directory path").WithClues(ctx)
|
||||
}
|
||||
|
||||
e := virtualfs.NewStreamingDirectory(
|
||||
entry.Name(),
|
||||
getStreamItemFunc(
|
||||
newP,
|
||||
oldP,
|
||||
nil,
|
||||
nil,
|
||||
d,
|
||||
globalExcludeSet,
|
||||
streamDirEnts,
|
||||
progress))
|
||||
|
||||
return clues.Wrap(ctr(ctx, e), "executing callback on subdirectory").
|
||||
WithClues(ctx).
|
||||
With("directory_path", newP).
|
||||
OrNil()
|
||||
}
|
||||
|
||||
// This entry was either updated or deleted. In either case, the external
|
||||
@ -489,13 +532,6 @@ func streamBaseEntries(
|
||||
return nil
|
||||
}
|
||||
|
||||
entName, err := decodeElement(entry.Name())
|
||||
if err != nil {
|
||||
return clues.Wrap(err, "decoding entry name").
|
||||
WithClues(ctx).
|
||||
With("entry_name", entry.Name())
|
||||
}
|
||||
|
||||
// This entry was marked as deleted by a service that can't tell us the
|
||||
// previous path of deleted items, only the item ID.
|
||||
if _, ok := excludeSet[entName]; ok {
|
||||
@ -563,6 +599,7 @@ func getStreamItemFunc(
|
||||
streamedEnts data.BackupCollection,
|
||||
baseDir fs.Directory,
|
||||
globalExcludeSet prefixmatcher.StringSetReader,
|
||||
streamDirEnts bool,
|
||||
progress *corsoProgress,
|
||||
) func(context.Context, func(context.Context, fs.Entry) error) error {
|
||||
return func(ctx context.Context, ctr func(context.Context, fs.Entry) error) error {
|
||||
@ -596,6 +633,7 @@ func getStreamItemFunc(
|
||||
baseDir,
|
||||
seen,
|
||||
globalExcludeSet,
|
||||
streamDirEnts,
|
||||
progress); err != nil {
|
||||
return clues.Wrap(err, "streaming base snapshot entries")
|
||||
}
|
||||
@ -643,6 +681,7 @@ func buildKopiaDirs(
|
||||
dir.collection,
|
||||
dir.baseDir,
|
||||
globalExcludeSet,
|
||||
!dir.subtreeChanged,
|
||||
progress)), nil
|
||||
}
|
||||
|
||||
@ -667,6 +706,12 @@ type treeMap struct {
|
||||
// be added to childDirs while building the hierarchy. They will be ignored
|
||||
// when iterating through the directory to hand items to kopia.
|
||||
baseDir fs.Directory
|
||||
|
||||
// subtreeChanged denotes whether any directories under this node have been
|
||||
// moved, renamed, deleted, or added. If not then we should return both the
|
||||
// kopia files and the kopia directories in the base entry because we're also
|
||||
// doing selective subtree pruning during hierarchy merging.
|
||||
subtreeChanged bool
|
||||
}
|
||||
|
||||
func newTreeMap() *treeMap {
|
||||
@ -863,6 +908,50 @@ func inflateCollectionTree(
|
||||
return roots, updatedPaths, nil
|
||||
}
|
||||
|
||||
func subtreeChanged(
|
||||
roots map[string]*treeMap,
|
||||
updatedPaths map[string]path.Path,
|
||||
oldDirPath *path.Builder,
|
||||
currentPath *path.Builder,
|
||||
) bool {
|
||||
// Can't combine with the inner if-block because go evaluates the assignment
|
||||
// prior to all conditional checks.
|
||||
if currentPath != nil {
|
||||
// Either there's an input collection for this path or there's some
|
||||
// descendant that has a collection with this path as a prefix.
|
||||
//
|
||||
// The base traversal code is single-threaded and this check is before
|
||||
// processing child base directories so we don't have to worry about
|
||||
// something else creating the in-memory subtree for unchanged things.
|
||||
// Having only one base per Reason also means we haven't added this branch
|
||||
// of the in-memory tree in a base processed before this one.
|
||||
if node := maybeGetTreeNode(roots, currentPath.Elements()); node != nil &&
|
||||
(node.collection != nil || len(node.childDirs) > 0) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
oldPath := oldDirPath.String()
|
||||
|
||||
// We only need to check the old paths here because the check on the in-memory
|
||||
// tree above will catch cases where the new path is in the subtree rooted at
|
||||
// currentPath. We're mostly concerned with things being moved out of the
|
||||
// subtree or deleted within the subtree in this block. Renames, moves within,
|
||||
// and moves into the subtree will be caught by the above in-memory tree
|
||||
// check.
|
||||
//
|
||||
// We can ignore exact matches on the old path because they would correspond
|
||||
// to deleting the root of the subtree which we can handle as long as
|
||||
// everything under the subtree root is also deleted.
|
||||
for oldP := range updatedPaths {
|
||||
if strings.HasPrefix(oldP, oldPath) && len(oldP) != len(oldPath) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// traverseBaseDir is an unoptimized function that reads items in a directory
|
||||
// and traverses subdirectories in the given directory. oldDirPath is the path
|
||||
// the directory would be at if the hierarchy was unchanged. expectedDirPath is the
|
||||
@ -946,34 +1035,36 @@ func traverseBaseDir(
|
||||
|
||||
ctx = clues.Add(ctx, "new_path", currentPath)
|
||||
|
||||
// TODO(ashmrtn): If we can do prefix matching on elements in updatedPaths and
|
||||
// we know that the tree node for this directory has no collection reference
|
||||
// and no child nodes then we can skip traversing this directory. This will
|
||||
// only work if we know what directory deleted items used to belong in (e.x.
|
||||
// it won't work for OneDrive because we only know the ID of the deleted
|
||||
// item).
|
||||
// Figure out if the subtree rooted at this directory is either unchanged or
|
||||
// completely deleted. We can accomplish this by checking the in-memory tree
|
||||
// and the updatedPaths map.
|
||||
changed := subtreeChanged(roots, updatedPaths, oldDirPath, currentPath)
|
||||
|
||||
var hasItems bool
|
||||
|
||||
err = dir.IterateEntries(ctx, func(innerCtx context.Context, entry fs.Entry) error {
|
||||
dEntry, ok := entry.(fs.Directory)
|
||||
if !ok {
|
||||
hasItems = true
|
||||
return nil
|
||||
}
|
||||
if changed {
|
||||
err = dir.IterateEntries(ctx, func(innerCtx context.Context, entry fs.Entry) error {
|
||||
dEntry, ok := entry.(fs.Directory)
|
||||
if !ok {
|
||||
hasItems = true
|
||||
return nil
|
||||
}
|
||||
|
||||
return traverseBaseDir(
|
||||
innerCtx,
|
||||
depth+1,
|
||||
updatedPaths,
|
||||
oldDirPath,
|
||||
currentPath,
|
||||
dEntry,
|
||||
roots,
|
||||
stats)
|
||||
})
|
||||
if err != nil {
|
||||
return clues.Wrap(err, "traversing base directory").WithClues(ctx)
|
||||
return traverseBaseDir(
|
||||
innerCtx,
|
||||
depth+1,
|
||||
updatedPaths,
|
||||
oldDirPath,
|
||||
currentPath,
|
||||
dEntry,
|
||||
roots,
|
||||
stats)
|
||||
})
|
||||
if err != nil {
|
||||
return clues.Wrap(err, "traversing base directory").WithClues(ctx)
|
||||
}
|
||||
} else {
|
||||
stats.Inc(statPruned)
|
||||
}
|
||||
|
||||
// We only need to add this base directory to the tree we're building if it
|
||||
@ -981,7 +1072,7 @@ func traverseBaseDir(
|
||||
// subdirectories. This optimization will not be valid if we dynamically
|
||||
// determine the subdirectories this directory has when handing items to
|
||||
// kopia.
|
||||
if currentPath != nil && hasItems {
|
||||
if currentPath != nil && (hasItems || !changed) {
|
||||
// Having this in the if-block has the effect of removing empty directories
|
||||
// from backups that have a base snapshot. If we'd like to preserve empty
|
||||
// directories across incremental backups, move getting the node outside of
|
||||
@ -1014,12 +1105,12 @@ func traverseBaseDir(
|
||||
stats.Inc(statRecursiveMove)
|
||||
}
|
||||
|
||||
curP, err := path.FromDataLayerPath(currentPath.String(), false)
|
||||
curP, err := path.PrefixOrPathFromDataLayerPath(currentPath.String(), false)
|
||||
if err != nil {
|
||||
return clues.New("converting current path to path.Path").WithClues(ctx)
|
||||
}
|
||||
|
||||
oldP, err := path.FromDataLayerPath(oldDirPath.String(), false)
|
||||
oldP, err := path.PrefixOrPathFromDataLayerPath(oldDirPath.String(), false)
|
||||
if err != nil {
|
||||
return clues.New("converting old path to path.Path").WithClues(ctx)
|
||||
}
|
||||
@ -1027,6 +1118,7 @@ func traverseBaseDir(
|
||||
node.baseDir = dir
|
||||
node.currentPath = curP
|
||||
node.prevPath = oldP
|
||||
node.subtreeChanged = changed
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -1070,6 +1162,9 @@ const (
|
||||
// statSkipMerge denotes the number of directories that weren't merged because
|
||||
// they were marked either DoNotMerge or New.
|
||||
statSkipMerge = "directories_skipped_merging"
|
||||
// statPruned denotes the number of subtree roots that selective subtree
|
||||
// pruning applied to.
|
||||
statPruned = "subtrees_pruned"
|
||||
)
|
||||
|
||||
func inflateBaseTree(
|
||||
@ -1163,7 +1258,8 @@ func inflateBaseTree(
|
||||
statRecursiveMove, stats.Get(statRecursiveMove),
|
||||
statDel, stats.Get(statDel),
|
||||
statRecursiveDel, stats.Get(statRecursiveDel),
|
||||
statSkipMerge, stats.Get(statSkipMerge))
|
||||
statSkipMerge, stats.Get(statSkipMerge),
|
||||
statPruned, stats.Get(statPruned))
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@ -36,6 +36,14 @@ func makePath(t *testing.T, elements []string, isItem bool) path.Path {
|
||||
return p
|
||||
}
|
||||
|
||||
func newExpectedFile(name string, fileData []byte) *expectedNode {
|
||||
return &expectedNode{
|
||||
name: name,
|
||||
data: fileData,
|
||||
children: []*expectedNode{},
|
||||
}
|
||||
}
|
||||
|
||||
// baseWithChildren returns an fs.Entry hierarchy where the first len(basic)
|
||||
// levels are the encoded values of basic in order. All items in children are
|
||||
// used as the direct descendents of the final entry in basic.
|
||||
@ -2952,3 +2960,768 @@ func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTreeSelectsMigrateSubt
|
||||
|
||||
expectTree(t, ctx, expected, dirTree)
|
||||
}
|
||||
|
||||
func newMockStaticDirectory(
|
||||
name string,
|
||||
entries []fs.Entry,
|
||||
) (fs.Directory, *int) {
|
||||
res := &mockStaticDirectory{
|
||||
Directory: virtualfs.NewStaticDirectory(name, entries),
|
||||
}
|
||||
|
||||
return res, &res.iterateCount
|
||||
}
|
||||
|
||||
type mockStaticDirectory struct {
|
||||
fs.Directory
|
||||
iterateCount int
|
||||
}
|
||||
|
||||
func (msd *mockStaticDirectory) IterateEntries(
|
||||
ctx context.Context,
|
||||
callback func(context.Context, fs.Entry) error,
|
||||
) error {
|
||||
msd.iterateCount++
|
||||
return msd.Directory.IterateEntries(ctx, callback)
|
||||
}
|
||||
|
||||
func (suite *HierarchyBuilderUnitSuite) TestBuildDirectoryTree_SelectiveSubtreePruning() {
|
||||
var (
|
||||
tenant = "tenant-id"
|
||||
service = path.OneDriveService.String()
|
||||
user = "user-id"
|
||||
category = path.FilesCategory.String()
|
||||
|
||||
// Not using drive/drive-id/root folders for brevity.
|
||||
folderID1 = "folder1-id"
|
||||
folderID2 = "folder2-id"
|
||||
folderID3 = "folder3-id"
|
||||
folderID4 = "folder4-id"
|
||||
|
||||
folderName1 = "folder1-name"
|
||||
folderName2 = "folder2-name"
|
||||
folderName3 = "folder3-name"
|
||||
folderName4 = "folder4-name"
|
||||
|
||||
fileName1 = "file1"
|
||||
fileName2 = "file2"
|
||||
fileName3 = "file3"
|
||||
fileName4 = "file4"
|
||||
fileName5 = "file5"
|
||||
fileName6 = "file6"
|
||||
fileName7 = "file7"
|
||||
fileName8 = "file8"
|
||||
|
||||
fileData1 = []byte("1")
|
||||
fileData2 = []byte("2")
|
||||
fileData3 = []byte("3")
|
||||
fileData4 = []byte("4")
|
||||
fileData5 = []byte("5")
|
||||
fileData6 = []byte("6")
|
||||
fileData7 = []byte("7")
|
||||
fileData8 = []byte("8")
|
||||
)
|
||||
|
||||
var (
|
||||
folderPath1 = makePath(
|
||||
suite.T(),
|
||||
[]string{tenant, service, user, category, folderID1},
|
||||
false)
|
||||
folderLocPath1 = makePath(
|
||||
suite.T(),
|
||||
[]string{tenant, service, user, category, folderName1},
|
||||
false)
|
||||
|
||||
folderPath2 = makePath(
|
||||
suite.T(),
|
||||
append(folderPath1.Elements(), folderID2),
|
||||
false)
|
||||
folderLocPath2 = makePath(
|
||||
suite.T(),
|
||||
append(folderLocPath1.Elements(), folderName2),
|
||||
false)
|
||||
|
||||
folderPath3 = makePath(
|
||||
suite.T(),
|
||||
append(folderPath2.Elements(), folderID3),
|
||||
false)
|
||||
|
||||
folderPath4 = makePath(
|
||||
suite.T(),
|
||||
[]string{tenant, service, user, category, folderID4},
|
||||
false)
|
||||
folderLocPath4 = makePath(
|
||||
suite.T(),
|
||||
[]string{tenant, service, user, category, folderName4},
|
||||
false)
|
||||
|
||||
prefixFolders = []string{
|
||||
tenant,
|
||||
service,
|
||||
user,
|
||||
category,
|
||||
}
|
||||
)
|
||||
|
||||
folder4Unchanged := exchMock.NewCollection(folderPath4, folderLocPath4, 0)
|
||||
folder4Unchanged.PrevPath = folderPath4
|
||||
folder4Unchanged.ColState = data.NotMovedState
|
||||
|
||||
// Must be a function that returns a new instance each time as StreamingFile
|
||||
// can only return its Reader once. Each directory below the prefix directory
|
||||
// is also wrapped in a mock so we can count the number of times
|
||||
// IterateEntries was called on it.
|
||||
// baseSnapshot with the following layout:
|
||||
// - tenant-id
|
||||
// - onedrive
|
||||
// - user-id
|
||||
// - files
|
||||
// - folder1-id
|
||||
// - file1
|
||||
// - file2
|
||||
// - folder2-id
|
||||
// - file3
|
||||
// - file4
|
||||
// - folder3-id
|
||||
// - file5
|
||||
// - file6
|
||||
// - folder4-id
|
||||
// - file7
|
||||
// - file8
|
||||
getBaseSnapshot := func() (fs.Entry, map[string]*int) {
|
||||
counters := map[string]*int{}
|
||||
|
||||
folder, count := newMockStaticDirectory(
|
||||
encodeElements(folderID3)[0],
|
||||
[]fs.Entry{
|
||||
virtualfs.StreamingFileWithModTimeFromReader(
|
||||
encodeElements(fileName5)[0],
|
||||
time.Time{},
|
||||
newBackupStreamReader(
|
||||
serializationVersion,
|
||||
io.NopCloser(bytes.NewReader(fileData5)))),
|
||||
virtualfs.StreamingFileWithModTimeFromReader(
|
||||
encodeElements(fileName6)[0],
|
||||
time.Time{},
|
||||
newBackupStreamReader(
|
||||
serializationVersion,
|
||||
io.NopCloser(bytes.NewReader(fileData6)))),
|
||||
})
|
||||
counters[folderID3] = count
|
||||
|
||||
folder, count = newMockStaticDirectory(
|
||||
encodeElements(folderID2)[0],
|
||||
[]fs.Entry{
|
||||
virtualfs.StreamingFileWithModTimeFromReader(
|
||||
encodeElements(fileName3)[0],
|
||||
time.Time{},
|
||||
newBackupStreamReader(
|
||||
serializationVersion,
|
||||
io.NopCloser(bytes.NewReader(fileData3)))),
|
||||
virtualfs.StreamingFileWithModTimeFromReader(
|
||||
encodeElements(fileName4)[0],
|
||||
time.Time{},
|
||||
newBackupStreamReader(
|
||||
serializationVersion,
|
||||
io.NopCloser(bytes.NewReader(fileData4)))),
|
||||
folder,
|
||||
})
|
||||
counters[folderID2] = count
|
||||
|
||||
folder, count = newMockStaticDirectory(
|
||||
encodeElements(folderID1)[0],
|
||||
[]fs.Entry{
|
||||
virtualfs.StreamingFileWithModTimeFromReader(
|
||||
encodeElements(fileName1)[0],
|
||||
time.Time{},
|
||||
newBackupStreamReader(
|
||||
serializationVersion,
|
||||
io.NopCloser(bytes.NewReader(fileData1)))),
|
||||
virtualfs.StreamingFileWithModTimeFromReader(
|
||||
encodeElements(fileName2)[0],
|
||||
time.Time{},
|
||||
newBackupStreamReader(
|
||||
serializationVersion,
|
||||
io.NopCloser(bytes.NewReader(fileData2)))),
|
||||
folder,
|
||||
})
|
||||
counters[folderID1] = count
|
||||
|
||||
folder2, count := newMockStaticDirectory(
|
||||
encodeElements(folderID4)[0],
|
||||
[]fs.Entry{
|
||||
virtualfs.StreamingFileWithModTimeFromReader(
|
||||
encodeElements(fileName7)[0],
|
||||
time.Time{},
|
||||
newBackupStreamReader(
|
||||
serializationVersion,
|
||||
io.NopCloser(bytes.NewReader(fileData7)))),
|
||||
virtualfs.StreamingFileWithModTimeFromReader(
|
||||
encodeElements(fileName8)[0],
|
||||
time.Time{},
|
||||
newBackupStreamReader(
|
||||
serializationVersion,
|
||||
io.NopCloser(bytes.NewReader(fileData8)))),
|
||||
})
|
||||
counters[folderID4] = count
|
||||
|
||||
return baseWithChildren(
|
||||
prefixFolders,
|
||||
[]fs.Entry{
|
||||
folder,
|
||||
folder2,
|
||||
}),
|
||||
counters
|
||||
}
|
||||
|
||||
table := []struct {
|
||||
name string
|
||||
inputCollections func(t *testing.T) []data.BackupCollection
|
||||
inputExcludes *pmMock.PrefixMap
|
||||
expected *expectedNode
|
||||
expectedIterateCounts map[string]int
|
||||
}{
|
||||
{
|
||||
// Test that even if files are excluded in the subtree selective subtree
|
||||
// pruning skips traversing, the file is properly excluded during upload.
|
||||
//
|
||||
// It's safe to prune the subtree during merging because the directory
|
||||
// layout hasn't changed. We still require traversal of all directories
|
||||
// during data upload which allows us to exclude the file properly.
|
||||
name: "NoDirectoryChanges ExcludedFile PrunesSubtree",
|
||||
inputCollections: func(t *testing.T) []data.BackupCollection {
|
||||
return []data.BackupCollection{folder4Unchanged}
|
||||
},
|
||||
inputExcludes: pmMock.NewPrefixMap(map[string]map[string]struct{}{
|
||||
"": {
|
||||
fileName3: {},
|
||||
},
|
||||
}),
|
||||
expected: expectedTreeWithChildren(
|
||||
prefixFolders,
|
||||
[]*expectedNode{
|
||||
{
|
||||
name: folderID1,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName1, fileData1),
|
||||
newExpectedFile(fileName2, fileData2),
|
||||
{
|
||||
name: folderID2,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName4, fileData4),
|
||||
{
|
||||
name: folderID3,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName5, fileData5),
|
||||
newExpectedFile(fileName6, fileData6),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: folderID4,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName7, fileData7),
|
||||
newExpectedFile(fileName8, fileData8),
|
||||
},
|
||||
},
|
||||
}),
|
||||
expectedIterateCounts: map[string]int{
|
||||
folderID1: 0,
|
||||
folderID2: 0,
|
||||
folderID3: 0,
|
||||
folderID4: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
// Test that if a subtree is deleted in its entirety selective subtree
|
||||
// pruning skips traversing it during hierarchy merging.
|
||||
name: "SubtreeDelete PrunesSubtree",
|
||||
inputCollections: func(t *testing.T) []data.BackupCollection {
|
||||
mc := exchMock.NewCollection(nil, nil, 0)
|
||||
mc.PrevPath = folderPath1
|
||||
mc.ColState = data.DeletedState
|
||||
|
||||
return []data.BackupCollection{folder4Unchanged, mc}
|
||||
},
|
||||
expected: expectedTreeWithChildren(
|
||||
prefixFolders,
|
||||
[]*expectedNode{
|
||||
{
|
||||
name: folderID4,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName7, fileData7),
|
||||
newExpectedFile(fileName8, fileData8),
|
||||
},
|
||||
},
|
||||
}),
|
||||
expectedIterateCounts: map[string]int{
|
||||
// Deleted collections aren't added to the in-memory tree.
|
||||
folderID1: 0,
|
||||
folderID2: 0,
|
||||
folderID3: 0,
|
||||
folderID4: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
// Test that if a directory is moved but the subtree rooted at the moved
|
||||
// directory is unchanged selective subtree pruning skips traversing all
|
||||
// directories under the moved directory during hierarchy merging even if
|
||||
// a new directory is created at the path of one of the unchanged (pruned)
|
||||
// subdirectories of the moved directory.
|
||||
name: "ParentMoved NewFolderAtOldPath PrunesSubtree",
|
||||
inputCollections: func(t *testing.T) []data.BackupCollection {
|
||||
newPath := makePath(
|
||||
suite.T(),
|
||||
[]string{tenant, service, user, category, "foo-id"},
|
||||
false)
|
||||
newLoc := makePath(
|
||||
suite.T(),
|
||||
[]string{tenant, service, user, category, "foo"},
|
||||
false)
|
||||
|
||||
mc := exchMock.NewCollection(newPath, newLoc, 0)
|
||||
mc.PrevPath = folderPath1
|
||||
mc.ColState = data.MovedState
|
||||
|
||||
newMC := exchMock.NewCollection(folderPath2, folderLocPath2, 0)
|
||||
|
||||
return []data.BackupCollection{folder4Unchanged, mc, newMC}
|
||||
},
|
||||
expected: expectedTreeWithChildren(
|
||||
prefixFolders,
|
||||
[]*expectedNode{
|
||||
{
|
||||
name: "foo-id",
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName1, fileData1),
|
||||
newExpectedFile(fileName2, fileData2),
|
||||
{
|
||||
name: folderID2,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName3, fileData3),
|
||||
newExpectedFile(fileName4, fileData4),
|
||||
{
|
||||
name: folderID3,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName5, fileData5),
|
||||
newExpectedFile(fileName6, fileData6),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: folderID1,
|
||||
children: []*expectedNode{
|
||||
{
|
||||
name: folderID2,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: folderID4,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName7, fileData7),
|
||||
newExpectedFile(fileName8, fileData8),
|
||||
},
|
||||
},
|
||||
}),
|
||||
expectedIterateCounts: map[string]int{
|
||||
folderID1: 1,
|
||||
folderID2: 0,
|
||||
folderID3: 0,
|
||||
folderID4: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
// Test that if a directory and its subtree is deleted in its entirety
|
||||
// selective subtree pruning skips traversing the subtree during hierarchy
|
||||
// merging even if a new directory is created at the path of one of the
|
||||
// deleted (pruned) directories.
|
||||
name: "SubtreeDelete NewFolderAtOldPath PrunesSubtree",
|
||||
inputCollections: func(t *testing.T) []data.BackupCollection {
|
||||
mc := exchMock.NewCollection(nil, nil, 0)
|
||||
mc.PrevPath = folderPath2
|
||||
mc.ColState = data.DeletedState
|
||||
|
||||
newMC := exchMock.NewCollection(folderPath2, folderLocPath2, 0)
|
||||
|
||||
return []data.BackupCollection{folder4Unchanged, mc, newMC}
|
||||
},
|
||||
expected: expectedTreeWithChildren(
|
||||
prefixFolders,
|
||||
[]*expectedNode{
|
||||
{
|
||||
name: folderID1,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName1, fileData1),
|
||||
newExpectedFile(fileName2, fileData2),
|
||||
{
|
||||
name: folderID2,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: folderID4,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName7, fileData7),
|
||||
newExpectedFile(fileName8, fileData8),
|
||||
},
|
||||
},
|
||||
}),
|
||||
expectedIterateCounts: map[string]int{
|
||||
folderID1: 1,
|
||||
// Deleted collections aren't added to the in-memory tree.
|
||||
folderID2: 0,
|
||||
folderID3: 0,
|
||||
folderID4: 1,
|
||||
},
|
||||
},
|
||||
// These tests check that subtree pruning isn't triggered.
|
||||
{
|
||||
// Test that creating a new directory in an otherwise unchanged subtree
|
||||
// doesn't trigger selective subtree merging for any subtree of the full
|
||||
// hierarchy that includes the new directory but does trigger selective
|
||||
// subtree pruning for unchanged subtrees without the new directory.
|
||||
name: "NewDirectory DoesntPruneSubtree",
|
||||
inputCollections: func(t *testing.T) []data.BackupCollection {
|
||||
newP, err := folderPath2.Append(false, "foo-id")
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
newL, err := folderLocPath2.Append(false, "foo")
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
newMC := exchMock.NewCollection(newP, newL, 0)
|
||||
|
||||
return []data.BackupCollection{folder4Unchanged, newMC}
|
||||
},
|
||||
expected: expectedTreeWithChildren(
|
||||
prefixFolders,
|
||||
[]*expectedNode{
|
||||
{
|
||||
name: folderID1,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName1, fileData1),
|
||||
newExpectedFile(fileName2, fileData2),
|
||||
{
|
||||
name: folderID2,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName3, fileData3),
|
||||
newExpectedFile(fileName4, fileData4),
|
||||
{
|
||||
name: "foo-id",
|
||||
},
|
||||
{
|
||||
name: folderID3,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName5, fileData5),
|
||||
newExpectedFile(fileName6, fileData6),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: folderID4,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName7, fileData7),
|
||||
newExpectedFile(fileName8, fileData8),
|
||||
},
|
||||
},
|
||||
}),
|
||||
expectedIterateCounts: map[string]int{
|
||||
folderID1: 1,
|
||||
folderID2: 1,
|
||||
// Folder 3 triggers pruning because it has nothing changed under it.
|
||||
folderID3: 0,
|
||||
folderID4: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
// Test that moving a directory within a subtree doesn't trigger selective
|
||||
// subtree merging for any subtree of the full hierarchy that includes the
|
||||
// moved directory.
|
||||
name: "MoveWithinSubtree DoesntPruneSubtree",
|
||||
inputCollections: func(t *testing.T) []data.BackupCollection {
|
||||
newP, err := folderPath1.Append(false, folderID3)
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
newL, err := folderLocPath1.Append(false, folderName3)
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
mc := exchMock.NewCollection(newP, newL, 0)
|
||||
mc.PrevPath = folderPath3
|
||||
mc.ColState = data.MovedState
|
||||
|
||||
return []data.BackupCollection{folder4Unchanged, mc}
|
||||
},
|
||||
expected: expectedTreeWithChildren(
|
||||
prefixFolders,
|
||||
[]*expectedNode{
|
||||
{
|
||||
name: folderID1,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName1, fileData1),
|
||||
newExpectedFile(fileName2, fileData2),
|
||||
{
|
||||
name: folderID2,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName3, fileData3),
|
||||
newExpectedFile(fileName4, fileData4),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: folderID3,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName5, fileData5),
|
||||
newExpectedFile(fileName6, fileData6),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: folderID4,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName7, fileData7),
|
||||
newExpectedFile(fileName8, fileData8),
|
||||
},
|
||||
},
|
||||
}),
|
||||
expectedIterateCounts: map[string]int{
|
||||
folderID1: 1,
|
||||
// Folder 2 can't be pruned because there's subtree changes under it
|
||||
// (folder3 move).
|
||||
folderID2: 1,
|
||||
// Folder 3 can't be pruned because it has a collection associated with
|
||||
// it.
|
||||
folderID3: 1,
|
||||
folderID4: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
// Test that moving a directory out of a subtree doesn't trigger selective
|
||||
// subtree merging for any subtree of the full hierarchy that includes the
|
||||
// moved directory in the previous hierarchy.
|
||||
name: "MoveOutOfSubtree DoesntPruneSubtree",
|
||||
inputCollections: func(t *testing.T) []data.BackupCollection {
|
||||
newP := makePath(
|
||||
suite.T(),
|
||||
[]string{tenant, service, user, category, folderID3},
|
||||
false)
|
||||
newL := makePath(
|
||||
suite.T(),
|
||||
[]string{tenant, service, user, category, folderName3},
|
||||
false)
|
||||
|
||||
mc := exchMock.NewCollection(newP, newL, 0)
|
||||
mc.PrevPath = folderPath3
|
||||
mc.ColState = data.MovedState
|
||||
|
||||
return []data.BackupCollection{folder4Unchanged, mc}
|
||||
},
|
||||
expected: expectedTreeWithChildren(
|
||||
prefixFolders,
|
||||
[]*expectedNode{
|
||||
{
|
||||
name: folderID1,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName1, fileData1),
|
||||
newExpectedFile(fileName2, fileData2),
|
||||
{
|
||||
name: folderID2,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName3, fileData3),
|
||||
newExpectedFile(fileName4, fileData4),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: folderID3,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName5, fileData5),
|
||||
newExpectedFile(fileName6, fileData6),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: folderID4,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName7, fileData7),
|
||||
newExpectedFile(fileName8, fileData8),
|
||||
},
|
||||
},
|
||||
}),
|
||||
expectedIterateCounts: map[string]int{
|
||||
folderID1: 1,
|
||||
// Folder 2 can't be pruned because there's subtree changes under it
|
||||
// (folder3 move).
|
||||
folderID2: 1,
|
||||
// Folder 3 can't be pruned because it has a collection associated with
|
||||
// it.
|
||||
folderID3: 1,
|
||||
folderID4: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
// Test that deleting a directory in a subtree doesn't trigger selective
|
||||
// subtree merging for any subtree of the full hierarchy that includes the
|
||||
// deleted directory in the previous hierarchy.
|
||||
name: "DeleteInSubtree DoesntPruneSubtree",
|
||||
inputCollections: func(t *testing.T) []data.BackupCollection {
|
||||
mc := exchMock.NewCollection(nil, nil, 0)
|
||||
mc.PrevPath = folderPath3
|
||||
mc.ColState = data.DeletedState
|
||||
|
||||
return []data.BackupCollection{folder4Unchanged, mc}
|
||||
},
|
||||
expected: expectedTreeWithChildren(
|
||||
prefixFolders,
|
||||
[]*expectedNode{
|
||||
{
|
||||
name: folderID1,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName1, fileData1),
|
||||
newExpectedFile(fileName2, fileData2),
|
||||
{
|
||||
name: folderID2,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName3, fileData3),
|
||||
newExpectedFile(fileName4, fileData4),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: folderID4,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName7, fileData7),
|
||||
newExpectedFile(fileName8, fileData8),
|
||||
},
|
||||
},
|
||||
}),
|
||||
expectedIterateCounts: map[string]int{
|
||||
folderID1: 1,
|
||||
// Folder 2 can't be pruned because there's subtree changes under it
|
||||
// (folder3 delete).
|
||||
folderID2: 1,
|
||||
// Folder3 is pruned because there's no changes under it.
|
||||
folderID3: 0,
|
||||
folderID4: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
// Test that moving an existing directory into a subtree doesn't trigger
|
||||
// selective subtree merging for any subtree of the full hierarchy that
|
||||
// includes the moved directory in the current hierarchy.
|
||||
name: "MoveIntoSubtree DoesntPruneSubtree",
|
||||
inputCollections: func(t *testing.T) []data.BackupCollection {
|
||||
newP, err := folderPath1.Append(false, folderID4)
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
newL, err := folderLocPath1.Append(false, folderName4)
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
mc := exchMock.NewCollection(newP, newL, 0)
|
||||
mc.PrevPath = folderPath4
|
||||
mc.ColState = data.MovedState
|
||||
|
||||
return []data.BackupCollection{mc}
|
||||
},
|
||||
expected: expectedTreeWithChildren(
|
||||
prefixFolders,
|
||||
[]*expectedNode{
|
||||
{
|
||||
name: folderID1,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName1, fileData1),
|
||||
newExpectedFile(fileName2, fileData2),
|
||||
{
|
||||
name: folderID2,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName3, fileData3),
|
||||
newExpectedFile(fileName4, fileData4),
|
||||
{
|
||||
name: folderID3,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName5, fileData5),
|
||||
newExpectedFile(fileName6, fileData6),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: folderID4,
|
||||
children: []*expectedNode{
|
||||
newExpectedFile(fileName7, fileData7),
|
||||
newExpectedFile(fileName8, fileData8),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}),
|
||||
expectedIterateCounts: map[string]int{
|
||||
// Folder 1 can't be pruned because there's subtree changes under it
|
||||
// (folder4 move).
|
||||
folderID1: 1,
|
||||
// Folder 2 is pruned because nothing changes below it and it has no
|
||||
// collection associated with it.
|
||||
folderID2: 0,
|
||||
folderID3: 0,
|
||||
// Folder 4 can't be pruned because it has a collection associated with
|
||||
// it.
|
||||
folderID4: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range table {
|
||||
suite.Run(test.name, func() {
|
||||
t := suite.T()
|
||||
|
||||
ctx, flush := tester.NewContext(t)
|
||||
defer flush()
|
||||
|
||||
progress := &corsoProgress{
|
||||
ctx: ctx,
|
||||
pending: map[string]*itemDetails{},
|
||||
toMerge: newMergeDetails(),
|
||||
errs: fault.New(true),
|
||||
}
|
||||
snapshotRoot, counters := getBaseSnapshot()
|
||||
msw := &mockSnapshotWalker{
|
||||
snapshotRoot: snapshotRoot,
|
||||
}
|
||||
|
||||
ie := pmMock.NewPrefixMap(nil)
|
||||
if test.inputExcludes != nil {
|
||||
ie = test.inputExcludes
|
||||
}
|
||||
|
||||
dirTree, err := inflateDirTree(
|
||||
ctx,
|
||||
msw,
|
||||
[]ManifestEntry{
|
||||
makeManifestEntry("", tenant, user, path.OneDriveService, path.FilesCategory),
|
||||
},
|
||||
test.inputCollections(t),
|
||||
ie,
|
||||
progress)
|
||||
require.NoError(t, err, clues.ToCore(err))
|
||||
|
||||
// Check iterate counts before checking tree content as checking tree
|
||||
// content can disturb the counter values.
|
||||
for name, count := range test.expectedIterateCounts {
|
||||
c, ok := counters[name]
|
||||
assert.True(t, ok, "unexpected counter %q", name)
|
||||
assert.Equal(t, count, *c, "folder %q iterate count", name)
|
||||
}
|
||||
|
||||
expectTree(t, ctx, test.expected, dirTree)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user